3737)
3838from pandas .core .dtypes .dtypes import ExtensionDtype
3939
40- import pandas .core .algorithms as algos
4140from pandas .core .arrays import (
4241 DatetimeArray ,
4342 ExtensionArray ,
@@ -189,19 +188,29 @@ def concatenate_managers(
189188 if isinstance (mgrs_indexers [0 ][0 ], ArrayManager ):
190189 return _concatenate_array_managers (mgrs_indexers , axes , concat_axis , copy )
191190
191+ # Assertions disabled for performance
192+ # for tup in mgrs_indexers:
193+ # # caller is responsible for ensuring this
194+ # indexers = tup[1]
195+ # assert concat_axis not in indexers
196+
197+ if concat_axis == 0 :
198+ return _concat_managers_axis0 (mgrs_indexers , axes , copy )
199+
192200 mgrs_indexers = _maybe_reindex_columns_na_proxy (axes , mgrs_indexers )
193201
194- concat_plans = [
195- _get_mgr_concatenation_plan (mgr , indexers ) for mgr , indexers in mgrs_indexers
196- ]
197- concat_plan = _combine_concat_plans (concat_plans , concat_axis )
202+ # Assertion disabled for performance
203+ # assert all(not x[1] for x in mgrs_indexers)
204+
205+ concat_plans = [_get_mgr_concatenation_plan (mgr ) for mgr , _ in mgrs_indexers ]
206+ concat_plan = _combine_concat_plans (concat_plans )
198207 blocks = []
199208
200209 for placement , join_units in concat_plan :
201210 unit = join_units [0 ]
202211 blk = unit .block
203212
204- if len (join_units ) == 1 and not join_units [ 0 ]. indexers :
213+ if len (join_units ) == 1 :
205214 values = blk .values
206215 if copy :
207216 values = values .copy ()
@@ -225,7 +234,7 @@ def concatenate_managers(
225234
226235 fastpath = blk .values .dtype == values .dtype
227236 else :
228- values = _concatenate_join_units (join_units , concat_axis , copy = copy )
237+ values = _concatenate_join_units (join_units , copy = copy )
229238 fastpath = False
230239
231240 if fastpath :
@@ -238,6 +247,42 @@ def concatenate_managers(
238247 return BlockManager (tuple (blocks ), axes )
239248
240249
def _concat_managers_axis0(
    mgrs_indexers, axes: list[Index], copy: bool
) -> BlockManager:
    """
    concat_managers specialized to concat_axis=0, with reindexing already
    having been done in _maybe_reindex_columns_na_proxy.
    """
    # Record, per manager, whether any reindexing is pending *before* the
    # reindex step below strips the indexers; a reindexed manager already
    # holds freshly-created blocks, so no further defensive copy is needed.
    reindexed_flags = [bool(indexers) for _, indexers in mgrs_indexers]

    mgrs_indexers = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers)

    blocks = []
    offset = 0
    for was_reindexed, (mgr, _) in zip(reindexed_flags, mgrs_indexers):
        for blk in mgr.blocks:
            if was_reindexed:
                # Reindexing already made a copy; a shallow copy suffices.
                nb = blk.copy(deep=False)
            elif copy:
                nb = blk.copy()
            else:
                # by slicing instead of copy(deep=False), we get a new array
                # object, see test_concat_copy
                nb = blk.getitem_block(slice(None))
            # Shift the block's row placement to its position in the
            # stacked result.
            nb._mgr_locs = nb._mgr_locs.add(offset)
            blocks.append(nb)
        offset += len(mgr.items)

    return BlockManager(tuple(blocks), axes)
284+
285+
241286def _maybe_reindex_columns_na_proxy (
242287 axes : list [Index ], mgrs_indexers : list [tuple [BlockManager , dict [int , np .ndarray ]]]
243288) -> list [tuple [BlockManager , dict [int , np .ndarray ]]]:
@@ -248,36 +293,33 @@ def _maybe_reindex_columns_na_proxy(
248293 Columns added in this reindexing have dtype=np.void, indicating they
249294 should be ignored when choosing a column's final dtype.
250295 """
251- new_mgrs_indexers = []
296+ new_mgrs_indexers : list [tuple [BlockManager , dict [int , np .ndarray ]]] = []
297+
252298 for mgr , indexers in mgrs_indexers :
253- # We only reindex for axis=0 (i.e. columns), as this can be done cheaply
254- if 0 in indexers :
255- new_mgr = mgr .reindex_indexer (
256- axes [0 ],
257- indexers [0 ],
258- axis = 0 ,
299+ # For axis=0 (i.e. columns) we use_na_proxy and only_slice, so this
300+ # is a cheap reindexing.
301+ for i , indexer in indexers .items ():
302+ mgr = mgr .reindex_indexer (
303+ axes [i ],
304+ indexers [i ],
305+ axis = i ,
259306 copy = False ,
260- only_slice = True ,
307+ only_slice = True , # only relevant for i==0
261308 allow_dups = True ,
262- use_na_proxy = True ,
309+ use_na_proxy = True , # only relevant for i==0
263310 )
264- new_indexers = indexers .copy ()
265- del new_indexers [0 ]
266- new_mgrs_indexers .append ((new_mgr , new_indexers ))
267- else :
268- new_mgrs_indexers .append ((mgr , indexers ))
311+ new_mgrs_indexers .append ((mgr , {}))
269312
270313 return new_mgrs_indexers
271314
272315
273- def _get_mgr_concatenation_plan (mgr : BlockManager , indexers : dict [ int , np . ndarray ] ):
316+ def _get_mgr_concatenation_plan (mgr : BlockManager ):
274317 """
275- Construct concatenation plan for given block manager and indexers .
318+ Construct concatenation plan for given block manager.
276319
277320 Parameters
278321 ----------
279322 mgr : BlockManager
280- indexers : dict of {axis: indexer}
281323
282324 Returns
283325 -------
@@ -287,27 +329,11 @@ def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarra
287329 # Calculate post-reindex shape , save for item axis which will be separate
288330 # for each block anyway.
289331 mgr_shape_list = list (mgr .shape )
290- for ax , indexer in indexers .items ():
291- mgr_shape_list [ax ] = len (indexer )
292332 mgr_shape = tuple (mgr_shape_list )
293333
294- assert 0 not in indexers
295-
296- needs_filling = False
297- if 1 in indexers :
298- # indexers[1] is shared by all the JoinUnits, so we can save time
299- # by only doing this check once
300- if (indexers [1 ] == - 1 ).any ():
301- needs_filling = True
302-
303334 if mgr .is_single_block :
304335 blk = mgr .blocks [0 ]
305- return [
306- (
307- blk .mgr_locs ,
308- JoinUnit (blk , mgr_shape , indexers , needs_filling = needs_filling ),
309- )
310- ]
336+ return [(blk .mgr_locs , JoinUnit (blk , mgr_shape ))]
311337
312338 blknos = mgr .blknos
313339 blklocs = mgr .blklocs
@@ -318,8 +344,6 @@ def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarra
318344 assert placements .is_slice_like
319345 assert blkno != - 1
320346
321- join_unit_indexers = indexers .copy ()
322-
323347 shape_list = list (mgr_shape )
324348 shape_list [0 ] = len (placements )
325349 shape = tuple (shape_list )
@@ -351,30 +375,21 @@ def _get_mgr_concatenation_plan(mgr: BlockManager, indexers: dict[int, np.ndarra
351375 # Assertions disabled for performance
352376 # assert blk._mgr_locs.as_slice == placements.as_slice
353377 # assert blk.shape[0] == shape[0]
354- unit = JoinUnit (blk , shape , join_unit_indexers , needs_filling = needs_filling )
378+ unit = JoinUnit (blk , shape )
355379
356380 plan .append ((placements , unit ))
357381
358382 return plan
359383
360384
361385class JoinUnit :
362- def __init__ (
363- self , block : Block , shape : Shape , indexers = None , * , needs_filling : bool = False
364- ):
386+ def __init__ (self , block : Block , shape : Shape ):
365387 # Passing shape explicitly is required for cases when block is None.
366- # Note: block is None implies indexers is None, but not vice-versa
367- if indexers is None :
368- indexers = {}
369- # we should *never* have `0 in indexers`
370388 self .block = block
371- self .indexers = indexers
372389 self .shape = shape
373390
374- self .needs_filling = needs_filling
375-
376391 def __repr__ (self ) -> str :
377- return f"{ type (self ).__name__ } ({ repr (self .block )} , { self . indexers } )"
392+ return f"{ type (self ).__name__ } ({ repr (self .block )} )"
378393
379394 @cache_readonly
380395 def is_na (self ) -> bool :
@@ -391,24 +406,14 @@ def get_reindexed_values(self, empty_dtype: DtypeObj) -> ArrayLike:
391406
392407 else :
393408
394- if ( not self .indexers ) and ( not self . block ._can_consolidate ) :
409+ if not self .block ._can_consolidate :
395410 # preserve these for validation in concat_compat
396411 return self .block .values
397412
398413 # No dtype upcasting is done here, it will be performed during
399414 # concatenation itself.
400415 values = self .block .values
401416
402- if not self .indexers :
403- # If there's no indexing to be done, we want to signal outside
404- # code that this array must be copied explicitly. This is done
405- # by returning a view and checking `retval.base`.
406- values = values .view ()
407-
408- else :
409- for ax , indexer in self .indexers .items ():
410- values = algos .take_nd (values , indexer , axis = ax )
411-
412417 return values
413418
414419
@@ -446,15 +451,10 @@ def make_na_array(dtype: DtypeObj, shape: Shape) -> ArrayLike:
446451 return missing_arr
447452
448453
449- def _concatenate_join_units (
450- join_units : list [JoinUnit ], concat_axis : int , copy : bool
451- ) -> ArrayLike :
454+ def _concatenate_join_units (join_units : list [JoinUnit ], copy : bool ) -> ArrayLike :
452455 """
453- Concatenate values from several join units along selected axis.
456+ Concatenate values from several join units along axis=1 .
454457 """
455- if concat_axis == 0 and len (join_units ) > 1 :
456- # Concatenating join units along ax0 is handled in _merge_blocks.
457- raise AssertionError ("Concatenating join units along axis0" )
458458
459459 empty_dtype = _get_empty_dtype (join_units )
460460
@@ -488,7 +488,7 @@ def _concatenate_join_units(
488488 concat_values = ensure_block_shape (concat_values , 2 )
489489
490490 else :
491- concat_values = concat_compat (to_concat , axis = concat_axis )
491+ concat_values = concat_compat (to_concat , axis = 1 )
492492
493493 return concat_values
494494
@@ -532,7 +532,7 @@ def _get_empty_dtype(join_units: Sequence[JoinUnit]) -> DtypeObj:
532532 empty_dtype = join_units [0 ].block .dtype
533533 return empty_dtype
534534
535- needs_can_hold_na = any (unit .is_na or unit . needs_filling for unit in join_units )
535+ needs_can_hold_na = any (unit .is_na for unit in join_units )
536536
537537 dtypes = [unit .block .dtype for unit in join_units if not unit .is_na ]
538538
@@ -569,9 +569,6 @@ def _is_uniform_join_units(join_units: list[JoinUnit]) -> bool:
569569 # unless we're an extension dtype.
570570 all (not ju .is_na or ju .block .is_extension for ju in join_units )
571571 and
572- # no blocks with indexers (as then the dimensions do not fit)
573- all (not ju .indexers for ju in join_units )
574- and
575572 # only use this path when there is something to concatenate
576573 len (join_units ) > 1
577574 )
@@ -591,25 +588,17 @@ def _trim_join_unit(join_unit: JoinUnit, length: int) -> JoinUnit:
591588
592589 Extra items that didn't fit are returned as a separate block.
593590 """
594- assert 0 not in join_unit .indexers
595- extra_indexers = join_unit .indexers
596591
597592 extra_block = join_unit .block .getitem_block (slice (length , None ))
598593 join_unit .block = join_unit .block .getitem_block (slice (length ))
599594
600595 extra_shape = (join_unit .shape [0 ] - length ,) + join_unit .shape [1 :]
601596 join_unit .shape = (length ,) + join_unit .shape [1 :]
602597
603- # extra_indexers does not introduce any -1s, so we can inherit needs_filling
604- return JoinUnit (
605- block = extra_block ,
606- indexers = extra_indexers ,
607- shape = extra_shape ,
608- needs_filling = join_unit .needs_filling ,
609- )
598+ return JoinUnit (block = extra_block , shape = extra_shape )
610599
611600
612- def _combine_concat_plans (plans , concat_axis : int ):
601+ def _combine_concat_plans (plans ):
613602 """
614603 Combine multiple concatenation plans into one.
615604
@@ -619,18 +608,6 @@ def _combine_concat_plans(plans, concat_axis: int):
619608 for p in plans [0 ]:
620609 yield p [0 ], [p [1 ]]
621610
622- elif concat_axis == 0 :
623- offset = 0
624- for plan in plans :
625- last_plc = None
626-
627- for plc , unit in plan :
628- yield plc .add (offset ), [unit ]
629- last_plc = plc
630-
631- if last_plc is not None :
632- offset += last_plc .as_slice .stop
633-
634611 else :
635612 # singleton list so we can modify it as a side-effect within _next_or_none
636613 num_ended = [0 ]
0 commit comments