99
1010import numpy as np
1111
12- from pandas ._libs import internals as libinternals
12+ from pandas ._libs import (
13+ NaT ,
14+ internals as libinternals ,
15+ )
1316from pandas ._typing import (
1417 ArrayLike ,
1518 DtypeObj ,
@@ -383,59 +386,21 @@ def is_na(self) -> bool:
383386 return True
384387 return False
385388
386- def get_reindexed_values (self , empty_dtype : DtypeObj , upcasted_na ) -> ArrayLike :
389+ def get_reindexed_values (self , empty_dtype : DtypeObj ) -> ArrayLike :
387390 values : ArrayLike
388391
389- if upcasted_na is None and not self .is_na :
390- # No upcasting is necessary
391- fill_value = self .block .fill_value
392- values = self .block .get_values ()
392+ if self .is_na :
393+ return make_na_array (empty_dtype , self .shape )
394+
393395 else :
394- fill_value = upcasted_na
395-
396- if self .is_na :
397-
398- if is_datetime64tz_dtype (empty_dtype ):
399- i8values = np .full (self .shape , fill_value .value )
400- return DatetimeArray (i8values , dtype = empty_dtype )
401-
402- elif is_1d_only_ea_dtype (empty_dtype ):
403- empty_dtype = cast (ExtensionDtype , empty_dtype )
404- cls = empty_dtype .construct_array_type ()
405-
406- missing_arr = cls ._from_sequence ([], dtype = empty_dtype )
407- ncols , nrows = self .shape
408- assert ncols == 1 , ncols
409- empty_arr = - 1 * np .ones ((nrows ,), dtype = np .intp )
410- return missing_arr .take (
411- empty_arr , allow_fill = True , fill_value = fill_value
412- )
413- elif isinstance (empty_dtype , ExtensionDtype ):
414- # TODO: no tests get here, a handful would if we disabled
415- # the dt64tz special-case above (which is faster)
416- cls = empty_dtype .construct_array_type ()
417- missing_arr = cls ._empty (shape = self .shape , dtype = empty_dtype )
418- missing_arr [:] = fill_value
419- return missing_arr
420- else :
421- # NB: we should never get here with empty_dtype integer or bool;
422- # if we did, the missing_arr.fill would cast to gibberish
423- missing_arr = np .empty (self .shape , dtype = empty_dtype )
424- missing_arr .fill (fill_value )
425- return missing_arr
426396
427397 if (not self .indexers ) and (not self .block ._can_consolidate ):
428398 # preserve these for validation in concat_compat
429399 return self .block .values
430400
431- if self .block .is_bool :
432- # External code requested filling/upcasting, bool values must
433- # be upcasted to object to avoid being upcasted to numeric.
434- values = self .block .astype (np .object_ ).values
435- else :
436- # No dtype upcasting is done here, it will be performed during
437- # concatenation itself.
438- values = self .block .values
401+ # No dtype upcasting is done here, it will be performed during
402+ # concatenation itself.
403+ values = self .block .values
439404
440405 if not self .indexers :
441406 # If there's no indexing to be done, we want to signal outside
@@ -450,6 +415,40 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
450415 return values
451416
452417
def make_na_array(dtype: DtypeObj, shape: Shape) -> ArrayLike:
    """
    Build an all-NA array (np.ndarray or ExtensionArray) for ``dtype``.

    Parameters
    ----------
    dtype : DtypeObj
        Dtype of the array to create.
    shape : Shape
        Requested shape. For 1D-only extension dtypes only ``shape[-1]``
        is used, as the length of the result.

    Returns
    -------
    ArrayLike
        Array of the given dtype in which every element is the NA value.
    """
    if is_datetime64tz_dtype(dtype):
        # Fast path for tz-aware datetimes: fill the integer backing data
        # with NaT's value; NaT plays the role dtype.na_value plays below.
        na_i8 = np.full(shape, NaT.value)
        return DatetimeArray(na_i8, dtype=dtype)

    if is_1d_only_ea_dtype(dtype):
        dtype = cast(ExtensionDtype, dtype)
        arr_type = dtype.construct_array_type()

        # Start from an empty array and "take" -1 at every position with
        # allow_fill=True, so each slot is filled with dtype.na_value.
        empty = arr_type._from_sequence([], dtype=dtype)
        length = shape[-1]
        all_missing = np.full((length,), -1, dtype=np.intp)
        return empty.take(all_missing, allow_fill=True, fill_value=dtype.na_value)

    if isinstance(dtype, ExtensionDtype):
        # TODO: no tests get here, a handful would if we disabled
        #  the dt64tz special-case above (which is faster)
        arr_type = dtype.construct_array_type()
        result = arr_type._empty(shape=shape, dtype=dtype)
        result[:] = dtype.na_value
        return result

    # Plain numpy dtype.
    # NB: we should never get here with dtype integer or bool;
    #  if we did, the fill below would cast to gibberish
    result = np.empty(shape, dtype=dtype)
    result.fill(_dtype_to_na_value(dtype))
    return result
450+
451+
453452def _concatenate_join_units (
454453 join_units : list [JoinUnit ], concat_axis : int , copy : bool
455454) -> ArrayLike :
@@ -462,12 +461,7 @@ def _concatenate_join_units(
462461
463462 empty_dtype = _get_empty_dtype (join_units )
464463
465- upcasted_na = _dtype_to_na_value (empty_dtype )
466-
467- to_concat = [
468- ju .get_reindexed_values (empty_dtype = empty_dtype , upcasted_na = upcasted_na )
469- for ju in join_units
470- ]
464+ to_concat = [ju .get_reindexed_values (empty_dtype = empty_dtype ) for ju in join_units ]
471465
472466 if len (to_concat ) == 1 :
473467 # Only one block, nothing to concatenate.
0 commit comments