124124from pandas .core .dtypes .generic import (
125125 ABCDataFrame ,
126126 ABCDatetimeIndex ,
127+ ABCIntervalIndex ,
127128 ABCMultiIndex ,
128129 ABCPeriodIndex ,
129130 ABCSeries ,
@@ -1396,8 +1397,8 @@ def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]
13961397
13971398 values = self ._values
13981399
1399- if is_object_dtype (values .dtype ):
1400- values = cast ( np .ndarray , values )
1400+ if is_object_dtype (values .dtype ) or is_string_dtype ( values . dtype ) :
1401+ values = np .asarray ( values )
14011402 values = lib .maybe_convert_objects (values , safe = True )
14021403
14031404 result = [pprint_thing (x , escape_chars = ("\t " , "\r " , "\n " )) for x in values ]
@@ -3492,8 +3493,6 @@ def _intersection(self, other: Index, sort: bool = False):
34923493 and other .is_monotonic_increasing
34933494 and self ._can_use_libjoin
34943495 and other ._can_use_libjoin
3495- and not isinstance (self , ABCMultiIndex )
3496- and not isinstance (other , ABCMultiIndex )
34973496 ):
34983497 try :
34993498 res_indexer , indexer , _ = self ._inner_indexer (other )
@@ -4632,28 +4631,13 @@ def join(
46324631
46334632 _validate_join_method (how )
46344633
4635- if not self .is_unique and not other .is_unique :
4636- return self ._join_non_unique (other , how = how , sort = sort )
4637- elif not self .is_unique or not other .is_unique :
4638- if self .is_monotonic_increasing and other .is_monotonic_increasing :
4639- # Note: 2023-08-15 we *do* have tests that get here with
4640- # Categorical, string[python] (can use libjoin)
4641- # and Interval (cannot)
4642- if self ._can_use_libjoin and other ._can_use_libjoin :
4643- # otherwise we will fall through to _join_via_get_indexer
4644- # GH#39133
4645- # go through object dtype for ea till engine is supported properly
4646- return self ._join_monotonic (other , how = how )
4647- else :
4648- return self ._join_non_unique (other , how = how , sort = sort )
4649- elif (
4650- # GH48504: exclude MultiIndex to avoid going through MultiIndex._values
4651- self .is_monotonic_increasing
4634+ if (
4635+ not isinstance (self .dtype , CategoricalDtype )
4636+ and self .is_monotonic_increasing
46524637 and other .is_monotonic_increasing
46534638 and self ._can_use_libjoin
46544639 and other ._can_use_libjoin
4655- and not isinstance (self , ABCMultiIndex )
4656- and not isinstance (self .dtype , CategoricalDtype )
4640+ and (self .is_unique or other .is_unique )
46574641 ):
46584642 # Categorical is monotonic if data are ordered as categories, but join can
46594643 # not handle this in case of not lexicographically monotonic GH#38502
@@ -4662,6 +4646,8 @@ def join(
46624646 except TypeError :
46634647 # object dtype; non-comparable objects
46644648 pass
4649+ elif not self .is_unique or not other .is_unique :
4650+ return self ._join_non_unique (other , how = how , sort = sort )
46654651
46664652 return self ._join_via_get_indexer (other , how , sort )
46674653
@@ -4797,6 +4783,9 @@ def _join_non_unique(
47974783 join_idx = self .take (left_idx )
47984784 right = other .take (right_idx )
47994785 join_index = join_idx .putmask (mask , right )
4786+ if isinstance (join_index , ABCMultiIndex ) and how == "outer" :
4787+ # test_join_index_levels
4788+ join_index = join_index ._sort_levels_monotonic ()
48004789 return join_index , left_idx , right_idx
48014790
48024791 @final
@@ -5042,10 +5031,10 @@ def _can_use_libjoin(self) -> bool:
50425031 or isinstance (self ._values , (ArrowExtensionArray , BaseMaskedArray ))
50435032 or self .dtype == "string[python]"
50445033 )
5045- # For IntervalIndex, the conversion to numpy converts
5046- # to object dtype, which negates the performance benefit of libjoin
5047- # TODO: exclude RangeIndex and MultiIndex as these also make copies?
5048- return not isinstance (self . dtype , IntervalDtype )
5034+ # Exclude index types where the conversion to numpy converts to object dtype,
5035+ # which negates the performance benefit of libjoin
5036+ # TODO: exclude RangeIndex? Seems to break test_concat_datetime_timezone
5037+ return not isinstance (self , ( ABCIntervalIndex , ABCMultiIndex ) )
50495038
50505039 # --------------------------------------------------------------------
50515040 # Uncategorized Methods
@@ -5180,8 +5169,7 @@ def _get_join_target(self) -> np.ndarray:
51805169 # present
51815170 return self ._values .to_numpy ()
51825171
5183- # TODO: exclude ABCRangeIndex, ABCMultiIndex cases here as those create
5184- # copies.
5172+ # TODO: exclude ABCRangeIndex case here as it copies
51855173 target = self ._get_engine_target ()
51865174 if not isinstance (target , np .ndarray ):
51875175 raise ValueError ("_can_use_libjoin should return False." )
@@ -5194,7 +5182,7 @@ def _from_join_target(self, result: np.ndarray) -> ArrayLike:
51945182 """
51955183 if isinstance (self .values , BaseMaskedArray ):
51965184 return type (self .values )(result , np .zeros (result .shape , dtype = np .bool_ ))
5197- elif isinstance (self .values , ArrowExtensionArray ):
5185+ elif isinstance (self .values , ( ArrowExtensionArray , StringArray ) ):
51985186 return type (self .values )._from_sequence (result )
51995187 return result
52005188
0 commit comments