1414)
1515from pandas .tests .strings import (
1616 _convert_na_value ,
17- object_pyarrow_numpy ,
17+ is_object_or_nan_string_dtype ,
1818)
1919
2020# --------------------------------------------------------------------------------------
@@ -34,7 +34,9 @@ def test_contains(any_string_dtype):
3434 pat = "mmm[_]+"
3535
3636 result = values .str .contains (pat )
37- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
37+ expected_dtype = (
38+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
39+ )
3840 expected = Series (
3941 np .array ([False , np .nan , True , True , False ], dtype = np .object_ ),
4042 dtype = expected_dtype ,
@@ -53,7 +55,9 @@ def test_contains(any_string_dtype):
5355 dtype = any_string_dtype ,
5456 )
5557 result = values .str .contains (pat )
56- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
58+ expected_dtype = (
59+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
60+ )
5761 expected = Series (np .array ([False , False , True , True ]), dtype = expected_dtype )
5862 tm .assert_series_equal (result , expected )
5963
@@ -80,14 +84,18 @@ def test_contains(any_string_dtype):
8084 pat = "mmm[_]+"
8185
8286 result = values .str .contains (pat )
83- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
87+ expected_dtype = (
88+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
89+ )
8490 expected = Series (
8591 np .array ([False , np .nan , True , True ], dtype = np .object_ ), dtype = expected_dtype
8692 )
8793 tm .assert_series_equal (result , expected )
8894
8995 result = values .str .contains (pat , na = False )
90- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
96+ expected_dtype = (
97+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
98+ )
9199 expected = Series (np .array ([False , False , True , True ]), dtype = expected_dtype )
92100 tm .assert_series_equal (result , expected )
93101
@@ -172,7 +180,9 @@ def test_contains_moar(any_string_dtype):
172180 )
173181
174182 result = s .str .contains ("a" )
175- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
183+ expected_dtype = (
184+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
185+ )
176186 expected = Series (
177187 [False , False , False , True , True , False , np .nan , False , False , True ],
178188 dtype = expected_dtype ,
@@ -213,7 +223,9 @@ def test_contains_nan(any_string_dtype):
213223 s = Series ([np .nan , np .nan , np .nan ], dtype = any_string_dtype )
214224
215225 result = s .str .contains ("foo" , na = False )
216- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
226+ expected_dtype = (
227+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
228+ )
217229 expected = Series ([False , False , False ], dtype = expected_dtype )
218230 tm .assert_series_equal (result , expected )
219231
@@ -231,7 +243,9 @@ def test_contains_nan(any_string_dtype):
231243 tm .assert_series_equal (result , expected )
232244
233245 result = s .str .contains ("foo" )
234- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
246+ expected_dtype = (
247+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
248+ )
235249 expected = Series ([np .nan , np .nan , np .nan ], dtype = expected_dtype )
236250 tm .assert_series_equal (result , expected )
237251
@@ -641,7 +655,9 @@ def test_replace_regex_single_character(regex, any_string_dtype):
641655
642656def test_match (any_string_dtype ):
643657 # New match behavior introduced in 0.13
644- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
658+ expected_dtype = (
659+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
660+ )
645661
646662 values = Series (["fooBAD__barBAD" , np .nan , "foo" ], dtype = any_string_dtype )
647663 result = values .str .match (".*(BAD[_]+).*(BAD)" )
@@ -696,20 +712,26 @@ def test_match_na_kwarg(any_string_dtype):
696712 s = Series (["a" , "b" , np .nan ], dtype = any_string_dtype )
697713
698714 result = s .str .match ("a" , na = False )
699- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
715+ expected_dtype = (
716+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
717+ )
700718 expected = Series ([True , False , False ], dtype = expected_dtype )
701719 tm .assert_series_equal (result , expected )
702720
703721 result = s .str .match ("a" )
704- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
722+ expected_dtype = (
723+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
724+ )
705725 expected = Series ([True , False , np .nan ], dtype = expected_dtype )
706726 tm .assert_series_equal (result , expected )
707727
708728
709729def test_match_case_kwarg (any_string_dtype ):
710730 values = Series (["ab" , "AB" , "abc" , "ABC" ], dtype = any_string_dtype )
711731 result = values .str .match ("ab" , case = False )
712- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
732+ expected_dtype = (
733+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
734+ )
713735 expected = Series ([True , True , True , True ], dtype = expected_dtype )
714736 tm .assert_series_equal (result , expected )
715737
@@ -725,7 +747,9 @@ def test_fullmatch(any_string_dtype):
725747 ["fooBAD__barBAD" , "BAD_BADleroybrown" , np .nan , "foo" ], dtype = any_string_dtype
726748 )
727749 result = ser .str .fullmatch (".*BAD[_]+.*BAD" )
728- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
750+ expected_dtype = (
751+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
752+ )
729753 expected = Series ([True , False , np .nan , False ], dtype = expected_dtype )
730754 tm .assert_series_equal (result , expected )
731755
@@ -734,7 +758,9 @@ def test_fullmatch_dollar_literal(any_string_dtype):
734758 # GH 56652
735759 ser = Series (["foo" , "foo$foo" , np .nan , "foo$" ], dtype = any_string_dtype )
736760 result = ser .str .fullmatch ("foo\\$" )
737- expected_dtype = "object" if any_string_dtype in object_pyarrow_numpy else "boolean"
761+ expected_dtype = (
762+ "object" if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
763+ )
738764 expected = Series ([False , False , np .nan , True ], dtype = expected_dtype )
739765 tm .assert_series_equal (result , expected )
740766
@@ -744,14 +770,18 @@ def test_fullmatch_na_kwarg(any_string_dtype):
744770 ["fooBAD__barBAD" , "BAD_BADleroybrown" , np .nan , "foo" ], dtype = any_string_dtype
745771 )
746772 result = ser .str .fullmatch (".*BAD[_]+.*BAD" , na = False )
747- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
773+ expected_dtype = (
774+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
775+ )
748776 expected = Series ([True , False , False , False ], dtype = expected_dtype )
749777 tm .assert_series_equal (result , expected )
750778
751779
752780def test_fullmatch_case_kwarg (any_string_dtype ):
753781 ser = Series (["ab" , "AB" , "abc" , "ABC" ], dtype = any_string_dtype )
754- expected_dtype = np .bool_ if any_string_dtype in object_pyarrow_numpy else "boolean"
782+ expected_dtype = (
783+ np .bool_ if is_object_or_nan_string_dtype (any_string_dtype ) else "boolean"
784+ )
755785
756786 expected = Series ([True , False , False , False ], dtype = expected_dtype )
757787
@@ -823,7 +853,9 @@ def test_find(any_string_dtype):
823853 ser = Series (
824854 ["ABCDEFG" , "BCDEFEF" , "DEFGHIJEF" , "EFGHEF" , "XXXX" ], dtype = any_string_dtype
825855 )
826- expected_dtype = np .int64 if any_string_dtype in object_pyarrow_numpy else "Int64"
856+ expected_dtype = (
857+ np .int64 if is_object_or_nan_string_dtype (any_string_dtype ) else "Int64"
858+ )
827859
828860 result = ser .str .find ("EF" )
829861 expected = Series ([4 , 3 , 1 , 0 , - 1 ], dtype = expected_dtype )
@@ -875,7 +907,9 @@ def test_find_nan(any_string_dtype):
875907 ser = Series (
876908 ["ABCDEFG" , np .nan , "DEFGHIJEF" , np .nan , "XXXX" ], dtype = any_string_dtype
877909 )
878- expected_dtype = np .float64 if any_string_dtype in object_pyarrow_numpy else "Int64"
910+ expected_dtype = (
911+ np .float64 if is_object_or_nan_string_dtype (any_string_dtype ) else "Int64"
912+ )
879913
880914 result = ser .str .find ("EF" )
881915 expected = Series ([4 , np .nan , 1 , np .nan , - 1 ], dtype = expected_dtype )
0 commit comments