5555 pytest .mark .filterwarnings (
5656 "ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
5757 ),
58- pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False ),
5958]
6059
6160
6463 params = [
6564 pytest .param (
6665 "fastparquet" ,
67- marks = pytest .mark .skipif (
68- not _HAVE_FASTPARQUET
69- or _get_option ("mode.data_manager" , silent = True ) == "array" ,
70- reason = "fastparquet is not installed or ArrayManager is used" ,
71- ),
66+ marks = [
67+ pytest .mark .skipif (
68+ not _HAVE_FASTPARQUET
69+ or _get_option ("mode.data_manager" , silent = True ) == "array" ,
70+ reason = "fastparquet is not installed or ArrayManager is used" ,
71+ ),
72+ pytest .mark .xfail (
73+ using_string_dtype (),
74+ reason = "TODO(infer_string) fastparquet" ,
75+ strict = False ,
76+ ),
77+ ],
7278 ),
7379 pytest .param (
7480 "pyarrow" ,
@@ -90,17 +96,24 @@ def pa():
9096
9197
@pytest.fixture
def fp(request):
    """Return the ``"fastparquet"`` engine name, or skip when it is unusable.

    The requesting test is skipped when fastparquet is not installed or when
    the ``mode.data_manager`` option equals ``"array"``. When
    ``using_string_dtype()`` is true, a non-strict ``xfail`` marker is applied
    to the requesting test before the engine name is returned.
    """
    if not _HAVE_FASTPARQUET:
        pytest.skip("fastparquet is not installed")
    elif _get_option("mode.data_manager", silent=True) == "array":
        pytest.skip("ArrayManager is not supported with fastparquet")
    if using_string_dtype():
        # strict=False: the test is allowed to either pass or fail while the
        # TODO(infer_string) work for fastparquet is outstanding.
        request.applymarker(
            pytest.mark.xfail(reason="TODO(infer_string) fastparquet", strict=False)
        )
    return "fastparquet"
99109
100110
@pytest.fixture
def df_compat():
    """Return a small two-column frame whose column labels are object dtype.

    Column ``"A"`` holds the integers 1, 2, 3 and column ``"B"`` is the scalar
    ``"foo"`` broadcast over the rows. The column index is built explicitly
    with ``dtype=object``.
    """
    # TODO(infer_string) should this give str columns?
    return pd.DataFrame(
        {"A": [1, 2, 3], "B": "foo"}, columns=pd.Index(["A", "B"], dtype=object)
    )
104117
105118
106119@pytest .fixture
@@ -389,16 +402,6 @@ def check_external_error_on_write(self, df, engine, exc):
389402 with tm .external_error_raised (exc ):
390403 to_parquet (df , path , engine , compression = None )
391404
392- @pytest .mark .network
393- @pytest .mark .single_cpu
394- def test_parquet_read_from_url (self , httpserver , datapath , df_compat , engine ):
395- if engine != "auto" :
396- pytest .importorskip (engine )
397- with open (datapath ("io" , "data" , "parquet" , "simple.parquet" ), mode = "rb" ) as f :
398- httpserver .serve_content (content = f .read ())
399- df = read_parquet (httpserver .url )
400- tm .assert_frame_equal (df , df_compat )
401-
402405
403406class TestBasic (Base ):
404407 def test_error (self , engine ):
@@ -696,6 +699,16 @@ def test_read_empty_array(self, pa, dtype):
696699 df , pa , read_kwargs = {"dtype_backend" : "numpy_nullable" }, expected = expected
697700 )
698701
@pytest.mark.network
@pytest.mark.single_cpu
def test_parquet_read_from_url(self, httpserver, datapath, df_compat, engine):
    # Serve the bundled simple.parquet file over HTTP, read it back through
    # read_parquet with the parametrized engine, and compare with df_compat.
    if engine != "auto":
        # "auto" is not an importable module; only concrete engines are checked.
        pytest.importorskip(engine)
    with open(datapath("io", "data", "parquet", "simple.parquet"), mode="rb") as f:
        httpserver.serve_content(content=f.read())
    # Pass the engine explicitly so the parametrized engine is the one exercised.
    df = read_parquet(httpserver.url, engine=engine)
    tm.assert_frame_equal(df, df_compat)
711+
699712
700713class TestParquetPyArrow (Base ):
701714 def test_basic (self , pa , df_full ):
@@ -925,7 +938,7 @@ def test_write_with_schema(self, pa):
925938 out_df = df .astype (bool )
926939 check_round_trip (df , pa , write_kwargs = {"schema" : schema }, expected = out_df )
927940
928- def test_additional_extension_arrays (self , pa ):
941+ def test_additional_extension_arrays (self , pa , using_infer_string ):
929942 # test additional ExtensionArrays that are supported through the
930943 # __arrow_array__ protocol
931944 pytest .importorskip ("pyarrow" )
@@ -936,17 +949,25 @@ def test_additional_extension_arrays(self, pa):
936949 "c" : pd .Series (["a" , None , "c" ], dtype = "string" ),
937950 }
938951 )
939- check_round_trip (df , pa )
952+ if using_infer_string :
953+ check_round_trip (df , pa , expected = df .astype ({"c" : "str" }))
954+ else :
955+ check_round_trip (df , pa )
940956
941957 df = pd .DataFrame ({"a" : pd .Series ([1 , 2 , 3 , None ], dtype = "Int64" )})
942958 check_round_trip (df , pa )
943959
def test_pyarrow_backed_string_array(self, pa, string_storage, using_infer_string):
    # test ArrowStringArray supported through the __arrow_array__ protocol
    pytest.importorskip("pyarrow")
    df = pd.DataFrame({"a": pd.Series(["a", None, "c"], dtype="string[pyarrow]")})
    with pd.option_context("string_storage", string_storage):
        if using_infer_string:
            # With string inference on, both the values and the column
            # labels are expected to come back with the "str" dtype.
            expected = df.astype("str")
            expected.columns = expected.columns.astype("str")
        else:
            # Otherwise the values are expected in the string dtype backed by
            # the storage selected through the "string_storage" option.
            expected = df.astype(f"string[{string_storage}]")
        check_round_trip(df, pa, expected=expected)
950971
951972 def test_additional_extension_types (self , pa ):
952973 # test additional ExtensionArrays that are supported through the
0 commit comments