55
55
pytest .mark .filterwarnings (
56
56
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
57
57
),
58
- pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False ),
59
58
]
60
59
61
60
64
63
params = [
65
64
pytest .param (
66
65
"fastparquet" ,
67
- marks = pytest .mark .skipif (
68
- not _HAVE_FASTPARQUET
69
- or _get_option ("mode.data_manager" , silent = True ) == "array" ,
70
- reason = "fastparquet is not installed or ArrayManager is used" ,
71
- ),
66
+ marks = [
67
+ pytest .mark .skipif (
68
+ not _HAVE_FASTPARQUET
69
+ or _get_option ("mode.data_manager" , silent = True ) == "array" ,
70
+ reason = "fastparquet is not installed or ArrayManager is used" ,
71
+ ),
72
+ pytest .mark .xfail (
73
+ using_string_dtype (),
74
+ reason = "TODO(infer_string) fastparquet" ,
75
+ strict = False ,
76
+ ),
77
+ ],
72
78
),
73
79
pytest .param (
74
80
"pyarrow" ,
@@ -90,17 +96,24 @@ def pa():
90
96
91
97
92
98
@pytest .fixture
93
- def fp ():
99
+ def fp (request ):
94
100
if not _HAVE_FASTPARQUET :
95
101
pytest .skip ("fastparquet is not installed" )
96
102
elif _get_option ("mode.data_manager" , silent = True ) == "array" :
97
103
pytest .skip ("ArrayManager is not supported with fastparquet" )
104
+ if using_string_dtype ():
105
+ request .applymarker (
106
+ pytest .mark .xfail (reason = "TODO(infer_string) fastparquet" , strict = False )
107
+ )
98
108
return "fastparquet"
99
109
100
110
101
111
@pytest .fixture
102
112
def df_compat ():
103
- return pd .DataFrame ({"A" : [1 , 2 , 3 ], "B" : "foo" })
113
+ # TODO(infer_string) should this give str columns?
114
+ return pd .DataFrame (
115
+ {"A" : [1 , 2 , 3 ], "B" : "foo" }, columns = pd .Index (["A" , "B" ], dtype = object )
116
+ )
104
117
105
118
106
119
@pytest .fixture
@@ -389,16 +402,6 @@ def check_external_error_on_write(self, df, engine, exc):
389
402
with tm .external_error_raised (exc ):
390
403
to_parquet (df , path , engine , compression = None )
391
404
392
- @pytest .mark .network
393
- @pytest .mark .single_cpu
394
- def test_parquet_read_from_url (self , httpserver , datapath , df_compat , engine ):
395
- if engine != "auto" :
396
- pytest .importorskip (engine )
397
- with open (datapath ("io" , "data" , "parquet" , "simple.parquet" ), mode = "rb" ) as f :
398
- httpserver .serve_content (content = f .read ())
399
- df = read_parquet (httpserver .url )
400
- tm .assert_frame_equal (df , df_compat )
401
-
402
405
403
406
class TestBasic (Base ):
404
407
def test_error (self , engine ):
@@ -696,6 +699,16 @@ def test_read_empty_array(self, pa, dtype):
696
699
df , pa , read_kwargs = {"dtype_backend" : "numpy_nullable" }, expected = expected
697
700
)
698
701
702
+ @pytest .mark .network
703
+ @pytest .mark .single_cpu
704
+ def test_parquet_read_from_url (self , httpserver , datapath , df_compat , engine ):
705
+ if engine != "auto" :
706
+ pytest .importorskip (engine )
707
+ with open (datapath ("io" , "data" , "parquet" , "simple.parquet" ), mode = "rb" ) as f :
708
+ httpserver .serve_content (content = f .read ())
709
+ df = read_parquet (httpserver .url , engine = engine )
710
+ tm .assert_frame_equal (df , df_compat )
711
+
699
712
700
713
class TestParquetPyArrow (Base ):
701
714
def test_basic (self , pa , df_full ):
@@ -925,7 +938,7 @@ def test_write_with_schema(self, pa):
925
938
out_df = df .astype (bool )
926
939
check_round_trip (df , pa , write_kwargs = {"schema" : schema }, expected = out_df )
927
940
928
- def test_additional_extension_arrays (self , pa ):
941
+ def test_additional_extension_arrays (self , pa , using_infer_string ):
929
942
# test additional ExtensionArrays that are supported through the
930
943
# __arrow_array__ protocol
931
944
pytest .importorskip ("pyarrow" )
@@ -936,17 +949,25 @@ def test_additional_extension_arrays(self, pa):
936
949
"c" : pd .Series (["a" , None , "c" ], dtype = "string" ),
937
950
}
938
951
)
939
- check_round_trip (df , pa )
952
+ if using_infer_string :
953
+ check_round_trip (df , pa , expected = df .astype ({"c" : "str" }))
954
+ else :
955
+ check_round_trip (df , pa )
940
956
941
957
df = pd .DataFrame ({"a" : pd .Series ([1 , 2 , 3 , None ], dtype = "Int64" )})
942
958
check_round_trip (df , pa )
943
959
944
- def test_pyarrow_backed_string_array (self , pa , string_storage ):
960
+ def test_pyarrow_backed_string_array (self , pa , string_storage , using_infer_string ):
945
961
# test ArrowStringArray supported through the __arrow_array__ protocol
946
962
pytest .importorskip ("pyarrow" )
947
963
df = pd .DataFrame ({"a" : pd .Series (["a" , None , "c" ], dtype = "string[pyarrow]" )})
948
964
with pd .option_context ("string_storage" , string_storage ):
949
- check_round_trip (df , pa , expected = df .astype (f"string[{ string_storage } ]" ))
965
+ if using_infer_string :
966
+ expected = df .astype ("str" )
967
+ expected .columns = expected .columns .astype ("str" )
968
+ else :
969
+ expected = df .astype (f"string[{ string_storage } ]" )
970
+ check_round_trip (df , pa , expected = expected )
950
971
951
972
def test_additional_extension_types (self , pa ):
952
973
# test additional ExtensionArrays that are supported through the
0 commit comments