Skip to content

Commit 0ec5f26

Browse files
authored
BUG(string dtype): Resolve pytables xfail when reading with condition (#60943)
1 parent ee06e71 commit 0ec5f26

File tree

5 files changed

+15
-15
lines changed

5 files changed

+15
-15
lines changed

Diff for: pandas/io/pytables.py

+15 -2
Original file line number | Diff line number | Diff line change
@@ -4159,6 +4159,8 @@ def _create_axes(
41594159
ordered = data_converted.ordered
41604160
meta = "category"
41614161
metadata = np.asarray(data_converted.categories).ravel()
4162+
elif isinstance(blk.dtype, StringDtype):
4163+
meta = str(blk.dtype)
41624164

41634165
data, dtype_name = _get_data_and_dtype_name(data_converted)
41644166

@@ -4419,7 +4421,8 @@ def read_column(
44194421
errors=self.errors,
44204422
)
44214423
cvs = col_values[1]
4422-
return Series(cvs, name=column, copy=False)
4424+
dtype = getattr(self.table.attrs, f"{column}_meta", None)
4425+
return Series(cvs, name=column, copy=False, dtype=dtype)
44234426

44244427
raise KeyError(f"column [{column}] not found in the table")
44254428

@@ -4769,8 +4772,18 @@ def read(
47694772
df = DataFrame._from_arrays([values], columns=cols_, index=index_)
47704773
if not (using_string_dtype() and values.dtype.kind == "O"):
47714774
assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)
4775+
4776+
# If str / string dtype is stored in meta, use that.
4777+
converted = False
4778+
for column in cols_:
4779+
dtype = getattr(self.table.attrs, f"{column}_meta", None)
4780+
if dtype in ["str", "string"]:
4781+
df[column] = df[column].astype(dtype)
4782+
converted = True
4783+
# Otherwise try inference.
47724784
if (
4773-
using_string_dtype()
4785+
not converted
4786+
and using_string_dtype()
47744787
and isinstance(values, np.ndarray)
47754788
and is_string_array(
47764789
values,

Diff for: pandas/tests/io/pytables/test_append.py

-3
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66
import pytest
77

8-
from pandas._config import using_string_dtype
9-
108
from pandas._libs.tslibs import Timestamp
119
from pandas.compat import PY312
1210

@@ -516,7 +514,6 @@ def test_append_with_empty_string(setup_path):
516514
tm.assert_frame_equal(store.select("df"), df)
517515

518516

519-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
520517
def test_append_with_data_columns(setup_path):
521518
with ensure_clean_store(setup_path) as store:
522519
df = DataFrame(

Diff for: pandas/tests/io/pytables/test_categorical.py

-3
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas import (
75
Categorical,
86
DataFrame,
@@ -140,7 +138,6 @@ def test_categorical(setup_path):
140138
store.select("df3/meta/s/meta")
141139

142140

143-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
144141
def test_categorical_conversion(tmp_path, setup_path):
145142
# GH13322
146143
# Check that read_hdf with categorical columns doesn't return rows if

Diff for: pandas/tests/io/pytables/test_read.py

-3
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,6 @@
55
import numpy as np
66
import pytest
77

8-
from pandas._config import using_string_dtype
9-
108
from pandas.compat import is_platform_windows
119

1210
import pandas as pd
@@ -72,7 +70,6 @@ def test_read_missing_key_opened_store(tmp_path, setup_path):
7270
read_hdf(store, "k1")
7371

7472

75-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
7673
def test_read_column(setup_path):
7774
df = DataFrame(
7875
np.random.default_rng(2).standard_normal((10, 4)),

Diff for: pandas/tests/io/pytables/test_select.py

-4
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
from pandas._libs.tslibs import Timestamp
75
from pandas.compat import PY312
86

@@ -666,7 +664,6 @@ def test_frame_select(setup_path, request):
666664
# store.select('frame', [crit1, crit2])
667665

668666

669-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
670667
def test_frame_select_complex(setup_path):
671668
# select via complex criteria
672669

@@ -980,7 +977,6 @@ def test_query_long_float_literal(setup_path):
980977
tm.assert_frame_equal(expected, result)
981978

982979

983-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
984980
def test_query_compare_column_type(setup_path):
985981
# GH 15492
986982
df = DataFrame(

0 commit comments

Comments
 (0)