Skip to content

Commit e1e38d4

Browse files
jorisvandenbossche authored and WillAyd committed
String dtype: honor mode.string_storage option (and change default to None) (pandas-dev#59488)
* String dtype: honor mode.string_storage option (and change default to None)
* fix test + explicitly test default
* use 'auto' instead of None
1 parent ffc9d45 commit e1e38d4

File tree

4 files changed

+24
-18
lines changed

4 files changed

+24
-18
lines changed

pandas/core/arrays/string_.py

+8-4
Original file line number | Diff line number | Diff line change
@@ -136,12 +136,16 @@ def __init__(
136136
# infer defaults
137137
if storage is None:
138138
if na_value is not libmissing.NA:
139-
if HAS_PYARROW:
140-
storage = "pyarrow"
141-
else:
142-
storage = "python"
139+
storage = get_option("mode.string_storage")
140+
if storage == "auto":
141+
if HAS_PYARROW:
142+
storage = "pyarrow"
143+
else:
144+
storage = "python"
143145
else:
144146
storage = get_option("mode.string_storage")
147+
if storage == "auto":
148+
storage = "python"
145149

146150
if storage == "pyarrow_numpy":
147151
# TODO raise a deprecation warning

pandas/core/config_init.py

+3-4
Original file line number | Diff line number | Diff line change
@@ -505,13 +505,12 @@ def use_inf_as_na_cb(key) -> None:
505505

506506
string_storage_doc = """
507507
: string
508-
The default storage for StringDtype. This option is ignored if
509-
``future.infer_string`` is set to True.
508+
The default storage for StringDtype.
510509
"""
511510

512511

513512
def is_valid_string_storage(value: Any) -> None:
514-
legal_values = ["python", "pyarrow"]
513+
legal_values = ["auto", "python", "pyarrow"]
515514
if value not in legal_values:
516515
msg = "Value must be one of python|pyarrow"
517516
if value == "pyarrow_numpy":
@@ -526,7 +525,7 @@ def is_valid_string_storage(value: Any) -> None:
526525
with cf.config_prefix("mode"):
527526
cf.register_option(
528527
"string_storage",
529-
"python",
528+
"auto",
530529
string_storage_doc,
531530
# validator=is_one_of_factory(["python", "pyarrow"]),
532531
validator=is_valid_string_storage,

pandas/tests/arrays/string_/test_string_arrow.py

+4-6
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas.compat import HAS_PYARROW
87
import pandas.util._test_decorators as td
98

109
import pandas as pd
@@ -27,11 +26,10 @@ def test_eq_all_na():
2726
tm.assert_extension_array_equal(result, expected)
2827

2928

30-
def test_config(string_storage, request, using_infer_string):
31-
if using_infer_string and string_storage == "python" and HAS_PYARROW:
32-
# string storage with na_value=NaN always uses pyarrow if available
33-
# -> does not yet honor the option
34-
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
29+
def test_config(string_storage, using_infer_string):
30+
# with the default string_storage setting
31+
# always "python" at the moment
32+
assert StringDtype().storage == "python"
3533

3634
with pd.option_context("string_storage", string_storage):
3735
assert StringDtype().storage == string_storage

pandas/tests/dtypes/test_common.py

+9-4
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import numpy as np
44
import pytest
55

6+
from pandas.compat import HAS_PYARROW
67
import pandas.util._test_decorators as td
78

89
from pandas.core.dtypes.astype import astype_array
@@ -802,13 +803,17 @@ def test_pandas_dtype_ea_not_instance():
802803

803804

804805
def test_pandas_dtype_string_dtypes(string_storage):
805-
# TODO(infer_string) remove skip if "python" is supported
806-
pytest.importorskip("pyarrow")
806+
with pd.option_context("future.infer_string", True):
807+
# with the default string_storage setting
808+
result = pandas_dtype("str")
809+
assert result == pd.StringDtype(
810+
"pyarrow" if HAS_PYARROW else "python", na_value=np.nan
811+
)
812+
807813
with pd.option_context("future.infer_string", True):
808814
with pd.option_context("string_storage", string_storage):
809815
result = pandas_dtype("str")
810-
# TODO(infer_string) hardcoded to pyarrow until python is supported
811-
assert result == pd.StringDtype("pyarrow", na_value=np.nan)
816+
assert result == pd.StringDtype(string_storage, na_value=np.nan)
812817

813818
with pd.option_context("future.infer_string", False):
814819
with pd.option_context("string_storage", string_storage):

0 commit comments

Comments
 (0)