Skip to content

Commit 95b6057

Browse files
authored
DEPR: downcasting in replace (#54710)
* DEPR: downcasting in replace * GH refs * fix docbuild i hope * suppress doc warnings * avoid warning in docs
1 parent 66a4945 commit 95b6057

File tree

7 files changed

+162
-49
lines changed

7 files changed

+162
-49
lines changed

Diff for: doc/source/user_guide/missing_data.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ Limit the number of NA values filled
401401
402402
df.ffill(limit=1)
403403
404-
NA values can be replaced with corresponding value from a :class:`Series`` or :class:`DataFrame``
404+
NA values can be replaced with corresponding value from a :class:`Series` or :class:`DataFrame`
405405
where the index and columns align between the original object and the filled object.
406406

407407
.. ipython:: python
@@ -660,7 +660,7 @@ Pass a list of regular expressions that will replace matches with a scalar.
660660

661661
.. ipython:: python
662662
663-
df.replace([r"\s*\.\s*", r"a|b"], np.nan, regex=True)
663+
df.replace([r"\s*\.\s*", r"a|b"], "placeholder", regex=True)
664664
665665
All of the regular expression examples can also be passed with the
666666
``to_replace`` argument as the ``regex`` argument. In this case the ``value``
@@ -669,7 +669,7 @@ dictionary.
669669

670670
.. ipython:: python
671671
672-
df.replace(regex=[r"\s*\.\s*", r"a|b"], value=np.nan)
672+
df.replace(regex=[r"\s*\.\s*", r"a|b"], value="placeholder")
673673
674674
.. note::
675675

Diff for: doc/source/whatsnew/v2.2.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,8 @@ Deprecations
190190
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_parquet` except ``path``. (:issue:`54229`)
191191
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
192192
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
193-
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.downcasting", True)`` (:issue:`53656`)
193+
- Deprecated automatic downcasting of object-dtype results in :meth:`Series.replace` and :meth:`DataFrame.replace`; explicitly call ``result = result.infer_objects(copy=False)`` instead. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54710`)
194+
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
194195
- Deprecated including the groups in computations when using :meth:`DataFrameGroupBy.apply` and :meth:`DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
195196
- Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
196197
- Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)

Diff for: pandas/core/internals/blocks.py

+59-9
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,23 @@ def replace(
761761
if not (self.is_object and value is None):
762762
# if the user *explicitly* gave None, we keep None, otherwise
763763
# may downcast to NaN
764-
blocks = blk.convert(copy=False, using_cow=using_cow)
764+
if get_option("future.no_silent_downcasting") is True:
765+
blocks = [blk]
766+
else:
767+
blocks = blk.convert(copy=False, using_cow=using_cow)
768+
if len(blocks) > 1 or blocks[0].dtype != blk.dtype:
769+
warnings.warn(
770+
# GH#54710
771+
"Downcasting behavior in `replace` is deprecated and "
772+
"will be removed in a future version. To retain the old "
773+
"behavior, explicitly call "
774+
"`result.infer_objects(copy=False)`. "
775+
"To opt-in to the future "
776+
"behavior, set "
777+
"`pd.set_option('future.no_silent_downcasting', True)`",
778+
FutureWarning,
779+
stacklevel=find_stack_level(),
780+
)
765781
else:
766782
blocks = [blk]
767783
return blocks
@@ -836,7 +852,21 @@ def _replace_regex(
836852

837853
replace_regex(block.values, rx, value, mask)
838854

839-
return block.convert(copy=False, using_cow=using_cow)
855+
nbs = block.convert(copy=False, using_cow=using_cow)
856+
opt = get_option("future.no_silent_downcasting")
857+
if (len(nbs) > 1 or nbs[0].dtype != block.dtype) and not opt:
858+
warnings.warn(
859+
# GH#54710
860+
"Downcasting behavior in `replace` is deprecated and "
861+
"will be removed in a future version. To retain the old "
862+
"behavior, explicitly call `result.infer_objects(copy=False)`. "
863+
"To opt-in to the future "
864+
"behavior, set "
865+
"`pd.set_option('future.no_silent_downcasting', True)`",
866+
FutureWarning,
867+
stacklevel=find_stack_level(),
868+
)
869+
return nbs
840870

841871
@final
842872
def replace_list(
@@ -902,6 +932,7 @@ def replace_list(
902932
else:
903933
rb = [self if inplace else self.copy()]
904934

935+
opt = get_option("future.no_silent_downcasting")
905936
for i, ((src, dest), mask) in enumerate(zip(pairs, masks)):
906937
convert = i == src_len # only convert once at the end
907938
new_rb: list[Block] = []
@@ -939,14 +970,33 @@ def replace_list(
939970
b.refs.referenced_blocks.index(ref)
940971
)
941972

942-
if convert and blk.is_object and not all(x is None for x in dest_list):
973+
if (
974+
not opt
975+
and convert
976+
and blk.is_object
977+
and not all(x is None for x in dest_list)
978+
):
943979
# GH#44498 avoid unwanted cast-back
944-
result = extend_blocks(
945-
[
946-
b.convert(copy=True and not using_cow, using_cow=using_cow)
947-
for b in result
948-
]
949-
)
980+
nbs = []
981+
for res_blk in result:
982+
converted = res_blk.convert(
983+
copy=True and not using_cow, using_cow=using_cow
984+
)
985+
if len(converted) > 1 or converted[0].dtype != res_blk.dtype:
986+
warnings.warn(
987+
# GH#54710
988+
"Downcasting behavior in `replace` is deprecated "
989+
"and will be removed in a future version. To "
990+
"retain the old behavior, explicitly call "
991+
"`result.infer_objects(copy=False)`. "
992+
"To opt-in to the future "
993+
"behavior, set "
994+
"`pd.set_option('future.no_silent_downcasting', True)`",
995+
FutureWarning,
996+
stacklevel=find_stack_level(),
997+
)
998+
nbs.extend(converted)
999+
result = nbs
9501000
new_rb.extend(result)
9511001
rb = new_rb
9521002
return rb

Diff for: pandas/tests/frame/methods/test_replace.py

+44-18
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,9 @@ def test_regex_replace_dict_nested_non_first_character(self, any_string_dtype):
289289
def test_regex_replace_dict_nested_gh4115(self):
290290
df = DataFrame({"Type": ["Q", "T", "Q", "Q", "T"], "tmp": 2})
291291
expected = DataFrame({"Type": [0, 1, 0, 0, 1], "tmp": 2})
292-
result = df.replace({"Type": {"Q": 0, "T": 1}})
292+
msg = "Downcasting behavior in `replace`"
293+
with tm.assert_produces_warning(FutureWarning, match=msg):
294+
result = df.replace({"Type": {"Q": 0, "T": 1}})
293295
tm.assert_frame_equal(result, expected)
294296

295297
def test_regex_replace_list_to_scalar(self, mix_abc):
@@ -301,16 +303,20 @@ def test_regex_replace_list_to_scalar(self, mix_abc):
301303
"c": [np.nan, np.nan, np.nan, "d"],
302304
}
303305
)
304-
res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True)
306+
msg = "Downcasting behavior in `replace`"
307+
with tm.assert_produces_warning(FutureWarning, match=msg):
308+
res = df.replace([r"\s*\.\s*", "a|b"], np.nan, regex=True)
305309
res2 = df.copy()
306310
res3 = df.copy()
307-
return_value = res2.replace(
308-
[r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True
309-
)
311+
with tm.assert_produces_warning(FutureWarning, match=msg):
312+
return_value = res2.replace(
313+
[r"\s*\.\s*", "a|b"], np.nan, regex=True, inplace=True
314+
)
310315
assert return_value is None
311-
return_value = res3.replace(
312-
regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True
313-
)
316+
with tm.assert_produces_warning(FutureWarning, match=msg):
317+
return_value = res3.replace(
318+
regex=[r"\s*\.\s*", "a|b"], value=np.nan, inplace=True
319+
)
314320
assert return_value is None
315321
tm.assert_frame_equal(res, expec)
316322
tm.assert_frame_equal(res2, expec)
@@ -520,7 +526,9 @@ def test_replace_convert(self):
520526
# gh 3907
521527
df = DataFrame([["foo", "bar", "bah"], ["bar", "foo", "bah"]])
522528
m = {"foo": 1, "bar": 2, "bah": 3}
523-
rep = df.replace(m)
529+
msg = "Downcasting behavior in `replace` "
530+
with tm.assert_produces_warning(FutureWarning, match=msg):
531+
rep = df.replace(m)
524532
expec = Series([np.int64] * 3)
525533
res = rep.dtypes
526534
tm.assert_series_equal(expec, res)
@@ -838,7 +846,12 @@ def test_replace_for_new_dtypes(self, datetime_frame):
838846
],
839847
)
840848
def test_replace_dtypes(self, frame, to_replace, value, expected):
841-
result = frame.replace(to_replace, value)
849+
warn = None
850+
if isinstance(to_replace, datetime) and to_replace.year == 2920:
851+
warn = FutureWarning
852+
msg = "Downcasting behavior in `replace` "
853+
with tm.assert_produces_warning(warn, match=msg):
854+
result = frame.replace(to_replace, value)
842855
tm.assert_frame_equal(result, expected)
843856

844857
def test_replace_input_formats_listlike(self):
@@ -927,7 +940,9 @@ def test_replace_dict_no_regex(self):
927940
"Strongly Disagree": 1,
928941
}
929942
expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1})
930-
result = answer.replace(weights)
943+
msg = "Downcasting behavior in `replace` "
944+
with tm.assert_produces_warning(FutureWarning, match=msg):
945+
result = answer.replace(weights)
931946
tm.assert_series_equal(result, expected)
932947

933948
def test_replace_series_no_regex(self):
@@ -950,7 +965,9 @@ def test_replace_series_no_regex(self):
950965
}
951966
)
952967
expected = Series({0: 5, 1: 4, 2: 3, 3: 2, 4: 1})
953-
result = answer.replace(weights)
968+
msg = "Downcasting behavior in `replace` "
969+
with tm.assert_produces_warning(FutureWarning, match=msg):
970+
result = answer.replace(weights)
954971
tm.assert_series_equal(result, expected)
955972

956973
def test_replace_dict_tuple_list_ordering_remains_the_same(self):
@@ -1076,7 +1093,9 @@ def test_replace_period(self):
10761093

10771094
expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]})
10781095
assert expected.dtypes.iloc[0] == "Period[M]"
1079-
result = df.replace(d)
1096+
msg = "Downcasting behavior in `replace` "
1097+
with tm.assert_produces_warning(FutureWarning, match=msg):
1098+
result = df.replace(d)
10801099
tm.assert_frame_equal(result, expected)
10811100

10821101
def test_replace_datetime(self):
@@ -1106,7 +1125,9 @@ def test_replace_datetime(self):
11061125
)
11071126
assert set(df.fname.values) == set(d["fname"].keys())
11081127
expected = DataFrame({"fname": [d["fname"][k] for k in df.fname.values]})
1109-
result = df.replace(d)
1128+
msg = "Downcasting behavior in `replace` "
1129+
with tm.assert_produces_warning(FutureWarning, match=msg):
1130+
result = df.replace(d)
11101131
tm.assert_frame_equal(result, expected)
11111132

11121133
def test_replace_datetimetz(self):
@@ -1307,10 +1328,12 @@ def test_replace_commutative(self, df, to_replace, exp):
13071328
np.float64(1),
13081329
],
13091330
)
1310-
def test_replace_replacer_dtype(self, request, replacer):
1331+
def test_replace_replacer_dtype(self, replacer):
13111332
# GH26632
13121333
df = DataFrame(["a"])
1313-
result = df.replace({"a": replacer, "b": replacer})
1334+
msg = "Downcasting behavior in `replace` "
1335+
with tm.assert_produces_warning(FutureWarning, match=msg):
1336+
result = df.replace({"a": replacer, "b": replacer})
13141337
expected = DataFrame([replacer])
13151338
tm.assert_frame_equal(result, expected)
13161339

@@ -1564,12 +1587,15 @@ def test_replace_regex_dtype_frame(self, regex):
15641587
# GH-48644
15651588
df1 = DataFrame({"A": ["0"], "B": ["0"]})
15661589
expected_df1 = DataFrame({"A": [1], "B": [1]})
1567-
result_df1 = df1.replace(to_replace="0", value=1, regex=regex)
1590+
msg = "Downcasting behavior in `replace`"
1591+
with tm.assert_produces_warning(FutureWarning, match=msg):
1592+
result_df1 = df1.replace(to_replace="0", value=1, regex=regex)
15681593
tm.assert_frame_equal(result_df1, expected_df1)
15691594

15701595
df2 = DataFrame({"A": ["0"], "B": ["1"]})
15711596
expected_df2 = DataFrame({"A": [1], "B": ["1"]})
1572-
result_df2 = df2.replace(to_replace="0", value=1, regex=regex)
1597+
with tm.assert_produces_warning(FutureWarning, match=msg):
1598+
result_df2 = df2.replace(to_replace="0", value=1, regex=regex)
15731599
tm.assert_frame_equal(result_df2, expected_df2)
15741600

15751601
def test_replace_with_value_also_being_replaced(self):

Diff for: pandas/tests/indexing/test_coercion.py

+24-6
Original file line numberDiff line numberDiff line change
@@ -836,8 +836,6 @@ def test_replace_series(self, how, to_key, from_key, replacer):
836836
# tested below
837837
return
838838

839-
result = obj.replace(replacer)
840-
841839
if (from_key == "float64" and to_key in ("int64")) or (
842840
from_key == "complex128" and to_key in ("int64", "float64")
843841
):
@@ -851,6 +849,17 @@ def test_replace_series(self, how, to_key, from_key, replacer):
851849
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
852850
assert exp.dtype == to_key
853851

852+
msg = "Downcasting behavior in `replace`"
853+
warn = FutureWarning
854+
if (
855+
exp.dtype == obj.dtype
856+
or exp.dtype == object
857+
or (exp.dtype.kind in "iufc" and obj.dtype.kind in "iufc")
858+
):
859+
warn = None
860+
with tm.assert_produces_warning(warn, match=msg):
861+
result = obj.replace(replacer)
862+
854863
tm.assert_series_equal(result, exp)
855864

856865
@pytest.mark.parametrize(
@@ -866,11 +875,14 @@ def test_replace_series_datetime_tz(self, how, to_key, from_key, replacer):
866875
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
867876
assert obj.dtype == from_key
868877

869-
result = obj.replace(replacer)
870-
871878
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
872879
assert exp.dtype == to_key
873880

881+
msg = "Downcasting behavior in `replace`"
882+
warn = FutureWarning if exp.dtype != object else None
883+
with tm.assert_produces_warning(warn, match=msg):
884+
result = obj.replace(replacer)
885+
874886
tm.assert_series_equal(result, exp)
875887

876888
@pytest.mark.parametrize(
@@ -888,16 +900,22 @@ def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer)
888900
obj = pd.Series(self.rep[from_key], index=index, name="yyy")
889901
assert obj.dtype == from_key
890902

891-
result = obj.replace(replacer)
892-
893903
exp = pd.Series(self.rep[to_key], index=index, name="yyy")
904+
warn = FutureWarning
894905
if isinstance(obj.dtype, pd.DatetimeTZDtype) and isinstance(
895906
exp.dtype, pd.DatetimeTZDtype
896907
):
897908
# with mismatched tzs, we retain the original dtype as of 2.0
898909
exp = exp.astype(obj.dtype)
910+
warn = None
899911
else:
900912
assert exp.dtype == to_key
913+
if to_key == from_key:
914+
warn = None
915+
916+
msg = "Downcasting behavior in `replace`"
917+
with tm.assert_produces_warning(warn, match=msg):
918+
result = obj.replace(replacer)
901919

902920
tm.assert_series_equal(result, exp)
903921

Diff for: pandas/tests/io/excel/test_writers.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1197,7 +1197,9 @@ def test_render_as_column_name(self, path):
11971197
def test_true_and_false_value_options(self, path):
11981198
# see gh-13347
11991199
df = DataFrame([["foo", "bar"]], columns=["col1", "col2"])
1200-
expected = df.replace({"foo": True, "bar": False})
1200+
msg = "Downcasting behavior in `replace`"
1201+
with tm.assert_produces_warning(FutureWarning, match=msg):
1202+
expected = df.replace({"foo": True, "bar": False})
12011203

12021204
df.to_excel(path)
12031205
read_frame = pd.read_excel(

0 commit comments

Comments
 (0)