Skip to content

Commit 5457e59

Browse files
DEPR: groupby.fillna (#55719)
* DEPR: groupby.fillna * fixup * Ignore doctest warnings * Add deprecation to docstrings * Update doc/source/whatsnew/v2.2.0.rst --------- Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 713c4dc commit 5457e59

File tree

11 files changed

+190
-79
lines changed

11 files changed

+190
-79
lines changed

Diff for: doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@ Other Deprecations
300300
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
301301
- Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)
302302
- Deprecated downcasting of the results of :meth:`DataFrame.fillna`, :meth:`Series.fillna`, :meth:`DataFrame.ffill`, :meth:`Series.ffill`, :meth:`DataFrame.bfill`, :meth:`Series.bfill` in object-dtype cases. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`54261`)
303+
- Deprecated :meth:`.DataFrameGroupBy.fillna` and :meth:`.SeriesGroupBy.fillna`; use :meth:`.DataFrameGroupBy.ffill` or :meth:`.DataFrameGroupBy.bfill` for forward or backward filling, or :meth:`.DataFrame.fillna` to fill with a single value (or the Series equivalents) (:issue:`55718`)
303304
-
304305

305306
.. ---------------------------------------------------------------------------

Diff for: pandas/conftest.py

+2
Original file line numberDiff line numberDiff line change
@@ -143,11 +143,13 @@ def pytest_collection_modifyitems(items, config) -> None:
143143
("is_datetime64tz_dtype", "is_datetime64tz_dtype is deprecated"),
144144
("is_categorical_dtype", "is_categorical_dtype is deprecated"),
145145
("is_sparse", "is_sparse is deprecated"),
146+
("DataFrameGroupBy.fillna", "DataFrameGroupBy.fillna is deprecated"),
146147
("NDFrame.replace", "The 'method' keyword"),
147148
("NDFrame.replace", "Series.replace without 'value'"),
148149
("NDFrame.clip", "Downcasting behavior in Series and DataFrame methods"),
149150
("Series.idxmin", "The behavior of Series.idxmin"),
150151
("Series.idxmax", "The behavior of Series.idxmax"),
152+
("SeriesGroupBy.fillna", "SeriesGroupBy.fillna is deprecated"),
151153
("SeriesGroupBy.idxmin", "The behavior of Series.idxmin"),
152154
("SeriesGroupBy.idxmax", "The behavior of Series.idxmax"),
153155
# Docstring divides by zero to show behavior difference

Diff for: pandas/core/groupby/generic.py

+28-26
Original file line numberDiff line numberDiff line change
@@ -901,6 +901,12 @@ def fillna(
901901
"""
902902
Fill NA/NaN values using the specified method within groups.
903903
904+
.. deprecated:: 2.2.0
905+
This method is deprecated and will be removed in a future version.
906+
Use :meth:`.SeriesGroupBy.ffill` or :meth:`.SeriesGroupBy.bfill`
907+
for forward or backward filling instead. If you want to fill with a
908+
single value, use :meth:`Series.fillna` instead.
909+
904910
Parameters
905911
----------
906912
value : scalar, dict, Series, or DataFrame
@@ -915,17 +921,8 @@ def fillna(
915921
Method to use for filling holes. ``'ffill'`` will propagate
916922
the last valid observation forward within a group.
917923
``'bfill'`` will use next valid observation to fill the gap.
918-
919-
.. deprecated:: 2.1.0
920-
Use obj.ffill or obj.bfill instead.
921-
922924
axis : {0 or 'index', 1 or 'columns'}
923925
Unused, only for compatibility with :meth:`DataFrameGroupBy.fillna`.
924-
925-
.. deprecated:: 2.1.0
926-
For axis=1, operate on the underlying object instead. Otherwise
927-
the axis keyword is not necessary.
928-
929926
inplace : bool, default False
930927
Broken. Do not set to True.
931928
limit : int, default None
@@ -940,8 +937,6 @@ def fillna(
940937
or the string 'infer' which will try to downcast to an appropriate
941938
equal type (e.g. float64 to int64 if possible).
942939
943-
.. deprecated:: 2.1.0
944-
945940
Returns
946941
-------
947942
Series
@@ -973,6 +968,14 @@ def fillna(
973968
mouse 0.0
974969
dtype: float64
975970
"""
971+
warnings.warn(
972+
f"{type(self).__name__}.fillna is deprecated and "
973+
"will be removed in a future version. Use obj.ffill() or obj.bfill() "
974+
"for forward or backward filling instead. If you want to fill with a "
975+
f"single value, use {type(self.obj).__name__}.fillna instead",
976+
FutureWarning,
977+
stacklevel=find_stack_level(),
978+
)
976979
result = self._op_via_apply(
977980
"fillna",
978981
value=value,
@@ -2401,6 +2404,12 @@ def fillna(
24012404
"""
24022405
Fill NA/NaN values using the specified method within groups.
24032406
2407+
.. deprecated:: 2.2.0
2408+
This method is deprecated and will be removed in a future version.
2409+
Use :meth:`.DataFrameGroupBy.ffill` or :meth:`.DataFrameGroupBy.bfill`
2410+
for forward or backward filling instead. If you want to fill with a
2411+
single value, use :meth:`DataFrame.fillna` instead.
2412+
24042413
Parameters
24052414
----------
24062415
value : scalar, dict, Series, or DataFrame
@@ -2421,11 +2430,6 @@ def fillna(
24212430
the same results as :meth:`.DataFrame.fillna`. When the
24222431
:class:`DataFrameGroupBy` ``axis`` argument is ``1``, using ``axis=0``
24232432
or ``axis=1`` here will produce the same results.
2424-
2425-
.. deprecated:: 2.1.0
2426-
For axis=1, operate on the underlying object instead. Otherwise
2427-
the axis keyword is not necessary.
2428-
24292433
inplace : bool, default False
24302434
Broken. Do not set to True.
24312435
limit : int, default None
@@ -2440,8 +2444,6 @@ def fillna(
24402444
or the string 'infer' which will try to downcast to an appropriate
24412445
equal type (e.g. float64 to int64 if possible).
24422446
2443-
.. deprecated:: 2.1.0
2444-
24452447
Returns
24462448
-------
24472449
DataFrame
@@ -2516,14 +2518,14 @@ def fillna(
25162518
3 3.0 NaN 2.0
25172519
4 3.0 NaN NaN
25182520
"""
2519-
if method is not None:
2520-
warnings.warn(
2521-
f"{type(self).__name__}.fillna with 'method' is deprecated and "
2522-
"will raise in a future version. Use obj.ffill() or obj.bfill() "
2523-
"instead.",
2524-
FutureWarning,
2525-
stacklevel=find_stack_level(),
2526-
)
2521+
warnings.warn(
2522+
f"{type(self).__name__}.fillna is deprecated and "
2523+
"will be removed in a future version. Use obj.ffill() or obj.bfill() "
2524+
"for forward or backward filling instead. If you want to fill with a "
2525+
f"single value, use {type(self.obj).__name__}.fillna instead",
2526+
FutureWarning,
2527+
stacklevel=find_stack_level(),
2528+
)
25272529

25282530
result = self._op_via_apply(
25292531
"fillna",

Diff for: pandas/tests/apply/test_str.py

+15-3
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,11 @@ def test_transform_groupby_kernel_series(request, string_series, op):
261261
)
262262
args = [0.0] if op == "fillna" else []
263263
ones = np.ones(string_series.shape[0])
264-
expected = string_series.groupby(ones).transform(op, *args)
264+
265+
warn = FutureWarning if op == "fillna" else None
266+
msg = "SeriesGroupBy.fillna is deprecated"
267+
with tm.assert_produces_warning(warn, match=msg):
268+
expected = string_series.groupby(ones).transform(op, *args)
265269
result = string_series.transform(op, 0, *args)
266270
tm.assert_series_equal(result, expected)
267271

@@ -285,7 +289,12 @@ def test_transform_groupby_kernel_frame(request, axis, float_frame, op):
285289

286290
with tm.assert_produces_warning(FutureWarning, match=msg):
287291
gb = float_frame.groupby(ones, axis=axis)
288-
expected = gb.transform(op, *args)
292+
293+
warn = FutureWarning if op == "fillna" else None
294+
op_msg = "DataFrameGroupBy.fillna is deprecated"
295+
with tm.assert_produces_warning(warn, match=op_msg):
296+
expected = gb.transform(op, *args)
297+
289298
result = float_frame.transform(op, axis, *args)
290299
tm.assert_frame_equal(result, expected)
291300

@@ -300,7 +309,10 @@ def test_transform_groupby_kernel_frame(request, axis, float_frame, op):
300309
ones = np.ones(float_frame.shape[1])
301310
with tm.assert_produces_warning(FutureWarning, match=msg):
302311
gb2 = float_frame.groupby(ones, axis=axis)
303-
expected2 = gb2.transform(op, *args)
312+
warn = FutureWarning if op == "fillna" else None
313+
op_msg = "DataFrameGroupBy.fillna is deprecated"
314+
with tm.assert_produces_warning(warn, match=op_msg):
315+
expected2 = gb2.transform(op, *args)
304316
result2 = float_frame.transform(op, axis, *args)
305317
tm.assert_frame_equal(result2, expected2)
306318

Diff for: pandas/tests/groupby/test_categorical.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1975,7 +1975,10 @@ def test_category_order_transformer(
19751975
df = df.set_index(keys)
19761976
args = get_groupby_method_args(transformation_func, df)
19771977
gb = df.groupby(keys, as_index=as_index, sort=sort, observed=observed)
1978-
op_result = getattr(gb, transformation_func)(*args)
1978+
warn = FutureWarning if transformation_func == "fillna" else None
1979+
msg = "DataFrameGroupBy.fillna is deprecated"
1980+
with tm.assert_produces_warning(warn, match=msg):
1981+
op_result = getattr(gb, transformation_func)(*args)
19791982
result = op_result.index.get_level_values("a").categories
19801983
expected = Index([1, 4, 3, 2])
19811984
tm.assert_index_equal(result, expected)

Diff for: pandas/tests/groupby/test_function.py

+27-8
Original file line numberDiff line numberDiff line change
@@ -627,7 +627,10 @@ def test_numeric_only(kernel, has_arg, numeric_only, keys):
627627
and numeric_only is lib.no_default
628628
)
629629
):
630-
result = method(*args, **kwargs)
630+
warn = FutureWarning if kernel == "fillna" else None
631+
msg = "DataFrameGroupBy.fillna is deprecated"
632+
with tm.assert_produces_warning(warn, match=msg):
633+
result = method(*args, **kwargs)
631634
assert "b" in result.columns
632635
elif has_arg:
633636
assert numeric_only is not True
@@ -725,11 +728,18 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request):
725728
msg = "cannot be performed against 'object' dtypes"
726729
else:
727730
msg = "is not supported for object dtype"
728-
with pytest.raises(TypeError, match=msg):
729-
method(*args)
731+
warn = FutureWarning if groupby_func == "fillna" else None
732+
warn_msg = "DataFrameGroupBy.fillna is deprecated"
733+
with tm.assert_produces_warning(warn, match=warn_msg):
734+
with pytest.raises(TypeError, match=msg):
735+
method(*args)
730736
elif dtype is object:
731-
result = method(*args)
732-
expected = expected_method(*args)
737+
warn = FutureWarning if groupby_func == "fillna" else None
738+
warn_msg = "SeriesGroupBy.fillna is deprecated"
739+
with tm.assert_produces_warning(warn, match=warn_msg):
740+
result = method(*args)
741+
with tm.assert_produces_warning(warn, match=warn_msg):
742+
expected = expected_method(*args)
733743
if groupby_func in obj_result:
734744
expected = expected.astype(object)
735745
tm.assert_series_equal(result, expected)
@@ -813,7 +823,10 @@ def test_multiindex_group_all_columns_when_empty(groupby_func):
813823
method = getattr(gb, groupby_func)
814824
args = get_groupby_method_args(groupby_func, df)
815825

816-
result = method(*args).index
826+
warn = FutureWarning if groupby_func == "fillna" else None
827+
warn_msg = "DataFrameGroupBy.fillna is deprecated"
828+
with tm.assert_produces_warning(warn, match=warn_msg):
829+
result = method(*args).index
817830
expected = df.index
818831
tm.assert_index_equal(result, expected)
819832

@@ -826,12 +839,18 @@ def test_duplicate_columns(request, groupby_func, as_index):
826839
df = DataFrame([[1, 3, 6], [1, 4, 7], [2, 5, 8]], columns=list("abb"))
827840
args = get_groupby_method_args(groupby_func, df)
828841
gb = df.groupby("a", as_index=as_index)
829-
result = getattr(gb, groupby_func)(*args)
842+
warn = FutureWarning if groupby_func == "fillna" else None
843+
warn_msg = "DataFrameGroupBy.fillna is deprecated"
844+
with tm.assert_produces_warning(warn, match=warn_msg):
845+
result = getattr(gb, groupby_func)(*args)
830846

831847
expected_df = df.set_axis(["a", "b", "c"], axis=1)
832848
expected_args = get_groupby_method_args(groupby_func, expected_df)
833849
expected_gb = expected_df.groupby("a", as_index=as_index)
834-
expected = getattr(expected_gb, groupby_func)(*expected_args)
850+
warn = FutureWarning if groupby_func == "fillna" else None
851+
warn_msg = "DataFrameGroupBy.fillna is deprecated"
852+
with tm.assert_produces_warning(warn, match=warn_msg):
853+
expected = getattr(expected_gb, groupby_func)(*expected_args)
835854
if groupby_func not in ("size", "ngroup", "cumcount"):
836855
expected = expected.rename(columns={"c": "b"})
837856
tm.assert_equal(result, expected)

Diff for: pandas/tests/groupby/test_groupby.py

+29-10
Original file line numberDiff line numberDiff line change
@@ -2370,18 +2370,32 @@ def test_group_on_empty_multiindex(transformation_func, request):
23702370
args = ("ffill",)
23712371
else:
23722372
args = ()
2373-
result = df.iloc[:0].groupby(["col_1"]).transform(transformation_func, *args)
2374-
expected = df.groupby(["col_1"]).transform(transformation_func, *args).iloc[:0]
2373+
warn = FutureWarning if transformation_func == "fillna" else None
2374+
warn_msg = "DataFrameGroupBy.fillna is deprecated"
2375+
with tm.assert_produces_warning(warn, match=warn_msg):
2376+
result = df.iloc[:0].groupby(["col_1"]).transform(transformation_func, *args)
2377+
with tm.assert_produces_warning(warn, match=warn_msg):
2378+
expected = df.groupby(["col_1"]).transform(transformation_func, *args).iloc[:0]
23752379
if transformation_func in ("diff", "shift"):
23762380
expected = expected.astype(int)
23772381
tm.assert_equal(result, expected)
23782382

2379-
result = (
2380-
df["col_3"].iloc[:0].groupby(["col_1"]).transform(transformation_func, *args)
2381-
)
2382-
expected = (
2383-
df["col_3"].groupby(["col_1"]).transform(transformation_func, *args).iloc[:0]
2384-
)
2383+
warn_msg = "SeriesGroupBy.fillna is deprecated"
2384+
with tm.assert_produces_warning(warn, match=warn_msg):
2385+
result = (
2386+
df["col_3"]
2387+
.iloc[:0]
2388+
.groupby(["col_1"])
2389+
.transform(transformation_func, *args)
2390+
)
2391+
warn_msg = "SeriesGroupBy.fillna is deprecated"
2392+
with tm.assert_produces_warning(warn, match=warn_msg):
2393+
expected = (
2394+
df["col_3"]
2395+
.groupby(["col_1"])
2396+
.transform(transformation_func, *args)
2397+
.iloc[:0]
2398+
)
23852399
if transformation_func in ("diff", "shift"):
23862400
expected = expected.astype(int)
23872401
tm.assert_equal(result, expected)
@@ -2402,7 +2416,10 @@ def test_dup_labels_output_shape(groupby_func, idx):
24022416
grp_by = df.groupby([0])
24032417

24042418
args = get_groupby_method_args(groupby_func, df)
2405-
result = getattr(grp_by, groupby_func)(*args)
2419+
warn = FutureWarning if groupby_func == "fillna" else None
2420+
warn_msg = "DataFrameGroupBy.fillna is deprecated"
2421+
with tm.assert_produces_warning(warn, match=warn_msg):
2422+
result = getattr(grp_by, groupby_func)(*args)
24062423

24072424
assert result.shape == (1, 2)
24082425
tm.assert_index_equal(result.columns, idx)
@@ -3158,7 +3175,9 @@ def test_groupby_selection_other_methods(df):
31583175
g_exp = df[["C"]].groupby(df["A"])
31593176

31603177
# methods which aren't just .foo()
3161-
tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0))
3178+
warn_msg = "DataFrameGroupBy.fillna is deprecated"
3179+
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
3180+
tm.assert_frame_equal(g.fillna(0), g_exp.fillna(0))
31623181
msg = "DataFrameGroupBy.dtypes is deprecated"
31633182
with tm.assert_produces_warning(FutureWarning, match=msg):
31643183
tm.assert_frame_equal(g.dtypes, g_exp.dtypes)

Diff for: pandas/tests/groupby/test_groupby_subclass.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,12 @@ def test_groupby_preserves_subclass(obj, groupby_func):
3636

3737
args = get_groupby_method_args(groupby_func, obj)
3838

39-
result1 = getattr(grouped, groupby_func)(*args)
40-
result2 = grouped.agg(groupby_func, *args)
39+
warn = FutureWarning if groupby_func == "fillna" else None
40+
msg = f"{type(grouped).__name__}.fillna is deprecated"
41+
with tm.assert_produces_warning(warn, match=msg, raise_on_extra_warnings=False):
42+
result1 = getattr(grouped, groupby_func)(*args)
43+
with tm.assert_produces_warning(warn, match=msg, raise_on_extra_warnings=False):
44+
result2 = grouped.agg(groupby_func, *args)
4145

4246
# Reduction or transformation kernels should preserve type
4347
slices = {"ngroup", "cumcount", "size"}

Diff for: pandas/tests/groupby/test_missing.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,10 @@ def test_groupby_fill_duplicate_column_names(func):
3939
def test_ffill_missing_arguments():
4040
# GH 14955
4141
df = DataFrame({"a": [1, 2], "b": [1, 1]})
42-
with pytest.raises(ValueError, match="Must specify a fill"):
43-
df.groupby("b").fillna()
42+
msg = "DataFrameGroupBy.fillna is deprecated"
43+
with tm.assert_produces_warning(FutureWarning, match=msg):
44+
with pytest.raises(ValueError, match="Must specify a fill"):
45+
df.groupby("b").fillna()
4446

4547

4648
@pytest.mark.parametrize(
@@ -50,7 +52,7 @@ def test_fillna_with_string_dtype(method, expected):
5052
# GH 40250
5153
df = DataFrame({"a": pd.array([None, "a", None], dtype="string"), "b": [0, 0, 0]})
5254
grp = df.groupby("b")
53-
msg = "DataFrameGroupBy.fillna with 'method' is deprecated"
55+
msg = "DataFrameGroupBy.fillna is deprecated"
5456
with tm.assert_produces_warning(FutureWarning, match=msg):
5557
result = grp.fillna(method=method)
5658
expected = DataFrame({"a": pd.array(expected, dtype="string")})

0 commit comments

Comments
 (0)