Skip to content

CLN: Enforce deprecation of groupby.idxmin/idxmax with skipna=False not raising #57746

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 6, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
@@ -189,6 +189,7 @@ Other Deprecations

Removal of prior version deprecations/changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- :class:`.DataFrameGroupBy.idxmin`, :class:`.DataFrameGroupBy.idxmax`, :class:`.SeriesGroupBy.idxmin`, and :class:`.SeriesGroupBy.idxmax` will now raise a ``ValueError`` when used with ``skipna=False`` and an NA value is encountered (:issue:`10694`)
- :func:`read_excel`, :func:`read_json`, :func:`read_html`, and :func:`read_xml` no longer accept raw string or byte representation of the data. That type of data must be wrapped in a :py:class:`StringIO` or :py:class:`BytesIO` (:issue:`53767`)
- :meth:`Series.dt.to_pydatetime` now returns a :class:`Series` of :py:class:`datetime.datetime` objects (:issue:`52459`)
- :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
24 changes: 8 additions & 16 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
@@ -1179,8 +1179,7 @@ def idxmin(self, skipna: bool = True) -> Series:
Parameters
----------
skipna : bool, default True
Exclude NA/null values. If the entire Series is NA, the result
will be NA.
Exclude NA values.

Returns
-------
@@ -1190,7 +1189,7 @@ def idxmin(self, skipna: bool = True) -> Series:
Raises
------
ValueError
If the Series is empty.
If the Series is empty or skipna=False and any value is NA.

See Also
--------
@@ -1233,8 +1232,7 @@ def idxmax(self, skipna: bool = True) -> Series:
Parameters
----------
skipna : bool, default True
Exclude NA/null values. If the entire Series is NA, the result
will be NA.
Exclude NA values.

Returns
-------
@@ -1244,7 +1242,7 @@ def idxmax(self, skipna: bool = True) -> Series:
Raises
------
ValueError
If the Series is empty.
If the Series is empty or skipna=False and any value is NA.

See Also
--------
@@ -2165,13 +2163,10 @@ def idxmax(
"""
Return index of first occurrence of maximum in each group.

NA/null values are excluded.

Parameters
----------
skipna : bool, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA.
Exclude NA values.
numeric_only : bool, default False
Include only `float`, `int` or `boolean` data.

@@ -2185,7 +2180,7 @@ def idxmax(
Raises
------
ValueError
* If the row/column is empty
* If a column is empty or skipna=False and any value is NA.

See Also
--------
@@ -2230,13 +2225,10 @@ def idxmin(
"""
Return index of first occurrence of minimum in each group.

NA/null values are excluded.

Parameters
----------
skipna : bool, default True
Exclude NA/null values. If an entire row/column is NA, the result
will be NA.
Exclude NA values.
numeric_only : bool, default False
Include only `float`, `int` or `boolean` data.

@@ -2250,7 +2242,7 @@ def idxmin(
Raises
------
ValueError
* If the row/column is empty
* If a column is empty or skipna=False and any value is NA.

See Also
--------
14 changes: 5 additions & 9 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
@@ -5553,15 +5553,11 @@ def _idxmax_idxmin(
f"Can't get {how} of an empty group due to unobserved categories. "
"Specify observed=True in groupby instead."
)
elif not skipna:
if self._obj_with_exclusions.isna().any(axis=None):
warnings.warn(
f"The behavior of {type(self).__name__}.{how} with all-NA "
"values, or any-NA and skipna=False, is deprecated. In a future "
"version this will raise ValueError",
FutureWarning,
stacklevel=find_stack_level(),
)
elif not skipna and self._obj_with_exclusions.isna().any(axis=None):
raise ValueError(
f"{type(self).__name__}.{how} with skipna=False encountered an NA "
f"value."
)

result = self._agg_general(
numeric_only=numeric_only,
16 changes: 7 additions & 9 deletions pandas/tests/groupby/test_reductions.py
Original file line number Diff line number Diff line change
@@ -291,16 +291,14 @@ def test_idxmin_idxmax_extremes_skipna(skipna, how, float_numpy_dtype):
)
gb = df.groupby("a")

warn = None if skipna else FutureWarning
msg = f"The behavior of DataFrameGroupBy.{how} with all-NA values"
with tm.assert_produces_warning(warn, match=msg):
result = getattr(gb, how)(skipna=skipna)
if skipna:
values = [1, 3, 4, 6, np.nan]
else:
values = np.nan
if not skipna:
msg = f"DataFrameGroupBy.{how} with skipna=False"
with pytest.raises(ValueError, match=msg):
getattr(gb, how)(skipna=skipna)
return
result = getattr(gb, how)(skipna=skipna)
expected = DataFrame(
{"b": values}, index=pd.Index(range(1, 6), name="a", dtype="intp")
{"b": [1, 3, 4, 6, np.nan]}, index=pd.Index(range(1, 6), name="a", dtype="intp")
)
tm.assert_frame_equal(result, expected)

11 changes: 6 additions & 5 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
@@ -1525,10 +1525,11 @@ def test_idxmin_idxmax_transform_args(how, skipna, numeric_only):
# GH#55268 - ensure *args are passed through when calling transform
df = DataFrame({"a": [1, 1, 1, 2], "b": [3.0, 4.0, np.nan, 6.0], "c": list("abcd")})
gb = df.groupby("a")
warn = None if skipna else FutureWarning
msg = f"The behavior of DataFrameGroupBy.{how} with .* any-NA and skipna=False"
with tm.assert_produces_warning(warn, match=msg):
if skipna:
result = gb.transform(how, skipna, numeric_only)
with tm.assert_produces_warning(warn, match=msg):
expected = gb.transform(how, skipna=skipna, numeric_only=numeric_only)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)
else:
msg = f"DataFrameGroupBy.{how} with skipna=False encountered an NA value"
with pytest.raises(ValueError, match=msg):
gb.transform(how, skipna, numeric_only)