Skip to content

Commit 63cad6b

Browse files
authored
CLN: Enforce deprecation of argmin/max and idxmin/max with NA values (#57971)
* CLN: Enforce deprecation of argmin/max and idxmin/max with NA values
* Docstrings
1 parent 2750652 commit 63cad6b

File tree

10 files changed

+139
-241
lines changed

10 files changed

+139
-241
lines changed

Diff for: doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ Removal of prior version deprecations/changes
259259
- Removed deprecated keyword ``verbose`` on :func:`read_csv` and :func:`read_table` (:issue:`56556`)
260260
- Removed the ``method`` keyword in ``ExtensionArray.fillna``, implement ``ExtensionArray._pad_or_backfill`` instead (:issue:`53621`)
261261
- Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`)
262+
- Enforced deprecation of ``argmin``, ``argmax``, ``idxmin``, and ``idxmax`` returning a result when ``skipna=False`` and an NA value is encountered or all values are NA values; these operations will now raise a ``ValueError`` in such cases (:issue:`33941`, :issue:`51276`)
262263

263264
.. ---------------------------------------------------------------------------
264265
.. _whatsnew_300.performance:

Diff for: pandas/core/base.py

+14-41
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
final,
1515
overload,
1616
)
17-
import warnings
1817

1918
import numpy as np
2019

@@ -35,7 +34,6 @@
3534
cache_readonly,
3635
doc,
3736
)
38-
from pandas.util._exceptions import find_stack_level
3937

4038
from pandas.core.dtypes.cast import can_hold_element
4139
from pandas.core.dtypes.common import (
@@ -686,7 +684,8 @@ def argmax(
686684
axis : {{None}}
687685
Unused. Parameter needed for compatibility with DataFrame.
688686
skipna : bool, default True
689-
Exclude NA/null values when showing the result.
687+
Exclude NA/null values. If the entire Series is NA, or if ``skipna=False``
688+
and there is an NA value, this method will raise a ``ValueError``.
690689
*args, **kwargs
691690
Additional arguments and keywords for compatibility with NumPy.
692691
@@ -736,28 +735,15 @@ def argmax(
736735
nv.validate_minmax_axis(axis)
737736
skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)
738737

738+
if skipna and len(delegate) > 0 and isna(delegate).all():
739+
raise ValueError("Encountered all NA values")
740+
elif not skipna and isna(delegate).any():
741+
raise ValueError("Encountered an NA value with skipna=False")
742+
739743
if isinstance(delegate, ExtensionArray):
740-
if not skipna and delegate.isna().any():
741-
warnings.warn(
742-
f"The behavior of {type(self).__name__}.argmax/argmin "
743-
"with skipna=False and NAs, or with all-NAs is deprecated. "
744-
"In a future version this will raise ValueError.",
745-
FutureWarning,
746-
stacklevel=find_stack_level(),
747-
)
748-
return -1
749-
else:
750-
return delegate.argmax()
744+
return delegate.argmax()
751745
else:
752746
result = nanops.nanargmax(delegate, skipna=skipna)
753-
if result == -1:
754-
warnings.warn(
755-
f"The behavior of {type(self).__name__}.argmax/argmin "
756-
"with skipna=False and NAs, or with all-NAs is deprecated. "
757-
"In a future version this will raise ValueError.",
758-
FutureWarning,
759-
stacklevel=find_stack_level(),
760-
)
761747
# error: Incompatible return value type (got "Union[int, ndarray]", expected
762748
# "int")
763749
return result # type: ignore[return-value]
@@ -770,28 +756,15 @@ def argmin(
770756
nv.validate_minmax_axis(axis)
771757
skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
772758

759+
if skipna and len(delegate) > 0 and isna(delegate).all():
760+
raise ValueError("Encountered all NA values")
761+
elif not skipna and isna(delegate).any():
762+
raise ValueError("Encountered an NA value with skipna=False")
763+
773764
if isinstance(delegate, ExtensionArray):
774-
if not skipna and delegate.isna().any():
775-
warnings.warn(
776-
f"The behavior of {type(self).__name__}.argmax/argmin "
777-
"with skipna=False and NAs, or with all-NAs is deprecated. "
778-
"In a future version this will raise ValueError.",
779-
FutureWarning,
780-
stacklevel=find_stack_level(),
781-
)
782-
return -1
783-
else:
784-
return delegate.argmin()
765+
return delegate.argmin()
785766
else:
786767
result = nanops.nanargmin(delegate, skipna=skipna)
787-
if result == -1:
788-
warnings.warn(
789-
f"The behavior of {type(self).__name__}.argmax/argmin "
790-
"with skipna=False and NAs, or with all-NAs is deprecated. "
791-
"In a future version this will raise ValueError.",
792-
FutureWarning,
793-
stacklevel=find_stack_level(),
794-
)
795768
# error: Incompatible return value type (got "Union[int, ndarray]", expected
796769
# "int")
797770
return result # type: ignore[return-value]

Diff for: pandas/core/indexes/base.py

+8-20
Original file line numberDiff line numberDiff line change
@@ -6976,16 +6976,10 @@ def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
69766976

69776977
if not self._is_multi and self.hasnans:
69786978
# Take advantage of cache
6979-
mask = self._isnan
6980-
if not skipna or mask.all():
6981-
warnings.warn(
6982-
f"The behavior of {type(self).__name__}.argmax/argmin "
6983-
"with skipna=False and NAs, or with all-NAs is deprecated. "
6984-
"In a future version this will raise ValueError.",
6985-
FutureWarning,
6986-
stacklevel=find_stack_level(),
6987-
)
6988-
return -1
6979+
if self._isnan.all():
6980+
raise ValueError("Encountered all NA values")
6981+
elif not skipna:
6982+
raise ValueError("Encountered an NA value with skipna=False")
69896983
return super().argmin(skipna=skipna)
69906984

69916985
@Appender(IndexOpsMixin.argmax.__doc__)
@@ -6995,16 +6989,10 @@ def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
69956989

69966990
if not self._is_multi and self.hasnans:
69976991
# Take advantage of cache
6998-
mask = self._isnan
6999-
if not skipna or mask.all():
7000-
warnings.warn(
7001-
f"The behavior of {type(self).__name__}.argmax/argmin "
7002-
"with skipna=False and NAs, or with all-NAs is deprecated. "
7003-
"In a future version this will raise ValueError.",
7004-
FutureWarning,
7005-
stacklevel=find_stack_level(),
7006-
)
7007-
return -1
6992+
if self._isnan.all():
6993+
raise ValueError("Encountered all NA values")
6994+
elif not skipna:
6995+
raise ValueError("Encountered an NA value with skipna=False")
70086996
return super().argmax(skipna=skipna)
70096997

70106998
def min(self, axis=None, skipna: bool = True, *args, **kwargs):

Diff for: pandas/core/nanops.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -1441,17 +1441,18 @@ def _maybe_arg_null_out(
14411441
if axis is None or not getattr(result, "ndim", False):
14421442
if skipna:
14431443
if mask.all():
1444-
return -1
1444+
raise ValueError("Encountered all NA values")
14451445
else:
14461446
if mask.any():
1447-
return -1
1447+
raise ValueError("Encountered an NA value with skipna=False")
14481448
else:
1449-
if skipna:
1450-
na_mask = mask.all(axis)
1451-
else:
1452-
na_mask = mask.any(axis)
1449+
na_mask = mask.all(axis)
14531450
if na_mask.any():
1454-
result[na_mask] = -1
1451+
raise ValueError("Encountered all NA values")
1452+
elif not skipna:
1453+
na_mask = mask.any(axis)
1454+
if na_mask.any():
1455+
raise ValueError("Encountered an NA value with skipna=False")
14551456
return result
14561457

14571458

Diff for: pandas/core/series.py

+8-52
Original file line numberDiff line numberDiff line change
@@ -2333,8 +2333,8 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
23332333
axis : {0 or 'index'}
23342334
Unused. Parameter needed for compatibility with DataFrame.
23352335
skipna : bool, default True
2336-
Exclude NA/null values. If the entire Series is NA, the result
2337-
will be NA.
2336+
Exclude NA/null values. If the entire Series is NA, or if ``skipna=False``
2337+
and there is an NA value, this method will raise a ``ValueError``.
23382338
*args, **kwargs
23392339
Additional arguments and keywords have no effect but might be
23402340
accepted for compatibility with NumPy.
@@ -2376,32 +2376,10 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
23762376
23772377
>>> s.idxmin()
23782378
'A'
2379-
2380-
If `skipna` is False and there is an NA value in the data,
2381-
the function returns ``nan``.
2382-
2383-
>>> s.idxmin(skipna=False)
2384-
nan
23852379
"""
23862380
axis = self._get_axis_number(axis)
2387-
with warnings.catch_warnings():
2388-
# TODO(3.0): this catching/filtering can be removed
2389-
# ignore warning produced by argmin since we will issue a different
2390-
# warning for idxmin
2391-
warnings.simplefilter("ignore")
2392-
i = self.argmin(axis, skipna, *args, **kwargs)
2393-
2394-
if i == -1:
2395-
# GH#43587 give correct NA value for Index.
2396-
warnings.warn(
2397-
f"The behavior of {type(self).__name__}.idxmin with all-NA "
2398-
"values, or any-NA and skipna=False, is deprecated. In a future "
2399-
"version this will raise ValueError",
2400-
FutureWarning,
2401-
stacklevel=find_stack_level(),
2402-
)
2403-
return self.index._na_value
2404-
return self.index[i]
2381+
iloc = self.argmin(axis, skipna, *args, **kwargs)
2382+
return self.index[iloc]
24052383

24062384
def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable:
24072385
"""
@@ -2415,8 +2393,8 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
24152393
axis : {0 or 'index'}
24162394
Unused. Parameter needed for compatibility with DataFrame.
24172395
skipna : bool, default True
2418-
Exclude NA/null values. If the entire Series is NA, the result
2419-
will be NA.
2396+
Exclude NA/null values. If the entire Series is NA, or if ``skipna=False``
2397+
and there is an NA value, this method will raise a ``ValueError``.
24202398
*args, **kwargs
24212399
Additional arguments and keywords have no effect but might be
24222400
accepted for compatibility with NumPy.
@@ -2459,32 +2437,10 @@ def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashab
24592437
24602438
>>> s.idxmax()
24612439
'C'
2462-
2463-
If `skipna` is False and there is an NA value in the data,
2464-
the function returns ``nan``.
2465-
2466-
>>> s.idxmax(skipna=False)
2467-
nan
24682440
"""
24692441
axis = self._get_axis_number(axis)
2470-
with warnings.catch_warnings():
2471-
# TODO(3.0): this catching/filtering can be removed
2472-
# ignore warning produced by argmax since we will issue a different
2473-
# warning for argmax
2474-
warnings.simplefilter("ignore")
2475-
i = self.argmax(axis, skipna, *args, **kwargs)
2476-
2477-
if i == -1:
2478-
# GH#43587 give correct NA value for Index.
2479-
warnings.warn(
2480-
f"The behavior of {type(self).__name__}.idxmax with all-NA "
2481-
"values, or any-NA and skipna=False, is deprecated. In a future "
2482-
"version this will raise ValueError",
2483-
FutureWarning,
2484-
stacklevel=find_stack_level(),
2485-
)
2486-
return self.index._na_value
2487-
return self.index[i]
2442+
iloc = self.argmax(axis, skipna, *args, **kwargs)
2443+
return self.index[iloc]
24882444

24892445
def round(self, decimals: int = 0, *args, **kwargs) -> Series:
24902446
"""

Diff for: pandas/core/shared_docs.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -692,8 +692,8 @@
692692
axis : {{0 or 'index', 1 or 'columns'}}, default 0
693693
The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise.
694694
skipna : bool, default True
695-
Exclude NA/null values. If an entire row/column is NA, the result
696-
will be NA.
695+
Exclude NA/null values. If an entire row/column is NA, or if ``skipna=False``
696+
and there is an NA value, this method will raise a ``ValueError``.
697697
numeric_only : bool, default {numeric_only_default}
698698
Include only `float`, `int` or `boolean` data.
699699
@@ -757,8 +757,8 @@
757757
axis : {{0 or 'index', 1 or 'columns'}}, default 0
758758
The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise.
759759
skipna : bool, default True
760-
Exclude NA/null values. If an entire row/column is NA, the result
761-
will be NA.
760+
Exclude NA/null values. If an entire row/column is NA, or if ``skipna=False``
761+
and there is an NA value, this method will raise a ``ValueError``.
762762
numeric_only : bool, default {numeric_only_default}
763763
Include only `float`, `int` or `boolean` data.
764764

Diff for: pandas/tests/extension/base/methods.py

+7-11
Original file line numberDiff line numberDiff line change
@@ -169,8 +169,8 @@ def test_argmin_argmax_all_na(self, method, data, na_value):
169169
("idxmin", True, 2),
170170
("argmax", True, 0),
171171
("argmin", True, 2),
172-
("idxmax", False, np.nan),
173-
("idxmin", False, np.nan),
172+
("idxmax", False, -1),
173+
("idxmin", False, -1),
174174
("argmax", False, -1),
175175
("argmin", False, -1),
176176
],
@@ -179,17 +179,13 @@ def test_argreduce_series(
179179
self, data_missing_for_sorting, op_name, skipna, expected
180180
):
181181
# data_missing_for_sorting -> [B, NA, A] with A < B and NA missing.
182-
warn = None
183-
msg = "The behavior of Series.argmax/argmin"
184-
if op_name.startswith("arg") and expected == -1:
185-
warn = FutureWarning
186-
if op_name.startswith("idx") and np.isnan(expected):
187-
warn = FutureWarning
188-
msg = f"The behavior of Series.{op_name}"
189182
ser = pd.Series(data_missing_for_sorting)
190-
with tm.assert_produces_warning(warn, match=msg):
183+
if expected == -1:
184+
with pytest.raises(ValueError, match="Encountered an NA value"):
185+
getattr(ser, op_name)(skipna=skipna)
186+
else:
191187
result = getattr(ser, op_name)(skipna=skipna)
192-
tm.assert_almost_equal(result, expected)
188+
tm.assert_almost_equal(result, expected)
193189

194190
def test_argmax_argmin_no_skipna_notimplemented(self, data_missing_for_sorting):
195191
# GH#38733

0 commit comments

Comments
 (0)