From ac567d2904d7e30473ee0f970c01438aaf97f87b Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 12 Apr 2020 19:51:35 +0100 Subject: [PATCH 1/5] TYP: disallow decorator preserves function signature --- pandas/core/frame.py | 16 +++++++++++-- pandas/core/generic.py | 2 +- pandas/core/nanops.py | 51 ++++++++++++++++++++++++++++++------------ 3 files changed, 52 insertions(+), 17 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d19f1a263f71a..cc415c27110bd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8305,7 +8305,7 @@ def nunique(self, axis=0, dropna=True) -> Series: """ return self.apply(Series.nunique, axis=axis, dropna=dropna) - def idxmin(self, axis=0, skipna=True) -> Series: + def idxmin(self, axis: Axis = 0, skipna: bool = True) -> Series: """ Return index of first occurrence of minimum over requested axis. @@ -8368,11 +8368,17 @@ def idxmin(self, axis=0, skipna=True) -> Series: """ axis = self._get_axis_number(axis) indices = nanops.nanargmin(self.values, axis=axis, skipna=skipna) + + # indices will always be np.ndarray since axis is not None and + # values is a 2d array for DataFrame + # error: Item "int" of "Union[int, Any]" has no attribute "__iter__" + indices = cast(np.ndarray, indices) + index = self._get_axis(axis) result = [index[i] if i >= 0 else np.nan for i in indices] return Series(result, index=self._get_agg_axis(axis)) - def idxmax(self, axis=0, skipna=True) -> Series: + def idxmax(self, axis: Axis = 0, skipna: bool = True) -> Series: """ Return index of first occurrence of maximum over requested axis. @@ -8435,6 +8441,12 @@ def idxmax(self, axis=0, skipna=True) -> Series: """ axis = self._get_axis_number(axis) indices = nanops.nanargmax(self.values, axis=axis, skipna=skipna) + + # indices will always be np.ndarray since axis is not None and + # values is a 2d array for DataFrame + # error: Item "int" of "Union[int, Any]" has no attribute "__iter__" + indices = cast(np.ndarray, indices) + index = self._get_axis(axis) result = [index[i] if i >= 0 else np.nan for i in indices] return Series(result, index=self._get_agg_axis(axis)) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2adfd2bb9a7b3..73f00b24e1aed 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -353,7 +353,7 @@ def _construct_axes_from_arguments( return axes, kwargs @classmethod - def _get_axis_number(cls, axis): + def _get_axis_number(cls, axis) -> int: axis = cls._AXIS_ALIASES.get(axis, axis) if is_integer(axis): if axis in cls._AXIS_NAMES: diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 9494248a423a8..6c13a7e17f949 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,14 +1,14 @@ import functools import itertools import operator -from typing import Any, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Optional, Tuple, Union, cast import numpy as np from pandas._config import get_option from pandas._libs import NaT, Period, Timedelta, Timestamp, iNaT, lib -from pandas._typing import ArrayLike, Dtype, Scalar +from pandas._typing import ArrayLike, Dtype, F, Scalar from pandas.compat._optional import import_optional_dependency from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask @@ -34,6 +34,9 @@ from pandas.core.construction import extract_array +if TYPE_CHECKING: + from pandas import Series # noqa: F401 + bn = import_optional_dependency("bottleneck", raise_on_missing=False, on_version="warn") _BOTTLENECK_INSTALLED = bn is not None _USE_BOTTLENECK = False @@ -57,7 +60,7 @@ def __init__(self, *dtypes): def check(self, obj) -> bool: return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes) - def __call__(self, f): + def __call__(self, f: F) -> F: @functools.wraps(f) def _f(*args, **kwargs): obj_iter = itertools.chain(args, kwargs.values()) @@ -78,7 +81,7 @@ def _f(*args, **kwargs): raise TypeError(e) from e raise - return _f + return cast(F, _f) class bottleneck_switch: @@ -879,15 +882,15 @@ def reduction( @disallow("O") def nanargmax( - values: np.ndarray, + values: Union[np.ndarray, "Series"], axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, -) -> int: +) -> Union[int, np.ndarray]: """ Parameters ---------- - values : ndarray + values : ndarray or Series axis: int, optional skipna : bool, default True mask : ndarray[bool], optional @@ -895,8 +898,8 @@ def nanargmax( Returns ------- - result : int - The index of max value in specified axis or -1 in the NA case + result : int or ndarray[int] + The index/indices of max value in specified axis or -1 in the NA case Examples -------- @@ -904,6 +907,16 @@ def nanargmax( >>> s = pd.Series([1, 2, 3, np.nan, 4]) >>> nanops.nanargmax(s) 4 + + >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3) + >>> arr[2:, 2] = np.nan + >>> arr + array([[ 0., 1., 2.], + [ 3., 4., 5.], + [ 6., 7., nan], + [ 9., 10., nan]]) + >>> nanops.nanargmax(arr, axis=1) + array([2, 2, 1, 1], dtype=int64) """ values, mask, dtype, _, _ = _get_values( values, True, fill_value_typ="-inf", mask=mask @@ -915,15 +928,15 @@ def nanargmax( @disallow("O") def nanargmin( - values: np.ndarray, + values: Union[np.ndarray, "Series"], axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, -) -> int: +) -> Union[int, np.ndarray]: """ Parameters ---------- - values : ndarray + values : ndarray or Series axis: int, optional skipna : bool, default True mask : ndarray[bool], optional @@ -931,8 +944,8 @@ def nanargmin( Returns ------- - result : int - The index of min value in specified axis or -1 in the NA case + result : int or ndarray[int] + The index/indices of min value in specified axis or -1 in the NA case Examples -------- @@ -940,6 +953,16 @@ def nanargmin( >>> s = pd.Series([1, 2, 3, np.nan, 4]) >>> nanops.nanargmin(s) 0 + + >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3) + >>> arr[2:, 0] = np.nan + >>> arr + array([[ 0., 1., 2.], + [ 3., 4., 5.], + [nan, 7., 8.], + [nan, 10., 11.]]) + >>> nanops.nanargmin(arr, axis=1) + array([0, 0, 1, 1], dtype=int64) """ values, mask, dtype, _, _ = _get_values( values, True, fill_value_typ="+inf", mask=mask From efb4ed8016362308bbe2ced32a0b86ca6ef05bce Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 13 Apr 2020 16:02:26 +0100 Subject: [PATCH 2/5] revert addition of Series to acceptable types --- pandas/core/nanops.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 6c13a7e17f949..b65bc639b3258 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -882,7 +882,7 @@ def reduction( @disallow("O") def nanargmax( - values: Union[np.ndarray, "Series"], + values: np.ndarray, axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, @@ -890,7 +890,7 @@ def nanargmax( """ Parameters ---------- - values : ndarray or Series + values : ndarray axis: int, optional skipna : bool, default True mask : ndarray[bool], optional @@ -904,8 +904,8 @@ def nanargmax( Examples -------- >>> import pandas.core.nanops as nanops - >>> s = pd.Series([1, 2, 3, np.nan, 4]) - >>> nanops.nanargmax(s) + >>> arr = np.array([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmax(arr) 4 >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3) @@ -928,7 +928,7 @@ def nanargmax( @disallow("O") def nanargmin( - values: Union[np.ndarray, "Series"], + values: np.ndarray, axis: Optional[int] = None, skipna: bool = True, mask: Optional[np.ndarray] = None, @@ -936,7 +936,7 @@ def nanargmin( """ Parameters ---------- - values : ndarray or Series + values : ndarray axis: int, optional skipna : bool, default True mask : ndarray[bool], optional @@ -950,8 +950,8 @@ def nanargmin( Examples -------- >>> import pandas.core.nanops as nanops - >>> s = pd.Series([1, 2, 3, np.nan, 4]) - >>> nanops.nanargmin(s) + >>> arr = np.array([1, 2, 3, np.nan, 4]) + >>> nanops.nanargmin(arr) 0 >>> arr = np.array(range(12), dtype=np.float64).reshape(4, 3) From 9293f40f355bf4ef57b4143aa546e6402014fcfd Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 13 Apr 2020 16:07:49 +0100 Subject: [PATCH 3/5] revert addition of Axis types --- pandas/core/frame.py | 4 ++-- pandas/core/generic.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cc415c27110bd..3558e5d3ac515 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8305,7 +8305,7 @@ def nunique(self, axis=0, dropna=True) -> Series: """ return self.apply(Series.nunique, axis=axis, dropna=dropna) - def idxmin(self, axis: Axis = 0, skipna: bool = True) -> Series: + def idxmin(self, axis=0, skipna=True) -> Series: """ Return index of first occurrence of minimum over requested axis. @@ -8378,7 +8378,7 @@ def idxmin(self, axis: Axis = 0, skipna: bool = True) -> Series: result = [index[i] if i >= 0 else np.nan for i in indices] return Series(result, index=self._get_agg_axis(axis)) - def idxmax(self, axis: Axis = 0, skipna: bool = True) -> Series: + def idxmax(self, axis=0, skipna=True) -> Series: """ Return index of first occurrence of maximum over requested axis. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 13b00cc2a1be0..6a4f83427310e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -353,7 +353,7 @@ def _construct_axes_from_arguments( return axes, kwargs @classmethod - def _get_axis_number(cls, axis) -> int: + def _get_axis_number(cls, axis): axis = cls._AXIS_ALIASES.get(axis, axis) if is_integer(axis): if axis in cls._AXIS_NAMES: From f2c88b346c7fdb2ff58084dccff66d678c21d31c Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 13 Apr 2020 16:11:50 +0100 Subject: [PATCH 4/5] remove Series import --- pandas/core/nanops.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index b65bc639b3258..b78fd9f6b9990 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,7 +1,7 @@ import functools import itertools import operator -from typing import TYPE_CHECKING, Any, Optional, Tuple, Union, cast +from typing import Any, Optional, Tuple, Union, cast import numpy as np @@ -34,9 +34,6 @@ from pandas.core.construction import extract_array -if TYPE_CHECKING: - from pandas import Series # noqa: F401 - bn = import_optional_dependency("bottleneck", raise_on_missing=False, on_version="warn") _BOTTLENECK_INSTALLED = bn is not None _USE_BOTTLENECK = False From 96fca739a3d9a65dd992f192623e2b29e7b10239 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 22 Apr 2020 12:59:58 +0100 Subject: [PATCH 5/5] runtime assert instead of offline cast that would have been easier to track with warn_unused_casts. --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 32b676cbdcc9f..49c8b1b4e5144 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8374,7 +8374,7 @@ def idxmin(self, axis=0, skipna=True) -> Series: # indices will always be np.ndarray since axis is not None and # values is a 2d array for DataFrame # error: Item "int" of "Union[int, Any]" has no attribute "__iter__" - indices = cast(np.ndarray, indices) + assert isinstance(indices, np.ndarray) # for mypy index = self._get_axis(axis) result = [index[i] if i >= 0 else np.nan for i in indices] @@ -8447,7 +8447,7 @@ def idxmax(self, axis=0, skipna=True) -> Series: # indices will always be np.ndarray since axis is not None and # values is a 2d array for DataFrame # error: Item "int" of "Union[int, Any]" has no attribute "__iter__" - indices = cast(np.ndarray, indices) + assert isinstance(indices, np.ndarray) # for mypy index = self._get_axis(axis) result = [index[i] if i >= 0 else np.nan for i in indices]