diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 178eab20c01c7..6abe70e56b9b9 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -340,6 +340,7 @@ Indexing - Bug in :meth:`Index.get_loc` not raising ``KeyError`` when method is specified for ``NaN`` value when ``NaN`` is not in :class:`Index` (:issue:`39382`) - Bug in incorrectly raising in :meth:`Index.insert`, when setting a new column that cannot be held in the existing ``frame.columns``, or in :meth:`Series.reset_index` or :meth:`DataFrame.reset_index` instead of casting to a compatible dtype (:issue:`39068`) - Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`) +- Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`) Missing ^^^^^^^ @@ -434,7 +435,9 @@ Other - Bug in :class:`Index` constructor sometimes silently ignorning a specified ``dtype`` (:issue:`38879`) - Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`) - Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`) +- :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. ``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`) - Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`) +- .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index dd1b396ee761f..f4db68a2d7ac5 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -1413,6 +1413,16 @@ def to_tuples(self, na_tuple=True): # --------------------------------------------------------------------- + def putmask(self, mask: np.ndarray, value) -> None: + value_left, value_right = self._validate_setitem_value(value) + + if isinstance(self._left, np.ndarray): + np.putmask(self._left, mask, value_left) + np.putmask(self._right, mask, value_right) + else: + self._left.putmask(mask, value_left) + self._right.putmask(mask, value_right) + def delete(self: IntervalArrayT, loc) -> IntervalArrayT: if isinstance(self._left, np.ndarray): new_left = np.delete(self._left, loc) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1e4204be24f6d..f2fd5ca9c62c7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -115,7 +115,7 @@ ) if TYPE_CHECKING: - from pandas import MultiIndex, RangeIndex, Series + from pandas import IntervalIndex, MultiIndex, RangeIndex, Series from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -4316,27 +4316,8 @@ def where(self, cond, other=None): >>> idx.where(idx.isin(['car', 'train']), 'other') Index(['car', 'other', 'train', 'other'], dtype='object') """ - if other is None: - other = self._na_value - - values = self.values - - try: - self._validate_fill_value(other) - except (ValueError, TypeError): - return self.astype(object).where(cond, other) - - if isinstance(other, np.timedelta64) and self.dtype == object: - # https://github.com/numpy/numpy/issues/12550 - # timedelta64 will incorrectly cast to int - other = [other] * (~cond).sum() - values = cast(np.ndarray, values).copy() - # error: Unsupported target for indexed assignment ("ArrayLike") - values[~cond] = other # type:ignore[index] - else: - values = np.where(cond, values, other) - - return Index(values, name=self.name) + cond = np.asarray(cond, dtype=bool) + return self.putmask(~cond, other) # construction helpers @final @@ -4551,17 +4532,32 @@ def putmask(self, mask, value): numpy.ndarray.putmask : Changes elements of an array based on conditional and input values. """ - values = self._values.copy() + mask = np.asarray(mask, dtype=bool) + if mask.shape != self.shape: + raise ValueError("putmask: mask and data must be the same size") + if not mask.any(): + return self.copy() + + if value is None: + value = self._na_value try: converted = self._validate_fill_value(value) except (ValueError, TypeError) as err: if is_object_dtype(self): raise err - # coerces to object - return self.astype(object).putmask(mask, value) + dtype = self._find_common_type_compat(value) + return self.astype(dtype).putmask(mask, value) + + values = self._values.copy() + if isinstance(converted, np.timedelta64) and self.dtype == object: + # https://github.com/numpy/numpy/issues/12550 + # timedelta64 will incorrectly cast to int + converted = [converted] * mask.sum() + values[mask] = converted + else: + np.putmask(values, mask, converted) - np.putmask(values, mask, converted) return type(self)._simple_new(values, name=self.name) def equals(self, other: Any) -> bool: @@ -5198,18 +5194,31 @@ def _maybe_promote(self, other: Index): return self, other - def _find_common_type_compat(self, target: Index) -> DtypeObj: + @final + def _find_common_type_compat(self, target) -> DtypeObj: """ Implementation of find_common_type that adjusts for Index-specific special cases. """ - dtype = find_common_type([self.dtype, target.dtype]) + if is_interval_dtype(self.dtype) and is_valid_nat_for_dtype(target, self.dtype): + # e.g. setting NA value into IntervalArray[int64] + self = cast("IntervalIndex", self) + return IntervalDtype(np.float64, closed=self.closed) + + target_dtype, _ = infer_dtype_from(target, pandas_dtype=True) + dtype = find_common_type([self.dtype, target_dtype]) if dtype.kind in ["i", "u"]: # TODO: what about reversed with self being categorical? - if is_categorical_dtype(target.dtype) and target.hasnans: + if ( + isinstance(target, Index) + and is_categorical_dtype(target.dtype) + and target.hasnans + ): # FIXME: find_common_type incorrect with Categorical GH#38240 # FIXME: some cases where float64 cast can be lossy? dtype = np.dtype(np.float64) + if dtype.kind == "c": + dtype = np.dtype(object) return dtype @final diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 40413bfb40b4b..9841b63029f17 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -799,29 +799,22 @@ def length(self): return Index(self._data.length, copy=False) def putmask(self, mask, value): - arr = self._data.copy() + mask = np.asarray(mask, dtype=bool) + if mask.shape != self.shape: + raise ValueError("putmask: mask and data must be the same size") + if not mask.any(): + return self.copy() + try: - value_left, value_right = arr._validate_setitem_value(value) + self._validate_fill_value(value) except (ValueError, TypeError): - return self.astype(object).putmask(mask, value) + dtype = self._find_common_type_compat(value) + return self.astype(dtype).putmask(mask, value) - if isinstance(self._data._left, np.ndarray): - np.putmask(arr._left, mask, value_left) - np.putmask(arr._right, mask, value_right) - else: - # TODO: special case not needed with __array_function__ - arr._left.putmask(mask, value_left) - arr._right.putmask(mask, value_right) + arr = self._data.copy() + arr.putmask(mask, value) return type(self)._simple_new(arr, name=self.name) - @Appender(Index.where.__doc__) - def where(self, cond, other=None): - if other is None: - other = self._na_value - values = np.where(cond, self._values, other) - result = IntervalArray(values) - return type(self)._simple_new(result, name=self.name) - def insert(self, loc, item): """ Return a new IntervalIndex inserting new item at location. Follows @@ -998,6 +991,9 @@ def func(self, other, sort=sort): # -------------------------------------------------------------------- + def _validate_fill_value(self, value): + return self._data._validate_setitem_value(value) + @property def _is_all_dates(self) -> bool: """ diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index bbf3715d8e022..657f89512d79e 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas.compat import np_version_under1p20 - from pandas import ( DatetimeIndex, Index, @@ -380,8 +378,7 @@ def test_index_where(self, obj, key, expected, val, request): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True - if obj.dtype == bool and not mask.all(): - # When mask is all True, casting behavior does not apply + if obj.dtype == bool: msg = "Index/Series casting behavior inconsistent GH#38692" mark = pytest.mark.xfail(reason=msg) request.node.add_marker(mark) @@ -389,7 +386,6 @@ def test_index_where(self, obj, key, expected, val, request): res = Index(obj).where(~mask, val) tm.assert_index_equal(res, Index(expected)) - @pytest.mark.xfail(reason="Index/Series casting behavior inconsistent GH#38692") def test_index_putmask(self, obj, key, expected, val): if Index(obj).dtype != obj.dtype: pytest.skip("test not applicable for this dtype") @@ -629,10 +625,6 @@ def test_index_where(self, obj, key, expected, val, request): res = Index(obj).where(~mask, val) tm.assert_index_equal(res, Index(expected)) - @pytest.mark.xfail( - np_version_under1p20, - reason="Index/Series casting behavior inconsistent GH#38692", - ) def test_index_putmask(self, obj, key, expected, val): if Index(obj).dtype != obj.dtype: pytest.skip("test not applicable for this dtype")