Skip to content

Commit 6b8cc98

Browse files
authored
BUG: make Index.where behavior mirror Index.putmask behavior (#39412)
1 parent 1efd74d commit 6b8cc98

File tree

5 files changed

+66
-56
lines changed

5 files changed

+66
-56
lines changed

Diff for: doc/source/whatsnew/v1.3.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,7 @@ Indexing
340340
- Bug in :meth:`Index.get_loc` not raising ``KeyError`` when ``method`` is specified for a ``NaN`` value and ``NaN`` is not in the :class:`Index` (:issue:`39382`)
341341
- Bug in incorrectly raising in :meth:`Index.insert`, when setting a new column that cannot be held in the existing ``frame.columns``, or in :meth:`Series.reset_index` or :meth:`DataFrame.reset_index` instead of casting to a compatible dtype (:issue:`39068`)
342342
- Bug in :meth:`RangeIndex.append` where a single object of length 1 was concatenated incorrectly (:issue:`39401`)
343+
- Bug in setting ``numpy.timedelta64`` values into an object-dtype :class:`Series` using a boolean indexer (:issue:`39488`)
343344

344345
Missing
345346
^^^^^^^
@@ -434,7 +435,9 @@ Other
434435
- Bug in :class:`Index` constructor sometimes silently ignoring a specified ``dtype`` (:issue:`38879`)
435436
- Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`)
436437
- Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`)
438+
- :meth:`Index.where` behavior now mirrors :meth:`Index.putmask` behavior, i.e. ``index.where(mask, other)`` matches ``index.putmask(~mask, other)`` (:issue:`39412`)
437439
- Bug in :func:`pandas.testing.assert_series_equal`, :func:`pandas.testing.assert_frame_equal`, :func:`pandas.testing.assert_index_equal` and :func:`pandas.testing.assert_extension_array_equal` incorrectly raising when an attribute has an unrecognized NA type (:issue:`39461`)
440+
-
438441

439442
.. ---------------------------------------------------------------------------
440443

Diff for: pandas/core/arrays/interval.py

+10
Original file line numberDiff line numberDiff line change
@@ -1413,6 +1413,16 @@ def to_tuples(self, na_tuple=True):
14131413

14141414
# ---------------------------------------------------------------------
14151415

1416+
def putmask(self, mask: np.ndarray, value) -> None:
1417+
value_left, value_right = self._validate_setitem_value(value)
1418+
1419+
if isinstance(self._left, np.ndarray):
1420+
np.putmask(self._left, mask, value_left)
1421+
np.putmask(self._right, mask, value_right)
1422+
else:
1423+
self._left.putmask(mask, value_left)
1424+
self._right.putmask(mask, value_right)
1425+
14161426
def delete(self: IntervalArrayT, loc) -> IntervalArrayT:
14171427
if isinstance(self._left, np.ndarray):
14181428
new_left = np.delete(self._left, loc)

Diff for: pandas/core/indexes/base.py

+38-29
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@
115115
)
116116

117117
if TYPE_CHECKING:
118-
from pandas import MultiIndex, RangeIndex, Series
118+
from pandas import IntervalIndex, MultiIndex, RangeIndex, Series
119119
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
120120

121121

@@ -4316,27 +4316,8 @@ def where(self, cond, other=None):
43164316
>>> idx.where(idx.isin(['car', 'train']), 'other')
43174317
Index(['car', 'other', 'train', 'other'], dtype='object')
43184318
"""
4319-
if other is None:
4320-
other = self._na_value
4321-
4322-
values = self.values
4323-
4324-
try:
4325-
self._validate_fill_value(other)
4326-
except (ValueError, TypeError):
4327-
return self.astype(object).where(cond, other)
4328-
4329-
if isinstance(other, np.timedelta64) and self.dtype == object:
4330-
# https://github.com/numpy/numpy/issues/12550
4331-
# timedelta64 will incorrectly cast to int
4332-
other = [other] * (~cond).sum()
4333-
values = cast(np.ndarray, values).copy()
4334-
# error: Unsupported target for indexed assignment ("ArrayLike")
4335-
values[~cond] = other # type:ignore[index]
4336-
else:
4337-
values = np.where(cond, values, other)
4338-
4339-
return Index(values, name=self.name)
4319+
cond = np.asarray(cond, dtype=bool)
4320+
return self.putmask(~cond, other)
43404321

43414322
# construction helpers
43424323
@final
@@ -4551,17 +4532,32 @@ def putmask(self, mask, value):
45514532
numpy.ndarray.putmask : Changes elements of an array
45524533
based on conditional and input values.
45534534
"""
4554-
values = self._values.copy()
4535+
mask = np.asarray(mask, dtype=bool)
4536+
if mask.shape != self.shape:
4537+
raise ValueError("putmask: mask and data must be the same size")
4538+
if not mask.any():
4539+
return self.copy()
4540+
4541+
if value is None:
4542+
value = self._na_value
45554543
try:
45564544
converted = self._validate_fill_value(value)
45574545
except (ValueError, TypeError) as err:
45584546
if is_object_dtype(self):
45594547
raise err
45604548

4561-
# coerces to object
4562-
return self.astype(object).putmask(mask, value)
4549+
dtype = self._find_common_type_compat(value)
4550+
return self.astype(dtype).putmask(mask, value)
4551+
4552+
values = self._values.copy()
4553+
if isinstance(converted, np.timedelta64) and self.dtype == object:
4554+
# https://github.com/numpy/numpy/issues/12550
4555+
# timedelta64 will incorrectly cast to int
4556+
converted = [converted] * mask.sum()
4557+
values[mask] = converted
4558+
else:
4559+
np.putmask(values, mask, converted)
45634560

4564-
np.putmask(values, mask, converted)
45654561
return type(self)._simple_new(values, name=self.name)
45664562

45674563
def equals(self, other: Any) -> bool:
@@ -5198,18 +5194,31 @@ def _maybe_promote(self, other: Index):
51985194

51995195
return self, other
52005196

5201-
def _find_common_type_compat(self, target: Index) -> DtypeObj:
5197+
@final
5198+
def _find_common_type_compat(self, target) -> DtypeObj:
52025199
"""
52035200
Implementation of find_common_type that adjusts for Index-specific
52045201
special cases.
52055202
"""
5206-
dtype = find_common_type([self.dtype, target.dtype])
5203+
if is_interval_dtype(self.dtype) and is_valid_nat_for_dtype(target, self.dtype):
5204+
# e.g. setting NA value into IntervalArray[int64]
5205+
self = cast("IntervalIndex", self)
5206+
return IntervalDtype(np.float64, closed=self.closed)
5207+
5208+
target_dtype, _ = infer_dtype_from(target, pandas_dtype=True)
5209+
dtype = find_common_type([self.dtype, target_dtype])
52075210
if dtype.kind in ["i", "u"]:
52085211
# TODO: what about reversed with self being categorical?
5209-
if is_categorical_dtype(target.dtype) and target.hasnans:
5212+
if (
5213+
isinstance(target, Index)
5214+
and is_categorical_dtype(target.dtype)
5215+
and target.hasnans
5216+
):
52105217
# FIXME: find_common_type incorrect with Categorical GH#38240
52115218
# FIXME: some cases where float64 cast can be lossy?
52125219
dtype = np.dtype(np.float64)
5220+
if dtype.kind == "c":
5221+
dtype = np.dtype(object)
52135222
return dtype
52145223

52155224
@final

Diff for: pandas/core/indexes/interval.py

+14-18
Original file line numberDiff line numberDiff line change
@@ -799,29 +799,22 @@ def length(self):
799799
return Index(self._data.length, copy=False)
800800

801801
def putmask(self, mask, value):
802-
arr = self._data.copy()
802+
mask = np.asarray(mask, dtype=bool)
803+
if mask.shape != self.shape:
804+
raise ValueError("putmask: mask and data must be the same size")
805+
if not mask.any():
806+
return self.copy()
807+
803808
try:
804-
value_left, value_right = arr._validate_setitem_value(value)
809+
self._validate_fill_value(value)
805810
except (ValueError, TypeError):
806-
return self.astype(object).putmask(mask, value)
811+
dtype = self._find_common_type_compat(value)
812+
return self.astype(dtype).putmask(mask, value)
807813

808-
if isinstance(self._data._left, np.ndarray):
809-
np.putmask(arr._left, mask, value_left)
810-
np.putmask(arr._right, mask, value_right)
811-
else:
812-
# TODO: special case not needed with __array_function__
813-
arr._left.putmask(mask, value_left)
814-
arr._right.putmask(mask, value_right)
814+
arr = self._data.copy()
815+
arr.putmask(mask, value)
815816
return type(self)._simple_new(arr, name=self.name)
816817

817-
@Appender(Index.where.__doc__)
818-
def where(self, cond, other=None):
819-
if other is None:
820-
other = self._na_value
821-
values = np.where(cond, self._values, other)
822-
result = IntervalArray(values)
823-
return type(self)._simple_new(result, name=self.name)
824-
825818
def insert(self, loc, item):
826819
"""
827820
Return a new IntervalIndex inserting new item at location. Follows
@@ -998,6 +991,9 @@ def func(self, other, sort=sort):
998991

999992
# --------------------------------------------------------------------
1000993

994+
def _validate_fill_value(self, value):
995+
return self._data._validate_setitem_value(value)
996+
1001997
@property
1002998
def _is_all_dates(self) -> bool:
1003999
"""

Diff for: pandas/tests/series/indexing/test_setitem.py

+1-9
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas.compat import np_version_under1p20
7-
86
from pandas import (
97
DatetimeIndex,
108
Index,
@@ -380,16 +378,14 @@ def test_index_where(self, obj, key, expected, val, request):
380378
mask = np.zeros(obj.shape, dtype=bool)
381379
mask[key] = True
382380

383-
if obj.dtype == bool and not mask.all():
384-
# When mask is all True, casting behavior does not apply
381+
if obj.dtype == bool:
385382
msg = "Index/Series casting behavior inconsistent GH#38692"
386383
mark = pytest.mark.xfail(reason=msg)
387384
request.node.add_marker(mark)
388385

389386
res = Index(obj).where(~mask, val)
390387
tm.assert_index_equal(res, Index(expected))
391388

392-
@pytest.mark.xfail(reason="Index/Series casting behavior inconsistent GH#38692")
393389
def test_index_putmask(self, obj, key, expected, val):
394390
if Index(obj).dtype != obj.dtype:
395391
pytest.skip("test not applicable for this dtype")
@@ -629,10 +625,6 @@ def test_index_where(self, obj, key, expected, val, request):
629625
res = Index(obj).where(~mask, val)
630626
tm.assert_index_equal(res, Index(expected))
631627

632-
@pytest.mark.xfail(
633-
np_version_under1p20,
634-
reason="Index/Series casting behavior inconsistent GH#38692",
635-
)
636628
def test_index_putmask(self, obj, key, expected, val):
637629
if Index(obj).dtype != obj.dtype:
638630
pytest.skip("test not applicable for this dtype")

0 commit comments

Comments
 (0)