Skip to content

Allow where to receive a callable #3827

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ New Features
in 0.14.1) is now on by default. To disable, use
``xarray.set_options(display_style="text")``.
By `Julia Signell <https://github.com/jsignell>`_.

- :py:meth:`Dataset.where` and :py:meth:`DataArray.where` now accept a callable
(for example a lambda) as the first argument. The callable is called on the
input object and its result is used as the condition, replicating pandas' behavior.
By `Maximilian Roos <https://github.com/max-sixty>`_.

Bug fixes
~~~~~~~~~
Expand Down
22 changes: 22 additions & 0 deletions xarray/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1119,6 +1119,15 @@ def where(self, cond, other=dtypes.NA, drop: bool = False):

>>> import numpy as np
>>> a = xr.DataArray(np.arange(25).reshape(5, 5), dims=('x', 'y'))
>>> a
<xarray.DataArray (x: 5, y: 5)>
array([[ 0, 1, 2, 3, 4],
[ 5, 6, 7, 8, 9],
[10, 11, 12, 13, 14],
[15, 16, 17, 18, 19],
[20, 21, 22, 23, 24]])
Dimensions without coordinates: x, y

>>> a.where(a.x + a.y < 4)
<xarray.DataArray (x: 5, y: 5)>
array([[ 0., 1., 2., 3., nan],
Expand All @@ -1127,6 +1136,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False):
[ 15., nan, nan, nan, nan],
[ nan, nan, nan, nan, nan]])
Dimensions without coordinates: x, y

>>> a.where(a.x + a.y < 5, -1)
<xarray.DataArray (x: 5, y: 5)>
array([[ 0, 1, 2, 3, 4],
Expand All @@ -1135,6 +1145,7 @@ def where(self, cond, other=dtypes.NA, drop: bool = False):
[15, 16, -1, -1, -1],
[20, -1, -1, -1, -1]])
Dimensions without coordinates: x, y

>>> a.where(a.x + a.y < 4, drop=True)
<xarray.DataArray (x: 4, y: 4)>
array([[ 0., 1., 2., 3.],
Expand All @@ -1143,6 +1154,14 @@ def where(self, cond, other=dtypes.NA, drop: bool = False):
[ 15., nan, nan, nan]])
Dimensions without coordinates: x, y

>>> a.where(lambda x: x.x + x.y < 4, drop=True)
<xarray.DataArray (x: 4, y: 4)>
array([[ 0., 1., 2., 3.],
[ 5., 6., 7., nan],
[ 10., 11., nan, nan],
[ 15., nan, nan, nan]])
Dimensions without coordinates: x, y

See also
--------
numpy.where : corresponding numpy function
Expand All @@ -1152,6 +1171,9 @@ def where(self, cond, other=dtypes.NA, drop: bool = False):
from .dataarray import DataArray
from .dataset import Dataset

if callable(cond):
cond = cond(self)

if drop:
if other is not dtypes.NA:
raise ValueError("cannot set `other` if drop=True")
Expand Down
6 changes: 6 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2215,6 +2215,12 @@ def test_where(self):
actual = arr.where(arr.x < 2, drop=True)
assert_identical(actual, expected)

def test_where_lambda(self):
    """Check that ``where`` accepts a callable as its condition.

    The callable is given as the first argument together with
    ``drop=True``; the result must be identical to selecting
    ``y=slice(2)`` from the same array.
    """
    data = DataArray(np.arange(4), dims="y")
    expected = data.sel(y=slice(2))

    # The condition is passed as a callable rather than a precomputed mask.
    actual = data.where(lambda da: da.y < 2, drop=True)

    assert_identical(actual, expected)

def test_where_string(self):
array = DataArray(["a", "b"])
expected = DataArray(np.array(["a", np.nan], dtype=object))
Expand Down
9 changes: 9 additions & 0 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4349,13 +4349,22 @@ def test_where(self):
assert actual.a.name == "a"
assert actual.a.attrs == ds.a.attrs

# lambda
ds = Dataset({"a": ("x", range(5))})
expected = Dataset({"a": ("x", [np.nan, np.nan, 2, 3, 4])})
actual = ds.where(lambda x: x > 1)
assert_identical(expected, actual)

def test_where_other(self):
ds = Dataset({"a": ("x", range(5))}, {"x": range(5)})
expected = Dataset({"a": ("x", [-1, -1, 2, 3, 4])}, {"x": range(5)})
actual = ds.where(ds > 1, -1)
assert_equal(expected, actual)
assert actual.a.dtype == int

actual = ds.where(lambda x: x > 1, -1)
assert_equal(expected, actual)

with raises_regex(ValueError, "cannot set"):
ds.where(ds > 1, other=0, drop=True)

Expand Down