Skip to content

Commit 0023e5d

Browse files
JoelJaeschkedcherianheadtr1ck
authored
Change .groupby fastpath to work for monotonic increasing and decreasing (#7427)
* Fix GH6220

  This fixes GH6220, making it possible to use the fast path for .groupby with both monotonically increasing and monotonically decreasing values.
* Implemented groupby tests as described by feedback
* Implemented groupby tests as described by feedback
* Minor test.
* Test resampling error with monotonic decreasing data.
* Fix test.
* Added feature to whats-new.rst
* Update whats-new.rst
* main
* flox test
* Update xarray/tests/test_groupby.py

  Co-authored-by: Michael Niklas <[email protected]>

---------

Co-authored-by: dcherian <[email protected]>
Co-authored-by: Deepak Cherian <[email protected]>
Co-authored-by: Michael Niklas <[email protected]>
1 parent d0048ef commit 0023e5d

File tree

3 files changed

+28
-3
lines changed

3 files changed

+28
-3
lines changed

doc/whats-new.rst

+2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ v2024.06.1 (unreleased)
2222

2323
New Features
2424
~~~~~~~~~~~~
25+
- Use fastpath when grouping both monotonically increasing and decreasing variables
26+
in :py:class:`GroupBy` (:issue:`6220`, :pull:`7427`). By `Joel Jaeschke <https://github.com/joeljaeschke>`_.
2527
- Introduce new :py:class:`groupers.UniqueGrouper`, :py:class:`groupers.BinGrouper`, and
2628
:py:class:`groupers.TimeResampler` objects as a step towards supporting grouping by
2729
multiple variables. See the `docs <groupby.groupers_>` and the

xarray/core/groupers.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,9 @@ def is_unique_and_monotonic(self) -> bool:
123123
if isinstance(self.group, _DummyGroup):
124124
return True
125125
index = self.group_as_index
126-
return index.is_unique and index.is_monotonic_increasing
126+
return index.is_unique and (
127+
index.is_monotonic_increasing or index.is_monotonic_decreasing
128+
)
127129

128130
@property
129131
def group_as_index(self) -> pd.Index:
@@ -326,7 +328,7 @@ def _init_properties(self, group: T_Group) -> None:
326328

327329
if not group_as_index.is_monotonic_increasing:
328330
# TODO: sort instead of raising an error
329-
raise ValueError("index must be monotonic for resampling")
331+
raise ValueError("Index must be monotonic for resampling")
330332

331333
if isinstance(group_as_index, CFTimeIndex):
332334
from xarray.core.resample_cftime import CFTimeGrouper

xarray/tests/test_groupby.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -1792,6 +1792,23 @@ def test_groupby_fillna(self) -> None:
17921792
actual = a.groupby("b").fillna(DataArray([0, 2], dims="b"))
17931793
assert_identical(expected, actual)
17941794

1795+
@pytest.mark.parametrize("use_flox", [True, False])
1796+
def test_groupby_fastpath_for_monotonic(self, use_flox: bool) -> None:
1797+
# Fixes https://github.com/pydata/xarray/issues/6220
1798+
# Fixes https://github.com/pydata/xarray/issues/9279
1799+
index = [1, 2, 3, 4, 7, 9, 10]
1800+
array = DataArray(np.arange(len(index)), [("idx", index)])
1801+
array_rev = array.copy().assign_coords({"idx": index[::-1]})
1802+
fwd = array.groupby("idx", squeeze=False)
1803+
rev = array_rev.groupby("idx", squeeze=False)
1804+
1805+
for gb in [fwd, rev]:
1806+
assert all([isinstance(elem, slice) for elem in gb._group_indices])
1807+
1808+
with xr.set_options(use_flox=use_flox):
1809+
assert_identical(fwd.sum(), array)
1810+
assert_identical(rev.sum(), array_rev)
1811+
17951812

17961813
class TestDataArrayResample:
17971814
@pytest.mark.parametrize("use_cftime", [True, False])
@@ -1828,9 +1845,13 @@ def resample_as_pandas(array, *args, **kwargs):
18281845
expected = resample_as_pandas(array, "24h", closed="right")
18291846
assert_identical(expected, actual)
18301847

1831-
with pytest.raises(ValueError, match=r"index must be monotonic"):
1848+
with pytest.raises(ValueError, match=r"Index must be monotonic"):
18321849
array[[2, 0, 1]].resample(time="1D")
18331850

1851+
reverse = array.isel(time=slice(-1, None, -1))
1852+
with pytest.raises(ValueError):
1853+
reverse.resample(time="1D").mean()
1854+
18341855
@pytest.mark.parametrize("use_cftime", [True, False])
18351856
def test_resample_doctest(self, use_cftime: bool) -> None:
18361857
# run the doctest example here so we are not surprised

0 commit comments

Comments
 (0)