Skip to content
forked from pydata/xarray

Commit 20a8cd9

Browse files
committed
Merge branch 'main' into groupby-shuffle
* main: Accessibility: Add keyboard handling for XArray HTML view (pydata#9412) [pre-commit.ci] pre-commit autoupdate (pydata#9316) [skip-ci] Speed up docs build by limiting toctrees (pydata#9395) fix the failing `pre-commit.ci` runs (pydata#9411) Update benchmarks.yml (pydata#9406) GroupBy(multiple groupers) (pydata#9372) Encode/decode property tests use variables() (pydata#9401)
2 parents 6d9ed1c + d33e4ad commit 20a8cd9

18 files changed

+485
-165
lines changed

.github/workflows/benchmarks.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
# add "build" because of https://github.com/airspeed-velocity/asv/issues/1385
3535
create-args: >-
3636
asv
37-
build
37+
python-build
3838
mamba
3939
4040

.pre-commit-config.yaml

+5-5
Original file line numberDiff line numberDiff line change
@@ -13,24 +13,24 @@ repos:
1313
- id: mixed-line-ending
1414
- repo: https://github.com/astral-sh/ruff-pre-commit
1515
# Ruff version.
16-
rev: 'v0.5.0'
16+
rev: 'v0.6.2'
1717
hooks:
1818
- id: ruff
1919
args: ["--fix", "--show-fixes"]
2020
# https://github.com/python/black#version-control-integration
2121
- repo: https://github.com/psf/black-pre-commit-mirror
22-
rev: 24.4.2
22+
rev: 24.8.0
2323
hooks:
2424
- id: black-jupyter
2525
- repo: https://github.com/keewis/blackdoc
2626
rev: v0.3.9
2727
hooks:
2828
- id: blackdoc
2929
exclude: "generate_aggregations.py"
30-
additional_dependencies: ["black==24.4.2"]
30+
additional_dependencies: ["black==24.8.0"]
3131
- id: blackdoc-autoupdate-black
3232
- repo: https://github.com/pre-commit/mirrors-mypy
33-
rev: v1.10.1
33+
rev: v1.11.2
3434
hooks:
3535
- id: mypy
3636
# Copied from setup.cfg
@@ -41,7 +41,7 @@ repos:
4141
additional_dependencies: [
4242
# Type stubs
4343
types-python-dateutil,
44-
types-pkg_resources,
44+
types-setuptools,
4545
types-PyYAML,
4646
types-pytz,
4747
typing-extensions>=4.1.0,

.readthedocs.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
version: 2
22

33
build:
4-
os: ubuntu-22.04
4+
os: ubuntu-lts-latest
55
tools:
6-
python: mambaforge-4.10
6+
python: mambaforge-latest
77
jobs:
88
post_checkout:
99
- (git --no-pager log --pretty="tformat:%s" -1 | grep -vqF "[skip-rtd]") || exit 183

ci/requirements/doc.yml

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
- conda-forge
55
- nodefaults
66
dependencies:
7-
- python=3.10
7+
- python=3.12
88
- bottleneck
99
- cartopy
1010
- cfgrib
@@ -40,6 +40,7 @@ dependencies:
4040
- sphinx-design
4141
- sphinx-inline-tabs
4242
- sphinx>=5.0
43+
- sphinx-remove-toctrees
4344
- sphinxext-opengraph
4445
- sphinxext-rediraffe
4546
- zarr>=2.10

doc/conf.py

+4
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@
8888
"sphinxext.rediraffe",
8989
"sphinx_design",
9090
"sphinx_inline_tabs",
91+
"sphinx_remove_toctrees",
9192
]
9293

9394

@@ -198,6 +199,8 @@
198199
# The master toctree document.
199200
master_doc = "index"
200201

202+
remove_from_toctrees = ["generated/*"]
203+
201204
# General information about the project.
202205
project = "xarray"
203206
copyright = f"2014-{datetime.datetime.now().year}, xarray Developers"
@@ -244,6 +247,7 @@
244247
repository_url="https://github.com/pydata/xarray",
245248
repository_branch="main",
246249
navigation_with_keys=False, # pydata/pydata-sphinx-theme#1492
250+
navigation_depth=4,
247251
path_to_docs="doc",
248252
use_edit_page_button=True,
249253
use_repository_button=True,

doc/user-guide/groupby.rst

+28-16
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,7 @@ You can index out a particular group:
8181
8282
ds.groupby("letters")["b"]
8383
84-
Just like in pandas, creating a GroupBy object is cheap: it does not actually
85-
split the data until you access particular values.
84+
To group by multiple variables, see :ref:`this section <groupby.multiple>`.
8685

8786
Binning
8887
~~~~~~~
@@ -180,19 +179,6 @@ This last line is roughly equivalent to the following::
180179
results.append(group - alt.sel(letters=label))
181180
xr.concat(results, dim='x')
182181

183-
Iterating and Squeezing
184-
~~~~~~~~~~~~~~~~~~~~~~~
185-
186-
Previously, Xarray defaulted to squeezing out dimensions of size one when iterating over
187-
a GroupBy object. This behaviour is being removed.
188-
You can always squeeze explicitly later with the Dataset or DataArray
189-
:py:meth:`DataArray.squeeze` methods.
190-
191-
.. ipython:: python
192-
193-
next(iter(arr.groupby("x", squeeze=False)))
194-
195-
196182
.. _groupby.multidim:
197183

198184
Multidimensional Grouping
@@ -236,6 +222,8 @@ applying your function, and then unstacking the result:
236222
stacked = da.stack(gridcell=["ny", "nx"])
237223
stacked.groupby("gridcell").sum(...).unstack("gridcell")
238224
225+
Alternatively, you can group by both ``lat`` and ``lon`` at the :ref:`same time <groupby.multiple>`.
226+
239227
.. _groupby.groupers:
240228

241229
Grouper Objects
@@ -276,7 +264,8 @@ is identical to
276264
277265
ds.groupby(x=UniqueGrouper())
278266
279-
and
267+
268+
Similarly,
280269

281270
.. code-block:: python
282271
@@ -303,3 +292,26 @@ is identical to
303292
from xarray.groupers import TimeResampler
304293
305294
ds.resample(time=TimeResampler("ME"))
295+
296+
297+
.. _groupby.multiple:
298+
299+
Grouping by multiple variables
300+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
301+
302+
Use grouper objects to group by multiple variables:
303+
304+
.. ipython:: python
305+
306+
from xarray.groupers import UniqueGrouper
307+
308+
da.groupby(lat=UniqueGrouper(), lon=UniqueGrouper()).sum()
309+
310+
311+
Different groupers can be combined to construct sophisticated GroupBy operations.
312+
313+
.. ipython:: python
314+
315+
from xarray.groupers import BinGrouper
316+
317+
ds.groupby(x=BinGrouper(bins=[5, 15, 25]), letters=UniqueGrouper()).sum()

doc/whats-new.rst

+5
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@ New Features
2424
~~~~~~~~~~~~
2525
- Make chunk manager an option in ``set_options`` (:pull:`9362`).
2626
By `Tom White <https://github.com/tomwhite>`_.
27+
- Support for :ref:`grouping by multiple variables <groupby.multiple>`.
28+
This is quite new, so please check your results and report bugs.
29+
Binary operations after grouping by multiple arrays are not supported yet.
30+
(:issue:`1056`, :issue:`9332`, :issue:`324`, :pull:`9372`).
31+
By `Deepak Cherian <https://github.com/dcherian>`_.
2732
- Allow data variable specific ``constant_values`` in the dataset ``pad`` function (:pull:`9353`).
2833
By `Tiago Sanona <https://github.com/tsanona>`_.
2934

properties/test_encode_decode.py

+17-24
Original file line numberDiff line numberDiff line change
@@ -11,42 +11,35 @@
1111
# isort: split
1212

1313
import hypothesis.extra.numpy as npst
14-
import hypothesis.strategies as st
14+
import numpy as np
1515
from hypothesis import given
1616

1717
import xarray as xr
18-
19-
an_array = npst.arrays(
20-
dtype=st.one_of(
21-
npst.unsigned_integer_dtypes(), npst.integer_dtypes(), npst.floating_dtypes()
22-
),
23-
shape=npst.array_shapes(max_side=3), # max_side specified for performance
24-
)
18+
from xarray.testing.strategies import variables
2519

2620

2721
@pytest.mark.slow
28-
@given(st.data(), an_array)
29-
def test_CFMask_coder_roundtrip(data, arr) -> None:
30-
names = data.draw(
31-
st.lists(st.text(), min_size=arr.ndim, max_size=arr.ndim, unique=True).map(
32-
tuple
33-
)
34-
)
35-
original = xr.Variable(names, arr)
22+
@given(original=variables())
23+
def test_CFMask_coder_roundtrip(original) -> None:
3624
coder = xr.coding.variables.CFMaskCoder()
3725
roundtripped = coder.decode(coder.encode(original))
3826
xr.testing.assert_identical(original, roundtripped)
3927

4028

29+
@pytest.mark.xfail
30+
@pytest.mark.slow
31+
@given(var=variables(dtype=npst.floating_dtypes()))
32+
def test_CFMask_coder_decode(var) -> None:
33+
var[0] = -99
34+
var.attrs["_FillValue"] = -99
35+
coder = xr.coding.variables.CFMaskCoder()
36+
decoded = coder.decode(var)
37+
assert np.isnan(decoded[0])
38+
39+
4140
@pytest.mark.slow
42-
@given(st.data(), an_array)
43-
def test_CFScaleOffset_coder_roundtrip(data, arr) -> None:
44-
names = data.draw(
45-
st.lists(st.text(), min_size=arr.ndim, max_size=arr.ndim, unique=True).map(
46-
tuple
47-
)
48-
)
49-
original = xr.Variable(names, arr)
41+
@given(original=variables())
42+
def test_CFScaleOffset_coder_roundtrip(original) -> None:
5043
coder = xr.coding.variables.CFScaleOffsetCoder()
5144
roundtripped = coder.decode(coder.encode(original))
5245
xr.testing.assert_identical(original, roundtripped)

xarray/coding/cftime_offsets.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ def __sub__(self, other):
145145

146146
if isinstance(other, cftime.datetime):
147147
raise TypeError("Cannot subtract a cftime.datetime from a time offset.")
148-
elif type(other) == type(self):
148+
elif type(other) is type(self):
149149
return type(self)(self.n - other.n)
150150
else:
151151
return NotImplemented
@@ -165,7 +165,7 @@ def __radd__(self, other):
165165
return self.__add__(other)
166166

167167
def __rsub__(self, other):
168-
if isinstance(other, BaseCFTimeOffset) and type(self) != type(other):
168+
if isinstance(other, BaseCFTimeOffset) and type(self) is not type(other):
169169
raise TypeError("Cannot subtract cftime offsets of differing types")
170170
return -self + other
171171

@@ -462,7 +462,7 @@ def __sub__(self, other: Self) -> Self:
462462

463463
if isinstance(other, cftime.datetime):
464464
raise TypeError("Cannot subtract cftime.datetime from offset.")
465-
if type(other) == type(self) and other.month == self.month:
465+
if type(other) is type(self) and other.month == self.month:
466466
return type(self)(self.n - other.n, month=self.month)
467467
return NotImplemented
468468

@@ -548,7 +548,7 @@ def __sub__(self, other):
548548

549549
if isinstance(other, cftime.datetime):
550550
raise TypeError("Cannot subtract cftime.datetime from offset.")
551-
elif type(other) == type(self) and other.month == self.month:
551+
elif type(other) is type(self) and other.month == self.month:
552552
return type(self)(self.n - other.n, month=self.month)
553553
else:
554554
return NotImplemented

xarray/core/dataarray.py

+7-12
Original file line numberDiff line numberDiff line change
@@ -6801,27 +6801,22 @@ def groupby(
68016801
groupers = either_dict_or_kwargs(group, groupers, "groupby") # type: ignore
68026802
group = None
68036803

6804-
grouper: Grouper
6804+
rgroupers: tuple[ResolvedGrouper, ...]
68056805
if group is not None:
68066806
if groupers:
68076807
raise ValueError(
68086808
"Providing a combination of `group` and **groupers is not supported."
68096809
)
6810-
grouper = UniqueGrouper()
6810+
rgroupers = (ResolvedGrouper(UniqueGrouper(), group, self),)
68116811
else:
6812-
if len(groupers) > 1:
6813-
raise ValueError("grouping by multiple variables is not supported yet.")
68146812
if not groupers:
68156813
raise ValueError("Either `group` or `**groupers` must be provided.")
6816-
group, grouper = next(iter(groupers.items()))
6817-
6818-
rgrouper = ResolvedGrouper(grouper, group, self)
6814+
rgroupers = tuple(
6815+
ResolvedGrouper(grouper, group, self)
6816+
for group, grouper in groupers.items()
6817+
)
68196818

6820-
return DataArrayGroupBy(
6821-
self,
6822-
(rgrouper,),
6823-
restore_coord_dims=restore_coord_dims,
6824-
)
6819+
return DataArrayGroupBy(self, rgroupers, restore_coord_dims=restore_coord_dims)
68256820

68266821
@_deprecate_positional_args("v2024.07.0")
68276822
def groupby_bins(

xarray/core/dataset.py

+8-11
Original file line numberDiff line numberDiff line change
@@ -10397,25 +10397,22 @@ def groupby(
1039710397
groupers = either_dict_or_kwargs(group, groupers, "groupby") # type: ignore
1039810398
group = None
1039910399

10400+
rgroupers: tuple[ResolvedGrouper, ...]
1040010401
if group is not None:
1040110402
if groupers:
1040210403
raise ValueError(
1040310404
"Providing a combination of `group` and **groupers is not supported."
1040410405
)
10405-
rgrouper = ResolvedGrouper(UniqueGrouper(), group, self)
10406+
rgroupers = (ResolvedGrouper(UniqueGrouper(), group, self),)
1040610407
else:
10407-
if len(groupers) > 1:
10408-
raise ValueError("Grouping by multiple variables is not supported yet.")
10409-
elif not groupers:
10408+
if not groupers:
1041010409
raise ValueError("Either `group` or `**groupers` must be provided.")
10411-
for group, grouper in groupers.items():
10412-
rgrouper = ResolvedGrouper(grouper, group, self)
10410+
rgroupers = tuple(
10411+
ResolvedGrouper(grouper, group, self)
10412+
for group, grouper in groupers.items()
10413+
)
1041310414

10414-
return DatasetGroupBy(
10415-
self,
10416-
(rgrouper,),
10417-
restore_coord_dims=restore_coord_dims,
10418-
)
10415+
return DatasetGroupBy(self, rgroupers, restore_coord_dims=restore_coord_dims)
1041910416

1042010417
@_deprecate_positional_args("v2024.07.0")
1042110418
def groupby_bins(

0 commit comments

Comments
 (0)