Skip to content

Commit 8fc6a23

Browse files
committed
Add FlagGrouper
Closes #472
1 parent 6d81913 commit 8fc6a23

File tree

5 files changed

+76
-2
lines changed

5 files changed

+76
-2
lines changed

cf_xarray/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,6 @@
99
from .options import set_options # noqa
1010
from .utils import _get_version
1111

12-
from . import geometry # noqa
12+
from . import geometry, groupers # noqa
1313

1414
__version__ = _get_version()

cf_xarray/groupers.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import numpy as np
2+
import pandas as pd
3+
from xarray.groupers import EncodedGroups, Grouper
4+
5+
6+
class FlagGrouper(Grouper):
    """Grouper that groups by a CF flag variable and labels groups by meaning.

    The grouped variable must carry the ``flag_values`` and ``flag_meanings``
    attributes. The resulting group coordinate holds the entries of
    ``flag_meanings`` (in attribute order) instead of the raw flag values.
    """

    def factorize(self, group) -> EncodedGroups:
        """Encode ``group`` as integer codes indexing into ``flag_meanings``.

        Parameters
        ----------
        group
            Array-like object exposing ``attrs``, ``data``, ``dtype``,
            ``shape`` and ``copy(data=...)``.

        Returns
        -------
        EncodedGroups
            ``codes`` has the shape of ``group``; code ``i`` marks elements
            equal to ``flag_values[i]`` and ``-1`` marks elements that match
            no flag value. ``full_index`` holds the flag meanings.

        Raises
        ------
        ValueError
            If ``flag_values`` or ``flag_meanings`` is absent from
            ``group.attrs``, or if they have different lengths.
        """
        # Explicit raise instead of ``assert``: asserts are stripped under -O.
        missing = [
            name
            for name in ("flag_values", "flag_meanings")
            if name not in group.attrs
        ]
        if missing:
            raise ValueError(
                "FlagGrouper requires a flag variable; missing attribute(s): "
                + ", ".join(missing)
            )

        values = np.atleast_1d(np.asarray(group.attrs["flag_values"]))
        # split() without an argument tolerates repeated whitespace, which
        # split(" ") would turn into empty labels.
        full_index = pd.Index(group.attrs["flag_meanings"].split())
        if len(full_index) != values.size:
            raise ValueError(
                f"flag_meanings has {len(full_index)} entries but flag_values "
                f"has {values.size}; they must have the same length."
            )

        contiguous = (
            values.size > 0
            and values.dtype.kind in "iu"
            and bool((np.diff(values.astype(np.int64)) == 1).all())
        )
        if group.dtype.kind in "iu" and contiguous:
            # Fast path: flag values are consecutive integers, so the code is
            # the offset from the FIRST FLAG VALUE (not the first data
            # element). Widen first so unsigned data cannot wrap around.
            shifted = group.data.astype(np.int64) - int(values[0])
            in_range = (shifted >= 0) & (shifted < values.size)
            codes = np.where(in_range, shifted, -1)
        else:
            # General path: look each element up in flag_values so that code
            # i always pairs with flag_meanings[i]; unmatched elements get -1.
            flat = np.asarray(group.data).ravel()
            codes = pd.Index(values).get_indexer(flat).reshape(group.shape)

        codes_da = group.copy(data=codes)
        # The codes no longer carry flag semantics; drop the flag attributes.
        codes_da.attrs.pop("flag_values")
        codes_da.attrs.pop("flag_meanings")

        return EncodedGroups(codes=codes_da, full_index=full_index)

    def reset(self):
        """Do nothing: this grouper keeps no state between uses."""
        pass

cf_xarray/tests/test_groupers.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import numpy as np
2+
from xarray.testing import assert_identical
3+
4+
from cf_xarray.datasets import flag_excl
5+
from cf_xarray.groupers import FlagGrouper
6+
7+
8+
def test_flag_grouper():
    """FlagGrouper output equals a plain groupby relabelled with flag meanings."""
    dataset = flag_excl.to_dataset().set_coords("flag_var")
    dataset["foo"] = ("time", np.arange(8))

    # Reference result: group on the raw flag values, then relabel by meaning.
    reference = dataset.groupby("flag_var").mean()
    reference["flag_var"] = ["flag_1", "flag_2", "flag_3"]
    reference["flag_var"].attrs["standard_name"] = "flag_mutual_exclusive"

    grouped = dataset.groupby(flag_var=FlagGrouper()).mean()

    assert_identical(grouped, reference)

doc/api.rst

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,20 @@ Geometries
2121
----------
2222
.. autosummary::
2323
:toctree: generated/
24-
2524
geometry.decode_geometries
25+
2626
geometry.encode_geometries
2727
geometry.shapely_to_cf
2828
geometry.cf_to_shapely
2929
geometry.GeometryNames
3030

31+
32+
Groupers
33+
--------
34+
.. autosummary::
35+
:toctree: generated/
36+
groupers.FlagGrouper
37+
3138
.. currentmodule:: xarray
3239

3340
DataArray

doc/flags.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,31 @@ You can also check whether a DataArray has the appropriate attributes to be reco
6060
da.cf.is_flag_variable
6161
```
6262

63+
## GroupBy
64+
65+
Flag variables, such as the one above, are a natural fit for GroupBy operations.
66+
cf-xarray provides a `FlagGrouper` that understands the `flag_meanings` and `flag_values` attributes.
67+
68+
```{code-cell}
69+
import cf_xarray as cfxr
70+
import numpy as np
71+
72+
from cf_xarray.datasets import flag_excl
73+
from cf_xarray.groupers import FlagGrouper
74+
75+
ds = flag_excl.to_dataset().set_coords('flag_var')
76+
ds["foo"] = ("time", np.arange(8))
77+
ds.flag_var
78+
```
79+
80+
The `flag_var` array has the needed attributes (`flag_values` and `flag_meanings`).
81+
82+
```{code-cell}
83+
ds.groupby(flag_var=FlagGrouper()).mean()
84+
```
85+
86+
Note how the output coordinate has the values from `flag_meanings`.
87+
6388
## Flag Masks
6489

6590
```{warning}

0 commit comments

Comments
 (0)