|
52 | 52 | T_Variable,
|
53 | 53 | )
|
54 | 54 | from xarray.core.variable import Variable
|
55 |
| - from xarray.groupers import Resampler |
| 55 | + from xarray.groupers import Grouper, Resampler |
56 | 56 |
|
57 | 57 | DTypeMaybeMapping = Union[DTypeLikeSave, Mapping[Any, DTypeLikeSave]]
|
58 | 58 |
|
@@ -888,6 +888,68 @@ def rolling_exp(
|
888 | 888 |
|
889 | 889 | return rolling_exp.RollingExp(self, window, window_type)
|
890 | 890 |
|
| 891 | + def shuffle_by( |
| 892 | + self, |
| 893 | + group: Hashable | DataArray | Mapping[Any, Grouper] | None = None, |
| 894 | + chunks: T_Chunks = None, |
| 895 | + **groupers: Grouper, |
| 896 | + ) -> Self: |
| 897 | + """ |
| 898 | + Sort or "shuffle" this object by a Grouper. |
| 899 | +
|
| 900 | + "Shuffle" means the object is sorted so that all group members occur sequentially, |
| 901 | + in the same chunk. Multiple groups may occur in the same chunk. |
| 902 | + This method is particularly useful for chunked arrays (e.g. dask, cubed). |
| 903 | + For chunked array types, the order of appearance is not guaranteed, but will depend on |
| 904 | + the input chunking. |
| 905 | +
|
| 906 | + Parameters |
| 907 | + ---------- |
| 908 | + group : Hashable or DataArray or IndexVariable or mapping of Hashable to Grouper, optional |
| 909 | + Array whose unique values should be used to group this object. If a |
| 910 | + Hashable, must be the name of a coordinate contained in this object. If a dictionary, |
| 911 | + must map an existing variable name to a :py:class:`Grouper` instance. |
| 912 | + chunks : int, tuple of int, "auto" or mapping of hashable to int or tuple of int, optional |
| 913 | + How to adjust chunks along dimensions not present in the array being grouped by. |
| 914 | + **groupers : Grouper |
| 915 | + Grouper objects by which to shuffle the data, passed as keyword arguments of the form ``name=grouper`` (e.g. ``x=UniqueGrouper()``). |
| 916 | +
|
| 917 | + Examples |
| 918 | + -------- |
| 919 | + >>> import dask |
| 920 | + >>> from xarray.groupers import UniqueGrouper |
| 921 | + >>> da = xr.DataArray( |
| 922 | + ... dims="x", |
| 923 | + ... data=dask.array.arange(10, chunks=1), |
| 924 | + ... coords={"x": [1, 2, 3, 1, 2, 3, 1, 2, 3, 0]}, |
| 925 | + ... name="a", |
| 926 | + ... ) |
| 927 | + >>> da |
| 928 | + <xarray.DataArray 'a' (x: 10)> Size: 80B |
| 929 | + dask.array<arange, shape=(10,), dtype=int64, chunksize=(1,), chunktype=numpy.ndarray> |
| 930 | + Coordinates: |
| 931 | + * x (x) int64 80B 1 2 3 1 2 3 1 2 3 0 |
| 932 | +
|
| 933 | + >>> da.shuffle_by(x=UniqueGrouper()) |
| 934 | + <xarray.DataArray 'a' (x: 10)> Size: 80B |
| 935 | + dask.array<shuffle, shape=(10,), dtype=int64, chunksize=(3,), chunktype=numpy.ndarray> |
| 936 | + Coordinates: |
| 937 | + * x (x) int64 80B 0 1 1 1 2 2 2 3 3 3 |
| 938 | +
|
| 939 | + Returns |
| 940 | + ------- |
| 941 | + DataArray or Dataset |
| 942 | + The same type as this object |
| 943 | +
|
| 944 | + See Also |
| 945 | + -------- |
| 946 | + DataArrayGroupBy.shuffle |
| 947 | + DatasetGroupBy.shuffle |
| 948 | + dask.dataframe.DataFrame.shuffle |
| 949 | + dask.array.shuffle |
| 950 | + """ |
| 951 | + return self.groupby(group=group, **groupers)._shuffle_obj(chunks) |
| 952 | + |
891 | 953 | def _resample(
|
892 | 954 | self,
|
893 | 955 | resample_cls: type[T_Resample],
|
|
0 commit comments