Skip to content
forked from pydata/xarray

Commit 2d48690

Browse files
committed
Revert "remove shuffle_by for now."
This reverts commit 7a99c8f.
1 parent a22c7ed commit 2d48690

File tree

1 file changed

+63
-1
lines changed

1 file changed

+63
-1
lines changed

xarray/core/common.py

+63-1
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@
5252
T_Variable,
5353
)
5454
from xarray.core.variable import Variable
55-
from xarray.groupers import Resampler
55+
from xarray.groupers import Grouper, Resampler
5656

5757
DTypeMaybeMapping = Union[DTypeLikeSave, Mapping[Any, DTypeLikeSave]]
5858

@@ -888,6 +888,68 @@ def rolling_exp(
888888

889889
return rolling_exp.RollingExp(self, window, window_type)
890890

891+
def shuffle_by(
892+
self,
893+
group: Hashable | DataArray | Mapping[Any, Grouper] | None = None,
894+
chunks: T_Chunks = None,
895+
**groupers: Grouper,
896+
) -> Self:
897+
"""
898+
Sort or "shuffle" this object by a Grouper.
899+
900+
"Shuffle" means the object is sorted so that all group members occur sequentially,
901+
in the same chunk. Multiple groups may occur in the same chunk.
902+
This method is particularly useful for chunked arrays (e.g. dask, cubed).
903+
For chunked array types, the order of appearance is not guaranteed, but will depend on
904+
the input chunking.
905+
906+
Parameters
907+
----------
908+
group : Hashable or DataArray or IndexVariable or mapping of Hashable to Grouper, optional
909+
Array whose unique values should be used to group this object. If a
910+
Hashable, must be the name of a coordinate contained in this object. If a dictionary,
911+
must map an existing variable name to a :py:class:`Grouper` instance.
912+
chunks : int, tuple of int, "auto" or mapping of hashable to int or tuple of int, optional
913+
How to adjust chunks along dimensions not present in the array being grouped by.
914+
**groupers : Grouper
915+
Grouper objects to use for shuffling the data, passed as keyword arguments (e.g. ``x=UniqueGrouper()``).
916+
917+
Examples
918+
--------
919+
>>> import dask
920+
>>> from xarray.groupers import UniqueGrouper
921+
>>> da = xr.DataArray(
922+
... dims="x",
923+
... data=dask.array.arange(10, chunks=1),
924+
... coords={"x": [1, 2, 3, 1, 2, 3, 1, 2, 3, 0]},
925+
... name="a",
926+
... )
927+
>>> da
928+
<xarray.DataArray 'a' (x: 10)> Size: 80B
929+
dask.array<arange, shape=(10,), dtype=int64, chunksize=(1,), chunktype=numpy.ndarray>
930+
Coordinates:
931+
* x (x) int64 80B 1 2 3 1 2 3 1 2 3 0
932+
933+
>>> da.shuffle_by(x=UniqueGrouper())
934+
<xarray.DataArray 'a' (x: 10)> Size: 80B
935+
dask.array<shuffle, shape=(10,), dtype=int64, chunksize=(3,), chunktype=numpy.ndarray>
936+
Coordinates:
937+
* x (x) int64 80B 0 1 1 1 2 2 2 3 3 3
938+
939+
Returns
940+
-------
941+
DataArray or Dataset
942+
The same type as this object
943+
944+
See Also
945+
--------
946+
DataArrayGroupBy.shuffle
947+
DatasetGroupBy.shuffle
948+
dask.dataframe.DataFrame.shuffle
949+
dask.array.shuffle
950+
"""
951+
return self.groupby(group=group, **groupers)._shuffle_obj(chunks)
952+
891953
def _resample(
892954
self,
893955
resample_cls: type[T_Resample],

0 commit comments

Comments
 (0)