Change return type of DataArray.chunks and Dataset.chunks to a dict #5846

Closed

17 changes: 17 additions & 0 deletions xarray/core/common.py
@@ -1813,6 +1813,23 @@ def ones_like(other, dtype: DTypeLike = None):
     return full_like(other, 1, dtype)
 
 
+def get_chunks(
+    variables: Iterable[Variable],
+) -> Mapping[Hashable, Tuple[int, ...]]:
+    """Merge per-variable chunk mappings, raising if any dimension's chunks disagree."""
+    chunks: Dict[Hashable, Tuple[int, ...]] = {}
+    for v in variables:
+        if hasattr(v.data, "chunks"):
+            for dim, c in v.chunks.items():
+                if dim in chunks and c != chunks[dim]:
+                    raise ValueError(
+                        f"Object has inconsistent chunks along dimension {dim}. "
+                        "This can be fixed by calling unify_chunks()."
+                    )
+                chunks[dim] = c
+    return Frozen(chunks)
+
+
 def is_np_datetime_like(dtype: DTypeLike) -> bool:
     """Check if a dtype is a subclass of the numpy datetime types"""
     return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64)
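
This helper is the single place where per-variable chunk mappings are merged and checked for consistency. The sketch below is a plain-Python stand-in rather than part of the PR: merge_chunk_mappings is a hypothetical name that works on bare dicts instead of Variable objects, but it mirrors the loop above.

from typing import Dict, Hashable, Iterable, Mapping, Tuple


def merge_chunk_mappings(
    per_variable_chunks: Iterable[Mapping[Hashable, Tuple[int, ...]]]
) -> Dict[Hashable, Tuple[int, ...]]:
    """Merge {dim: block lengths} mappings from several variables, rejecting conflicts."""
    merged: Dict[Hashable, Tuple[int, ...]] = {}
    for chunks in per_variable_chunks:
        for dim, c in chunks.items():
            if dim in merged and c != merged[dim]:
                raise ValueError(f"inconsistent chunks along dimension {dim!r}")
            merged[dim] = c
    return merged


print(merge_chunk_mappings([{"x": (4, 4)}, {"x": (4, 4), "y": (2,)}]))
# -> {'x': (4, 4), 'y': (2,)}: identical chunk patterns for "x" merge cleanly.
# merge_chunk_mappings([{"x": (4, 4)}, {"x": (2, 2, 2, 2)}]) raises ValueError instead.
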
19 changes: 14 additions & 5 deletions xarray/core/dataarray.py
@@ -43,7 +43,7 @@
     reindex_like_indexers,
 )
 from .arithmetic import DataArrayArithmetic
-from .common import AbstractArray, DataWithCoords
+from .common import AbstractArray, DataWithCoords, get_chunks
 from .computation import unify_chunks
 from .coordinates import (
     DataArrayCoordinates,
@@ -1057,11 +1057,20 @@ def __deepcopy__(self, memo=None) -> "DataArray":
     __hash__ = None  # type: ignore[assignment]
 
     @property
-    def chunks(self) -> Optional[Tuple[Tuple[int, ...], ...]]:
-        """Block dimensions for this array's data or None if it's not a dask
-        array.
-        """
-        return self.variable.chunks
+    def chunks(self) -> Optional[Mapping[Hashable, Tuple[int, ...]]]:
+        """
+        Mapping from dimension names to block lengths for this dataarray's data, or None if
+        the underlying data is not a dask array.
+
+        Cannot be modified directly, but can be modified by calling .chunk().
+
+        See Also
+        --------
+        DataArray.chunk
+        xarray.unify_chunks
+        """
+        all_variables = [self.variable] + [c.variable for c in self.coords.values()]
+        return get_chunks(all_variables)
 
     def chunk(
         self,

A collaborator left a review comment on the new chunks signature, with this suggested change:
-    def chunks(self) -> Optional[Mapping[Hashable, Tuple[int, ...]]]:
+    def chunks(self) -> Optional[Mapping[Any, Tuple[int, ...]]]:
"We switched all these because of how mypy handles variance of key types."
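
The reviewer's point about key-type variance can be reproduced with a few lines of typed Python. This is a minimal sketch, not taken from the PR: dims_of and the two consumer functions are hypothetical names, and the comments paraphrase what mypy reports because Mapping is invariant in its key type.

from typing import Any, Dict, Hashable, Mapping


def dims_of() -> Dict[str, int]:
    # A concrete dict with str keys, as xarray objects typically produce.
    return {"x": 4, "y": 2}


def consume_hashable_keys(m: Mapping[Hashable, int]) -> int:
    return sum(m.values())


def consume_any_keys(m: Mapping[Any, int]) -> int:
    return sum(m.values())


consume_hashable_keys(dims_of())  # mypy rejects this: "Dict[str, int]" is not a "Mapping[Hashable, int]" (key type is invariant)
consume_any_keys(dims_of())       # accepted: Any is compatible with str keys
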
26 changes: 12 additions & 14 deletions xarray/core/dataset.py
@@ -52,7 +52,7 @@
 )
 from .alignment import _broadcast_helper, _get_broadcast_dims_map_common_coords, align
 from .arithmetic import DatasetArithmetic
-from .common import DataWithCoords, _contains_datetime_like_objects
+from .common import DataWithCoords, _contains_datetime_like_objects, get_chunks
 from .computation import unify_chunks
 from .coordinates import (
     DatasetCoordinates,
@@ -2090,20 +2090,18 @@ def info(self, buf=None) -> None:

     @property
     def chunks(self) -> Mapping[Hashable, Tuple[int, ...]]:
-        """Block dimensions for this dataset's data or None if it's not a dask
-        array.
-        """
-        chunks: Dict[Hashable, Tuple[int, ...]] = {}
-        for v in self.variables.values():
-            if v.chunks is not None:
-                for dim, c in zip(v.dims, v.chunks):
-                    if dim in chunks and c != chunks[dim]:
-                        raise ValueError(
-                            f"Object has inconsistent chunks along dimension {dim}. "
-                            "This can be fixed by calling unify_chunks()."
-                        )
-                    chunks[dim] = c
-        return Frozen(chunks)
+        """
+        Mapping from dimension names to block lengths for this dataset's data, or None if
+        the underlying data is not a dask array.
+
+        Cannot be modified directly, but can be modified by calling .chunk().
+
+        See Also
+        --------
+        Dataset.chunk
+        xarray.unify_chunks
+        """
+        return get_chunks(self.variables.values())
 
     def chunk(
         self,
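
The ValueError path in both properties is easiest to see with a small Dataset whose variables are chunked differently along a shared dimension. A minimal sketch, assuming dask is installed and xarray includes this change; the printed chunk values are indicative rather than exact.

import dask.array as dsa
import xarray as xr

ds = xr.Dataset(
    {
        "a": ("x", dsa.zeros(8, chunks=4)),  # blocks of 4 along x
        "b": ("x", dsa.zeros(8, chunks=2)),  # blocks of 2 along x
    }
)

try:
    _ = ds.chunks  # both variables share "x" but disagree on its block lengths
except ValueError as err:
    print(err)  # inconsistent chunks along dimension x ... call unify_chunks()

unified = ds.unify_chunks()  # rechunks variables to a common pattern
print(unified.chunks)        # e.g. Frozen({'x': (2, 2, 2, 2)})
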
19 changes: 15 additions & 4 deletions xarray/core/variable.py
@@ -45,6 +45,7 @@
     sparse_array_type,
 )
 from .utils import (
+    Frozen,
     NdimSizeLenMixin,
     OrderedSet,
     _default,
@@ -997,15 +998,25 @@ def __deepcopy__(self, memo=None):

     @property
     def chunks(self):
-        """Block dimensions for this array's data or None if it's not a dask
-        array.
-        """
-        return getattr(self._data, "chunks", None)
+        """
+        Mapping from dimension names to block lengths for this array's data, or None if
+        the underlying data is not a dask array.
+
+        Cannot be modified directly, but can be modified by calling .chunk().
+
+        See Also
+        --------
+        Variable.chunk
+        """
+        if hasattr(self._data, "chunks"):
+            return Frozen({dim: c for dim, c in zip(self.dims, self.data.chunks)})
+        else:
+            return None
 
     _array_counter = itertools.count()
 
     def chunk(self, chunks={}, name=None, lock=False):
-        """Coerce this array's data into a dask arrays with the given chunks.
+        """Coerce this array's data into a dask array with the given chunks.
 
         If this variable is a non-dask array, it will be converted to dask
         array. If it's a dask array, it will be rechunked to the given chunk
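
Taken together, the three files make chunk information addressable by dimension name at every level. A minimal usage sketch, assuming dask is installed and xarray includes this change; before the change, DataArray.chunks and Variable.chunks returned a tuple of per-dimension block-length tuples instead.

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.zeros((6, 4)),
    dims=("x", "y"),
).chunk({"x": 3, "y": 2})

print(da.chunks)                # now dict-like, e.g. Frozen({'x': (3, 3), 'y': (2, 2)})
print(da.chunks["y"])           # block lengths can be looked up by dimension name: (2, 2)
print(da.variable.chunks["x"])  # Variable.chunks exposes the same mapping: (3, 3)

# Dataset.chunks was already a mapping; the DataArray and Variable properties now match it.
print(da.to_dataset(name="a").chunks)
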