Skip to content

Commit 0d6cd2a

Browse files
authored
Fix & normalize typing for chunks (#8247)
* Fix & normalize typing for chunks. I noticed that `"auto"` wasn't allowed as a value in a dict, so this normalizes all chunk types and defines the mapping as containing the inner type. This allows removing some type ignores (though it also adds one). One question — not necessary to answer now — is whether we should allow a tuple of definitions, one for each dimension. Generally we use names, which helps prevent mistakes and allows us to be less concerned about dimension ordering.
1 parent 639ce0f commit 0d6cd2a

File tree

4 files changed

+27
-21
lines changed

4 files changed

+27
-21
lines changed

xarray/core/dataarray.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@
111111
ReindexMethodOptions,
112112
Self,
113113
SideOptions,
114+
T_Chunks,
114115
T_Xarray,
115116
)
116117
from xarray.core.weighted import DataArrayWeighted
@@ -1288,13 +1289,7 @@ def chunksizes(self) -> Mapping[Any, tuple[int, ...]]:
12881289

12891290
def chunk(
12901291
self,
1291-
chunks: (
1292-
int
1293-
| Literal["auto"]
1294-
| tuple[int, ...]
1295-
| tuple[tuple[int, ...], ...]
1296-
| Mapping[Any, None | int | tuple[int, ...]]
1297-
) = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667)
1292+
chunks: T_Chunks = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667)
12981293
name_prefix: str = "xarray-",
12991294
token: str | None = None,
13001295
lock: bool = False,
@@ -1362,7 +1357,7 @@ def chunk(
13621357

13631358
if isinstance(chunks, (float, str, int)):
13641359
# ignoring type; unclear why it won't accept a Literal into the value.
1365-
chunks = dict.fromkeys(self.dims, chunks) # type: ignore
1360+
chunks = dict.fromkeys(self.dims, chunks)
13661361
elif isinstance(chunks, (tuple, list)):
13671362
chunks = dict(zip(self.dims, chunks))
13681363
else:

xarray/core/dataset.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,14 @@
9393
is_duck_array,
9494
is_duck_dask_array,
9595
)
96-
from xarray.core.types import QuantileMethods, Self, T_DataArrayOrSet, T_Dataset
96+
from xarray.core.types import (
97+
QuantileMethods,
98+
Self,
99+
T_ChunkDim,
100+
T_Chunks,
101+
T_DataArrayOrSet,
102+
T_Dataset,
103+
)
97104
from xarray.core.utils import (
98105
Default,
99106
Frozen,
@@ -1478,7 +1485,7 @@ def __iter__(self) -> Iterator[Hashable]:
14781485
if TYPE_CHECKING:
14791486
# needed because __getattr__ is returning Any and otherwise
14801487
# this class counts as part of the SupportsArray Protocol
1481-
__array__ = None
1488+
__array__ = None # type: ignore[var-annotated,unused-ignore]
14821489

14831490
else:
14841491

@@ -2569,16 +2576,14 @@ def chunksizes(self) -> Mapping[Hashable, tuple[int, ...]]:
25692576

25702577
def chunk(
25712578
self,
2572-
chunks: (
2573-
int | Literal["auto"] | Mapping[Any, None | int | str | tuple[int, ...]]
2574-
) = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667)
2579+
chunks: T_Chunks = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667)
25752580
name_prefix: str = "xarray-",
25762581
token: str | None = None,
25772582
lock: bool = False,
25782583
inline_array: bool = False,
25792584
chunked_array_type: str | ChunkManagerEntrypoint | None = None,
25802585
from_array_kwargs=None,
2581-
**chunks_kwargs: None | int | str | tuple[int, ...],
2586+
**chunks_kwargs: T_ChunkDim,
25822587
) -> Self:
25832588
"""Coerce all arrays in this dataset into dask arrays with the given
25842589
chunks.
@@ -2637,8 +2642,9 @@ def chunk(
26372642
)
26382643
chunks = {}
26392644

2640-
if isinstance(chunks, (Number, str, int)):
2641-
chunks = dict.fromkeys(self.dims, chunks)
2645+
if not isinstance(chunks, Mapping):
2646+
# We need to ignore since mypy doesn't recognize this can't be `None`
2647+
chunks = dict.fromkeys(self.dims, chunks) # type: ignore[arg-type]
26422648
else:
26432649
chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk")
26442650

xarray/core/types.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@
1919

2020
try:
2121
if sys.version_info >= (3, 11):
22-
from typing import Self
22+
from typing import Self, TypeAlias
2323
else:
24-
from typing_extensions import Self
24+
from typing_extensions import Self, TypeAlias
2525
except ImportError:
2626
if TYPE_CHECKING:
2727
raise
@@ -183,7 +183,12 @@ def copy(
183183
Dims = Union[str, Iterable[Hashable], "ellipsis", None]
184184
OrderedDims = Union[str, Sequence[Union[Hashable, "ellipsis"]], "ellipsis", None]
185185

186-
T_Chunks = Union[int, dict[Any, Any], Literal["auto"], None]
186+
# FYI in some cases we don't allow `None`, which this doesn't take account of.
187+
T_ChunkDim: TypeAlias = Union[int, Literal["auto"], None, tuple[int, ...]]
188+
# We allow the tuple form of this (though arguably we could transition to named dims only)
189+
T_Chunks: TypeAlias = Union[
190+
T_ChunkDim, Mapping[Any, T_ChunkDim], tuple[T_ChunkDim, ...]
191+
]
187192
T_NormalizedChunks = tuple[tuple[int, ...], ...]
188193

189194
DataVars = Mapping[Any, Any]

xarray/core/variable.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1035,7 +1035,7 @@ def chunk(
10351035

10361036
data_old = self._data
10371037
if chunkmanager.is_chunked_array(data_old):
1038-
data_chunked = chunkmanager.rechunk(data_old, chunks) # type: ignore[arg-type]
1038+
data_chunked = chunkmanager.rechunk(data_old, chunks)
10391039
else:
10401040
if isinstance(data_old, indexing.ExplicitlyIndexed):
10411041
# Unambiguously handle array storage backends (like NetCDF4 and h5py)
@@ -1057,7 +1057,7 @@ def chunk(
10571057

10581058
data_chunked = chunkmanager.from_array(
10591059
ndata,
1060-
chunks, # type: ignore[arg-type]
1060+
chunks,
10611061
**_from_array_kwargs,
10621062
)
10631063

0 commit comments

Comments
 (0)