Skip to content

Fix & normalize typing for chunks #8247

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 3 additions & 8 deletions xarray/core/dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@
ReindexMethodOptions,
Self,
SideOptions,
T_Chunks,
T_Xarray,
)
from xarray.core.weighted import DataArrayWeighted
Expand Down Expand Up @@ -1288,13 +1289,7 @@ def chunksizes(self) -> Mapping[Any, tuple[int, ...]]:

def chunk(
self,
chunks: (
int
| Literal["auto"]
| tuple[int, ...]
| tuple[tuple[int, ...], ...]
| Mapping[Any, None | int | tuple[int, ...]]
) = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667)
chunks: T_Chunks = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667)
name_prefix: str = "xarray-",
token: str | None = None,
lock: bool = False,
Expand Down Expand Up @@ -1362,7 +1357,7 @@ def chunk(

if isinstance(chunks, (float, str, int)):
# ignoring type; unclear why it won't accept a Literal into the value.
chunks = dict.fromkeys(self.dims, chunks) # type: ignore
chunks = dict.fromkeys(self.dims, chunks)
elif isinstance(chunks, (tuple, list)):
chunks = dict(zip(self.dims, chunks))
else:
Expand Down
22 changes: 14 additions & 8 deletions xarray/core/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,14 @@
is_duck_array,
is_duck_dask_array,
)
from xarray.core.types import QuantileMethods, Self, T_DataArrayOrSet, T_Dataset
from xarray.core.types import (
QuantileMethods,
Self,
T_ChunkDim,
T_Chunks,
T_DataArrayOrSet,
T_Dataset,
)
from xarray.core.utils import (
Default,
Frozen,
Expand Down Expand Up @@ -1478,7 +1485,7 @@ def __iter__(self) -> Iterator[Hashable]:
if TYPE_CHECKING:
# needed because __getattr__ is returning Any and otherwise
# this class counts as part of the SupportsArray Protocol
__array__ = None
__array__ = None # type: ignore[var-annotated,unused-ignore]

else:

Expand Down Expand Up @@ -2569,16 +2576,14 @@ def chunksizes(self) -> Mapping[Hashable, tuple[int, ...]]:

def chunk(
self,
chunks: (
int | Literal["auto"] | Mapping[Any, None | int | str | tuple[int, ...]]
) = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667)
chunks: T_Chunks = {}, # {} even though it's technically unsafe, is being used intentionally here (#4667)
name_prefix: str = "xarray-",
token: str | None = None,
lock: bool = False,
inline_array: bool = False,
chunked_array_type: str | ChunkManagerEntrypoint | None = None,
from_array_kwargs=None,
**chunks_kwargs: None | int | str | tuple[int, ...],
**chunks_kwargs: T_ChunkDim,
) -> Self:
"""Coerce all arrays in this dataset into dask arrays with the given
chunks.
Expand Down Expand Up @@ -2637,8 +2642,9 @@ def chunk(
)
chunks = {}

if isinstance(chunks, (Number, str, int)):
chunks = dict.fromkeys(self.dims, chunks)
if not isinstance(chunks, Mapping):
# We need to ignore since mypy doesn't recognize this can't be `None`
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wait... is the above check for None correct? I'm not sure that the chunks_kwargs can be None, isn't it always a dict? So this would make the warning not reachable...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think chunks_kwargs can be None, but chunks can... Does that answer your Q?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, it answers the question.
But then this means that the above code is wrong because the if statement is never reachable...
Doesn't need to be fixed in this PR, but maybe rewriting this will remove the need for the type ignore.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh I see what you mean...

chunks = dict.fromkeys(self.dims, chunks) # type: ignore[arg-type]
else:
chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk")

Expand Down
11 changes: 8 additions & 3 deletions xarray/core/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@

try:
if sys.version_info >= (3, 11):
from typing import Self
from typing import Self, TypeAlias
else:
from typing_extensions import Self
from typing_extensions import Self, TypeAlias
except ImportError:
if TYPE_CHECKING:
raise
Expand Down Expand Up @@ -183,7 +183,12 @@ def copy(
Dims = Union[str, Iterable[Hashable], "ellipsis", None]
OrderedDims = Union[str, Sequence[Union[Hashable, "ellipsis"]], "ellipsis", None]

T_Chunks = Union[int, dict[Any, Any], Literal["auto"], None]
# FYI in some cases we don't allow `None`, which this doesn't take account of.
T_ChunkDim: TypeAlias = Union[int, Literal["auto"], None, tuple[int, ...]]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Slowly we should agree on a naming convention, haha.
But the addition of TypeAlias is nice, we should do that for all other types here as well.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ha! I even went back & forth on whether this should be T_ given it's not a TypeVar or Generic... But then I saw it existed elsewhere and didn't want to make a big change without having a strong view...

# We allow the tuple form of this (though arguably we could transition to named dims only)
T_Chunks: TypeAlias = Union[
T_ChunkDim, Mapping[Any, T_ChunkDim], tuple[T_ChunkDim, ...]
]
T_NormalizedChunks = tuple[tuple[int, ...], ...]

DataVars = Mapping[Any, Any]
Expand Down
4 changes: 2 additions & 2 deletions xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,7 +1035,7 @@ def chunk(

data_old = self._data
if chunkmanager.is_chunked_array(data_old):
data_chunked = chunkmanager.rechunk(data_old, chunks) # type: ignore[arg-type]
data_chunked = chunkmanager.rechunk(data_old, chunks)
else:
if isinstance(data_old, indexing.ExplicitlyIndexed):
# Unambiguously handle array storage backends (like NetCDF4 and h5py)
Expand All @@ -1057,7 +1057,7 @@ def chunk(

data_chunked = chunkmanager.from_array(
ndata,
chunks, # type: ignore[arg-type]
chunks,
**_from_array_kwargs,
)

Expand Down