forked from pydata/xarray

Commit 3e65c0c

Merge branch 'main' into groupby-dask

* main:
  Refactor out utility functions from to_zarr (pydata#9695)
  Use the same function to floatize coords in polyfit and polyval (pydata#9691)

2 parents aada75d + 7467b1e

5 files changed  (+131 −100 lines)

doc/whats-new.rst  (+2)

@@ -44,6 +44,8 @@ Bug fixes
 
 - Fix inadvertent deep-copying of child data in DataTree.
   By `Stephan Hoyer <https://github.com/shoyer>`_.
+- Fix regression in the interoperability of :py:meth:`DataArray.polyfit` and :py:meth:`xr.polyval` for date-time coordinates. (:pull:`9691`).
+  By `Pascal Bourgault <https://github.com/aulemahal>`_.
 
 Documentation
 ~~~~~~~~~~~~~
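As context for the polyfit/polyval entry above, here is a minimal sketch of the round trip that pydata#9691 keeps working for date-time coordinates. The data and coordinate names are invented for illustration, not taken from the PR:

```python
import numpy as np
import pandas as pd
import xarray as xr

# A DataArray indexed by a datetime coordinate (hypothetical example data).
time = pd.date_range("2000-01-01", periods=10, freq="D")
da = xr.DataArray(np.arange(10.0), coords={"time": time}, dims="time")

# polyfit converts the datetime coordinate to floats internally; polyval must
# apply the same conversion so the evaluated fit lines up with the data.
coeffs = da.polyfit(dim="time", deg=1)
fitted = xr.polyval(da["time"], coeffs.polyfit_coefficients)
```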

xarray/backends/api.py  (+13 −70)

@@ -33,7 +33,6 @@
     _normalize_path,
 )
 from xarray.backends.locks import _get_scheduler
-from xarray.backends.zarr import _zarr_v3
 from xarray.core import indexing
 from xarray.core.combine import (
     _infer_concat_order_from_positions,

@@ -2131,73 +2130,33 @@ def to_zarr(
 
     See `Dataset.to_zarr` for full API docs.
     """
+    from xarray.backends.zarr import _choose_default_mode, _get_mappers
+
+    # validate Dataset keys, DataArray names
+    _validate_dataset_names(dataset)
 
     # Load empty arrays to avoid bug saving zero length dimensions (Issue #5741)
+    # TODO: delete when min dask>=2023.12.1
+    # https://github.com/dask/dask/pull/10506
     for v in dataset.variables.values():
         if v.size == 0:
             v.load()
 
-    # expand str and path-like arguments
-    store = _normalize_path(store)
-    chunk_store = _normalize_path(chunk_store)
-
-    kwargs = {}
-    if storage_options is None:
-        mapper = store
-        chunk_mapper = chunk_store
-    else:
-        if not isinstance(store, str):
-            raise ValueError(
-                f"store must be a string to use storage_options. Got {type(store)}"
-            )
-
-        if _zarr_v3():
-            kwargs["storage_options"] = storage_options
-            mapper = store
-            chunk_mapper = chunk_store
-        else:
-            from fsspec import get_mapper
-
-            mapper = get_mapper(store, **storage_options)
-            if chunk_store is not None:
-                chunk_mapper = get_mapper(chunk_store, **storage_options)
-            else:
-                chunk_mapper = chunk_store
-
     if encoding is None:
         encoding = {}
 
-    if mode is None:
-        if append_dim is not None:
-            mode = "a"
-        elif region is not None:
-            mode = "r+"
-        else:
-            mode = "w-"
-
-    if mode not in ["a", "a-"] and append_dim is not None:
-        raise ValueError("cannot set append_dim unless mode='a' or mode=None")
-
-    if mode not in ["a", "a-", "r+"] and region is not None:
-        raise ValueError(
-            "cannot set region unless mode='a', mode='a-', mode='r+' or mode=None"
-        )
-
-    if mode not in ["w", "w-", "a", "a-", "r+"]:
-        raise ValueError(
-            "The only supported options for mode are 'w', "
-            f"'w-', 'a', 'a-', and 'r+', but mode={mode!r}"
-        )
-
-    # validate Dataset keys, DataArray names
-    _validate_dataset_names(dataset)
+    kwargs, mapper, chunk_mapper = _get_mappers(
+        storage_options=storage_options, store=store, chunk_store=chunk_store
+    )
+    mode = _choose_default_mode(mode=mode, append_dim=append_dim, region=region)
 
     if mode == "r+":
         already_consolidated = consolidated
         consolidate_on_close = False
     else:
         already_consolidated = False
         consolidate_on_close = consolidated or consolidated is None
+
     zstore = backends.ZarrStore.open_group(
         store=mapper,
         mode=mode,

@@ -2209,30 +2168,14 @@ def to_zarr(
         append_dim=append_dim,
         write_region=region,
         safe_chunks=safe_chunks,
-        stacklevel=4,  # for Dataset.to_zarr()
         zarr_version=zarr_version,
         zarr_format=zarr_format,
         write_empty=write_empty_chunks,
         **kwargs,
     )
 
-    if region is not None:
-        zstore._validate_and_autodetect_region(dataset)
-        # can't modify indexes with region writes
-        dataset = dataset.drop_vars(dataset.indexes)
-        if append_dim is not None and append_dim in region:
-            raise ValueError(
-                f"cannot list the same dimension in both ``append_dim`` and "
-                f"``region`` with to_zarr(), got {append_dim} in both"
-            )
-
-    if encoding and mode in ["a", "a-", "r+"]:
-        existing_var_names = set(zstore.zarr_group.array_keys())
-        for var_name in existing_var_names:
-            if var_name in encoding:
-                raise ValueError(
-                    f"variable {var_name!r} already exists, but encoding was provided"
-                )
+    dataset = zstore._validate_and_autodetect_region(dataset)
+    zstore._validate_encoding(encoding)
 
     writer = ArrayWriter()
     # TODO: figure out how to properly handle unlimited_dims
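The mode-defaulting logic removed from to_zarr above now lives in the `_choose_default_mode` helper added in zarr.py below, with behaviour unchanged. A quick sketch of the defaults it encodes; note these are private helpers, so the import path is an implementation detail that may move:

```python
from xarray.backends.zarr import _choose_default_mode

# Same defaults as the inline logic this commit removes:
assert _choose_default_mode(mode=None, append_dim=None, region=None) == "w-"
assert _choose_default_mode(mode=None, append_dim="time", region=None) == "a"
assert _choose_default_mode(mode=None, append_dim=None, region="auto") == "r+"

# Invalid combinations still raise, e.g. append_dim with a plain write mode:
# _choose_default_mode(mode="w", append_dim="time", region=None)  # ValueError
```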

xarray/backends/zarr.py  (+84 −17)

@@ -4,8 +4,7 @@
 import json
 import os
 import struct
-import warnings
-from collections.abc import Iterable
+from collections.abc import Hashable, Iterable, Mapping
 from typing import TYPE_CHECKING, Any, Literal
 
 import numpy as np

@@ -46,6 +45,66 @@
     from xarray.core.datatree import DataTree
 
 
+def _get_mappers(*, storage_options, store, chunk_store):
+    # expand str and path-like arguments
+    store = _normalize_path(store)
+    chunk_store = _normalize_path(chunk_store)
+
+    kwargs = {}
+    if storage_options is None:
+        mapper = store
+        chunk_mapper = chunk_store
+    else:
+        if not isinstance(store, str):
+            raise ValueError(
+                f"store must be a string to use storage_options. Got {type(store)}"
+            )
+
+        if _zarr_v3():
+            kwargs["storage_options"] = storage_options
+            mapper = store
+            chunk_mapper = chunk_store
+        else:
+            from fsspec import get_mapper
+
+            mapper = get_mapper(store, **storage_options)
+            if chunk_store is not None:
+                chunk_mapper = get_mapper(chunk_store, **storage_options)
+            else:
+                chunk_mapper = chunk_store
+    return kwargs, mapper, chunk_mapper
+
+
+def _choose_default_mode(
+    *,
+    mode: ZarrWriteModes | None,
+    append_dim: Hashable | None,
+    region: Mapping[str, slice | Literal["auto"]] | Literal["auto"] | None,
+) -> ZarrWriteModes:
+    if mode is None:
+        if append_dim is not None:
+            mode = "a"
+        elif region is not None:
+            mode = "r+"
+        else:
+            mode = "w-"
+
+    if mode not in ["a", "a-"] and append_dim is not None:
+        raise ValueError("cannot set append_dim unless mode='a' or mode=None")
+
+    if mode not in ["a", "a-", "r+"] and region is not None:
+        raise ValueError(
+            "cannot set region unless mode='a', mode='a-', mode='r+' or mode=None"
+        )
+
+    if mode not in ["w", "w-", "a", "a-", "r+"]:
+        raise ValueError(
+            "The only supported options for mode are 'w', "
+            f"'w-', 'a', 'a-', and 'r+', but mode={mode!r}"
+        )
+    return mode
+
+
 def _zarr_v3() -> bool:
     # TODO: switch to "3" once Zarr V3 is released
     return module_available("zarr", minversion="2.99")

@@ -567,7 +626,6 @@ def open_store(
         append_dim=None,
         write_region=None,
         safe_chunks=True,
-        stacklevel=2,
         zarr_version=None,
         zarr_format=None,
         use_zarr_fill_value_as_mask=None,

@@ -587,7 +645,6 @@ def open_store(
             consolidate_on_close=consolidate_on_close,
             chunk_store=chunk_store,
             storage_options=storage_options,
-            stacklevel=stacklevel,
             zarr_version=zarr_version,
             use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask,
             zarr_format=zarr_format,

@@ -622,7 +679,6 @@ def open_group(
         append_dim=None,
         write_region=None,
         safe_chunks=True,
-        stacklevel=2,
         zarr_version=None,
         zarr_format=None,
         use_zarr_fill_value_as_mask=None,

@@ -642,7 +698,6 @@ def open_group(
             consolidate_on_close=consolidate_on_close,
             chunk_store=chunk_store,
             storage_options=storage_options,
-            stacklevel=stacklevel,
             zarr_version=zarr_version,
             use_zarr_fill_value_as_mask=use_zarr_fill_value_as_mask,
             zarr_format=zarr_format,

@@ -1105,7 +1160,10 @@ def _auto_detect_regions(self, ds, region):
             region[dim] = slice(idxs[0], idxs[-1] + 1)
         return region
 
-    def _validate_and_autodetect_region(self, ds) -> None:
+    def _validate_and_autodetect_region(self, ds: Dataset) -> Dataset:
+        if self._write_region is None:
+            return ds
+
         region = self._write_region
 
         if region == "auto":

@@ -1153,8 +1211,26 @@ def _validate_and_autodetect_region(self, ds) -> None:
                f".drop_vars({non_matching_vars!r})"
            )
 
+        if self._append_dim is not None and self._append_dim in region:
+            raise ValueError(
+                f"cannot list the same dimension in both ``append_dim`` and "
+                f"``region`` with to_zarr(), got {self._append_dim} in both"
+            )
+
         self._write_region = region
 
+        # can't modify indexes with region writes
+        return ds.drop_vars(ds.indexes)
+
+    def _validate_encoding(self, encoding) -> None:
+        if encoding and self._mode in ["a", "a-", "r+"]:
+            existing_var_names = set(self.zarr_group.array_keys())
+            for var_name in existing_var_names:
+                if var_name in encoding:
+                    raise ValueError(
+                        f"variable {var_name!r} already exists, but encoding was provided"
+                    )
+
 
 def open_zarr(
     store,

@@ -1329,7 +1405,6 @@ def open_zarr(
         "overwrite_encoded_chunks": overwrite_encoded_chunks,
         "chunk_store": chunk_store,
         "storage_options": storage_options,
-        "stacklevel": 4,
         "zarr_version": zarr_version,
         "zarr_format": zarr_format,
     }

@@ -1398,7 +1473,6 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti
         consolidated=None,
         chunk_store=None,
         storage_options=None,
-        stacklevel=3,
         zarr_version=None,
         zarr_format=None,
         store=None,

@@ -1416,7 +1490,6 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti
             consolidate_on_close=False,
             chunk_store=chunk_store,
             storage_options=storage_options,
-            stacklevel=stacklevel + 1,
             zarr_version=zarr_version,
             use_zarr_fill_value_as_mask=None,
             zarr_format=zarr_format,

@@ -1453,7 +1526,6 @@ def open_datatree(
         consolidated=None,
         chunk_store=None,
         storage_options=None,
-        stacklevel=3,
         zarr_version=None,
         zarr_format=None,
         **kwargs,

@@ -1474,7 +1546,6 @@ def open_datatree(
             consolidated=consolidated,
             chunk_store=chunk_store,
             storage_options=storage_options,
-            stacklevel=stacklevel,
             zarr_version=zarr_version,
             zarr_format=zarr_format,
             **kwargs,

@@ -1499,7 +1570,6 @@ def open_groups_as_dict(
         consolidated=None,
         chunk_store=None,
         storage_options=None,
-        stacklevel=3,
         zarr_version=None,
         zarr_format=None,
         **kwargs,

@@ -1523,7 +1593,6 @@ def open_groups_as_dict(
             consolidate_on_close=False,
             chunk_store=chunk_store,
             storage_options=storage_options,
-            stacklevel=stacklevel + 1,
             zarr_version=zarr_version,
             zarr_format=zarr_format,
         )

@@ -1569,7 +1638,6 @@ def _get_open_params(
     consolidate_on_close,
     chunk_store,
     storage_options,
-    stacklevel,
     zarr_version,
     use_zarr_fill_value_as_mask,
     zarr_format,

@@ -1614,7 +1682,7 @@ def _get_open_params(
             # ValueError in zarr-python 3.x, KeyError in 2.x.
             try:
                 zarr_group = zarr.open_group(store, **open_kwargs)
-                warnings.warn(
+                emit_user_level_warning(
                     "Failed to open Zarr store with consolidated metadata, "
                     "but successfully read with non-consolidated metadata. "
                     "This is typically much slower for opening a dataset. "

@@ -1627,7 +1695,6 @@ def _get_open_params(
                     "error in this case instead of falling back to try "
                     "reading non-consolidated metadata.",
                     RuntimeWarning,
-                    stacklevel=stacklevel,
                 )
             except missing_exc as err:
                 raise FileNotFoundError(
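The encoding check that moved onto `ZarrStore` as `_validate_encoding` keeps the existing user-facing behaviour: supplying encoding for a variable that already exists in the store when appending is an error. A small sketch of that behaviour, assuming a zarr installation; the store path and variable names are made up for illustration:

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({"a": ("x", np.arange(3))})
ds.to_zarr("example.zarr", mode="w")

# Appending with encoding for an already-written variable is rejected,
# now via ZarrStore._validate_encoding rather than inline code in to_zarr.
try:
    ds.to_zarr("example.zarr", append_dim="x", encoding={"a": {"dtype": "int32"}})
except ValueError as err:
    print(err)  # variable 'a' already exists, but encoding was provided
```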
