Skip to content

Commit 9973b6e

Browse files
grlee77, pre-commit-ci[bot], jhamman
authored
implement Zarr v3 spec support (#6475)
* make zarr backend compatible with v3 spec * add tests for Zarr v3 stores * add tests for Zarr v3 stores when the store is not a StoreV3 class In this case where create_zarr_target returns a string, we must specify zarr_version=3 when opening/writing a store to make sure a version 3 store will be created rather than the default of a version 2 store. * update import path to match Zarr v2.12 and v2.13 experimental API remove path='xarray' default for zarr v3 path=None should work as of Zarr v2.13 * flake8 fixes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * disallow consolidated metadata for zarr v3 * whats new a + remove more consolidated metadata for v3 * activate upstream dev test for zarr v3 * better typing * untype zarr_version in open_dataset * update whats new * [test-upstream] * update comment * fix whats new * update whats new Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Joseph Hamman <[email protected]>
1 parent 4e9535a commit 9973b6e

File tree

6 files changed

+253
-67
lines changed

6 files changed

+253
-67
lines changed

.github/workflows/upstream-dev-ci.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ jobs:
8080
if: success()
8181
id: status
8282
run: |
83+
export ZARR_V3_EXPERIMENTAL_API=1
8384
python -m pytest --timeout=60 -rf \
8485
--report-log output-${{ matrix.python-version }}-log.jsonl
8586
- name: Generate and publish the report

doc/whats-new.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ What's New
1414
1515
np.random.seed(123456)
1616
17-
1817
.. _whats-new.2022.11.1:
1918

2019
v2022.11.1 (unreleased)
@@ -23,6 +22,8 @@ v2022.11.1 (unreleased)
2322
New Features
2423
~~~~~~~~~~~~
2524

25+
- Add experimental support for Zarr's in-progress V3 specification. (:pull:`6475`).
26+
By `Gregory Lee <https://github.com/grlee77>`_ and `Joe Hamman <https://github.com/jhamman>`_.
2627

2728
Breaking changes
2829
~~~~~~~~~~~~~~~~

xarray/backends/api.py

+11
Original file line numberDiff line numberDiff line change
@@ -1504,6 +1504,7 @@ def to_zarr(
15041504
region: Mapping[str, slice] | None = None,
15051505
safe_chunks: bool = True,
15061506
storage_options: dict[str, str] | None = None,
1507+
zarr_version: int | None = None,
15071508
) -> backends.ZarrStore:
15081509
...
15091510

@@ -1525,6 +1526,7 @@ def to_zarr(
15251526
region: Mapping[str, slice] | None = None,
15261527
safe_chunks: bool = True,
15271528
storage_options: dict[str, str] | None = None,
1529+
zarr_version: int | None = None,
15281530
) -> Delayed:
15291531
...
15301532

@@ -1543,6 +1545,7 @@ def to_zarr(
15431545
region: Mapping[str, slice] | None = None,
15441546
safe_chunks: bool = True,
15451547
storage_options: dict[str, str] | None = None,
1548+
zarr_version: int | None = None,
15461549
) -> backends.ZarrStore | Delayed:
15471550
"""This function creates an appropriate datastore for writing a dataset to
15481551
a zarr store
@@ -1609,6 +1612,13 @@ def to_zarr(
16091612
f"``region`` with to_zarr(), got {append_dim} in both"
16101613
)
16111614

1615+
if zarr_version is None:
1616+
# default to 2 if store doesn't specify its version (e.g. a path)
1617+
zarr_version = int(getattr(store, "_store_version", 2))
1618+
1619+
if consolidated is None and zarr_version > 2:
1620+
consolidated = False
1621+
16121622
if mode == "r+":
16131623
already_consolidated = consolidated
16141624
consolidate_on_close = False
@@ -1627,6 +1637,7 @@ def to_zarr(
16271637
write_region=region,
16281638
safe_chunks=safe_chunks,
16291639
stacklevel=4, # for Dataset.to_zarr()
1640+
zarr_version=zarr_version,
16301641
)
16311642

16321643
if mode in ["a", "r+"]:

xarray/backends/zarr.py

+35
Original file line numberDiff line numberDiff line change
@@ -353,19 +353,37 @@ def open_group(
353353
write_region=None,
354354
safe_chunks=True,
355355
stacklevel=2,
356+
zarr_version=None,
356357
):
357358
import zarr
358359

359360
# zarr doesn't support pathlib.Path objects yet. zarr-python#601
360361
if isinstance(store, os.PathLike):
361362
store = os.fspath(store)
362363

364+
if zarr_version is None:
365+
# default to 2 if store doesn't specify its version (e.g. a path)
366+
zarr_version = getattr(store, "_store_version", 2)
367+
363368
open_kwargs = dict(
364369
mode=mode,
365370
synchronizer=synchronizer,
366371
path=group,
367372
)
368373
open_kwargs["storage_options"] = storage_options
374+
if zarr_version > 2:
375+
open_kwargs["zarr_version"] = zarr_version
376+
377+
if consolidated or consolidate_on_close:
378+
raise ValueError(
379+
"consolidated metadata has not been implemented for zarr "
380+
f"version {zarr_version} yet. Set consolidated=False for "
381+
f"zarr version {zarr_version}. See also "
382+
"https://github.com/zarr-developers/zarr-specs/issues/136"
383+
)
384+
385+
if consolidated is None:
386+
consolidated = False
369387

370388
if chunk_store:
371389
open_kwargs["chunk_store"] = chunk_store
@@ -440,6 +458,11 @@ def open_store_variable(self, name, zarr_array):
440458
zarr_array, DIMENSION_KEY, try_nczarr
441459
)
442460
attributes = dict(attributes)
461+
462+
# TODO: this should not be needed once
463+
# https://github.com/zarr-developers/zarr-python/issues/1269 is resolved.
464+
attributes.pop("filters", None)
465+
443466
encoding = {
444467
"chunks": zarr_array.chunks,
445468
"preferred_chunks": dict(zip(dimensions, zarr_array.chunks)),
@@ -668,6 +691,7 @@ def open_zarr(
668691
storage_options=None,
669692
decode_timedelta=None,
670693
use_cftime=None,
694+
zarr_version=None,
671695
**kwargs,
672696
):
673697
"""Load and decode a dataset from a Zarr store.
@@ -725,6 +749,9 @@ def open_zarr(
725749
capability. Only works for stores that have already been consolidated.
726750
By default (`consolidated=None`), attempts to read consolidated metadata,
727751
falling back to read non-consolidated metadata if that fails.
752+
753+
When the experimental ``zarr_version=3``, ``consolidated`` must be
754+
either ``None`` or ``False``.
728755
chunk_store : MutableMapping, optional
729756
A separate Zarr store only for chunk data.
730757
storage_options : dict, optional
@@ -745,6 +772,10 @@ def open_zarr(
745772
represented using ``np.datetime64[ns]`` objects. If False, always
746773
decode times to ``np.datetime64[ns]`` objects; if this is not possible
747774
raise an error.
775+
zarr_version : int or None, optional
776+
The desired zarr spec version to target (currently 2 or 3). The default
777+
of None will attempt to determine the zarr version from ``store`` when
778+
possible, otherwise defaulting to 2.
748779
749780
Returns
750781
-------
@@ -782,6 +813,7 @@ def open_zarr(
782813
"chunk_store": chunk_store,
783814
"storage_options": storage_options,
784815
"stacklevel": 4,
816+
"zarr_version": zarr_version,
785817
}
786818

787819
ds = open_dataset(
@@ -798,6 +830,7 @@ def open_zarr(
798830
backend_kwargs=backend_kwargs,
799831
decode_timedelta=decode_timedelta,
800832
use_cftime=use_cftime,
833+
zarr_version=zarr_version,
801834
)
802835
return ds
803836

@@ -842,6 +875,7 @@ def open_dataset(
842875
chunk_store=None,
843876
storage_options=None,
844877
stacklevel=3,
878+
zarr_version=None,
845879
):
846880

847881
filename_or_obj = _normalize_path(filename_or_obj)
@@ -855,6 +889,7 @@ def open_dataset(
855889
chunk_store=chunk_store,
856890
storage_options=storage_options,
857891
stacklevel=stacklevel + 1,
892+
zarr_version=zarr_version,
858893
)
859894

860895
store_entrypoint = StoreBackendEntrypoint()

xarray/core/dataset.py

+10
Original file line numberDiff line numberDiff line change
@@ -1930,6 +1930,7 @@ def to_zarr(
19301930
region: Mapping[str, slice] | None = None,
19311931
safe_chunks: bool = True,
19321932
storage_options: dict[str, str] | None = None,
1933+
zarr_version: int | None = None,
19331934
) -> ZarrStore:
19341935
...
19351936

@@ -1967,6 +1968,7 @@ def to_zarr(
19671968
region: Mapping[str, slice] | None = None,
19681969
safe_chunks: bool = True,
19691970
storage_options: dict[str, str] | None = None,
1971+
zarr_version: int | None = None,
19701972
) -> ZarrStore | Delayed:
19711973
"""Write dataset contents to a zarr group.
19721974
@@ -2017,6 +2019,9 @@ def to_zarr(
20172019
metadata; if False, do not. The default (`consolidated=None`) means
20182020
write consolidated metadata and attempt to read consolidated
20192021
metadata for existing stores (falling back to non-consolidated).
2022+
2023+
When the experimental ``zarr_version=3``, ``consolidated`` must be
2024+
either ``None`` or ``False``.
20202025
append_dim : hashable, optional
20212026
If set, the dimension along which the data will be appended. All
20222027
other dimensions on overridden variables must remain the same size.
@@ -2048,6 +2053,10 @@ def to_zarr(
20482053
storage_options : dict, optional
20492054
Any additional parameters for the storage backend (ignored for local
20502055
paths).
2056+
zarr_version : int or None, optional
2057+
The desired zarr spec version to target (currently 2 or 3). The
2058+
default of None will attempt to determine the zarr version from
2059+
``store`` when possible, otherwise defaulting to 2.
20512060
20522061
Returns
20532062
-------
@@ -2092,6 +2101,7 @@ def to_zarr(
20922101
append_dim=append_dim,
20932102
region=region,
20942103
safe_chunks=safe_chunks,
2104+
zarr_version=zarr_version,
20952105
)
20962106

20972107
def __repr__(self) -> str:

0 commit comments

Comments
 (0)