Skip to content

Commit 20f7e36

Browse files
ianhidcherian
authored andcommitted
Support zarr write_empty_chunks for zarr-python 3 and up (pydata#10177)
* bug: fix write_empty_chunks for zarr v3 * future proof write_empty_chunks in append flow * test: fix write_empty_test for zarr 2 * typing: fix typing for write_empty_chunks * small edits --------- Co-authored-by: Deepak Cherian <[email protected]>
1 parent b37c989 commit 20f7e36

File tree

2 files changed

+83
-25
lines changed

2 files changed

+83
-25
lines changed

xarray/backends/zarr.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1084,13 +1084,19 @@ def _open_existing_array(self, *, name) -> ZarrArray:
10841084
# - Existing variables already have their attrs included in the consolidated metadata file.
10851085
# - The size of dimensions can not be expanded, that would require a call using `append_dim`
10861086
# which is mutually exclusive with `region`
1087+
empty: dict[str, bool] | dict[str, dict[str, bool]]
1088+
if _zarr_v3():
1089+
empty = dict(config={"write_empty_chunks": self._write_empty})
1090+
else:
1091+
empty = dict(write_empty_chunks=self._write_empty)
1092+
10871093
zarr_array = zarr.open(
10881094
store=(
10891095
self.zarr_group.store if _zarr_v3() else self.zarr_group.chunk_store
10901096
),
10911097
# TODO: see if zarr should normalize these strings.
10921098
path="/".join([self.zarr_group.name.rstrip("/"), name]).lstrip("/"),
1093-
write_empty_chunks=self._write_empty,
1099+
**empty,
10941100
)
10951101
else:
10961102
zarr_array = self.zarr_group[name]
@@ -1115,6 +1121,14 @@ def _create_new_array(
11151121
else:
11161122
encoding["write_empty_chunks"] = self._write_empty
11171123

1124+
if _zarr_v3():
1125+
# zarr v3 deprecated origin and write_empty_chunks
1126+
# instead preferring to pass them via the config argument
1127+
encoding["config"] = {}
1128+
for c in ("write_empty_chunks", "order"):
1129+
if c in encoding:
1130+
encoding["config"][c] = encoding.pop(c)
1131+
11181132
zarr_array = self.zarr_group.create(
11191133
name,
11201134
shape=shape,

xarray/tests/test_backends.py

Lines changed: 68 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from collections.abc import Generator, Iterator, Mapping
1717
from contextlib import ExitStack
1818
from io import BytesIO
19-
from os import listdir
2019
from pathlib import Path
2120
from typing import TYPE_CHECKING, Any, Final, Literal, cast
2221
from unittest.mock import patch
@@ -3638,34 +3637,54 @@ def roundtrip_dir(
36383637

36393638
@pytest.mark.parametrize("consolidated", [True, False, None])
36403639
@pytest.mark.parametrize("write_empty", [True, False, None])
3641-
@pytest.mark.skipif(
3642-
has_zarr_v3, reason="zarr-python 3.x removed write_empty_chunks"
3643-
)
36443640
def test_write_empty(
3645-
self, consolidated: bool | None, write_empty: bool | None
3641+
self,
3642+
consolidated: bool | None,
3643+
write_empty: bool | None,
36463644
) -> None:
3647-
if write_empty is False:
3648-
expected = ["0.1.0", "1.1.0"]
3645+
def assert_expected_files(expected: list[str], store: str) -> None:
3646+
"""Convenience for comparing with actual files written"""
3647+
ls = []
3648+
test_root = os.path.join(store, "test")
3649+
for root, _, files in os.walk(test_root):
3650+
ls.extend(
3651+
[
3652+
os.path.join(root, f).removeprefix(test_root).lstrip("/")
3653+
for f in files
3654+
]
3655+
)
3656+
3657+
assert set(expected) == set(
3658+
[
3659+
file.lstrip("c/")
3660+
for file in ls
3661+
if (file not in (".zattrs", ".zarray", "zarr.json"))
3662+
]
3663+
)
3664+
3665+
# The zarr format is set by the `default_zarr_format`
3666+
# pytest fixture that acts on a superclass
3667+
zarr_format_3 = has_zarr_v3 and zarr.config.config["default_zarr_format"] == 3
3668+
if (write_empty is False) or (write_empty is None and has_zarr_v3):
3669+
expected = ["0.1.0"]
36493670
else:
36503671
expected = [
36513672
"0.0.0",
36523673
"0.0.1",
36533674
"0.1.0",
36543675
"0.1.1",
3655-
"1.0.0",
3656-
"1.0.1",
3657-
"1.1.0",
3658-
"1.1.1",
36593676
]
36603677

3661-
ds = xr.Dataset(
3662-
data_vars={
3663-
"test": (
3664-
("Z", "Y", "X"),
3665-
np.array([np.nan, np.nan, 1.0, np.nan]).reshape((1, 2, 2)),
3666-
)
3667-
}
3668-
)
3678+
if zarr_format_3:
3679+
data = np.array([0.0, 0, 1.0, 0]).reshape((1, 2, 2))
3680+
# transform to the path style of zarr 3
3681+
# e.g. 0/0/1
3682+
expected = [e.replace(".", "/") for e in expected]
3683+
else:
3684+
# use nan for default fill_value behaviour
3685+
data = np.array([np.nan, np.nan, 1.0, np.nan]).reshape((1, 2, 2))
3686+
3687+
ds = xr.Dataset(data_vars={"test": (("Z", "Y", "X"), data)})
36693688

36703689
if has_dask:
36713690
ds["test"] = ds["test"].chunk(1)
@@ -3681,17 +3700,42 @@ def test_write_empty(
36813700
write_empty_chunks=write_empty,
36823701
)
36833702

3703+
# check expected files after a write
3704+
assert_expected_files(expected, store)
3705+
36843706
with self.roundtrip_dir(
36853707
ds,
36863708
store,
3687-
{"mode": "a", "append_dim": "Z", "write_empty_chunks": write_empty},
3709+
save_kwargs={
3710+
"mode": "a",
3711+
"append_dim": "Z",
3712+
"write_empty_chunks": write_empty,
3713+
},
36883714
) as a_ds:
36893715
expected_ds = xr.concat([ds, ds], dim="Z")
36903716

3691-
assert_identical(a_ds, expected_ds)
3692-
3693-
ls = listdir(os.path.join(store, "test"))
3694-
assert set(expected) == set([file for file in ls if file[0] != "."])
3717+
assert_identical(a_ds, expected_ds.compute())
3718+
# add the new files we expect to be created by the append
3719+
# that was performed by the roundtrip_dir
3720+
if (write_empty is False) or (write_empty is None and has_zarr_v3):
3721+
expected.append("1.1.0")
3722+
else:
3723+
if not has_zarr_v3:
3724+
# TODO: remove zarr3 if once zarr issue is fixed
3725+
# https://github.com/zarr-developers/zarr-python/issues/2931
3726+
expected.extend(
3727+
[
3728+
"1.1.0",
3729+
"1.0.0",
3730+
"1.0.1",
3731+
"1.1.1",
3732+
]
3733+
)
3734+
else:
3735+
expected.append("1.1.0")
3736+
if zarr_format_3:
3737+
expected = [e.replace(".", "/") for e in expected]
3738+
assert_expected_files(expected, store)
36953739

36963740
def test_avoid_excess_metadata_calls(self) -> None:
36973741
"""Test that chunk requests do not trigger redundant metadata requests.

0 commit comments

Comments
 (0)