Skip to content

Commit befd1b9

Browse files
raybellwaves (Ray Bell), blackary, wxman22
authored
add storage_options arg to to_zarr (#5615)
* add storage_options arg to to_zarr * add try import * add what's new * merge main whats-new * undo whats new * move import fsspec lower * fsspec in to_zarr * add a test. Co-authored-by: Zachary Blackwood <[email protected]> Co-authored-by: Nathan Lis <[email protected]> * add requires_zarr_2_5_0 * add what's new * add storage options arg to end Co-authored-by: Ray Bell <[email protected]> Co-authored-by: Zachary Blackwood <[email protected]> Co-authored-by: Nathan Lis <[email protected]>
1 parent a78c1e0 commit befd1b9

File tree

6 files changed

+46
-5
lines changed

6 files changed

+46
-5
lines changed

doc/whats-new.rst

+3
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ New Features
2828
By `Justus Magin <https://github.com/keewis>`_.
2929
- Added ``**kwargs`` argument to :py:meth:`open_rasterio` to access overviews (:issue:`3269`).
3030
By `Pushkar Kopparla <https://github.com/pkopparla>`_.
31+
- Added ``storage_options`` argument to :py:meth:`to_zarr` (:issue:`5601`).
32+
By `Ray Bell <https://github.com/raybellwaves>`_, `Zachary Blackwood <https://github.com/blackary>`_ and
33+
`Nathan Lis <https://github.com/wxman22>`_.
3134

3235

3336
Breaking changes

xarray/backends/api.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -1319,6 +1319,7 @@ def to_zarr(
13191319
append_dim: Hashable = None,
13201320
region: Mapping[str, slice] = None,
13211321
safe_chunks: bool = True,
1322+
storage_options: Dict[str, str] = None,
13221323
):
13231324
"""This function creates an appropriate datastore for writing a dataset to
13241325
a zarr store
@@ -1330,6 +1331,22 @@ def to_zarr(
13301331
store = _normalize_path(store)
13311332
chunk_store = _normalize_path(chunk_store)
13321333

1334+
if storage_options is None:
1335+
mapper = store
1336+
chunk_mapper = chunk_store
1337+
else:
1338+
from fsspec import get_mapper
1339+
1340+
if not isinstance(store, str):
1341+
raise ValueError(
1342+
f"store must be a string to use storage_options. Got {type(store)}"
1343+
)
1344+
mapper = get_mapper(store, **storage_options)
1345+
if chunk_store is not None:
1346+
chunk_mapper = get_mapper(chunk_store, **storage_options)
1347+
else:
1348+
chunk_mapper = chunk_store
1349+
13331350
if encoding is None:
13341351
encoding = {}
13351352

@@ -1372,13 +1389,13 @@ def to_zarr(
13721389
already_consolidated = False
13731390
consolidate_on_close = consolidated or consolidated is None
13741391
zstore = backends.ZarrStore.open_group(
1375-
store=store,
1392+
store=mapper,
13761393
mode=mode,
13771394
synchronizer=synchronizer,
13781395
group=group,
13791396
consolidated=already_consolidated,
13801397
consolidate_on_close=consolidate_on_close,
1381-
chunk_store=chunk_store,
1398+
chunk_store=chunk_mapper,
13821399
append_dim=append_dim,
13831400
write_region=region,
13841401
safe_chunks=safe_chunks,

xarray/backends/zarr.py

+3
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,9 @@ def open_zarr(
713713
falling back to read non-consolidated metadata if that fails.
714714
chunk_store : MutableMapping, optional
715715
A separate Zarr store only for chunk data.
716+
storage_options : dict, optional
717+
Any additional parameters for the storage backend (ignored for local
718+
paths).
716719
decode_timedelta : bool, optional
717720
If True, decode variables and coordinates with time units in
718721
{'days', 'hours', 'minutes', 'seconds', 'milliseconds', 'microseconds'}

xarray/core/dataset.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -1922,6 +1922,7 @@ def to_zarr(
19221922
append_dim: Hashable = None,
19231923
region: Mapping[str, slice] = None,
19241924
safe_chunks: bool = True,
1925+
storage_options: Dict[str, str] = None,
19251926
) -> "ZarrStore":
19261927
"""Write dataset contents to a zarr group.
19271928
@@ -1941,10 +1942,10 @@ def to_zarr(
19411942
Parameters
19421943
----------
19431944
store : MutableMapping, str or Path, optional
1944-
Store or path to directory in file system.
1945+
Store or path to directory in local or remote file system.
19451946
chunk_store : MutableMapping, str or Path, optional
1946-
Store or path to directory in file system only for Zarr array chunks.
1947-
Requires zarr-python v2.4.0 or later.
1947+
Store or path to directory in local or remote file system only for Zarr
1948+
array chunks. Requires zarr-python v2.4.0 or later.
19481949
mode : {"w", "w-", "a", "r+", None}, optional
19491950
Persistence mode: "w" means create (overwrite if exists);
19501951
"w-" means create (fail if exists);
@@ -1999,6 +2000,9 @@ def to_zarr(
19992000
if Zarr arrays are written in parallel. This option may be useful in combination
20002001
with ``compute=False`` to initialize a Zarr from an existing
20012002
Dataset with arbitrary chunk structure.
2003+
storage_options : dict, optional
2004+
Any additional parameters for the storage backend (ignored for local
2005+
paths).
20022006
20032007
References
20042008
----------
@@ -2031,6 +2035,7 @@ def to_zarr(
20312035
self,
20322036
store=store,
20332037
chunk_store=chunk_store,
2038+
storage_options=storage_options,
20342039
mode=mode,
20352040
synchronizer=synchronizer,
20362041
group=group,

xarray/tests/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ def LooseVersion(vstring):
7777
has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis")
7878
has_rasterio, requires_rasterio = _importorskip("rasterio")
7979
has_zarr, requires_zarr = _importorskip("zarr")
80+
has_zarr_2_5_0, requires_zarr_2_5_0 = _importorskip("zarr", minversion="2.5.0")
8081
has_fsspec, requires_fsspec = _importorskip("fsspec")
8182
has_iris, requires_iris = _importorskip("iris")
8283
has_cfgrib, requires_cfgrib = _importorskip("cfgrib")

xarray/tests/test_backends.py

+12
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
requires_scipy,
7272
requires_scipy_or_netCDF4,
7373
requires_zarr,
74+
requires_zarr_2_5_0,
7475
)
7576
from .test_coding_times import (
7677
_ALL_CALENDARS,
@@ -2388,6 +2389,17 @@ def create_zarr_target(self):
23882389
yield tmp
23892390

23902391

2392+
@requires_fsspec
2393+
@requires_zarr_2_5_0
2394+
def test_zarr_storage_options():
2395+
pytest.importorskip("aiobotocore")
2396+
ds = create_test_data()
2397+
store_target = "memory://test.zarr"
2398+
ds.to_zarr(store_target, storage_options={"test": "zarr_write"})
2399+
ds_a = xr.open_zarr(store_target, storage_options={"test": "zarr_read"})
2400+
assert_identical(ds, ds_a)
2401+
2402+
23912403
@requires_scipy
23922404
class TestScipyInMemoryData(CFEncodedBase, NetCDF3Only):
23932405
engine = "scipy"

0 commit comments

Comments
 (0)