
Commit c0399d3

alexamici, max-sixty, keewis, and Monica Rossetti authored
Refactor of the big if-chain to a dictionary in the form {backend_name: backend_open}. (#4431)
* Add docs re stable branch (#4444)
  * Add docs re stable branch
  * Update HOW_TO_RELEASE.md
* Port engine selection refactor from #3166 and add zarr
* Always add `mode="r"` to zarr and simplify logic

Co-authored-by: keewis <[email protected]>
Co-authored-by: Maximilian Roos <[email protected]>
Co-authored-by: Monica Rossetti <[email protected]>
1 parent 4f414f2 commit c0399d3
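
The heart of the change is a dispatch table: engine names map to the callables that open the corresponding store, and a small `_get_backend_cls` helper turns an unknown name into the familiar "unrecognized engine" `ValueError`. A minimal, self-contained sketch of that pattern; the `open_*` placeholders below stand in for xarray's real backend constructors such as `backends.NetCDF4DataStore.open`:

```python
# Sketch of the {backend_name: backend_open} dispatch this commit introduces.
# The two opener functions are placeholders, not xarray backends.

def open_netcdf4(path, **kwargs):
    return f"netcdf4 store for {path!r} ({kwargs})"


def open_scipy(path, **kwargs):
    return f"scipy store for {path!r} ({kwargs})"


ENGINES = {
    "netcdf4": open_netcdf4,
    "scipy": open_scipy,
}


def _get_backend_cls(engine):
    """Look up the opener for the requested engine."""
    try:
        return ENGINES[engine]
    except KeyError:
        raise ValueError(
            "unrecognized engine for open_dataset: {}\n"
            "must be one of: {}".format(engine, list(ENGINES))
        )


opener = _get_backend_cls("netcdf4")
print(opener("example.nc", lock=None))  # dispatched without any if-chain
```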

File tree

3 files changed: +49, -77 lines

xarray/backends/api.py (+47, -75)
```diff
@@ -1,6 +1,5 @@
 import os.path
 import warnings
-from collections.abc import MutableMapping
 from glob import glob
 from io import BytesIO
 from numbers import Number
@@ -41,6 +40,17 @@
 DATAARRAY_NAME = "__xarray_dataarray_name__"
 DATAARRAY_VARIABLE = "__xarray_dataarray_variable__"
 
+ENGINES = {
+    "netcdf4": backends.NetCDF4DataStore.open,
+    "scipy": backends.ScipyDataStore,
+    "pydap": backends.PydapDataStore.open,
+    "h5netcdf": backends.H5NetCDFStore.open,
+    "pynio": backends.NioDataStore,
+    "pseudonetcdf": backends.PseudoNetCDFDataStore.open,
+    "cfgrib": backends.CfGribDataStore,
+    "zarr": backends.ZarrStore.open_group,
+}
+
 
 def _get_default_engine_remote_uri():
     try:
@@ -153,6 +163,17 @@ def _get_default_engine(path, allow_remote=False):
     return engine
 
 
+def _get_backend_cls(engine):
+    """Select open_dataset method based on current engine"""
+    try:
+        return ENGINES[engine]
+    except KeyError:
+        raise ValueError(
+            "unrecognized engine for open_dataset: {}\n"
+            "must be one of: {}".format(engine, list(ENGINES))
+        )
+
+
 def _normalize_path(path):
     if is_remote_uri(path):
         return path
@@ -407,23 +428,6 @@ def open_dataset(
     --------
     open_mfdataset
     """
-    engines = [
-        None,
-        "netcdf4",
-        "scipy",
-        "pydap",
-        "h5netcdf",
-        "pynio",
-        "cfgrib",
-        "pseudonetcdf",
-        "zarr",
-    ]
-    if engine not in engines:
-        raise ValueError(
-            "unrecognized engine for open_dataset: {}\n"
-            "must be one of: {}".format(engine, engines)
-        )
-
     if autoclose is not None:
         warnings.warn(
             "The autoclose argument is no longer used by "
@@ -450,6 +454,7 @@ def open_dataset(
 
     if backend_kwargs is None:
         backend_kwargs = {}
+    extra_kwargs = {}
 
     def maybe_decode_store(store, chunks, lock=False):
         ds = conventions.decode_cf(
@@ -532,68 +537,35 @@ def maybe_decode_store(store, chunks, lock=False):
 
     if isinstance(filename_or_obj, AbstractDataStore):
         store = filename_or_obj
+    else:
+        if isinstance(filename_or_obj, str):
+            filename_or_obj = _normalize_path(filename_or_obj)
 
-    elif isinstance(filename_or_obj, MutableMapping) and engine == "zarr":
-        # Zarr supports a wide range of access modes, but for now xarray either
-        # reads or writes from a store, never both.
-        # For open_dataset(engine="zarr"), we only read (i.e. mode="r")
-        mode = "r"
-        _backend_kwargs = backend_kwargs.copy()
-        overwrite_encoded_chunks = _backend_kwargs.pop("overwrite_encoded_chunks", None)
-        store = backends.ZarrStore.open_group(
-            filename_or_obj, mode=mode, group=group, **_backend_kwargs
-        )
-
-    elif isinstance(filename_or_obj, str):
-        filename_or_obj = _normalize_path(filename_or_obj)
+            if engine is None:
+                engine = _get_default_engine(filename_or_obj, allow_remote=True)
+        elif engine != "zarr":
+            if engine not in [None, "scipy", "h5netcdf"]:
+                raise ValueError(
+                    "can only read bytes or file-like objects "
+                    "with engine='scipy' or 'h5netcdf'"
+                )
+            engine = _get_engine_from_magic_number(filename_or_obj)
 
-        if engine is None:
-            engine = _get_default_engine(filename_or_obj, allow_remote=True)
-        if engine == "netcdf4":
-            store = backends.NetCDF4DataStore.open(
-                filename_or_obj, group=group, lock=lock, **backend_kwargs
-            )
-        elif engine == "scipy":
-            store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs)
-        elif engine == "pydap":
-            store = backends.PydapDataStore.open(filename_or_obj, **backend_kwargs)
-        elif engine == "h5netcdf":
-            store = backends.H5NetCDFStore.open(
-                filename_or_obj, group=group, lock=lock, **backend_kwargs
-            )
-        elif engine == "pynio":
-            store = backends.NioDataStore(filename_or_obj, lock=lock, **backend_kwargs)
-        elif engine == "pseudonetcdf":
-            store = backends.PseudoNetCDFDataStore.open(
-                filename_or_obj, lock=lock, **backend_kwargs
-            )
-        elif engine == "cfgrib":
-            store = backends.CfGribDataStore(
-                filename_or_obj, lock=lock, **backend_kwargs
-            )
+        if engine in ["netcdf4", "h5netcdf"]:
+            extra_kwargs["group"] = group
+            extra_kwargs["lock"] = lock
+        elif engine in ["pynio", "pseudonetcdf", "cfgrib"]:
+            extra_kwargs["lock"] = lock
         elif engine == "zarr":
-            # on ZarrStore, mode='r', synchronizer=None, group=None,
-            # consolidated=False.
-            _backend_kwargs = backend_kwargs.copy()
-            overwrite_encoded_chunks = _backend_kwargs.pop(
+            backend_kwargs = backend_kwargs.copy()
+            overwrite_encoded_chunks = backend_kwargs.pop(
                 "overwrite_encoded_chunks", None
             )
-            store = backends.ZarrStore.open_group(
-                filename_or_obj, group=group, **_backend_kwargs
-            )
-        else:
-            if engine not in [None, "scipy", "h5netcdf"]:
-                raise ValueError(
-                    "can only read bytes or file-like objects "
-                    "with engine='scipy' or 'h5netcdf'"
-                )
-            engine = _get_engine_from_magic_number(filename_or_obj)
-            if engine == "scipy":
-                store = backends.ScipyDataStore(filename_or_obj, **backend_kwargs)
-            elif engine == "h5netcdf":
-                store = backends.H5NetCDFStore.open(
-                    filename_or_obj, group=group, lock=lock, **backend_kwargs
-                )
+            extra_kwargs["mode"] = "r"
+            extra_kwargs["group"] = group
+
+        opener = _get_backend_cls(engine)
+        store = opener(filename_or_obj, **extra_kwargs, **backend_kwargs)
 
     with close_on_error(store):
         ds = maybe_decode_store(store, chunks)
```
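
With the if-chain gone, per-engine differences in `open_dataset` reduce to a small `extra_kwargs` dict that is merged with the user's `backend_kwargs` in a single `opener(...)` call. A hypothetical call through the zarr path (the store path below is made up for illustration):

```python
import xarray as xr

# Hypothetical local zarr store. With this change, open_dataset(engine="zarr")
# always opens read-only: mode="r" is put into extra_kwargs before
# backends.ZarrStore.open_group is called, so a store opened this way is
# only ever read, never written.
ds = xr.open_dataset("example_store.zarr", engine="zarr")
print(ds)
```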

xarray/backends/zarr.py (+1, -1)
```diff
@@ -7,7 +7,6 @@
 from ..core.pycompat import integer_types
 from ..core.utils import FrozenDict, HiddenKeyDict
 from ..core.variable import Variable
-from .api import open_dataset
 from .common import AbstractWritableDataStore, BackendArray, _encode_variable_name
 
 # need some special secret attributes to tell us the dimensions
@@ -647,6 +646,7 @@ def open_zarr(
     ----------
     http://zarr.readthedocs.io/
     """
+    from .api import open_dataset
 
     if kwargs:
         raise TypeError(
```
xarray/tests/test_backends.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2224,7 +2224,7 @@ def test_engine(self):
22242224
open_dataset(tmp_file, engine="foobar")
22252225

22262226
netcdf_bytes = data.to_netcdf()
2227-
with raises_regex(ValueError, "unrecognized engine"):
2227+
with raises_regex(ValueError, "can only read bytes or file-like"):
22282228
open_dataset(BytesIO(netcdf_bytes), engine="foobar")
22292229

22302230
def test_cross_engine_read_write_netcdf3(self):
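
The test update reflects where validation now happens: an unrecognized engine on a file path still fails with "unrecognized engine" (now raised by `_get_backend_cls`), while bytes or file-like input hits the scipy/h5netcdf restriction first. A rough equivalent using plain pytest instead of xarray's `raises_regex` helper:

```python
from io import BytesIO

import pytest
import xarray as xr

# The payload is never read: the engine check fails before any I/O happens.
with pytest.raises(ValueError, match="can only read bytes or file-like"):
    xr.open_dataset(BytesIO(b"not real netCDF bytes"), engine="foobar")
```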
