From 3899b0668761d97249f3c742343f03c9cb922b21 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 29 Jan 2024 15:41:29 -0700 Subject: [PATCH 01/24] DAS-2060: Skips datatree_ CI Adds additional ignore to mypy Adds additional ignore to doctests Excludes xarray/datatree_ from all pre-commmit.ci --- .github/workflows/ci-additional.yaml | 3 ++- .pre-commit-config.yaml | 1 + pyproject.toml | 5 ++++- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 8e2560d910d..74b54ad55d6 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -79,7 +79,8 @@ jobs: # # If dependencies emit warnings we can't do anything about, add ignores to # `xarray/tests/__init__.py`. - python -m pytest --doctest-modules xarray --ignore xarray/tests -Werror + # [MHS, 01/25/2024] Skip datatree_ documentation remove after #8572 + python -m pytest --doctest-modules xarray --ignore xarray/tests --ignore xarray/datatree_ -Werror mypy: name: Mypy diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4af262a0a04..64d6bcaebf9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,7 @@ # https://pre-commit.com/ ci: autoupdate_schedule: monthly +exclude: 'xarray/datatree_.*' repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.5.0 diff --git a/pyproject.toml b/pyproject.toml index 2a185933b47..81d41a38939 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,10 @@ exclude_lines = ["pragma: no cover", "if TYPE_CHECKING"] [tool.mypy] enable_error_code = "redundant-self" -exclude = 'xarray/util/generate_.*\.py' +exclude = [ + 'xarray/util/generate_.*\.py', + 'xarray/datatree_/.*\.py', +] files = "xarray" show_error_codes = true show_error_context = true From d5b80f9b750dcdc9432a6afa4e6c47bf45a7336d Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 29 Jan 2024 10:47:18 -0700 Subject: [PATCH 02/24] DAS-2070: Migrate open_datatree into xarray. First stab. Will need to add/move tests. 
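A minimal usage sketch of the API this patch introduces (the file name and
group layout are hypothetical, shown for illustration only):

    from xarray.backends.api import open_datatree

    # a netCDF or Zarr store containing nested groups
    dt = open_datatree("example_groups.nc", engine="netcdf4")
    # one DataTree node is created per group, e.g. dt["/set1"]

The engine is resolved via plugins.guess_engine / plugins.get_backend, so the
same call works for the "netcdf4", "h5netcdf" and "zarr" backends extended
here.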
--- xarray/__init__.py | 2 + xarray/backends/api.py | 28 +++++++ xarray/backends/common.py | 16 ++++ xarray/backends/h5netcdf_.py | 35 +++++++++ xarray/backends/netCDF4_.py | 34 ++++++++ xarray/backends/zarr.py | 36 +++++++++ xarray/datatree_/datatree/__init__.py | 5 +- xarray/datatree_/datatree/io.py | 91 +--------------------- xarray/datatree_/datatree/tests/test_io.py | 2 +- 9 files changed, 155 insertions(+), 94 deletions(-) diff --git a/xarray/__init__.py b/xarray/__init__.py index 91613e8cbbc..6d23556e06f 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -6,6 +6,7 @@ load_dataset, open_dataarray, open_dataset, + open_datatree, open_mfdataset, save_mfdataset, ) @@ -83,6 +84,7 @@ "ones_like", "open_dataarray", "open_dataset", + "open_datatree", "open_mfdataset", "open_zarr", "polyval", diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 670a0ec6d68..4017c102e36 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -69,6 +69,7 @@ T_NetcdfTypes = Literal[ "NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC" ] + from xarray.datatree_.datatree import DataTree DATAARRAY_NAME = "__xarray_dataarray_name__" DATAARRAY_VARIABLE = "__xarray_dataarray_variable__" @@ -788,6 +789,33 @@ def open_dataarray( return data_array +def open_datatree( + filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, + engine: T_Engine = None, + **kwargs, +) -> DataTree: + """ + Open and decode a dataset from a file or file-like object, creating one Tree node for each group in the file. + Parameters + ---------- + filename_or_obj : str, Path, file-like, or DataStore + Strings and Path objects are interpreted as a path to a netCDF file or Zarr store. + engine : str, optional + Xarray backend engine to us. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`. + kwargs : + Additional keyword arguments passed to :py:meth:`~xarray.open_dataset` for each group. + Returns + ------- + datatree.DataTree + """ + if engine is None: + engine = plugins.guess_engine(filename_or_obj) + + backend = plugins.get_backend(engine) + + return backend.open_datatree(filename_or_obj, **kwargs) + + def open_mfdataset( paths: str | NestedSequence[str | os.PathLike], chunks: T_Chunks | None = None, diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 5b8f9a6840f..6b63525e8a3 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -21,6 +21,7 @@ from xarray.core.dataset import Dataset from xarray.core.types import NestedSequence + from xarray.datatree_.datatree import DataTree # Create a logger object, but don't add any handlers. Leave that to user code. logger = logging.getLogger(__name__) @@ -458,6 +459,11 @@ class BackendEntrypoint: - ``guess_can_open`` method: it shall return ``True`` if the backend is able to open ``filename_or_obj``, ``False`` otherwise. The implementation of this method is not mandatory. + - ``open_datatree`` method: it shall implement reading from file, variables + decoding and it returns an instance of :py:class:`~datatree.DataTree`. + It shall take in input at least ``filename_or_obj`` argument. The + implementation of this method is not mandatory. For more details see + :ref:`RST open_datatree`. Attributes ---------- @@ -508,6 +514,16 @@ def guess_can_open( return False + def open_datatree( + self, + filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, + **kwargs: Any, + ) -> DataTree: + """ + Backend open_datatree method used by Xarray in :py:func:`~xarray.open_datatree`. 
+ """ + + raise NotImplementedError # mapping of engine name to (module name, BackendEntrypoint Class) BACKEND_ENTRYPOINTS: dict[str, tuple[str | None, type[BackendEntrypoint]]] = {} diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index d9385fc68a9..68b2780f83d 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -38,6 +38,7 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset + from xarray.datatree_.datatree import DataTree class H5NetCDFArrayWrapper(BaseNetCDF4Array): @@ -423,5 +424,39 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti ) return ds + # TODO [MHS, 01/23/2024] This is duplicative of the netcdf4 code in an ugly way. + def open_datatree(self, filename: str, **kwargs) -> DataTree: + from h5netcdf.legacyapi import Dataset as ncDataset + + from xarray.backends.api import open_dataset + from xarray.datatree_.datatree import DataTree, NodePath + + ds = open_dataset(filename, **kwargs) + tree_root = DataTree.from_dict({"/": ds}) + with ncDataset(filename, mode="r") as ncds: + for path in _iter_nc_groups(ncds): + subgroup_ds = open_dataset(filename, group=path, **kwargs) + + # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again + node_name = NodePath(path).name + new_node: DataTree = DataTree(name=node_name, data=subgroup_ds) + tree_root._set_item( + path, + new_node, + allow_overwrite=False, + new_nodes_along_path=True, + ) + return tree_root + + +# TODO [MHS, 01/23/2024] directly duplicated from netCDF4 backend +def _iter_nc_groups(root, parent="/"): + from xarray.datatree_.datatree import NodePath + + parent = NodePath(parent) + for path, group in root.groups.items(): + gpath = parent / path + yield str(gpath) + yield from _iter_nc_groups(group, parent=gpath) BACKEND_ENTRYPOINTS["h5netcdf"] = ("h5netcdf", H5netcdfBackendEntrypoint) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index d3845568709..75b462eaa4f 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -44,6 +44,7 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset + from xarray.datatree_.datatree import DataTree # This lookup table maps from dtype.byteorder to a readable endian # string used by netCDF4. 
@@ -667,5 +668,38 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti ) return ds + def open_datatree(self, filename: str, **kwargs) -> DataTree: + from netCDF4 import Dataset as ncDataset + + from xarray.backends.api import open_dataset + from xarray.datatree_.datatree import DataTree, NodePath + + ds = open_dataset(filename, **kwargs) + tree_root = DataTree.from_dict({"/": ds}) + with ncDataset(filename, mode="r") as ncds: + for path in _iter_nc_groups(ncds): + subgroup_ds = open_dataset(filename, group=path, **kwargs) + + # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again + node_name = NodePath(path).name + new_node: DataTree = DataTree(name=node_name, data=subgroup_ds) + tree_root._set_item( + path, + new_node, + allow_overwrite=False, + new_nodes_along_path=True, + ) + return tree_root + + +def _iter_nc_groups(root, parent="/"): + from xarray.datatree_.datatree import NodePath + + parent = NodePath(parent) + for path, group in root.groups.items(): + gpath = parent / path + yield str(gpath) + yield from _iter_nc_groups(group, parent=gpath) + BACKEND_ENTRYPOINTS["netcdf4"] = ("netCDF4", NetCDF4BackendEntrypoint) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 469bbf4c339..a4d14134cfb 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -34,6 +34,7 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset + from xarray.datatree_.datatree import DataTree # need some special secret attributes to tell us the dimensions @@ -1035,5 +1036,40 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti ) return ds + def open_datatree(self, store, **kwargs) -> DataTree: + import zarr # type: ignore + + from xarray.backends.api import open_dataset + from xarray.datatree_.datatree import DataTree, NodePath + + zds = zarr.open_group(store, mode="r") + ds = open_dataset(store, engine="zarr", **kwargs) + tree_root = DataTree.from_dict({"/": ds}) + for path in _iter_zarr_groups(zds): + try: + subgroup_ds = open_dataset(store, engine="zarr", group=path, **kwargs) + except zarr.errors.PathNotFoundError: + subgroup_ds = Dataset() + + # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again + node_name = NodePath(path).name + new_node: DataTree = DataTree(name=node_name, data=subgroup_ds) + tree_root._set_item( + path, + new_node, + allow_overwrite=False, + new_nodes_along_path=True, + ) + return tree_root + + +def _iter_zarr_groups(root, parent="/"): + from xarray.datatree_.datatree import NodePath + + parent = NodePath(parent) + for path, group in root.groups(): + gpath = parent / path + yield str(gpath) + yield from _iter_zarr_groups(group, parent=gpath) BACKEND_ENTRYPOINTS["zarr"] = ("zarr", ZarrBackendEntrypoint) diff --git a/xarray/datatree_/datatree/__init__.py b/xarray/datatree_/datatree/__init__.py index 3b97ea9d4db..622b5e6140a 100644 --- a/xarray/datatree_/datatree/__init__.py +++ b/xarray/datatree_/datatree/__init__.py @@ -1,9 +1,8 @@ # import public API from .datatree import DataTree from .extensions import register_datatree_accessor -from .io import open_datatree from .mapping import TreeIsomorphismError, map_over_subtree -from .treenode import InvalidTreeError, NotFoundInTreeError +from .treenode import InvalidTreeError, NodePath, NotFoundInTreeError try: # NOTE: the `_version.py` file must not be present in the git repository @@ -15,10 +14,10 @@ __all__ = ( "DataTree", - 
"open_datatree", "TreeIsomorphismError", "InvalidTreeError", "NotFoundInTreeError", + "NodePath", "map_over_subtree", "register_datatree_accessor", "__version__", diff --git a/xarray/datatree_/datatree/io.py b/xarray/datatree_/datatree/io.py index 8bb7682f085..b460224a697 100644 --- a/xarray/datatree_/datatree/io.py +++ b/xarray/datatree_/datatree/io.py @@ -1,22 +1,4 @@ -from xarray import Dataset, open_dataset - -from .datatree import DataTree, NodePath - - -def _iter_zarr_groups(root, parent="/"): - parent = NodePath(parent) - for path, group in root.groups(): - gpath = parent / path - yield str(gpath) - yield from _iter_zarr_groups(group, parent=gpath) - - -def _iter_nc_groups(root, parent="/"): - parent = NodePath(parent) - for path, group in root.groups.items(): - gpath = parent / path - yield str(gpath) - yield from _iter_nc_groups(group, parent=gpath) +from .datatree import DataTree def _get_nc_dataset_class(engine): @@ -34,77 +16,6 @@ def _get_nc_dataset_class(engine): return Dataset -def open_datatree(filename_or_obj, engine=None, **kwargs) -> DataTree: - """ - Open and decode a dataset from a file or file-like object, creating one Tree node for each group in the file. - - Parameters - ---------- - filename_or_obj : str, Path, file-like, or DataStore - Strings and Path objects are interpreted as a path to a netCDF file or Zarr store. - engine : str, optional - Xarray backend engine to us. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`. - kwargs : - Additional keyword arguments passed to ``xarray.open_dataset`` for each group. - - Returns - ------- - DataTree - """ - - if engine == "zarr": - return _open_datatree_zarr(filename_or_obj, **kwargs) - elif engine in [None, "netcdf4", "h5netcdf"]: - return _open_datatree_netcdf(filename_or_obj, engine=engine, **kwargs) - else: - raise ValueError("Unsupported engine") - - -def _open_datatree_netcdf(filename: str, **kwargs) -> DataTree: - ncDataset = _get_nc_dataset_class(kwargs.get("engine", None)) - - ds = open_dataset(filename, **kwargs) - tree_root = DataTree.from_dict({"/": ds}) - with ncDataset(filename, mode="r") as ncds: - for path in _iter_nc_groups(ncds): - subgroup_ds = open_dataset(filename, group=path, **kwargs) - - # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again - node_name = NodePath(path).name - new_node: DataTree = DataTree(name=node_name, data=subgroup_ds) - tree_root._set_item( - path, - new_node, - allow_overwrite=False, - new_nodes_along_path=True, - ) - return tree_root - - -def _open_datatree_zarr(store, **kwargs) -> DataTree: - import zarr # type: ignore - - zds = zarr.open_group(store, mode="r") - ds = open_dataset(store, engine="zarr", **kwargs) - tree_root = DataTree.from_dict({"/": ds}) - for path in _iter_zarr_groups(zds): - try: - subgroup_ds = open_dataset(store, engine="zarr", group=path, **kwargs) - except zarr.errors.PathNotFoundError: - subgroup_ds = Dataset() - - # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again - node_name = NodePath(path).name - new_node: DataTree = DataTree(name=node_name, data=subgroup_ds) - tree_root._set_item( - path, - new_node, - allow_overwrite=False, - new_nodes_along_path=True, - ) - return tree_root - - def _create_empty_netcdf_group(filename, group, mode, engine): ncDataset = _get_nc_dataset_class(engine) diff --git a/xarray/datatree_/datatree/tests/test_io.py b/xarray/datatree_/datatree/tests/test_io.py index 6fa20479f9a..bb9771ea3ec 100644 --- 
a/xarray/datatree_/datatree/tests/test_io.py +++ b/xarray/datatree_/datatree/tests/test_io.py @@ -1,7 +1,7 @@ import pytest import zarr.errors -from datatree.io import open_datatree +from xarray import open_datatree from datatree.testing import assert_equal from datatree.tests import requires_h5netcdf, requires_netCDF4, requires_zarr From 0c62960ee0bcf6c2b601802d6a2454380cd7a80b Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Tue, 30 Jan 2024 08:58:15 -0700 Subject: [PATCH 03/24] DAS-2060: replace relative import of datatree to library --- xarray/backends/api.py | 2 +- xarray/backends/common.py | 2 +- xarray/backends/h5netcdf_.py | 6 +++--- xarray/backends/netCDF4_.py | 6 +++--- xarray/backends/zarr.py | 6 +++--- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 4017c102e36..976fd48afbb 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -69,7 +69,7 @@ T_NetcdfTypes = Literal[ "NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC" ] - from xarray.datatree_.datatree import DataTree + from datatree import DataTree DATAARRAY_NAME = "__xarray_dataarray_name__" DATAARRAY_VARIABLE = "__xarray_dataarray_variable__" diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 6b63525e8a3..1006fefbf06 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -21,7 +21,7 @@ from xarray.core.dataset import Dataset from xarray.core.types import NestedSequence - from xarray.datatree_.datatree import DataTree + from datatree import DataTree # Create a logger object, but don't add any handlers. Leave that to user code. logger = logging.getLogger(__name__) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 68b2780f83d..fb16afe181b 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -38,7 +38,7 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset - from xarray.datatree_.datatree import DataTree + from datatree import DataTree class H5NetCDFArrayWrapper(BaseNetCDF4Array): @@ -429,7 +429,7 @@ def open_datatree(self, filename: str, **kwargs) -> DataTree: from h5netcdf.legacyapi import Dataset as ncDataset from xarray.backends.api import open_dataset - from xarray.datatree_.datatree import DataTree, NodePath + from datatree import DataTree, NodePath ds = open_dataset(filename, **kwargs) tree_root = DataTree.from_dict({"/": ds}) @@ -451,7 +451,7 @@ def open_datatree(self, filename: str, **kwargs) -> DataTree: # TODO [MHS, 01/23/2024] directly duplicated from netCDF4 backend def _iter_nc_groups(root, parent="/"): - from xarray.datatree_.datatree import NodePath + from datatree import NodePath parent = NodePath(parent) for path, group in root.groups.items(): diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 75b462eaa4f..e6852dd062c 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -44,7 +44,7 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset - from xarray.datatree_.datatree import DataTree + from datatree import DataTree # This lookup table maps from dtype.byteorder to a readable endian # string used by netCDF4. 
@@ -672,7 +672,7 @@ def open_datatree(self, filename: str, **kwargs) -> DataTree: from netCDF4 import Dataset as ncDataset from xarray.backends.api import open_dataset - from xarray.datatree_.datatree import DataTree, NodePath + from datatree import DataTree, NodePath ds = open_dataset(filename, **kwargs) tree_root = DataTree.from_dict({"/": ds}) @@ -693,7 +693,7 @@ def open_datatree(self, filename: str, **kwargs) -> DataTree: def _iter_nc_groups(root, parent="/"): - from xarray.datatree_.datatree import NodePath + from datatree import NodePath parent = NodePath(parent) for path, group in root.groups.items(): diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index a4d14134cfb..7988c996388 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -34,7 +34,7 @@ from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset - from xarray.datatree_.datatree import DataTree + from datatree import DataTree # need some special secret attributes to tell us the dimensions @@ -1040,7 +1040,7 @@ def open_datatree(self, store, **kwargs) -> DataTree: import zarr # type: ignore from xarray.backends.api import open_dataset - from xarray.datatree_.datatree import DataTree, NodePath + from datatree import DataTree, NodePath zds = zarr.open_group(store, mode="r") ds = open_dataset(store, engine="zarr", **kwargs) @@ -1064,7 +1064,7 @@ def open_datatree(self, store, **kwargs) -> DataTree: def _iter_zarr_groups(root, parent="/"): - from xarray.datatree_.datatree import NodePath + from datatree import NodePath parent = NodePath(parent) for path, group in root.groups(): From a523d50264cec6cd3d2d89afd6454d47a3a1828f Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Tue, 30 Jan 2024 14:53:57 -0700 Subject: [PATCH 04/24] DAS-2060: revert the exporting of NodePath from datatree I mistakenly thought we wanted to use the hidden version of datatree_ and we do not. --- xarray/backends/h5netcdf_.py | 9 ++++++--- xarray/backends/netCDF4_.py | 8 +++++--- xarray/backends/zarr.py | 9 ++++++--- xarray/datatree_/datatree/__init__.py | 3 +-- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index fb16afe181b..c85d04a7235 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -36,9 +36,10 @@ if TYPE_CHECKING: from io import BufferedIOBase + from datatree import DataTree + from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset - from datatree import DataTree class H5NetCDFArrayWrapper(BaseNetCDF4Array): @@ -426,10 +427,11 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti # TODO [MHS, 01/23/2024] This is duplicative of the netcdf4 code in an ugly way. 
def open_datatree(self, filename: str, **kwargs) -> DataTree: + from datatree import DataTree + from datatree.treenode import NodePath from h5netcdf.legacyapi import Dataset as ncDataset from xarray.backends.api import open_dataset - from datatree import DataTree, NodePath ds = open_dataset(filename, **kwargs) tree_root = DataTree.from_dict({"/": ds}) @@ -451,7 +453,7 @@ def open_datatree(self, filename: str, **kwargs) -> DataTree: # TODO [MHS, 01/23/2024] directly duplicated from netCDF4 backend def _iter_nc_groups(root, parent="/"): - from datatree import NodePath + from datatree.treenode import NodePath parent = NodePath(parent) for path, group in root.groups.items(): @@ -459,4 +461,5 @@ def _iter_nc_groups(root, parent="/"): yield str(gpath) yield from _iter_nc_groups(group, parent=gpath) + BACKEND_ENTRYPOINTS["h5netcdf"] = ("h5netcdf", H5netcdfBackendEntrypoint) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index e6852dd062c..426734b6549 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -42,9 +42,10 @@ if TYPE_CHECKING: from io import BufferedIOBase + from datatree import DataTree + from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset - from datatree import DataTree # This lookup table maps from dtype.byteorder to a readable endian # string used by netCDF4. @@ -669,10 +670,11 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti return ds def open_datatree(self, filename: str, **kwargs) -> DataTree: + from datatree import DataTree + from datatree.treenode import NodePath from netCDF4 import Dataset as ncDataset from xarray.backends.api import open_dataset - from datatree import DataTree, NodePath ds = open_dataset(filename, **kwargs) tree_root = DataTree.from_dict({"/": ds}) @@ -693,7 +695,7 @@ def open_datatree(self, filename: str, **kwargs) -> DataTree: def _iter_nc_groups(root, parent="/"): - from datatree import NodePath + from datatree.treenode import NodePath parent = NodePath(parent) for path, group in root.groups.items(): diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 7988c996388..76d7b8bdaf6 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -32,9 +32,10 @@ if TYPE_CHECKING: from io import BufferedIOBase + from datatree import DataTree + from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset - from datatree import DataTree # need some special secret attributes to tell us the dimensions @@ -1038,9 +1039,10 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti def open_datatree(self, store, **kwargs) -> DataTree: import zarr # type: ignore + from datatree import DataTree + from datatree.treenode import NodePath from xarray.backends.api import open_dataset - from datatree import DataTree, NodePath zds = zarr.open_group(store, mode="r") ds = open_dataset(store, engine="zarr", **kwargs) @@ -1064,7 +1066,7 @@ def open_datatree(self, store, **kwargs) -> DataTree: def _iter_zarr_groups(root, parent="/"): - from datatree import NodePath + from datatree.treenode import NodePath parent = NodePath(parent) for path, group in root.groups(): @@ -1072,4 +1074,5 @@ def _iter_zarr_groups(root, parent="/"): yield str(gpath) yield from _iter_zarr_groups(group, parent=gpath) + BACKEND_ENTRYPOINTS["zarr"] = ("zarr", ZarrBackendEntrypoint) diff --git a/xarray/datatree_/datatree/__init__.py b/xarray/datatree_/datatree/__init__.py index 622b5e6140a..fb1d61a4b82 100644 --- 
a/xarray/datatree_/datatree/__init__.py +++ b/xarray/datatree_/datatree/__init__.py @@ -2,7 +2,7 @@ from .datatree import DataTree from .extensions import register_datatree_accessor from .mapping import TreeIsomorphismError, map_over_subtree -from .treenode import InvalidTreeError, NodePath, NotFoundInTreeError +from .treenode import InvalidTreeError, NotFoundInTreeError try: # NOTE: the `_version.py` file must not be present in the git repository @@ -17,7 +17,6 @@ "TreeIsomorphismError", "InvalidTreeError", "NotFoundInTreeError", - "NodePath", "map_over_subtree", "register_datatree_accessor", "__version__", From e687e4aace326c2314d21d269db7bd343995bc48 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Thu, 1 Feb 2024 10:26:24 -0700 Subject: [PATCH 05/24] Don't expose open_datatree at top level We do not want to expose open_datatree at top level until all of the code is migrated. --- doc/roadmap.rst | 2 +- xarray/__init__.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/roadmap.rst b/doc/roadmap.rst index eeaaf10813b..820ff82151c 100644 --- a/doc/roadmap.rst +++ b/doc/roadmap.rst @@ -156,7 +156,7 @@ types would also be highly useful for xarray users. By pursuing these improvements in NumPy we hope to extend the benefits to the full scientific Python community, and avoid tight coupling between xarray and specific third-party libraries (e.g., for -implementing untis). This will allow xarray to maintain its domain +implementing units). This will allow xarray to maintain its domain agnostic strengths. We expect that we may eventually add some minimal interfaces in xarray diff --git a/xarray/__init__.py b/xarray/__init__.py index 6d23556e06f..91613e8cbbc 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -6,7 +6,6 @@ load_dataset, open_dataarray, open_dataset, - open_datatree, open_mfdataset, save_mfdataset, ) @@ -84,7 +83,6 @@ "ones_like", "open_dataarray", "open_dataset", - "open_datatree", "open_mfdataset", "open_zarr", "polyval", From 4e05d5c62caebd4d97247253db622f4eee8ff93f Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Thu, 1 Feb 2024 17:15:20 -0700 Subject: [PATCH 06/24] Point datatree imports to xarray.datatree_.datatree --- xarray/backends/api.py | 4 ++-- xarray/backends/common.py | 3 ++- xarray/backends/h5netcdf_.py | 9 ++++----- xarray/backends/netCDF4_.py | 9 ++++----- xarray/backends/zarr.py | 9 ++++----- xarray/datatree_/datatree/__init__.py | 8 -------- xarray/datatree_/datatree/tests/conftest.py | 2 +- xarray/datatree_/datatree/tests/test_dataset_api.py | 4 ++-- xarray/datatree_/datatree/tests/test_datatree.py | 4 ++-- xarray/datatree_/datatree/tests/test_extensions.py | 2 +- xarray/datatree_/datatree/tests/test_formatting.py | 4 ++-- xarray/datatree_/datatree/tests/test_formatting_html.py | 2 +- xarray/datatree_/datatree/tests/test_io.py | 6 +++--- xarray/datatree_/datatree/tests/test_mapping.py | 6 +++--- xarray/datatree_/datatree/tests/test_treenode.py | 4 ++-- xarray/datatree_/datatree/tests/test_version.py | 5 ----- 16 files changed, 33 insertions(+), 48 deletions(-) delete mode 100644 xarray/datatree_/datatree/tests/test_version.py diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 976fd48afbb..0b480095105 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -69,7 +69,7 @@ T_NetcdfTypes = Literal[ "NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_64BIT", "NETCDF3_CLASSIC" ] - from datatree import DataTree + from xarray.datatree_.datatree import DataTree DATAARRAY_NAME = "__xarray_dataarray_name__" DATAARRAY_VARIABLE = 
"__xarray_dataarray_variable__" @@ -806,7 +806,7 @@ def open_datatree( Additional keyword arguments passed to :py:meth:`~xarray.open_dataset` for each group. Returns ------- - datatree.DataTree + xarray.core.datatree.DataTree """ if engine is None: engine = plugins.guess_engine(filename_or_obj) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 1006fefbf06..8b585ca86a8 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -21,7 +21,7 @@ from xarray.core.dataset import Dataset from xarray.core.types import NestedSequence - from datatree import DataTree + from xarray.datatree_.datatree import DataTree # Create a logger object, but don't add any handlers. Leave that to user code. logger = logging.getLogger(__name__) @@ -525,5 +525,6 @@ def open_datatree( raise NotImplementedError + # mapping of engine name to (module name, BackendEntrypoint Class) BACKEND_ENTRYPOINTS: dict[str, tuple[str | None, type[BackendEntrypoint]]] = {} diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index c85d04a7235..d55fac4f6c9 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -36,10 +36,9 @@ if TYPE_CHECKING: from io import BufferedIOBase - from datatree import DataTree - from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset + from xarray.datatree_.datatree import DataTree class H5NetCDFArrayWrapper(BaseNetCDF4Array): @@ -427,11 +426,11 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti # TODO [MHS, 01/23/2024] This is duplicative of the netcdf4 code in an ugly way. def open_datatree(self, filename: str, **kwargs) -> DataTree: - from datatree import DataTree - from datatree.treenode import NodePath from h5netcdf.legacyapi import Dataset as ncDataset from xarray.backends.api import open_dataset + from xarray.datatree_.datatree import DataTree + from xarray.datatree_.datatree.treenode import NodePath ds = open_dataset(filename, **kwargs) tree_root = DataTree.from_dict({"/": ds}) @@ -453,7 +452,7 @@ def open_datatree(self, filename: str, **kwargs) -> DataTree: # TODO [MHS, 01/23/2024] directly duplicated from netCDF4 backend def _iter_nc_groups(root, parent="/"): - from datatree.treenode import NodePath + from xarray.datatree_.datatree.treenode import NodePath parent = NodePath(parent) for path, group in root.groups.items(): diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 426734b6549..d7879a57264 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -42,10 +42,9 @@ if TYPE_CHECKING: from io import BufferedIOBase - from datatree import DataTree - from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset + from xarray.datatree_.datatree import DataTree # This lookup table maps from dtype.byteorder to a readable endian # string used by netCDF4. 
@@ -670,11 +669,11 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti return ds def open_datatree(self, filename: str, **kwargs) -> DataTree: - from datatree import DataTree - from datatree.treenode import NodePath from netCDF4 import Dataset as ncDataset from xarray.backends.api import open_dataset + from xarray.datatree_.datatree import DataTree + from xarray.datatree_.datatree.treenode import NodePath ds = open_dataset(filename, **kwargs) tree_root = DataTree.from_dict({"/": ds}) @@ -695,7 +694,7 @@ def open_datatree(self, filename: str, **kwargs) -> DataTree: def _iter_nc_groups(root, parent="/"): - from datatree.treenode import NodePath + from xarray.datatree_.datatree.treenode import NodePath parent = NodePath(parent) for path, group in root.groups.items(): diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 76d7b8bdaf6..bb46ab68de5 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -32,10 +32,9 @@ if TYPE_CHECKING: from io import BufferedIOBase - from datatree import DataTree - from xarray.backends.common import AbstractDataStore from xarray.core.dataset import Dataset + from xarray.datatree_.datatree import DataTree # need some special secret attributes to tell us the dimensions @@ -1039,10 +1038,10 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti def open_datatree(self, store, **kwargs) -> DataTree: import zarr # type: ignore - from datatree import DataTree - from datatree.treenode import NodePath from xarray.backends.api import open_dataset + from xarray.datatree_.datatree import DataTree + from xarray.datatree_.datatree.treenode import NodePath zds = zarr.open_group(store, mode="r") ds = open_dataset(store, engine="zarr", **kwargs) @@ -1066,7 +1065,7 @@ def open_datatree(self, store, **kwargs) -> DataTree: def _iter_zarr_groups(root, parent="/"): - from datatree.treenode import NodePath + from xarray.datatree_.datatree.treenode import NodePath parent = NodePath(parent) for path, group in root.groups(): diff --git a/xarray/datatree_/datatree/__init__.py b/xarray/datatree_/datatree/__init__.py index fb1d61a4b82..f9fd419bddc 100644 --- a/xarray/datatree_/datatree/__init__.py +++ b/xarray/datatree_/datatree/__init__.py @@ -4,13 +4,6 @@ from .mapping import TreeIsomorphismError, map_over_subtree from .treenode import InvalidTreeError, NotFoundInTreeError -try: - # NOTE: the `_version.py` file must not be present in the git repository - # as it is generated by setuptools at install time - from ._version import __version__ -except ImportError: # pragma: no cover - # Local copy or not installed with setuptools - __version__ = "999" __all__ = ( "DataTree", @@ -19,5 +12,4 @@ "NotFoundInTreeError", "map_over_subtree", "register_datatree_accessor", - "__version__", ) diff --git a/xarray/datatree_/datatree/tests/conftest.py b/xarray/datatree_/datatree/tests/conftest.py index 3ed1325ccd5..bd2e7ba3247 100644 --- a/xarray/datatree_/datatree/tests/conftest.py +++ b/xarray/datatree_/datatree/tests/conftest.py @@ -1,7 +1,7 @@ import pytest import xarray as xr -from datatree import DataTree +from xarray.datatree_.datatree import DataTree @pytest.fixture(scope="module") diff --git a/xarray/datatree_/datatree/tests/test_dataset_api.py b/xarray/datatree_/datatree/tests/test_dataset_api.py index 6879b869299..c3eb74451a6 100644 --- a/xarray/datatree_/datatree/tests/test_dataset_api.py +++ b/xarray/datatree_/datatree/tests/test_dataset_api.py @@ -1,8 +1,8 @@ import numpy as np import xarray as xr -from 
datatree import DataTree -from datatree.testing import assert_equal +from xarray.datatree_.datatree import DataTree +from xarray.datatree_.datatree.testing import assert_equal class TestDSMethodInheritance: diff --git a/xarray/datatree_/datatree/tests/test_datatree.py b/xarray/datatree_/datatree/tests/test_datatree.py index fde83b2e226..cfb57470651 100644 --- a/xarray/datatree_/datatree/tests/test_datatree.py +++ b/xarray/datatree_/datatree/tests/test_datatree.py @@ -6,8 +6,8 @@ import xarray.testing as xrt from xarray.tests import create_test_data, source_ndarray -import datatree.testing as dtt -from datatree import DataTree, NotFoundInTreeError +import xarray.datatree_.datatree.testing as dtt +from xarray.datatree_.datatree import DataTree, NotFoundInTreeError class TestTreeCreation: diff --git a/xarray/datatree_/datatree/tests/test_extensions.py b/xarray/datatree_/datatree/tests/test_extensions.py index b288998e2ce..0241e496abf 100644 --- a/xarray/datatree_/datatree/tests/test_extensions.py +++ b/xarray/datatree_/datatree/tests/test_extensions.py @@ -1,6 +1,6 @@ import pytest -from datatree import DataTree, register_datatree_accessor +from xarray.datatree_.datatree import DataTree, register_datatree_accessor class TestAccessor: diff --git a/xarray/datatree_/datatree/tests/test_formatting.py b/xarray/datatree_/datatree/tests/test_formatting.py index 0f64644c05a..8726c95fe62 100644 --- a/xarray/datatree_/datatree/tests/test_formatting.py +++ b/xarray/datatree_/datatree/tests/test_formatting.py @@ -2,8 +2,8 @@ from xarray import Dataset -from datatree import DataTree -from datatree.formatting import diff_tree_repr +from xarray.datatree_.datatree import DataTree +from xarray.datatree_.datatree.formatting import diff_tree_repr class TestRepr: diff --git a/xarray/datatree_/datatree/tests/test_formatting_html.py b/xarray/datatree_/datatree/tests/test_formatting_html.py index 7c6a47ea86e..943bbab4154 100644 --- a/xarray/datatree_/datatree/tests/test_formatting_html.py +++ b/xarray/datatree_/datatree/tests/test_formatting_html.py @@ -1,7 +1,7 @@ import pytest import xarray as xr -from datatree import DataTree, formatting_html +from xarray.datatree_.datatree import DataTree, formatting_html @pytest.fixture(scope="module", params=["some html", "some other html"]) diff --git a/xarray/datatree_/datatree/tests/test_io.py b/xarray/datatree_/datatree/tests/test_io.py index bb9771ea3ec..e2744b14a2d 100644 --- a/xarray/datatree_/datatree/tests/test_io.py +++ b/xarray/datatree_/datatree/tests/test_io.py @@ -1,9 +1,9 @@ import pytest import zarr.errors -from xarray import open_datatree -from datatree.testing import assert_equal -from datatree.tests import requires_h5netcdf, requires_netCDF4, requires_zarr +from xarray.backends.api import open_datatree +from xarray.datatree_.datatree.testing import assert_equal +from xarray.datatree_.datatree.tests import requires_h5netcdf, requires_netCDF4, requires_zarr class TestIO: diff --git a/xarray/datatree_/datatree/tests/test_mapping.py b/xarray/datatree_/datatree/tests/test_mapping.py index 929ce7644dd..53d6e085440 100644 --- a/xarray/datatree_/datatree/tests/test_mapping.py +++ b/xarray/datatree_/datatree/tests/test_mapping.py @@ -2,9 +2,9 @@ import pytest import xarray as xr -from datatree.datatree import DataTree -from datatree.mapping import TreeIsomorphismError, check_isomorphic, map_over_subtree -from datatree.testing import assert_equal +from xarray.datatree_.datatree.datatree import DataTree +from xarray.datatree_.datatree.mapping import 
TreeIsomorphismError, check_isomorphic, map_over_subtree +from xarray.datatree_.datatree.testing import assert_equal empty = xr.Dataset() diff --git a/xarray/datatree_/datatree/tests/test_treenode.py b/xarray/datatree_/datatree/tests/test_treenode.py index f2d314c50e3..3c75f3ac8a4 100644 --- a/xarray/datatree_/datatree/tests/test_treenode.py +++ b/xarray/datatree_/datatree/tests/test_treenode.py @@ -1,7 +1,7 @@ import pytest -from datatree.iterators import LevelOrderIter, PreOrderIter -from datatree.treenode import InvalidTreeError, NamedNode, NodePath, TreeNode +from xarray.datatree_.datatree.iterators import LevelOrderIter, PreOrderIter +from xarray.datatree_.datatree.treenode import InvalidTreeError, NamedNode, NodePath, TreeNode class TestFamilyTree: diff --git a/xarray/datatree_/datatree/tests/test_version.py b/xarray/datatree_/datatree/tests/test_version.py deleted file mode 100644 index 207d5d86d53..00000000000 --- a/xarray/datatree_/datatree/tests/test_version.py +++ /dev/null @@ -1,5 +0,0 @@ -import datatree - - -def test_version(): - assert datatree.__version__ != "999" From 77405d98922fa59c0789a6bcac3e17d82d050bfd Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Fri, 2 Feb 2024 13:53:33 -0700 Subject: [PATCH 07/24] Updates function signatures for mypy. --- xarray/backends/h5netcdf_.py | 12 ++++++++---- xarray/backends/netCDF4_.py | 12 ++++++++---- xarray/backends/zarr.py | 16 +++++++++++----- 3 files changed, 27 insertions(+), 13 deletions(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index d55fac4f6c9..4c36adbc121 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -425,18 +425,22 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti return ds # TODO [MHS, 01/23/2024] This is duplicative of the netcdf4 code in an ugly way. 
- def open_datatree(self, filename: str, **kwargs) -> DataTree: + def open_datatree( + self, + filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, + **kwargs, + ) -> DataTree: from h5netcdf.legacyapi import Dataset as ncDataset from xarray.backends.api import open_dataset from xarray.datatree_.datatree import DataTree from xarray.datatree_.datatree.treenode import NodePath - ds = open_dataset(filename, **kwargs) + ds = open_dataset(filename_or_obj, **kwargs) tree_root = DataTree.from_dict({"/": ds}) - with ncDataset(filename, mode="r") as ncds: + with ncDataset(filename_or_obj, mode="r") as ncds: for path in _iter_nc_groups(ncds): - subgroup_ds = open_dataset(filename, group=path, **kwargs) + subgroup_ds = open_dataset(filename_or_obj, group=path, **kwargs) # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again node_name = NodePath(path).name diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index d7879a57264..8c3bf0ff77c 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -668,18 +668,22 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti ) return ds - def open_datatree(self, filename: str, **kwargs) -> DataTree: + def open_datatree( + self, + filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, + **kwargs, + ) -> DataTree: from netCDF4 import Dataset as ncDataset from xarray.backends.api import open_dataset from xarray.datatree_.datatree import DataTree from xarray.datatree_.datatree.treenode import NodePath - ds = open_dataset(filename, **kwargs) + ds = open_dataset(filename_or_obj, **kwargs) tree_root = DataTree.from_dict({"/": ds}) - with ncDataset(filename, mode="r") as ncds: + with ncDataset(filename_or_obj, mode="r") as ncds: for path in _iter_nc_groups(ncds): - subgroup_ds = open_dataset(filename, group=path, **kwargs) + subgroup_ds = open_dataset(filename_or_obj, group=path, **kwargs) # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again node_name = NodePath(path).name diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index bb46ab68de5..a276103b18b 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -1036,19 +1036,25 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti ) return ds - def open_datatree(self, store, **kwargs) -> DataTree: - import zarr # type: ignore + def open_datatree( + self, + filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, + **kwargs, + ) -> DataTree: + import zarr from xarray.backends.api import open_dataset from xarray.datatree_.datatree import DataTree from xarray.datatree_.datatree.treenode import NodePath - zds = zarr.open_group(store, mode="r") - ds = open_dataset(store, engine="zarr", **kwargs) + zds = zarr.open_group(filename_or_obj, mode="r") + ds = open_dataset(filename_or_obj, engine="zarr", **kwargs) tree_root = DataTree.from_dict({"/": ds}) for path in _iter_zarr_groups(zds): try: - subgroup_ds = open_dataset(store, engine="zarr", group=path, **kwargs) + subgroup_ds = open_dataset( + filename_or_obj, engine="zarr", group=path, **kwargs + ) except zarr.errors.PathNotFoundError: subgroup_ds = Dataset() From 81b425f4e0fff6f8828ac6f3fc85be727dc5d7a3 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Fri, 2 Feb 2024 15:06:31 -0700 Subject: [PATCH 08/24] Move io tests, remove undefined reference to documentation. 
Also starts fixing simple mypy errors --- xarray/backends/common.py | 2 +- xarray/datatree_/datatree/io.py | 10 +-- xarray/datatree_/pyproject.toml | 61 ----------------- xarray/tests/datatree/conftest.py | 65 +++++++++++++++++++ .../tests => tests/datatree}/test_io.py | 6 +- 5 files changed, 76 insertions(+), 68 deletions(-) delete mode 100644 xarray/datatree_/pyproject.toml create mode 100644 xarray/tests/datatree/conftest.py rename xarray/{datatree_/datatree/tests => tests/datatree}/test_io.py (97%) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 8b585ca86a8..2e7d6a8f1aa 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -463,7 +463,7 @@ class BackendEntrypoint: decoding and it returns an instance of :py:class:`~datatree.DataTree`. It shall take in input at least ``filename_or_obj`` argument. The implementation of this method is not mandatory. For more details see - :ref:`RST open_datatree`. + . Attributes ---------- diff --git a/xarray/datatree_/datatree/io.py b/xarray/datatree_/datatree/io.py index b460224a697..1b2e328c440 100644 --- a/xarray/datatree_/datatree/io.py +++ b/xarray/datatree_/datatree/io.py @@ -3,14 +3,14 @@ def _get_nc_dataset_class(engine): if engine == "netcdf4": - from netCDF4 import Dataset # type: ignore + from netCDF4 import Dataset elif engine == "h5netcdf": - from h5netcdf.legacyapi import Dataset # type: ignore + from h5netcdf.legacyapi import Dataset elif engine is None: try: from netCDF4 import Dataset except ImportError: - from h5netcdf.legacyapi import Dataset # type: ignore + from h5netcdf.legacyapi import Dataset else: raise ValueError(f"unsupported engine: {engine}") return Dataset @@ -78,7 +78,7 @@ def _datatree_to_netcdf( def _create_empty_zarr_group(store, group, mode): - import zarr # type: ignore + import zarr root = zarr.open_group(store, mode=mode) root.create_group(group, overwrite=True) @@ -92,7 +92,7 @@ def _datatree_to_zarr( consolidated: bool = True, **kwargs, ): - from zarr.convenience import consolidate_metadata # type: ignore + from zarr.convenience import consolidate_metadata if kwargs.get("group", None) is not None: raise NotImplementedError( diff --git a/xarray/datatree_/pyproject.toml b/xarray/datatree_/pyproject.toml deleted file mode 100644 index 40f7d5a59b3..00000000000 --- a/xarray/datatree_/pyproject.toml +++ /dev/null @@ -1,61 +0,0 @@ -[project] -name = "xarray-datatree" -description = "Hierarchical tree-like data structures for xarray" -readme = "README.md" -authors = [ - {name = "Thomas Nicholas", email = "thomas.nicholas@columbia.edu"} -] -license = {text = "Apache-2"} -classifiers = [ - "Development Status :: 3 - Alpha", - "Intended Audience :: Science/Research", - "Topic :: Scientific/Engineering", - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", -] -requires-python = ">=3.9" -dependencies = [ - "xarray >=2023.12.0", - "packaging", -] -dynamic = ["version"] - -[project.urls] -Home = "https://github.com/xarray-contrib/datatree" -Documentation = "https://xarray-datatree.readthedocs.io/en/stable/" - -[build-system] -requires = [ - "setuptools>=61.0.0", - "wheel", - "setuptools_scm[toml]>=7.0", - "check-manifest" -] - -[tool.setuptools_scm] -write_to = "datatree/_version.py" -write_to_template = ''' -# Do not change! Do not track in version control! 
-__version__ = "{version}" -''' - -[tool.setuptools.packages.find] -exclude = ["docs", "tests", "tests.*", "docs.*"] - -[tool.setuptools.package-data] -datatree = ["py.typed"] - -[tool.isort] -profile = "black" -skip_gitignore = true -float_to_top = true -default_section = "THIRDPARTY" -known_first_party = "datatree" - -[mypy] -files = "datatree/**/*.py" -show_error_codes = true diff --git a/xarray/tests/datatree/conftest.py b/xarray/tests/datatree/conftest.py new file mode 100644 index 00000000000..d3593ff36de --- /dev/null +++ b/xarray/tests/datatree/conftest.py @@ -0,0 +1,65 @@ +import pytest + +import xarray as xr +from xarray.datatree_.datatree import DataTree + + +@pytest.fixture(scope="module") +def create_test_datatree(): + """ + Create a test datatree with this structure: + + + |-- set1 + | |-- + | | Dimensions: () + | | Data variables: + | | a int64 0 + | | b int64 1 + | |-- set1 + | |-- set2 + |-- set2 + | |-- + | | Dimensions: (x: 2) + | | Data variables: + | | a (x) int64 2, 3 + | | b (x) int64 0.1, 0.2 + | |-- set1 + |-- set3 + |-- + | Dimensions: (x: 2, y: 3) + | Data variables: + | a (y) int64 6, 7, 8 + | set0 (x) int64 9, 10 + + The structure has deliberately repeated names of tags, variables, and + dimensions in order to better check for bugs caused by name conflicts. + """ + + def _create_test_datatree(modify=lambda ds: ds): + set1_data = modify(xr.Dataset({"a": 0, "b": 1})) + set2_data = modify(xr.Dataset({"a": ("x", [2, 3]), "b": ("x", [0.1, 0.2])})) + root_data = modify(xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])})) + + # Avoid using __init__ so we can independently test it + root = DataTree(data=root_data) + set1 = DataTree(name="set1", parent=root, data=set1_data) + DataTree(name="set1", parent=set1) + DataTree(name="set2", parent=set1) + set2 = DataTree(name="set2", parent=root, data=set2_data) + DataTree(name="set1", parent=set2) + DataTree(name="set3", parent=root) + + return root + + return _create_test_datatree + + +@pytest.fixture(scope="module") +def simple_datatree(create_test_datatree): + """ + Invoke create_test_datatree fixture (callback). + + Returns a DataTree. + """ + return create_test_datatree() diff --git a/xarray/datatree_/datatree/tests/test_io.py b/xarray/tests/datatree/test_io.py similarity index 97% rename from xarray/datatree_/datatree/tests/test_io.py rename to xarray/tests/datatree/test_io.py index e2744b14a2d..fef94c9fbb2 100644 --- a/xarray/datatree_/datatree/tests/test_io.py +++ b/xarray/tests/datatree/test_io.py @@ -3,7 +3,11 @@ from xarray.backends.api import open_datatree from xarray.datatree_.datatree.testing import assert_equal -from xarray.datatree_.datatree.tests import requires_h5netcdf, requires_netCDF4, requires_zarr +from xarray.tests import ( + requires_h5netcdf, + requires_netCDF4, + requires_zarr, +) class TestIO: From 3c5bcda81ededfd37c89aac3a42b1b8a7d70c030 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 5 Feb 2024 09:23:05 -0700 Subject: [PATCH 09/24] Pass bare-minimum tests. 
--- xarray/tests/datatree/test_io.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/tests/datatree/test_io.py b/xarray/tests/datatree/test_io.py index fef94c9fbb2..b7e7139ebfd 100644 --- a/xarray/tests/datatree/test_io.py +++ b/xarray/tests/datatree/test_io.py @@ -1,5 +1,4 @@ import pytest -import zarr.errors from xarray.backends.api import open_datatree from xarray.datatree_.datatree.testing import assert_equal @@ -117,6 +116,8 @@ def test_to_zarr_not_consolidated(self, tmpdir, simple_datatree): @requires_zarr def test_to_zarr_default_write_mode(self, tmpdir, simple_datatree): + import zarr + simple_datatree.to_zarr(tmpdir) # with default settings, to_zarr should not overwrite an existing dir From 9f892560f9a9e66c9aa40124eba4eedd57f70469 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 5 Feb 2024 11:02:11 -0700 Subject: [PATCH 10/24] Update pyproject.toml to exclude imported datatree_ modules. Add some typing for mygrated tests. Adds display_expand_groups to core options. --- pyproject.toml | 5 +++++ xarray/core/options.py | 3 +++ xarray/datatree_/datatree/datatree.py | 3 ++- xarray/datatree_/datatree/formatting_html.py | 3 --- xarray/tests/datatree/conftest.py | 6 +++--- xarray/tests/datatree/test_io.py | 4 ++-- 6 files changed, 15 insertions(+), 9 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 0dce98ff61f..0491abb71b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,6 +96,11 @@ warn_redundant_casts = true warn_unused_configs = true warn_unused_ignores = true +# Ignore mypy errors for modules imported from datatree_. +[[tool.mypy.overrides]] +module = "xarray.datatree_.*" +ignore_errors = true + # Much of the numerical computing stack doesn't have type annotations yet. [[tool.mypy.overrides]] ignore_missing_imports = true diff --git a/xarray/core/options.py b/xarray/core/options.py index d116c350991..915f233bb7b 100644 --- a/xarray/core/options.py +++ b/xarray/core/options.py @@ -20,6 +20,7 @@ "display_expand_coords", "display_expand_data_vars", "display_expand_data", + "display_expand_groups", "display_expand_indexes", "display_default_indexes", "enable_cftimeindex", @@ -44,6 +45,7 @@ class T_Options(TypedDict): display_expand_coords: Literal["default", True, False] display_expand_data_vars: Literal["default", True, False] display_expand_data: Literal["default", True, False] + display_expand_groups: Literal["default", True, False] display_expand_indexes: Literal["default", True, False] display_default_indexes: Literal["default", True, False] enable_cftimeindex: bool @@ -68,6 +70,7 @@ class T_Options(TypedDict): "display_expand_coords": "default", "display_expand_data_vars": "default", "display_expand_data": "default", + "display_expand_groups": "default", "display_expand_indexes": "default", "display_default_indexes": False, "enable_cftimeindex": True, diff --git a/xarray/datatree_/datatree/datatree.py b/xarray/datatree_/datatree/datatree.py index c86c2e2e3e8..87a882a2edb 100644 --- a/xarray/datatree_/datatree/datatree.py +++ b/xarray/datatree_/datatree/datatree.py @@ -16,6 +16,7 @@ List, Mapping, MutableMapping, + NoReturn, Optional, Set, Tuple, @@ -160,7 +161,7 @@ def __setitem__(self, key, val) -> None: "use `.copy()` first to get a mutable version of the input dataset." ) - def update(self, other) -> None: + def update(self, other) -> NoReturn: raise AttributeError( "Mutation of the DatasetView is not allowed, please use `.update` on the wrapping DataTree node, " "or use `dt.to_dataset()` if you want a mutable dataset. 
If calling this from within `map_over_subtree`," diff --git a/xarray/datatree_/datatree/formatting_html.py b/xarray/datatree_/datatree/formatting_html.py index 4531f5aec18..547b567a396 100644 --- a/xarray/datatree_/datatree/formatting_html.py +++ b/xarray/datatree_/datatree/formatting_html.py @@ -10,9 +10,6 @@ datavar_section, dim_section, ) -from xarray.core.options import OPTIONS - -OPTIONS["display_expand_groups"] = "default" def summarize_children(children: Mapping[str, Any]) -> str: diff --git a/xarray/tests/datatree/conftest.py b/xarray/tests/datatree/conftest.py index d3593ff36de..b341f3007aa 100644 --- a/xarray/tests/datatree/conftest.py +++ b/xarray/tests/datatree/conftest.py @@ -42,11 +42,11 @@ def _create_test_datatree(modify=lambda ds: ds): root_data = modify(xr.Dataset({"a": ("y", [6, 7, 8]), "set0": ("x", [9, 10])})) # Avoid using __init__ so we can independently test it - root = DataTree(data=root_data) - set1 = DataTree(name="set1", parent=root, data=set1_data) + root: DataTree = DataTree(data=root_data) + set1: DataTree = DataTree(name="set1", parent=root, data=set1_data) DataTree(name="set1", parent=set1) DataTree(name="set2", parent=set1) - set2 = DataTree(name="set2", parent=root, data=set2_data) + set2: DataTree = DataTree(name="set2", parent=root, data=set2_data) DataTree(name="set1", parent=set2) DataTree(name="set3", parent=root) diff --git a/xarray/tests/datatree/test_io.py b/xarray/tests/datatree/test_io.py index b7e7139ebfd..4f32e19de4a 100644 --- a/xarray/tests/datatree/test_io.py +++ b/xarray/tests/datatree/test_io.py @@ -38,7 +38,7 @@ def test_netcdf_encoding(self, tmpdir, simple_datatree): assert roundtrip_dt["/set2/a"].encoding["zlib"] == comp["zlib"] assert roundtrip_dt["/set2/a"].encoding["complevel"] == comp["complevel"] - enc["/not/a/group"] = {"foo": "bar"} + enc["/not/a/group"] = {"foo": "bar"} # type: ignore with pytest.raises(ValueError, match="unexpected encoding group.*"): original_dt.to_netcdf(filepath, encoding=enc, engine="netcdf4") @@ -81,7 +81,7 @@ def test_zarr_encoding(self, tmpdir, simple_datatree): print(roundtrip_dt["/set2/a"].encoding) assert roundtrip_dt["/set2/a"].encoding["compressor"] == comp["compressor"] - enc["/not/a/group"] = {"foo": "bar"} + enc["/not/a/group"] = {"foo": "bar"} # type: ignore with pytest.raises(ValueError, match="unexpected encoding group.*"): original_dt.to_zarr(filepath, encoding=enc, engine="zarr") From a4bad61c76c20d97f930d9c20abd4a97cf505aa6 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 5 Feb 2024 11:15:08 -0700 Subject: [PATCH 11/24] Adding back type ignores This is cargo-cult. I wonder if there's a different CI test that wanted these and since this is now excluded at the top level. I'm putting them back until migration into main codebase. 
--- xarray/datatree_/datatree/io.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/datatree_/datatree/io.py b/xarray/datatree_/datatree/io.py index 1b2e328c440..d3d533ee71e 100644 --- a/xarray/datatree_/datatree/io.py +++ b/xarray/datatree_/datatree/io.py @@ -1,16 +1,16 @@ -from .datatree import DataTree +from xarray.datatree_.datatree import DataTree def _get_nc_dataset_class(engine): if engine == "netcdf4": - from netCDF4 import Dataset + from netCDF4 import Dataset # type: ignore elif engine == "h5netcdf": - from h5netcdf.legacyapi import Dataset + from h5netcdf.legacyapi import Dataset # type: ignore elif engine is None: try: from netCDF4 import Dataset except ImportError: - from h5netcdf.legacyapi import Dataset + from h5netcdf.legacyapi import Dataset # type: ignore else: raise ValueError(f"unsupported engine: {engine}") return Dataset @@ -78,7 +78,7 @@ def _datatree_to_netcdf( def _create_empty_zarr_group(store, group, mode): - import zarr + import zarr # type: ignore root = zarr.open_group(store, mode=mode) root.create_group(group, overwrite=True) @@ -92,7 +92,7 @@ def _datatree_to_zarr( consolidated: bool = True, **kwargs, ): - from zarr.convenience import consolidate_metadata + from zarr.convenience import consolidate_metadata # type: ignore if kwargs.get("group", None) is not None: raise NotImplementedError( From e4f037428d687f254727c8010f6f73b73c2ee155 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Tue, 6 Feb 2024 08:46:57 -0700 Subject: [PATCH 12/24] Refactor open_datatree back together. puts common parts in common. --- xarray/backends/common.py | 40 ++++++++++++++++++++++++++++++++++++ xarray/backends/h5netcdf_.py | 33 ++--------------------------- xarray/backends/netCDF4_.py | 32 ++--------------------------- 3 files changed, 44 insertions(+), 61 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 2e7d6a8f1aa..187a6e5d0a5 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -19,6 +19,9 @@ if TYPE_CHECKING: from io import BufferedIOBase + from h5netcdf.legacyapi import Dataset as ncDatasetLegacyH5 + from netCDF4 import Dataset as ncDataset + from xarray.core.dataset import Dataset from xarray.core.types import NestedSequence from xarray.datatree_.datatree import DataTree @@ -128,6 +131,43 @@ def _decode_variable_name(name): return name +def _open_datatree_netcdf( + ncDataset: ncDataset | ncDatasetLegacyH5, + filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, + **kwargs, +) -> DataTree: + from xarray.backends.api import open_dataset + from xarray.datatree_.datatree import DataTree + from xarray.datatree_.datatree.treenode import NodePath + + ds = open_dataset(filename_or_obj, **kwargs) + tree_root = DataTree.from_dict({"/": ds}) + with ncDataset(filename_or_obj, mode="r") as ncds: + for path in _iter_nc_groups(ncds): + subgroup_ds = open_dataset(filename_or_obj, group=path, **kwargs) + + # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again + node_name = NodePath(path).name + new_node: DataTree = DataTree(name=node_name, data=subgroup_ds) + tree_root._set_item( + path, + new_node, + allow_overwrite=False, + new_nodes_along_path=True, + ) + return tree_root + + +def _iter_nc_groups(root, parent="/"): + from xarray.datatree_.datatree.treenode import NodePath + + parent = NodePath(parent) + for path, group in root.groups.items(): + gpath = parent / path + yield str(gpath) + yield from _iter_nc_groups(group, 
parent=gpath) + + def find_root_and_group(ds): """Find the root and group name of a netCDF4/h5netcdf dataset.""" hierarchy = () diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 4c36adbc121..27da5477a74 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -11,6 +11,7 @@ BackendEntrypoint, WritableCFDataStore, _normalize_path, + _open_datatree_netcdf, find_root_and_group, ) from xarray.backends.file_manager import CachingFileManager, DummyFileManager @@ -432,37 +433,7 @@ def open_datatree( ) -> DataTree: from h5netcdf.legacyapi import Dataset as ncDataset - from xarray.backends.api import open_dataset - from xarray.datatree_.datatree import DataTree - from xarray.datatree_.datatree.treenode import NodePath - - ds = open_dataset(filename_or_obj, **kwargs) - tree_root = DataTree.from_dict({"/": ds}) - with ncDataset(filename_or_obj, mode="r") as ncds: - for path in _iter_nc_groups(ncds): - subgroup_ds = open_dataset(filename_or_obj, group=path, **kwargs) - - # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again - node_name = NodePath(path).name - new_node: DataTree = DataTree(name=node_name, data=subgroup_ds) - tree_root._set_item( - path, - new_node, - allow_overwrite=False, - new_nodes_along_path=True, - ) - return tree_root - - -# TODO [MHS, 01/23/2024] directly duplicated from netCDF4 backend -def _iter_nc_groups(root, parent="/"): - from xarray.datatree_.datatree.treenode import NodePath - - parent = NodePath(parent) - for path, group in root.groups.items(): - gpath = parent / path - yield str(gpath) - yield from _iter_nc_groups(group, parent=gpath) + return _open_datatree_netcdf(ncDataset, filename_or_obj, **kwargs) BACKEND_ENTRYPOINTS["h5netcdf"] = ("h5netcdf", H5netcdfBackendEntrypoint) diff --git a/xarray/backends/netCDF4_.py b/xarray/backends/netCDF4_.py index 8c3bf0ff77c..6720a67ae2f 100644 --- a/xarray/backends/netCDF4_.py +++ b/xarray/backends/netCDF4_.py @@ -16,6 +16,7 @@ BackendEntrypoint, WritableCFDataStore, _normalize_path, + _open_datatree_netcdf, find_root_and_group, robust_getitem, ) @@ -675,36 +676,7 @@ def open_datatree( ) -> DataTree: from netCDF4 import Dataset as ncDataset - from xarray.backends.api import open_dataset - from xarray.datatree_.datatree import DataTree - from xarray.datatree_.datatree.treenode import NodePath - - ds = open_dataset(filename_or_obj, **kwargs) - tree_root = DataTree.from_dict({"/": ds}) - with ncDataset(filename_or_obj, mode="r") as ncds: - for path in _iter_nc_groups(ncds): - subgroup_ds = open_dataset(filename_or_obj, group=path, **kwargs) - - # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again - node_name = NodePath(path).name - new_node: DataTree = DataTree(name=node_name, data=subgroup_ds) - tree_root._set_item( - path, - new_node, - allow_overwrite=False, - new_nodes_along_path=True, - ) - return tree_root - - -def _iter_nc_groups(root, parent="/"): - from xarray.datatree_.datatree.treenode import NodePath - - parent = NodePath(parent) - for path, group in root.groups.items(): - gpath = parent / path - yield str(gpath) - yield from _iter_nc_groups(group, parent=gpath) + return _open_datatree_netcdf(ncDataset, filename_or_obj, **kwargs) BACKEND_ENTRYPOINTS["netcdf4"] = ("netCDF4", NetCDF4BackendEntrypoint) From 3b1224c484f1ffd4cb565e70c998db1b3136ab5b Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Tue, 6 Feb 2024 09:32:21 -0700 Subject: [PATCH 13/24] Removes TODO comment --- 
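With the shared _open_datatree_netcdf helper from the previous patch in place, the
duplicate-code TODO removed below is no longer accurate. For illustration only, the
_iter_nc_groups generator backing that helper walks nested groups in pre-order, so a
hypothetical file containing groups /set1, /set1/sub and /set2 would give:

    list(_iter_nc_groups(ncds))
    # -> ["/set1", "/set1/sub", "/set2"]
    # open_datatree then calls open_dataset(filename_or_obj, group=path, ...)
    # once per yielded path and grafts each Dataset onto the tree.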
xarray/backends/h5netcdf_.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/backends/h5netcdf_.py b/xarray/backends/h5netcdf_.py index 27da5477a74..b7c1b2a5f03 100644 --- a/xarray/backends/h5netcdf_.py +++ b/xarray/backends/h5netcdf_.py @@ -425,7 +425,6 @@ def open_dataset( # type: ignore[override] # allow LSP violation, not supporti ) return ds - # TODO [MHS, 01/23/2024] This is duplicative of the netcdf4 code in an ugly way. def open_datatree( self, filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore, From 20d8691d095b17438330751dca44a9557d522e27 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Thu, 8 Feb 2024 10:03:35 -0700 Subject: [PATCH 14/24] typo fix Co-authored-by: Tom Nicholas --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 1c981bc5aa7..5febc59e0e9 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -801,7 +801,7 @@ def open_datatree( filename_or_obj : str, Path, file-like, or DataStore Strings and Path objects are interpreted as a path to a netCDF file or Zarr store. engine : str, optional - Xarray backend engine to us. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`. + Xarray backend engine to use. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`. kwargs : Additional keyword arguments passed to :py:meth:`~xarray.open_dataset` for each group. Returns From 221bc8ce498485d475134f7cf74339ef4cfedb4a Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Thu, 8 Feb 2024 10:08:10 -0700 Subject: [PATCH 15/24] typo 2 Co-authored-by: Tom Nicholas --- xarray/backends/api.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 5febc59e0e9..6821d7e7396 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -795,7 +795,8 @@ def open_datatree( **kwargs, ) -> DataTree: """ - Open and decode a dataset from a file or file-like object, creating one Tree node for each group in the file. + Open and decode a DataTree from a file or file-like object, creating one tree node for each group in the file. + Parameters ---------- filename_or_obj : str, Path, file-like, or DataStore From b74764eced9af005220c93351cce5b87d757c3d5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 8 Feb 2024 17:08:54 +0000 Subject: [PATCH 16/24] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 6821d7e7396..95158d0559f 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -796,7 +796,7 @@ def open_datatree( ) -> DataTree: """ Open and decode a DataTree from a file or file-like object, creating one tree node for each group in the file. 
- + Parameters ---------- filename_or_obj : str, Path, file-like, or DataStore From 4280d303c23da9476ef52aba01f56f4f2e99157f Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Thu, 8 Feb 2024 10:24:04 -0700 Subject: [PATCH 17/24] Call raised exception --- xarray/backends/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/common.py b/xarray/backends/common.py index 187a6e5d0a5..134a713bd31 100644 --- a/xarray/backends/common.py +++ b/xarray/backends/common.py @@ -542,7 +542,7 @@ def open_dataset( Backend open_dataset method used by Xarray in :py:func:`~xarray.open_dataset`. """ - raise NotImplementedError + raise NotImplementedError() def guess_can_open( self, @@ -563,7 +563,7 @@ def open_datatree( Backend open_datatree method used by Xarray in :py:func:`~xarray.open_datatree`. """ - raise NotImplementedError + raise NotImplementedError() # mapping of engine name to (module name, BackendEntrypoint Class) From 8c54465666f8e086b26c5cf185e00ae427c4f819 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 12 Feb 2024 10:15:43 -0700 Subject: [PATCH 18/24] Add unpacking notation to kwargs --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 95158d0559f..4e597f40f96 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -803,7 +803,7 @@ def open_datatree( Strings and Path objects are interpreted as a path to a netCDF file or Zarr store. engine : str, optional Xarray backend engine to use. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`. - kwargs : + **kwargs : dict Additional keyword arguments passed to :py:meth:`~xarray.open_dataset` for each group. Returns ------- From afba7ba18941af1bbc127d48a5ee1ec1bb41cbf8 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 12 Feb 2024 10:20:28 -0700 Subject: [PATCH 19/24] Use final location for DataTree doc strings Co-authored-by: Justus Magin --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 4e597f40f96..dcea6087c57 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -807,7 +807,7 @@ def open_datatree( Additional keyword arguments passed to :py:meth:`~xarray.open_dataset` for each group. Returns ------- - xarray.core.datatree.DataTree + xarray.DataTree """ if engine is None: engine = plugins.guess_engine(filename_or_obj) From aab1744d52a466ac03829cda171c09beb1cc0704 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 12 Feb 2024 11:02:28 -0700 Subject: [PATCH 20/24] fix comment from open_dataset to open_datatree Co-authored-by: Justus Magin --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index dcea6087c57..092a7f21b06 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -804,7 +804,7 @@ def open_datatree( engine : str, optional Xarray backend engine to use. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`. **kwargs : dict - Additional keyword arguments passed to :py:meth:`~xarray.open_dataset` for each group. + Additional keyword arguments passed to :py:func:`~xarray.open_datatree` for each group. 
Returns ------- xarray.DataTree From 5b4897335ea62f974ba37d3b2f6e860fd13fcbd4 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Mon, 12 Feb 2024 11:18:16 -0700 Subject: [PATCH 21/24] Revert "fix comment from open_dataset to open_datatree" This reverts commit aab1744d52a466ac03829cda171c09beb1cc0704. --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 092a7f21b06..dcea6087c57 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -804,7 +804,7 @@ def open_datatree( engine : str, optional Xarray backend engine to use. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`. **kwargs : dict - Additional keyword arguments passed to :py:func:`~xarray.open_datatree` for each group. + Additional keyword arguments passed to :py:meth:`~xarray.open_dataset` for each group. Returns ------- xarray.DataTree From c6bb18aec0031734b5d6d553319cc4535a4d08f2 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Tue, 13 Feb 2024 08:02:06 -0700 Subject: [PATCH 22/24] Change sphynx link from meth to func --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index dcea6087c57..eace037b86c 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -804,7 +804,7 @@ def open_datatree( engine : str, optional Xarray backend engine to use. Valid options include `{"netcdf4", "h5netcdf", "zarr"}`. **kwargs : dict - Additional keyword arguments passed to :py:meth:`~xarray.open_dataset` for each group. + Additional keyword arguments passed to :py:func:`~xarray.open_dataset` for each group. Returns ------- xarray.DataTree From d386ed3d5089ea4e2575053e513228b27011d373 Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 14 Feb 2024 12:55:03 -0700 Subject: [PATCH 23/24] Update whats-new.rst --- doc/whats-new.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 50eece5f0af..25e40ecc348 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -93,6 +93,13 @@ Internal Changes - Move `parallelcompat` and `chunk managers` modules from `xarray/core` to `xarray/namedarray`. (:pull:`8319`) By `Tom Nicholas `_ and `Anderson Banihirwe `_. +- Imports ``datatree`` repository and history into internal + location. (:pull:`8688`) By `Matt Savoie `_ + and `Justus Magin `_. + +- Adds :py:func:`open_datatree` into `xarray/backends` (:pull:`8697`) By `Matt + Savoie `_. + .. _whats-new.2024.01.1: v2024.01.1 (23 Jan, 2024) From e291587f330c1e3772a55d4d6d68e18cd9c4639c Mon Sep 17 00:00:00 2001 From: Matt Savoie Date: Wed, 14 Feb 2024 14:14:43 -0700 Subject: [PATCH 24/24] Fix what-new.rst formatting. --- doc/whats-new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 25e40ecc348..16562ed0988 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -90,14 +90,14 @@ Internal Changes when the data isn't datetime-like. (:issue:`8718`, :pull:`8724`) By `Maximilian Roos `_. -- Move `parallelcompat` and `chunk managers` modules from `xarray/core` to `xarray/namedarray`. (:pull:`8319`) +- Move ``parallelcompat`` and ``chunk managers`` modules from ``xarray/core`` to ``xarray/namedarray``. (:pull:`8319`) By `Tom Nicholas `_ and `Anderson Banihirwe `_. - Imports ``datatree`` repository and history into internal location. (:pull:`8688`) By `Matt Savoie `_ and `Justus Magin `_. 
-- Adds :py:func:`open_datatree` into `xarray/backends` (:pull:`8697`) By `Matt +- Adds :py:func:`open_datatree` into ``xarray/backends`` (:pull:`8697`) By `Matt Savoie `_. .. _whats-new.2024.01.1:
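Taken together, these patches let a whole grouped netCDF or Zarr store be opened as a
single DataTree. A minimal usage sketch (the file name is hypothetical; the group
layout mirrors the /set1 and /set2 groups used in the tests):

    import xarray as xr

    # One tree node per group in the file; engine may also be "h5netcdf" or "zarr".
    dt = xr.open_datatree("example_groups.nc", engine="netcdf4")

    # Nodes and their variables are addressed by group path, as in test_io.py.
    print(dt["/set2/a"])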