From a3972f5451a1c82ea2191e435e108e23d9c17a2f Mon Sep 17 00:00:00 2001 From: eschalk Date: Sun, 18 Aug 2024 14:05:43 +0200 Subject: [PATCH 01/10] Make illegal path-like variable names when constructing a DataTree from a Dataset --- doc/whats-new.rst | 3 +++ xarray/core/datatree.py | 16 +++++++++++++++- xarray/tests/test_datatree.py | 17 +++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c8f3a40e87f..9fda62f99ba 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -37,6 +37,9 @@ Deprecations Bug fixes ~~~~~~~~~ +- Make illegal path-like variable names when constructing a DataTree from a Dataset + (:issue:`311`, :pull:`314`) + By `Etienne Schalk `_. - Fix bug with rechunking to a frequency when some periods contain no data (:issue:`9360`). By `Deepak Cherian `_. - Fix bug causing `DataTree.from_dict` to be sensitive to insertion order (:issue:`9276`, :pull:`9292`). diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 1b8a5ffbf38..cd484898e65 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -158,6 +158,18 @@ def _check_alignment( _check_alignment(child_path, child_ds, base_ds, child.children) +def _check_for_slashes_in_names(variables: Iterable[Hashable]) -> None: + offending_variable_names = [ + name for name in variables if isinstance(name, str) and "/" in name + ] + if len(offending_variable_names) > 0: + raise KeyError( + f"Given Dataset contains path-like variable names: {offending_variable_names}. " + "A Dataset represents a group, and a single group " + "cannot have path-like variable names. " + ) + + class DatasetView(Dataset): """ An immutable Dataset-like view onto the data in a single DataTree node. @@ -459,7 +471,9 @@ def __init__( children = {} super().__init__(name=name) - self._set_node_data(_coerce_to_dataset(data)) + ds = _coerce_to_dataset(data) + self._set_node_data(ds) + _check_for_slashes_in_names(ds.variables) self.parent = parent self.children = children diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 9a15376a1f8..7fa27e23fcd 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -72,6 +72,23 @@ def test_child_gets_named_on_attach(self): mary: DataTree = DataTree(children={"Sue": sue}) # noqa assert sue.name == "Sue" + def test_dataset_containing_slashes(self): + xda: xr.DataArray = xr.DataArray( + [[1, 2]], + coords={"label": ["a"], "R30m/y": [30, 60]}, + ) + xds: xr.Dataset = xr.Dataset({"group/subgroup/my_variable": xda}) + with pytest.raises( + KeyError, + match=re.escape( + "Given Dataset contains path-like variable names: " + "['R30m/y', 'group/subgroup/my_variable']. " + "A Dataset represents a group, and a single group cannot " + "have path-like variable names. " + ), + ): + DataTree(xds) + class TestPaths: def test_path_property(self): From c8ce30a5a257e15440bfee19bfd3f9df4beffa1f Mon Sep 17 00:00:00 2001 From: eschalk Date: Sun, 18 Aug 2024 14:14:41 +0200 Subject: [PATCH 02/10] Updated whats-new.rst --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9fda62f99ba..b165ea1b655 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -38,7 +38,7 @@ Bug fixes ~~~~~~~~~ - Make illegal path-like variable names when constructing a DataTree from a Dataset - (:issue:`311`, :pull:`314`) + (:issue:`9339`, :pull:`9378`) By `Etienne Schalk `_. - Fix bug with rechunking to a frequency when some periods contain no data (:issue:`9360`). By `Deepak Cherian `_. From 6bf09820ec72c300f9fb6fefa6e53f5351633a19 Mon Sep 17 00:00:00 2001 From: eschalk Date: Mon, 19 Aug 2024 22:03:36 +0200 Subject: [PATCH 03/10] PR comments --- xarray/core/datatree.py | 7 ++++--- xarray/tests/test_datatree.py | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index cd484898e65..cd61de76877 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -164,9 +164,10 @@ def _check_for_slashes_in_names(variables: Iterable[Hashable]) -> None: ] if len(offending_variable_names) > 0: raise KeyError( - f"Given Dataset contains path-like variable names: {offending_variable_names}. " + "Given Dataset contains variable names with '/': " + f"{offending_variable_names}. " "A Dataset represents a group, and a single group " - "cannot have path-like variable names. " + "cannot have path-like variable names with '/' characters in them. " ) @@ -473,11 +474,11 @@ def __init__( super().__init__(name=name) ds = _coerce_to_dataset(data) self._set_node_data(ds) - _check_for_slashes_in_names(ds.variables) self.parent = parent self.children = children def _set_node_data(self, ds: Dataset): + _check_for_slashes_in_names(ds.variables) data_vars, coord_vars = _collect_data_and_coord_variables(ds) self._data_variables = data_vars self._node_coord_variables = coord_vars diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 7fa27e23fcd..97a41402aad 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -81,10 +81,10 @@ def test_dataset_containing_slashes(self): with pytest.raises( KeyError, match=re.escape( - "Given Dataset contains path-like variable names: " + "Given Dataset contains variable names with '/': " "['R30m/y', 'group/subgroup/my_variable']. " - "A Dataset represents a group, and a single group cannot " - "have path-like variable names. " + "A Dataset represents a group, and a single group " + "cannot have path-like variable names with '/' characters in them. " ), ): DataTree(xds) From fd3edff84051b30fcebd92e60528172f8d62af5e Mon Sep 17 00:00:00 2001 From: eschalk Date: Mon, 19 Aug 2024 22:07:15 +0200 Subject: [PATCH 04/10] Revert diff --- xarray/core/datatree.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index cd61de76877..4a2275bcc84 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -472,8 +472,7 @@ def __init__( children = {} super().__init__(name=name) - ds = _coerce_to_dataset(data) - self._set_node_data(ds) + self._set_node_data(_coerce_to_dataset(data)) self.parent = parent self.children = children From bc15c1ce8e122a1464af4c4a754d32ca3d60f5bb Mon Sep 17 00:00:00 2001 From: Etienne Schalk <45271239+etienneschalk@users.noreply.github.com> Date: Tue, 10 Sep 2024 21:59:10 +0200 Subject: [PATCH 05/10] Update xarray/core/datatree.py Co-authored-by: Tom Nicholas --- xarray/core/datatree.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 0d33454ecb0..81659005df2 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -160,10 +160,9 @@ def _check_for_slashes_in_names(variables: Iterable[Hashable]) -> None: ] if len(offending_variable_names) > 0: raise KeyError( - "Given Dataset contains variable names with '/': " + "Given variables have names containing the '/' character: " f"{offending_variable_names}. " - "A Dataset represents a group, and a single group " - "cannot have path-like variable names with '/' characters in them. " + "Variables stored in DataTree objects cannot have names containing '/' characters, as this would make path-like access to variables ambiguous." ) From f7af02d798355c17ff6c01b5d9ec63e5d3cf0800 Mon Sep 17 00:00:00 2001 From: Etienne Schalk <45271239+etienneschalk@users.noreply.github.com> Date: Tue, 10 Sep 2024 22:00:13 +0200 Subject: [PATCH 06/10] Update xarray/core/datatree.py Co-authored-by: Tom Nicholas --- xarray/core/datatree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 81659005df2..f72c3a52f03 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -159,7 +159,7 @@ def _check_for_slashes_in_names(variables: Iterable[Hashable]) -> None: name for name in variables if isinstance(name, str) and "/" in name ] if len(offending_variable_names) > 0: - raise KeyError( + raise ValueError( "Given variables have names containing the '/' character: " f"{offending_variable_names}. " "Variables stored in DataTree objects cannot have names containing '/' characters, as this would make path-like access to variables ambiguous." From 7d2289fb53b1a790fe7e61e033cb038d40360e74 Mon Sep 17 00:00:00 2001 From: Etienne Schalk <45271239+etienneschalk@users.noreply.github.com> Date: Tue, 10 Sep 2024 22:00:20 +0200 Subject: [PATCH 07/10] Update xarray/tests/test_datatree.py Co-authored-by: Tom Nicholas --- xarray/tests/test_datatree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index c2d19fe36a3..66c350f3044 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -89,7 +89,7 @@ def test_dataset_containing_slashes(self): ) xds: xr.Dataset = xr.Dataset({"group/subgroup/my_variable": xda}) with pytest.raises( - KeyError, + ValueError, match=re.escape( "Given Dataset contains variable names with '/': " "['R30m/y', 'group/subgroup/my_variable']. " From 107603b12f1c3d0bf407d95a826af10250e56ca4 Mon Sep 17 00:00:00 2001 From: eschalk Date: Tue, 10 Sep 2024 22:06:17 +0200 Subject: [PATCH 08/10] Update expected Exception message in test --- xarray/tests/test_datatree.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_datatree.py b/xarray/tests/test_datatree.py index 66c350f3044..4c18b28da03 100644 --- a/xarray/tests/test_datatree.py +++ b/xarray/tests/test_datatree.py @@ -91,10 +91,10 @@ def test_dataset_containing_slashes(self): with pytest.raises( ValueError, match=re.escape( - "Given Dataset contains variable names with '/': " + "Given variables have names containing the '/' character: " "['R30m/y', 'group/subgroup/my_variable']. " - "A Dataset represents a group, and a single group " - "cannot have path-like variable names with '/' characters in them. " + "Variables stored in DataTree objects cannot have names containing '/' characters, " + "as this would make path-like access to variables ambiguous." ), ): DataTree(xds) From 95384ac4a993f122662c6a936253c0e0589e2a73 Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 12 Sep 2024 09:32:44 -0600 Subject: [PATCH 09/10] Merge changes from #9476 --- xarray/core/datatree.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 5759c95b032..85b3c871a96 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -468,14 +468,9 @@ def __init__( # shallow copy to avoid modifying arguments in-place (see GH issue #9196) self.children = {name: child.copy() for name, child in children.items()} -<<<<<<< eschalk/make-illegal-path-like-variable-names-when-dt-from-ds - def _set_node_data(self, ds: Dataset): - _check_for_slashes_in_names(ds.variables) - data_vars, coord_vars = _collect_data_and_coord_variables(ds) -======= def _set_node_data(self, dataset: Dataset): + _check_for_slashes_in_names(ds.variables) data_vars, coord_vars = _collect_data_and_coord_variables(dataset) ->>>>>>> main self._data_variables = data_vars self._node_coord_variables = coord_vars self._node_dims = dataset._dims From 5886af55f61e3311005e103ecc30a4ffc2f886bc Mon Sep 17 00:00:00 2001 From: Tom Nicholas Date: Thu, 12 Sep 2024 09:33:13 -0600 Subject: [PATCH 10/10] Fix --- xarray/core/datatree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/datatree.py b/xarray/core/datatree.py index 85b3c871a96..2ab7a5771b3 100644 --- a/xarray/core/datatree.py +++ b/xarray/core/datatree.py @@ -469,7 +469,7 @@ def __init__( self.children = {name: child.copy() for name, child in children.items()} def _set_node_data(self, dataset: Dataset): - _check_for_slashes_in_names(ds.variables) + _check_for_slashes_in_names(dataset.variables) data_vars, coord_vars = _collect_data_and_coord_variables(dataset) self._data_variables = data_vars self._node_coord_variables = coord_vars