From 2a237a0da28ac8131a2384a9372baf77abe1ef72 Mon Sep 17 00:00:00 2001 From: Keewis Date: Mon, 25 Jan 2021 17:36:41 +0100 Subject: [PATCH 01/11] add a combine_attrs kwarg to Dataset.merge --- xarray/core/dataset.py | 14 ++++++++++++++ xarray/core/merge.py | 8 +++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 874e26ff465..d31f28649fd 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3843,6 +3843,7 @@ def merge( compat: str = "no_conflicts", join: str = "outer", fill_value: Any = dtypes.NA, + combine_attrs: str = "override", ) -> "Dataset": """Merge the arrays of two datasets into a single dataset. @@ -3880,9 +3881,21 @@ def merge( - 'left': use indexes from ``self`` - 'right': use indexes from ``other`` - 'exact': error instead of aligning non-equal indexes + fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps variable names (including coordinates) to fill values. + combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \ + default: "drop" + String indicating how to combine attrs of the objects being merged: + + - "drop": empty attrs on returned Dataset. + - "identical": all attrs must be the same on every object. + - "no_conflicts": attrs from all objects are combined, any that have + the same name must also have the same value. + - "override": skip comparing and copy attrs from the first dataset to + the result. + Returns ------- @@ -3902,6 +3915,7 @@ def merge( compat=compat, join=join, fill_value=fill_value, + combine_attrs=combine_attrs, ) return self._replace(**merge_result._asdict()) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index d29a9e1ff02..f6386e7e748 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -872,6 +872,7 @@ def dataset_merge_method( compat: str, join: str, fill_value: Any, + combine_attrs: str, ) -> _MergeResult: """Guts of the Dataset.merge method.""" # we are locked into supporting overwrite_vars for the Dataset.merge @@ -901,7 +902,12 @@ def dataset_merge_method( priority_arg = 2 return merge_core( - objs, compat, join, priority_arg=priority_arg, fill_value=fill_value + objs, + compat, + join, + priority_arg=priority_arg, + fill_value=fill_value, + combine_attrs=combine_attrs, ) From 1d6a2e7beff6c35ff6e85972f5529eeb0ed0f8a8 Mon Sep 17 00:00:00 2001 From: Keewis Date: Thu, 11 Feb 2021 23:23:01 +0100 Subject: [PATCH 02/11] document the new drop_conflicts value --- xarray/core/dataset.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 93f7cb98a80..73583caac17 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3939,14 +3939,16 @@ def merge( fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps variable names (including coordinates) to fill values. - combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \ - default: "drop" + combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ + "override"}, default: "drop" String indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. - "identical": all attrs must be the same on every object. - "no_conflicts": attrs from all objects are combined, any that have the same name must also have the same value. + - "drop_conflicts": attrs from all objects are combined, any that have + the same name but different values are dropped. - "override": skip comparing and copy attrs from the first dataset to the result. From 09c93430673e2d211c9c176cd08c9f25d5dd8484 Mon Sep 17 00:00:00 2001 From: Keewis Date: Thu, 11 Feb 2021 23:40:37 +0100 Subject: [PATCH 03/11] test that combine_attrs is passed through --- xarray/tests/test_merge.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py index 27e2b10dcbc..5b84eccca14 100644 --- a/xarray/tests/test_merge.py +++ b/xarray/tests/test_merge.py @@ -418,3 +418,34 @@ def test_merge_dataarray(self): da = xr.DataArray(data=1, name="b") assert_identical(ds.merge(da), xr.merge([ds, da])) + + @pytest.mark.parametrize( + ["combine_attrs", "attrs1", "attrs2", "expected_attrs", "expect_error"], + # don't need to test thoroughly + ( + ("drop", {"a": 0, "b": 1, "c": 2}, {"a": 1, "b": 2, "c": 3}, {}, False), + ( + "drop_conflicts", + {"a": 0, "b": 1, "c": 2}, + {"b": 2, "c": 2, "d": 3}, + {"a": 0, "c": 2, "d": 3}, + False, + ), + ("override", {"a": 0, "b": 1}, {"a": 1, "b": 2}, {"a": 0, "b": 1}, False), + ("no_conflicts", {"a": 0, "b": 1}, {"a": 0, "b": 2}, None, True), + ("identical", {"a": 0, "b": 1}, {"a": 0, "b": 2}, None, True), + ), + ) + def test_merge_combine_attrs( + self, combine_attrs, attrs1, attrs2, expected_attrs, expect_error + ): + ds1 = xr.Dataset(attrs=attrs1) + ds2 = xr.Dataset(attrs=attrs2) + + if expect_error: + with pytest.raises(xr.MergeError): + ds1.merge(ds2, combine_attrs=combine_attrs) + else: + actual = ds1.merge(ds2, combine_attrs=combine_attrs) + expected = xr.Dataset(attrs=expected_attrs) + assert_identical(actual, expected) From 4263e1110fae27d24f93a8cf5025bc6809569770 Mon Sep 17 00:00:00 2001 From: Keewis Date: Thu, 11 Feb 2021 23:46:10 +0100 Subject: [PATCH 04/11] fix the documented default of combine_attrs --- xarray/core/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 73583caac17..589d1eda930 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3940,7 +3940,7 @@ def merge( Value to use for newly missing values. If a dict-like, maps variable names (including coordinates) to fill values. combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \ - "override"}, default: "drop" + "override"}, default: "override" String indicating how to combine attrs of the objects being merged: - "drop": empty attrs on returned Dataset. From 61c63c050162b27e23e123d13f290f394017bb17 Mon Sep 17 00:00:00 2001 From: Keewis Date: Thu, 11 Feb 2021 23:48:12 +0100 Subject: [PATCH 05/11] update whats-new.rst --- doc/whats-new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4b06003b630..46492124932 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -65,13 +65,13 @@ New Features contain missing values; 8x faster in our benchmark, and 2x faster than pandas. (:pull:`4746`); By `Maximilian Roos `_. - - Performance improvement when constructing DataArrays. Significantly speeds up repr for Datasets with large number of variables. By `Deepak Cherian `_ - add ``"drop_conflicts"`` to the strategies supported by the ``combine_attrs`` kwarg (:issue:`4749`, :pull:`4827`). By `Justus Magin `_. - By `Deepak Cherian `_. +- allow passing ``combine_attrs`` to :py:meth:`Dataset.merge` (:pull:`4895`). + By `Justus Magin `_. - :py:meth:`DataArray.swap_dims` & :py:meth:`Dataset.swap_dims` now accept dims in the form of kwargs as well as a dict, like most similar methods. By `Maximilian Roos `_. From ae17625918e81cd1540dd93d04ee721a4d2a600c Mon Sep 17 00:00:00 2001 From: Keewis Date: Thu, 11 Feb 2021 23:49:59 +0100 Subject: [PATCH 06/11] minor fix [skip-ci] --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 46492124932..63e435303ce 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -66,7 +66,7 @@ New Features (:pull:`4746`); By `Maximilian Roos `_. - Performance improvement when constructing DataArrays. Significantly speeds up repr for Datasets with large number of variables. - By `Deepak Cherian `_ + By `Deepak Cherian `_. - add ``"drop_conflicts"`` to the strategies supported by the ``combine_attrs`` kwarg (:issue:`4749`, :pull:`4827`). By `Justus Magin `_. From 3d93e681d0c8ac88679d784946fd74ef73eb85a9 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 23 Feb 2021 20:42:14 +0100 Subject: [PATCH 07/11] minor fix [skip-ci] --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index d73a75f7610..90a142b7625 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -87,7 +87,7 @@ New Features - add ``"drop_conflicts"`` to the strategies supported by the ``combine_attrs`` kwarg (:issue:`4749`, :pull:`4827`). By `Justus Magin `_. -- allow passing ``combine_attrs`` to :py:meth:`Dataset.merge` (:pull:`4895`). +- Allow passing ``combine_attrs`` to :py:meth:`Dataset.merge` (:pull:`4895`). By `Justus Magin `_. - :py:meth:`DataArray.swap_dims` & :py:meth:`Dataset.swap_dims` now accept dims in the form of kwargs as well as a dict, like most similar methods. From 689b69070357a0fcc3c124b75b73c14b106ef8e6 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 23 Feb 2021 20:42:49 +0100 Subject: [PATCH 08/11] remove a empty line [skip-ci] --- doc/whats-new.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 90a142b7625..3f0ce3872e4 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -94,7 +94,6 @@ New Features By `Maximilian Roos `_. - Allow installing from git archives (:pull:`4897`). By `Justus Magin `_. - - :py:func:`open_dataset` and :py:func:`open_mfdataset` now accept ``fsspec`` URLs (including globs for the latter) for ``engine="zarr"``, and so allow reading from many remote and other file systems (:pull:`4461`) From 4c589cd8696f331a7dbd0ef98654a685f51d9a65 Mon Sep 17 00:00:00 2001 From: Keewis Date: Tue, 23 Feb 2021 20:46:47 +0100 Subject: [PATCH 09/11] fix bad merge [skip-ci] --- doc/whats-new.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 974c66e5838..75495912037 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -85,7 +85,6 @@ New Features contain missing values; 8x faster in our benchmark, and 2x faster than pandas. (:pull:`4746`); By `Maximilian Roos `_. -- Performance improvement when constructing DataArrays. Significantly speeds up repr for Datasets with large number of variables. - Add :py:meth:`Dataset.plot.quiver` for quiver plots with :py:class:`Dataset` variables. By `Deepak Cherian `_. - add ``"drop_conflicts"`` to the strategies supported by the ``combine_attrs`` kwarg From 7d6416a24ea3ff97e10c1559abea2390fcd5cdad Mon Sep 17 00:00:00 2001 From: Keewis Date: Fri, 26 Feb 2021 00:46:08 +0100 Subject: [PATCH 10/11] fix bad merge [skip-ci] --- doc/whats-new.rst | 9 --------- 1 file changed, 9 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 48541d5ed59..fd998fd6336 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -109,15 +109,6 @@ New Features By `Justus Magin `_. - Allow passing ``combine_attrs`` to :py:meth:`Dataset.merge` (:pull:`4895`). By `Justus Magin `_. -- :py:meth:`DataArray.swap_dims` & :py:meth:`Dataset.swap_dims` now accept dims - in the form of kwargs as well as a dict, like most similar methods. - By `Maximilian Roos `_. -- Allow installing from git archives (:pull:`4897`). - By `Justus Magin `_. -- :py:func:`open_dataset` and :py:func:`open_mfdataset` now accept ``fsspec`` URLs - (including globs for the latter) for ``engine="zarr"``, and so allow reading from - many remote and other file systems (:pull:`4461`) - By `Martin Durant `_ - Allow installing from git archives (:pull:`4897`). By `Justus Magin `_. - :py:class:`DataArrayCoarsen` and :py:class:`DatasetCoarsen` now implement a From fd4c3289a13c6e473c935e3b55a20452cbb6f53b Mon Sep 17 00:00:00 2001 From: Keewis Date: Fri, 26 Feb 2021 00:58:16 +0100 Subject: [PATCH 11/11] remove the blank line after rst lists [skip-ci] --- xarray/core/dataset.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index addf3d13b5d..a3f329fccd4 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -3935,7 +3935,6 @@ def merge( - 'no_conflicts': only values which are not null in both datasets must be equal. The returned dataset then contains the combination of all non-null values. - join : {"outer", "inner", "left", "right", "exact"}, optional Method for joining ``self`` and ``other`` along shared dimensions: @@ -3944,7 +3943,6 @@ def merge( - 'left': use indexes from ``self`` - 'right': use indexes from ``other`` - 'exact': error instead of aligning non-equal indexes - fill_value : scalar or dict-like, optional Value to use for newly missing values. If a dict-like, maps variable names (including coordinates) to fill values. @@ -3961,7 +3959,6 @@ def merge( - "override": skip comparing and copy attrs from the first dataset to the result. - Returns ------- merged : Dataset