pydata · dcherian · Feb 10, 2021 · Jan 19, 2021 · Jan 19, 2021 · Jan 19, 2021
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -58,6 +58,10 @@ New Features
   By `Maximilian Roos <https://github.com/max-sixty>`_.
 
 - Performance improvement when constructing DataArrays. Significantly speeds up repr for Datasets with large number of variables.
+  By `Deepak Cherian <https://github.com/dcherian>`_
+- add ``"drop_conflicts"`` to the strategies supported by the ``combine_attrs`` kwarg
+  (:issue:`4749`, :pull:`4827`).
+  By `Justus Magin <https://github.com/keewis>`_.
   By `Deepak Cherian <https://github.com/dcherian>`_.
 - :py:meth:`DataArray.swap_dims` & :py:meth:`Dataset.swap_dims` now accept dims
   in the form of kwargs as well as a dict, like most similar methods.

diff --git a/xarray/core/combine.py b/xarray/core/combine.py
@@ -412,14 +412,16 @@ def combine_nested(
         - "override": if indexes are of same size, rewrite indexes to be
           those of the first object with that dimension. Indexes for the same
           dimension must have the same size in all objects.
-    combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \
-                    default: "drop"
+    combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \
+                     "override"}, default: "drop"
         String indicating how to combine attrs of the objects being merged:
 
         - "drop": empty attrs on returned Dataset.
         - "identical": all attrs must be the same on every object.
         - "no_conflicts": attrs from all objects are combined, any that have
           the same name must also have the same value.
+        - "drop_conflicts": attrs from all objects are combined, any that have
+          the same name but different values are dropped.
         - "override": skip comparing and copy attrs from the first dataset to
           the result.
 
@@ -625,14 +627,16 @@ def combine_by_coords(
         - "override": if indexes are of same size, rewrite indexes to be
           those of the first object with that dimension. Indexes for the same
           dimension must have the same size in all objects.
-    combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \
-                    default: "drop"
+    combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \
+                     "override"}, default: "drop"
         String indicating how to combine attrs of the objects being merged:
 
         - "drop": empty attrs on returned Dataset.
         - "identical": all attrs must be the same on every object.
         - "no_conflicts": attrs from all objects are combined, any that have
           the same name must also have the same value.
+        - "drop_conflicts": attrs from all objects are combined, any that have
+          the same name but different values are dropped.
         - "override": skip comparing and copy attrs from the first dataset to
           the result.
 

diff --git a/xarray/core/concat.py b/xarray/core/concat.py
@@ -142,14 +142,16 @@ def concat(
         - "override": if indexes are of same size, rewrite indexes to be
           those of the first object with that dimension. Indexes for the same
           dimension must have the same size in all objects.
-    combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \
-                    default: "override"
+    combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \
+                     "override"}, default: "override"
         String indicating how to combine attrs of the objects being merged:
 
         - "drop": empty attrs on returned Dataset.
         - "identical": all attrs must be the same on every object.
         - "no_conflicts": attrs from all objects are combined, any that have
           the same name must also have the same value.
+        - "drop_conflicts": attrs from all objects are combined, any that have
+          the same name but different values are dropped.
         - "override": skip comparing and copy attrs from the first dataset to
           the result.
 

diff --git a/xarray/core/merge.py b/xarray/core/merge.py
@@ -20,7 +20,7 @@
 from . import dtypes, pdcompat
 from .alignment import deep_align
 from .duck_array_ops import lazy_array_equiv
-from .utils import Frozen, compat_dict_union, dict_equiv
+from .utils import Frozen, compat_dict_union, dict_equiv, equivalent
 from .variable import Variable, as_variable, assert_unique_multiindex_level_names
 
 if TYPE_CHECKING:
@@ -513,6 +513,24 @@ def merge_attrs(variable_attrs, combine_attrs):
                     "the same. Merging %s with %s" % (str(result), str(attrs))
                 )
         return result
+    elif combine_attrs == "drop_conflicts":
+        result = {}
+        dropped_keys = set()
+        for attrs in variable_attrs:
+            result.update(
+                {
+                    key: value
+                    for key, value in attrs.items()
+                    if key not in result and key not in dropped_keys
+                }
+            )
+            result = {
+                key: value
+                for key, value in result.items()
+                if key not in attrs or equivalent(attrs[key], value)
+            }
+            dropped_keys |= {key for key in attrs if key not in result}
+        return result
     elif combine_attrs == "identical":
         result = dict(variable_attrs[0])
         for attrs in variable_attrs[1:]:
@@ -556,7 +574,8 @@ def merge_core(
         Compatibility checks to use when merging variables.
     join : {"outer", "inner", "left", "right"}, optional
         How to combine objects with different indexes.
-    combine_attrs : {"drop", "identical", "no_conflicts", "override"}, optional
+    combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \
+                     "override"}, optional
         How to combine attributes of objects
     priority_arg : int, optional
         Optional argument in `objects` that takes precedence over the others.
@@ -668,14 +687,16 @@ def merge(
         Value to use for newly missing values. If a dict-like, maps
         variable names to fill values. Use a data array's name to
         refer to its values.
-    combine_attrs : {"drop", "identical", "no_conflicts", "override"}, \
-                    default: "drop"
+    combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \
+                     "override"}, default: "drop"
         String indicating how to combine attrs of the objects being merged:
 
         - "drop": empty attrs on returned Dataset.
         - "identical": all attrs must be the same on every object.
         - "no_conflicts": attrs from all objects are combined, any that have
           the same name must also have the same value.
+        - "drop_conflicts": attrs from all objects are combined, any that have
+          the same name but different values are dropped.
         - "override": skip comparing and copy attrs from the first dataset to
           the result.
 

diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py
@@ -732,6 +732,17 @@ def test_combine_coords_combine_attrs_identical(self):
                 objs, concat_dim="x", join="outer", combine_attrs="identical"
             )
 
+    def test_combine_nested_combine_attrs_drop_conflicts(self):
+        objs = [
+            Dataset({"x": [0], "y": [0]}, attrs={"a": 1, "b": 2, "c": 3}),
+            Dataset({"x": [1], "y": [1]}, attrs={"a": 1, "b": 0, "d": 3}),
+        ]
+        expected = Dataset({"x": [0, 1], "y": [0, 1]}, attrs={"a": 1, "c": 3, "d": 3})
+        actual = combine_nested(
+            objs, concat_dim="x", join="outer", combine_attrs="drop_conflicts"
+        )
+        assert_identical(expected, actual)
+
     def test_infer_order_from_coords(self):
         data = create_test_data()
         objs = [data.isel(dim2=slice(4, 9)), data.isel(dim2=slice(4))]

diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py
@@ -258,27 +258,118 @@ def test_concat_join_kwarg(self):
         )
         assert_identical(actual, expected)
 
-    def test_concat_combine_attrs_kwarg(self):
-        ds1 = Dataset({"a": ("x", [0])}, coords={"x": [0]}, attrs={"b": 42})
-        ds2 = Dataset({"a": ("x", [0])}, coords={"x": [1]}, attrs={"b": 42, "c": 43})
-
-        expected = {}
-        expected["drop"] = Dataset({"a": ("x", [0, 0])}, {"x": [0, 1]})
-        expected["no_conflicts"] = Dataset(
-            {"a": ("x", [0, 0])}, {"x": [0, 1]}, {"b": 42, "c": 43}
-        )
-        expected["override"] = Dataset({"a": ("x", [0, 0])}, {"x": [0, 1]}, {"b": 42})
-
-        with raises_regex(ValueError, "combine_attrs='identical'"):
-            actual = concat([ds1, ds2], dim="x", combine_attrs="identical")
-        with raises_regex(ValueError, "combine_attrs='no_conflicts'"):
-            ds3 = ds2.copy(deep=True)
-            ds3.attrs["b"] = 44
-            actual = concat([ds1, ds3], dim="x", combine_attrs="no_conflicts")
+    @pytest.mark.parametrize(
+        "combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception",
+        [
+            (
+                "no_conflicts",
+                {"a": 1, "b": 2},
+                {"a": 1, "c": 3},
+                {"a": 1, "b": 2, "c": 3},
+                False,
+            ),
+            ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False),
+            ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False),
+            (
+                "no_conflicts",
+                {"a": 1, "b": 2},
+                {"a": 4, "c": 3},
+                {"a": 1, "b": 2, "c": 3},
+                True,
+            ),
+            ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False),
+            ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False),
+            ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True),
+            (
+                "override",
+                {"a": 1, "b": 2},
+                {"a": 4, "b": 5, "c": 3},
+                {"a": 1, "b": 2},
+                False,
+            ),
+            (
+                "drop_conflicts",
+                {"a": 41, "b": 42, "c": 43},
+                {"b": 2, "c": 43, "d": 44},
+                {"a": 41, "c": 43, "d": 44},
+                False,
+            ),
+        ],
+    )
+    def test_concat_combine_attrs_kwarg(
+        self, combine_attrs, var1_attrs, var2_attrs, expected_attrs, expect_exception
+    ):
+        ds1 = Dataset({"a": ("x", [0])}, coords={"x": [0]}, attrs=var1_attrs)
+        ds2 = Dataset({"a": ("x", [0])}, coords={"x": [1]}, attrs=var2_attrs)
+
+        if expect_exception:
+            with pytest.raises(ValueError, match=f"combine_attrs='{combine_attrs}'"):
+                concat([ds1, ds2], dim="x", combine_attrs=combine_attrs)
+        else:
+            actual = concat([ds1, ds2], dim="x", combine_attrs=combine_attrs)
+            expected = Dataset(
+                {"a": ("x", [0, 0])}, {"x": [0, 1]}, attrs=expected_attrs
+            )
 
-        for combine_attrs in expected:
+            assert_identical(actual, expected)
+
+    @pytest.mark.skip(reason="not implemented, yet (see #4827)")
+    @pytest.mark.parametrize(
+        "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception",
+        [
+            (
+                "no_conflicts",
+                {"a": 1, "b": 2},
+                {"a": 1, "c": 3},
+                {"a": 1, "b": 2, "c": 3},
+                False,
+            ),
+            ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False),
+            ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False),
+            (
+                "no_conflicts",
+                {"a": 1, "b": 2},
+                {"a": 4, "c": 3},
+                {"a": 1, "b": 2, "c": 3},
+                True,
+            ),
+            ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False),
+            ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False),
+            ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True),
+            (
+                "override",
+                {"a": 1, "b": 2},
+                {"a": 4, "b": 5, "c": 3},
+                {"a": 1, "b": 2},
+                False,
+            ),
+            (
+                "drop_conflicts",
+                {"a": 41, "b": 42, "c": 43},
+                {"b": 2, "c": 43, "d": 44},
+                {"a": 41, "c": 43, "d": 44},
+                False,
+            ),
+        ],
+    )
+    def test_concat_combine_attrs_kwarg_variables(
-    def test_concat_combine_attrs_kwarg_variables(
+    def test_concat_combine_attrs_kwarg_dataarrays(
-    def test_concat_combine_attrs_kwarg_variables(
+    def test_concat_combine_attrs_kwarg_dataarrays(
+        self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception
+    ):
+        """check that combine_attrs is used on data variables and coords"""
+        ds1 = Dataset({"a": ("x", [0], attrs1)}, coords={"x": ("x", [0], attrs1)})
+        ds2 = Dataset({"a": ("x", [0], attrs2)}, coords={"x": ("x", [1], attrs2)})
+
+        if expect_exception:
+            with pytest.raises(ValueError, match=f"combine_attrs='{combine_attrs}'"):
+                concat([ds1, ds2], dim="x", combine_attrs=combine_attrs)
+        else:
             actual = concat([ds1, ds2], dim="x", combine_attrs=combine_attrs)
-            assert_identical(actual, expected[combine_attrs])
+            expected = Dataset(
+                {"a": ("x", [0, 0], expected_attrs)},
+                {"x": ("x", [0, 1], expected_attrs)},
+            )
+
+            assert_identical(actual, expected)
 
     def test_concat_promote_shape(self):
         # mixed dims within variables

diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py
@@ -92,6 +92,20 @@ def test_merge_arrays_attrs_default(self):
                 {"a": 1, "b": 2},
                 False,
             ),
+            (
+                "drop_conflicts",
+                {"a": 1, "b": 2, "c": 3},
+                {"b": 1, "c": 3, "d": 4},
+                {"a": 1, "c": 3, "d": 4},
+                False,
+            ),
+            (
+                "drop_conflicts",
+                {"a": 1, "b": np.array([2]), "c": np.array([3])},
+                {"b": 1, "c": np.array([3]), "d": 4},
+                {"a": 1, "c": np.array([3]), "d": 4},
+                False,
+            ),
         ],
     )
     def test_merge_arrays_attrs(
@@ -109,13 +123,84 @@ def test_merge_arrays_attrs(
             expected.attrs = expected_attrs
             assert_identical(actual, expected)
 
+    @pytest.mark.skip(reason="not implemented, yet (see #4827)")
+    @pytest.mark.parametrize(
+        "combine_attrs, attrs1, attrs2, expected_attrs, expect_exception",
+        [
+            (
+                "no_conflicts",
+                {"a": 1, "b": 2},
+                {"a": 1, "c": 3},
+                {"a": 1, "b": 2, "c": 3},
+                False,
+            ),
+            ("no_conflicts", {"a": 1, "b": 2}, {}, {"a": 1, "b": 2}, False),
+            ("no_conflicts", {}, {"a": 1, "c": 3}, {"a": 1, "c": 3}, False),
+            (
+                "no_conflicts",
+                {"a": 1, "b": 2},
+                {"a": 4, "c": 3},
+                {"a": 1, "b": 2, "c": 3},
+                True,
+            ),
+            ("drop", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {}, False),
+            ("identical", {"a": 1, "b": 2}, {"a": 1, "b": 2}, {"a": 1, "b": 2}, False),
+            ("identical", {"a": 1, "b": 2}, {"a": 1, "c": 3}, {"a": 1, "b": 2}, True),
+            (
+                "override",
+                {"a": 1, "b": 2},
+                {"a": 4, "b": 5, "c": 3},
+                {"a": 1, "b": 2},
+                False,
+            ),
+            (
+                "drop_conflicts",
+                {"a": 1, "b": 2, "c": 3},
+                {"b": 1, "c": 3, "d": 4},
+                {"a": 1, "c": 3, "d": 4},
+                False,
+            ),
+        ],
+    )
+    def test_merge_arrays_attrs_variables(
+        self, combine_attrs, attrs1, attrs2, expected_attrs, expect_exception
+    ):
+        """check that combine_attrs is used on data variables and coords"""
+        data = create_test_data()
+        data1 = data.copy()
+        data1.var1.attrs = attrs1
+        data1.dim1.attrs = attrs1
+        data2 = data.copy()
+        data2.var1.attrs = attrs2
+        data2.dim1.attrs = attrs2
+
+        if expect_exception:
+            with raises_regex(MergeError, "combine_attrs"):
+                actual = xr.merge([data1, data2], combine_attrs=combine_attrs)
+        else:
+            actual = xr.merge([data1, data2], combine_attrs=combine_attrs)
+            expected = data.copy()
+            expected.var1.attrs = expected_attrs
+            expected.dim1.attrs = expected_attrs
+
+            assert_identical(actual, expected)
+
     def test_merge_attrs_override_copy(self):
         ds1 = xr.Dataset(attrs={"x": 0})
         ds2 = xr.Dataset(attrs={"x": 1})
         ds3 = xr.merge([ds1, ds2], combine_attrs="override")
         ds3.attrs["x"] = 2
         assert ds1.x == 0
 
+    def test_merge_attrs_drop_conflicts(self):
+        ds1 = xr.Dataset(attrs={"a": 0, "b": 0, "c": 0})
+        ds2 = xr.Dataset(attrs={"b": 0, "c": 1, "d": 0})
+        ds3 = xr.Dataset(attrs={"a": 0, "b": 1, "c": 0, "e": 0})
+
+        actual = xr.merge([ds1, ds2, ds3], combine_attrs="drop_conflicts")
+        expected = xr.Dataset(attrs={"a": 0, "d": 0, "e": 0})
+        assert_identical(actual, expected)
+
     def test_merge_dicts_simple(self):
         actual = xr.merge([{"foo": 0}, {"bar": "one"}, {"baz": 3.5}])
         expected = xr.Dataset({"foo": 0, "bar": "one", "baz": 3.5})