Skip to content

Commit 229829f

Browse files
authored
add a combine_attrs parameter to Dataset.merge (#4895)
* add a combine_attrs kwarg to Dataset.merge
* document the new drop_conflicts value
* test that combine_attrs is passed through
* fix the documented default of combine_attrs
* update whats-new.rst
* minor fix [skip-ci]
* minor fix [skip-ci]
* remove an empty line [skip-ci]
* fix bad merge [skip-ci]
* fix bad merge [skip-ci]
* remove the blank line after rst lists [skip-ci]
1 parent 37522e9 commit 229829f

File tree

4 files changed

+54
-2
lines changed

4 files changed

+54
-2
lines changed

doc/whats-new.rst

+2
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ v0.17.1 (unreleased)
2222

2323
New Features
2424
~~~~~~~~~~~~
25+
- Allow passing ``combine_attrs`` to :py:meth:`Dataset.merge` (:pull:`4895`).
26+
By `Justus Magin <https://github.com/keewis>`_.
2527
- Support for `dask.graph_manipulation
2628
<https://docs.dask.org/en/latest/graph_manipulation.html>`_ (requires dask >=2021.3)
2729
By `Guido Imperiale <https://github.com/crusaderky>`_

xarray/core/dataset.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -3917,6 +3917,7 @@ def merge(
39173917
compat: str = "no_conflicts",
39183918
join: str = "outer",
39193919
fill_value: Any = dtypes.NA,
3920+
combine_attrs: str = "override",
39203921
) -> "Dataset":
39213922
"""Merge the arrays of two datasets into a single dataset.
39223923
@@ -3945,7 +3946,6 @@ def merge(
39453946
- 'no_conflicts': only values which are not null in both datasets
39463947
must be equal. The returned dataset then contains the combination
39473948
of all non-null values.
3948-
39493949
join : {"outer", "inner", "left", "right", "exact"}, optional
39503950
Method for joining ``self`` and ``other`` along shared dimensions:
39513951
@@ -3957,6 +3957,18 @@ def merge(
39573957
fill_value : scalar or dict-like, optional
39583958
Value to use for newly missing values. If a dict-like, maps
39593959
variable names (including coordinates) to fill values.
3960+
combine_attrs : {"drop", "identical", "no_conflicts", "drop_conflicts", \
3961+
"override"}, default: "override"
3962+
String indicating how to combine attrs of the objects being merged:
3963+
3964+
- "drop": empty attrs on returned Dataset.
3965+
- "identical": all attrs must be the same on every object.
3966+
- "no_conflicts": attrs from all objects are combined, any that have
3967+
the same name must also have the same value.
3968+
- "drop_conflicts": attrs from all objects are combined, any that have
3969+
the same name but different values are dropped.
3970+
- "override": skip comparing and copy attrs from the first dataset to
3971+
the result.
39603972
39613973
Returns
39623974
-------
@@ -3976,6 +3988,7 @@ def merge(
39763988
compat=compat,
39773989
join=join,
39783990
fill_value=fill_value,
3991+
combine_attrs=combine_attrs,
39793992
)
39803993
return self._replace(**merge_result._asdict())
39813994

xarray/core/merge.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -893,6 +893,7 @@ def dataset_merge_method(
893893
compat: str,
894894
join: str,
895895
fill_value: Any,
896+
combine_attrs: str,
896897
) -> _MergeResult:
897898
"""Guts of the Dataset.merge method."""
898899
# we are locked into supporting overwrite_vars for the Dataset.merge
@@ -922,7 +923,12 @@ def dataset_merge_method(
922923
priority_arg = 2
923924

924925
return merge_core(
925-
objs, compat, join, priority_arg=priority_arg, fill_value=fill_value
926+
objs,
927+
compat,
928+
join,
929+
priority_arg=priority_arg,
930+
fill_value=fill_value,
931+
combine_attrs=combine_attrs,
926932
)
927933

928934

xarray/tests/test_merge.py

+31
Original file line numberDiff line numberDiff line change
@@ -418,3 +418,34 @@ def test_merge_dataarray(self):
418418
da = xr.DataArray(data=1, name="b")
419419

420420
assert_identical(ds.merge(da), xr.merge([ds, da]))
421+
422+
@pytest.mark.parametrize(
423+
["combine_attrs", "attrs1", "attrs2", "expected_attrs", "expect_error"],
424+
# don't need to test thoroughly
425+
(
426+
("drop", {"a": 0, "b": 1, "c": 2}, {"a": 1, "b": 2, "c": 3}, {}, False),
427+
(
428+
"drop_conflicts",
429+
{"a": 0, "b": 1, "c": 2},
430+
{"b": 2, "c": 2, "d": 3},
431+
{"a": 0, "c": 2, "d": 3},
432+
False,
433+
),
434+
("override", {"a": 0, "b": 1}, {"a": 1, "b": 2}, {"a": 0, "b": 1}, False),
435+
("no_conflicts", {"a": 0, "b": 1}, {"a": 0, "b": 2}, None, True),
436+
("identical", {"a": 0, "b": 1}, {"a": 0, "b": 2}, None, True),
437+
),
438+
)
439+
def test_merge_combine_attrs(
440+
self, combine_attrs, attrs1, attrs2, expected_attrs, expect_error
441+
):
442+
ds1 = xr.Dataset(attrs=attrs1)
443+
ds2 = xr.Dataset(attrs=attrs2)
444+
445+
if expect_error:
446+
with pytest.raises(xr.MergeError):
447+
ds1.merge(ds2, combine_attrs=combine_attrs)
448+
else:
449+
actual = ds1.merge(ds2, combine_attrs=combine_attrs)
450+
expected = xr.Dataset(attrs=expected_attrs)
451+
assert_identical(actual, expected)

0 commit comments

Comments
 (0)