Skip to content

Commit 3cbd21a

Browse files
authored
add a combine_attrs option to open_mfdataset (#4971)
* expose combine_attrs to open_mfdataset
* add tests for passing combine_attrs to open_mfdataset
* don't override the main dataset attrs
* switch the default to "override" which seems closer to current behavior
* update whats-new.rst [skip-ci]
1 parent c1272b5 commit 3cbd21a

File tree

3 files changed

+59
-4
lines changed

3 files changed

+59
-4
lines changed

doc/whats-new.rst

+2
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ New Features
6464
:py:class:`~core.groupby.DataArrayGroupBy`, inspired by pandas'
6565
:py:meth:`~pandas.core.groupby.GroupBy.get_group`.
6666
By `Deepak Cherian <https://github.com/dcherian>`_.
67+
- Add a ``combine_attrs`` parameter to :py:func:`open_mfdataset` (:pull:`4971`).
68+
By `Justus Magin <https://github.com/keewis>`_.
6769
- Disable the `cfgrib` backend if the `eccodes` library is not installed (:pull:`5083`). By `Baudouin Raoult <https://github.com/b8raoult>`_.
6870
- Added :py:meth:`DataArray.curvefit` and :py:meth:`Dataset.curvefit` for general curve fitting applications. (:issue:`4300`, :pull:`4849`)
6971
By `Sam Levang <https://github.com/slevang>`_.

xarray/backends/api.py

+3 -4
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,7 @@ def open_mfdataset(
717717
parallel=False,
718718
join="outer",
719719
attrs_file=None,
720+
combine_attrs="override",
720721
**kwargs,
721722
):
722723
"""Open multiple files as a single dataset.
@@ -931,7 +932,7 @@ def open_mfdataset(
931932
coords=coords,
932933
ids=ids,
933934
join=join,
934-
combine_attrs="drop",
935+
combine_attrs=combine_attrs,
935936
)
936937
elif combine == "by_coords":
937938
# Redo ordering from coordinates, ignoring how they were ordered
@@ -942,7 +943,7 @@ def open_mfdataset(
942943
data_vars=data_vars,
943944
coords=coords,
944945
join=join,
945-
combine_attrs="drop",
946+
combine_attrs=combine_attrs,
946947
)
947948
else:
948949
raise ValueError(
@@ -965,8 +966,6 @@ def multi_file_closer():
965966
if isinstance(attrs_file, Path):
966967
attrs_file = str(attrs_file)
967968
combined.attrs = datasets[paths.index(attrs_file)].attrs
968-
else:
969-
combined.attrs = datasets[0].attrs
970969

971970
return combined
972971

xarray/tests/test_backends.py

+54
Original file line numberDiff line numberDiff line change
@@ -3004,6 +3004,60 @@ def test_open_mfdataset_does_same_as_concat(self, combine, opt, join):
30043004
ds_expect = xr.concat([ds1, ds2], data_vars=opt, dim="t", join=join)
30053005
assert_identical(ds, ds_expect)
30063006

3007+
@pytest.mark.parametrize(
3008+
["combine_attrs", "attrs", "expected", "expect_error"],
3009+
(
3010+
pytest.param("drop", [{"a": 1}, {"a": 2}], {}, False, id="drop"),
3011+
pytest.param(
3012+
"override", [{"a": 1}, {"a": 2}], {"a": 1}, False, id="override"
3013+
),
3014+
pytest.param(
3015+
"no_conflicts", [{"a": 1}, {"a": 2}], None, True, id="no_conflicts"
3016+
),
3017+
pytest.param(
3018+
"identical",
3019+
[{"a": 1, "b": 2}, {"a": 1, "c": 3}],
3020+
None,
3021+
True,
3022+
id="identical",
3023+
),
3024+
pytest.param(
3025+
"drop_conflicts",
3026+
[{"a": 1, "b": 2}, {"b": -1, "c": 3}],
3027+
{"a": 1, "c": 3},
3028+
False,
3029+
id="drop_conflicts",
3030+
),
3031+
),
3032+
)
3033+
def test_open_mfdataset_dataset_combine_attrs(
3034+
self, combine_attrs, attrs, expected, expect_error
3035+
):
3036+
with self.setup_files_and_datasets() as (files, [ds1, ds2]):
3037+
# Give the files an inconsistent attribute
3038+
for i, f in enumerate(files):
3039+
ds = open_dataset(f).load()
3040+
ds.attrs = attrs[i]
3041+
ds.close()
3042+
ds.to_netcdf(f)
3043+
3044+
if expect_error:
3045+
with pytest.raises(xr.MergeError):
3046+
xr.open_mfdataset(
3047+
files,
3048+
combine="by_coords",
3049+
concat_dim="t",
3050+
combine_attrs=combine_attrs,
3051+
)
3052+
else:
3053+
with xr.open_mfdataset(
3054+
files,
3055+
combine="by_coords",
3056+
concat_dim="t",
3057+
combine_attrs=combine_attrs,
3058+
) as ds:
3059+
assert ds.attrs == expected
3060+
30073061
def test_open_mfdataset_dataset_attr_by_coords(self):
30083062
"""
30093063
Case when an attribute differs across the multiple files

0 commit comments

Comments
 (0)