From 575975e3464528d56d9ead6d6f5f29f2b8ceff08 Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 13 Feb 2020 08:08:57 -0700 Subject: [PATCH 1/3] concat can now deal with non-dim coordinates only present in one dataset. --- doc/whats-new.rst | 4 ++++ xarray/core/concat.py | 16 +++++++++++++++- xarray/tests/test_concat.py | 21 +++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index bf8e63eb926..b3f77e737ff 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,6 +28,10 @@ New Features Bug fixes ~~~~~~~~~ +- :py:func:`concat` can now handle coordinate variables only present in one of + the objects to be concatenated when ``coords="different"``. + By `Deepak Cherian <https://github.com/dcherian>`_. + Documentation ~~~~~~~~~~~~~ diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 302f7afcec6..04dfc654ee3 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -194,7 +194,21 @@ def process_subset_opt(opt, subset): for k in getattr(datasets[0], subset): if k not in concat_over: equals[k] = None - variables = [ds.variables[k] for ds in datasets] + + variables = [] + for ds in datasets: + if k in ds.variables: + variables.append(ds.variables[k]) + + if len(variables) == 1: + break + elif len(variables) != len(datasets) and opt == "different": + raise ValueError( + f"{k} not present in all datasets and coords='different'. " + f"Either add {k} to datasets where it is missing or " + "specify coords='minimal'." + ) + # first check without comparing values i.e. 
no computes for var in variables[1:]: equals[k] = getattr(variables[0], compat)( diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index def5abc942f..bec0670a44d 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -475,3 +475,24 @@ def test_concat_attrs_first_variable(attr1, attr2): concat_attrs = concat(arrs, "y").attrs assert concat_attrs == attr1 + + +def test_concat_merge_single_non_dim_coord(): + da1 = DataArray([1, 2, 3], dims="x", coords={"x": [1, 2, 3], "y": 1}) + da2 = DataArray([4, 5, 6], dims="x", coords={"x": [4, 5, 6]}) + + expected = DataArray(range(1, 7), dims="x", coords={"x": range(1, 7), "y": 1}) + + for coords in ["different", "minimal"]: + actual = concat([da1, da2], "x", coords=coords) + assert_identical(actual, expected) + + with raises_regex(ValueError, "'y' is not present in all datasets."): + concat([da1, da2], dim="x", coords="all") + + da1 = DataArray([1, 2, 3], dims="x", coords={"x": [1, 2, 3], "y": 1}) + da2 = DataArray([4, 5, 6], dims="x", coords={"x": [4, 5, 6]}) + da3 = DataArray([7, 8, 9], dims="x", coords={"x": [7, 8, 9], "y": 1}) + for coords in ["different", "all"]: + with raises_regex(ValueError, "'y' not present in all datasets."): + concat([da1, da2, da3], dim="x") From 7ba1f388f1e573f66f6460ce76f7a6814594d50b Mon Sep 17 00:00:00 2001 From: dcherian Date: Thu, 13 Feb 2020 09:01:30 -0700 Subject: [PATCH 2/3] fix test --- xarray/core/concat.py | 4 ++-- xarray/tests/test_combine.py | 5 +++-- xarray/tests/test_concat.py | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 04dfc654ee3..267ce99aa98 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -204,8 +204,8 @@ def process_subset_opt(opt, subset): break elif len(variables) != len(datasets) and opt == "different": raise ValueError( - f"{k} not present in all datasets and coords='different'. 
" - f"Either add {k} to datasets where it is missing or " + f"{k!r} not present in all datasets and coords='different'. " + f"Either add {k!r} to datasets where it is missing or " "specify coords='minimal'." ) diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index d907e1c5e46..fe669919d05 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -366,8 +366,9 @@ def test_nested_concat(self): assert_identical(expected, actual) objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})] - with pytest.raises(KeyError): - combine_nested(objs, concat_dim="x") + actual = combine_nested(objs, concat_dim="x") + expected = Dataset({"x": [0, 0], "y": [0]}) + assert_identical(expected, actual) @pytest.mark.parametrize( "join, expected", diff --git a/xarray/tests/test_concat.py b/xarray/tests/test_concat.py index bec0670a44d..bd99181a947 100644 --- a/xarray/tests/test_concat.py +++ b/xarray/tests/test_concat.py @@ -494,5 +494,5 @@ def test_concat_merge_single_non_dim_coord(): da2 = DataArray([4, 5, 6], dims="x", coords={"x": [4, 5, 6]}) da3 = DataArray([7, 8, 9], dims="x", coords={"x": [7, 8, 9], "y": 1}) for coords in ["different", "all"]: - with raises_regex(ValueError, "'y' not present in all datasets."): + with raises_regex(ValueError, "'y' not present in all datasets"): concat([da1, da2, da3], dim="x") From e81cd122f68840929ad398c0987e143326c051b6 Mon Sep 17 00:00:00 2001 From: dcherian Date: Fri, 14 Feb 2020 08:32:08 -0700 Subject: [PATCH 3/3] minor fixes. 
--- xarray/core/concat.py | 2 ++ xarray/tests/test_combine.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index 267ce99aa98..96b4be15d1b 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -201,6 +201,8 @@ def process_subset_opt(opt, subset): variables.append(ds.variables[k]) if len(variables) == 1: + # coords="different" doesn't make sense when only one object + # contains a particular variable. break elif len(variables) != len(datasets) and opt == "different": raise ValueError( diff --git a/xarray/tests/test_combine.py b/xarray/tests/test_combine.py index fe669919d05..eb2c6e1dbf7 100644 --- a/xarray/tests/test_combine.py +++ b/xarray/tests/test_combine.py @@ -365,9 +365,9 @@ def test_nested_concat(self): expected = Dataset({"x": ("a", [0, 1]), "y": ("a", [0, 1])}) assert_identical(expected, actual) - objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [0]})] + objs = [Dataset({"x": [0], "y": [0]}), Dataset({"x": [1]})] actual = combine_nested(objs, concat_dim="x") - expected = Dataset({"x": [0, 0], "y": [0]}) + expected = Dataset({"x": [0, 1], "y": [0]}) assert_identical(expected, actual) @pytest.mark.parametrize(