Add tests for groupby math (#6137)

dcherian · web-flow · commit bc28eda793c3 · 2022-01-12T17:02:03.000-07:00
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
@@ -663,30 +663,33 @@ def test_groupby_dataset_reduce() -> None:
     assert_allclose(expected, actual)
 
 
-def test_groupby_dataset_math() -> None:
+@pytest.mark.parametrize("squeeze", [True, False])  # one test run per squeeze value
+def test_groupby_dataset_math(squeeze) -> None:
     def reorder_dims(x):
         return x.transpose("dim1", "dim2", "dim3", "time")
 
     ds = create_test_data()
     ds["dim1"] = ds["dim1"]
-    for squeeze in [True, False]:
-        grouped = ds.groupby("dim1", squeeze=squeeze)
+    grouped = ds.groupby("dim1", squeeze=squeeze)  # reused by every check below
 
-        expected = reorder_dims(ds + ds.coords["dim1"])
-        actual = grouped + ds.coords["dim1"]
-        assert_identical(expected, reorder_dims(actual))
+    expected = reorder_dims(ds + ds.coords["dim1"])  # reference: Dataset + coordinate
+    actual = grouped + ds.coords["dim1"]  # groupby object on the left-hand side
+    assert_identical(expected, reorder_dims(actual))
 
-        actual = ds.coords["dim1"] + grouped
-        assert_identical(expected, reorder_dims(actual))
+    actual = ds.coords["dim1"] + grouped  # same operands, groupby on the right
+    assert_identical(expected, reorder_dims(actual))
 
-        ds2 = 2 * ds
-        expected = reorder_dims(ds + ds2)
-        actual = grouped + ds2
-        assert_identical(expected, reorder_dims(actual))
+    ds2 = 2 * ds
+    expected = reorder_dims(ds + ds2)  # reference: Dataset + Dataset
+    actual = grouped + ds2  # groupby object combined with a Dataset operand
+    assert_identical(expected, reorder_dims(actual))
 
-        actual = ds2 + grouped
-        assert_identical(expected, reorder_dims(actual))
+    actual = ds2 + grouped  # Dataset operand on the left-hand side
+    assert_identical(expected, reorder_dims(actual))
 
+
+def test_groupby_math_more() -> None:
+    ds = create_test_data()
     grouped = ds.groupby("numbers")
     zeros = DataArray([0, 0, 0, 0], [("numbers", range(4))])
     expected = (ds + Variable("dim3", np.zeros(10))).transpose(
@@ -719,6 +722,58 @@ def reorder_dims(x):
         ds + ds.groupby("time.month")
 
 
+@pytest.mark.parametrize("indexed_coord", [True, False])  # with / without x, y coords
+def test_groupby_bins_math(indexed_coord) -> None:
+    N = 7
+    da = DataArray(np.random.random((N, N)), dims=("x", "y"))  # unseeded random data
+    if indexed_coord:
+        da["x"] = np.arange(N)  # coordinate values 0..N-1
+        da["y"] = np.arange(N)
+    g = da.groupby_bins("x", np.arange(0, N + 1, 3))  # bin edges are [0, 3, 6]
+    mean = g.mean()  # expected below drops x=0 and maps rows 1..6 to bins 0,0,0,1,1,1
+    expected = da.isel(x=slice(1, None)) - mean.isel(x_bins=("x", [0, 0, 0, 1, 1, 1]))
+    actual = g - mean  # operation under test: binned groupby minus per-bin mean
+    assert_identical(expected, actual)
+
+
+def test_groupby_math_nD_group() -> None:
+    N = 40
+    da = DataArray(
+        np.random.random((N, N)),  # unseeded random data
+        dims=("x", "y"),
+        coords={
+            "labels": (  # 8 distinct labels, each repeated N // 8 times along x
+                "x",
+                np.repeat(["a", "b", "c", "d", "e", "f", "g", "h"], repeats=N // 8),
+            ),
+        },
+    )
+    da["labels2d"] = xr.broadcast(da.labels, da)[0]  # labels broadcast against da
+
+    g = da.groupby("labels2d")  # first case: group by the broadcast label variable
+    mean = g.mean()
+    expected = da - mean.sel(labels2d=da.labels2d)  # pick each element's group mean
+    expected["labels"] = expected.labels.broadcast_like(expected.labels2d)
+    actual = g - mean  # operation under test
+    assert_identical(expected, actual)
+
+    da["num"] = (  # second case: integer values 1..8, each repeated N // 8 times
+        "x",
+        np.repeat([1, 2, 3, 4, 5, 6, 7, 8], repeats=N // 8),
+    )
+    da["num2d"] = xr.broadcast(da.num, da)[0]
+    g = da.groupby_bins("num2d", bins=[0, 4, 6])
+    mean = g.mean()
+    idxr = np.digitize(da.num2d, bins=(0, 4, 6), right=True)[:30, :] - 1  # rows 0-29
+    expanded_mean = mean.drop_vars("num2d_bins").isel(num2d_bins=(("x", "y"), idxr))
+    expected = da.isel(x=slice(30)) - expanded_mean  # rows 0-29 hold num 1..6
+    expected["labels"] = expected.labels.broadcast_like(expected.labels2d)
+    expected["num"] = expected.num.broadcast_like(expected.num2d)
+    expected["num2d_bins"] = (("x", "y"), mean.num2d_bins.data[idxr])
+    actual = g - mean  # operation under test: binned groupby minus per-bin mean
+    assert_identical(expected, actual)
+
+
 def test_groupby_dataset_math_virtual() -> None:
     ds = Dataset({"x": ("t", [1, 2, 3])}, {"t": pd.date_range("20100101", periods=3)})
     grouped = ds.groupby("t.day")