Commit a0afe13

[skip-ci] Add compute to groupby benchmarks
1 parent a28e9b5 commit a0afe13

File tree

1 file changed: +14 −12 lines changed

asv_bench/benchmarks/groupby.py

+14 −12
@@ -1,3 +1,5 @@
+# import flox to avoid the cost of first import
+import flox.xarray  # noqa
 import numpy as np
 import pandas as pd
 
@@ -27,24 +29,24 @@ def time_init(self, ndim):
     @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
     def time_agg_small_num_groups(self, method, ndim):
         ds = getattr(self, f"ds{ndim}d")
-        getattr(ds.groupby("a"), method)()
+        getattr(ds.groupby("a"), method)().compute()
 
     @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
     def time_agg_large_num_groups(self, method, ndim):
         ds = getattr(self, f"ds{ndim}d")
-        getattr(ds.groupby("b"), method)()
+        getattr(ds.groupby("b"), method)().compute()
 
     def time_binary_op_1d(self):
-        self.ds1d.groupby("b") - self.ds1d_mean
+        (self.ds1d.groupby("b") - self.ds1d_mean).compute()
 
     def time_binary_op_2d(self):
-        self.ds2d.groupby("b") - self.ds2d_mean
+        (self.ds2d.groupby("b") - self.ds2d_mean).compute()
 
     def peakmem_binary_op_1d(self):
-        self.ds1d.groupby("b") - self.ds1d_mean
+        (self.ds1d.groupby("b") - self.ds1d_mean).compute()
 
     def peakmem_binary_op_2d(self):
-        self.ds2d.groupby("b") - self.ds2d_mean
+        (self.ds2d.groupby("b") - self.ds2d_mean).compute()
 
 
 class GroupByDask(GroupBy):
@@ -56,8 +58,8 @@ def setup(self, *args, **kwargs):
         self.ds1d["c"] = self.ds1d["c"].chunk({"dim_0": 50})
         self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2))
         self.ds2d["c"] = self.ds2d["c"].chunk({"dim_0": 50, "z": 5})
-        self.ds1d_mean = self.ds1d.groupby("b").mean()
-        self.ds2d_mean = self.ds2d.groupby("b").mean()
+        self.ds1d_mean = self.ds1d.groupby("b").mean().compute()
+        self.ds2d_mean = self.ds2d.groupby("b").mean().compute()
 
 
 class GroupByPandasDataFrame(GroupBy):
@@ -69,7 +71,7 @@ def setup(self, *args, **kwargs):
 
         super().setup(**kwargs)
         self.ds1d = self.ds1d.to_dataframe()
-        self.ds1d_mean = self.ds1d.groupby("b").mean()
+        self.ds1d_mean = self.ds1d.groupby("b").mean().compute()
 
     def time_binary_op_2d(self):
         raise NotImplementedError
@@ -88,7 +90,7 @@ def setup(self, *args, **kwargs):
         requires_dask()
         super().setup(**kwargs)
         self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dataframe()
-        self.ds1d_mean = self.ds1d.groupby("b").mean()
+        self.ds1d_mean = self.ds1d.groupby("b").mean().compute()
 
     def time_binary_op_2d(self):
         raise NotImplementedError
@@ -116,12 +118,12 @@ def time_init(self, ndim):
     @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
     def time_agg_small_num_groups(self, method, ndim):
         ds = getattr(self, f"ds{ndim}d")
-        getattr(ds.resample(time="3M"), method)()
+        getattr(ds.resample(time="3M"), method)().compute()
 
     @parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
     def time_agg_large_num_groups(self, method, ndim):
         ds = getattr(self, f"ds{ndim}d")
-        getattr(ds.resample(time="48H"), method)()
+        getattr(ds.resample(time="48H"), method)().compute()
 
 
 class ResampleDask(Resample):
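For context on the change (not part of the commit itself): on dask-backed data, xarray groupby and resample aggregations return lazy objects, so without the added .compute() calls the time_* benchmarks would mostly measure task-graph construction rather than the aggregation, and the peakmem_* benchmarks would miss the memory used during execution. Computing the group means in setup() likewise keeps that cost out of the binary-op benchmarks. A minimal illustrative sketch of the lazy-versus-computed distinction, assuming xarray and dask are installed; the dataset and names below are hypothetical, not taken from the benchmark file:

# Illustrative sketch only (not from asv_bench/benchmarks/groupby.py).
import numpy as np
import xarray as xr

# A small chunked (dask-backed) dataset with a grouping coordinate "b".
ds = xr.Dataset(
    {"c": ("dim_0", np.random.randn(1_000))},
    coords={"b": ("dim_0", np.arange(1_000) % 10)},
).chunk({"dim_0": 50})

lazy = ds.groupby("b").mean()  # lazy: only builds the dask task graph
result = lazy.compute()        # eager: runs the graph; this is the work the benchmarks now time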
