1
+ # import flox to avoid the cost of first import
2
+ import flox .xarray # noqa
1
3
import numpy as np
2
4
import pandas as pd
3
5
@@ -27,24 +29,24 @@ def time_init(self, ndim):
27
29
@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_small_num_groups(self, method, ndim):
    """Time a groupby reduction over variable ``"a"``.

    Parameters
    ----------
    method : str
        Name of the reduction looked up on the groupby object
        (parameterized as ``"sum"`` or ``"mean"``).
    ndim : int
        Selects ``self.ds1d`` or ``self.ds2d`` (parameterized as 1 or 2).
    """
    ds = getattr(self, f"ds{ndim}d")
    # Call ``.compute()`` on the reduced result; presumably this forces
    # evaluation of lazily evaluated (e.g. chunked) data so the real work
    # is what gets timed -- confirm against the subclasses' ``setup``.
    getattr(ds.groupby("a"), method)().compute()
31
33
32
34
@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_large_num_groups(self, method, ndim):
    """Time a groupby reduction over variable ``"b"``.

    Parameters
    ----------
    method : str
        Name of the reduction looked up on the groupby object
        (parameterized as ``"sum"`` or ``"mean"``).
    ndim : int
        Selects ``self.ds1d`` or ``self.ds2d`` (parameterized as 1 or 2).
    """
    ds = getattr(self, f"ds{ndim}d")
    # Call ``.compute()`` on the reduced result; presumably this forces
    # evaluation of lazily evaluated (e.g. chunked) data so the real work
    # is what gets timed -- confirm against the subclasses' ``setup``.
    getattr(ds.groupby("b"), method)().compute()
36
38
37
39
def time_binary_op_1d(self):
    """Time subtracting ``self.ds1d_mean`` from ``self.ds1d`` grouped by ``"b"``.

    The difference is followed by ``.compute()``; presumably this makes
    lazily evaluated results do their work inside the timed region --
    confirm against the subclasses' ``setup``.
    """
    grouped = self.ds1d.groupby("b")
    (grouped - self.ds1d_mean).compute()
39
41
40
42
def time_binary_op_2d(self):
    """Time subtracting ``self.ds2d_mean`` from ``self.ds2d`` grouped by ``"b"``.

    The difference is followed by ``.compute()``; presumably this makes
    lazily evaluated results do their work inside the timed region --
    confirm against the subclasses' ``setup``.
    """
    grouped = self.ds2d.groupby("b")
    (grouped - self.ds2d_mean).compute()
42
44
43
45
def peakmem_binary_op_1d(self):
    """Peak-memory counterpart of the 1-D grouped subtraction benchmark.

    Subtracts ``self.ds1d_mean`` from ``self.ds1d`` grouped by ``"b"`` and
    calls ``.compute()`` on the difference; presumably this makes lazily
    evaluated results allocate inside the measured region -- confirm
    against the subclasses' ``setup``.
    """
    grouped = self.ds1d.groupby("b")
    (grouped - self.ds1d_mean).compute()
45
47
46
48
def peakmem_binary_op_2d(self):
    """Peak-memory counterpart of the 2-D grouped subtraction benchmark.

    Subtracts ``self.ds2d_mean`` from ``self.ds2d`` grouped by ``"b"`` and
    calls ``.compute()`` on the difference; presumably this makes lazily
    evaluated results allocate inside the measured region -- confirm
    against the subclasses' ``setup``.
    """
    grouped = self.ds2d.groupby("b")
    (grouped - self.ds2d_mean).compute()
48
50
49
51
50
52
class GroupByDask (GroupBy ):
@@ -56,8 +58,8 @@ def setup(self, *args, **kwargs):
56
58
self .ds1d ["c" ] = self .ds1d ["c" ].chunk ({"dim_0" : 50 })
57
59
self .ds2d = self .ds2d .sel (dim_0 = slice (None , None , 2 ))
58
60
self .ds2d ["c" ] = self .ds2d ["c" ].chunk ({"dim_0" : 50 , "z" : 5 })
59
- self .ds1d_mean = self .ds1d .groupby ("b" ).mean ()
60
- self .ds2d_mean = self .ds2d .groupby ("b" ).mean ()
61
+ self .ds1d_mean = self .ds1d .groupby ("b" ).mean (). compute ()
62
+ self .ds2d_mean = self .ds2d .groupby ("b" ).mean (). compute ()
61
63
62
64
63
65
class GroupByPandasDataFrame (GroupBy ):
@@ -69,7 +71,7 @@ def setup(self, *args, **kwargs):
69
71
70
72
super ().setup (** kwargs )
71
73
self .ds1d = self .ds1d .to_dataframe ()
72
- self .ds1d_mean = self .ds1d .groupby ("b" ).mean ()
74
+ self .ds1d_mean = self .ds1d .groupby ("b" ).mean (). compute ()
73
75
74
76
def time_binary_op_2d(self):
    """Override the inherited 2-D benchmark so it always raises.

    NOTE(review): the visible part of this class's ``setup`` only converts
    ``self.ds1d`` to a DataFrame, so presumably there is no 2-D input to
    benchmark here -- confirm.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError
@@ -88,7 +90,7 @@ def setup(self, *args, **kwargs):
88
90
requires_dask ()
89
91
super ().setup (** kwargs )
90
92
self .ds1d = self .ds1d .chunk ({"dim_0" : 50 }).to_dataframe ()
91
- self .ds1d_mean = self .ds1d .groupby ("b" ).mean ()
93
+ self .ds1d_mean = self .ds1d .groupby ("b" ).mean (). compute ()
92
94
93
95
def time_binary_op_2d(self):
    """Override the inherited 2-D benchmark so it always raises.

    NOTE(review): the visible part of this class's ``setup`` only converts
    the chunked ``self.ds1d`` to a DataFrame, so presumably there is no
    2-D input to benchmark here -- confirm.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError
@@ -116,12 +118,12 @@ def time_init(self, ndim):
116
118
@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_small_num_groups(self, method, ndim):
    """Time a resample reduction using a ``"3M"`` frequency along ``time``.

    Parameters
    ----------
    method : str
        Name of the reduction looked up on the resample object
        (parameterized as ``"sum"`` or ``"mean"``).
    ndim : int
        Selects ``self.ds1d`` or ``self.ds2d`` (parameterized as 1 or 2).
    """
    ds = getattr(self, f"ds{ndim}d")
    # Call ``.compute()`` on the reduced result; presumably this forces
    # evaluation of lazily evaluated (e.g. chunked) data so the real work
    # is what gets timed -- confirm against the subclasses' ``setup``.
    getattr(ds.resample(time="3M"), method)().compute()
120
122
121
123
@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_large_num_groups(self, method, ndim):
    """Time a resample reduction using a ``"48H"`` frequency along ``time``.

    Parameters
    ----------
    method : str
        Name of the reduction looked up on the resample object
        (parameterized as ``"sum"`` or ``"mean"``).
    ndim : int
        Selects ``self.ds1d`` or ``self.ds2d`` (parameterized as 1 or 2).
    """
    ds = getattr(self, f"ds{ndim}d")
    # Call ``.compute()`` on the reduced result; presumably this forces
    # evaluation of lazily evaluated (e.g. chunked) data so the real work
    # is what gets timed -- confirm against the subclasses' ``setup``.
    getattr(ds.resample(time="48H"), method)().compute()
125
127
126
128
127
129
class ResampleDask (Resample ):
0 commit comments