Skip to content

Commit 511d36c

Browse files
authored
[skip-ci] Add benchmarks for groupby math (#6390)
1 parent 83f238a commit 511d36c

File tree

1 file changed

+44
-0
lines changed

1 file changed

+44
-0
lines changed

asv_bench/benchmarks/groupby.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ def setup(self, *args, **kwargs):
1616
}
1717
)
1818
self.ds2d = self.ds1d.expand_dims(z=10)
19+
self.ds1d_mean = self.ds1d.groupby("b").mean()
20+
self.ds2d_mean = self.ds2d.groupby("b").mean()
1921

2022
@parameterized(["ndim"], [(1, 2)])
2123
def time_init(self, ndim):
@@ -31,6 +33,18 @@ def time_agg_large_num_groups(self, method, ndim):
3133
ds = getattr(self, f"ds{ndim}d")
3234
getattr(ds.groupby("b"), method)()
3335

36+
def time_groupby_binary_op_1d(self):
    """Benchmark subtracting ``self.ds1d_mean`` from ``self.ds1d``.

    The difference is computed and then thrown away; nothing is returned.
    """
    # NOTE(review): despite the "groupby" in the name, the left operand is
    # the plain object, not ``self.ds1d.groupby("b")`` -- confirm that the
    # broadcasting subtraction is really what should be measured.
    # NOTE(review): nothing forces evaluation here; presumably lazy
    # (chunked) inputs in subclasses only build a graph -- verify.
    minuend, subtrahend = self.ds1d, self.ds1d_mean
    _ = minuend - subtrahend
38+
39+
def time_groupby_binary_op_2d(self):
    """Benchmark subtracting ``self.ds2d_mean`` from ``self.ds2d``.

    The difference is computed and then thrown away; nothing is returned.
    """
    # NOTE(review): despite the "groupby" in the name, the left operand is
    # the plain object, not ``self.ds2d.groupby("b")`` -- confirm that the
    # broadcasting subtraction is really what should be measured.
    minuend, subtrahend = self.ds2d, self.ds2d_mean
    _ = minuend - subtrahend
41+
42+
def peakmem_groupby_binary_op_1d(self):
    """Memory benchmark for ``self.ds1d - self.ds1d_mean``.

    Runs the same subtraction as the matching ``time_`` benchmark and
    discards the result; nothing is returned.
    """
    # NOTE(review): no ``.groupby("b")`` is applied to the left operand even
    # though the name mentions groupby -- confirm the intended operation.
    minuend, subtrahend = self.ds1d, self.ds1d_mean
    _ = minuend - subtrahend
44+
45+
def peakmem_groupby_binary_op_2d(self):
    """Memory benchmark for ``self.ds2d - self.ds2d_mean``.

    Runs the same subtraction as the matching ``time_`` benchmark and
    discards the result; nothing is returned.
    """
    # NOTE(review): no ``.groupby("b")`` is applied to the left operand even
    # though the name mentions groupby -- confirm the intended operation.
    minuend, subtrahend = self.ds2d, self.ds2d_mean
    _ = minuend - subtrahend
47+
3448

3549
class GroupByDask(GroupBy):
3650
def setup(self, *args, **kwargs):
@@ -40,6 +54,8 @@ def setup(self, *args, **kwargs):
4054
self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2)).chunk(
4155
{"dim_0": 50, "z": 5}
4256
)
57+
self.ds1d_mean = self.ds1d.groupby("b").mean()
58+
self.ds2d_mean = self.ds2d.groupby("b").mean()
4359

4460

4561
class GroupByPandasDataFrame(GroupBy):
@@ -51,6 +67,13 @@ def setup(self, *args, **kwargs):
5167

5268
super().setup(**kwargs)
5369
self.ds1d = self.ds1d.to_dataframe()
70+
self.ds1d_mean = self.ds1d.groupby("b").mean()
71+
72+
def time_groupby_binary_op_2d(self):
    """Opt out of the 2-D timing benchmark for the DataFrame variant.

    This class's ``setup`` only converts ``ds1d`` to a DataFrame and builds
    ``ds1d_mean``; the method unconditionally raises ``NotImplementedError``.
    """
    # NOTE(review): asv documents NotImplementedError-based skipping for
    # ``setup``; confirm it is also treated as a skip (not a failure) when
    # raised from the benchmark body itself.
    raise NotImplementedError()
74+
75+
def peakmem_groupby_binary_op_2d(self):
    """Opt out of the 2-D memory benchmark for the DataFrame variant.

    This class's ``setup`` only converts ``ds1d`` to a DataFrame and builds
    ``ds1d_mean``; the method unconditionally raises ``NotImplementedError``.
    """
    # NOTE(review): asv documents NotImplementedError-based skipping for
    # ``setup``; confirm it is also treated as a skip (not a failure) when
    # raised from the benchmark body itself.
    raise NotImplementedError()
5477

5578

5679
class GroupByDaskDataFrame(GroupBy):
@@ -63,6 +86,13 @@ def setup(self, *args, **kwargs):
6386
requires_dask()
6487
super().setup(**kwargs)
6588
self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dataframe()
89+
self.ds1d_mean = self.ds1d.groupby("b").mean()
90+
91+
def time_groupby_binary_op_2d(self):
    """Opt out of the 2-D timing benchmark for the dask DataFrame variant.

    This class's ``setup`` only converts a chunked ``ds1d`` to a DataFrame
    and builds ``ds1d_mean``; the method unconditionally raises
    ``NotImplementedError``.
    """
    # NOTE(review): asv documents NotImplementedError-based skipping for
    # ``setup``; confirm it is also treated as a skip (not a failure) when
    # raised from the benchmark body itself.
    raise NotImplementedError()
93+
94+
def peakmem_groupby_binary_op_2d(self):
    """Opt out of the 2-D memory benchmark for the dask DataFrame variant.

    This class's ``setup`` only converts a chunked ``ds1d`` to a DataFrame
    and builds ``ds1d_mean``; the method unconditionally raises
    ``NotImplementedError``.
    """
    # NOTE(review): asv documents NotImplementedError-based skipping for
    # ``setup``; confirm it is also treated as a skip (not a failure) when
    # raised from the benchmark body itself.
    raise NotImplementedError()
6696

6797

6898
class Resample:
@@ -74,6 +104,8 @@ def setup(self, *args, **kwargs):
74104
coords={"time": pd.date_range("2001-01-01", freq="H", periods=365 * 24)},
75105
)
76106
self.ds2d = self.ds1d.expand_dims(z=10)
107+
self.ds1d_mean = self.ds1d.resample(time="48H").mean()
108+
self.ds2d_mean = self.ds2d.resample(time="48H").mean()
77109

78110
@parameterized(["ndim"], [(1, 2)])
79111
def time_init(self, ndim):
@@ -89,6 +121,18 @@ def time_agg_large_num_groups(self, method, ndim):
89121
ds = getattr(self, f"ds{ndim}d")
90122
getattr(ds.resample(time="48H"), method)()
91123

124+
def time_groupby_binary_op_1d(self):
    """Benchmark subtracting ``self.ds1d_mean`` from ``self.ds1d``.

    The difference is computed and then thrown away; nothing is returned.
    """
    # NOTE(review): the left operand is the plain object rather than a
    # ``.resample(time="48H")`` / groupby object, so this is presumably an
    # aligned subtraction on the shared ``time`` labels -- confirm that is
    # the operation this benchmark is meant to measure.
    minuend, subtrahend = self.ds1d, self.ds1d_mean
    _ = minuend - subtrahend
126+
127+
def time_groupby_binary_op_2d(self):
    """Benchmark subtracting ``self.ds2d_mean`` from ``self.ds2d``.

    The difference is computed and then thrown away; nothing is returned.
    """
    # NOTE(review): the left operand is the plain object rather than a
    # ``.resample(time="48H")`` / groupby object -- confirm that is the
    # operation this benchmark is meant to measure.
    minuend, subtrahend = self.ds2d, self.ds2d_mean
    _ = minuend - subtrahend
129+
130+
def peakmem_groupby_binary_op_1d(self):
    """Memory benchmark for ``self.ds1d - self.ds1d_mean``.

    Runs the same subtraction as the matching ``time_`` benchmark and
    discards the result; nothing is returned.
    """
    # NOTE(review): no resample/groupby object is involved in the
    # subtraction itself -- confirm the intended operation.
    minuend, subtrahend = self.ds1d, self.ds1d_mean
    _ = minuend - subtrahend
132+
133+
def peakmem_groupby_binary_op_2d(self):
    """Memory benchmark for ``self.ds2d - self.ds2d_mean``.

    Runs the same subtraction as the matching ``time_`` benchmark and
    discards the result; nothing is returned.
    """
    # NOTE(review): no resample/groupby object is involved in the
    # subtraction itself -- confirm the intended operation.
    minuend, subtrahend = self.ds2d, self.ds2d_mean
    _ = minuend - subtrahend
135+
92136

93137
class ResampleDask(Resample):
94138
def setup(self, *args, **kwargs):

0 commit comments

Comments
 (0)