Skip to content

Commit bc35e39

Browse files
hmaarrfk, Illviljan, and pre-commit-ci[bot]
authored
Expand benchmarks for dataset insertion and creation (#7236)
* Expand benchmarks for dataset insertion and creation

  Taken from discussions in #7224 (comment)

  Thank you @Illviljan
* Apply suggestions from code review

  Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
* Update asv_bench/benchmarks/merge.py

  Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
* Move data set creation definition
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci
* Add attrs
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci
* Update asv_bench/benchmarks/merge.py
* Update asv_bench/benchmarks/merge.py
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 93d1eab commit bc35e39

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed

asv_bench/benchmarks/merge.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import numpy as np
2+
13
import xarray as xr
24

35

@@ -13,6 +15,63 @@ def setup(self, existing_elements):
1315
d[f"var{i}"] = i
1416
self.dataset = xr.merge([d])
1517

18+
d = {f"set_2_{i}": i for i in range(existing_elements)}
19+
self.dataset2 = xr.merge([d])
20+
1621
def time_variable_insertion(self, existing_elements):
    """Time assigning a scalar under the key ``"new_var"`` on ``self.dataset``.

    The assignment is done in place on the object stored on the instance,
    so ``self.dataset`` carries the ``"new_var"`` entry afterwards.

    ``existing_elements`` is not read in this body; it matches the
    signature of ``setup`` (presumably the benchmark parameter -- confirm
    against the class-level ``params``).
    """
    # Item assignment straight on the attribute; no copy is made.
    self.dataset["new_var"] = 0
24+
25+
def time_merge_two_datasets(self, existing_elements):
    """Time ``xr.merge`` over ``self.dataset`` and ``self.dataset2``.

    The merged result is discarded; only the call itself matters.
    ``existing_elements`` is not read in this body (presumably the
    benchmark parameter that sized both inputs -- confirm against setup).
    """
    operands = [self.dataset, self.dataset2]
    xr.merge(operands)
27+
28+
29+
class DatasetCreation:
    """Benchmark building an ``xr.Dataset`` from NumPy and Python inputs.

    The idea here is to time how long it takes to go from numpy and python
    data types to a full dataset. See discussion:
    https://github.com/pydata/xarray/issues/7224#issuecomment-1292216344

    The benchmark is parametrised over ``strategy`` (how each data variable
    is expressed before being handed to ``xr.Dataset``) and ``count`` (how
    many data variables are created).
    """

    param_names = ["strategy", "count"]
    params = [
        ["dict_of_DataArrays", "dict_of_Variables", "dict_of_Tuples"],
        [0, 1, 10, 100, 1000],
    ]

    def setup(self, strategy, count):
        """Prepare shared inputs and the per-strategy ``data_vars`` factory.

        Stores ``dataset_coords``, ``dataset_attrs`` and a zero-argument
        callable ``create_data_vars`` on the instance. The callable builds a
        fresh dict with ``count`` entries each time it is invoked, so that
        building the per-variable objects is part of what
        ``time_dataset_creation`` measures.

        Raises
        ------
        ValueError
            If ``strategy`` is not one of the names listed in ``params``.
        """
        data = np.array(["0", "b"], dtype=str)
        self.dataset_coords = dict(time=np.array([0, 1]))
        self.dataset_attrs = dict(description="Test data")
        attrs = dict(units="Celsius")
        if strategy == "dict_of_DataArrays":

            def create_data_vars():
                return {
                    # ``dims`` is a plain string naming the single dimension.
                    f"long_variable_name_{i}": xr.DataArray(
                        data=data, dims="time", attrs=attrs
                    )
                    for i in range(count)
                }

        elif strategy == "dict_of_Variables":

            def create_data_vars():
                return {
                    f"long_variable_name_{i}": xr.Variable("time", data, attrs=attrs)
                    for i in range(count)
                }

        elif strategy == "dict_of_Tuples":

            def create_data_vars():
                return {
                    f"long_variable_name_{i}": ("time", data, attrs)
                    for i in range(count)
                }

        else:
            # Fail loudly instead of hitting an unbound local name below.
            raise ValueError(f"Unknown dataset creation strategy: {strategy!r}")

        self.create_data_vars = create_data_vars

    def time_dataset_creation(self, strategy, count):
        """Time building the data variables and the ``xr.Dataset`` itself.

        ``strategy`` and ``count`` are not read here; their effect is baked
        into ``self.create_data_vars`` by ``setup``.
        """
        data_vars = self.create_data_vars()
        xr.Dataset(
            data_vars=data_vars, coords=self.dataset_coords, attrs=self.dataset_attrs
        )

0 commit comments

Comments
 (0)