@@ -119,8 +119,6 @@ def load_data_series(report_path: Path, tag=None, *, reemit: bool = False):
119
119
# Dictionary mapping EMF dimensions to A/B-testable metrics/properties
120
120
processed_emf = {}
121
121
122
- distinct_values_per_dimenson = defaultdict (set )
123
-
124
122
report = json .loads (report_path .read_text ("UTF-8" ))
125
123
for test in report ["tests" ]:
126
124
for line in test ["teardown" ]["stdout" ].splitlines ():
@@ -140,9 +138,6 @@ def load_data_series(report_path: Path, tag=None, *, reemit: bool = False):
140
138
if not dimensions :
141
139
continue
142
140
143
- for dimension , value in dimensions .items ():
144
- distinct_values_per_dimenson [dimension ].add (value )
145
-
146
141
dimension_set = frozenset (dimensions .items ())
147
142
148
143
if dimension_set not in processed_emf :
@@ -159,24 +154,27 @@ def load_data_series(report_path: Path, tag=None, *, reemit: bool = False):
159
154
160
155
values .extend (result [metric ][0 ])
161
156
162
- irrelevant_dimensions = set ()
157
+ return processed_emf
163
158
164
- for dimension , distinct_values in distinct_values_per_dimenson .items ():
165
- if len (distinct_values ) == 1 :
166
- irrelevant_dimensions .add (dimension )
167
159
168
- post_processed_emf = {}
160
def uninteresting_dimensions(processed_emf):
    """
    Computes the set of cloudwatch dimensions that only ever take on a
    single value across the entire dataset.

    :param processed_emf: mapping (or any other iterable) whose keys are
        dimension sets, i.e. iterables of `(dimension, value)` pairs
    :return: set containing the name of every dimension for which exactly
        one distinct value was observed
    """
    values_per_dimension = defaultdict(set)

    # Only the keys (the dimension sets) are inspected; whatever is stored
    # alongside them is irrelevant for this computation.
    for dimension_set in processed_emf:
        for dimension, value in dimension_set:
            values_per_dimension[dimension].add(value)

    # A dimension that is absent from some dimension sets but always has the
    # same value where present still counts as single-valued.
    return {
        dimension
        for dimension, distinct_values in values_per_dimension.items()
        if len(distinct_values) == 1
    }
180
178
181
179
182
180
def collect_data (binary_dir : Path , pytest_opts : str ):
@@ -304,6 +302,7 @@ def analyze_data(
304
302
)
305
303
306
304
messages = []
305
+ do_not_print_list = uninteresting_dimensions (processed_emf_a )
307
306
for dimension_set , metric , result , unit in failures :
308
307
# Sanity check as described above
309
308
if abs (statistics .mean (relative_changes_by_metric [metric ])) <= noise_threshold :
@@ -325,7 +324,7 @@ def analyze_data(
325
324
f"for metric \033 [1m{ metric } \033 [0m with \033 [0;31m\033 [1mp={ result .pvalue } \033 [0m. "
326
325
f"This means that observing a change of this magnitude or worse, assuming that performance "
327
326
f"characteristics did not change across the tested commits, has a probability of { result .pvalue :.2%} . "
328
# Convert through dict() first, as the previous version of this line did: a
# dimension set built via frozenset(dimensions.items()) has no .items() of its
# own. Dimensions listed in do_not_print_list are left out of the message.
f"Tested Dimensions:\n{json.dumps({k: v for k, v in dict(dimension_set).items() if k not in do_not_print_list}, indent=2, sort_keys=True)}"
329
328
)
330
329
messages .append (msg )
331
330
0 commit comments