Skip to content

Commit 666b329

Browse files
committed
fix(ab): only reduce dimension set for printing
By reducing the dimension set to eliminate all dimensions that only ever take on a single value straight during parsing, we broke the ignore list, which does require all dimensions to be present. Fix this by moving the "dimension reduction" to only happen during printing of failure messages. Fixes: fcb39a6 ("test(ab): do not print dimension that are the same across all metrics") Signed-off-by: Patrick Roy <roypat@amazon.co.uk>
1 parent dafe4e1 commit 666b329

File tree

1 file changed

+18
-19
lines changed

1 file changed

+18
-19
lines changed

tools/ab_test.py

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,6 @@ def load_data_series(report_path: Path, tag=None, *, reemit: bool = False):
119119
# Dictionary mapping EMF dimensions to A/B-testable metrics/properties
120120
processed_emf = {}
121121

122-
distinct_values_per_dimenson = defaultdict(set)
123-
124122
report = json.loads(report_path.read_text("UTF-8"))
125123
for test in report["tests"]:
126124
for line in test["teardown"]["stdout"].splitlines():
@@ -140,9 +138,6 @@ def load_data_series(report_path: Path, tag=None, *, reemit: bool = False):
140138
if not dimensions:
141139
continue
142140

143-
for dimension, value in dimensions.items():
144-
distinct_values_per_dimenson[dimension].add(value)
145-
146141
dimension_set = frozenset(dimensions.items())
147142

148143
if dimension_set not in processed_emf:
@@ -159,24 +154,27 @@ def load_data_series(report_path: Path, tag=None, *, reemit: bool = False):
159154

160155
values.extend(result[metric][0])
161156

162-
irrelevant_dimensions = set()
157+
return processed_emf
163158

164-
for dimension, distinct_values in distinct_values_per_dimenson.items():
165-
if len(distinct_values) == 1:
166-
irrelevant_dimensions.add(dimension)
167159

168-
post_processed_emf = {}
160+
def uninteresting_dimensions(processed_emf):
161+
"""
162+
Computes the set of cloudwatch dimensions that only ever take on a
163+
single value across the entire dataset.
164+
"""
165+
values_per_dimension = defaultdict(set)
166+
167+
for dimension_set in processed_emf:
168+
for dimension, value in dimension_set:
169+
values_per_dimension[dimension].add(value)
169170

170-
for dimension_set, metrics in processed_emf.items():
171-
processed_key = frozenset(
172-
(dim, value)
173-
for (dim, value) in dimension_set
174-
if dim not in irrelevant_dimensions
175-
)
171+
uninteresting = set()
176172

177-
post_processed_emf[processed_key] = metrics
173+
for dimension, distinct_values in values_per_dimension.items():
174+
if len(distinct_values) == 1:
175+
uninteresting.add(dimension)
178176

179-
return post_processed_emf
177+
return uninteresting
180178

181179

182180
def collect_data(binary_dir: Path, pytest_opts: str):
@@ -304,6 +302,7 @@ def analyze_data(
304302
)
305303

306304
messages = []
305+
do_not_print_list = uninteresting_dimensions(processed_emf_a)
307306
for dimension_set, metric, result, unit in failures:
308307
# Sanity check as described above
309308
if abs(statistics.mean(relative_changes_by_metric[metric])) <= noise_threshold:
@@ -325,7 +324,7 @@ def analyze_data(
325324
f"for metric \033[1m{metric}\033[0m with \033[0;31m\033[1mp={result.pvalue}\033[0m. "
326325
f"This means that observing a change of this magnitude or worse, assuming that performance "
327326
f"characteristics did not change across the tested commits, has a probability of {result.pvalue:.2%}. "
328-
f"Tested Dimensions:\n{json.dumps(dict(dimension_set), indent=2, sort_keys=True)}"
327+
f"Tested Dimensions:\n{json.dumps(dict({k: v for k,v in dimension_set.items() if k not in do_not_print_list}), indent=2, sort_keys=True)}"
329328
)
330329
messages.append(msg)
331330

0 commit comments

Comments (0)