Commit 8a23597

improve html output
This removes the bar charts, since they were complicated to create and not really useful. The timeline charts are also now drawn individually and are filterable.
1 parent bd39fbe commit 8a23597
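
As context for the change, here is a minimal sketch of the per-chart rendering pattern the commit moves to: one small matplotlib figure per benchmark, converted to HTML with mpld3, and wrapped in a div that carries the benchmark label so the page can filter it. ChartData and the plotting details are hypothetical stand-ins for the repo's BenchmarkSeries, not the actual implementation.

# Sketch only: assumes matplotlib and mpld3 are installed.
# ChartData is a hypothetical stand-in for BenchmarkSeries.
from dataclasses import dataclass

import matplotlib.pyplot as plt
import mpld3


@dataclass
class ChartData:
    label: str
    x: list[float]
    y: list[float]


def render_charts(charts: list[ChartData]) -> str:
    # One figure per benchmark instead of a single tall figure with
    # stacked axes; each chart div carries its label for client-side filtering.
    html_parts = []
    for chart in charts:
        fig, ax = plt.subplots(figsize=(10, 4))
        ax.plot(chart.x, chart.y)
        ax.set_title(chart.label)
        html_parts.append(
            f'<div class="chart" data-label="{chart.label}">'
            f'<div>{mpld3.fig_to_html(fig)}</div></div>'
        )
        plt.close(fig)
    return '\n'.join(html_parts)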

File tree

1 file changed: +44, -221 lines


scripts/benchmarks/output_html.py

Lines changed: 44 additions & 221 deletions
@@ -9,7 +9,6 @@
from collections import defaultdict
from dataclasses import dataclass
import matplotlib.dates as mdates
-import numpy as np
from benches.result import BenchmarkRun, Result

@dataclass
@@ -24,220 +23,21 @@ class BenchmarkSeries:
    runs: list[BenchmarkRun]

@dataclass
-class LatestResults:
-    benchmark_label: str
-    run_values: dict[str, float]
-
-    @classmethod
-    def from_dict(cls, label: str, values: dict[str, float]) -> 'LatestResults':
-        return cls(benchmark_label=label, run_values=values)
-
-def get_latest_results(benchmarks: list[BenchmarkSeries]) -> dict[str, LatestResults]:
-    latest_results: dict[str, LatestResults] = {}
-    for benchmark in benchmarks:
-        run_values = {
-            run.name: max(run.results, key=lambda x: x.date).value
-            for run in benchmark.runs
-        }
-        latest_results[benchmark.label] = LatestResults.from_dict(benchmark.label, run_values)
-    return latest_results
-
-def prepare_normalized_data(latest_results: dict[str, LatestResults],
-                            benchmarks: list[BenchmarkSeries],
-                            group_benchmarks: list[str],
-                            non_baseline_runs: list[str],
-                            baseline_name: str) -> list[list[float]]:
-    normalized_data = []
-    benchmark_map = {b.label: b for b in benchmarks}
-
-    for run_name in non_baseline_runs:
-        run_data: list[float] = []
-        for benchmark_label in group_benchmarks:
-            benchmark_data = latest_results[benchmark_label].run_values
-            if run_name not in benchmark_data or baseline_name not in benchmark_data:
-                run_data.append(None)
-                continue
-
-            baseline_value = benchmark_data[baseline_name]
-            current_value = benchmark_data[run_name]
-
-            normalized_value = ((baseline_value / current_value) if benchmark_map[benchmark_label].metadata.lower_is_better
-                                else (current_value / baseline_value)) * 100
-            run_data.append(normalized_value)
-        normalized_data.append(run_data)
-    return normalized_data
-
-def format_benchmark_label(label: str) -> list[str]:
-    words = re.split(' |_', label)
-    lines = []
-    current_line = []
-
-    # max line length 30
-    for word in words:
-        if len(' '.join(current_line + [word])) > 30:
-            lines.append(' '.join(current_line))
-            current_line = [word]
-        else:
-            current_line.append(word)
-
-    if current_line:
-        lines.append(' '.join(current_line))
-
-    return lines
-
-def create_bar_plot(ax: plt.Axes,
-                    normalized_data: list[list[float]],
-                    group_benchmarks: list[str],
-                    non_baseline_runs: list[str],
-                    latest_results: dict[str, LatestResults],
-                    benchmarks: list[BenchmarkSeries],
-                    baseline_name: str) -> float:
-    x = np.arange(len(group_benchmarks))
-    width = 0.8 / len(non_baseline_runs)
-    max_height = 0
-    benchmark_map = {b.label: b for b in benchmarks}
-
-    for i, (run_name, run_data) in enumerate(zip(non_baseline_runs, normalized_data)):
-        offset = width * i - width * (len(non_baseline_runs) - 1) / 2
-        positions = x + offset
-        valid_data = [v if v is not None else 0 for v in run_data]
-        rects = ax.bar(positions, valid_data, width, label=run_name)
-
-        for rect, value, benchmark_label in zip(rects, run_data, group_benchmarks):
-            if value is not None:
-                height = rect.get_height()
-                if height > max_height:
-                    max_height = height
-
-                ax.text(rect.get_x() + rect.get_width()/2., height + 2,
-                        f'{value:.1f}%',
-                        ha='center', va='bottom')
-
-                benchmark_data = latest_results[benchmark_label].run_values
-                baseline_value = benchmark_data[baseline_name]
-                current_value = benchmark_data[run_name]
-                unit = benchmark_map[benchmark_label].metadata.unit
-
-                tooltip_labels = [
-                    f"Run: {run_name}\n"
-                    f"Value: {current_value:.2f} {unit}\n"
-                    f"Normalized to ({baseline_name}): {baseline_value:.2f} {unit}\n"
-                    f"Normalized: {value:.1f}%"
-                ]
-                tooltip = mpld3.plugins.LineHTMLTooltip(rect, tooltip_labels, css='.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}')
-                mpld3.plugins.connect(ax.figure, tooltip)
-
-    return max_height
-
-def add_chart_elements(ax: plt.Axes,
-                       group_benchmarks: list[str],
-                       group_name: str,
-                       max_height: float) -> None:
-    top_padding = max_height * 0.2
-    ax.set_ylim(0, max_height + top_padding)
-    ax.set_ylabel('Performance relative to baseline (%)')
-    ax.set_title(f'Performance Comparison (Normalized to Baseline) - {group_name} Group')
-    ax.set_xticks([])
-
-    for idx, label in enumerate(group_benchmarks):
-        split_labels = format_benchmark_label(label)
-        for i, sublabel in enumerate(split_labels):
-            y_pos = max_height + (top_padding * 0.5) + 2 - (i * top_padding * 0.15)
-            ax.text(idx, y_pos, sublabel,
-                    ha='center',
-                    style='italic',
-                    color='#666666')
-
-    ax.grid(True, axis='y', alpha=0.2)
-    ax.legend(bbox_to_anchor=(1, 1), loc='upper left')
-
-def split_large_groups(benchmark_groups):
-    miscellaneous = []
-    new_groups = defaultdict(list)
-
-    split_happened = False
-    for group, labels in benchmark_groups.items():
-        if len(labels) == 1:
-            miscellaneous.extend(labels)
-        elif len(labels) > 5:
-            split_happened = True
-            mid = len(labels) // 2
-            new_groups[group] = labels[:mid]
-            new_groups[group + '_'] = labels[mid:]
-        else:
-            new_groups[group] = labels
-
-    if miscellaneous:
-        new_groups['Miscellaneous'] = miscellaneous
-
-    if split_happened:
-        return split_large_groups(new_groups)
-    else:
-        return new_groups
-
-def group_benchmark_labels(benchmark_labels):
-    benchmark_groups = defaultdict(list)
-    for label in benchmark_labels:
-        group = re.match(r'^[^_\s]+', label)[0]
-        benchmark_groups[group].append(label)
-    return split_large_groups(benchmark_groups)
-
-def create_normalized_bar_chart(benchmarks: list[BenchmarkSeries], baseline_name: str) -> list[str]:
-    latest_results = get_latest_results(benchmarks)
-
-    run_names = sorted(list(set(
-        name for result in latest_results.values()
-        for name in result.run_values.keys()
-    )))
-
-    if baseline_name not in run_names:
-        return []
-
-    benchmark_labels = [b.label for b in benchmarks]
-
-    benchmark_groups = group_benchmark_labels(benchmark_labels)
-
-    html_charts = []
-
-    for group_name, group_benchmarks in benchmark_groups.items():
-        plt.close('all')
-        non_baseline_runs = [n for n in run_names if n != baseline_name]
-
-        if len(non_baseline_runs) == 0:
-            continue
-
-        normalized_data = prepare_normalized_data(
-            latest_results, benchmarks, group_benchmarks,
-            non_baseline_runs, baseline_name
-        )
-
-        fig, ax = plt.subplots(figsize=(10, 6))
-        max_height = create_bar_plot(
-            ax, normalized_data, group_benchmarks, non_baseline_runs,
-            latest_results, benchmarks, baseline_name
-        )
-        add_chart_elements(ax, group_benchmarks, group_name, max_height)
-
-        plt.tight_layout()
-        html_charts.append(mpld3.fig_to_html(fig))
-        plt.close(fig)
-
-    return html_charts
+class BenchmarkTimeSeries:
+    label: str
+    html: str

-def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str) -> str:
+def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str) -> list[BenchmarkTimeSeries]:
    plt.close('all')

    num_benchmarks = len(benchmarks)
    if num_benchmarks == 0:
        return

-    fig, axes = plt.subplots(num_benchmarks, 1, figsize=(10, max(4 * num_benchmarks, 30)))
-
-    if num_benchmarks == 1:
-        axes = [axes]
+    html_charts = []

-    for idx, benchmark in enumerate(benchmarks):
-        ax = axes[idx]
+    for _, benchmark in enumerate(benchmarks):
+        fig, ax = plt.subplots(figsize=(10, 4))

        for run in benchmark.runs:
            sorted_points = sorted(run.results, key=lambda x: x.date)
@@ -277,13 +77,12 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str
        ax.grid(True, alpha=0.2)
        ax.legend(bbox_to_anchor=(1, 1), loc='upper left')
        ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter('%Y-%m-%d %H:%M:%S'))
-        ax.xaxis.set_major_locator(mdates.AutoDateLocator())

-    plt.tight_layout()
-    html = mpld3.fig_to_html(fig)
+        plt.tight_layout()
+        html_charts.append(BenchmarkTimeSeries(html= mpld3.fig_to_html(fig), label= benchmark.label))
+        plt.close(fig)

-    plt.close(fig)
-    return html
+    return html_charts

def process_benchmark_data(benchmark_runs: list[BenchmarkRun], compare_names: list[str]) -> list[BenchmarkSeries]:
    benchmark_metadata: dict[str, BenchmarkMetadata] = {}
@@ -319,12 +118,10 @@ def process_benchmark_data(benchmark_runs: list[BenchmarkRun], compare_names: li
    return benchmark_series

def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_names: list[str]) -> str:
-    baseline_name = compare_names[0]
    benchmarks = process_benchmark_data(benchmark_runs, compare_names)

-    comparison_html_charts = create_normalized_bar_chart(benchmarks, baseline_name)
-    timeseries_html = create_time_series_chart(benchmarks, github_repo)
-    comparison_charts_html = '\n'.join(f'<div class="chart"><div>{chart}</div></div>' for chart in comparison_html_charts)
+    timeseries = create_time_series_chart(benchmarks, github_repo)
+    timeseries_charts_html = '\n'.join(f'<div class="chart" data-label="{ts.label}"><div>{ts.html}</div></div>' for ts in timeseries)

    html_template = f"""
    <!DOCTYPE html>
@@ -375,18 +172,44 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_
                margin-bottom: 16px;
            }}
        }}
+        .filter-container {{
+            text-align: center;
+            margin-bottom: 24px;
+        }}
+        .filter-container input {{
+            padding: 8px;
+            font-size: 16px;
+            border: 1px solid #ccc;
+            border-radius: 4px;
+            width: 400px;
+            max-width: 100%;
+        }}
    </style>
+    <script>
+        function filterCharts() {{
+            const regexInput = document.getElementById('bench-filter').value;
+            const regex = new RegExp(regexInput, 'i');
+            const charts = document.querySelectorAll('.chart');
+            charts.forEach(chart => {{
+                const label = chart.getAttribute('data-label');
+                if (regex.test(label)) {{
+                    chart.style.display = '';
+                }} else {{
+                    chart.style.display = 'none';
+                }}
+            }});
+        }}
+    </script>
    </head>
    <body>
        <div class="container">
            <h1>Benchmark Results</h1>
-            <h2>Latest Results Comparison</h2>
-            <div class="chart">
-                {comparison_charts_html}
+            <div class="filter-container">
+                <input type="text" id="bench-filter" placeholder="Regex..." oninput="filterCharts()">
            </div>
            <h2>Historical Results</h2>
-            <div class="chart">
-                {timeseries_html}
+            <div class="charts">
+                {timeseries_charts_html}
            </div>
        </div>
    </body>
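
A note on the filter semantics added above: the text-box input is treated as a case-insensitive regular expression (new RegExp(regexInput, 'i')), and regex.test matches anywhere in the label rather than requiring a full match. A small Python sketch of the equivalent matching logic, using hypothetical benchmark labels:

import re

# Hypothetical labels; re.search mirrors JavaScript's regex.test.
labels = ['submit_kernel_in_order', 'submit_kernel_out_of_order', 'memcpy_h2d']
pattern = re.compile('submit', re.IGNORECASE)

visible = [label for label in labels if pattern.search(label)]
print(visible)  # ['submit_kernel_in_order', 'submit_kernel_out_of_order']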
