Skip to content

Commit 62b2712

Browse files
authored
Merge pull request #2423 from pbalcer/improve-html
[benchmarks] improve html output
2 parents 55863ae + 8a23597 commit 62b2712

File tree

1 file changed

+44
-221
lines changed

1 file changed

+44
-221
lines changed

scripts/benchmarks/output_html.py

Lines changed: 44 additions & 221 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
from collections import defaultdict
1010
from dataclasses import dataclass
1111
import matplotlib.dates as mdates
12-
import numpy as np
1312
from benches.result import BenchmarkRun, Result
1413

1514
@dataclass
@@ -24,220 +23,21 @@ class BenchmarkSeries:
2423
runs: list[BenchmarkRun]
2524

2625
@dataclass
27-
class LatestResults:
28-
benchmark_label: str
29-
run_values: dict[str, float]
30-
31-
@classmethod
32-
def from_dict(cls, label: str, values: dict[str, float]) -> 'LatestResults':
33-
return cls(benchmark_label=label, run_values=values)
34-
35-
def get_latest_results(benchmarks: list[BenchmarkSeries]) -> dict[str, LatestResults]:
36-
latest_results: dict[str, LatestResults] = {}
37-
for benchmark in benchmarks:
38-
run_values = {
39-
run.name: max(run.results, key=lambda x: x.date).value
40-
for run in benchmark.runs
41-
}
42-
latest_results[benchmark.label] = LatestResults.from_dict(benchmark.label, run_values)
43-
return latest_results
44-
45-
def prepare_normalized_data(latest_results: dict[str, LatestResults],
46-
benchmarks: list[BenchmarkSeries],
47-
group_benchmarks: list[str],
48-
non_baseline_runs: list[str],
49-
baseline_name: str) -> list[list[float]]:
50-
normalized_data = []
51-
benchmark_map = {b.label: b for b in benchmarks}
52-
53-
for run_name in non_baseline_runs:
54-
run_data: list[float] = []
55-
for benchmark_label in group_benchmarks:
56-
benchmark_data = latest_results[benchmark_label].run_values
57-
if run_name not in benchmark_data or baseline_name not in benchmark_data:
58-
run_data.append(None)
59-
continue
60-
61-
baseline_value = benchmark_data[baseline_name]
62-
current_value = benchmark_data[run_name]
63-
64-
normalized_value = ((baseline_value / current_value) if benchmark_map[benchmark_label].metadata.lower_is_better
65-
else (current_value / baseline_value)) * 100
66-
run_data.append(normalized_value)
67-
normalized_data.append(run_data)
68-
return normalized_data
69-
70-
def format_benchmark_label(label: str) -> list[str]:
71-
words = re.split(' |_', label)
72-
lines = []
73-
current_line = []
74-
75-
# max line length 30
76-
for word in words:
77-
if len(' '.join(current_line + [word])) > 30:
78-
lines.append(' '.join(current_line))
79-
current_line = [word]
80-
else:
81-
current_line.append(word)
82-
83-
if current_line:
84-
lines.append(' '.join(current_line))
85-
86-
return lines
87-
88-
def create_bar_plot(ax: plt.Axes,
89-
normalized_data: list[list[float]],
90-
group_benchmarks: list[str],
91-
non_baseline_runs: list[str],
92-
latest_results: dict[str, LatestResults],
93-
benchmarks: list[BenchmarkSeries],
94-
baseline_name: str) -> float:
95-
x = np.arange(len(group_benchmarks))
96-
width = 0.8 / len(non_baseline_runs)
97-
max_height = 0
98-
benchmark_map = {b.label: b for b in benchmarks}
99-
100-
for i, (run_name, run_data) in enumerate(zip(non_baseline_runs, normalized_data)):
101-
offset = width * i - width * (len(non_baseline_runs) - 1) / 2
102-
positions = x + offset
103-
valid_data = [v if v is not None else 0 for v in run_data]
104-
rects = ax.bar(positions, valid_data, width, label=run_name)
105-
106-
for rect, value, benchmark_label in zip(rects, run_data, group_benchmarks):
107-
if value is not None:
108-
height = rect.get_height()
109-
if height > max_height:
110-
max_height = height
111-
112-
ax.text(rect.get_x() + rect.get_width()/2., height + 2,
113-
f'{value:.1f}%',
114-
ha='center', va='bottom')
115-
116-
benchmark_data = latest_results[benchmark_label].run_values
117-
baseline_value = benchmark_data[baseline_name]
118-
current_value = benchmark_data[run_name]
119-
unit = benchmark_map[benchmark_label].metadata.unit
120-
121-
tooltip_labels = [
122-
f"Run: {run_name}\n"
123-
f"Value: {current_value:.2f} {unit}\n"
124-
f"Normalized to ({baseline_name}): {baseline_value:.2f} {unit}\n"
125-
f"Normalized: {value:.1f}%"
126-
]
127-
tooltip = mpld3.plugins.LineHTMLTooltip(rect, tooltip_labels, css='.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}')
128-
mpld3.plugins.connect(ax.figure, tooltip)
129-
130-
return max_height
131-
132-
def add_chart_elements(ax: plt.Axes,
133-
group_benchmarks: list[str],
134-
group_name: str,
135-
max_height: float) -> None:
136-
top_padding = max_height * 0.2
137-
ax.set_ylim(0, max_height + top_padding)
138-
ax.set_ylabel('Performance relative to baseline (%)')
139-
ax.set_title(f'Performance Comparison (Normalized to Baseline) - {group_name} Group')
140-
ax.set_xticks([])
141-
142-
for idx, label in enumerate(group_benchmarks):
143-
split_labels = format_benchmark_label(label)
144-
for i, sublabel in enumerate(split_labels):
145-
y_pos = max_height + (top_padding * 0.5) + 2 - (i * top_padding * 0.15)
146-
ax.text(idx, y_pos, sublabel,
147-
ha='center',
148-
style='italic',
149-
color='#666666')
150-
151-
ax.grid(True, axis='y', alpha=0.2)
152-
ax.legend(bbox_to_anchor=(1, 1), loc='upper left')
153-
154-
def split_large_groups(benchmark_groups):
155-
miscellaneous = []
156-
new_groups = defaultdict(list)
157-
158-
split_happened = False
159-
for group, labels in benchmark_groups.items():
160-
if len(labels) == 1:
161-
miscellaneous.extend(labels)
162-
elif len(labels) > 5:
163-
split_happened = True
164-
mid = len(labels) // 2
165-
new_groups[group] = labels[:mid]
166-
new_groups[group + '_'] = labels[mid:]
167-
else:
168-
new_groups[group] = labels
169-
170-
if miscellaneous:
171-
new_groups['Miscellaneous'] = miscellaneous
172-
173-
if split_happened:
174-
return split_large_groups(new_groups)
175-
else:
176-
return new_groups
177-
178-
def group_benchmark_labels(benchmark_labels):
179-
benchmark_groups = defaultdict(list)
180-
for label in benchmark_labels:
181-
group = re.match(r'^[^_\s]+', label)[0]
182-
benchmark_groups[group].append(label)
183-
return split_large_groups(benchmark_groups)
184-
185-
def create_normalized_bar_chart(benchmarks: list[BenchmarkSeries], baseline_name: str) -> list[str]:
186-
latest_results = get_latest_results(benchmarks)
187-
188-
run_names = sorted(list(set(
189-
name for result in latest_results.values()
190-
for name in result.run_values.keys()
191-
)))
192-
193-
if baseline_name not in run_names:
194-
return []
195-
196-
benchmark_labels = [b.label for b in benchmarks]
197-
198-
benchmark_groups = group_benchmark_labels(benchmark_labels)
199-
200-
html_charts = []
201-
202-
for group_name, group_benchmarks in benchmark_groups.items():
203-
plt.close('all')
204-
non_baseline_runs = [n for n in run_names if n != baseline_name]
205-
206-
if len(non_baseline_runs) == 0:
207-
continue
208-
209-
normalized_data = prepare_normalized_data(
210-
latest_results, benchmarks, group_benchmarks,
211-
non_baseline_runs, baseline_name
212-
)
213-
214-
fig, ax = plt.subplots(figsize=(10, 6))
215-
max_height = create_bar_plot(
216-
ax, normalized_data, group_benchmarks, non_baseline_runs,
217-
latest_results, benchmarks, baseline_name
218-
)
219-
add_chart_elements(ax, group_benchmarks, group_name, max_height)
220-
221-
plt.tight_layout()
222-
html_charts.append(mpld3.fig_to_html(fig))
223-
plt.close(fig)
224-
225-
return html_charts
26+
class BenchmarkTimeSeries:
27+
label: str
28+
html: str
22629

227-
def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str) -> str:
30+
def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str) -> list[BenchmarkTimeSeries]:
22831
plt.close('all')
22932

23033
num_benchmarks = len(benchmarks)
23134
if num_benchmarks == 0:
23235
return
23336

234-
fig, axes = plt.subplots(num_benchmarks, 1, figsize=(10, max(4 * num_benchmarks, 30)))
235-
236-
if num_benchmarks == 1:
237-
axes = [axes]
37+
html_charts = []
23838

239-
for idx, benchmark in enumerate(benchmarks):
240-
ax = axes[idx]
39+
for _, benchmark in enumerate(benchmarks):
40+
fig, ax = plt.subplots(figsize=(10, 4))
24141

24242
for run in benchmark.runs:
24343
sorted_points = sorted(run.results, key=lambda x: x.date)
@@ -277,13 +77,12 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str
27777
ax.grid(True, alpha=0.2)
27878
ax.legend(bbox_to_anchor=(1, 1), loc='upper left')
27979
ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter('%Y-%m-%d %H:%M:%S'))
280-
ax.xaxis.set_major_locator(mdates.AutoDateLocator())
28180

282-
plt.tight_layout()
283-
html = mpld3.fig_to_html(fig)
81+
plt.tight_layout()
82+
html_charts.append(BenchmarkTimeSeries(html= mpld3.fig_to_html(fig), label= benchmark.label))
83+
plt.close(fig)
28484

285-
plt.close(fig)
286-
return html
85+
return html_charts
28786

28887
def process_benchmark_data(benchmark_runs: list[BenchmarkRun], compare_names: list[str]) -> list[BenchmarkSeries]:
28988
benchmark_metadata: dict[str, BenchmarkMetadata] = {}
@@ -319,12 +118,10 @@ def process_benchmark_data(benchmark_runs: list[BenchmarkRun], compare_names: li
319118
return benchmark_series
320119

321120
def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_names: list[str]) -> str:
322-
baseline_name = compare_names[0]
323121
benchmarks = process_benchmark_data(benchmark_runs, compare_names)
324122

325-
comparison_html_charts = create_normalized_bar_chart(benchmarks, baseline_name)
326-
timeseries_html = create_time_series_chart(benchmarks, github_repo)
327-
comparison_charts_html = '\n'.join(f'<div class="chart"><div>{chart}</div></div>' for chart in comparison_html_charts)
123+
timeseries = create_time_series_chart(benchmarks, github_repo)
124+
timeseries_charts_html = '\n'.join(f'<div class="chart" data-label="{ts.label}"><div>{ts.html}</div></div>' for ts in timeseries)
328125

329126
html_template = f"""
330127
<!DOCTYPE html>
@@ -375,18 +172,44 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_
375172
margin-bottom: 16px;
376173
}}
377174
}}
175+
.filter-container {{
176+
text-align: center;
177+
margin-bottom: 24px;
178+
}}
179+
.filter-container input {{
180+
padding: 8px;
181+
font-size: 16px;
182+
border: 1px solid #ccc;
183+
border-radius: 4px;
184+
width: 400px;
185+
max-width: 100%;
186+
}}
378187
</style>
188+
<script>
189+
function filterCharts() {{
190+
const regexInput = document.getElementById('bench-filter').value;
191+
const regex = new RegExp(regexInput, 'i');
192+
const charts = document.querySelectorAll('.chart');
193+
charts.forEach(chart => {{
194+
const label = chart.getAttribute('data-label');
195+
if (regex.test(label)) {{
196+
chart.style.display = '';
197+
}} else {{
198+
chart.style.display = 'none';
199+
}}
200+
}});
201+
}}
202+
</script>
379203
</head>
380204
<body>
381205
<div class="container">
382206
<h1>Benchmark Results</h1>
383-
<h2>Latest Results Comparison</h2>
384-
<div class="chart">
385-
{comparison_charts_html}
207+
<div class="filter-container">
208+
<input type="text" id="bench-filter" placeholder="Regex..." oninput="filterCharts()">
386209
</div>
387210
<h2>Historical Results</h2>
388-
<div class="chart">
389-
{timeseries_html}
211+
<div class="charts">
212+
{timeseries_charts_html}
390213
</div>
391214
</div>
392215
</body>

0 commit comments

Comments
 (0)