9
9
from collections import defaultdict
10
10
from dataclasses import dataclass
11
11
import matplotlib .dates as mdates
12
- import numpy as np
13
12
from benches .result import BenchmarkRun , Result
14
13
15
14
@dataclass
@@ -24,220 +23,21 @@ class BenchmarkSeries:
24
23
runs : list [BenchmarkRun ]
25
24
26
25
@dataclass
27
class LatestResults:
    """Per-run values recorded for a single benchmark.

    The class relies on the dataclass-generated constructor, so it must
    stay decorated with ``@dataclass``.
    """

    # Label of the benchmark the values belong to.
    benchmark_label: str
    # Maps a run name to the value reported by that run.
    run_values: dict[str, float]

    @classmethod
    def from_dict(cls, label: str, values: dict[str, float]) -> 'LatestResults':
        """Build an instance from a benchmark label and its run-name -> value map."""
        instance = cls(label, values)
        return instance
34
-
35
def get_latest_results(benchmarks: list[BenchmarkSeries]) -> dict[str, LatestResults]:
    """Collect, for every benchmark, the newest value reported by each run.

    Args:
        benchmarks: Series to inspect; each one exposes ``label`` and ``runs``,
            and every run exposes ``name`` and ``results`` (items with
            ``date`` and ``value``).

    Returns:
        A mapping from benchmark label to a ``LatestResults`` holding, per run
        name, the value of the result with the greatest ``date``. Runs that
        have no results are left out of the mapping.
    """
    latest_results: dict[str, LatestResults] = {}
    for benchmark in benchmarks:
        run_values = {
            run.name: max(run.results, key=lambda result: result.date).value
            for run in benchmark.runs
            # max() raises ValueError on an empty sequence, so skip empty runs.
            if run.results
        }
        latest_results[benchmark.label] = LatestResults.from_dict(benchmark.label, run_values)
    return latest_results
44
-
45
def prepare_normalized_data(latest_results: dict[str, LatestResults],
                            benchmarks: list[BenchmarkSeries],
                            group_benchmarks: list[str],
                            non_baseline_runs: list[str],
                            baseline_name: str) -> list[list[float]]:
    """Express each run's latest value as a percentage of the baseline run.

    Args:
        latest_results: Latest per-run values keyed by benchmark label.
        benchmarks: All series; used to look up ``metadata.lower_is_better``.
        group_benchmarks: Labels of the benchmarks to include, in output order.
        non_baseline_runs: Names of the runs to normalize, in output order.
        baseline_name: Name of the run every value is normalized against.

    Returns:
        One list per entry of ``non_baseline_runs``, each holding one item per
        entry of ``group_benchmarks``. An item is ``baseline / current * 100``
        when lower is better and ``current / baseline * 100`` otherwise. It is
        ``None`` when either run is missing for that benchmark or when the
        ratio would divide by zero.
    """
    benchmark_map = {b.label: b for b in benchmarks}
    normalized_data = []

    for run_name in non_baseline_runs:
        run_data = []
        for benchmark_label in group_benchmarks:
            run_values = latest_results[benchmark_label].run_values
            if run_name not in run_values or baseline_name not in run_values:
                run_data.append(None)
                continue

            baseline_value = run_values[baseline_name]
            current_value = run_values[run_name]
            if benchmark_map[benchmark_label].metadata.lower_is_better:
                numerator, denominator = baseline_value, current_value
            else:
                numerator, denominator = current_value, baseline_value

            # A zero denominator has no meaningful percentage; report it as a
            # missing value instead of raising ZeroDivisionError.
            if denominator == 0:
                run_data.append(None)
                continue

            run_data.append((numerator / denominator) * 100)
        normalized_data.append(run_data)
    return normalized_data
69
-
70
def format_benchmark_label(label: str, max_line_length: int = 30) -> list[str]:
    """Wrap a benchmark label into short lines.

    The label is split on spaces and underscores, and the words are re-joined
    with single spaces into lines of at most ``max_line_length`` characters.
    A single word longer than the limit is kept whole on its own line.

    Args:
        label: Benchmark label to wrap.
        max_line_length: Maximum length of a wrapped line (default 30).

    Returns:
        The wrapped lines, in order.
    """
    words = re.split(' |_', label)
    lines = []
    current_line = []

    for word in words:
        # Only flush a non-empty line: an over-long first word must not
        # produce a spurious empty leading line.
        if current_line and len(' '.join(current_line + [word])) > max_line_length:
            lines.append(' '.join(current_line))
            current_line = [word]
        else:
            current_line.append(word)

    if current_line:
        lines.append(' '.join(current_line))

    return lines
87
-
88
def create_bar_plot(ax: plt.Axes,
                    normalized_data: list[list[float]],
                    group_benchmarks: list[str],
                    non_baseline_runs: list[str],
                    latest_results: dict[str, LatestResults],
                    benchmarks: list[BenchmarkSeries],
                    baseline_name: str) -> float:
    """Draw grouped bars of normalized results and attach value tooltips.

    One bar series is drawn per non-baseline run, with one bar per benchmark
    in ``group_benchmarks``. Missing values (``None``) are drawn as
    zero-height bars and get neither a text label nor a tooltip.

    Args:
        ax: Axes to draw on.
        normalized_data: One list of percentages per non-baseline run.
        group_benchmarks: Benchmark labels, one per bar position.
        non_baseline_runs: Run names, parallel to ``normalized_data``.
        latest_results: Latest raw values keyed by benchmark label.
        benchmarks: All series; used to look up ``metadata.unit``.
        baseline_name: Name of the baseline run shown in the tooltip.

    Returns:
        The height of the tallest bar that has a value (0 if there is none).
    """
    series_by_label = {b.label: b for b in benchmarks}
    slots = np.arange(len(group_benchmarks))
    run_count = len(non_baseline_runs)
    width = 0.8 / run_count
    # Shift that centers the whole cluster of bars on each slot.
    centering = width * (run_count - 1) / 2
    tooltip_css = '.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}'
    max_height = 0

    for i, (run_name, run_data) in enumerate(zip(non_baseline_runs, normalized_data)):
        positions = slots + (width * i - centering)
        heights = [0 if v is None else v for v in run_data]
        rects = ax.bar(positions, heights, width, label=run_name)

        for rect, value, benchmark_label in zip(rects, run_data, group_benchmarks):
            if value is None:
                continue

            height = rect.get_height()
            max_height = max(max_height, height)

            # Percentage label just above the bar.
            ax.text(rect.get_x() + rect.get_width() / 2., height + 2,
                    f'{value:.1f}%',
                    ha='center', va='bottom')

            raw_values = latest_results[benchmark_label].run_values
            baseline_value = raw_values[baseline_name]
            current_value = raw_values[run_name]
            unit = series_by_label[benchmark_label].metadata.unit

            tooltip_labels = [
                f"Run: {run_name}\n"
                f"Value: {current_value:.2f} {unit}\n"
                f"Normalized to ({baseline_name}): {baseline_value:.2f} {unit}\n"
                f"Normalized: {value:.1f}%"
            ]
            tooltip = mpld3.plugins.LineHTMLTooltip(rect, tooltip_labels, css=tooltip_css)
            mpld3.plugins.connect(ax.figure, tooltip)

    return max_height
131
-
132
def add_chart_elements(ax: plt.Axes,
                       group_benchmarks: list[str],
                       group_name: str,
                       max_height: float) -> None:
    """Add axis limits, title, benchmark captions, grid and legend to a chart.

    Args:
        ax: Axes to decorate.
        group_benchmarks: Benchmark labels, one per x position.
        group_name: Group name shown in the chart title.
        max_height: Height of the tallest bar; 20% of it is added as headroom
            above the bars, and the captions are placed in that headroom.
    """
    headroom = max_height * 0.2
    ax.set_ylim(0, max_height + headroom)
    ax.set_ylabel('Performance relative to baseline (%)')
    ax.set_title(f'Performance Comparison (Normalized to Baseline) - {group_name} Group')
    # Tick labels are replaced by the wrapped captions drawn below.
    ax.set_xticks([])

    for column, label in enumerate(group_benchmarks):
        for row, caption in enumerate(format_benchmark_label(label)):
            # Each further caption line is placed lower than the previous one.
            y_pos = max_height + (headroom * 0.5) + 2 - (row * headroom * 0.15)
            ax.text(column, y_pos, caption,
                    ha='center',
                    style='italic',
                    color='#666666')

    ax.grid(True, axis='y', alpha=0.2)
    ax.legend(bbox_to_anchor=(1, 1), loc='upper left')
153
-
154
def _chunk_labels(labels, max_size):
    """Halve ``labels`` recursively until every chunk has at most ``max_size`` items."""
    if len(labels) <= max_size:
        return [labels]
    mid = len(labels) // 2
    return _chunk_labels(labels[:mid], max_size) + _chunk_labels(labels[mid:], max_size)


def _store_group(groups, name, labels):
    """Store ``labels`` under ``name``, appending '_' until the key is unused."""
    while name in groups:
        name += '_'
    groups[name] = labels


def split_large_groups(benchmark_groups):
    """Rebalance benchmark groups so that no group has more than five labels.

    Groups with more than five labels are halved repeatedly; the pieces keep
    the group name with one more trailing underscore each (``g``, ``g_``,
    ``g__``, ...). Groups with exactly one label are pooled into a
    ``'Miscellaneous'`` group, which is appended last and split by the same
    rule. Labels of an input group already named ``'Miscellaneous'`` join that
    pool instead of being overwritten by it.

    Args:
        benchmark_groups: Mapping from group name to a list of labels.

    Returns:
        A ``defaultdict(list)`` from group name to labels. Every input label
        appears exactly once in the result.
    """
    max_group_size = 5
    misc_name = 'Miscellaneous'
    misc_pool = []
    new_groups = defaultdict(list)

    for group, labels in benchmark_groups.items():
        if group == misc_name or len(labels) == 1:
            misc_pool.extend(labels)
            continue
        # Unique key per chunk: reusing ``group + '_'`` across passes would
        # overwrite a previously stored half and silently drop its labels.
        for chunk in _chunk_labels(labels, max_group_size):
            _store_group(new_groups, group, chunk)

    if misc_pool:
        for chunk in _chunk_labels(misc_pool, max_group_size):
            _store_group(new_groups, misc_name, chunk)

    return new_groups
177
-
178
def group_benchmark_labels(benchmark_labels):
    """Group benchmark labels by their leading word and rebalance the groups.

    The group of a label is its prefix up to the first underscore or
    whitespace character. A label with no such prefix (empty, or starting
    with an underscore or whitespace) forms a group named after itself.

    Args:
        benchmark_labels: Iterable of benchmark label strings.

    Returns:
        The result of ``split_large_groups`` applied to the prefix groups.
    """
    benchmark_groups = defaultdict(list)
    for label in benchmark_labels:
        prefix = re.match(r'^[^_\s]+', label)
        # re.match returns None when the label has no leading word;
        # indexing that None would raise TypeError.
        group = prefix[0] if prefix else label
        benchmark_groups[group].append(label)
    return split_large_groups(benchmark_groups)
184
-
185
def create_normalized_bar_chart(benchmarks: list[BenchmarkSeries], baseline_name: str) -> list[str]:
    """Render one normalized bar chart per benchmark group as HTML.

    Args:
        benchmarks: Series whose latest results are compared.
        baseline_name: Run every other run is normalized against.

    Returns:
        One HTML fragment (from ``mpld3.fig_to_html``) per benchmark group.
        The list is empty when the baseline run has no results at all or
        when there is no run other than the baseline.
    """
    latest_results = get_latest_results(benchmarks)

    run_names = sorted({
        name
        for result in latest_results.values()
        for name in result.run_values
    })
    if baseline_name not in run_names:
        return []

    benchmark_groups = group_benchmark_labels([b.label for b in benchmarks])
    # Identical for every group, so it is computed once up front.
    non_baseline_runs = [name for name in run_names if name != baseline_name]

    html_charts = []
    for group_name, group_benchmarks in benchmark_groups.items():
        plt.close('all')
        if not non_baseline_runs:
            continue

        normalized_data = prepare_normalized_data(
            latest_results, benchmarks, group_benchmarks,
            non_baseline_runs, baseline_name
        )

        fig, ax = plt.subplots(figsize=(10, 6))
        max_height = create_bar_plot(
            ax, normalized_data, group_benchmarks, non_baseline_runs,
            latest_results, benchmarks, baseline_name
        )
        add_chart_elements(ax, group_benchmarks, group_name, max_height)

        plt.tight_layout()
        html_charts.append(mpld3.fig_to_html(fig))
        plt.close(fig)

    return html_charts
26
@dataclass
class BenchmarkTimeSeries:
    """Rendered time-series chart of a single benchmark."""

    # Benchmark label; emitted as the chart's data-label attribute.
    label: str
    # HTML markup of the chart, as produced by mpld3.fig_to_html.
    html: str
226
29
227
- def create_time_series_chart (benchmarks : list [BenchmarkSeries ], github_repo : str ) -> str :
30
+ def create_time_series_chart (benchmarks : list [BenchmarkSeries ], github_repo : str ) -> list [ BenchmarkTimeSeries ] :
228
31
plt .close ('all' )
229
32
230
33
num_benchmarks = len (benchmarks )
231
34
if num_benchmarks == 0 :
232
35
return
233
36
234
- fig , axes = plt .subplots (num_benchmarks , 1 , figsize = (10 , max (4 * num_benchmarks , 30 )))
235
-
236
- if num_benchmarks == 1 :
237
- axes = [axes ]
37
+ html_charts = []
238
38
239
- for idx , benchmark in enumerate (benchmarks ):
240
- ax = axes [ idx ]
39
+ for _ , benchmark in enumerate (benchmarks ):
40
+ fig , ax = plt . subplots ( figsize = ( 10 , 4 ))
241
41
242
42
for run in benchmark .runs :
243
43
sorted_points = sorted (run .results , key = lambda x : x .date )
@@ -277,13 +77,12 @@ def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str
277
77
ax .grid (True , alpha = 0.2 )
278
78
ax .legend (bbox_to_anchor = (1 , 1 ), loc = 'upper left' )
279
79
ax .xaxis .set_major_formatter (mdates .ConciseDateFormatter ('%Y-%m-%d %H:%M:%S' ))
280
- ax .xaxis .set_major_locator (mdates .AutoDateLocator ())
281
80
282
- plt .tight_layout ()
283
- html = mpld3 .fig_to_html (fig )
81
+ plt .tight_layout ()
82
+ html_charts .append (BenchmarkTimeSeries (html = mpld3 .fig_to_html (fig ), label = benchmark .label ))
83
+ plt .close (fig )
284
84
285
- plt .close (fig )
286
- return html
85
+ return html_charts
287
86
288
87
def process_benchmark_data (benchmark_runs : list [BenchmarkRun ], compare_names : list [str ]) -> list [BenchmarkSeries ]:
289
88
benchmark_metadata : dict [str , BenchmarkMetadata ] = {}
@@ -319,12 +118,10 @@ def process_benchmark_data(benchmark_runs: list[BenchmarkRun], compare_names: li
319
118
return benchmark_series
320
119
321
120
def generate_html (benchmark_runs : list [BenchmarkRun ], github_repo : str , compare_names : list [str ]) -> str :
322
- baseline_name = compare_names [0 ]
323
121
benchmarks = process_benchmark_data (benchmark_runs , compare_names )
324
122
325
- comparison_html_charts = create_normalized_bar_chart (benchmarks , baseline_name )
326
- timeseries_html = create_time_series_chart (benchmarks , github_repo )
327
- comparison_charts_html = '\n ' .join (f'<div class="chart"><div>{ chart } </div></div>' for chart in comparison_html_charts )
123
+ timeseries = create_time_series_chart (benchmarks , github_repo )
124
+ timeseries_charts_html = '\n ' .join (f'<div class="chart" data-label="{ ts .label } "><div>{ ts .html } </div></div>' for ts in timeseries )
328
125
329
126
html_template = f"""
330
127
<!DOCTYPE html>
@@ -375,18 +172,44 @@ def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_
375
172
margin-bottom: 16px;
376
173
}}
377
174
}}
175
+ .filter-container {{
176
+ text-align: center;
177
+ margin-bottom: 24px;
178
+ }}
179
+ .filter-container input {{
180
+ padding: 8px;
181
+ font-size: 16px;
182
+ border: 1px solid #ccc;
183
+ border-radius: 4px;
184
+ width: 400px;
185
+ max-width: 100%;
186
+ }}
378
187
</style>
188
+ <script>
189
+ function filterCharts() {{
190
+ const regexInput = document.getElementById('bench-filter').value;
191
+ const regex = new RegExp(regexInput, 'i');
192
+ const charts = document.querySelectorAll('.chart');
193
+ charts.forEach(chart => {{
194
+ const label = chart.getAttribute('data-label');
195
+ if (regex.test(label)) {{
196
+ chart.style.display = '';
197
+ }} else {{
198
+ chart.style.display = 'none';
199
+ }}
200
+ }});
201
+ }}
202
+ </script>
379
203
</head>
380
204
<body>
381
205
<div class="container">
382
206
<h1>Benchmark Results</h1>
383
- <h2>Latest Results Comparison</h2>
384
- <div class="chart">
385
- { comparison_charts_html }
207
+ <div class="filter-container">
208
+ <input type="text" id="bench-filter" placeholder="Regex..." oninput="filterCharts()">
386
209
</div>
387
210
<h2>Historical Results</h2>
388
- <div class="chart ">
389
- { timeseries_html }
211
+ <div class="charts ">
212
+ { timeseries_charts_html }
390
213
</div>
391
214
</div>
392
215
</body>
0 commit comments