1
1
#!/usr/bin/env python3
2
+ """
3
+ This script compares benchmark results from two release directories one by one.
4
+
5
+ Usage:
6
+ python3 release/release_logs/compare_perf_metrics <old-dir> <new-dir>
7
+ """
8
+
2
9
import json
3
10
import pathlib
4
11
import argparse
@@ -27,37 +34,43 @@ def parse_args():
27
34
28
35
29
36
def main (old_dir_name , new_dir_name ):
37
+ old_files = list (walk (old_dir_name ))
38
+ new_files = list (walk (new_dir_name ))
30
39
31
- old_paths = paths_without_root_dir (walk (old_dir_name ))
32
- new_paths = paths_without_root_dir (walk (new_dir_name ))
33
- to_compare , missing_in_new , missing_in_old = get_compare_list (old_paths , new_paths )
40
+ old_by_name = group_by_filename (old_files , old_dir_name )
41
+ new_by_name = group_by_filename (new_files , new_dir_name )
34
42
35
- for path in missing_in_new :
36
- print (new_dir_name , "does not have" , path )
37
-
38
- for path in missing_in_old :
39
- print (old_dir_name , "does not have" , path )
43
+ all_filenames = set (old_by_name .keys ()) | set (new_by_name .keys ())
40
44
41
45
throughput_regressions = []
42
46
latency_regressions = []
43
47
missing_in_new = []
44
48
missing_in_old = []
45
- for path in to_compare :
46
- old = pathlib .Path (old_dir_name , * path .parts )
47
- new = pathlib .Path (new_dir_name , * path .parts )
48
49
49
- throughput , latency , new , old = get_regressions (old , new )
50
+ for filename in sorted (all_filenames ):
51
+ old_path = old_by_name .get (filename )
52
+ new_path = new_by_name .get (filename )
53
+
54
+ if not old_path :
55
+ print (f"{ old_dir_name } is missing { filename } " )
56
+ continue
57
+ if not new_path :
58
+ print (f"{ new_dir_name } is missing { filename } " )
59
+ continue
60
+
61
+ # Compare the two files
62
+ throughput , latency , missing_new_metrics , missing_old_metrics = get_regressions (old_path , new_path )
50
63
51
64
throughput_regressions .extend (throughput )
52
65
latency_regressions .extend (latency )
53
- missing_in_new .extend (new )
54
- missing_in_old .extend (old )
66
+ missing_in_new .extend (missing_new_metrics )
67
+ missing_in_old .extend (missing_old_metrics )
55
68
56
- for perf_metric in missing_in_new :
57
- print (f"{ new } does not have { perf_metric } " )
69
+ for metric in missing_in_new :
70
+ print (f"{ new_path } does not have { metric } " )
58
71
59
- for perf_metric in missing_in_old :
60
- print (f"{ old } does not have { perf_metric } " )
72
+ for metric in missing_in_old :
73
+ print (f"{ old_path } does not have { metric } " )
61
74
62
75
throughput_regressions .sort ()
63
76
for _ , regression in throughput_regressions :
@@ -78,9 +91,20 @@ def walk(dir_name):
78
91
stack .extend (root .iterdir ())
79
92
80
93
81
def group_by_filename(paths, base_dir):
    """Index a collection of result files by their basename.

    Args:
        paths: Iterable of ``pathlib.Path`` objects to index.
        base_dir: Root directory of the walk; used only to render a
            readable relative path in duplicate warnings.

    Returns:
        dict mapping filename (str) to the first ``pathlib.Path`` seen
        with that name. Later paths with a duplicate basename are
        ignored, and a warning is printed for each one.
    """
    file_map = {}
    for path in paths:
        name = path.name
        if name not in file_map:
            file_map[name] = path
        else:
            # Compute the relative path only when we actually warn:
            # relative_to() is per-file work and raises ValueError for
            # paths outside base_dir, so don't pay for it on the common
            # (unique-filename) case.
            rel_path = path.relative_to(base_dir)
            print(f"Warning: duplicate filename {name} found at {rel_path}")
    return file_map
84
108
85
109
86
110
def get_compare_list (old , new ):
@@ -93,7 +117,6 @@ def get_compare_list(old, new):
93
117
new_set .difference (old_set ),
94
118
)
95
119
96
-
97
120
def get_regressions (old_path , new_path ):
98
121
99
122
with open (old_path , "r" ) as f :
0 commit comments