56
56
"INTEGER" , "INTEGER"
57
57
]
58
58
59
- # Legacy aliases for backward compatibility
60
- DB_FIELDS = LLAMA_BENCH_DB_FIELDS
61
- DB_TYPES = LLAMA_BENCH_DB_TYPES
62
-
63
59
assert len (LLAMA_BENCH_DB_FIELDS ) == len (LLAMA_BENCH_DB_TYPES )
64
60
assert len (TEST_BACKEND_OPS_DB_FIELDS ) == len (TEST_BACKEND_OPS_DB_TYPES )
65
61
75
71
"backend_name" , "op_name" , "op_params" , "test_mode"
76
72
]
77
73
78
- # Legacy alias for backward compatibility
79
- KEY_PROPERTIES = LLAMA_BENCH_KEY_PROPERTIES
80
-
81
- # Properties that are boolean and are converted to Yes/No for the table (llama-bench):
74
+ # Properties that are boolean and are converted to Yes/No for the table:
82
75
LLAMA_BENCH_BOOL_PROPERTIES = ["embeddings" , "cpu_strict" , "use_mmap" , "no_kv_offload" , "flash_attn" ]
83
-
84
- # Properties that are boolean and are converted to Yes/No for the table (test-backend-ops):
85
76
TEST_BACKEND_OPS_BOOL_PROPERTIES = ["supported" , "passed" ]
86
77
87
- # Legacy alias for backward compatibility
88
- BOOL_PROPERTIES = LLAMA_BENCH_BOOL_PROPERTIES
89
-
90
78
# Header names for the table (llama-bench):
91
79
LLAMA_BENCH_PRETTY_NAMES = {
92
80
"cpu_info" : "CPU" , "gpu_info" : "GPU" , "backends" : "Backends" , "n_gpu_layers" : "GPU layers" ,
104
92
"flops" : "FLOPS" , "bandwidth_gb_s" : "Bandwidth (GB/s)" , "memory_kb" : "Memory (KB)" , "n_runs" : "Runs"
105
93
}
106
94
107
- # Legacy alias for backward compatibility
108
- PRETTY_NAMES = LLAMA_BENCH_PRETTY_NAMES
109
-
110
95
DEFAULT_SHOW_LLAMA_BENCH = ["model_type" ] # Always show these properties by default.
111
96
DEFAULT_HIDE_LLAMA_BENCH = ["model_filename" ] # Always hide these properties by default.
112
97
113
98
DEFAULT_SHOW_TEST_BACKEND_OPS = ["backend_name" , "op_name" ] # Always show these properties by default.
114
99
DEFAULT_HIDE_TEST_BACKEND_OPS = ["error_message" ] # Always hide these properties by default.
115
100
116
- # Legacy aliases for backward compatibility
117
- DEFAULT_SHOW = DEFAULT_SHOW_LLAMA_BENCH
118
- DEFAULT_HIDE = DEFAULT_HIDE_LLAMA_BENCH
119
101
GPU_NAME_STRIP = ["NVIDIA GeForce " , "Tesla " , "AMD Radeon " ] # Strip prefixes for smaller tables.
120
102
MODEL_SUFFIX_REPLACE = {" - Small" : "_S" , " - Medium" : "_M" , " - Large" : "_L" }
121
103
122
104
DESCRIPTION = """Creates tables from llama-bench or test-backend-ops data written to multiple JSON/CSV files, a single JSONL file or SQLite database. Example usage (Linux):
123
105
124
106
For llama-bench:
125
107
$ git checkout master
126
- $ make clean && make llama-bench
108
+ $ cmake -B ${BUILD_DIR} ${CMAKE_OPTS} && cmake --build ${BUILD_DIR} -t llama-bench -j $(nproc)
127
109
$ ./llama-bench -o sql | sqlite3 llama-bench.sqlite
128
110
$ git checkout some_branch
129
- $ make clean && make llama-bench
111
+ $ cmake -B ${BUILD_DIR} ${CMAKE_OPTS} && cmake --build ${BUILD_DIR} -t llama-bench -j $(nproc)
130
112
$ ./llama-bench -o sql | sqlite3 llama-bench.sqlite
131
113
$ ./scripts/compare-llama-bench.py
132
114
133
115
For test-backend-ops:
134
116
$ git checkout master
135
- $ make clean && make test-backend-ops
117
+ $ cmake -B ${BUILD_DIR} ${CMAKE_OPTS} && cmake --build ${BUILD_DIR} -t test-backend-ops -j $(nproc)
136
118
$ ./test-backend-ops perf --output sql | sqlite3 test-backend-ops.sqlite
137
119
$ git checkout some_branch
138
- $ make clean && make test-backend-ops
120
+ $ cmake -B ${BUILD_DIR} ${CMAKE_OPTS} && cmake --build ${BUILD_DIR} -t test-backend-ops -j $(nproc)
139
121
$ ./test-backend-ops perf --output sql | sqlite3 test-backend-ops.sqlite
140
122
$ ./scripts/compare-llama-bench.py --tool test-backend-ops -i test-backend-ops.sqlite
141
123
180
162
help_s = (
181
163
"Columns to add to the table. "
182
164
"Accepts a comma-separated list of values. "
183
- f"Legal values: { ', ' .join (KEY_PROPERTIES [:- 3 ])} . "
165
+ f"Legal values: {', '.join(LLAMA_BENCH_KEY_PROPERTIES[:-3])}. "
184
166
"Defaults to model name (model_type) and CPU and/or GPU name (cpu_info, gpu_info) "
185
167
"plus any column where not all data points are the same. "
186
168
"If the columns are manually specified, then the results for each unique combination of the "
@@ -245,8 +227,10 @@ def __init__(self, tool: str = "llama-bench"):
245
227
# Set schema-specific properties based on tool
246
228
if self .tool == "llama-bench" :
247
229
self .check_keys = set (LLAMA_BENCH_KEY_PROPERTIES + ["build_commit" , "test_time" , "avg_ts" ])
248
- else : # test-backend-ops
230
+ elif self.tool == "test-backend-ops":
249
231
self .check_keys = set (TEST_BACKEND_OPS_KEY_PROPERTIES + ["build_commit" , "test_time" ])
232
+ else :
233
+ assert False
250
234
251
235
def _builds_init (self ):
252
236
self .build_len = self .build_len_min
@@ -343,10 +327,12 @@ def __init__(self, tool: str = "llama-bench"):
343
327
self .table_name = "test"
344
328
db_fields = LLAMA_BENCH_DB_FIELDS
345
329
db_types = LLAMA_BENCH_DB_TYPES
346
- else : # test-backend-ops
330
+ elif self.tool == "test-backend-ops":
347
331
self .table_name = "test_backend_ops"
348
332
db_fields = TEST_BACKEND_OPS_DB_FIELDS
349
333
db_types = TEST_BACKEND_OPS_DB_TYPES
334
+ else :
335
+ assert False
350
336
351
337
self .cursor .execute (f"CREATE TABLE { self .table_name } ({ ', ' .join (' ' .join (x ) for x in zip (db_fields , db_types ))} );" )
352
338
@@ -372,8 +358,10 @@ def builds_timestamp(self, reverse: bool = False) -> Union[Iterator[tuple], Sequ
372
358
def get_rows (self , properties : list [str ], hexsha8_baseline : str , hexsha8_compare : str ) -> Sequence [tuple ]:
373
359
if self .tool == "llama-bench" :
374
360
return self ._get_rows_llama_bench (properties , hexsha8_baseline , hexsha8_compare )
375
- else : # test-backend-ops
361
+ elif self.tool == "test-backend-ops":
376
362
return self ._get_rows_test_backend_ops (properties , hexsha8_baseline , hexsha8_compare )
363
+ else :
364
+ assert False
377
365
378
366
def _get_rows_llama_bench (self , properties : list [str ], hexsha8_baseline : str , hexsha8_compare : str ) -> Sequence [tuple ]:
379
367
select_string = ", " .join (
@@ -721,12 +709,14 @@ def get_flops_unit_name(flops_values: list) -> str:
721
709
pretty_names = LLAMA_BENCH_PRETTY_NAMES
722
710
default_show = DEFAULT_SHOW_LLAMA_BENCH
723
711
default_hide = DEFAULT_HIDE_LLAMA_BENCH
724
- else : # test-backend-ops
712
+ elif tool == "test-backend-ops":
725
713
key_properties = TEST_BACKEND_OPS_KEY_PROPERTIES
726
714
bool_properties = TEST_BACKEND_OPS_BOOL_PROPERTIES
727
715
pretty_names = TEST_BACKEND_OPS_PRETTY_NAMES
728
716
default_show = DEFAULT_SHOW_TEST_BACKEND_OPS
729
717
default_hide = DEFAULT_HIDE_TEST_BACKEND_OPS
718
+ else :
719
+ assert False
730
720
731
721
# If the user provided columns to group the results by, use them:
732
722
if known_args .show is not None :
@@ -756,7 +746,7 @@ def get_flops_unit_name(flops_values: list) -> str:
756
746
if row_full [i ] != rows_full [0 ][i ]:
757
747
properties_different .append (kp_i )
758
748
break
759
- else : # test-backend-ops
749
+ elif tool == "test-backend-ops":
760
750
# For test-backend-ops, check all key properties
761
751
for i , kp_i in enumerate (key_properties ):
762
752
if kp_i in default_show :
@@ -765,6 +755,8 @@ def get_flops_unit_name(flops_values: list) -> str:
765
755
if row_full [i ] != rows_full [0 ][i ]:
766
756
properties_different .append (kp_i )
767
757
break
758
+ else :
759
+ assert False
768
760
769
761
show = []
770
762
@@ -783,8 +775,10 @@ def get_flops_unit_name(flops_values: list) -> str:
783
775
if prop in show :
784
776
index_default += 1
785
777
show = show [:index_default ] + default_show + show [index_default :]
786
- else : # test-backend-ops
778
+ elif tool == "test-backend-ops":
787
779
show = default_show + properties_different
780
+ else :
781
+ assert False
788
782
789
783
for prop in default_hide :
790
784
try :
@@ -825,7 +819,7 @@ def get_flops_unit_name(flops_values: list) -> str:
825
819
# Regular columns test name avg t/s values Speedup
826
820
# VVVVVVVVVVVVV VVVVVVVVV VVVVVVVVVVVVVV VVVVVVV
827
821
table .append (list (row [:- 5 ]) + [test_name ] + list (row [- 2 :]) + [float (row [- 1 ]) / float (row [- 2 ])])
828
- else : # test-backend-ops
822
+ elif tool == "test-backend-ops":
829
823
# Determine the primary metric by checking rows until we find one with valid data
830
824
if rows_show :
831
825
primary_metric = "FLOPS" # Default to FLOPS
@@ -869,9 +863,11 @@ def get_flops_unit_name(flops_values: list) -> str:
869
863
# Fallback if no valid data is available
870
864
baseline_str = "N/A"
871
865
compare_str = "N/A"
872
- speedup = 1.0
866
+ speedup = float ( 'nan' )
873
867
874
868
table .append (list (row [:- 4 ]) + [baseline_str , compare_str , speedup ])
869
+ else :
870
+ assert False
875
871
876
872
# Some a-posteriori fixes to make the table contents prettier:
877
873
for bool_property in bool_properties :
@@ -907,8 +903,10 @@ def get_flops_unit_name(flops_values: list) -> str:
907
903
headers = [pretty_names .get (p , p ) for p in show ]
908
904
if tool == "llama-bench" :
909
905
headers += ["Test", f"t/s {name_baseline}", f"t/s {name_compare}", "Speedup"]
910
- else : # test-backend-ops
906
+ elif tool == " test-backend-ops" :
911
907
headers += [f"{primary_metric} {name_baseline}", f"{primary_metric} {name_compare}", "Speedup"]
908
+ else :
909
+ assert False
912
910
913
911
if known_args .plot :
914
912
def create_performance_plot (table_data : list [list [str ]], headers : list [str ], baseline_name : str , compare_name : str , output_file : str , plot_x_param : str , log_scale : bool = False , tool_type : str = "llama-bench" , metric_name : str = "t/s" ):
@@ -925,7 +923,7 @@ def create_performance_plot(table_data: list[list[str]], headers: list[str], bas
925
923
plot_x_label = plot_x_param
926
924
927
925
if plot_x_param not in ["n_prompt" , "n_gen" , "n_depth" ]:
928
- pretty_name = PRETTY_NAMES .get (plot_x_param , plot_x_param )
926
+ pretty_name = LLAMA_BENCH_PRETTY_NAMES .get (plot_x_param , plot_x_param )
929
927
if pretty_name in data_headers :
930
928
plot_x_index = data_headers .index (pretty_name )
931
929
plot_x_label = pretty_name
@@ -1047,8 +1045,10 @@ def make_axes(num_groups, max_cols=2, base_size=(8, 4)):
1047
1045
# Determine y-axis label based on tool type
1048
1046
if tool_type == "llama-bench" :
1049
1047
y_label = "Tokens per second (t/s)"
1050
- else : # test-backend-ops
1048
+ elif tool_type == "test-backend-ops":
1051
1049
y_label = metric_name
1050
+ else :
1051
+ assert False
1052
1052
1053
1053
ax .set_xlabel (plot_x_label , fontsize = 12 , fontweight = 'bold' )
1054
1054
ax .set_ylabel (y_label , fontsize = 12 , fontweight = 'bold' )
0 commit comments