Skip to content

Commit ff17436

Browse files
committed
Add script to test op perf and compare
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
1 parent 0142961 commit ff17436

File tree

3 files changed

+199
-0
lines changed

3 files changed

+199
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ poetry.toml
137137
/tests/test-tokenizer-1-bpe
138138
/tests/test-tokenizer-1-spm
139139

140+
# Test reports
141+
comparison_backend_ops_perf.txt
142+
140143
# Scripts
141144
!/scripts/install-oneapi.bat
142145

scripts/compare-commits-op-perf.sh

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/bin/bash
#
# Build test-backend-ops at two commits, run it in "perf" mode at each one,
# and feed the two resulting logs to compare-test-backend-ops-perf.py.
#
# usage: ./scripts/compare-commits-op-perf.sh <commit1> <commit2> [additional test-backend-ops arguments]
#
# NOTE: the script checks out <commit1> and then <commit2> in the current
# working tree and leaves the tree at <commit2> when it finishes.

if [ $# -lt 2 ]; then
    echo "usage: ./scripts/compare-commits-op-perf.sh <commit1> <commit2> [additional test-backend-ops arguments]"
    exit 1
fi

set -e
set -x

# Keep the extra arguments in an array so that an argument containing
# whitespace is passed through as a single word.
test_backend_ops_args=("${@:3}")

# Extract short form of commits (first 7 characters)
commit1_short="${1:0:7}"
commit2_short="${2:0:7}"

rm -f test-backend-ops-perf-*.log

# to test a backend, call the script with the corresponding environment variable (e.g. GGML_CUDA=1 ./scripts/compare-commits-op-perf.sh ...)
if [ -n "$GGML_CUDA" ]; then
    CMAKE_OPTS="${CMAKE_OPTS} -DGGML_CUDA=ON"
fi

dir="build-test-backend-ops"

# run <commit_short>
# Rebuild test-backend-ops from scratch for the currently checked-out tree and
# store its perf output (stdout and stderr) in test-backend-ops-perf-<commit_short>.log.
function run {
    commit_short=$1
    rm -fr "${dir}" > /dev/null
    # CMAKE_OPTS is deliberately left unquoted: it is a space-separated list of options.
    cmake -B "${dir}" -S . ${CMAKE_OPTS} > /dev/null
    cmake --build "${dir}" -t test-backend-ops > /dev/null
    "${dir}/bin/test-backend-ops" "${test_backend_ops_args[@]}" perf 2>&1 | tee "test-backend-ops-perf-${commit_short}.log"
}

git checkout "$1" > /dev/null
run "$commit1_short"

git checkout "$2" > /dev/null
run "$commit2_short"

./scripts/compare-test-backend-ops-perf.py -b "test-backend-ops-perf-${commit1_short}.log" -c "test-backend-ops-perf-${commit2_short}.log"
scripts/compare-test-backend-ops-perf.py

Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import re
5+
from pathlib import Path
6+
7+
8+
def parse_benchmark_line(line: str):
    """
    Parse a single line of benchmark output.

    Example line:
        MUL_MAT(...):  744 runs -  1660.11 us/run - 134.48 MFLOP/run -  81.01 GFLOPS

    The text before the first ':' becomes the key. The trailing
    "<number> <unit>" pair is the throughput, which is normalized to GFLOPS.

    Returns a tuple of (key, gflops), or (None, None) when the line has no
    ':', does not end in a recognised FLOPS figure, or the figure is not a
    valid number.
    """
    # Strip ANSI color codes from the whole line (not only the data part) so
    # that they end up neither in the key nor between the number and its unit.
    line = re.sub(r'\x1b\[[0-9;]*m', '', line).strip()
    if ':' not in line:
        return None, None

    key, data_part = line.split(':', 1)
    key = key.strip()

    # Find the last number and unit in the data part.
    # NOTE(review): kFLOPS is accepted so that low-throughput entries are not
    # silently dropped; confirm the producer spells the unit this way.
    match = re.search(r'([\d\.]+)\s+(GFLOPS|TFLOPS|MFLOPS|kFLOPS)\s*$', data_part.strip())
    if not match:
        return None, None

    value_str, unit = match.groups()
    try:
        value = float(value_str)
    except ValueError:
        # The character class above also matches tokens such as "." or
        # "1.2.3"; treat those as a parse failure instead of crashing.
        return None, None

    # Normalize everything to GFLOPS
    if unit == 'TFLOPS':
        gflops = value * 1000
    elif unit == 'MFLOPS':
        gflops = value / 1000
    elif unit == 'kFLOPS':
        gflops = value / 1_000_000
    else:  # GFLOPS
        gflops = value

    return key, gflops
44+
45+
46+
def extract_commit_id(filepath: Path) -> str:
    """Return the <commit_id> part of a name like test-backend-ops-perf-<commit_id>.log.

    Only the final path component is inspected. An empty string is returned
    when that component does not start with the expected pattern.
    """
    # Pattern: test-backend-ops-perf-<commit_id>.log (anchored at the start only)
    found = re.match(r'test-backend-ops-perf-([^.]+)\.log', filepath.name)
    return found.group(1) if found else ""
54+
55+
56+
def load_results(filepath: Path) -> dict:
    """Load all benchmark results from a file into a dictionary.

    Every line is passed to parse_benchmark_line(); lines that yield a
    non-empty key are stored as ``results[key] = gflops``. A key that occurs
    more than once keeps the value of its last occurrence.

    Returns the (possibly empty) mapping of key to GFLOPS.

    Exits the process with status 1, after printing a message to stderr,
    when the file does not exist.
    """
    # Local import: sys is not among the module-level imports of this script.
    import sys

    results = {}
    try:
        # errors='replace': the log is raw tool output, so a stray non-UTF-8
        # byte must not abort the whole comparison.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as f:
            for line in f:
                key, gflops = parse_benchmark_line(line)
                if key:
                    results[key] = gflops
    except FileNotFoundError:
        print(f"Error: File not found at {filepath}", file=sys.stderr)
        # sys.exit rather than the builtin exit(), which is only installed by
        # the site module and is meant for interactive use.
        sys.exit(1)
    return results
69+
70+
71+
def format_change(change: float) -> str:
    """Render a percentage change for the report.

    Values above 0.1 get an explicit leading '+', values below -0.1 are
    printed with their own sign, and everything else is shown as " ~0.00%".
    """
    if change > 0.1:
        return "+" + format(change, ".2f") + "%"
    if change < -0.1:
        return format(change, ".2f") + "%"
    # Neither comparison held: the change is within +/-0.1.
    return " ~0.00%"
79+
80+
81+
def main():
    """Compare two benchmark result files and write a tabular report.

    Command-line arguments:
        -b/--baseline  path to the baseline results file (required)
        -c/--compare   path to the results file compared against the baseline (required)
        -o/--output    path of the report (default: comparison_backend_ops_perf.txt)

    The report has one row per test key found in either file, with the
    baseline value, the compare value and the relative change in percent.
    A value missing on one side is shown as "N/A"; the change is also "N/A"
    when it cannot be computed (a missing value or a baseline of 0).

    Exits with status 1 when either file yields no results.
    """
    parser = argparse.ArgumentParser(
        description="Compare two benchmark result files and generate a report.",
        formatter_class=argparse.RawTextHelpFormatter
    )
    help_b = (
        "Path to the baseline benchmark results file."
    )
    parser.add_argument("-b", "--baseline", dest="baseline", type=Path, required=True, help=help_b)
    help_c = (
        "Path to the benchmark results file to compare against the baseline."
    )
    parser.add_argument("-c", "--compare", dest="compare", type=Path, required=True, help=help_c)
    parser.add_argument(
        "-o", "--output", type=Path, default="comparison_backend_ops_perf.txt",
        help="Path to the output report file (default: comparison_backend_ops_perf.txt)."
    )
    args = parser.parse_args()

    print(f"Loading baseline results from: {args.baseline}")
    baseline_results = load_results(args.baseline)
    print(f"Loading compare results from: {args.compare}")
    compare_results = load_results(args.compare)

    if not baseline_results or not compare_results:
        print("Could not load results from one or both files. Exiting.")
        # Non-zero status so that a calling script (e.g. one using `set -e`)
        # can tell that no report was produced.
        raise SystemExit(1)

    # Extract commit IDs from filenames
    baseline_commit = extract_commit_id(args.baseline)
    compare_commit = extract_commit_id(args.compare)

    # dict key views support set union directly
    all_keys = sorted(baseline_results.keys() | compare_results.keys())

    comparisons = []

    for key in all_keys:
        baseline_val = baseline_results.get(key)
        compare_val = compare_results.get(key)

        # None means "no meaningful percentage": a value is missing on one
        # side, or the baseline is 0 and the ratio is undefined.
        change = None
        if baseline_val is not None and compare_val is not None and baseline_val != 0:
            change = ((compare_val - baseline_val) / baseline_val) * 100

        comparisons.append({"key": key, "baseline": baseline_val, "compare": compare_val, "change": change})

    # --- Generate Report ---
    with open(args.output, 'w', encoding='utf-8') as f:

        # Create header with commit IDs extracted from filenames
        baseline_header = "Baseline GFLOPS"
        compare_header = "Compare GFLOPS"

        if baseline_commit:
            baseline_header = f"Baseline ({baseline_commit}) GFLOPS"
        if compare_commit:
            compare_header = f"Compare ({compare_commit}) GFLOPS"

        # Never make the first column narrower than its own header, otherwise
        # the header row is shifted relative to the data rows.
        key_header = 'Test Configuration'
        key_width = max(len(key_header), max(len(k) for k in all_keys)) + 2
        header = f"{key_header:<{key_width}} {baseline_header:>25} {compare_header:>25} {'Change (%)':>15}"
        f.write(header + "\n")
        f.write("-" * len(header) + "\n")

        for item in comparisons:
            baseline_str = f"{item['baseline']:.2f}" if item['baseline'] is not None else "N/A"
            compare_str = f"{item['compare']:.2f}" if item['compare'] is not None else "N/A"
            change_str = format_change(item['change']) if item['change'] is not None else "N/A"
            f.write(f"{item['key']:<{key_width}} {baseline_str:>25} {compare_str:>25} {change_str:>15}\n")

    print(f"Comparison report successfully generated at: {args.output}")
153+
154+
155+
if __name__ == "__main__":
156+
main()

0 commit comments

Comments
 (0)