Skip to content

Commit 64f137e

Browse files
Add script to test op perf and compare
Co-authored-by: Daniele <daniele.dilotorres@gmail.com> Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
1 parent 0142961 commit 64f137e

File tree

3 files changed

+219
-0
lines changed

3 files changed

+219
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,9 @@ poetry.toml
137137
/tests/test-tokenizer-1-bpe
138138
/tests/test-tokenizer-1-spm
139139

140+
# Test reports
141+
comparison_backend_ops_perf.txt
142+
140143
# Scripts
141144
!/scripts/install-oneapi.bat
142145

scripts/compare-commits-op-perf.sh

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#!/bin/bash
#
# Build test-backend-ops at two commits, run it in "perf" mode for each, and
# hand both logs to compare-test-backend-ops-perf.py to produce a report.
#
# NOTE: the working tree is left checked out at <commit2> when the script ends.

if [ $# -lt 2 ]; then
    echo "usage: ./scripts/compare-commits-op-perf.sh <commit1> <commit2> [additional test-backend-ops arguments]"
    exit 1
fi

set -e
set -x

# Keep the extra arguments in an array so that arguments containing spaces
# reach test-backend-ops unchanged instead of being re-split.
test_backend_ops_args=("${@:3}")

# Extract short form of commits (first 7 characters). "/" is mapped to "-" so
# that refs such as "origin/master" still yield a usable log file name.
commit1_short=$(echo "$1" | cut -c1-7 | tr '/' '-')
commit2_short=$(echo "$2" | cut -c1-7 | tr '/' '-')

rm -f test-backend-ops-perf-*.log

# to test a backend, call the script with the corresponding environment variable (e.g. GGML_CUDA=1 ./scripts/compare-commits-op-perf.sh ...)
if [ -n "$GGML_CUDA" ]; then
    CMAKE_OPTS="${CMAKE_OPTS} -DGGML_CUDA=ON"
fi

dir="build-test-backend-ops"

# Rebuild test-backend-ops from scratch for the currently checked-out tree and
# store its perf output in test-backend-ops-perf-<commit_short>.log.
function run {
    commit_short=$1
    rm -fr "${dir}" > /dev/null
    # CMAKE_OPTS is intentionally unquoted: it may hold several space-separated options.
    cmake -B "${dir}" -S . ${CMAKE_OPTS} > /dev/null
    cmake --build "${dir}" -t test-backend-ops > /dev/null
    "${dir}/bin/test-backend-ops" "${test_backend_ops_args[@]}" perf 2>&1 | tee "test-backend-ops-perf-${commit_short}.log"
}

git checkout "$1" > /dev/null
run "$commit1_short"

git checkout "$2" > /dev/null
run "$commit2_short"

./scripts/compare-test-backend-ops-perf.py -b "test-backend-ops-perf-$commit1_short.log" -c "test-backend-ops-perf-$commit2_short.log"
scripts/compare-test-backend-ops-perf.py

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import logging
5+
import re
6+
import sys
7+
from pathlib import Path
8+
9+
# Logging: root handler prints the bare message at INFO and above; the
# module-level logger below is what the rest of the script uses.
logging.basicConfig(format="%(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)
12+
13+
14+
def parse_benchmark_line(line: str):
    """
    Parse a single line of benchmark output.

    Example line:
        MUL_MAT(...): 744 runs - 1660.11 us/run - 134.48 MFLOP/run - 81.01 GFLOPS

    The text before the first ":" is the key. The trailing "<number> <unit>"
    pair (unit GFLOPS, TFLOPS or MFLOPS) is the throughput, which is
    normalized to GFLOPS.

    Returns a tuple of (key, gflops), or (None, None) if the line contains no
    ":", does not end in a recognized throughput figure, or the figure is not
    a valid number.
    """
    # Strip ANSI color codes from the whole line up front so they can neither
    # leak into the key nor hide the trailing value/unit pair.
    line = re.sub(r"\x1b\[[0-9;]*m", "", line).strip()
    if ":" not in line:
        return None, None

    key, data_part = line.split(":", 1)
    key = key.strip()

    # Find the last number and unit in the data part
    match = re.search(r"([\d\.]+)\s+(GFLOPS|TFLOPS|MFLOPS)\s*$", data_part.strip())
    if not match:
        return None, None

    value_str, unit = match.groups()
    try:
        value = float(value_str)
    except ValueError:
        # The character class above also accepts junk such as "1.2.3" or "..."
        return None, None

    # Normalize everything to GFLOPS
    if unit == "TFLOPS":
        gflops = value * 1000
    elif unit == "MFLOPS":
        gflops = value / 1000
    else:  # GFLOPS
        gflops = value

    return key, gflops
50+
51+
52+
def extract_commit_id(filepath: Path) -> str:
    """
    Extract the commit ID from a log file name.

    The file name (directory part ignored) must have the exact form
    ``test-backend-ops-perf-<commit_id>.log``. ``<commit_id>`` may itself
    contain dots (e.g. a tag such as ``v1.2.3``).

    Returns the commit ID, or an empty string if the name does not match.
    """
    # Path(...) also lets callers pass a plain string.
    filename = Path(filepath).name
    # fullmatch: reject names with trailing extras such as "...log.bak".
    match = re.fullmatch(r"test-backend-ops-perf-(.+)\.log", filename)
    return match.group(1) if match else ""
60+
61+
62+
def load_results(filepath: Path) -> dict:
    """
    Load all benchmark results from a log file into a dictionary.

    Each line is passed to parse_benchmark_line(); lines that yield no key
    are skipped. If the same key appears more than once, the last occurrence
    wins.

    Returns a dict mapping test key -> GFLOPS.
    Logs an error and exits with status 1 if the file cannot be read.
    """
    results = {}
    try:
        # errors="replace": the log is captured tool output, so an undecodable
        # byte is substituted rather than aborting the whole load.
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                key, gflops = parse_benchmark_line(line)
                if key:
                    results[key] = gflops
    except FileNotFoundError:
        logger.error(f"Error: File not found at {filepath}")
        sys.exit(1)
    except OSError as err:
        # e.g. permission denied, or the path is a directory
        logger.error(f"Error: Could not read {filepath}: {err}")
        sys.exit(1)
    return results
75+
76+
77+
def format_change(change: float) -> str:
    """
    Render a percentage delta for the report.

    Values within +/-0.1 (inclusive) are shown as " ~0.00%"; anything larger
    is printed with two decimals and an explicit "+" for increases.
    """
    is_significant = change > 0.1 or change < -0.1
    if not is_significant:
        return " ~0.00%"

    sign = "+" if change > 0 else ""
    return f"{sign}{change:.2f}%"
85+
86+
87+
def main():
    """
    Compare two benchmark log files and write a plain-text report.

    Command-line options:
      -b/--baseline  baseline results file (required)
      -c/--compare   results file to compare against the baseline (required)
      -o/--output    report path (default: comparison_backend_ops_perf.txt)

    The report has one row per test key found in either file, with the
    baseline GFLOPS, the compare GFLOPS and the relative change in percent.
    A value missing on one side, or a change that cannot be computed
    (missing value or a baseline of 0), is shown as "N/A".

    Exits with status 1 if either file yields no results.
    """
    parser = argparse.ArgumentParser(
        description="Compare two benchmark result files and generate a report.",
        formatter_class=argparse.RawTextHelpFormatter,
    )
    help_b = "Path to the baseline benchmark results file."
    parser.add_argument(
        "-b", "--baseline", dest="baseline", type=Path, required=True, help=help_b
    )
    help_c = "Path to the benchmark results file to compare against the baseline."
    parser.add_argument(
        "-c", "--compare", dest="compare", type=Path, required=True, help=help_c
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default="comparison_backend_ops_perf.txt",
        help="Path to the output report file (default: comparison_backend_ops_perf.txt).",
    )
    args = parser.parse_args()

    logger.info(f"Loading baseline results from: {args.baseline}")
    baseline_results = load_results(args.baseline)
    logger.info(f"Loading compare results from: {args.compare}")
    compare_results = load_results(args.compare)

    if not baseline_results or not compare_results:
        logger.error("Could not load results from one or both files. Exiting.")
        # Non-zero status so that calling scripts can detect the failure.
        sys.exit(1)

    # Extract commit IDs from filenames
    baseline_commit = extract_commit_id(args.baseline)
    compare_commit = extract_commit_id(args.compare)

    all_keys = sorted(baseline_results.keys() | compare_results.keys())

    comparisons = []
    for key in all_keys:
        baseline_val = baseline_results.get(key)
        compare_val = compare_results.get(key)

        # None means "no meaningful change figure": a value is missing on one
        # side, or the baseline is 0 and a relative change is undefined.
        change = None
        if baseline_val is not None and compare_val is not None and baseline_val != 0:
            change = ((compare_val - baseline_val) / baseline_val) * 100

        comparisons.append(
            {
                "key": key,
                "baseline": baseline_val,
                "compare": compare_val,
                "change": change,
            }
        )

    # --- Generate Report ---
    # Create header with commit IDs extracted from filenames
    baseline_header = "Baseline GFLOPS"
    compare_header = "Compare GFLOPS"
    if baseline_commit:
        baseline_header = f"Baseline ({baseline_commit}) GFLOPS"
    if compare_commit:
        compare_header = f"Compare ({compare_commit}) GFLOPS"

    key_title = "Test Configuration"
    # Never narrower than the column title, so header and rows stay aligned.
    key_width = max(len(key_title), max(len(k) for k in all_keys)) + 2
    header = f"{key_title:<{key_width}} {baseline_header:>25} {compare_header:>25} {'Change (%)':>15}"

    with open(args.output, "w", encoding="utf-8") as f:
        f.write(header + "\n")
        f.write("-" * len(header) + "\n")

        for item in comparisons:
            baseline_str = (
                f"{item['baseline']:.2f}" if item["baseline"] is not None else "N/A"
            )
            compare_str = (
                f"{item['compare']:.2f}" if item["compare"] is not None else "N/A"
            )
            change_str = (
                format_change(item["change"]) if item["change"] is not None else "N/A"
            )
            f.write(
                f"{item['key']:<{key_width}} {baseline_str:>25} {compare_str:>25} {change_str:>15}\n"
            )

    logger.info(f"Comparison report successfully generated at: {args.output}")


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)