
[UR][Benchmarks] Support for Unitrace for all benchmarks #19320


Open
wants to merge 7 commits into base: sycl
41 changes: 32 additions & 9 deletions devops/scripts/benchmarks/benches/base.py
@@ -12,6 +12,7 @@
from options import options
from utils.utils import download, run
from abc import ABC, abstractmethod
import utils.unitrace as unitrace

benchmark_tags = [
BenchmarkTag("SYCL", "Benchmark uses SYCL runtime"),
@@ -70,7 +71,7 @@ def teardown(self):
pass

@abstractmethod
def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
pass

@staticmethod
@@ -86,7 +87,14 @@ def get_adapter_full_path():
), f"could not find adapter file {adapter_path} (and in similar lib paths)"

def run_bench(
Review comment (Copilot AI, Jul 7, 2025):
[nitpick] Avoid mutable default arguments like extra_unitrace_opt=[]; use None and set to an empty list inside the function to prevent unintended sharing.
(A sketch illustrating this pitfall follows the updated signature below.)

self, command, env_vars, ld_library=[], add_sycl=True, use_stdout=True
self,
command,
env_vars,
ld_library=[],
add_sycl=True,
use_stdout=True,
unitrace_timestamp: str = None,
extra_unitrace_opt=[],
):
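
A minimal, runnable sketch of the pitfall flagged in the Copilot nitpick above; the option string is only illustrative:

```python
def run_buggy(extra_unitrace_opt=[]):
    # The default list is created once, at definition time, and shared by
    # every call that omits the argument, so options leak between runs.
    extra_unitrace_opt.append("--chrome-dnn-logging")
    return extra_unitrace_opt


def run_fixed(extra_unitrace_opt=None):
    # Using None as the sentinel gives each call its own fresh list.
    if extra_unitrace_opt is None:
        extra_unitrace_opt = []
    extra_unitrace_opt.append("--chrome-dnn-logging")
    return extra_unitrace_opt


print(run_buggy())  # ['--chrome-dnn-logging']
print(run_buggy())  # ['--chrome-dnn-logging', '--chrome-dnn-logging']  <- leaked state
print(run_fixed())  # ['--chrome-dnn-logging']
print(run_fixed())  # ['--chrome-dnn-logging']
```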
env_vars = env_vars.copy()
if options.ur is not None:
@@ -99,13 +107,28 @@ def run_bench(
ld_libraries = options.extra_ld_libraries.copy()
ld_libraries.extend(ld_library)

result = run(
command=command,
env_vars=env_vars,
add_sycl=add_sycl,
cwd=options.benchmark_cwd,
ld_library=ld_libraries,
)
if unitrace_timestamp is not None:
Review comment (Contributor):
It's unintuitive that passing a unitrace timestamp triggers unitracing. Please, consider making a unitrace class and passing a Unitrace object or None. This would also allow encapsulating unitrace download and setup inside Unitrace's constructor.
(A sketch of this approach follows at the end of this hunk.)

bench_dir, unitrace_output, command = unitrace.unitrace_prepare(
self.name(), unitrace_timestamp, command, extra_unitrace_opt
)

try:
result = run(
command=command,
env_vars=env_vars,
add_sycl=add_sycl,
cwd=options.benchmark_cwd,
ld_library=ld_libraries,
)
except subprocess.CalledProcessError as e:
Review comment (Contributor):
e is unused, can be omitted
(A minimal sketch follows the except block below.)

if unitrace_timestamp is not None:
unitrace.unitrace_cleanup(options.benchmark_cwd, unitrace_output)
raise
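
A small runnable sketch of the change suggested above: when the exception object is not read, the `as e` binding can be dropped and a bare raise still re-raises the active error. The function and command here are illustrative only:

```python
import subprocess


def run_or_cleanup(command: list) -> None:
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError:
        # No `as e` needed: nothing reads the exception object here.
        print("cleaning up after failed run")
        raise
```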

if unitrace_timestamp is not None:
unitrace.handle_unitrace_output(
bench_dir, unitrace_output, unitrace_timestamp
)

if use_stdout:
return result.stdout.decode()
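
A hedged sketch of the Unitrace-object approach suggested in the review comment above. The class, method, and argument names are hypothetical, not part of this PR, and the real unitrace CLI options are omitted:

```python
from __future__ import annotations


class Unitrace:
    """Hypothetical wrapper that owns unitrace setup and invocation."""

    def __init__(self, timestamp: str, extra_opts: list[str] | None = None):
        self.timestamp = timestamp
        self.extra_opts = list(extra_opts) if extra_opts else []
        # Downloading and setting up the unitrace binary could be
        # encapsulated here, once per session.

    def wrap(self, bench_name: str, command: list[str]) -> tuple[str, list[str]]:
        # Prefix the benchmark command with the unitrace invocation and return
        # the output name so the caller can collect the trace afterwards.
        output = f"{bench_name}_{self.timestamp}"
        return output, ["unitrace", *self.extra_opts, *command]


def run_bench(command: list[str], unitrace: Unitrace | None = None):
    # Passing a Unitrace object (or None to disable tracing) is more explicit
    # than overloading a timestamp string.
    if unitrace is not None:
        _output, command = unitrace.wrap("my_bench", command)
    # ... run(command=command, ...) as in the existing implementation ...
```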
13 changes: 11 additions & 2 deletions devops/scripts/benchmarks/benches/benchdnn.py
@@ -9,7 +9,7 @@
from utils.utils import git_clone, run, create_build_path
from utils.result import Result
from utils.oneapi import get_oneapi
from .benchdnn_list import get_bench_dnn_list
from .benchdnn_list import get_bench_dnn_list, unitrace_exclusion_list


class OneDnnBench(Suite):
@@ -129,7 +129,7 @@ def setup(self):
if not self.bench_bin.exists():
raise FileNotFoundError(f"Benchmark binary not found: {self.bench_bin}")

def run(self, env_vars):
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
Review comment (Contributor):
nit: add a dict type as a type hint to env_vars
(A sketch of the suggested annotation follows.)
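
A hedged sketch of the annotation the nit above suggests, shown as a standalone fragment; "Result" is quoted as a forward reference only for this sketch:

```python
from typing import Optional

# Illustrative only: how the run() signature could be annotated inside the
# benchmark class.
def run(self, env_vars: dict[str, str], unitrace_timestamp: Optional[str] = None) -> list["Result"]:
    ...
```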

command = [
str(self.bench_bin),
*self.bench_args.split(),
@@ -142,12 +142,21 @@ def run(self, env_vars):
env_vars = dict(env_vars) if env_vars else {}
env_vars["ONEAPI_DEVICE_SELECTOR"] = "level_zero:*"

if self.name() in unitrace_exclusion_list:
if options.verbose:
print(
f"[{self.name()}] Skipping benchmark due to unitrace exclusion list."
)
unitrace_timestamp = None

output = self.run_bench(
command,
env_vars,
add_sycl=True,
ld_library=ld_library,
use_stdout=True,
unitrace_timestamp=unitrace_timestamp,
extra_unitrace_opt=["--chrome-dnn-logging"],
)
result_value = self._extract_time(output)

6 changes: 6 additions & 0 deletions devops/scripts/benchmarks/benches/benchdnn_list.py
@@ -3,6 +3,11 @@
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

unitrace_exclusion_list = [
"onednn-graph-sdpa-plain-f32-eager",
"onednn-graph-sdpa-plain-f32-graph",
]

# entry format:
# [bench_driver, bench_name, bench_args, rungraph]
# bench_driver is the name of the benchdnn driver, e.g. "sum", "graph", etc.
@@ -62,6 +67,7 @@
"graph",
"sdpa-plain-f16",
"--reset --dt=f16 --case=complex_fusion/mha/sdpa-plain-implicit-causal-mask-fp32-bs1.json",
False, # Do not run SYCL graph for this benchmark
],
[
"graph",
74 changes: 72 additions & 2 deletions devops/scripts/benchmarks/benches/compute.py
@@ -224,6 +224,71 @@ def parse_unit_type(compute_unit):


class ComputeBenchmark(Benchmark):

# list of benchmarks to exclude from unitrace due to SIGSEGV, SIGABRT or timeouts
unitrace_exclusion_list = [
Review comment (Contributor):
We should just let things fail. If unitrace is failing, it's an urgent bug that needs to be fixed.

"api_overhead_benchmark_l0 SubmitKernel in order not using events KernelExecTime=20",
"api_overhead_benchmark_l0 SubmitKernel in order not using events",
"api_overhead_benchmark_l0 SubmitKernel in order with measure completion not using events KernelExecTime=20",
"api_overhead_benchmark_l0 SubmitKernel in order with measure completion not using events",
"api_overhead_benchmark_sycl SubmitKernel in order not using events KernelExecTime=20",
"api_overhead_benchmark_sycl SubmitKernel in order not using events",
"api_overhead_benchmark_sycl SubmitKernel in order with measure completion not using events KernelExecTime=20",
"api_overhead_benchmark_sycl SubmitKernel in order with measure completion not using events",
"api_overhead_benchmark_syclpreview SubmitKernel in order not using events KernelExecTime=20",
"api_overhead_benchmark_syclpreview SubmitKernel in order not using events",
"api_overhead_benchmark_syclpreview SubmitKernel in order with measure completion not using events KernelExecTime=20",
"api_overhead_benchmark_syclpreview SubmitKernel in order with measure completion not using events",
"api_overhead_benchmark_ur SubmitKernel in order not using events KernelExecTime=20",
"api_overhead_benchmark_ur SubmitKernel in order not using events",
"api_overhead_benchmark_ur SubmitKernel in order with measure completion not using events KernelExecTime=20",
"api_overhead_benchmark_ur SubmitKernel in order with measure completion not using events",
"api_overhead_benchmark_ur SubmitKernel out of order not using events KernelExecTime=20",
"api_overhead_benchmark_ur SubmitKernel out of order not using events",
"api_overhead_benchmark_ur SubmitKernel out of order with measure completion not using events KernelExecTime=20",
"api_overhead_benchmark_ur SubmitKernel out of order with measure completion not using events",
"graph_api_benchmark_l0 SinKernelGraph graphs:0, numKernels:5",
"graph_api_benchmark_l0 SinKernelGraph graphs:0, numKernels:100",
"graph_api_benchmark_l0 SinKernelGraph graphs:1, numKernels:5",
"graph_api_benchmark_l0 SinKernelGraph graphs:1, numKernels:100",
"graph_api_benchmark_l0 SubmitGraph numKernels:10 ioq 0 measureCompletion 0",
"graph_api_benchmark_l0 SubmitGraph numKernels:10 ioq 0 measureCompletion 1",
"graph_api_benchmark_l0 SubmitGraph numKernels:10 ioq 1 measureCompletion 0",
"graph_api_benchmark_l0 SubmitGraph numKernels:10 ioq 1 measureCompletion 1",
"graph_api_benchmark_l0 SubmitGraph numKernels:32 ioq 0 measureCompletion 0",
"graph_api_benchmark_l0 SubmitGraph numKernels:32 ioq 0 measureCompletion 1",
"graph_api_benchmark_l0 SubmitGraph numKernels:32 ioq 1 measureCompletion 0",
"graph_api_benchmark_l0 SubmitGraph numKernels:32 ioq 1 measureCompletion 1",
"graph_api_benchmark_l0 SubmitGraph numKernels:4 ioq 0 measureCompletion 0",
"graph_api_benchmark_l0 SubmitGraph numKernels:4 ioq 0 measureCompletion 1",
"graph_api_benchmark_l0 SubmitGraph numKernels:4 ioq 1 measureCompletion 0",
"graph_api_benchmark_l0 SubmitGraph numKernels:4 ioq 1 measureCompletion 1",
"graph_api_benchmark_sycl FinalizeGraph rebuildGraphEveryIter:0 graphStructure:Gromacs",
"graph_api_benchmark_sycl FinalizeGraph rebuildGraphEveryIter:0 graphStructure:Llama",
"graph_api_benchmark_sycl FinalizeGraph rebuildGraphEveryIter:1 graphStructure:Gromacs",
"graph_api_benchmark_sycl FinalizeGraph rebuildGraphEveryIter:1 graphStructure:Llama",
"graph_api_benchmark_ur SinKernelGraph graphs:0, numKernels:100",
"graph_api_benchmark_ur SinKernelGraph graphs:0, numKernels:5",
"graph_api_benchmark_ur SinKernelGraph graphs:1, numKernels:100",
"graph_api_benchmark_ur SinKernelGraph graphs:1, numKernels:5",
"graph_api_benchmark_ur SubmitGraph numKernels:4 ioq 1 measureCompletion 0",
"graph_api_benchmark_ur SubmitGraph numKernels:4 ioq 1 measureCompletion 1",
"graph_api_benchmark_ur SubmitGraph numKernels:10 ioq 0 measureCompletion 0",
"graph_api_benchmark_ur SubmitGraph numKernels:10 ioq 0 measureCompletion 1",
"graph_api_benchmark_ur SubmitGraph numKernels:32 ioq 0 measureCompletion 0",
"graph_api_benchmark_ur SubmitGraph numKernels:32 ioq 0 measureCompletion 1",
"graph_api_benchmark_ur SubmitGraph numKernels:32 ioq 1 measureCompletion 0",
"graph_api_benchmark_ur SubmitGraph numKernels:32 ioq 1 measureCompletion 1",
"multithread_benchmark_ur MemcpyExecute opsPerThread:400, numThreads:1, allocSize:102400 srcUSM:1 dstUSM:1",
"multithread_benchmark_ur MemcpyExecute opsPerThread:400, numThreads:1, allocSize:102400 srcUSM:0 dstUSM:1",
"multithread_benchmark_ur MemcpyExecute opsPerThread:100, numThreads:4, allocSize:102400 srcUSM:1 dstUSM:1 without events",
"multithread_benchmark_ur MemcpyExecute opsPerThread:100, numThreads:4, allocSize:102400 srcUSM:1 dstUSM:1 without events without copy offload",
"multithread_benchmark_ur MemcpyExecute opsPerThread:4096, numThreads:4, allocSize:1024 srcUSM:0 dstUSM:1 without events",
"multithread_benchmark_ur MemcpyExecute opsPerThread:4096, numThreads:4, allocSize:1024 srcUSM:0 dstUSM:1 without events with barrier",
"memory_benchmark_sycl StreamMemory, placement Device, type Triad, size 10240",
"miscellaneous_benchmark_sycl VectorSum",
]

def __init__(self, bench, name, test, runtime: RUNTIMES = None):
super().__init__(bench.directory, bench)
self.bench = bench
@@ -280,7 +345,7 @@ def explicit_group(self):
def description(self) -> str:
return ""

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
command = [
f"{self.benchmark_bin}",
f"--test={self.test}",
@@ -291,7 +356,12 @@ def run(self, env_vars) -> list[Result]:
command += self.bin_args()
env_vars.update(self.extra_env_vars())

result = self.run_bench(command, env_vars)
if self.name() in self.unitrace_exclusion_list:
unitrace_timestamp = None

result = self.run_bench(
command, env_vars, unitrace_timestamp=unitrace_timestamp
)
parsed_results = self.parse_output(result)
ret = []
for label, median, stddev, unit in parsed_results:
3 changes: 2 additions & 1 deletion devops/scripts/benchmarks/benches/gromacs.py
@@ -162,7 +162,7 @@ def setup(self):
ld_library=self.suite.oneapi.ld_libraries(),
)

def run(self, env_vars):
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
model_dir = self.grappa_dir / self.model

env_vars.update({"SYCL_CACHE_PERSISTENT": "1"})
@@ -201,6 +201,7 @@ def run(self, env_vars):
add_sycl=True,
use_stdout=False,
ld_library=self.suite.oneapi.ld_libraries(),
unitrace_timestamp=unitrace_timestamp,
)

if not self._validate_correctness(options.benchmark_cwd + "/md.log"):
7 changes: 5 additions & 2 deletions devops/scripts/benchmarks/benches/llamacpp.py
@@ -115,7 +115,7 @@ def get_tags(self):
def lower_is_better(self):
return False

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
command = [
f"{self.benchmark_bin}",
"--output",
@@ -141,7 +141,10 @@ def run(self, env_vars) -> list[Result]:
]

result = self.run_bench(
command, env_vars, ld_library=self.bench.oneapi.ld_libraries()
command,
env_vars,
ld_library=self.bench.oneapi.ld_libraries(),
unitrace_timestamp=unitrace_timestamp,
)
parsed = self.parse_output(result)
results = []
4 changes: 2 additions & 2 deletions devops/scripts/benchmarks/benches/syclbench.py
@@ -137,7 +137,7 @@ def setup(self):
self.directory, "sycl-bench-build", self.bench_name
)

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
self.outputfile = os.path.join(self.bench.directory, self.test + ".csv")

command = [
@@ -151,7 +151,7 @@
env_vars.update(self.extra_env_vars())

# no output to stdout, all in outputfile
self.run_bench(command, env_vars)
self.run_bench(command, env_vars, unitrace_timestamp=unitrace_timestamp)

with open(self.outputfile, "r") as f:
reader = csv.reader(f)
2 changes: 1 addition & 1 deletion devops/scripts/benchmarks/benches/test.py
@@ -88,7 +88,7 @@ def notes(self) -> str:
def unstable(self) -> str:
return self.unstable_text

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
random_value = self.value + random.uniform(-1 * (self.diff), self.diff)
return [
Result(
8 changes: 6 additions & 2 deletions devops/scripts/benchmarks/benches/umf.py
@@ -138,7 +138,7 @@ def get_names_of_benchmarks_to_be_run(self, command, env_vars):

return all_names

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
command = [f"{self.benchmark_bin}"]

all_names = self.get_names_of_benchmarks_to_be_run(command, env_vars)
@@ -152,7 +152,11 @@ def run(self, env_vars) -> list[Result]:
specific_benchmark = command + ["--benchmark_filter=^" + name + "$"]

result = self.run_bench(
specific_benchmark, env_vars, add_sycl=False, ld_library=[self.umf_lib]
specific_benchmark,
env_vars,
add_sycl=False,
ld_library=[self.umf_lib],
unitrace_timestamp=unitrace_timestamp,
)

parsed = self.parse_output(result)
13 changes: 9 additions & 4 deletions devops/scripts/benchmarks/benches/velocity.py
@@ -130,15 +130,20 @@ def description(self) -> str:
def get_tags(self):
return ["SYCL", "application"]

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
env_vars.update(self.extra_env_vars())

command = [
f"{self.benchmark_bin}",
]
command += self.bin_args()

result = self.run_bench(command, env_vars, ld_library=self.ld_libraries())
result = self.run_bench(
command,
env_vars,
ld_library=self.ld_libraries(),
unitrace_timestamp=unitrace_timestamp,
)

return [
Result(
@@ -282,15 +287,15 @@ class QuickSilver(VelocityBase):
def __init__(self, vb: VelocityBench):
super().__init__("QuickSilver", "qs", vb, "MMS/CTT")

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
# TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0
if (
"UR_L0_USE_IMMEDIATE_COMMANDLISTS" in env_vars
and env_vars["UR_L0_USE_IMMEDIATE_COMMANDLISTS"] == "0"
):
return None

return super().run(env_vars)
return super().run(env_vars, unitrace_timestamp=unitrace_timestamp)

def name(self):
return "Velocity-Bench QuickSilver"
13 changes: 7 additions & 6 deletions devops/scripts/benchmarks/history.py
@@ -149,7 +149,7 @@ def git_info_from_path(path: Path) -> (str, str):
compute_runtime=compute_runtime,
)

def save(self, save_name, results: list[Result], to_file=True):
def save(self, save_name, timestamp, results: list[Result], to_file=True):
benchmark_data = self.create_run(save_name, results)
self.runs.append(benchmark_data)

Expand All @@ -161,11 +161,12 @@ def save(self, save_name, results: list[Result], to_file=True):
os.makedirs(results_dir, exist_ok=True)

# Use formatted timestamp for the filename
timestamp = (
datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S")
if options.timestamp_override is None
else options.timestamp_override
)
if timestamp is None:
Review comment (Contributor):
It seems like the timestamp is unconditionally created in the main() method, so this one is not needed?
(A sketch of the simplification follows at the end of this hunk.)

timestamp = (
datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S")
if options.timestamp_override is None
else options.timestamp_override
)
file_path = Path(os.path.join(results_dir, f"{save_name}_{timestamp}.json"))
with file_path.open("w") as file:
json.dump(serialized, file, indent=4)
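
A hedged sketch of the simplification hinted at in the review comment above; the helper name is hypothetical. The idea is to compute the timestamp once in main() and pass it down, so save() would not need its own fallback:

```python
from datetime import datetime, timezone


def make_timestamp(override=None) -> str:
    # Mirror of the existing fallback logic, kept in one place (main()).
    if override is not None:
        return override
    return datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S")


# Then, in main():
#   timestamp = make_timestamp(options.timestamp_override)
#   history.save(save_name, timestamp, results)
```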
1 change: 0 additions & 1 deletion devops/scripts/benchmarks/html/data.js
@@ -8,4 +8,3 @@
benchmarkRuns = [];

defaultCompareNames = [];
