
[UR][Benchmarks] Support for Unitrace for all benchmarks #19320


Open
wants to merge 7 commits into base: sycl
41 changes: 32 additions & 9 deletions devops/scripts/benchmarks/benches/base.py
@@ -12,6 +12,7 @@
from options import options
from utils.utils import download, run
from abc import ABC, abstractmethod
import utils.unitrace as unitrace

benchmark_tags = [
BenchmarkTag("SYCL", "Benchmark uses SYCL runtime"),
@@ -70,7 +71,7 @@ def teardown(self):
pass

@abstractmethod
def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
pass

@staticmethod
@@ -86,7 +87,14 @@ def get_adapter_full_path():
), f"could not find adapter file {adapter_path} (and in similar lib paths)"

def run_bench(
Review comment (Copilot AI, Jul 7, 2025):
[nitpick] Avoid mutable default arguments like extra_unitrace_opt=[]; use None and set to an empty list inside the function to prevent unintended sharing.
(A sketch illustrating this pitfall follows the updated signature below.)

self, command, env_vars, ld_library=[], add_sycl=True, use_stdout=True
self,
command,
env_vars,
ld_library=[],
add_sycl=True,
use_stdout=True,
unitrace_timestamp: str = None,
extra_unitrace_opt=[],
):
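
A minimal, runnable sketch of the pitfall flagged in the Copilot nitpick above; the option string is only illustrative:

```python
def run_buggy(extra_unitrace_opt=[]):
    # The default list is created once, at definition time, and shared by
    # every call that omits the argument, so options leak between runs.
    extra_unitrace_opt.append("--chrome-dnn-logging")
    return extra_unitrace_opt


def run_fixed(extra_unitrace_opt=None):
    # Using None as the sentinel gives each call its own fresh list.
    if extra_unitrace_opt is None:
        extra_unitrace_opt = []
    extra_unitrace_opt.append("--chrome-dnn-logging")
    return extra_unitrace_opt


print(run_buggy())  # ['--chrome-dnn-logging']
print(run_buggy())  # ['--chrome-dnn-logging', '--chrome-dnn-logging']  <- leaked state
print(run_fixed())  # ['--chrome-dnn-logging']
print(run_fixed())  # ['--chrome-dnn-logging']
```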
env_vars = env_vars.copy()
if options.ur is not None:
@@ -99,13 +107,28 @@ def run_bench(
ld_libraries = options.extra_ld_libraries.copy()
ld_libraries.extend(ld_library)

result = run(
command=command,
env_vars=env_vars,
add_sycl=add_sycl,
cwd=options.benchmark_cwd,
ld_library=ld_libraries,
)
if unitrace_timestamp is not None:
Review comment (Contributor):
It's unintuitive that passing a unitrace timestamp triggers unitracing. Please, consider making a unitrace class and passing a Unitrace object or None. This would also allow encapsulating unitrace download and setup inside Unitrace's constructor.
(A sketch of this approach follows at the end of this hunk.)

bench_dir, unitrace_output, command = unitrace.unitrace_prepare(
self.name(), unitrace_timestamp, command, extra_unitrace_opt
)

try:
result = run(
command=command,
env_vars=env_vars,
add_sycl=add_sycl,
cwd=options.benchmark_cwd,
ld_library=ld_libraries,
)
except subprocess.CalledProcessError as e:
Review comment (Contributor):
e is unused, can be omitted
(A minimal sketch follows the except block below.)

if unitrace_timestamp is not None:
unitrace.unitrace_cleanup(options.benchmark_cwd, unitrace_output)
raise
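
A small runnable sketch of the change suggested above: when the exception object is not read, the `as e` binding can be dropped and a bare raise still re-raises the active error. The function and command here are illustrative only:

```python
import subprocess


def run_or_cleanup(command: list) -> None:
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError:
        # No `as e` needed: nothing reads the exception object here.
        print("cleaning up after failed run")
        raise
```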

if unitrace_timestamp is not None:
unitrace.handle_unitrace_output(
bench_dir, unitrace_output, unitrace_timestamp
)

if use_stdout:
return result.stdout.decode()
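
A hedged sketch of the Unitrace-object approach suggested in the review comment above. The class, method, and argument names are hypothetical, not part of this PR, and the real unitrace CLI options are omitted:

```python
from __future__ import annotations


class Unitrace:
    """Hypothetical wrapper that owns unitrace setup and invocation."""

    def __init__(self, timestamp: str, extra_opts: list[str] | None = None):
        self.timestamp = timestamp
        self.extra_opts = list(extra_opts) if extra_opts else []
        # Downloading and setting up the unitrace binary could be
        # encapsulated here, once per session.

    def wrap(self, bench_name: str, command: list[str]) -> tuple[str, list[str]]:
        # Prefix the benchmark command with the unitrace invocation and return
        # the output name so the caller can collect the trace afterwards.
        output = f"{bench_name}_{self.timestamp}"
        return output, ["unitrace", *self.extra_opts, *command]


def run_bench(command: list[str], unitrace: Unitrace | None = None):
    # Passing a Unitrace object (or None to disable tracing) is more explicit
    # than overloading a timestamp string.
    if unitrace is not None:
        _output, command = unitrace.wrap("my_bench", command)
    # ... run(command=command, ...) as in the existing implementation ...
```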
13 changes: 11 additions & 2 deletions devops/scripts/benchmarks/benches/benchdnn.py
@@ -9,7 +9,7 @@
from utils.utils import git_clone, run, create_build_path
from utils.result import Result
from utils.oneapi import get_oneapi
from .benchdnn_list import get_bench_dnn_list
from .benchdnn_list import get_bench_dnn_list, unitrace_exclusion_list


class OneDnnBench(Suite):
@@ -129,7 +129,7 @@ def setup(self):
if not self.bench_bin.exists():
raise FileNotFoundError(f"Benchmark binary not found: {self.bench_bin}")

def run(self, env_vars):
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
Review comment (Contributor):
nit: add a dict type as a type hint to env_vars
(A sketch of the suggested annotation follows.)
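
A hedged sketch of the annotation the nit above suggests, shown as a standalone fragment; "Result" is quoted as a forward reference only for this sketch:

```python
from typing import Optional

# Illustrative only: how the run() signature could be annotated inside the
# benchmark class.
def run(self, env_vars: dict[str, str], unitrace_timestamp: Optional[str] = None) -> list["Result"]:
    ...
```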

command = [
str(self.bench_bin),
*self.bench_args.split(),
@@ -142,12 +142,21 @@ def run(self, env_vars):
env_vars = dict(env_vars) if env_vars else {}
env_vars["ONEAPI_DEVICE_SELECTOR"] = "level_zero:*"

if self.name() in unitrace_exclusion_list:
if options.verbose:
print(
f"[{self.name()}] Skipping benchmark due to unitrace exclusion list."
)
unitrace_timestamp = None

output = self.run_bench(
command,
env_vars,
add_sycl=True,
ld_library=ld_library,
use_stdout=True,
unitrace_timestamp=unitrace_timestamp,
extra_unitrace_opt=["--chrome-dnn-logging"],
)
result_value = self._extract_time(output)

6 changes: 6 additions & 0 deletions devops/scripts/benchmarks/benches/benchdnn_list.py
@@ -3,6 +3,11 @@
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

unitrace_exclusion_list = [
"onednn-graph-sdpa-plain-f32-eager",
"onednn-graph-sdpa-plain-f32-graph",
]

# entry format:
# [bench_driver, bench_name, bench_args, rungraph]
# bench_driver is the name of the benchdnn driver, e.g. "sum", "graph", etc.
@@ -62,6 +67,7 @@
"graph",
"sdpa-plain-f16",
"--reset --dt=f16 --case=complex_fusion/mha/sdpa-plain-implicit-causal-mask-fp32-bs1.json",
False, # Do not run SYCL graph for this benchmark
],
[
"graph",
74 changes: 72 additions & 2 deletions devops/scripts/benchmarks/benches/compute.py
@@ -224,6 +224,71 @@ def parse_unit_type(compute_unit):


class ComputeBenchmark(Benchmark):

# list of benchmarks to exclude from unitrace due to SIGSEGV, SIGABRT or timeouts
unitrace_exclusion_list = [
Review comment (Contributor):
We should just let things fail. If unitrace is failing, it's an urgent bug that needs to be fixed.

"api_overhead_benchmark_l0 SubmitKernel in order not using events KernelExecTime=20",
"api_overhead_benchmark_l0 SubmitKernel in order not using events",
"api_overhead_benchmark_l0 SubmitKernel in order with measure completion not using events KernelExecTime=20",
"api_overhead_benchmark_l0 SubmitKernel in order with measure completion not using events",
"api_overhead_benchmark_sycl SubmitKernel in order not using events KernelExecTime=20",
"api_overhead_benchmark_sycl SubmitKernel in order not using events",
"api_overhead_benchmark_sycl SubmitKernel in order with measure completion not using events KernelExecTime=20",
"api_overhead_benchmark_sycl SubmitKernel in order with measure completion not using events",
"api_overhead_benchmark_syclpreview SubmitKernel in order not using events KernelExecTime=20",
"api_overhead_benchmark_syclpreview SubmitKernel in order not using events",
"api_overhead_benchmark_syclpreview SubmitKernel in order with measure completion not using events KernelExecTime=20",
"api_overhead_benchmark_syclpreview SubmitKernel in order with measure completion not using events",
"api_overhead_benchmark_ur SubmitKernel in order not using events KernelExecTime=20",
"api_overhead_benchmark_ur SubmitKernel in order not using events",
"api_overhead_benchmark_ur SubmitKernel in order with measure completion not using events KernelExecTime=20",
"api_overhead_benchmark_ur SubmitKernel in order with measure completion not using events",
"api_overhead_benchmark_ur SubmitKernel out of order not using events KernelExecTime=20",
"api_overhead_benchmark_ur SubmitKernel out of order not using events",
"api_overhead_benchmark_ur SubmitKernel out of order with measure completion not using events KernelExecTime=20",
"api_overhead_benchmark_ur SubmitKernel out of order with measure completion not using events",
"graph_api_benchmark_l0 SinKernelGraph graphs:0, numKernels:5",
"graph_api_benchmark_l0 SinKernelGraph graphs:0, numKernels:100",
"graph_api_benchmark_l0 SinKernelGraph graphs:1, numKernels:5",
"graph_api_benchmark_l0 SinKernelGraph graphs:1, numKernels:100",
"graph_api_benchmark_l0 SubmitGraph numKernels:10 ioq 0 measureCompletion 0",
"graph_api_benchmark_l0 SubmitGraph numKernels:10 ioq 0 measureCompletion 1",
"graph_api_benchmark_l0 SubmitGraph numKernels:10 ioq 1 measureCompletion 0",
"graph_api_benchmark_l0 SubmitGraph numKernels:10 ioq 1 measureCompletion 1",
"graph_api_benchmark_l0 SubmitGraph numKernels:32 ioq 0 measureCompletion 0",
"graph_api_benchmark_l0 SubmitGraph numKernels:32 ioq 0 measureCompletion 1",
"graph_api_benchmark_l0 SubmitGraph numKernels:32 ioq 1 measureCompletion 0",
"graph_api_benchmark_l0 SubmitGraph numKernels:32 ioq 1 measureCompletion 1",
"graph_api_benchmark_l0 SubmitGraph numKernels:4 ioq 0 measureCompletion 0",
"graph_api_benchmark_l0 SubmitGraph numKernels:4 ioq 0 measureCompletion 1",
"graph_api_benchmark_l0 SubmitGraph numKernels:4 ioq 1 measureCompletion 0",
"graph_api_benchmark_l0 SubmitGraph numKernels:4 ioq 1 measureCompletion 1",
"graph_api_benchmark_sycl FinalizeGraph rebuildGraphEveryIter:0 graphStructure:Gromacs",
"graph_api_benchmark_sycl FinalizeGraph rebuildGraphEveryIter:0 graphStructure:Llama",
"graph_api_benchmark_sycl FinalizeGraph rebuildGraphEveryIter:1 graphStructure:Gromacs",
"graph_api_benchmark_sycl FinalizeGraph rebuildGraphEveryIter:1 graphStructure:Llama",
"graph_api_benchmark_ur SinKernelGraph graphs:0, numKernels:100",
"graph_api_benchmark_ur SinKernelGraph graphs:0, numKernels:5",
"graph_api_benchmark_ur SinKernelGraph graphs:1, numKernels:100",
"graph_api_benchmark_ur SinKernelGraph graphs:1, numKernels:5",
"graph_api_benchmark_ur SubmitGraph numKernels:4 ioq 1 measureCompletion 0",
"graph_api_benchmark_ur SubmitGraph numKernels:4 ioq 1 measureCompletion 1",
"graph_api_benchmark_ur SubmitGraph numKernels:10 ioq 0 measureCompletion 0",
"graph_api_benchmark_ur SubmitGraph numKernels:10 ioq 0 measureCompletion 1",
"graph_api_benchmark_ur SubmitGraph numKernels:32 ioq 0 measureCompletion 0",
"graph_api_benchmark_ur SubmitGraph numKernels:32 ioq 0 measureCompletion 1",
"graph_api_benchmark_ur SubmitGraph numKernels:32 ioq 1 measureCompletion 0",
"graph_api_benchmark_ur SubmitGraph numKernels:32 ioq 1 measureCompletion 1",
"multithread_benchmark_ur MemcpyExecute opsPerThread:400, numThreads:1, allocSize:102400 srcUSM:1 dstUSM:1",
"multithread_benchmark_ur MemcpyExecute opsPerThread:400, numThreads:1, allocSize:102400 srcUSM:0 dstUSM:1",
"multithread_benchmark_ur MemcpyExecute opsPerThread:100, numThreads:4, allocSize:102400 srcUSM:1 dstUSM:1 without events",
"multithread_benchmark_ur MemcpyExecute opsPerThread:100, numThreads:4, allocSize:102400 srcUSM:1 dstUSM:1 without events without copy offload",
"multithread_benchmark_ur MemcpyExecute opsPerThread:4096, numThreads:4, allocSize:1024 srcUSM:0 dstUSM:1 without events",
"multithread_benchmark_ur MemcpyExecute opsPerThread:4096, numThreads:4, allocSize:1024 srcUSM:0 dstUSM:1 without events with barrier",
"memory_benchmark_sycl StreamMemory, placement Device, type Triad, size 10240",
"miscellaneous_benchmark_sycl VectorSum",
]

def __init__(self, bench, name, test, runtime: RUNTIMES = None):
super().__init__(bench.directory, bench)
self.bench = bench
@@ -280,7 +345,7 @@ def explicit_group(self):
def description(self) -> str:
return ""

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
command = [
f"{self.benchmark_bin}",
f"--test={self.test}",
@@ -291,7 +356,12 @@ def run(self, env_vars) -> list[Result]:
command += self.bin_args()
env_vars.update(self.extra_env_vars())

result = self.run_bench(command, env_vars)
if self.name() in self.unitrace_exclusion_list:
unitrace_timestamp = None

result = self.run_bench(
command, env_vars, unitrace_timestamp=unitrace_timestamp
)
parsed_results = self.parse_output(result)
ret = []
for label, median, stddev, unit in parsed_results:
3 changes: 2 additions & 1 deletion devops/scripts/benchmarks/benches/gromacs.py
@@ -162,7 +162,7 @@ def setup(self):
ld_library=self.suite.oneapi.ld_libraries(),
)

def run(self, env_vars):
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
model_dir = self.grappa_dir / self.model

env_vars.update({"SYCL_CACHE_PERSISTENT": "1"})
@@ -201,6 +201,7 @@ def run(self, env_vars):
add_sycl=True,
use_stdout=False,
ld_library=self.suite.oneapi.ld_libraries(),
unitrace_timestamp=unitrace_timestamp,
)

if not self._validate_correctness(options.benchmark_cwd + "/md.log"):
7 changes: 5 additions & 2 deletions devops/scripts/benchmarks/benches/llamacpp.py
@@ -115,7 +115,7 @@ def get_tags(self):
def lower_is_better(self):
return False

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
command = [
f"{self.benchmark_bin}",
"--output",
@@ -141,7 +141,10 @@ def run(self, env_vars) -> list[Result]:
]

result = self.run_bench(
command, env_vars, ld_library=self.bench.oneapi.ld_libraries()
command,
env_vars,
ld_library=self.bench.oneapi.ld_libraries(),
unitrace_timestamp=unitrace_timestamp,
)
parsed = self.parse_output(result)
results = []
4 changes: 2 additions & 2 deletions devops/scripts/benchmarks/benches/syclbench.py
@@ -137,7 +137,7 @@ def setup(self):
self.directory, "sycl-bench-build", self.bench_name
)

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
self.outputfile = os.path.join(self.bench.directory, self.test + ".csv")

command = [
@@ -151,7 +151,7 @@
env_vars.update(self.extra_env_vars())

# no output to stdout, all in outputfile
self.run_bench(command, env_vars)
self.run_bench(command, env_vars, unitrace_timestamp=unitrace_timestamp)

with open(self.outputfile, "r") as f:
reader = csv.reader(f)
2 changes: 1 addition & 1 deletion devops/scripts/benchmarks/benches/test.py
@@ -88,7 +88,7 @@ def notes(self) -> str:
def unstable(self) -> str:
return self.unstable_text

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
random_value = self.value + random.uniform(-1 * (self.diff), self.diff)
return [
Result(
8 changes: 6 additions & 2 deletions devops/scripts/benchmarks/benches/umf.py
@@ -138,7 +138,7 @@ def get_names_of_benchmarks_to_be_run(self, command, env_vars):

return all_names

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
command = [f"{self.benchmark_bin}"]

all_names = self.get_names_of_benchmarks_to_be_run(command, env_vars)
@@ -152,7 +152,11 @@ def run(self, env_vars) -> list[Result]:
specific_benchmark = command + ["--benchmark_filter=^" + name + "$"]

result = self.run_bench(
specific_benchmark, env_vars, add_sycl=False, ld_library=[self.umf_lib]
specific_benchmark,
env_vars,
add_sycl=False,
ld_library=[self.umf_lib],
unitrace_timestamp=unitrace_timestamp,
)

parsed = self.parse_output(result)
13 changes: 9 additions & 4 deletions devops/scripts/benchmarks/benches/velocity.py
@@ -130,15 +130,20 @@ def description(self) -> str:
def get_tags(self):
return ["SYCL", "application"]

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
env_vars.update(self.extra_env_vars())

command = [
f"{self.benchmark_bin}",
]
command += self.bin_args()

result = self.run_bench(command, env_vars, ld_library=self.ld_libraries())
result = self.run_bench(
command,
env_vars,
ld_library=self.ld_libraries(),
unitrace_timestamp=unitrace_timestamp,
)

return [
Result(
@@ -282,15 +287,15 @@ class QuickSilver(VelocityBase):
def __init__(self, vb: VelocityBench):
super().__init__("QuickSilver", "qs", vb, "MMS/CTT")

def run(self, env_vars) -> list[Result]:
def run(self, env_vars, unitrace_timestamp: str = None) -> list[Result]:
# TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0
if (
"UR_L0_USE_IMMEDIATE_COMMANDLISTS" in env_vars
and env_vars["UR_L0_USE_IMMEDIATE_COMMANDLISTS"] == "0"
):
return None

return super().run(env_vars)
return super().run(env_vars, unitrace_timestamp=unitrace_timestamp)

def name(self):
return "Velocity-Bench QuickSilver"
13 changes: 7 additions & 6 deletions devops/scripts/benchmarks/history.py
@@ -149,7 +149,7 @@ def git_info_from_path(path: Path) -> (str, str):
compute_runtime=compute_runtime,
)

def save(self, save_name, results: list[Result], to_file=True):
def save(self, save_name, timestamp, results: list[Result], to_file=True):
benchmark_data = self.create_run(save_name, results)
self.runs.append(benchmark_data)

Expand All @@ -161,11 +161,12 @@ def save(self, save_name, results: list[Result], to_file=True):
os.makedirs(results_dir, exist_ok=True)

# Use formatted timestamp for the filename
timestamp = (
datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S")
if options.timestamp_override is None
else options.timestamp_override
)
if timestamp is None:
Review comment (Contributor):
It seems like the timestamp is unconditionally created in the main() method, so this one is not needed?
(A sketch of the simplification follows at the end of this hunk.)

timestamp = (
datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S")
if options.timestamp_override is None
else options.timestamp_override
)
file_path = Path(os.path.join(results_dir, f"{save_name}_{timestamp}.json"))
with file_path.open("w") as file:
json.dump(serialized, file, indent=4)
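
A hedged sketch of the simplification hinted at in the review comment above; the helper name is hypothetical. The idea is to compute the timestamp once in main() and pass it down, so save() would not need its own fallback:

```python
from datetime import datetime, timezone


def make_timestamp(override=None) -> str:
    # Mirror of the existing fallback logic, kept in one place (main()).
    if override is not None:
        return override
    return datetime.now(tz=timezone.utc).strftime("%Y%m%d_%H%M%S")


# Then, in main():
#   timestamp = make_timestamp(options.timestamp_override)
#   history.save(save_name, timestamp, results)
```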
1 change: 0 additions & 1 deletion devops/scripts/benchmarks/html/data.js
@@ -8,4 +8,3 @@
benchmarkRuns = [];

defaultCompareNames = [];
