Skip to content

Commit db40173

Browse files
committed
[Benchmark] Add UR SubmitKernel bench
1 parent 1fef4e2 commit db40173

File tree

4 files changed

+45
-7
lines changed

4 files changed

+45
-7
lines changed

.github/workflows/benchmarks_compute.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,8 @@ jobs:
5757
adapter: [
5858
{str_name: "${{inputs.str_name}}",
5959
sycl_config: "${{inputs.sycl_config_params}}",
60-
unit: "${{inputs.unit}}"}
60+
unit: "${{inputs.unit}}"
61+
}
6162
]
6263
build_type: [Release]
6364
compiler: [{c: clang, cxx: clang++}]
@@ -155,7 +156,7 @@ jobs:
155156
156157
- name: Run benchmarks
157158
id: benchmarks
158-
run: numactl -N 0 ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py ~/bench_workdir ${{github.workspace}}/sycl_build ${{ inputs.bench_script_params }}
159+
run: numactl -N 0 ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py ~/bench_workdir ${{github.workspace}}/sycl_build ${{github.workspace}}/ur-repo ${{ matrix.adapter.str_name }} ${{ inputs.bench_script_params }}
159160

160161
- name: Add comment to PR
161162
uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1

scripts/benchmarks/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@ Scripts for running performance tests on SYCL and Unified Runtime.
99

1010
## Running
1111

12-
`$ ./main.py ~/benchmarks_workdir/ ~/llvm/build/`
12+
`$ ./main.py ~/benchmarks_workdir/ ~/llvm/build/ ~/ur adapter_name`
1313

14-
This will download and build everything in `~/benchmarks_workdir/` using the compiler in `~/llvm/build/`, and then run the benchmarks. The results will be stored in `benchmark_results.md`.
14+
This will download and build everything in `~/benchmarks_workdir/` using the compiler in `~/llvm/build/` and the UR source from `~/ur`, and then run the benchmarks for the `adapter_name` adapter. The results will be stored in `benchmark_results.md`.
1515

1616
The scripts will try to reuse the files stored in `~/benchmarks_workdir/`, but the benchmarks will be rebuilt every time. To avoid that, use `-no-rebuild` option.
1717

scripts/benchmarks/benches/compute.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,14 @@ class ComputeBench:
1515
def __init__(self, directory):
1616
self.directory = directory
1717
self.built = False
18+
self.adapter_short_name = {'level_zero' : 'L0'}
1819
return
1920

2021
def setup(self):
2122
if self.built:
2223
return
2324

24-
repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "0f758021dce9ba32341a503739b69db057433c59")
25+
repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "08c41bb8bc1762ad53c6194df6d36bfcceff4aa2")
2526
build_path = create_build_path(self.directory, 'compute-benchmarks-build')
2627

2728
configure_command = [
@@ -31,14 +32,22 @@ def setup(self):
3132
f"-DCMAKE_BUILD_TYPE=Release",
3233
f"-DBUILD_SYCL=ON",
3334
f"-DSYCL_COMPILER_ROOT={options.sycl}",
34-
f"-DALLOW_WARNINGS=ON"
35+
f"-DALLOW_WARNINGS=ON",
36+
f"-DBUILD_UR=ON",
37+
f"-DUR_BUILD_TESTS=OFF",
38+
f"-DUR_BUILD_ADAPTER_L0=ON",
39+
f"-DUR_BUILD_TESTS=OFF",
40+
f"-DUMF_DISABLE_HWLOC=ON",
41+
f"-DBENCHMARK_UR_SOURCE_DIR={options.ur_dir}",
42+
f"-DUR_BUILD_ADAPTER_{self.adapter_short_name[options.ur_adapter_name]}=ON"
3543
]
3644
run(configure_command, add_sycl=True)
3745

3846
run(f"cmake --build {build_path} -j", add_sycl=True)
3947

4048
self.built = True
4149
self.bins = os.path.join(build_path, 'bin')
50+
self.libs = os.path.join(build_path, 'lib')
4251

4352
class ComputeBenchmark(Benchmark):
4453
def __init__(self, bench, name, test):
@@ -112,6 +121,29 @@ def bin_args(self) -> list[str]:
112121
"--KernelExecTime=1"
113122
]
114123

124+
class SubmitKernelUR(ComputeBenchmark):
125+
def __init__(self, bench, ioq):
126+
self.ioq = ioq
127+
super().__init__(bench, "api_overhead_benchmark_ur", "SubmitKernel")
128+
129+
def name(self):
130+
order = "in order" if self.ioq else "out of order"
131+
return f"api_overhead_benchmark_ur SubmitKernel {order}"
132+
133+
def extra_env_vars(self) -> dict:
134+
return {"UR_ADAPTERS_FORCE_LOAD" : os.path.join(self.bench.libs, f"libur_adapter_{options.ur_adapter_name}.so")}
135+
136+
def bin_args(self) -> list[str]:
137+
return [
138+
f"--Ioq={self.ioq}",
139+
"--DiscardEvents=0",
140+
"--MeasureCompletion=0",
141+
"--iterations=100000",
142+
"--Profiling=0",
143+
"--NumKernels=10",
144+
"--KernelExecTime=1"
145+
]
146+
115147
class ExecImmediateCopyQueue(ComputeBenchmark):
116148
def __init__(self, bench, ioq, isCopyOnly, source, destination, size):
117149
self.ioq = ioq
@@ -209,4 +241,3 @@ def bin_args(self) -> list[str]:
209241
"--numberOfElementsY=256",
210242
"--numberOfElementsZ=256",
211243
]
212-

scripts/benchmarks/main.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
3131
benchmarks = [
3232
SubmitKernelSYCL(cb, 0),
3333
SubmitKernelSYCL(cb, 1),
34+
SubmitKernelUR(cb, 0),
35+
SubmitKernelUR(cb, 1),
3436
QueueInOrderMemcpy(cb, 0, 'Device', 'Device', 1024),
3537
QueueInOrderMemcpy(cb, 0, 'Host', 'Device', 1024),
3638
QueueMemcpy(cb, 'Device', 'Device', 1024),
@@ -114,6 +116,8 @@ def validate_and_parse_env_args(env_args):
114116
parser = argparse.ArgumentParser(description='Unified Runtime Benchmark Runner')
115117
parser.add_argument('benchmark_directory', type=str, help='Working directory to setup benchmarks.')
116118
parser.add_argument('sycl', type=str, help='Root directory of the SYCL compiler.')
119+
parser.add_argument('ur_dir', type=str, help='Root directory of the UR.')
120+
parser.add_argument('ur_adapter_name', type=str, help='Options to build the Unified Runtime as part of the benchmark')
117121
parser.add_argument("--no-rebuild", help='Rebuild the benchmarks from scratch.', action="store_true")
118122
parser.add_argument("--env", type=str, help='Use env variable for a benchmark run.', action="append", default=[])
119123
parser.add_argument("--save", type=str, help='Save the results for comparison under a specified name.')
@@ -131,6 +135,8 @@ def validate_and_parse_env_args(env_args):
131135
options.sycl = args.sycl
132136
options.iterations = args.iterations
133137
options.timeout = args.timeout
138+
options.ur_dir = args.ur_dir
139+
options.ur_adapter_name = args.ur_adapter_name
134140

135141
benchmark_filter = re.compile(args.filter) if args.filter else None
136142

0 commit comments

Comments
 (0)