@@ -127,9 +127,20 @@ def benchmarks(self) -> list[Benchmark]:
127
127
for runtime in self .enabled_runtimes ():
128
128
for in_order_queue in [0 , 1 ]:
129
129
for measure_completion in [0 , 1 ]:
130
- benches .append (
131
- SubmitKernel (self , runtime , in_order_queue , measure_completion )
132
- )
130
+ for enqueue_functions in [0 , 1 ]:
131
+ # only SYCL backend supports enqueue functions
132
+ if enqueue_functions == 1 and runtime != RUNTIMES .SYCL :
133
+ continue
134
+
135
+ benches .append (
136
+ SubmitKernel (
137
+ self ,
138
+ runtime ,
139
+ in_order_queue ,
140
+ measure_completion ,
141
+ enqueue_functions ,
142
+ )
143
+ )
133
144
134
145
# Add SinKernelGraph benchmarks
135
146
for runtime in self .enabled_runtimes ():
@@ -278,10 +289,13 @@ def teardown(self):
278
289
279
290
280
291
class SubmitKernel (ComputeBenchmark ):
281
- def __init__ (self , bench , runtime : RUNTIMES , ioq , measure_completion = 0 ):
292
+ def __init__ (
293
+ self , bench , runtime : RUNTIMES , ioq , MeasureCompletion = 0 , EnqueueFunctions = 0
294
+ ):
282
295
self .ioq = ioq
283
296
self .runtime = runtime
284
- self .measure_completion = measure_completion
297
+ self .MeasureCompletion = MeasureCompletion
298
+ self .EnqueueFunctions = EnqueueFunctions
285
299
super ().__init__ (
286
300
bench , f"api_overhead_benchmark_{ runtime .value } " , "SubmitKernel"
287
301
)
@@ -291,12 +305,17 @@ def get_tags(self):
291
305
292
306
def name (self ):
293
307
order = "in order" if self .ioq else "out of order"
294
- completion_str = " with measure completion" if self .measure_completion else ""
295
- return f"api_overhead_benchmark_{ self .runtime .value } SubmitKernel { order } { completion_str } "
308
+ completion_str = " with measure completion" if self .MeasureCompletion else ""
309
+ enqueue_str = " using eventless SYCL enqueue" if self .EnqueueFunctions else ""
310
+ return f"api_overhead_benchmark_{ self .runtime .value } SubmitKernel { order } { completion_str } { enqueue_str } "
296
311
297
312
def explicit_group (self ):
313
+ # make eventless enqueue its own group, since only SYCL supports this mode
314
+ if self .EnqueueFunctions :
315
+ return "Submit Kernel using eventless SYCL enqueue"
316
+
298
317
order = "In Order" if self .ioq else "Out Of Order"
299
- completion_str = " With Completion" if self .measure_completion else ""
318
+ completion_str = " With Completion" if self .MeasureCompletion else ""
300
319
return f"SubmitKernel { order } { completion_str } "
301
320
302
321
def description (self ) -> str :
@@ -305,15 +324,11 @@ def description(self) -> str:
305
324
306
325
completion_desc = ""
307
326
if self .runtime == RUNTIMES .UR :
308
- completion_desc = f", { 'including' if self .measure_completion else 'excluding' } kernel completion time"
309
-
310
- l0_specific = ""
311
- if self .runtime == RUNTIMES .LEVEL_ZERO :
312
- l0_specific = " Uses immediate command lists"
327
+ completion_desc = f", { 'including' if self .MeasureCompletion else 'excluding' } kernel completion time"
313
328
314
329
return (
315
330
f"Measures CPU time overhead of submitting { order } kernels through { runtime_name } API{ completion_desc } . "
316
- f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time. { l0_specific } "
331
+ f"Runs 10 simple kernels with minimal execution time to isolate API overhead from kernel execution time."
317
332
)
318
333
319
334
def range (self ) -> tuple [float , float ]:
@@ -323,11 +338,12 @@ def bin_args(self) -> list[str]:
323
338
return [
324
339
f"--Ioq={ self .ioq } " ,
325
340
"--DiscardEvents=0" ,
326
- f"--MeasureCompletion={ self .measure_completion } " ,
341
+ f"--MeasureCompletion={ self .MeasureCompletion } " ,
327
342
"--iterations=100000" ,
328
343
"--Profiling=0" ,
329
344
"--NumKernels=10" ,
330
345
"--KernelExecTime=1" ,
346
+ f"--EnqueueFunctions={ self .EnqueueFunctions } " ,
331
347
]
332
348
333
349
0 commit comments