@@ -1535,15 +1535,17 @@ ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer,
1535
1535
* @param CommandList The command-list to append the QueryKernelTimestamps
1536
1536
* command to.
1537
1537
* @param SignalEvent The event that must be signaled after the profiling is
1538
- * finished. This event will contain the profiling information.
1538
+ * finished. May be nullptr, in which case no event is signaled.
1539
1539
* @param WaitEvent The event that must be waited on before starting the
1540
1540
* profiling. May be nullptr, in which case no event is waited on.
1541
+ * @param ProfilingEvent The event that will contain the profiling data.
1541
1542
* @return UR_RESULT_SUCCESS or an error code on failure.
1542
1543
*/
1543
1544
ur_result_t appendProfilingQueries (ur_exp_command_buffer_handle_t CommandBuffer,
1544
1545
ze_command_list_handle_t CommandList,
1545
1546
ur_event_handle_t SignalEvent,
1546
- ur_event_handle_t WaitEvent) {
1547
+ ur_event_handle_t WaitEvent,
1548
+ ur_event_handle_t ProfilingEvent) {
1547
1549
// Multiple submissions of a command buffer implies that we need to save
1548
1550
// the event timestamps before resubmitting the command buffer. We
1549
1551
// therefore copy these timestamps in a dedicated USM memory section
@@ -1556,12 +1558,17 @@ ur_result_t appendProfilingQueries(ur_exp_command_buffer_handle_t CommandBuffer,
1556
1558
Profiling->Timestamps =
1557
1559
new ze_kernel_timestamp_result_t [Profiling->NumEvents ];
1558
1560
1561
+ uint32_t NumWaitEvents = WaitEvent ? 1 : 0 ;
1562
+ ze_event_handle_t *ZeWaitEventList =
1563
+ WaitEvent ? &(WaitEvent->ZeEvent ) : nullptr ;
1564
+ ze_event_handle_t ZeSignalEvent =
1565
+ SignalEvent ? SignalEvent->ZeEvent : nullptr ;
1559
1566
ZE2UR_CALL (zeCommandListAppendQueryKernelTimestamps,
1560
1567
(CommandList, CommandBuffer->ZeEventsList .size (),
1561
1568
CommandBuffer->ZeEventsList .data (), (void *)Profiling->Timestamps ,
1562
- 0 , SignalEvent-> ZeEvent , 1 , &(WaitEvent-> ZeEvent ) ));
1569
+ 0 , ZeSignalEvent, NumWaitEvents, ZeWaitEventList ));
1563
1570
1564
- SignalEvent ->CommandData = static_cast <void *>(Profiling);
1571
+ ProfilingEvent ->CommandData = static_cast <void *>(Profiling);
1565
1572
1566
1573
return UR_RESULT_SUCCESS;
1567
1574
}
@@ -1615,7 +1622,7 @@ ur_result_t enqueueImmediateAppendPath(
1615
1622
1616
1623
if (DoProfiling) {
1617
1624
UR_CALL (appendProfilingQueries (CommandBuffer, CommandListHelper->first ,
1618
- *Event,
1625
+ *Event, CommandBuffer-> ComputeFinishedEvent ,
1619
1626
CommandBuffer->ComputeFinishedEvent ));
1620
1627
}
1621
1628
@@ -1705,14 +1712,12 @@ ur_result_t enqueueWaitEventPath(ur_exp_command_buffer_handle_t CommandBuffer,
1705
1712
1706
1713
if (DoProfiling) {
1707
1714
UR_CALL (appendProfilingQueries (CommandBuffer, SignalCommandList->first ,
1708
- *Event,
1709
- CommandBuffer->ExecutionFinishedEvent ));
1710
- } else {
1711
- ZE2UR_CALL (zeCommandListAppendBarrier,
1712
- (SignalCommandList->first , (*Event)->ZeEvent , 1 ,
1713
- &(CommandBuffer->ExecutionFinishedEvent ->ZeEvent )));
1715
+ nullptr , nullptr , *Event));
1714
1716
}
1715
1717
1718
+ ZE2UR_CALL (zeCommandListAppendBarrier,
1719
+ (SignalCommandList->first , (*Event)->ZeEvent , 0 , nullptr ));
1720
+
1716
1721
UR_CALL (Queue->executeCommandList (SignalCommandList, false /* IsBlocking*/ ,
1717
1722
false /* OKToBatchCommand*/ ));
1718
1723
0 commit comments