@@ -171,48 +171,63 @@ ur_result_t urEnqueueEventsWaitWithBarrier(
171
171
std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
172
172
173
173
// Helper function for appending a barrier to a command list.
174
- auto insertBarrierIntoCmdList =
175
- [&Queue](ur_command_list_ptr_t CmdList,
176
- const _ur_ze_event_list_t &EventWaitList,
177
- ur_event_handle_t &Event, bool IsInternal) {
178
- UR_CALL (createEventAndAssociateQueue (
179
- Queue, &Event, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, CmdList,
180
- IsInternal, false ));
181
-
182
- Event->WaitList = EventWaitList;
183
-
184
- // For in-order queue we don't need a real barrier, just wait for
185
- // requested events in potentially different queues and add a "barrier"
186
- // event signal because it is already guaranteed that previous commands
187
- // in this queue are completed when the signal is started.
188
- //
189
- // Only consideration here is that when profiling is used, signalEvent
190
- // cannot be used if EventWaitList.Lenght == 0. In those cases, we need
191
- // to fallback directly to barrier to have correct timestamps. See here:
192
- // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t
193
- //
194
- // TODO: this and other special handling of in-order queues to be
195
- // updated when/if Level Zero adds native support for in-order queues.
196
- //
197
- if (Queue->isInOrderQueue () && InOrderBarrierBySignal &&
198
- !Queue->isProfilingEnabled ()) {
199
- // If we are using driver in order lists, then append wait on events
200
- // is unnecessary and we can signal the event created.
201
- if (EventWaitList.Length && !CmdList->second .IsInOrderList ) {
202
- ZE2UR_CALL (zeCommandListAppendWaitOnEvents,
203
- (CmdList->first , EventWaitList.Length ,
204
- EventWaitList.ZeEventList ));
174
+ auto insertBarrierIntoCmdList = [&Queue](ur_command_list_ptr_t CmdList,
175
+ _ur_ze_event_list_t &EventWaitList,
176
+ ur_event_handle_t &Event,
177
+ bool IsInternal) {
178
+ UR_CALL (createEventAndAssociateQueue (Queue, &Event,
179
+ UR_COMMAND_EVENTS_WAIT_WITH_BARRIER,
180
+ CmdList, IsInternal, false ));
181
+
182
+ Event->WaitList = EventWaitList;
183
+
184
+ // For in-order queue we don't need a real barrier, just wait for
185
+ // requested events in potentially different queues and add a "barrier"
186
+ // event signal because it is already guaranteed that previous commands
187
+ // in this queue are completed when the signal is started.
188
+ //
189
+ // Only consideration here is that when profiling is used, signalEvent
190
+ // cannot be used if EventWaitList.Lenght == 0. In those cases, we need
191
+ // to fallback directly to barrier to have correct timestamps. See here:
192
+ // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t
193
+ //
194
+ // TODO: this and other special handling of in-order queues to be
195
+ // updated when/if Level Zero adds native support for in-order queues.
196
+ //
197
+ if (Queue->isInOrderQueue () && InOrderBarrierBySignal &&
198
+ !Queue->isProfilingEnabled ()) {
199
+ // If we are using driver in order lists, then append wait on events
200
+ // is unnecessary IF the cmdlists match.
201
+ if (EventWaitList.Length ) {
202
+ if (CmdList->second .IsInOrderList ) {
203
+ for (unsigned i = EventWaitList.Length ; i-- < 0 ;) {
204
+ // if the events is from the same cmdlist, we can remove it
205
+ // from the waitlist.
206
+ if (EventWaitList.UrEventList [i]->CommandList == CmdList) {
207
+ EventWaitList.Length --;
208
+ if (EventWaitList.Length != i) {
209
+ std::swap (EventWaitList.UrEventList [i],
210
+ EventWaitList.UrEventList [EventWaitList.Length ]);
211
+ std::swap (EventWaitList.ZeEventList [i],
212
+ EventWaitList.ZeEventList [EventWaitList.Length ]);
213
+ }
214
+ }
205
215
}
206
- ZE2UR_CALL (zeCommandListAppendSignalEvent,
207
- (CmdList->first , Event->ZeEvent ));
208
- } else {
209
- ZE2UR_CALL (zeCommandListAppendBarrier,
210
- (CmdList->first , Event->ZeEvent , EventWaitList.Length ,
211
- EventWaitList.ZeEventList ));
212
216
}
217
+ ZE2UR_CALL (
218
+ zeCommandListAppendWaitOnEvents,
219
+ (CmdList->first , EventWaitList.Length , EventWaitList.ZeEventList ));
220
+ }
221
+ ZE2UR_CALL (zeCommandListAppendSignalEvent,
222
+ (CmdList->first , Event->ZeEvent ));
223
+ } else {
224
+ ZE2UR_CALL (zeCommandListAppendBarrier,
225
+ (CmdList->first , Event->ZeEvent , EventWaitList.Length ,
226
+ EventWaitList.ZeEventList ));
227
+ }
213
228
214
- return UR_RESULT_SUCCESS;
215
- };
229
+ return UR_RESULT_SUCCESS;
230
+ };
216
231
217
232
// If the queue is in-order then each command in it effectively acts as a
218
233
// barrier, so we don't need to do anything except if we were requested
@@ -349,9 +364,9 @@ ur_result_t urEnqueueEventsWaitWithBarrier(
349
364
// command-lists.
350
365
std::vector<ur_event_handle_t > EventWaitVector (CmdLists.size ());
351
366
for (size_t I = 0 ; I < CmdLists.size (); ++I) {
352
- UR_CALL ( insertBarrierIntoCmdList (CmdLists[I], _ur_ze_event_list_t {},
353
- EventWaitVector[I],
354
- true /* IsInternal*/ ));
367
+ _ur_ze_event_list_t waitlist;
368
+ UR_CALL ( insertBarrierIntoCmdList (
369
+ CmdLists[I], waitlist, EventWaitVector[I], true /* IsInternal*/ ));
355
370
}
356
371
// If there were multiple queues we need to create a "convergence" event to
357
372
// be our active barrier. This convergence event is signalled by a barrier
@@ -376,8 +391,9 @@ ur_result_t urEnqueueEventsWaitWithBarrier(
376
391
// If there is only a single queue then insert a barrier and the single
377
392
// result event can be used as our active barrier and used as the return
378
393
// event. Take into account whether output event is discarded or not.
379
- UR_CALL (insertBarrierIntoCmdList (CmdLists[0 ], _ur_ze_event_list_t {},
380
- ResultEvent, IsInternal));
394
+ _ur_ze_event_list_t waitlist;
395
+ UR_CALL (insertBarrierIntoCmdList (CmdLists[0 ], waitlist, ResultEvent,
396
+ IsInternal));
381
397
}
382
398
383
399
// Execute each command list so the barriers can be encountered.
0 commit comments