@@ -18,49 +18,56 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueNativeCommandExp(
18
18
ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data,
19
19
uint32_t , const ur_mem_handle_t *,
20
20
const ur_exp_enqueue_native_command_properties_t *,
21
- uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList ,
21
+ uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventList ,
22
22
ur_event_handle_t *phEvent) {
23
23
auto Queue = this ;
24
24
25
- // TODO: Do I need this lock?
26
- std::scoped_lock<ur_shared_mutex> Lock (Queue->Mutex );
27
-
28
- // TODO: What do I need to do with phMemList? Will a ur_mem_handle_t always
29
- // be usable as a native arg from within pfnNativeEnqueue, or should some
30
- // mem migration happen?
25
+ // Lock automatically releases when this goes out of scope.
26
+ std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
31
27
32
28
bool UseCopyEngine = false ;
29
+
30
+ // Please note that the following code should be run before the
31
+ // subsequent getAvailableCommandList() call so that there is no
32
+ // dead-lock from waiting unsubmitted events in an open batch.
33
+ // The createAndRetainUrZeEventList() has the proper side-effect
34
+ // of submitting batches with dependent events.
35
+ //
33
36
_ur_ze_event_list_t TmpWaitList;
34
37
UR_CALL (TmpWaitList.createAndRetainUrZeEventList (
35
- NumEventsInWaitList, phEventWaitList , Queue, UseCopyEngine));
38
+ NumEventsInWaitList, phEventList , Queue, UseCopyEngine));
36
39
37
40
// Get a new command list to be used on this call
38
41
ur_command_list_ptr_t CommandList{};
42
+ // TODO: Change UseCopyEngine argument to 'true' once L0 backend
43
+ // support is added
39
44
UR_CALL (Queue->Context ->getAvailableCommandList (
40
- Queue, CommandList, UseCopyEngine, NumEventsInWaitList, phEventWaitList,
41
- true /* AllowBatching */ ));
45
+ Queue, CommandList, UseCopyEngine, NumEventsInWaitList, phEventList));
42
46
47
+ // TODO: do we need to create a unique command type for this?
43
48
ze_event_handle_t ZeEvent = nullptr ;
44
- ur_event_handle_t InternalEvent{} ;
49
+ ur_event_handle_t InternalEvent;
45
50
bool IsInternal = phEvent == nullptr ;
46
51
ur_event_handle_t *Event = phEvent ? phEvent : &InternalEvent;
47
-
48
- UR_CALL (createEventAndAssociateQueue (Queue, Event,
49
- UR_COMMAND_ENQUEUE_NATIVE_EXP,
52
+ UR_CALL (createEventAndAssociateQueue (Queue, Event, UR_COMMAND_USM_PREFETCH,
50
53
CommandList, IsInternal, false ));
51
- UR_CALL (setSignalEvent (Queue, UseCopyEngine, &ZeEvent, Event,
52
- NumEventsInWaitList, phEventWaitList,
53
- CommandList->second .ZeQueue ));
54
+ ZeEvent = (*Event)->ZeEvent ;
54
55
(*Event)->WaitList = TmpWaitList;
55
56
56
- // FIXME: blocking synchronization. Make this faster
57
- Queue->queueFinish ();
58
-
57
+ const auto &WaitList = (*Event)->WaitList ;
58
+ const auto &ZeCommandList = CommandList->first ;
59
+ if (WaitList.Length ) {
60
+ ZE2UR_CALL (zeCommandListAppendWaitOnEvents,
61
+ (ZeCommandList, WaitList.Length , WaitList.ZeEventList ));
62
+ }
59
63
// Execute interop func
60
64
pfnNativeEnqueue (Queue, data);
61
65
62
- // FIXME: blocking synchronization. Make this faster
63
- Queue->queueFinish ();
66
+ // TODO: Level Zero does not have a completion "event" with the prefetch API,
67
+ // so manually add command to signal our event.
68
+ ZE2UR_CALL (zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent));
69
+
70
+ UR_CALL (Queue->executeCommandList (CommandList, false ));
64
71
65
72
return UR_RESULT_SUCCESS;
66
73
}
0 commit comments