Skip to content

Commit 7d14d84

Browse files
author
Hugh Delaney
committed
Update entry point
Thanks to pbalcer for the suggestion.
1 parent f2afed2 commit 7d14d84

File tree

1 file changed

+29
-22
lines changed

1 file changed

+29
-22
lines changed

source/adapters/level_zero/enqueue_native.cpp

Lines changed: 29 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,49 +18,56 @@ ur_result_t ur_queue_handle_legacy_t_::enqueueNativeCommandExp(
1818
ur_exp_enqueue_native_command_function_t pfnNativeEnqueue, void *data,
1919
uint32_t, const ur_mem_handle_t *,
2020
const ur_exp_enqueue_native_command_properties_t *,
21-
uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList,
21+
uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventList,
2222
ur_event_handle_t *phEvent) {
2323
auto Queue = this;
2424

25-
// TODO: Do I need this lock?
26-
std::scoped_lock<ur_shared_mutex> Lock(Queue->Mutex);
27-
28-
// TODO: What do I need to do with phMemList? Will a ur_mem_handle_t always
29-
// be usable as a native arg from within pfnNativeEnqueue, or should some
30-
// mem migration happen?
25+
// Lock automatically releases when this goes out of scope.
26+
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
3127

3228
bool UseCopyEngine = false;
29+
30+
// Please note that the following code should be run before the
31+
// subsequent getAvailableCommandList() call so that there is no
32+
// deadlock from waiting on unsubmitted events in an open batch.
33+
// The createAndRetainUrZeEventList() has the proper side-effect
34+
// of submitting batches with dependent events.
35+
//
3336
_ur_ze_event_list_t TmpWaitList;
3437
UR_CALL(TmpWaitList.createAndRetainUrZeEventList(
35-
NumEventsInWaitList, phEventWaitList, Queue, UseCopyEngine));
38+
NumEventsInWaitList, phEventList, Queue, UseCopyEngine));
3639

3740
// Get a new command list to be used on this call
3841
ur_command_list_ptr_t CommandList{};
42+
// TODO: Change UseCopyEngine argument to 'true' once L0 backend
43+
// support is added
3944
UR_CALL(Queue->Context->getAvailableCommandList(
40-
Queue, CommandList, UseCopyEngine, NumEventsInWaitList, phEventWaitList,
41-
true /* AllowBatching */));
45+
Queue, CommandList, UseCopyEngine, NumEventsInWaitList, phEventList));
4246

47+
// TODO: do we need to create a unique command type for this?
4348
ze_event_handle_t ZeEvent = nullptr;
44-
ur_event_handle_t InternalEvent{};
49+
ur_event_handle_t InternalEvent;
4550
bool IsInternal = phEvent == nullptr;
4651
ur_event_handle_t *Event = phEvent ? phEvent : &InternalEvent;
47-
48-
UR_CALL(createEventAndAssociateQueue(Queue, Event,
49-
UR_COMMAND_ENQUEUE_NATIVE_EXP,
52+
UR_CALL(createEventAndAssociateQueue(Queue, Event, UR_COMMAND_USM_PREFETCH,
5053
CommandList, IsInternal, false));
51-
UR_CALL(setSignalEvent(Queue, UseCopyEngine, &ZeEvent, Event,
52-
NumEventsInWaitList, phEventWaitList,
53-
CommandList->second.ZeQueue));
54+
ZeEvent = (*Event)->ZeEvent;
5455
(*Event)->WaitList = TmpWaitList;
5556

56-
// FIXME: blocking synchronization. Make this faster
57-
Queue->queueFinish();
58-
57+
const auto &WaitList = (*Event)->WaitList;
58+
const auto &ZeCommandList = CommandList->first;
59+
if (WaitList.Length) {
60+
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
61+
(ZeCommandList, WaitList.Length, WaitList.ZeEventList));
62+
}
5963
// Execute interop func
6064
pfnNativeEnqueue(Queue, data);
6165

62-
// FIXME: blocking synchronization. Make this faster
63-
Queue->queueFinish();
66+
// TODO: Level Zero does not have a completion "event" with the prefetch API,
67+
// so manually add command to signal our event.
68+
ZE2UR_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent));
69+
70+
UR_CALL(Queue->executeCommandList(CommandList, false));
6471

6572
return UR_RESULT_SUCCESS;
6673
}

0 commit comments

Comments
 (0)