Skip to content

Commit 1cd402e

Browse files
authored
Merge pull request #1218 from Bensuo/maxime/imm-cmd-list-support
[EXP][CMDBUF] L0 Immediate command-list support
2 parents 5b89ee8 + c8e150c commit 1cd402e

File tree

2 files changed

+32
-49
lines changed

2 files changed

+32
-49
lines changed

source/adapters/level_zero/command_buffer.cpp

Lines changed: 28 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
9292
ZeStruct<ze_command_list_desc_t> ZeDesc,
9393
const ur_exp_command_buffer_desc_t *Desc)
9494
: Context(Context), Device(Device), ZeCommandList(CommandList),
95-
ZeCommandListDesc(ZeDesc), QueueProperties(), SyncPoints(),
96-
NextSyncPoint(0), CommandListMap() {
95+
ZeCommandListDesc(ZeDesc), ZeFencesList(), QueueProperties(),
96+
SyncPoints(), NextSyncPoint(0) {
9797
(void)Desc;
9898
urContextRetain(Context);
9999
urDeviceRetain(Device);
@@ -132,10 +132,8 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
132132
}
133133

134134
// Release Fences allocated to command_buffer
135-
for (auto it = CommandListMap.begin(); it != CommandListMap.end(); ++it) {
136-
if (it->second.ZeFence != nullptr) {
137-
ZE_CALL_NOCHECK(zeFenceDestroy, (it->second.ZeFence));
138-
}
135+
for (auto &ZeFence : ZeFencesList) {
136+
ZE_CALL_NOCHECK(zeFenceDestroy, (ZeFence));
139137
}
140138
}
141139

@@ -464,7 +462,6 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
464462
ZE2UR_CALL(
465463
zeCommandListAppendBarrier,
466464
(ZeCommandList, nullptr, 1, &RetCommandBuffer->WaitEvent->ZeEvent));
467-
468465
return UR_RESULT_SUCCESS;
469466
}
470467

@@ -856,12 +853,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
856853
ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue,
857854
uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
858855
ur_event_handle_t *Event) {
859-
// There are issues with immediate command lists so return an error if the
860-
// queue is in that mode.
861-
if (Queue->UsingImmCmdLists) {
862-
return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES;
863-
}
864-
865856
std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
866857
// Use compute engine rather than copy engine
867858
const auto UseCopyEngine = false;
@@ -871,22 +862,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
871862

872863
ze_fence_handle_t ZeFence;
873864
ZeStruct<ze_fence_desc_t> ZeFenceDesc;
874-
ur_command_list_ptr_t CommandListPtr;
875865

876866
ZE2UR_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
877-
// TODO: Refactor so requiring a map iterator is not required here, currently
878-
// required for executeCommandList though.
879-
ZeStruct<ze_command_queue_desc_t> ZeQueueDesc;
880-
ZeQueueDesc.ordinal = QueueGroupOrdinal;
881-
CommandListPtr = CommandBuffer->CommandListMap.insert(
882-
std::pair<ze_command_list_handle_t, ur_command_list_info_t>(
883-
CommandBuffer->ZeCommandList,
884-
{ZeFence, false, false, ZeCommandQueue, ZeQueueDesc}));
885-
886-
// Previous execution will have closed the command list, we need to reopen
887-
// it otherwise calling `executeCommandList` will return early.
888-
CommandListPtr->second.IsClosed = false;
889-
CommandListPtr->second.ZeFenceInUse = true;
867+
CommandBuffer->ZeFencesList.push_back(ZeFence);
890868

891869
// Create a command-list that executes before the command-buffer's own command-list and signals
892870
// when `EventWaitList` dependencies are complete.
@@ -908,6 +886,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
908886
(WaitCommandList->first, ZeEvent));
909887
}
910888

889+
bool MustSignalWaitEvent = true;
911890
if (NumEventsInWaitList) {
912891
_ur_ze_event_list_t TmpWaitList;
913892
UR_CALL(TmpWaitList.createAndRetainUrZeEventList(
@@ -920,17 +899,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
920899
else
921900
CommandBuffer->WaitEvent->WaitList.insert(TmpWaitList);
922901

923-
ZE2UR_CALL(zeCommandListAppendBarrier,
924-
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent,
925-
CommandBuffer->WaitEvent->WaitList.Length,
926-
CommandBuffer->WaitEvent->WaitList.ZeEventList));
927-
} else {
928-
ZE2UR_CALL(zeCommandListAppendSignalEvent,
929-
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent));
902+
if (!CommandBuffer->WaitEvent->WaitList.isEmpty()) {
903+
ZE2UR_CALL(zeCommandListAppendBarrier,
904+
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent,
905+
CommandBuffer->WaitEvent->WaitList.Length,
906+
CommandBuffer->WaitEvent->WaitList.ZeEventList));
907+
Queue->executeCommandList(WaitCommandList, false, false);
908+
MustSignalWaitEvent = false;
909+
}
910+
}
911+
912+
if (MustSignalWaitEvent) {
913+
ZE2UR_CALL(zeEventHostSignal, (CommandBuffer->WaitEvent->ZeEvent));
930914
}
931915

916+
// Submit main command-list. This command-list is of a batch command-list
917+
// type, regardless of the UR Queue type. We therefore need to submit the list
918+
// directly using the Level-Zero API to avoid type mismatches if using UR
919+
// functions.
920+
ZE2UR_CALL(zeCommandQueueExecuteCommandLists,
921+
(ZeCommandQueue, 1, &CommandBuffer->ZeCommandList, ZeFence));
922+
932923
// Execution event for this enqueue of the UR command-buffer
933924
ur_event_handle_t RetEvent{};
925+
934926
// Create a command-list to signal RetEvent on completion
935927
ur_command_list_ptr_t SignalCommandList{};
936928
UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList,
@@ -943,7 +935,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
943935
if (Event) {
944936
UR_CALL(createEventAndAssociateQueue(Queue, &RetEvent,
945937
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
946-
SignalCommandList, false));
938+
SignalCommandList, false, true));
947939

948940
if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) {
949941
// Multiple submissions of a command buffer implies that we need to save
@@ -972,13 +964,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
972964
}
973965
}
974966

975-
// Execution our command-lists asynchronously
976-
// TODO Look using a single `zeCommandQueueExecuteCommandLists()` call
977-
// passing all three command-lists, rather than individual calls which
978-
// introduces latency.
979-
UR_CALL(Queue->executeCommandList(WaitCommandList, false, false));
980-
UR_CALL(Queue->executeCommandList(CommandListPtr, false, false));
981-
UR_CALL(Queue->executeCommandList(SignalCommandList, false, false));
967+
Queue->executeCommandList(SignalCommandList, false, false);
982968

983969
if (Event) {
984970
*Event = RetEvent;

source/adapters/level_zero/command_buffer.hpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
5151
ze_command_list_handle_t ZeCommandList;
5252
// Level Zero command list descriptor
5353
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
54+
// List of Level Zero fences created when submitting a graph.
55+
// This list is needed to release all fences retained by the
56+
// command_buffer.
57+
std::vector<ze_fence_handle_t> ZeFencesList;
5458
// Queue properties from command-buffer descriptor
5559
// TODO: Do we need these?
5660
ur_queue_properties_t QueueProperties;
@@ -60,13 +64,6 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
6064
// Next sync_point value (may need to consider ways to reuse values if 32-bits
6165
// is not enough)
6266
ur_exp_command_buffer_sync_point_t NextSyncPoint;
63-
// Command list map so we can use queue::executeCommandList.
64-
// Command list map is also used to release all the Fences retained by the
65-
// command_buffer std::unordered_multimap<ze_command_list_handle_t,
66-
// ur_command_list_info_t> CommandListMap; CommandListMap is redefined as a
67-
// multimap to enable mutiple commands enqueing into the same command_buffer
68-
std::unordered_multimap<ze_command_list_handle_t, ur_command_list_info_t>
69-
CommandListMap;
7067
// Event which signals that the most recent execution of the command-buffer
7168
// has finished
7269
ur_event_handle_t SignalEvent = nullptr;

0 commit comments

Comments
 (0)