@@ -92,8 +92,8 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
92
92
ZeStruct<ze_command_list_desc_t > ZeDesc,
93
93
const ur_exp_command_buffer_desc_t *Desc)
94
94
: Context(Context), Device(Device), ZeCommandList(CommandList),
95
- ZeCommandListDesc(ZeDesc), QueueProperties (), SyncPoints (),
96
- NextSyncPoint( 0 ), CommandListMap( ) {
95
+ ZeCommandListDesc(ZeDesc), ZeFencesList (), QueueProperties (),
96
+ SyncPoints( ), NextSyncPoint( 0 ) {
97
97
(void )Desc;
98
98
urContextRetain (Context);
99
99
urDeviceRetain (Device);
@@ -132,10 +132,8 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
132
132
}
133
133
134
134
// Release Fences allocated to command_buffer
135
- for (auto it = CommandListMap.begin (); it != CommandListMap.end (); ++it) {
136
- if (it->second .ZeFence != nullptr ) {
137
- ZE_CALL_NOCHECK (zeFenceDestroy, (it->second .ZeFence ));
138
- }
135
+ for (auto &ZeFence : ZeFencesList) {
136
+ ZE_CALL_NOCHECK (zeFenceDestroy, (ZeFence));
139
137
}
140
138
}
141
139
@@ -464,7 +462,6 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
464
462
ZE2UR_CALL (
465
463
zeCommandListAppendBarrier,
466
464
(ZeCommandList, nullptr , 1 , &RetCommandBuffer->WaitEvent ->ZeEvent ));
467
-
468
465
return UR_RESULT_SUCCESS;
469
466
}
470
467
@@ -856,12 +853,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
856
853
ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue,
857
854
uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
858
855
ur_event_handle_t *Event) {
859
- // There are issues with immediate command lists so return an error if the
860
- // queue is in that mode.
861
- if (Queue->UsingImmCmdLists ) {
862
- return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES;
863
- }
864
-
865
856
std::scoped_lock<ur_shared_mutex> lock (Queue->Mutex );
866
857
// Use compute engine rather than copy engine
867
858
const auto UseCopyEngine = false ;
@@ -871,22 +862,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
871
862
872
863
ze_fence_handle_t ZeFence;
873
864
ZeStruct<ze_fence_desc_t > ZeFenceDesc;
874
- ur_command_list_ptr_t CommandListPtr;
875
865
876
866
ZE2UR_CALL (zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
877
- // TODO: Refactor so requiring a map iterator is not required here, currently
878
- // required for executeCommandList though.
879
- ZeStruct<ze_command_queue_desc_t > ZeQueueDesc;
880
- ZeQueueDesc.ordinal = QueueGroupOrdinal;
881
- CommandListPtr = CommandBuffer->CommandListMap .insert (
882
- std::pair<ze_command_list_handle_t , ur_command_list_info_t >(
883
- CommandBuffer->ZeCommandList ,
884
- {ZeFence, false , false , ZeCommandQueue, ZeQueueDesc}));
885
-
886
- // Previous execution will have closed the command list, we need to reopen
887
- // it otherwise calling `executeCommandList` will return early.
888
- CommandListPtr->second .IsClosed = false ;
889
- CommandListPtr->second .ZeFenceInUse = true ;
867
+ CommandBuffer->ZeFencesList .push_back (ZeFence);
890
868
891
869
// Create command-list to execute before the main command-list and signal
892
870
// when `EventWaitList` dependencies are complete.
@@ -908,6 +886,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
908
886
(WaitCommandList->first , ZeEvent));
909
887
}
910
888
889
+ bool MustSignalWaitEvent = true ;
911
890
if (NumEventsInWaitList) {
912
891
_ur_ze_event_list_t TmpWaitList;
913
892
UR_CALL (TmpWaitList.createAndRetainUrZeEventList (
@@ -920,17 +899,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
920
899
else
921
900
CommandBuffer->WaitEvent ->WaitList .insert (TmpWaitList);
922
901
923
- ZE2UR_CALL (zeCommandListAppendBarrier,
924
- (WaitCommandList->first , CommandBuffer->WaitEvent ->ZeEvent ,
925
- CommandBuffer->WaitEvent ->WaitList .Length ,
926
- CommandBuffer->WaitEvent ->WaitList .ZeEventList ));
927
- } else {
928
- ZE2UR_CALL (zeCommandListAppendSignalEvent,
929
- (WaitCommandList->first , CommandBuffer->WaitEvent ->ZeEvent ));
902
+ if (!CommandBuffer->WaitEvent ->WaitList .isEmpty ()) {
903
+ ZE2UR_CALL (zeCommandListAppendBarrier,
904
+ (WaitCommandList->first , CommandBuffer->WaitEvent ->ZeEvent ,
905
+ CommandBuffer->WaitEvent ->WaitList .Length ,
906
+ CommandBuffer->WaitEvent ->WaitList .ZeEventList ));
907
+ Queue->executeCommandList (WaitCommandList, false , false );
908
+ MustSignalWaitEvent = false ;
909
+ }
910
+ }
911
+
912
+ if (MustSignalWaitEvent) {
913
+ ZE2UR_CALL (zeEventHostSignal, (CommandBuffer->WaitEvent ->ZeEvent ));
930
914
}
931
915
916
+ // Submit main command-list. This command-list is of a batch command-list
917
+ // type, regardless of the UR Queue type. We therefore need to submit the list
918
+ // directly using the Level-Zero API to avoid type mismatches if using UR
919
+ // functions.
920
+ ZE2UR_CALL (zeCommandQueueExecuteCommandLists,
921
+ (ZeCommandQueue, 1 , &CommandBuffer->ZeCommandList , ZeFence));
922
+
932
923
// Execution event for this enqueue of the UR command-buffer
933
924
ur_event_handle_t RetEvent{};
925
+
934
926
// Create a command-list to signal RetEvent on completion
935
927
ur_command_list_ptr_t SignalCommandList{};
936
928
UR_CALL (Queue->Context ->getAvailableCommandList (Queue, SignalCommandList,
@@ -943,7 +935,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
943
935
if (Event) {
944
936
UR_CALL (createEventAndAssociateQueue (Queue, &RetEvent,
945
937
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
946
- SignalCommandList, false ));
938
+ SignalCommandList, false , true ));
947
939
948
940
if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) {
949
941
// Multiple submissions of a command buffer implies that we need to save
@@ -972,13 +964,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
972
964
}
973
965
}
974
966
975
- // Execution our command-lists asynchronously
976
- // TODO Look using a single `zeCommandQueueExecuteCommandLists()` call
977
- // passing all three command-lists, rather than individual calls which
978
- // introduces latency.
979
- UR_CALL (Queue->executeCommandList (WaitCommandList, false , false ));
980
- UR_CALL (Queue->executeCommandList (CommandListPtr, false , false ));
981
- UR_CALL (Queue->executeCommandList (SignalCommandList, false , false ));
967
+ Queue->executeCommandList (SignalCommandList, false , false );
982
968
983
969
if (Event) {
984
970
*Event = RetEvent;
0 commit comments