@@ -1004,39 +1004,19 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
1004
1004
auto CommandBuffer = CurrentPartition->MCommandBuffers [Queue->get_device ()];
1005
1005
1006
1006
if (CommandBuffer) {
1007
- // if previous submissions are incompleted, we automatically
1008
- // add completion events of previous submissions as dependencies.
1009
- // With Level-Zero backend we cannot resubmit a command-buffer until the
1010
- // previous one has already completed.
1011
- // Indeed, since a command-list does not accept a list a dependencies at
1012
- // submission, we circumvent this lack by adding a barrier that waits on a
1013
- // specific event and then define the conditions to signal this event in
1014
- // another command-list. Consequently, if a second submission is
1015
- // performed, the signal conditions of this single event are redefined by
1016
- // this second submission. Thus, this can lead to an undefined behaviour
1017
- // and potential hangs. We have therefore to expliclty wait in the host
1018
- // for previous submission to complete before resubmitting the
1019
- // command-buffer for level-zero backend.
1020
- // TODO https://github.com/intel/llvm/issues/17734
1021
- // Remove this backend specific behavior and allow multiple concurrent
1022
- // submissions of the UR command-buffer.
1023
1007
for (std::vector<sycl::detail::EventImplPtr>::iterator It =
1024
1008
MExecutionEvents.begin ();
1025
1009
It != MExecutionEvents.end ();) {
1026
1010
auto Event = *It;
1027
1011
if (!Event->isCompleted ()) {
1028
- if (Queue->get_device ().get_backend () ==
1029
- sycl::backend::ext_oneapi_level_zero) {
1030
- Event->wait (Event);
1031
- } else {
1032
- auto &AttachedEventsList = Event->getPostCompleteEvents ();
1033
- CGData.MEvents .reserve (AttachedEventsList.size () + 1 );
1034
- CGData.MEvents .push_back (Event);
1035
- // Add events of the previous execution of all graph partitions.
1036
- for (auto &AttachedEvent : AttachedEventsList) {
1037
- CGData.MEvents .push_back (AttachedEvent);
1038
- }
1039
- }
1012
+ auto &AttachedEventsList = Event->getPostCompleteEvents ();
1013
+ CGData.MEvents .reserve (CGData.MEvents .size () +
1014
+ AttachedEventsList.size () + 1 );
1015
+ CGData.MEvents .push_back (Event);
1016
+ // Add events of the previous execution of all graph partitions.
1017
+ CGData.MEvents .insert (CGData.MEvents .end (),
1018
+ AttachedEventsList.begin (),
1019
+ AttachedEventsList.end ());
1040
1020
++It;
1041
1021
} else {
1042
1022
// Remove completed events
@@ -1102,46 +1082,6 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
1102
1082
1103
1083
NewEvent = sycl::detail::Scheduler::getInstance ().addCG (
1104
1084
NodeImpl->getCGCopy (), Queue, /* EventNeeded=*/ true );
1105
- } else {
1106
- std::vector<std::shared_ptr<sycl::detail::event_impl>> ScheduledEvents;
1107
- for (auto &NodeImpl : CurrentPartition->MSchedule ) {
1108
- std::vector<ur_event_handle_t > RawEvents;
1109
-
1110
- // If the node has no requirements for accessors etc. then we skip the
1111
- // scheduler and enqueue directly.
1112
- if (NodeImpl->MCGType == sycl::detail::CGType::Kernel &&
1113
- NodeImpl->MCommandGroup ->getRequirements ().size () +
1114
- static_cast <sycl::detail::CGExecKernel *>(
1115
- NodeImpl->MCommandGroup .get ())
1116
- ->MStreams .size () ==
1117
- 0 ) {
1118
- sycl::detail::CGExecKernel *CG =
1119
- static_cast <sycl::detail::CGExecKernel *>(
1120
- NodeImpl->MCommandGroup .get ());
1121
- auto OutEvent = CreateNewEvent ();
1122
- sycl::detail::enqueueImpKernel (
1123
- Queue, CG->MNDRDesc , CG->MArgs , CG->MKernelBundle ,
1124
- CG->MSyclKernel , CG->MKernelName , RawEvents, OutEvent,
1125
- // TODO: Pass accessor mem allocations
1126
- nullptr ,
1127
- // TODO: Extract from handler
1128
- UR_KERNEL_CACHE_CONFIG_DEFAULT, CG->MKernelIsCooperative ,
1129
- CG->MKernelUsesClusterLaunch , CG->MKernelWorkGroupMemorySize );
1130
- ScheduledEvents.push_back (NewEvent);
1131
- } else if (!NodeImpl->isEmpty ()) {
1132
- // Empty nodes are node processed as other nodes, but only their
1133
- // dependencies are propagated in findRealDeps
1134
- sycl::detail::EventImplPtr EventImpl =
1135
- sycl::detail::Scheduler::getInstance ().addCG (
1136
- NodeImpl->getCGCopy (), Queue, /* EventNeeded=*/ true );
1137
-
1138
- ScheduledEvents.push_back (EventImpl);
1139
- }
1140
- }
1141
- // Create an event which has all kernel events as dependencies
1142
- NewEvent = std::make_shared<sycl::detail::event_impl>(Queue);
1143
- NewEvent->setStateIncomplete ();
1144
- NewEvent->getPreparedDepsEvents () = ScheduledEvents;
1145
1085
}
1146
1086
PartitionsExecutionEvents[CurrentPartition] = NewEvent;
1147
1087
}
0 commit comments