Skip to content

Commit 346a6c5

Browse files
[SYCL] Fix depends_on handling with pi commands (#5901)
Fixes two related issues: 1) if pi task is blocked by host task or host accessor it can not be enqueued and piEvent is not present in its event_impl. When we schedule new pi task with explicit (depends_on) dependency on the first one - its is absent in MDeps since we have no usual memory dependencies and present in MPreparedDepsEvents. MPreparedDepsEvents is used in enqueueImp for obtaining piEvents. Any events from MPreparedDepsEvents w/o pi events will be just skipped. AddDep always call processDepEvent which distributes events to MPreparedDepsEvents (pi event expected) and MPreparedHostDepsEvents (no pi event) so replacement of MDeps in enqueueCommand should be valid. 2) if we have kernel w/o usual memory dependencies (MDeps & MUsers are empty) blocked kernel will be just "cleanup" and its execution will be skipped. Signed-off-by: Tikhomirova, Kseniya <kseniya.tikhomirova@intel.com>
1 parent a0a4d72 commit 346a6c5

13 files changed

+565
-93
lines changed

sycl/source/detail/event_impl.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -342,10 +342,16 @@ event_impl::get_info<info::event::command_execution_status>() {
342342
if (MState == HES_Discarded)
343343
return info::event_command_status::ext_oneapi_unknown;
344344

345-
if (!MHostEvent && MEvent) {
346-
return get_event_info<info::event::command_execution_status>(
347-
this->getHandleRef(), this->getPlugin());
345+
if (!MHostEvent) {
346+
// Command is enqueued and PiEvent is ready
347+
if (MEvent)
348+
return get_event_info<info::event::command_execution_status>(
349+
this->getHandleRef(), this->getPlugin());
350+
// Command is blocked and not enqueued, PiEvent is not assigned yet
351+
else if (MCommand)
352+
return sycl::info::event_command_status::submitted;
348353
}
354+
349355
return MHostEvent && MState.load() != HES_Complete
350356
? sycl::info::event_command_status::submitted
351357
: info::event_command_status::complete;

sycl/source/detail/event_impl.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,11 @@ class event_impl {
212212
}
213213
bool needsCleanupAfterWait() { return MNeedsCleanupAfterWait; }
214214

215+
/// Returns worker queue for command.
216+
///
217+
/// @return a reference to MWorkerQueue.
218+
QueueImplPtr &getWorkerQueue() { return MWorkerQueue; };
219+
215220
/// Checks if an event is in a fully intialized state. Default-constructed
216221
/// events will return true only after having initialized its native event,
217222
/// while other events will assume that they are fully initialized at
@@ -243,6 +248,8 @@ class event_impl {
243248
std::weak_ptr<queue_impl> MQueue;
244249
const bool MIsProfilingEnabled = false;
245250

251+
QueueImplPtr MWorkerQueue;
252+
246253
/// Dependency events prepared for waiting by backend.
247254
std::vector<EventImplPtr> MPreparedDepsEvents;
248255
std::vector<EventImplPtr> MPreparedHostDepsEvents;

sycl/source/detail/queue_impl.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,7 @@ class queue_impl {
120120
}
121121
if (!MHostQueue) {
122122
const QueueOrder QOrder =
123-
MPropList.has_property<property::queue::in_order>()
124-
? QueueOrder::Ordered
125-
: QueueOrder::OOO;
123+
MIsInorder ? QueueOrder::Ordered : QueueOrder::OOO;
126124
MQueues.push_back(createQueue(QOrder));
127125
}
128126
}
@@ -202,6 +200,8 @@ class queue_impl {
202200
/// \return true if this queue has discard_events support.
203201
bool has_discard_events_support() const { return MHasDiscardEventsSupport; }
204202

203+
bool isInOrder() const { return MIsInorder; }
204+
205205
/// Queries SYCL queue for information.
206206
///
207207
/// The return type depends on information being queried.

sycl/source/detail/scheduler/commands.cpp

Lines changed: 35 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -203,17 +203,34 @@ static std::string commandToName(Command::CommandType Type) {
203203
}
204204
#endif
205205

206-
static std::vector<RT::PiEvent>
207-
getPiEvents(const std::vector<EventImplPtr> &EventImpls) {
206+
std::vector<RT::PiEvent>
207+
Command::getPiEvents(const std::vector<EventImplPtr> &EventImpls) const {
208208
std::vector<RT::PiEvent> RetPiEvents;
209209
for (auto &EventImpl : EventImpls) {
210-
if (EventImpl->getHandleRef() != nullptr)
211-
RetPiEvents.push_back(EventImpl->getHandleRef());
210+
if (EventImpl->getHandleRef() == nullptr)
211+
continue;
212+
213+
// Do not add redundant event dependencies for in-order queues.
214+
// At this stage dependency is definitely pi task and need to check if
215+
// current one is a host task. In this case we should not skip pi event due
216+
// to different sync mechanisms for different task types on in-order queue.
217+
const QueueImplPtr &WorkerQueue = getWorkerQueue();
218+
if (EventImpl->getWorkerQueue() == WorkerQueue &&
219+
WorkerQueue->isInOrder() && !isHostTask())
220+
continue;
221+
222+
RetPiEvents.push_back(EventImpl->getHandleRef());
212223
}
213224

214225
return RetPiEvents;
215226
}
216227

228+
bool Command::isHostTask() const {
229+
return (MType == CommandType::RUN_CG) /* host task has this type also */ &&
230+
((static_cast<const ExecCGCommand *>(this))->getCG().getType() ==
231+
CG::CGTYPE::CodeplayHostTask);
232+
}
233+
217234
static void flushCrossQueueDeps(const std::vector<EventImplPtr> &EventImpls,
218235
const QueueImplPtr &Queue) {
219236
for (auto &EventImpl : EventImpls) {
@@ -240,7 +257,8 @@ class DispatchHostTask {
240257
// sophisticated waiting mechanism to allow to utilize this thread for any
241258
// other available job and resume once all required events are ready.
242259
for (auto &PluginWithEvents : RequiredEventsPerPlugin) {
243-
std::vector<RT::PiEvent> RawEvents = getPiEvents(PluginWithEvents.second);
260+
std::vector<RT::PiEvent> RawEvents =
261+
MThisCmd->getPiEvents(PluginWithEvents.second);
244262
try {
245263
PluginWithEvents.first->call<PiApiKind::piEventsWait>(RawEvents.size(),
246264
RawEvents.data());
@@ -393,10 +411,12 @@ void Command::waitForEvents(QueueImplPtr Queue,
393411
Command::Command(CommandType Type, QueueImplPtr Queue)
394412
: MQueue(std::move(Queue)),
395413
MEvent(std::make_shared<detail::event_impl>(MQueue)),
414+
MWorkerQueue(MEvent->getWorkerQueue()),
396415
MPreparedDepsEvents(MEvent->getPreparedDepsEvents()),
397416
MPreparedHostDepsEvents(MEvent->getPreparedHostDepsEvents()),
398417
MType(Type) {
399418
MSubmittedQueue = MQueue;
419+
MWorkerQueue = MQueue;
400420
MEvent->setCommand(this);
401421
MEvent->setContextImpl(MQueue->getContextImplPtr());
402422
MEvent->setStateIncomplete();
@@ -600,12 +620,6 @@ Command *Command::processDepEvent(EventImplPtr DepEvent, const DepDesc &Dep,
600620

601621
Command *ConnectionCmd = nullptr;
602622

603-
// Do not add redundant event dependencies for in-order queues.
604-
if (Dep.MDepCommand && Dep.MDepCommand->getWorkerQueue() == WorkerQueue &&
605-
WorkerQueue->has_property<property::queue::in_order>() &&
606-
getType() != CommandType::HOST_TASK)
607-
return nullptr;
608-
609623
ContextImplPtr DepEventContext = DepEvent->getContextImpl();
610624
// If contexts don't match we'll connect them using host task
611625
if (DepEventContext != WorkerContext && !WorkerContext->is_host()) {
@@ -621,14 +635,14 @@ const ContextImplPtr &Command::getWorkerContext() const {
621635
return MQueue->getContextImplPtr();
622636
}
623637

624-
const QueueImplPtr &Command::getWorkerQueue() const { return MQueue; }
638+
const QueueImplPtr &Command::getWorkerQueue() const {
639+
assert(MWorkerQueue && "MWorkerQueue must not be nullptr");
640+
return MWorkerQueue;
641+
}
625642

626643
bool Command::producesPiEvent() const { return true; }
627644

628-
bool Command::supportsPostEnqueueCleanup() const {
629-
// Isolated commands are cleaned up separately
630-
return !MUsers.empty() || !MDeps.empty();
631-
}
645+
bool Command::supportsPostEnqueueCleanup() const { return true; }
632646

633647
Command *Command::addDep(DepDesc NewDep, std::vector<Command *> &ToCleanUp) {
634648
Command *ConnectionCmd = nullptr;
@@ -1298,6 +1312,9 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq,
12981312
if (!MSrcQueue->is_host()) {
12991313
MEvent->setContextImpl(MSrcQueue->getContextImplPtr());
13001314
}
1315+
1316+
MWorkerQueue = MQueue->is_host() ? MSrcQueue : MQueue;
1317+
13011318
emitInstrumentationDataProxy();
13021319
}
13031320

@@ -1335,10 +1352,6 @@ const ContextImplPtr &MemCpyCommand::getWorkerContext() const {
13351352
return getWorkerQueue()->getContextImplPtr();
13361353
}
13371354

1338-
const QueueImplPtr &MemCpyCommand::getWorkerQueue() const {
1339-
return MQueue->is_host() ? MSrcQueue : MQueue;
1340-
}
1341-
13421355
bool MemCpyCommand::producesPiEvent() const {
13431356
// TODO remove this workaround once the batching issue is addressed in Level
13441357
// Zero plugin.
@@ -1481,6 +1494,8 @@ MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq,
14811494
MEvent->setContextImpl(MSrcQueue->getContextImplPtr());
14821495
}
14831496

1497+
MWorkerQueue = MQueue->is_host() ? MSrcQueue : MQueue;
1498+
14841499
emitInstrumentationDataProxy();
14851500
}
14861501

@@ -1518,10 +1533,6 @@ const ContextImplPtr &MemCpyCommandHost::getWorkerContext() const {
15181533
return getWorkerQueue()->getContextImplPtr();
15191534
}
15201535

1521-
const QueueImplPtr &MemCpyCommandHost::getWorkerQueue() const {
1522-
return MQueue->is_host() ? MSrcQueue : MQueue;
1523-
}
1524-
15251536
pi_int32 MemCpyCommandHost::enqueueImp() {
15261537
const QueueImplPtr &Queue = getWorkerQueue();
15271538
waitForPreparedHostEvents();

sycl/source/detail/scheduler/commands.hpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,19 +199,28 @@ class Command {
199199

200200
/// Get the queue this command will be submitted to. Could differ from MQueue
201201
/// for memory copy commands.
202-
virtual const QueueImplPtr &getWorkerQueue() const;
202+
const QueueImplPtr &getWorkerQueue() const;
203203

204204
/// Returns true iff the command produces a PI event on non-host devices.
205205
virtual bool producesPiEvent() const;
206206

207207
/// Returns true iff this command can be freed by post enqueue cleanup.
208208
virtual bool supportsPostEnqueueCleanup() const;
209209

210+
/// Collect PI events from EventImpls and filter out some of them in case of
211+
/// in order queue
212+
std::vector<RT::PiEvent>
213+
getPiEvents(const std::vector<EventImplPtr> &EventImpls) const;
214+
215+
bool isHostTask() const;
216+
210217
protected:
211218
QueueImplPtr MQueue;
212219
QueueImplPtr MSubmittedQueue;
213220
EventImplPtr MEvent;
214221

222+
QueueImplPtr &MWorkerQueue;
223+
215224
/// Dependency events prepared for waiting by backend.
216225
/// See processDepEvent for details.
217226
std::vector<EventImplPtr> &MPreparedDepsEvents;
@@ -252,6 +261,10 @@ class Command {
252261
return MPreparedHostDepsEvents;
253262
}
254263

264+
const std::vector<EventImplPtr> &getPreparedDepsEvents() const {
265+
return MPreparedDepsEvents;
266+
}
267+
255268
/// Contains list of dependencies(edges)
256269
std::vector<DepDesc> MDeps;
257270
/// Contains list of commands that depend on the command.
@@ -492,7 +505,6 @@ class MemCpyCommand : public Command {
492505
const Requirement *getRequirement() const final { return &MDstReq; }
493506
void emitInstrumentationData() final;
494507
const ContextImplPtr &getWorkerContext() const final;
495-
const QueueImplPtr &getWorkerQueue() const final;
496508
bool producesPiEvent() const final;
497509

498510
private:
@@ -517,7 +529,6 @@ class MemCpyCommandHost : public Command {
517529
const Requirement *getRequirement() const final { return &MDstReq; }
518530
void emitInstrumentationData() final;
519531
const ContextImplPtr &getWorkerContext() const final;
520-
const QueueImplPtr &getWorkerQueue() const final;
521532

522533
private:
523534
pi_int32 enqueueImp() final;

sycl/source/detail/scheduler/graph_processor.cpp

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -58,23 +58,20 @@ bool Scheduler::GraphProcessor::enqueueCommand(
5858
return false;
5959
}
6060

61-
// Recursively enqueue all the dependencies first and
62-
// exit immediately if any of the commands cannot be enqueued.
63-
for (DepDesc &Dep : Cmd->MDeps) {
64-
if (!enqueueCommand(Dep.MDepCommand, EnqueueResult, ToCleanUp, Blocking))
65-
return false;
61+
// Recursively enqueue all the implicit + explicit backend level dependencies
62+
// first and exit immediately if any of the commands cannot be enqueued.
63+
for (const EventImplPtr &Event : Cmd->getPreparedDepsEvents()) {
64+
if (Command *DepCmd = static_cast<Command *>(Event->getCommand()))
65+
if (!enqueueCommand(DepCmd, EnqueueResult, ToCleanUp, Blocking))
66+
return false;
6667
}
6768

68-
// Asynchronous host operations (amongst dependencies of an arbitrary command)
69-
// are not supported (see Command::processDepEvent method). This impacts
70-
// operation of host-task feature a lot with hangs and long-runs. Hence we
71-
// have this workaround here.
72-
// This workaround is safe as long as the only asynchronous host operation we
73-
// have is a host task.
74-
// This may iterate over some of dependencies in Cmd->MDeps. Though, the
75-
// enqueue operation is idempotent and the second call will result in no-op.
76-
// TODO remove the workaround when proper fix for host-task dispatching is
77-
// implemented.
69+
// Recursively enqueue all the implicit + explicit host dependencies and
70+
// exit immediately if any of the commands cannot be enqueued.
71+
// Host task execution is asynchronous. In current implementation enqueue for
72+
// this command will wait till host task completion by waitInternal call on
73+
// MHostDepsEvents. TO FIX: implement enqueue of blocked commands on host task
74+
// completion stage and eliminate this event waiting in enqueue.
7875
for (const EventImplPtr &Event : Cmd->getPreparedHostDepsEvents()) {
7976
if (Command *DepCmd = static_cast<Command *>(Event->getCommand()))
8077
if (!enqueueCommand(DepCmd, EnqueueResult, ToCleanUp, Blocking))

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -157,12 +157,6 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
157157
CleanUp();
158158
std::rethrow_exception(std::current_exception());
159159
}
160-
161-
// If there are no memory dependencies decouple and free the command.
162-
// Though, dismiss ownership of native kernel command group as it's
163-
// resources may be in use by backend and synchronization point here is
164-
// at native kernel execution finish.
165-
CleanUp();
166160
}
167161
}
168162
cleanupCommands(ToCleanUp);

sycl/unittests/scheduler/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,5 @@ add_sycl_unittest(SchedulerTests OBJECT
2222
LeafLimitDiffContexts.cpp
2323
InOrderQueueSyncCheck.cpp
2424
RunOnHostIntelCG.cpp
25+
EnqueueWithDependsOnDeps.cpp
2526
)

0 commit comments

Comments
 (0)