Skip to content

Commit f26c0b8

Browse files
[SYCL] Fix discarded enqueue function event markings (#16223)
This commit fixes an issue where memory operations enqueued through the enqueue free functions would not correctly mark the resulting events as discarded, breaking in-order barrier assumptions. Fixes #15606. --------- Co-authored-by: Andrey Alekseenko <al42and@gmail.com> Signed-off-by: Larsen, Steffen <steffen.larsen@intel.com>
1 parent 4679dc0 commit f26c0b8

File tree

6 files changed

+144
-38
lines changed

6 files changed

+144
-38
lines changed

sycl/source/detail/event_impl.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,9 @@ class event_impl {
152152
/// Clear the event state
153153
void setStateIncomplete();
154154

155+
/// Set state as discarded.
156+
void setStateDiscarded() { MState = HES_Discarded; }
157+
155158
/// Returns command that is associated with the event.
156159
///
157160
/// Scheduler mutex must be locked in read mode when this is called.

sycl/source/detail/queue_impl.cpp

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -308,8 +308,9 @@ void queue_impl::addEvent(const event &Event) {
308308
addSharedEvent(Event);
309309
}
310310
// As long as the queue supports urQueueFinish we only need to store events
311-
// for unenqueued commands and host tasks.
312-
else if (MEmulateOOO || EImpl->getHandle() == nullptr) {
311+
// for undiscarded, unenqueued commands and host tasks.
312+
else if (MEmulateOOO ||
313+
(EImpl->getHandle() == nullptr && !EImpl->isDiscarded())) {
313314
std::weak_ptr<event_impl> EventWeakPtr{EImpl};
314315
std::lock_guard<std::mutex> Lock{MMutex};
315316
MEventsWeak.push_back(std::move(EventWeakPtr));
@@ -412,13 +413,24 @@ event queue_impl::submit_impl(const std::function<void(handler &)> &CGF,
412413
template <typename HandlerFuncT>
413414
event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,
414415
const std::vector<event> &DepEvents,
416+
bool CallerNeedsEvent,
415417
HandlerFuncT HandlerFunc) {
416-
return submit(
418+
SubmissionInfo SI{};
419+
if (!CallerNeedsEvent && supportsDiscardingPiEvents()) {
420+
submit_without_event(
421+
[&](handler &CGH) {
422+
CGH.depends_on(DepEvents);
423+
HandlerFunc(CGH);
424+
},
425+
Self, SI, /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
426+
return createDiscardedEvent();
427+
}
428+
return submit_with_event(
417429
[&](handler &CGH) {
418430
CGH.depends_on(DepEvents);
419431
HandlerFunc(CGH);
420432
},
421-
Self, /*CodeLoc*/ {}, /*SubmissionInfo*/ {}, /*IsTopCodeLoc*/ true);
433+
Self, SI, /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
422434
}
423435

424436
template <typename HandlerFuncT, typename MemOpFuncT, typename... MemOpArgTs>
@@ -446,7 +458,16 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
446458
NestedCallsTracker tracker;
447459
MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
448460
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
449-
return createDiscardedEvent();
461+
462+
event DiscardedEvent = createDiscardedEvent();
463+
if (isInOrder()) {
464+
// Store the discarded event for proper in-order dependency tracking.
465+
auto &EventToStoreIn = MGraph.expired()
466+
? MDefaultGraphDeps.LastEventPtr
467+
: MExtGraphDeps.LastEventPtr;
468+
EventToStoreIn = detail::getSyclObjImpl(DiscardedEvent);
469+
}
470+
return DiscardedEvent;
450471
}
451472

452473
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
@@ -471,7 +492,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
471492
return discard_or_return(ResEvent);
472493
}
473494
}
474-
return submitWithHandler(Self, DepEvents, HandlerFunc);
495+
return submitWithHandler(Self, DepEvents, CallerNeedsEvent, HandlerFunc);
475496
}
476497

477498
void *queue_impl::instrumentationProlog(const detail::code_location &CodeLoc,

sycl/source/detail/queue_impl.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -868,7 +868,7 @@ class queue_impl {
868868
template <typename HandlerFuncT>
869869
event submitWithHandler(const std::shared_ptr<queue_impl> &Self,
870870
const std::vector<event> &DepEvents,
871-
HandlerFuncT HandlerFunc);
871+
bool CallerNeedsEvent, HandlerFuncT HandlerFunc);
872872

873873
/// Performs submission of a memory operation directly if scheduler can be
874874
/// bypassed, or with a handler otherwise.

sycl/source/detail/scheduler/commands.cpp

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -956,7 +956,7 @@ bool Command::enqueue(EnqueueResultT &EnqueueResult, BlockingT Blocking,
956956
EnqueueResultT(EnqueueResultT::SyclEnqueueFailed, this, Res);
957957
else {
958958
MEvent->setEnqueued();
959-
if (MShouldCompleteEventIfPossible &&
959+
if (MShouldCompleteEventIfPossible && !MEvent->isDiscarded() &&
960960
(MEvent->isHost() || MEvent->getHandle() == nullptr))
961961
MEvent->setComplete();
962962

@@ -3075,6 +3075,13 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
30753075
ur_event_handle_t *Event = DiscardUrEvent ? nullptr : &UREvent;
30763076
detail::EventImplPtr EventImpl = DiscardUrEvent ? nullptr : MEvent;
30773077

3078+
auto SetEventHandleOrDiscard = [&]() {
3079+
if (Event)
3080+
MEvent->setHandle(*Event);
3081+
else
3082+
MEvent->setStateDiscarded();
3083+
};
3084+
30783085
switch (MCommandGroup->getType()) {
30793086

30803087
case CGType::UpdateHost: {
@@ -3209,8 +3216,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
32093216
Result != UR_RESULT_SUCCESS)
32103217
return Result;
32113218

3212-
if (Event)
3213-
MEvent->setHandle(*Event);
3219+
SetEventHandleOrDiscard();
32143220
return UR_RESULT_SUCCESS;
32153221
}
32163222
case CGType::FillUSM: {
@@ -3221,8 +3227,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
32213227
Result != UR_RESULT_SUCCESS)
32223228
return Result;
32233229

3224-
if (Event)
3225-
MEvent->setHandle(*Event);
3230+
SetEventHandleOrDiscard();
32263231
return UR_RESULT_SUCCESS;
32273232
}
32283233
case CGType::PrefetchUSM: {
@@ -3233,8 +3238,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
32333238
Result != UR_RESULT_SUCCESS)
32343239
return Result;
32353240

3236-
if (Event)
3237-
MEvent->setHandle(*Event);
3241+
SetEventHandleOrDiscard();
32383242
return UR_RESULT_SUCCESS;
32393243
}
32403244
case CGType::AdviseUSM: {
@@ -3246,8 +3250,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
32463250
Result != UR_RESULT_SUCCESS)
32473251
return Result;
32483252

3249-
if (Event)
3250-
MEvent->setHandle(*Event);
3253+
SetEventHandleOrDiscard();
32513254
return UR_RESULT_SUCCESS;
32523255
}
32533256
case CGType::Copy2DUSM: {
@@ -3259,8 +3262,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
32593262
Result != UR_RESULT_SUCCESS)
32603263
return Result;
32613264

3262-
if (Event)
3263-
MEvent->setHandle(*Event);
3265+
SetEventHandleOrDiscard();
32643266
return UR_RESULT_SUCCESS;
32653267
}
32663268
case CGType::Fill2DUSM: {
@@ -3272,8 +3274,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
32723274
Result != UR_RESULT_SUCCESS)
32733275
return Result;
32743276

3275-
if (Event)
3276-
MEvent->setHandle(*Event);
3277+
SetEventHandleOrDiscard();
32773278
return UR_RESULT_SUCCESS;
32783279
}
32793280
case CGType::Memset2DUSM: {
@@ -3285,8 +3286,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
32853286
Result != UR_RESULT_SUCCESS)
32863287
return Result;
32873288

3288-
if (Event)
3289-
MEvent->setHandle(*Event);
3289+
SetEventHandleOrDiscard();
32903290
return UR_RESULT_SUCCESS;
32913291
}
32923292
case CGType::CodeplayHostTask: {
@@ -3426,8 +3426,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
34263426
MQueue->getAdapter()->call<UrApiKind::urEnqueueNativeCommandExp>(
34273427
MQueue->getHandleRef(), InteropFreeFunc, &CustomOpData, ReqMems.size(),
34283428
ReqMems.data(), nullptr, RawEvents.size(), RawEvents.data(), Event);
3429-
if (Event)
3430-
MEvent->setHandle(*Event);
3429+
SetEventHandleOrDiscard();
34313430
return UR_RESULT_SUCCESS;
34323431
}
34333432
case CGType::Barrier: {
@@ -3437,8 +3436,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
34373436
MEvent->setHostEnqueueTime();
34383437
Adapter->call<UrApiKind::urEnqueueEventsWaitWithBarrier>(
34393438
MQueue->getHandleRef(), 0, nullptr, Event);
3440-
if (Event)
3441-
MEvent->setHandle(*Event);
3439+
SetEventHandleOrDiscard();
34423440
return UR_RESULT_SUCCESS;
34433441
}
34443442
case CGType::BarrierWaitlist: {
@@ -3455,8 +3453,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
34553453
MEvent->setHostEnqueueTime();
34563454
Adapter->call<UrApiKind::urEnqueueEventsWaitWithBarrier>(
34573455
MQueue->getHandleRef(), UrEvents.size(), &UrEvents[0], Event);
3458-
if (Event)
3459-
MEvent->setHandle(*Event);
3456+
SetEventHandleOrDiscard();
34603457
return UR_RESULT_SUCCESS;
34613458
}
34623459
case CGType::ProfilingTag: {
@@ -3503,8 +3500,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
35033500
Adapter->call<UrApiKind::urEventRelease>(PostTimestampBarrierEvent);
35043501
}
35053502

3506-
if (Event)
3507-
MEvent->setHandle(*Event);
3503+
SetEventHandleOrDiscard();
35083504
return UR_RESULT_SUCCESS;
35093505
}
35103506
case CGType::CopyToDeviceGlobal: {
@@ -3517,8 +3513,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
35173513
Result != UR_RESULT_SUCCESS)
35183514
return Result;
35193515

3520-
if (Event)
3521-
MEvent->setHandle(*Event);
3516+
SetEventHandleOrDiscard();
35223517
return UR_RESULT_SUCCESS;
35233518
}
35243519
case CGType::CopyFromDeviceGlobal: {
@@ -3532,8 +3527,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
35323527
Result != UR_RESULT_SUCCESS)
35333528
return Result;
35343529

3535-
if (Event)
3536-
MEvent->setHandle(*Event);
3530+
SetEventHandleOrDiscard();
35373531
return UR_RESULT_SUCCESS;
35383532
}
35393533
case CGType::ReadWriteHostPipe: {
@@ -3564,8 +3558,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
35643558
CmdBufferCG->MCommandBuffer, MQueue->getHandleRef(),
35653559
RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0],
35663560
Event);
3567-
if (Event)
3568-
MEvent->setHandle(*Event);
3561+
SetEventHandleOrDiscard();
35693562

35703563
return Err;
35713564
}
@@ -3581,8 +3574,7 @@ ur_result_t ExecCGCommand::enqueueImpQueue() {
35813574
Result != UR_RESULT_SUCCESS)
35823575
return Result;
35833576

3584-
if (Event)
3585-
MEvent->setHandle(*Event);
3577+
SetEventHandleOrDiscard();
35863578

35873579
return UR_RESULT_SUCCESS;
35883580
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// REQUIRES: aspect-usm_device_allocations
2+
// RUN: %{build} %threads_lib -o %t.out
3+
// RUN: %{run} %t.out
4+
5+
// Regression test for a case where parallel work with enqueue functions
6+
// discarding their results would cause implicit waits on discarded events.
7+
8+
#include <sycl/detail/core.hpp>
9+
#include <sycl/ext/oneapi/experimental/enqueue_functions.hpp>
10+
#include <sycl/properties/all_properties.hpp>
11+
#include <sycl/usm.hpp>
12+
#include <thread>
13+
14+
void threadFunction(int) {
15+
sycl::queue Q{{sycl::property::queue::in_order()}};
16+
17+
constexpr int Size = 128 * 128 * 128;
18+
int *DevMem = sycl::malloc_device<int>(Size, Q);
19+
20+
sycl::ext::oneapi::experimental::submit(
21+
Q, [&](sycl::handler &cgh) { cgh.fill<int>(DevMem, 1, Size); });
22+
Q.wait_and_throw();
23+
24+
sycl::free(DevMem, Q);
25+
}
26+
27+
int main() {
28+
constexpr size_t NThreads = 2;
29+
std::array<std::thread, NThreads> Threads;
30+
31+
for (size_t I = 0; I < NThreads; I++)
32+
Threads[I] = std::thread{threadFunction, I};
33+
for (size_t I = 0; I < NThreads; I++)
34+
Threads[I].join();
35+
36+
return 0;
37+
}

0 commit comments

Comments
 (0)