Skip to content

Commit 68bf5a0

Browse files
authored
[SYCL] Avoid event allocation on eventless submission (#18582)
Avoid allocating handler MLastEvent and passing the event, if the eventless enqueue functions are used. --------- Signed-off-by: Ptak, Slawomir <slawomir.ptak@intel.com>
1 parent 3b4ebca commit 68bf5a0

File tree

6 files changed

+143
-80
lines changed

6 files changed

+143
-80
lines changed

sycl/include/sycl/handler.hpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,11 @@ class __SYCL_EXPORT handler {
560560
/// object destruction.
561561
///
562562
/// \return a SYCL event object representing the command group
563+
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
564+
detail::EventImplPtr finalize();
565+
#else
563566
event finalize();
567+
#endif
564568

565569
/// Constructs CG object of specific type, passes it to Scheduler and
566570
/// returns sycl::event object representing the command group.
@@ -3381,7 +3385,11 @@ class __SYCL_EXPORT handler {
33813385

33823386
detail::code_location MCodeLoc = {};
33833387
bool MIsFinalized = false;
3388+
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
3389+
detail::EventImplPtr MLastEvent;
3390+
#else
33843391
event MLastEvent;
3392+
#endif
33853393

33863394
// Make queue_impl class friend to be able to call finalize method.
33873395
friend class detail::queue_impl;

sycl/include/sycl/reduction.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1179,7 +1179,11 @@ auto make_reduction(RedOutVar RedVar, RestTy &&...Rest) {
11791179
namespace reduction {
11801180
inline void finalizeHandler(handler &CGH) { CGH.finalize(); }
11811181
template <class FunctorTy> void withAuxHandler(handler &CGH, FunctorTy Func) {
1182+
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
1183+
detail::EventImplPtr E = CGH.finalize();
1184+
#else
11821185
event E = CGH.finalize();
1186+
#endif
11831187
handler AuxHandler(CGH.MQueue, CGH.eventNeeded());
11841188
if (!createSyclObjFromImpl<queue>(CGH.MQueue).is_in_order())
11851189
AuxHandler.depends_on(E);

sycl/source/detail/queue_impl.cpp

Lines changed: 36 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -301,24 +301,23 @@ queue_impl::getLastEvent(const std::shared_ptr<queue_impl> &Self) {
301301
return detail::createSyclObjFromImpl<event>(insertMarkerEvent(Self));
302302
}
303303

304-
void queue_impl::addEvent(const event &Event) {
305-
const EventImplPtr &EImpl = getSyclObjImpl(Event);
306-
assert(EImpl && "Event implementation is missing");
307-
auto *Cmd = static_cast<Command *>(EImpl->getCommand());
308-
if (Cmd != nullptr && EImpl->getHandle() == nullptr &&
309-
!EImpl->isDiscarded()) {
310-
std::weak_ptr<event_impl> EventWeakPtr{EImpl};
304+
void queue_impl::addEvent(const detail::EventImplPtr &EventImpl) {
305+
if (!EventImpl)
306+
return;
307+
auto *Cmd = static_cast<Command *>(EventImpl->getCommand());
308+
if (Cmd != nullptr && EventImpl->getHandle() == nullptr) {
309+
std::weak_ptr<event_impl> EventWeakPtr{EventImpl};
311310
std::lock_guard<std::mutex> Lock{MMutex};
312311
MEventsWeak.push_back(std::move(EventWeakPtr));
313312
}
314313
}
315314

316-
event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
317-
const std::shared_ptr<queue_impl> &Self,
318-
queue_impl *SecondaryQueue, bool CallerNeedsEvent,
319-
const detail::code_location &Loc,
320-
bool IsTopCodeLoc,
321-
const v1::SubmissionInfo &SubmitInfo) {
315+
detail::EventImplPtr
316+
queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
317+
const std::shared_ptr<queue_impl> &Self,
318+
queue_impl *SecondaryQueue, bool CallerNeedsEvent,
319+
const detail::code_location &Loc, bool IsTopCodeLoc,
320+
const v1::SubmissionInfo &SubmitInfo) {
322321
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
323322
detail::handler_impl HandlerImplVal(SecondaryQueue, CallerNeedsEvent);
324323
detail::handler_impl *HandlerImpl = &HandlerImplVal;
@@ -371,29 +370,33 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
371370
}
372371
}
373372

374-
event Event;
373+
detail::EventImplPtr EventImpl;
375374
if (!isInOrder()) {
376-
Event = finalizeHandlerOutOfOrder(Handler);
377-
addEvent(Event);
375+
EventImpl = finalizeHandlerOutOfOrder(Handler);
376+
addEvent(EventImpl);
378377
} else {
379378
if (isHostTask) {
380379
std::unique_lock<std::mutex> Lock(MMutex);
381-
Event = finalizeHandlerInOrderHostTaskUnlocked(Handler);
380+
EventImpl = finalizeHandlerInOrderHostTaskUnlocked(Handler);
382381
} else {
383382
std::unique_lock<std::mutex> Lock(MMutex);
384383

385384
if (!isGraphSubmission && trySwitchingToNoEventsMode()) {
386-
Event = finalizeHandlerInOrderNoEventsUnlocked(Handler);
385+
EventImpl = finalizeHandlerInOrderNoEventsUnlocked(Handler);
387386
} else {
388-
Event = finalizeHandlerInOrderWithDepsUnlocked(Handler);
387+
EventImpl = finalizeHandlerInOrderWithDepsUnlocked(Handler);
389388
}
390389
}
391390
}
392391

393-
if (SubmitInfo.PostProcessorFunc())
392+
if (SubmitInfo.PostProcessorFunc()) {
393+
// All the submission functions using post processing are event based
394+
// functions
395+
assert(EventImpl);
396+
event Event = createSyclObjFromImpl<event>(EventImpl);
394397
handlerPostProcess(Handler, SubmitInfo.PostProcessorFunc(), Event);
398+
}
395399

396-
const auto &EventImpl = detail::getSyclObjImpl(Event);
397400
for (auto &Stream : Streams) {
398401
// We don't want stream flushing to be blocking operation that is why submit
399402
// a host task to print stream buffer. It will fire up as soon as the kernel
@@ -402,25 +405,25 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
402405
Stream->generateFlushCommand(ServiceCGH);
403406
};
404407
detail::type_erased_cgfo_ty CGF{L};
405-
event FlushEvent =
408+
detail::EventImplPtr FlushEvent =
406409
submit_impl(CGF, Self, SecondaryQueue, /*CallerNeedsEvent*/ true, Loc,
407410
IsTopCodeLoc, {});
408-
EventImpl->attachEventToCompleteWeak(detail::getSyclObjImpl(FlushEvent));
409-
registerStreamServiceEvent(detail::getSyclObjImpl(FlushEvent));
411+
if (EventImpl)
412+
EventImpl->attachEventToCompleteWeak(FlushEvent);
413+
registerStreamServiceEvent(FlushEvent);
410414
}
411415

412-
return Event;
416+
return EventImpl;
413417
}
414418

415419
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
416-
event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
417-
const std::shared_ptr<queue_impl> &Self,
418-
const std::shared_ptr<queue_impl> &,
419-
const std::shared_ptr<queue_impl> &SecondaryQueue,
420-
bool CallerNeedsEvent,
421-
const detail::code_location &Loc,
422-
bool IsTopCodeLoc,
423-
const SubmissionInfo &SubmitInfo) {
420+
detail::EventImplPtr
421+
queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
422+
const std::shared_ptr<queue_impl> &Self,
423+
const std::shared_ptr<queue_impl> &,
424+
const std::shared_ptr<queue_impl> &SecondaryQueue,
425+
bool CallerNeedsEvent, const detail::code_location &Loc,
426+
bool IsTopCodeLoc, const SubmissionInfo &SubmitInfo) {
424427
return submit_impl(CGF, Self, SecondaryQueue.get(), CallerNeedsEvent, Loc,
425428
IsTopCodeLoc, SubmitInfo);
426429
}

sycl/source/detail/queue_impl.hpp

Lines changed: 53 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -351,10 +351,10 @@ class queue_impl {
351351
const v1::SubmissionInfo &SubmitInfo,
352352
const detail::code_location &Loc, bool IsTopCodeLoc) {
353353

354-
event ResEvent =
354+
detail::EventImplPtr ResEvent =
355355
submit_impl(CGF, Self, SubmitInfo.SecondaryQueue().get(),
356356
/*CallerNeedsEvent=*/true, Loc, IsTopCodeLoc, SubmitInfo);
357-
return ResEvent;
357+
return createSyclObjFromImpl<event>(ResEvent);
358358
}
359359

360360
void submit_without_event(const detail::type_erased_cgfo_ty &CGF,
@@ -701,6 +701,15 @@ class queue_impl {
701701
Handler.depends_on(*ExternalEvent);
702702
}
703703

704+
#ifdef __INTEL_PREVIEW_BREAKING_CHANGES
705+
#define parseEvent(arg) (arg)
706+
#else
707+
inline detail::EventImplPtr parseEvent(const event &Event) {
708+
const detail::EventImplPtr &EventImpl = getSyclObjImpl(Event);
709+
return EventImpl->isDiscarded() ? nullptr : EventImpl;
710+
}
711+
#endif
712+
704713
bool trySwitchingToNoEventsMode() {
705714
if (MNoEventMode.load(std::memory_order_relaxed))
706715
return true;
@@ -719,7 +728,8 @@ class queue_impl {
719728
}
720729

721730
template <typename HandlerType = handler>
722-
event finalizeHandlerInOrderNoEventsUnlocked(HandlerType &Handler) {
731+
detail::EventImplPtr
732+
finalizeHandlerInOrderNoEventsUnlocked(HandlerType &Handler) {
723733
assert(isInOrder());
724734
assert(MGraph.expired());
725735
assert(MDefaultGraphDeps.LastEventPtr == nullptr ||
@@ -732,18 +742,19 @@ class queue_impl {
732742

733743
if (MContext->getBackend() == backend::opencl && MGraph.expired()) {
734744
// This is needed to support queue_empty() call
735-
auto Event = Handler.finalize();
736-
if (!getSyclObjImpl(Event)->isDiscarded()) {
737-
MDefaultGraphDeps.LastEventPtr = getSyclObjImpl(Event);
745+
auto Event = parseEvent(Handler.finalize());
746+
if (Event) {
747+
MDefaultGraphDeps.LastEventPtr = Event;
738748
}
739749
return Event;
740750
} else {
741-
return Handler.finalize();
751+
return parseEvent(Handler.finalize());
742752
}
743753
}
744754

745755
template <typename HandlerType = handler>
746-
event finalizeHandlerInOrderHostTaskUnlocked(HandlerType &Handler) {
756+
detail::EventImplPtr
757+
finalizeHandlerInOrderHostTaskUnlocked(HandlerType &Handler) {
747758
assert(isInOrder());
748759
assert(Handler.getType() == CGType::CodeplayHostTask);
749760

@@ -769,14 +780,14 @@ class queue_impl {
769780

770781
synchronizeWithExternalEvent(Handler);
771782

772-
auto Event = Handler.finalize();
773-
EventToBuildDeps = getSyclObjImpl(Event);
774-
assert(!EventToBuildDeps->isDiscarded());
775-
return Event;
783+
EventToBuildDeps = parseEvent(Handler.finalize());
784+
assert(EventToBuildDeps);
785+
return EventToBuildDeps;
776786
}
777787

778788
template <typename HandlerType = handler>
779-
event finalizeHandlerInOrderWithDepsUnlocked(HandlerType &Handler) {
789+
detail::EventImplPtr
790+
finalizeHandlerInOrderWithDepsUnlocked(HandlerType &Handler) {
780791
// this is handled by finalizeHandlerInOrderHostTask
781792
assert(Handler.getType() != CGType::CodeplayHostTask);
782793

@@ -804,25 +815,20 @@ class queue_impl {
804815

805816
synchronizeWithExternalEvent(Handler);
806817

807-
auto EventRet = Handler.finalize();
808-
809-
if (getSyclObjImpl(EventRet)->isDiscarded()) {
810-
EventToBuildDeps = nullptr;
811-
} else {
818+
EventToBuildDeps = parseEvent(Handler.finalize());
819+
if (EventToBuildDeps)
812820
MNoEventMode = false;
813-
EventToBuildDeps = getSyclObjImpl(EventRet);
814821

815-
// TODO: if the event is NOP we should be able to discard it as well.
816-
// However, NOP events are used to describe ordering for graph operations
817-
// Once https://github.com/intel/llvm/issues/18330 is fixed, we can
818-
// start relying on command buffer in-order property instead.
819-
}
822+
// TODO: if the event is NOP we should be able to discard it.
823+
// However, NOP events are used to describe ordering for graph operations
824+
// Once https://github.com/intel/llvm/issues/18330 is fixed, we can
825+
// start relying on command buffer in-order property instead.
820826

821-
return EventRet;
827+
return EventToBuildDeps;
822828
}
823829

824830
template <typename HandlerType = handler>
825-
event finalizeHandlerOutOfOrder(HandlerType &Handler) {
831+
detail::EventImplPtr finalizeHandlerOutOfOrder(HandlerType &Handler) {
826832
const CGType Type = getSyclObjImpl(Handler)->MCGType;
827833
std::lock_guard<std::mutex> Lock{MMutex};
828834

@@ -847,18 +853,17 @@ class queue_impl {
847853
(Type == CGType::CodeplayHostTask || (!Deps.LastBarrier->isEnqueued())))
848854
Handler.depends_on(Deps.LastBarrier);
849855

850-
auto EventRet = Handler.finalize();
851-
const EventImplPtr &EventRetImpl = getSyclObjImpl(EventRet);
856+
EventImplPtr EventRetImpl = parseEvent(Handler.finalize());
852857
if (Type == CGType::CodeplayHostTask)
853-
Deps.UnenqueuedCmdEvents.push_back(std::move(EventRetImpl));
858+
Deps.UnenqueuedCmdEvents.push_back(EventRetImpl);
854859
else if (Type == CGType::Barrier || Type == CGType::BarrierWaitlist) {
855-
Deps.LastBarrier = std::move(EventRetImpl);
860+
Deps.LastBarrier = EventRetImpl;
856861
Deps.UnenqueuedCmdEvents.clear();
857862
} else if (!EventRetImpl->isEnqueued()) {
858-
Deps.UnenqueuedCmdEvents.push_back(std::move(EventRetImpl));
863+
Deps.UnenqueuedCmdEvents.push_back(EventRetImpl);
859864
}
860865

861-
return EventRet;
866+
return EventRetImpl;
862867
}
863868

864869
template <typename HandlerType = handler>
@@ -893,12 +898,13 @@ class queue_impl {
893898
/// \param Loc is the code location of the submit call (default argument)
894899
/// \param SubmitInfo is additional optional information for the submission.
895900
/// \return a SYCL event representing submitted command group.
896-
event submit_impl(const detail::type_erased_cgfo_ty &CGF,
897-
const std::shared_ptr<queue_impl> &Self,
898-
const std::shared_ptr<queue_impl> &PrimaryQueue,
899-
const std::shared_ptr<queue_impl> &SecondaryQueue,
900-
bool CallerNeedsEvent, const detail::code_location &Loc,
901-
bool IsTopCodeLoc, const SubmissionInfo &SubmitInfo);
901+
detail::EventImplPtr
902+
submit_impl(const detail::type_erased_cgfo_ty &CGF,
903+
const std::shared_ptr<queue_impl> &Self,
904+
const std::shared_ptr<queue_impl> &PrimaryQueue,
905+
const std::shared_ptr<queue_impl> &SecondaryQueue,
906+
bool CallerNeedsEvent, const detail::code_location &Loc,
907+
bool IsTopCodeLoc, const SubmissionInfo &SubmitInfo);
902908
#endif
903909

904910
/// Performs command group submission to the queue.
@@ -911,11 +917,13 @@ class queue_impl {
911917
/// \param Loc is the code location of the submit call (default argument)
912918
/// \param SubmitInfo is additional optional information for the submission.
913919
/// \return a SYCL event representing submitted command group.
914-
event submit_impl(const detail::type_erased_cgfo_ty &CGF,
915-
const std::shared_ptr<queue_impl> &Self,
916-
queue_impl *SecondaryQueue, bool CallerNeedsEvent,
917-
const detail::code_location &Loc, bool IsTopCodeLoc,
918-
const v1::SubmissionInfo &SubmitInfo);
920+
detail::EventImplPtr submit_impl(const detail::type_erased_cgfo_ty &CGF,
921+
const std::shared_ptr<queue_impl> &Self,
922+
queue_impl *SecondaryQueue,
923+
bool CallerNeedsEvent,
924+
const detail::code_location &Loc,
925+
bool IsTopCodeLoc,
926+
const v1::SubmissionInfo &SubmitInfo);
919927

920928
/// Helper function for submitting a memory operation with a handler.
921929
/// \param Self is a shared_ptr to this queue.
@@ -974,8 +982,8 @@ class queue_impl {
974982

975983
/// Stores an event that should be associated with the queue
976984
///
977-
/// \param Event is the event to be stored
978-
void addEvent(const event &Event);
985+
/// \param EventImpl is the event to be stored
986+
void addEvent(const detail::EventImplPtr &EventImpl);
979987

980988
/// Protects all the fields that can be changed by class' methods.
981989
mutable std::mutex MMutex;

0 commit comments

Comments
 (0)