@@ -781,79 +781,121 @@ class queue_impl {
781
781
return ResEvent;
782
782
}
783
783
784
- // template is needed for proper unit testing
785
784
template <typename HandlerType = handler>
786
- void finalizeHandler (HandlerType &Handler, event &EventRet) {
787
- if (MIsInorder) {
788
- // Accessing and changing of an event isn't atomic operation.
789
- // Hence, here is the lock for thread-safety.
790
- std::lock_guard<std::mutex> Lock{MMutex};
791
-
792
- auto &EventToBuildDeps = MGraph.expired () ? MDefaultGraphDeps.LastEventPtr
793
- : MExtGraphDeps.LastEventPtr ;
794
-
795
- // This dependency is needed for the following purposes:
796
- // - host tasks are handled by the runtime and cannot be implicitly
797
- // synchronized by the backend.
798
- // - to prevent the 2nd kernel enqueue when the 1st kernel is blocked
799
- // by a host task. This dependency allows to build the enqueue order in
800
- // the RT but will not be passed to the backend. See getPIEvents in
801
- // Command.
802
- if (EventToBuildDeps) {
803
- // In the case where the last event was discarded and we are to run a
804
- // host_task, we insert a barrier into the queue and use the resulting
805
- // event as the dependency for the host_task.
806
- // Note that host_task events can never be discarded, so this will not
807
- // insert barriers between host_task enqueues.
808
- if (EventToBuildDeps->isDiscarded () &&
809
- getSyclObjImpl (Handler)->MCGType == CGType::CodeplayHostTask)
810
- EventToBuildDeps = insertHelperBarrier (Handler);
811
-
812
- if (!EventToBuildDeps->isDiscarded ())
813
- Handler.depends_on (EventToBuildDeps);
814
- }
785
+ event finalizeHandlerInOrder (HandlerType &Handler) {
786
+ // Accessing and changing of an event isn't atomic operation.
787
+ // Hence, here is the lock for thread-safety.
788
+ std::lock_guard<std::mutex> Lock{MMutex};
789
+
790
+ auto &EventToBuildDeps = MGraph.expired () ? MDefaultGraphDeps.LastEventPtr
791
+ : MExtGraphDeps.LastEventPtr ;
792
+
793
+ // This dependency is needed for the following purposes:
794
+ // - host tasks are handled by the runtime and cannot be implicitly
795
+ // synchronized by the backend.
796
+ // - to prevent the 2nd kernel enqueue when the 1st kernel is blocked
797
+ // by a host task. This dependency allows to build the enqueue order in
798
+ // the RT but will not be passed to the backend. See getPIEvents in
799
+ // Command.
800
+ if (EventToBuildDeps) {
801
+ // In the case where the last event was discarded and we are to run a
802
+ // host_task, we insert a barrier into the queue and use the resulting
803
+ // event as the dependency for the host_task.
804
+ // Note that host_task events can never be discarded, so this will not
805
+ // insert barriers between host_task enqueues.
806
+ if (EventToBuildDeps->isDiscarded () &&
807
+ getSyclObjImpl (Handler)->MCGType == CGType::CodeplayHostTask)
808
+ EventToBuildDeps = insertHelperBarrier (Handler);
809
+
810
+ if (!EventToBuildDeps->isDiscarded ())
811
+ Handler.depends_on (EventToBuildDeps);
812
+ }
813
+
814
+ // If there is an external event set, add it as a dependency and clear it.
815
+ // We do not need to hold the lock as MLastEventMtx will ensure the last
816
+ // event reflects the corresponding external event dependence as well.
817
+ std::optional<event> ExternalEvent = popExternalEvent ();
818
+ if (ExternalEvent)
819
+ Handler.depends_on (*ExternalEvent);
820
+
821
+ auto EventRet = Handler.finalize ();
822
+ EventToBuildDeps = getSyclObjImpl (EventRet);
823
+
824
+ return EventRet;
825
+ }
826
+
827
+ template <typename HandlerType = handler>
828
+ event finalizeHandlerOutOfOrder (HandlerType &Handler) {
829
+ const CGType Type = getSyclObjImpl (Handler)->MCGType ;
830
+ std::lock_guard<std::mutex> Lock{MMutex};
831
+ // The following code supports barrier synchronization if host task is
832
+ // involved in the scenario. Native barriers cannot handle host task
833
+ // dependency so in the case where some commands were not enqueued
834
+ // (blocked), we track them to prevent barrier from being enqueued
835
+ // earlier.
836
+ {
837
+ std::lock_guard<std::mutex> RequestLock (MMissedCleanupRequestsMtx);
838
+ for (auto &UpdatedGraph : MMissedCleanupRequests)
839
+ doUnenqueuedCommandCleanup (UpdatedGraph);
840
+ MMissedCleanupRequests.clear ();
841
+ }
842
+ auto &Deps = MGraph.expired () ? MDefaultGraphDeps : MExtGraphDeps;
843
+ if (Type == CGType::Barrier && !Deps.UnenqueuedCmdEvents .empty ()) {
844
+ Handler.depends_on (Deps.UnenqueuedCmdEvents );
845
+ }
846
+ if (Deps.LastBarrier &&
847
+ (Type == CGType::CodeplayHostTask || (!Deps.LastBarrier ->isEnqueued ())))
848
+ Handler.depends_on (Deps.LastBarrier );
849
+
850
+ auto EventRet = Handler.finalize ();
851
+ EventImplPtr EventRetImpl = getSyclObjImpl (EventRet);
852
+ if (Type == CGType::CodeplayHostTask)
853
+ Deps.UnenqueuedCmdEvents .push_back (EventRetImpl);
854
+ else if (Type == CGType::Barrier || Type == CGType::BarrierWaitlist) {
855
+ Deps.LastBarrier = EventRetImpl;
856
+ Deps.UnenqueuedCmdEvents .clear ();
857
+ } else if (!EventRetImpl->isEnqueued ()) {
858
+ Deps.UnenqueuedCmdEvents .push_back (EventRetImpl);
859
+ }
860
+
861
+ return EventRet;
862
+ }
863
+
864
+ template <typename HandlerType = handler>
865
+ event finalizeHandlerPostProcess (
866
+ HandlerType &Handler,
867
+ const optional<SubmitPostProcessF> &PostProcessorFunc) {
868
+ auto HandlerImpl = detail::getSyclObjImpl (Handler);
869
+ const CGType Type = HandlerImpl->MCGType ;
870
+
871
+ bool IsKernel = Type == CGType::Kernel;
872
+ bool KernelUsesAssert = false ;
873
+
874
+ if (IsKernel)
875
+ // Kernel only uses assert if it's non interop one
876
+ KernelUsesAssert = !(Handler.MKernel && Handler.MKernel ->isInterop ()) &&
877
+ ProgramManager::getInstance ().kernelUsesAssert (
878
+ Handler.MKernelName .c_str ());
879
+
880
+ auto Event = MIsInorder ? finalizeHandlerInOrder (Handler)
881
+ : finalizeHandlerOutOfOrder (Handler);
882
+
883
+ auto &PostProcess = *PostProcessorFunc;
815
884
816
- // If there is an external event set, add it as a dependency and clear it.
817
- // We do not need to hold the lock as MLastEventMtx will ensure the last
818
- // event reflects the corresponding external event dependence as well.
819
- std::optional<event> ExternalEvent = popExternalEvent ();
820
- if (ExternalEvent)
821
- Handler.depends_on (*ExternalEvent);
885
+ PostProcess (IsKernel, KernelUsesAssert, Event);
822
886
823
- EventRet = Handler.finalize ();
824
- EventToBuildDeps = getSyclObjImpl (EventRet);
887
+ return Event;
888
+ }
889
+
890
+ // template is needed for proper unit testing
891
+ template <typename HandlerType = handler>
892
+ event finalizeHandler (HandlerType &Handler,
893
+ const optional<SubmitPostProcessF> &PostProcessorFunc) {
894
+ if (PostProcessorFunc) {
895
+ return finalizeHandlerPostProcess (Handler, PostProcessorFunc);
825
896
} else {
826
- const CGType Type = getSyclObjImpl (Handler)->MCGType ;
827
- std::lock_guard<std::mutex> Lock{MMutex};
828
- // The following code supports barrier synchronization if host task is
829
- // involved in the scenario. Native barriers cannot handle host task
830
- // dependency so in the case where some commands were not enqueued
831
- // (blocked), we track them to prevent barrier from being enqueued
832
- // earlier.
833
- {
834
- std::lock_guard<std::mutex> RequestLock (MMissedCleanupRequestsMtx);
835
- for (auto &UpdatedGraph : MMissedCleanupRequests)
836
- doUnenqueuedCommandCleanup (UpdatedGraph);
837
- MMissedCleanupRequests.clear ();
838
- }
839
- auto &Deps = MGraph.expired () ? MDefaultGraphDeps : MExtGraphDeps;
840
- if (Type == CGType::Barrier && !Deps.UnenqueuedCmdEvents .empty ()) {
841
- Handler.depends_on (Deps.UnenqueuedCmdEvents );
842
- }
843
- if (Deps.LastBarrier && (Type == CGType::CodeplayHostTask ||
844
- (!Deps.LastBarrier ->isEnqueued ())))
845
- Handler.depends_on (Deps.LastBarrier );
846
-
847
- EventRet = Handler.finalize ();
848
- EventImplPtr EventRetImpl = getSyclObjImpl (EventRet);
849
- if (Type == CGType::CodeplayHostTask)
850
- Deps.UnenqueuedCmdEvents .push_back (EventRetImpl);
851
- else if (Type == CGType::Barrier || Type == CGType::BarrierWaitlist) {
852
- Deps.LastBarrier = EventRetImpl;
853
- Deps.UnenqueuedCmdEvents .clear ();
854
- } else if (!EventRetImpl->isEnqueued ()) {
855
- Deps.UnenqueuedCmdEvents .push_back (EventRetImpl);
856
- }
897
+ return MIsInorder ? finalizeHandlerInOrder (Handler)
898
+ : finalizeHandlerOutOfOrder (Handler);
857
899
}
858
900
}
859
901
0 commit comments