@@ -631,6 +631,7 @@ class queue_impl {
631
631
std::lock_guard<std::mutex> Lock (MMutex);
632
632
MGraph = Graph;
633
633
MExtGraphDeps.reset ();
634
+ MNoEventMode = false ;
634
635
}
635
636
636
637
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
@@ -721,56 +722,90 @@ class queue_impl {
721
722
}
722
723
723
724
template <typename HandlerType = handler>
724
- event finalizeHandlerInOrder (HandlerType &Handler,
725
- std::unique_lock<std::mutex> &Lock) {
726
- Lock.lock ();
725
+ void synchronizeWithExternalEvent (HandlerType &Handler) {
726
+ // If there is an external event set, add it as a dependency and clear it.
727
+ // We do not need to hold the lock as MLastEventMtx will ensure the last
728
+ // event reflects the corresponding external event dependence as well.
729
+ std::optional<event> ExternalEvent = popExternalEvent ();
730
+ if (ExternalEvent)
731
+ Handler.depends_on (*ExternalEvent);
732
+ }
733
+
734
+ template <typename HandlerType = handler>
735
+ event finalizeHandlerInOrderNoEventsUnlocked (HandlerType &Handler) {
736
+ assert (isInOrder ());
737
+ assert (MGraph.expired ());
738
+ assert (MNoEventMode);
739
+
740
+ MEmpty = false ;
741
+
742
+ synchronizeWithExternalEvent (Handler);
743
+
744
+ return Handler.finalize ();
745
+ }
746
+
747
+ template <typename HandlerType = handler>
748
+ event finalizeHandlerInOrder (HandlerType &Handler) {
749
+ // Accessing and changing an event isn't an atomic operation.
750
+ // Hence, here is the lock for thread-safety.
751
+ std::lock_guard<std::mutex> Lock{MMutex};
727
752
753
+ MEmpty = false ;
728
754
auto &EventToBuildDeps = MGraph.expired () ? MDefaultGraphDeps.LastEventPtr
729
755
: MExtGraphDeps.LastEventPtr ;
730
756
731
- if (Handler.getType () == CGType::CodeplayHostTask) {
732
- if (!MHostTaskMode && MGraph.expired () && !MEmpty) {
733
- assert (EventToBuildDeps == nullptr );
734
- // since we don't store any events, insert a barrier to ensure proper
735
- // ordering with device execution
736
- auto barrierEvent = insertHelperBarrier (Handler);
737
- Handler.depends_on (barrierEvent);
738
- }
739
-
740
- MHostTaskMode = true ;
757
+ if (MNoEventMode && Handler.getType () == CGType::CodeplayHostTask) {
758
+ assert (MGraph.expired ());
759
+ assert (MDefaultGraphDeps.LastEventPtr == nullptr );
760
+ // There might be some operations submitted to the queue
761
+ // but the LastEventPtr is not set. If we are to run a host_task,
762
+ // we need to insert a barrier to ensure proper synchronization.
763
+ Handler.depends_on (insertHelperBarrier (Handler));
741
764
}
742
765
743
- if (EventToBuildDeps && Handler.getType () != CGType::AsyncAlloc) {
766
+ // This dependency is needed for the following purposes:
767
+ // - host tasks are handled by the runtime and cannot be implicitly
768
+ // synchronized by the backend.
769
+ // - to prevent the 2nd kernel enqueue when the 1st kernel is blocked
770
+ // by a host task. This dependency allows building the enqueue order in
771
+ // the RT but will not be passed to the backend. See getPIEvents in
772
+ // Command.
773
+ if (EventToBuildDeps) {
774
+ // If we have a last event, this means we are no longer in no-event mode.
775
+ assert (!MNoEventMode);
776
+
777
+ // In the case where the last event was discarded and we are to run a
778
+ // host_task, we insert a barrier into the queue and use the resulting
779
+ // event as the dependency for the host_task.
780
+ // Note that host_task events can never be discarded, so this will not
781
+ // insert barriers between host_task enqueues.
782
+ if (EventToBuildDeps->isDiscarded () &&
783
+ Handler.getType () == CGType::CodeplayHostTask)
784
+ EventToBuildDeps = insertHelperBarrier (Handler);
785
+
744
786
// depends_on after an async alloc is explicitly disallowed. Async alloc
745
787
// handles in order queue dependencies preemptively, so we skip them.
746
788
// Note: This could be improved by moving the handling of dependencies
747
789
// to before calling the CGF.
748
- Handler.depends_on (EventToBuildDeps);
790
+ if (!EventToBuildDeps->isDiscarded () &&
791
+ !(Handler.getType () == CGType::AsyncAlloc))
792
+ Handler.depends_on (EventToBuildDeps);
749
793
}
750
794
751
- MEmpty = false ;
795
+ MNoEventMode = false ;
752
796
753
- // If there is an external event set, add it as a dependency and clear it.
754
- // We do not need to hold the lock as MLastEventMtx will ensure the last
755
- // event reflects the corresponding external event dependence as well.
756
- std::optional<event> ExternalEvent = popExternalEvent ();
757
- if (ExternalEvent)
758
- Handler.depends_on (*ExternalEvent);
797
+ synchronizeWithExternalEvent (Handler);
759
798
760
799
auto EventRet = Handler.finalize ();
761
-
762
- if (shouldRecordLastEvent ()) {
763
- EventToBuildDeps = getSyclObjImpl (EventRet);
764
- }
800
+ EventToBuildDeps = getSyclObjImpl (EventRet);
765
801
766
802
return EventRet;
767
803
}
768
804
769
805
template <typename HandlerType = handler>
770
- event finalizeHandlerOutOfOrder (HandlerType &Handler,
771
- std::unique_lock<std::mutex> &Lock) {
806
+ event finalizeHandlerOutOfOrder (HandlerType &Handler) {
772
807
const CGType Type = getSyclObjImpl (Handler)->MCGType ;
773
- Lock. lock () ;
808
+ std::lock_guard<std::mutex> Lock{MMutex} ;
774
809
775
810
MEmpty = false ;
776
811
@@ -810,8 +845,7 @@ class queue_impl {
810
845
template <typename HandlerType = handler>
811
846
event finalizeHandlerPostProcess (
812
847
HandlerType &Handler,
813
- const optional<SubmitPostProcessF> &PostProcessorFunc,
814
- std::unique_lock<std::mutex> &Lock) {
848
+ const optional<SubmitPostProcessF> &PostProcessorFunc) {
815
849
bool IsKernel = Handler.getType () == CGType::Kernel;
816
850
bool KernelUsesAssert = false ;
817
851
@@ -822,8 +856,8 @@ class queue_impl {
822
856
ProgramManager::getInstance ().kernelUsesAssert (
823
857
Handler.MKernelName .data ());
824
858
825
- auto Event = MIsInorder ? finalizeHandlerInOrder (Handler, Lock )
826
- : finalizeHandlerOutOfOrder (Handler, Lock );
859
+ auto Event = MIsInorder ? finalizeHandlerInOrder (Handler)
860
+ : finalizeHandlerOutOfOrder (Handler);
827
861
828
862
auto &PostProcess = *PostProcessorFunc;
829
863
@@ -835,13 +869,12 @@ class queue_impl {
835
869
// template is needed for proper unit testing
836
870
template <typename HandlerType = handler>
837
871
event finalizeHandler (HandlerType &Handler,
838
- const optional<SubmitPostProcessF> &PostProcessorFunc,
839
- std::unique_lock<std::mutex> &Lock) {
872
+ const optional<SubmitPostProcessF> &PostProcessorFunc) {
840
873
if (PostProcessorFunc) {
841
- return finalizeHandlerPostProcess (Handler, PostProcessorFunc, Lock );
874
+ return finalizeHandlerPostProcess (Handler, PostProcessorFunc);
842
875
} else {
843
- return MIsInorder ? finalizeHandlerInOrder (Handler, Lock )
844
- : finalizeHandlerOutOfOrder (Handler, Lock );
876
+ return MIsInorder ? finalizeHandlerInOrder (Handler)
877
+ : finalizeHandlerOutOfOrder (Handler);
845
878
}
846
879
}
847
880
@@ -1011,18 +1044,11 @@ class queue_impl {
1011
1044
1012
1045
const bool MIsInorder;
1013
1046
1014
- // Specifies whether this queue uses host tasks. If yes, then event
1015
- // from all operations need to be recorded for proper synchronization.
1016
- bool MHostTaskMode = false ;
1017
-
1018
- bool shouldRecordLastEvent () const {
1019
- // For in-order queues we rely on UR queue ordering.
1020
- // We only need to keep the event if host task are used
1021
- // (to ensure proper ordering).
1022
-
1023
- // TODO: do not record last event for graphs as well
1024
- return MIsInorder && (MHostTaskMode || !MGraph.expired ());
1025
- }
1047
+ // Specifies whether this queue skips recording the last event. This can
1048
+ // only be true if the queue is in-order, no command graph is
1049
+ // associated with the queue and no host tasks have ever been
1050
+ // submitted to the queue.
1051
+ bool MNoEventMode = true ;
1026
1052
1027
1053
bool MEmpty = true ;
1028
1054
0 commit comments