@@ -631,6 +631,7 @@ class queue_impl {
631
631
std::lock_guard<std::mutex> Lock (MMutex);
632
632
MGraph = Graph;
633
633
MExtGraphDeps.reset ();
634
+ MNoEventMode = false ;
634
635
}
635
636
636
637
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
@@ -721,56 +722,93 @@ class queue_impl {
721
722
}
722
723
723
724
template <typename HandlerType = handler>
724
- event finalizeHandlerInOrder (HandlerType &Handler,
725
- std::unique_lock<std::mutex> &Lock) {
726
- Lock.lock ();
725
+ void synchronizeWithExternalEvent (HandlerType &Handler) {
726
+ // If there is an external event set, add it as a dependency and clear it.
727
+ // We do not need to hold the lock as MLastEventMtx will ensure the last
728
+ // event reflects the corresponding external event dependence as well.
729
+ std::optional<event> ExternalEvent = popExternalEvent ();
730
+ if (ExternalEvent)
731
+ Handler.depends_on (*ExternalEvent);
732
+ }
733
+
734
+ template <typename HandlerType = handler>
735
+ event finalizeHandlerInOrderNoEventsUnlocked (HandlerType &Handler) {
736
+ assert (isInOrder ());
737
+ assert (MGraph.expired ());
738
+ assert (MNoEventMode);
727
739
740
+ MEmpty = false ;
741
+
742
+ synchronizeWithExternalEvent (Handler);
743
+
744
+ return Handler.finalize ();
745
+ }
746
+
747
+ template <typename HandlerType = handler>
748
+ event finalizeHandlerInOrder (HandlerType &Handler) {
749
+ // Accessing and changing an event is not an atomic operation.
750
+ // Hence, here is the lock for thread-safety.
751
+ std::lock_guard<std::mutex> Lock{MMutex};
752
+
753
+ MEmpty = false ;
728
754
auto &EventToBuildDeps = MGraph.expired () ? MDefaultGraphDeps.LastEventPtr
729
755
: MExtGraphDeps.LastEventPtr ;
730
756
731
- if (Handler.getType () == CGType::CodeplayHostTask) {
732
- if (!MHostTaskMode && MGraph.expired () && !MEmpty) {
733
- assert (EventToBuildDeps == nullptr );
734
- // since we don't store any events, insert a barrier to ensure proper
735
- // ordering with device execution
736
- auto barrierEvent = insertHelperBarrier (Handler);
737
- Handler.depends_on (barrierEvent);
738
- }
739
-
740
- MHostTaskMode = true ;
757
+ if (MNoEventMode && Handler.getType () == CGType::CodeplayHostTask) {
758
+ assert (MGraph.expired ());
759
+ assert (MDefaultGraphDeps.LastEventPtr == nullptr );
760
+ // There might be some operations submitted to the queue
761
+ // but the LastEventPtr is not set. If we are to run a host_task,
762
+ // we need to insert a barrier to ensure proper synchronization.
763
+ Handler.depends_on (insertHelperBarrier (Handler));
741
764
}
742
765
743
- if (EventToBuildDeps && Handler.getType () != CGType::AsyncAlloc) {
766
+ // This dependency is needed for the following purposes:
767
+ // - host tasks are handled by the runtime and cannot be implicitly
768
+ // synchronized by the backend.
769
+ // - to prevent the 2nd kernel enqueue when the 1st kernel is blocked
770
+ // by a host task. This dependency allows building the enqueue order in
771
+ // the RT but will not be passed to the backend. See getPIEvents in
772
+ // Command.
773
+ if (EventToBuildDeps) {
774
+ // If we have a last event, this means we are no longer in no-event mode.
775
+ assert (!MNoEventMode);
776
+
777
+ // In the case where the last event was discarded and we are to run a
778
+ // host_task, we insert a barrier into the queue and use the resulting
779
+ // event as the dependency for the host_task.
780
+ // Note that host_task events can never be discarded, so this will not
781
+ // insert barriers between host_task enqueues.
782
+ if (EventToBuildDeps->isDiscarded () &&
783
+ Handler.getType () == CGType::CodeplayHostTask)
784
+ EventToBuildDeps = insertHelperBarrier (Handler);
785
+
744
786
// depends_on after an async alloc is explicitly disallowed. Async alloc
745
787
// handles in order queue dependencies preemptively, so we skip them.
746
788
// Note: This could be improved by moving the handling of dependencies
747
789
// to before calling the CGF.
748
- Handler.depends_on (EventToBuildDeps);
790
+ if (!EventToBuildDeps->isDiscarded () &&
791
+ !(Handler.getType () == CGType::AsyncAlloc))
792
+ Handler.depends_on (EventToBuildDeps);
749
793
}
750
794
751
- MEmpty = false ;
795
+ MNoEventMode = false ;
752
796
753
- // If there is an external event set, add it as a dependency and clear it.
754
- // We do not need to hold the lock as MLastEventMtx will ensure the last
755
- // event reflects the corresponding external event dependence as well.
756
- std::optional<event> ExternalEvent = popExternalEvent ();
757
- if (ExternalEvent)
758
- Handler.depends_on (*ExternalEvent);
797
+ synchronizeWithExternalEvent (Handler);
759
798
760
799
auto EventRet = Handler.finalize ();
761
-
762
- if (shouldRecordLastEvent ()) {
763
- EventToBuildDeps = getSyclObjImpl (EventRet);
764
- }
800
+ EventToBuildDeps = getSyclObjImpl (EventRet);
765
801
766
802
return EventRet;
767
803
}
768
804
769
805
template <typename HandlerType = handler>
770
- event finalizeHandlerOutOfOrder (HandlerType &Handler,
771
- std::unique_lock<std::mutex> &Lock) {
806
+ event finalizeHandlerOutOfOrder (HandlerType &Handler) {
807
+ // Accessing and changing an event is not an atomic operation.
808
+ // Hence, here is the lock for thread-safety.
809
+ std::lock_guard<std::mutex> Lock{MMutex};
810
+
772
811
const CGType Type = getSyclObjImpl (Handler)->MCGType ;
773
- Lock.lock ();
774
812
775
813
MEmpty = false ;
776
814
@@ -810,8 +848,7 @@ class queue_impl {
810
848
template <typename HandlerType = handler>
811
849
event finalizeHandlerPostProcess (
812
850
HandlerType &Handler,
813
- const optional<SubmitPostProcessF> &PostProcessorFunc,
814
- std::unique_lock<std::mutex> &Lock) {
851
+ const optional<SubmitPostProcessF> &PostProcessorFunc) {
815
852
bool IsKernel = Handler.getType () == CGType::Kernel;
816
853
bool KernelUsesAssert = false ;
817
854
@@ -822,8 +859,8 @@ class queue_impl {
822
859
ProgramManager::getInstance ().kernelUsesAssert (
823
860
Handler.MKernelName .data ());
824
861
825
- auto Event = MIsInorder ? finalizeHandlerInOrder (Handler, Lock )
826
- : finalizeHandlerOutOfOrder (Handler, Lock );
862
+ auto Event = MIsInorder ? finalizeHandlerInOrder (Handler)
863
+ : finalizeHandlerOutOfOrder (Handler);
827
864
828
865
auto &PostProcess = *PostProcessorFunc;
829
866
@@ -835,13 +872,12 @@ class queue_impl {
835
872
// template is needed for proper unit testing
836
873
template <typename HandlerType = handler>
837
874
event finalizeHandler (HandlerType &Handler,
838
- const optional<SubmitPostProcessF> &PostProcessorFunc,
839
- std::unique_lock<std::mutex> &Lock) {
875
+ const optional<SubmitPostProcessF> &PostProcessorFunc) {
840
876
if (PostProcessorFunc) {
841
- return finalizeHandlerPostProcess (Handler, PostProcessorFunc, Lock );
877
+ return finalizeHandlerPostProcess (Handler, PostProcessorFunc);
842
878
} else {
843
- return MIsInorder ? finalizeHandlerInOrder (Handler, Lock )
844
- : finalizeHandlerOutOfOrder (Handler, Lock );
879
+ return MIsInorder ? finalizeHandlerInOrder (Handler)
880
+ : finalizeHandlerOutOfOrder (Handler);
845
881
}
846
882
}
847
883
@@ -1011,18 +1047,11 @@ class queue_impl {
1011
1047
1012
1048
const bool MIsInorder;
1013
1049
1014
- // Specifies whether this queue uses host tasks. If yes, then event
1015
- // from all operations need to be recorded for proper synchronization.
1016
- bool MHostTaskMode = false ;
1017
-
1018
- bool shouldRecordLastEvent () const {
1019
- // For in-order queues we rely on UR queue ordering.
1020
- // We only need to keep the event if host task are used
1021
- // (to ensure proper ordering).
1022
-
1023
- // TODO: do not record last event for graphs as well
1024
- return MIsInorder && (MHostTaskMode || !MGraph.expired ());
1025
- }
1050
+ // Specifies whether this queue skips recording the last event. This can only
1051
+ // be true if the queue is in-order, the command graph is not
1052
+ // associated with the queue and there have never been any host
1053
+ // tasks submitted to the queue.
1054
+ bool MNoEventMode = true ;
1026
1055
1027
1056
bool MEmpty = true ;
1028
1057
0 commit comments