Skip to content

Commit a7b84a1

Browse files
committed
refactor
1 parent 375b895 commit a7b84a1

File tree

3 files changed

+102
-75
lines changed

3 files changed

+102
-75
lines changed

sycl/source/detail/queue_impl.cpp

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -292,13 +292,16 @@ queue_impl::getLastEvent(const std::shared_ptr<queue_impl> &Self) {
292292
std::lock_guard<std::mutex> Lock{MMutex};
293293
if (MEmpty)
294294
return std::nullopt;
295-
if (MGraph.expired() && !MDefaultGraphDeps.LastEventPtr) {
296-
assert(!MHostTaskMode);
295+
if (MNoEventMode) {
296+
assert(MGraph.expired());
297+
assert(!MDefaultGraphDeps.LastEventPtr);
298+
297299
// We insert a marker to represent an event at end.
298300
return detail::createSyclObjFromImpl<event>(insertMarkerEvent(Self));
299301
}
300302
if (!MGraph.expired() && MExtGraphDeps.LastEventPtr)
301303
return detail::createSyclObjFromImpl<event>(MExtGraphDeps.LastEventPtr);
304+
assert(MDefaultGraphDeps.LastEventPtr);
302305
return detail::createSyclObjFromImpl<event>(MDefaultGraphDeps.LastEventPtr);
303306
}
304307

@@ -342,16 +345,21 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
342345

343346
HandlerImpl->MEventMode = SubmitInfo.EventMode();
344347

345-
std::unique_lock<std::mutex> Lock(MMutex, std::defer_lock);
346-
auto Event = finalizeHandler(Handler, SubmitInfo.PostProcessorFunc(), Lock);
348+
auto isInNoEventsMode = isInOrder() && MNoEventMode;
349+
auto needToStoreEvent = Type == CGType::CodeplayHostTask ||
350+
Streams.size() > 0 || SubmitInfo.PostProcessorFunc();
347351

348-
if (isInOrder() && !shouldRecordLastEvent() && Streams.empty()) {
349-
// NOP
350-
} else {
351-
addEvent(Event);
352+
if (isInNoEventsMode && !needToStoreEvent) {
353+
std::unique_lock<std::mutex> Lock(MMutex);
354+
return finalizeHandlerInOrderNoEventsUnlocked(Handler);
352355
}
353356

354-
Lock.unlock();
357+
auto Event = finalizeHandler(Handler, SubmitInfo.PostProcessorFunc());
358+
addEvent(Event);
359+
360+
// If we taken this path, it's because we need to store the last event.
361+
if (isInOrder())
362+
assert(!MNoEventMode);
355363

356364
const auto &EventImpl = detail::getSyclObjImpl(Event);
357365
for (auto &Stream : Streams) {
@@ -405,13 +413,10 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
405413

406414
HandlerImpl->MEventMode = SubmitInfo.EventMode();
407415

408-
std::unique_lock<std::mutex> Lock(MMutex, std::defer_lock);
416+
auto Event = finalizeHandler(Handler, SubmitInfo.PostProcessorFunc());
409417

410-
auto Event = finalizeHandler(Handler, SubmitInfo.PostProcessorFunc(), Lock);
411418
addEvent(Event);
412419

413-
Lock.unlock();
414-
415420
const auto &EventImpl = detail::getSyclObjImpl(Event);
416421
for (auto &Stream : Streams) {
417422
// We don't want stream flushing to be blocking operation that is why submit
@@ -473,8 +478,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
473478
// handler rather than by-passing the scheduler.
474479
if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass(
475480
ExpandedDepEvents, MContext)) {
476-
if (!CallerNeedsEvent && supportsDiscardingPiEvents() &&
477-
!shouldRecordLastEvent()) {
481+
if (!CallerNeedsEvent && MNoEventMode) {
478482
NestedCallsTracker tracker;
479483
MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
480484
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
@@ -505,7 +509,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
505509
}
506510
}
507511

508-
if (shouldRecordLastEvent()) {
512+
if (!MNoEventMode) {
509513
auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
510514
: MExtGraphDeps.LastEventPtr;
511515
EventToStoreIn = EventImpl;
@@ -750,7 +754,7 @@ bool queue_impl::ext_oneapi_empty() const {
750754
// the status of the last event.
751755
if (isInOrder() && !MDiscardEvents) {
752756
std::lock_guard<std::mutex> Lock(MMutex);
753-
assert((MDefaultGraphDeps.LastEventPtr != nullptr) == MHostTaskMode);
757+
assert((MDefaultGraphDeps.LastEventPtr != nullptr) == MNoEventMode);
754758
// Note that we fall back to the backend query if the event was discarded,
755759
// which may happend despite the queue not being a discard event queue.
756760
if (MDefaultGraphDeps.LastEventPtr &&

sycl/source/detail/queue_impl.hpp

Lines changed: 79 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,7 @@ class queue_impl {
631631
std::lock_guard<std::mutex> Lock(MMutex);
632632
MGraph = Graph;
633633
MExtGraphDeps.reset();
634+
MNoEventMode = false;
634635
}
635636

636637
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
@@ -721,56 +722,93 @@ class queue_impl {
721722
}
722723

723724
template <typename HandlerType = handler>
724-
event finalizeHandlerInOrder(HandlerType &Handler,
725-
std::unique_lock<std::mutex> &Lock) {
726-
Lock.lock();
725+
void synchronizeWithExternalEvent(HandlerType &Handler) {
726+
// If there is an external event set, add it as a dependency and clear it.
727+
// We do not need to hold the lock as MLastEventMtx will ensure the last
728+
// event reflects the corresponding external event dependence as well.
729+
std::optional<event> ExternalEvent = popExternalEvent();
730+
if (ExternalEvent)
731+
Handler.depends_on(*ExternalEvent);
732+
}
733+
734+
template <typename HandlerType = handler>
735+
event finalizeHandlerInOrderNoEventsUnlocked(HandlerType &Handler) {
736+
assert(isInOrder());
737+
assert(MGraph.expired());
738+
assert(MNoEventMode);
727739

740+
MEmpty = false;
741+
742+
synchronizeWithExternalEvent(Handler);
743+
744+
return Handler.finalize();
745+
}
746+
747+
template <typename HandlerType = handler>
748+
event finalizeHandlerInOrder(HandlerType &Handler) {
749+
// Accessing and changing of an event isn't atomic operation.
750+
// Hence, here is the lock for thread-safety.
751+
std::lock_guard<std::mutex> Lock{MMutex};
752+
753+
MEmpty = false;
728754
auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
729755
: MExtGraphDeps.LastEventPtr;
730756

731-
if (Handler.getType() == CGType::CodeplayHostTask) {
732-
if (!MHostTaskMode && MGraph.expired() && !MEmpty) {
733-
assert(EventToBuildDeps == nullptr);
734-
// since we don't store any events, insert a barrier to ensure proper
735-
// ordering with device execution
736-
auto barrierEvent = insertHelperBarrier(Handler);
737-
Handler.depends_on(barrierEvent);
738-
}
739-
740-
MHostTaskMode = true;
757+
if (MNoEventMode && Handler.getType() == CGType::CodeplayHostTask) {
758+
assert(MGraph.expired());
759+
assert(MDefaultGraphDeps.LastEventPtr == nullptr);
760+
// There might be some operations submitted to the queue
761+
// but the LastEventPtr is not set. If we are to run a host_task,
762+
// we need to insert a barrier to ensure proper synchronization.
763+
Handler.depends_on(insertHelperBarrier(Handler));
741764
}
742765

743-
if (EventToBuildDeps && Handler.getType() != CGType::AsyncAlloc) {
766+
// This dependency is needed for the following purposes:
767+
// - host tasks are handled by the runtime and cannot be implicitly
768+
// synchronized by the backend.
769+
// - to prevent the 2nd kernel enqueue when the 1st kernel is blocked
770+
// by a host task. This dependency allows to build the enqueue order in
771+
// the RT but will not be passed to the backend. See getPIEvents in
772+
// Command.
773+
if (EventToBuildDeps) {
774+
// If we have last event, this means we are no longer in no-event mode.
775+
assert(!MNoEventMode);
776+
777+
// In the case where the last event was discarded and we are to run a
778+
// host_task, we insert a barrier into the queue and use the resulting
779+
// event as the dependency for the host_task.
780+
// Note that host_task events can never be discarded, so this will not
781+
// insert barriers between host_task enqueues.
782+
if (EventToBuildDeps->isDiscarded() &&
783+
Handler.getType() == CGType::CodeplayHostTask)
784+
EventToBuildDeps = insertHelperBarrier(Handler);
785+
744786
// depends_on after an async alloc is explicitly disallowed. Async alloc
745787
// handles in order queue dependencies preemptively, so we skip them.
746788
// Note: This could be improved by moving the handling of dependencies
747789
// to before calling the CGF.
748-
Handler.depends_on(EventToBuildDeps);
790+
if (!EventToBuildDeps->isDiscarded() &&
791+
!(Handler.getType() == CGType::AsyncAlloc))
792+
Handler.depends_on(EventToBuildDeps);
749793
}
750794

751-
MEmpty = false;
795+
MNoEventMode = false;
752796

753-
// If there is an external event set, add it as a dependency and clear it.
754-
// We do not need to hold the lock as MLastEventMtx will ensure the last
755-
// event reflects the corresponding external event dependence as well.
756-
std::optional<event> ExternalEvent = popExternalEvent();
757-
if (ExternalEvent)
758-
Handler.depends_on(*ExternalEvent);
797+
synchronizeWithExternalEvent(Handler);
759798

760799
auto EventRet = Handler.finalize();
761-
762-
if (shouldRecordLastEvent()) {
763-
EventToBuildDeps = getSyclObjImpl(EventRet);
764-
}
800+
EventToBuildDeps = getSyclObjImpl(EventRet);
765801

766802
return EventRet;
767803
}
768804

769805
template <typename HandlerType = handler>
770-
event finalizeHandlerOutOfOrder(HandlerType &Handler,
771-
std::unique_lock<std::mutex> &Lock) {
806+
event finalizeHandlerOutOfOrder(HandlerType &Handler) {
807+
// Accessing and changing of an event isn't atomic operation.
808+
// Hence, here is the lock for thread-safety.
809+
std::lock_guard<std::mutex> Lock{MMutex};
810+
772811
const CGType Type = getSyclObjImpl(Handler)->MCGType;
773-
Lock.lock();
774812

775813
MEmpty = false;
776814

@@ -810,8 +848,7 @@ class queue_impl {
810848
template <typename HandlerType = handler>
811849
event finalizeHandlerPostProcess(
812850
HandlerType &Handler,
813-
const optional<SubmitPostProcessF> &PostProcessorFunc,
814-
std::unique_lock<std::mutex> &Lock) {
851+
const optional<SubmitPostProcessF> &PostProcessorFunc) {
815852
bool IsKernel = Handler.getType() == CGType::Kernel;
816853
bool KernelUsesAssert = false;
817854

@@ -822,8 +859,8 @@ class queue_impl {
822859
ProgramManager::getInstance().kernelUsesAssert(
823860
Handler.MKernelName.data());
824861

825-
auto Event = MIsInorder ? finalizeHandlerInOrder(Handler, Lock)
826-
: finalizeHandlerOutOfOrder(Handler, Lock);
862+
auto Event = MIsInorder ? finalizeHandlerInOrder(Handler)
863+
: finalizeHandlerOutOfOrder(Handler);
827864

828865
auto &PostProcess = *PostProcessorFunc;
829866

@@ -835,13 +872,12 @@ class queue_impl {
835872
// template is needed for proper unit testing
836873
template <typename HandlerType = handler>
837874
event finalizeHandler(HandlerType &Handler,
838-
const optional<SubmitPostProcessF> &PostProcessorFunc,
839-
std::unique_lock<std::mutex> &Lock) {
875+
const optional<SubmitPostProcessF> &PostProcessorFunc) {
840876
if (PostProcessorFunc) {
841-
return finalizeHandlerPostProcess(Handler, PostProcessorFunc, Lock);
877+
return finalizeHandlerPostProcess(Handler, PostProcessorFunc);
842878
} else {
843-
return MIsInorder ? finalizeHandlerInOrder(Handler, Lock)
844-
: finalizeHandlerOutOfOrder(Handler, Lock);
879+
return MIsInorder ? finalizeHandlerInOrder(Handler)
880+
: finalizeHandlerOutOfOrder(Handler);
845881
}
846882
}
847883

@@ -1011,18 +1047,11 @@ class queue_impl {
10111047

10121048
const bool MIsInorder;
10131049

1014-
// Specifies whether this queue uses host tasks. If yes, then event
1015-
// from all operations need to be recorded for proper synchronization.
1016-
bool MHostTaskMode = false;
1017-
1018-
bool shouldRecordLastEvent() const {
1019-
// For in-order queues we rely on UR queue ordering.
1020-
// We only need to keep the event if host task are used
1021-
// (to ensure proper ordering).
1022-
1023-
// TODO: do not record last event for graphs as well
1024-
return MIsInorder && (MHostTaskMode || !MGraph.expired());
1025-
}
1050+
// Specifies whether this queue records last event. This can only
1051+
// be true if the queue is in-order, the command graph is not
1052+
// associated with the queue and there has never been any host
1053+
// tasks submitted to the queue.
1054+
bool MNoEventMode = true;
10261055

10271056
bool MEmpty = true;
10281057

sycl/unittests/scheduler/InOrderQueueSyncCheck.cpp

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -86,18 +86,12 @@ TEST_F(SchedulerTest, InOrderQueueSyncCheck) {
8686
LimitedHandlerSimulation MockCGH{detail::CGType::CodeplayHostTask, Queue};
8787
EXPECT_CALL(MockCGH, depends_on(An<const sycl::detail::EventImplPtr &>()))
8888
.Times(0);
89-
std::mutex mtx;
90-
std::unique_lock<std::mutex> Lock(mtx, std::defer_lock);
91-
Queue->finalizeHandler<LimitedHandlerSimulation>(MockCGH, std::nullopt,
92-
Lock);
89+
Queue->finalizeHandler<LimitedHandlerSimulation>(MockCGH, std::nullopt);
9390
}
9491
{
9592
LimitedHandlerSimulation MockCGH{detail::CGType::CodeplayHostTask, Queue};
9693
EXPECT_CALL(MockCGH, depends_on(An<const sycl::detail::EventImplPtr &>()))
9794
.Times(1);
98-
std::mutex mtx;
99-
std::unique_lock<std::mutex> Lock(mtx, std::defer_lock);
100-
Queue->finalizeHandler<LimitedHandlerSimulation>(MockCGH, std::nullopt,
101-
Lock);
95+
Queue->finalizeHandler<LimitedHandlerSimulation>(MockCGH, std::nullopt);
10296
}
10397
}

0 commit comments

Comments
 (0)