Skip to content

Commit 1e2bf93

Browse files
committed
[SYCL] Do not store last event for in-order queues
unless Host Tasks are used. Without Host Tasks, we can just rely on UR for ordering. Having no last event means that ext_oneapi_get_last_event() needs to submit a barrier to return an event to the user. Similarly, ext_oneapi_submit_barrier() now always submits a barrier, even for in-order queues. Whenever Host Tasks are used we need to start recording all events. This is needed because of how kernel submission synchronizes with Host Tasks. With a following scenario: q.host_task(); q.submit_kernel(); q.host_task(): The kernel won't even be submitted to UR until the first Host Task completes. To properly synchronize the second Host Task we need to keep the event describing kernel submission.
1 parent 7b8996e commit 1e2bf93

14 files changed

+113
-1240
lines changed

sycl/source/detail/queue_impl.cpp

Lines changed: 42 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -282,19 +282,26 @@ event queue_impl::memcpyFromDeviceGlobal(
282282
DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest);
283283
}
284284

285-
sycl::detail::optional<event> queue_impl::getLastEvent() {
285+
sycl::detail::optional<event>
286+
queue_impl::getLastEvent(const std::shared_ptr<queue_impl> &Self) {
286287
// The external event is required to finish last if set, so it is considered
287288
// the last event if present.
288289
if (std::optional<event> ExternalEvent = MInOrderExternalEvent.read())
289290
return ExternalEvent;
290291

291292
std::lock_guard<std::mutex> Lock{MMutex};
292-
if (MGraph.expired() && !MDefaultGraphDeps.LastEventPtr)
293+
if (MEmpty)
293294
return std::nullopt;
294-
if (MDiscardEvents)
295-
return createDiscardedEvent();
295+
if (MNoEventMode) {
296+
assert(MGraph.expired());
297+
assert(!MDefaultGraphDeps.LastEventPtr);
298+
299+
// We insert a marker to represent an event at end.
300+
return detail::createSyclObjFromImpl<event>(insertMarkerEvent(Self));
301+
}
296302
if (!MGraph.expired() && MExtGraphDeps.LastEventPtr)
297303
return detail::createSyclObjFromImpl<event>(MExtGraphDeps.LastEventPtr);
304+
assert(MDefaultGraphDeps.LastEventPtr);
298305
return detail::createSyclObjFromImpl<event>(MDefaultGraphDeps.LastEventPtr);
299306
}
300307

@@ -339,10 +346,27 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
339346

340347
HandlerImpl->MEventMode = SubmitInfo.EventMode();
341348

342-
auto Event = finalizeHandler(Handler, SubmitInfo.PostProcessorFunc());
349+
auto isInNoEventsMode =
350+
isInOrder() && MNoEventMode.load(std::memory_order_relaxed);
351+
auto needToStoreEvent = Type == CGType::CodeplayHostTask ||
352+
Streams.size() > 0 || SubmitInfo.PostProcessorFunc();
353+
354+
if (isInNoEventsMode && !needToStoreEvent) {
355+
std::unique_lock<std::mutex> Lock(MMutex);
356+
357+
// Check the condition again, under the lock to ensure that the
358+
// there was no concurrent submit that changed the state.
359+
if (MNoEventMode.load(std::memory_order_relaxed))
360+
return finalizeHandlerInOrderNoEventsUnlocked(Handler);
361+
}
343362

363+
auto Event = finalizeHandler(Handler, SubmitInfo.PostProcessorFunc());
344364
addEvent(Event);
345365

366+
// If we taken this path, it's because we need to store the last event.
367+
if (isInOrder())
368+
assert(!MNoEventMode);
369+
346370
const auto &EventImpl = detail::getSyclObjImpl(Event);
347371
for (auto &Stream : Streams) {
348372
// We don't want stream flushing to be blocking operation that is why submit
@@ -456,25 +480,18 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
456480
const std::vector<event> &ExpandedDepEvents =
457481
getExtendDependencyList(DepEvents, MutableDepEvents, Lock);
458482

483+
MEmpty = false;
484+
459485
// If we have a command graph set we need to capture the op through the
460486
// handler rather than by-passing the scheduler.
461487
if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass(
462488
ExpandedDepEvents, MContext)) {
463-
if ((MDiscardEvents || !CallerNeedsEvent) &&
464-
supportsDiscardingPiEvents()) {
489+
if (!CallerNeedsEvent && MNoEventMode) {
465490
NestedCallsTracker tracker;
466491
MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
467492
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
468493

469-
event DiscardedEvent = createDiscardedEvent();
470-
if (isInOrder()) {
471-
// Store the discarded event for proper in-order dependency tracking.
472-
auto &EventToStoreIn = MGraph.expired()
473-
? MDefaultGraphDeps.LastEventPtr
474-
: MExtGraphDeps.LastEventPtr;
475-
EventToStoreIn = detail::getSyclObjImpl(DiscardedEvent);
476-
}
477-
return DiscardedEvent;
494+
return createDiscardedEvent();
478495
}
479496

480497
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
@@ -500,7 +517,7 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
500517
}
501518
}
502519

503-
if (isInOrder()) {
520+
if (!MNoEventMode) {
504521
auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
505522
: MExtGraphDeps.LastEventPtr;
506523
EventToStoreIn = EventImpl;
@@ -745,14 +762,11 @@ bool queue_impl::ext_oneapi_empty() const {
745762
// the status of the last event.
746763
if (isInOrder() && !MDiscardEvents) {
747764
std::lock_guard<std::mutex> Lock(MMutex);
748-
// If there is no last event we know that no work has been submitted, so it
749-
// must be trivially empty.
750-
if (!MDefaultGraphDeps.LastEventPtr)
751-
return true;
752-
// Otherwise, check if the last event is finished.
765+
assert((MDefaultGraphDeps.LastEventPtr == nullptr) == MNoEventMode);
753766
// Note that we fall back to the backend query if the event was discarded,
754767
// which may happend despite the queue not being a discard event queue.
755-
if (!MDefaultGraphDeps.LastEventPtr->isDiscarded())
768+
if (MDefaultGraphDeps.LastEventPtr &&
769+
!MDefaultGraphDeps.LastEventPtr->isDiscarded())
756770
return MDefaultGraphDeps.LastEventPtr
757771
->get_info<info::event::command_execution_status>() ==
758772
info::event_command_status::complete;
@@ -765,6 +779,11 @@ bool queue_impl::ext_oneapi_empty() const {
765779
if (!IsReady)
766780
return false;
767781

782+
// If got here, it means that LastEventPtr is nullptr (so no possible Host
783+
// Tasks) and there is nothing executing on the device.
784+
if (isInOrder())
785+
return true;
786+
768787
// We may have events like host tasks which are not submitted to the backend
769788
// queue so we need to get their status separately.
770789
std::lock_guard<std::mutex> Lock(MMutex);

sycl/source/detail/queue_impl.hpp

Lines changed: 55 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ class queue_impl {
116116
: MDevice(Device), MContext(Context), MAsyncHandler(AsyncHandler),
117117
MPropList(PropList),
118118
MIsInorder(has_property<property::queue::in_order>()),
119+
MNoEventMode(MIsInorder),
119120
MDiscardEvents(
120121
has_property<ext::oneapi::property::queue::discard_events>()),
121122
MIsProfilingEnabled(has_property<property::queue::enable_profiling>()),
@@ -180,7 +181,8 @@ class queue_impl {
180181
#endif
181182
}
182183

183-
sycl::detail::optional<event> getLastEvent();
184+
sycl::detail::optional<event>
185+
getLastEvent(const std::shared_ptr<queue_impl> &Self);
184186

185187
private:
186188
void queue_impl_interop(ur_queue_handle_t UrQueue) {
@@ -630,6 +632,7 @@ class queue_impl {
630632
std::lock_guard<std::mutex> Lock(MMutex);
631633
MGraph = Graph;
632634
MExtGraphDeps.reset();
635+
MNoEventMode = false;
633636
}
634637

635638
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
@@ -719,6 +722,29 @@ class queue_impl {
719722
return ResEvent;
720723
}
721724

725+
template <typename HandlerType = handler>
726+
void synchronizeWithExternalEvent(HandlerType &Handler) {
727+
// If there is an external event set, add it as a dependency and clear it.
728+
// We do not need to hold the lock as MLastEventMtx will ensure the last
729+
// event reflects the corresponding external event dependence as well.
730+
std::optional<event> ExternalEvent = popExternalEvent();
731+
if (ExternalEvent)
732+
Handler.depends_on(*ExternalEvent);
733+
}
734+
735+
template <typename HandlerType = handler>
736+
event finalizeHandlerInOrderNoEventsUnlocked(HandlerType &Handler) {
737+
assert(isInOrder());
738+
assert(MGraph.expired());
739+
assert(MNoEventMode);
740+
741+
MEmpty = false;
742+
743+
synchronizeWithExternalEvent(Handler);
744+
745+
return Handler.finalize();
746+
}
747+
722748
template <typename HandlerType = handler>
723749
event finalizeHandlerInOrder(HandlerType &Handler) {
724750
// Accessing and changing of an event isn't atomic operation.
@@ -728,6 +754,16 @@ class queue_impl {
728754
auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
729755
: MExtGraphDeps.LastEventPtr;
730756

757+
if (!MEmpty && MNoEventMode &&
758+
Handler.getType() == CGType::CodeplayHostTask) {
759+
assert(MGraph.expired());
760+
assert(MDefaultGraphDeps.LastEventPtr == nullptr);
761+
// There might be some operations submitted to the queue
762+
// but the LastEventPtr is not set. If we are to run a host_task,
763+
// we need to insert a barrier to ensure proper synchronization.
764+
Handler.depends_on(insertHelperBarrier(Handler));
765+
}
766+
731767
// This dependency is needed for the following purposes:
732768
// - host tasks are handled by the runtime and cannot be implicitly
733769
// synchronized by the backend.
@@ -736,6 +772,9 @@ class queue_impl {
736772
// the RT but will not be passed to the backend. See getPIEvents in
737773
// Command.
738774
if (EventToBuildDeps) {
775+
// If we have last event, this means we are no longer in no-event mode.
776+
assert(!MNoEventMode);
777+
739778
// In the case where the last event was discarded and we are to run a
740779
// host_task, we insert a barrier into the queue and use the resulting
741780
// event as the dependency for the host_task.
@@ -754,12 +793,10 @@ class queue_impl {
754793
Handler.depends_on(EventToBuildDeps);
755794
}
756795

757-
// If there is an external event set, add it as a dependency and clear it.
758-
// We do not need to hold the lock as MLastEventMtx will ensure the last
759-
// event reflects the corresponding external event dependence as well.
760-
std::optional<event> ExternalEvent = popExternalEvent();
761-
if (ExternalEvent)
762-
Handler.depends_on(*ExternalEvent);
796+
MEmpty = false;
797+
MNoEventMode = false;
798+
799+
synchronizeWithExternalEvent(Handler);
763800

764801
auto EventRet = Handler.finalize();
765802
EventToBuildDeps = getSyclObjImpl(EventRet);
@@ -771,6 +808,9 @@ class queue_impl {
771808
event finalizeHandlerOutOfOrder(HandlerType &Handler) {
772809
const CGType Type = getSyclObjImpl(Handler)->MCGType;
773810
std::lock_guard<std::mutex> Lock{MMutex};
811+
812+
MEmpty = false;
813+
774814
// The following code supports barrier synchronization if host task is
775815
// involved in the scenario. Native barriers cannot handle host task
776816
// dependency so in the case where some commands were not enqueued
@@ -1006,6 +1046,14 @@ class queue_impl {
10061046

10071047
const bool MIsInorder;
10081048

1049+
// Specifies whether this queue records last event. This can only
1050+
// be true if the queue is in-order, the command graph is not
1051+
// associated with the queue and there has never been any host
1052+
// tasks submitted to the queue.
1053+
std::atomic<bool> MNoEventMode;
1054+
1055+
bool MEmpty = true;
1056+
10091057
std::vector<EventImplPtr> MStreamsServiceEvents;
10101058
std::mutex MStreamsServiceEventsMutex;
10111059

sycl/source/queue.cpp

Lines changed: 4 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -302,7 +302,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
302302
assert(!QueueImpl->hasCommandGraph() &&
303303
"Should not be called in on graph recording.");
304304

305-
sycl::detail::optional<event> LastEvent = QueueImpl->getLastEvent();
305+
sycl::detail::optional<event> LastEvent = QueueImpl->getLastEvent(QueueImpl);
306306
if (LastEvent)
307307
return *LastEvent;
308308

@@ -321,10 +321,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
321321
event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) {
322322
if (is_in_order() && !impl->hasCommandGraph() && !impl->MDiscardEvents &&
323323
!impl->MIsProfilingEnabled) {
324-
event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
325-
// If the last event was discarded, fall back to enqueuing a barrier.
326-
if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
327-
return InOrderLastEvent;
324+
return getBarrierEventForInorderQueueHelper(impl);
328325
}
329326

330327
return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc);
@@ -349,10 +346,7 @@ event queue::ext_oneapi_submit_barrier(const std::vector<event> &WaitList,
349346
});
350347
if (is_in_order() && !impl->hasCommandGraph() && !impl->MDiscardEvents &&
351348
!impl->MIsProfilingEnabled && AllEventsEmptyOrNop) {
352-
event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
353-
// If the last event was discarded, fall back to enqueuing a barrier.
354-
if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
355-
return InOrderLastEvent;
349+
return getBarrierEventForInorderQueueHelper(impl);
356350
}
357351

358352
return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); },
@@ -432,20 +426,7 @@ sycl::detail::optional<event> queue::ext_oneapi_get_last_event_impl() const {
432426
make_error_code(errc::invalid),
433427
"ext_oneapi_get_last_event() can only be called on in-order queues.");
434428

435-
sycl::detail::optional<event> LastEvent = impl->getLastEvent();
436-
437-
// If there was no last event, the queue is yet to have any work submitted and
438-
// we return a std::nullopt.
439-
if (!LastEvent)
440-
return std::nullopt;
441-
442-
// If the last event was discarded or a NOP, we insert a marker to represent
443-
// an event at end.
444-
auto LastEventImpl = detail::getSyclObjImpl(*LastEvent);
445-
if (LastEventImpl->isDiscarded() || LastEventImpl->isNOP())
446-
LastEvent =
447-
detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));
448-
return LastEvent;
429+
return impl->getLastEvent(impl);
449430
}
450431

451432
void queue::ext_oneapi_set_external_event(const event &external_event) {

0 commit comments

Comments
 (0)