Skip to content

Commit 2a0d012

Browse files
committed
[SYCL] do not store last event as a dendency for in-order queues
For in-order queues, the ordering is guaranteed by UR. The only expception is Host Task - if the last event is a host task, then enqueue a barrier.
1 parent 07d8bab commit 2a0d012

File tree

5 files changed

+111
-154
lines changed

5 files changed

+111
-154
lines changed

sycl/source/detail/queue_impl.cpp

Lines changed: 69 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -137,18 +137,19 @@ queue_impl::getExtendDependencyList(const std::vector<event> &DepEvents,
137137
return DepEvents;
138138

139139
QueueLock.lock();
140-
EventImplPtr ExtraEvent = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
141-
: MExtGraphDeps.LastEventPtr;
142140
std::optional<event> ExternalEvent = popExternalEvent();
143141

144-
if (!ExternalEvent && !ExtraEvent)
142+
if (!ExternalEvent && !LastHostTaskEvent)
145143
return DepEvents;
146144

147145
MutableVec = DepEvents;
148146
if (ExternalEvent)
149147
MutableVec.push_back(*ExternalEvent);
150-
if (ExtraEvent)
151-
MutableVec.push_back(detail::createSyclObjFromImpl<event>(ExtraEvent));
148+
if (LastHostTaskEvent) {
149+
MutableVec.push_back(
150+
detail::createSyclObjFromImpl<event>(LastHostTaskEvent));
151+
LastHostTaskEvent = nullptr;
152+
}
152153
return MutableVec;
153154
}
154155

@@ -283,20 +284,21 @@ event queue_impl::memcpyFromDeviceGlobal(
283284
DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest);
284285
}
285286

286-
sycl::detail::optional<event> queue_impl::getLastEvent() {
287-
// The external event is required to finish last if set, so it is considered
288-
// the last event if present.
287+
sycl::detail::optional<event>
288+
queue_impl::getLastEvent(const std::shared_ptr<queue_impl> &Self) {
289289
if (std::optional<event> ExternalEvent = MInOrderExternalEvent.read())
290290
return ExternalEvent;
291291

292-
std::lock_guard<std::mutex> Lock{MMutex};
293-
if (MGraph.expired() && !MDefaultGraphDeps.LastEventPtr)
292+
if (MEmpty) {
294293
return std::nullopt;
295-
if (MDiscardEvents)
296-
return createDiscardedEvent();
297-
if (!MGraph.expired() && MExtGraphDeps.LastEventPtr)
298-
return detail::createSyclObjFromImpl<event>(MExtGraphDeps.LastEventPtr);
299-
return detail::createSyclObjFromImpl<event>(MDefaultGraphDeps.LastEventPtr);
294+
}
295+
296+
if (LastHostTaskEvent) {
297+
return detail::createSyclObjFromImpl<event>(LastHostTaskEvent);
298+
}
299+
300+
// We insert a marker to represent an event at end.
301+
return detail::createSyclObjFromImpl<event>(insertMarkerEvent(Self));
300302
}
301303

302304
void queue_impl::addEvent(const event &Event) {
@@ -414,11 +416,11 @@ event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,
414416
};
415417
detail::type_erased_cgfo_ty CGF{L};
416418

417-
if (!CallerNeedsEvent && supportsDiscardingPiEvents()) {
418-
submit_without_event(CGF, Self, SI,
419-
/*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
420-
return createDiscardedEvent();
421-
}
419+
// if (!CallerNeedsEvent && supportsDiscardingPiEvents()) {
420+
// submit_without_event(CGF, Self, SI,
421+
// /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
422+
// return createDiscardedEvent();
423+
// }
422424
return submit_with_event(CGF, Self, SI,
423425
/*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
424426
}
@@ -435,6 +437,32 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
435437
{
436438
std::unique_lock<std::mutex> Lock(MMutex, std::defer_lock);
437439

440+
if (isInOrder()) {
441+
Lock.lock();
442+
std::optional<event> ExternalEvent = popExternalEvent();
443+
444+
if (LastHostTaskEvent) {
445+
// TODO: this should be in finalize?
446+
LastHostTaskEvent->wait(LastHostTaskEvent);
447+
LastHostTaskEvent = nullptr;
448+
}
449+
450+
std::vector<event> WaitEvents;
451+
if (ExternalEvent)
452+
WaitEvents.emplace_back(std::move(*ExternalEvent));
453+
454+
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
455+
const auto &EventImpl = detail::getSyclObjImpl(ResEvent);
456+
{
457+
NestedCallsTracker tracker;
458+
ur_event_handle_t UREvent = nullptr;
459+
MemOpFunc(MemOpArgs..., getUrEvents(WaitEvents), &UREvent, EventImpl);
460+
EventImpl->setHandle(UREvent);
461+
EventImpl->setEnqueued();
462+
}
463+
return discard_or_return(ResEvent);
464+
}
465+
438466
std::vector<event> MutableDepEvents;
439467
const std::vector<event> &ExpandedDepEvents =
440468
getExtendDependencyList(DepEvents, MutableDepEvents, Lock);
@@ -443,22 +471,22 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
443471
// handler rather than by-passing the scheduler.
444472
if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass(
445473
ExpandedDepEvents, MContext)) {
446-
if ((MDiscardEvents || !CallerNeedsEvent) &&
447-
supportsDiscardingPiEvents()) {
448-
NestedCallsTracker tracker;
449-
MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
450-
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
451-
452-
event DiscardedEvent = createDiscardedEvent();
453-
if (isInOrder()) {
454-
// Store the discarded event for proper in-order dependency tracking.
455-
auto &EventToStoreIn = MGraph.expired()
456-
? MDefaultGraphDeps.LastEventPtr
457-
: MExtGraphDeps.LastEventPtr;
458-
EventToStoreIn = detail::getSyclObjImpl(DiscardedEvent);
459-
}
460-
return DiscardedEvent;
461-
}
474+
// if ((MDiscardEvents || !CallerNeedsEvent) &&
475+
// supportsDiscardingPiEvents()) {
476+
// NestedCallsTracker tracker;
477+
// MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
478+
// /*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);
479+
480+
// event DiscardedEvent = createDiscardedEvent();
481+
// if (isInOrder()) {
482+
// // Store the discarded event for proper in-order dependency
483+
// tracking. auto &EventToStoreIn = MGraph.expired()
484+
// ? MDefaultGraphDeps.LastEventPtr
485+
// : MExtGraphDeps.LastEventPtr;
486+
// EventToStoreIn = detail::getSyclObjImpl(DiscardedEvent);
487+
// }
488+
// return DiscardedEvent;
489+
// }
462490

463491
event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
464492
const auto &EventImpl = detail::getSyclObjImpl(ResEvent);
@@ -482,11 +510,6 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
482510
}
483511
}
484512

485-
if (isInOrder()) {
486-
auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
487-
: MExtGraphDeps.LastEventPtr;
488-
EventToStoreIn = EventImpl;
489-
}
490513
// Track only if we won't be able to handle it with urQueueFinish.
491514
if (MEmulateOOO)
492515
addSharedEvent(ResEvent);
@@ -612,6 +635,11 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
612635
std::vector<event> SharedEvents;
613636
{
614637
std::lock_guard<std::mutex> Lock(MMutex);
638+
if (LastHostTaskEvent) {
639+
LastHostTaskEvent->wait(LastHostTaskEvent);
640+
LastHostTaskEvent = nullptr;
641+
}
642+
615643
WeakEvents.swap(MEventsWeak);
616644
SharedEvents.swap(MEventsShared);
617645

@@ -736,23 +764,6 @@ ur_native_handle_t queue_impl::getNative(int32_t &NativeHandleDesc) const {
736764
}
737765

738766
bool queue_impl::ext_oneapi_empty() const {
739-
// If we have in-order queue where events are not discarded then just check
740-
// the status of the last event.
741-
if (isInOrder() && !MDiscardEvents) {
742-
std::lock_guard<std::mutex> Lock(MMutex);
743-
// If there is no last event we know that no work has been submitted, so it
744-
// must be trivially empty.
745-
if (!MDefaultGraphDeps.LastEventPtr)
746-
return true;
747-
// Otherwise, check if the last event is finished.
748-
// Note that we fall back to the backend query if the event was discarded,
749-
// which may happend despite the queue not being a discard event queue.
750-
if (!MDefaultGraphDeps.LastEventPtr->isDiscarded())
751-
return MDefaultGraphDeps.LastEventPtr
752-
->get_info<info::event::command_execution_status>() ==
753-
info::event_command_status::complete;
754-
}
755-
756767
// Check the status of the backend queue if this is not a host queue.
757768
ur_bool_t IsReady = false;
758769
getAdapter()->call<UrApiKind::urQueueGetInfo>(

sycl/source/detail/queue_impl.hpp

Lines changed: 27 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,6 @@ class queue_impl {
180180
#endif
181181
}
182182

183-
sycl::detail::optional<event> getLastEvent();
184-
185183
private:
186184
void queue_impl_interop(ur_queue_handle_t UrQueue) {
187185
if (has_property<ext::oneapi::property::queue::discard_events>() &&
@@ -725,6 +723,9 @@ class queue_impl {
725723
return Result;
726724
}
727725

726+
sycl::detail::optional<event>
727+
getLastEvent(const std::shared_ptr<queue_impl> &Self);
728+
728729
const std::vector<event> &
729730
getExtendDependencyList(const std::vector<event> &DepEvents,
730731
std::vector<event> &MutableVec,
@@ -791,44 +792,32 @@ class queue_impl {
791792
// Hence, here is the lock for thread-safety.
792793
std::lock_guard<std::mutex> Lock{MMutex};
793794

794-
auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
795-
: MExtGraphDeps.LastEventPtr;
796-
797-
// This dependency is needed for the following purposes:
798-
// - host tasks are handled by the runtime and cannot be implicitly
799-
// synchronized by the backend.
800-
// - to prevent the 2nd kernel enqueue when the 1st kernel is blocked
801-
// by a host task. This dependency allows to build the enqueue order in
802-
// the RT but will not be passed to the backend. See getPIEvents in
803-
// Command.
804-
if (EventToBuildDeps) {
805-
// In the case where the last event was discarded and we are to run a
806-
// host_task, we insert a barrier into the queue and use the resulting
807-
// event as the dependency for the host_task.
808-
// Note that host_task events can never be discarded, so this will not
809-
// insert barriers between host_task enqueues.
810-
if (EventToBuildDeps->isDiscarded() &&
811-
Handler.getType() == CGType::CodeplayHostTask)
812-
EventToBuildDeps = insertHelperBarrier(Handler);
813-
814-
// depends_on after an async alloc is explicitly disallowed. Async alloc
815-
// handles in order queue dependencies preemptively, so we skip them.
816-
// Note: This could be improved by moving the handling of dependencies
817-
// to before calling the CGF.
818-
if (!EventToBuildDeps->isDiscarded() &&
819-
!(Handler.getType() == CGType::AsyncAlloc))
820-
Handler.depends_on(EventToBuildDeps);
821-
}
822-
823795
// If there is an external event set, add it as a dependency and clear it.
824796
// We do not need to hold the lock as MLastEventMtx will ensure the last
825797
// event reflects the corresponding external event dependence as well.
826798
std::optional<event> ExternalEvent = popExternalEvent();
827799
if (ExternalEvent)
828800
Handler.depends_on(*ExternalEvent);
829801

802+
if (LastHostTaskEvent && Handler.getType() == CGType::CodeplayHostTask) {
803+
// is this needed?
804+
Handler.depends_on(
805+
detail::createSyclObjFromImpl<event>(LastHostTaskEvent));
806+
LastHostTaskEvent = nullptr;
807+
} else if (!LastHostTaskEvent &&
808+
Handler.getType() == CGType::CodeplayHostTask) {
809+
auto Event = insertHelperBarrier(Handler);
810+
Handler.depends_on(Event);
811+
} else if (LastHostTaskEvent) {
812+
LastHostTaskEvent->wait(LastHostTaskEvent);
813+
LastHostTaskEvent = nullptr;
814+
}
815+
830816
auto EventRet = Handler.finalize();
831-
EventToBuildDeps = getSyclObjImpl(EventRet);
817+
818+
if (getSyclObjImpl(EventRet)->isHost()) {
819+
LastHostTaskEvent = getSyclObjImpl(EventRet);
820+
}
832821

833822
return EventRet;
834823
}
@@ -898,6 +887,7 @@ class queue_impl {
898887
template <typename HandlerType = handler>
899888
event finalizeHandler(HandlerType &Handler,
900889
const optional<SubmitPostProcessF> &PostProcessorFunc) {
890+
MEmpty = false;
901891
if (PostProcessorFunc) {
902892
return finalizeHandlerPostProcess(Handler, PostProcessorFunc);
903893
} else {
@@ -1012,6 +1002,11 @@ class queue_impl {
10121002
/// need to emulate it with multiple native in-order queues.
10131003
bool MEmulateOOO = false;
10141004

1005+
// TODO: this is for in-order queue only. Move it to separate struct.
1006+
EventImplPtr LastHostTaskEvent = nullptr;
1007+
1008+
bool MEmpty = true;
1009+
10151010
// Access should be guarded with MMutex
10161011
struct DependencyTrackingItems {
10171012
// This event is employed for enhanced dependency tracking with in-order

sycl/source/queue.cpp

Lines changed: 4 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -293,24 +293,6 @@ void queue::wait_and_throw_proxy(const detail::code_location &CodeLoc) {
293293
impl->wait_and_throw(CodeLoc);
294294
}
295295

296-
static event
297-
getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
298-
// This function should not be called when a queue is recording to a graph,
299-
// as a graph can record from multiple queues and we cannot guarantee the
300-
// last node added by an in-order queue will be the last node added to the
301-
// graph.
302-
assert(!QueueImpl->hasCommandGraph() &&
303-
"Should not be called in on graph recording.");
304-
305-
sycl::detail::optional<event> LastEvent = QueueImpl->getLastEvent();
306-
if (LastEvent)
307-
return *LastEvent;
308-
309-
// If there was no last event, we create an empty one.
310-
return detail::createSyclObjFromImpl<event>(
311-
std::make_shared<detail::event_impl>(std::nullopt));
312-
}
313-
314296
/// Prevents any commands submitted afterward to this queue from executing
315297
/// until all commands previously submitted to this queue have entered the
316298
/// complete state.
@@ -321,10 +303,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
321303
event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) {
322304
if (is_in_order() && !impl->hasCommandGraph() && !impl->MDiscardEvents &&
323305
!impl->MIsProfilingEnabled) {
324-
event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
325-
// If the last event was discarded, fall back to enqueuing a barrier.
326-
if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
327-
return InOrderLastEvent;
306+
return detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));
328307
}
329308

330309
return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc);
@@ -346,13 +325,10 @@ event queue::ext_oneapi_submit_barrier(const std::vector<event> &WaitList,
346325
auto EventImpl = detail::getSyclObjImpl(Event);
347326
return (EventImpl->isDefaultConstructed() || EventImpl->isNOP()) &&
348327
!EventImpl->hasCommandGraph();
349-
});
328+
}); // TODO: is this needed?
350329
if (is_in_order() && !impl->hasCommandGraph() && !impl->MDiscardEvents &&
351330
!impl->MIsProfilingEnabled && AllEventsEmptyOrNop) {
352-
event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
353-
// If the last event was discarded, fall back to enqueuing a barrier.
354-
if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
355-
return InOrderLastEvent;
331+
return detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));
356332
}
357333

358334
return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); },
@@ -432,20 +408,7 @@ sycl::detail::optional<event> queue::ext_oneapi_get_last_event_impl() const {
432408
make_error_code(errc::invalid),
433409
"ext_oneapi_get_last_event() can only be called on in-order queues.");
434410

435-
sycl::detail::optional<event> LastEvent = impl->getLastEvent();
436-
437-
// If there was no last event, the queue is yet to have any work submitted and
438-
// we return a std::nullopt.
439-
if (!LastEvent)
440-
return std::nullopt;
441-
442-
// If the last event was discarded or a NOP, we insert a marker to represent
443-
// an event at end.
444-
auto LastEventImpl = detail::getSyclObjImpl(*LastEvent);
445-
if (LastEventImpl->isDiscarded() || LastEventImpl->isNOP())
446-
LastEvent =
447-
detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));
448-
return LastEvent;
411+
return impl->getLastEvent(impl);
449412
}
450413

451414
void queue::ext_oneapi_set_external_event(const event &external_event) {

sycl/test-e2e/InOrderEventsExt/get_last_event.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,6 @@ int Check(const sycl::queue &Q, const char *CheckName, const F &CheckFunc) {
3636
<< std::endl;
3737
return 1;
3838
}
39-
if (*E != *LastEvent) {
40-
std::cout << "Failed " << CheckName << std::endl;
41-
return 1;
42-
}
4339
return 0;
4440
}
4541

0 commit comments

Comments
 (0)