Skip to content

[SYCL] do not store last event as a dendency for in-order queues #18018

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 69 additions & 59 deletions sycl/source/detail/queue_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,18 +136,19 @@ queue_impl::getExtendDependencyList(const std::vector<event> &DepEvents,
return DepEvents;

QueueLock.lock();
EventImplPtr ExtraEvent = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
: MExtGraphDeps.LastEventPtr;
std::optional<event> ExternalEvent = popExternalEvent();

if (!ExternalEvent && !ExtraEvent)
if (!ExternalEvent && !LastHostTaskEvent)
return DepEvents;

MutableVec = DepEvents;
if (ExternalEvent)
MutableVec.push_back(*ExternalEvent);
if (ExtraEvent)
MutableVec.push_back(detail::createSyclObjFromImpl<event>(ExtraEvent));
if (LastHostTaskEvent) {
MutableVec.push_back(
detail::createSyclObjFromImpl<event>(LastHostTaskEvent));
LastHostTaskEvent = nullptr;
}
return MutableVec;
}

Expand Down Expand Up @@ -282,20 +283,21 @@ event queue_impl::memcpyFromDeviceGlobal(
DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest);
}

sycl::detail::optional<event> queue_impl::getLastEvent() {
// The external event is required to finish last if set, so it is considered
// the last event if present.
sycl::detail::optional<event>
queue_impl::getLastEvent(const std::shared_ptr<queue_impl> &Self) {
if (std::optional<event> ExternalEvent = MInOrderExternalEvent.read())
return ExternalEvent;

std::lock_guard<std::mutex> Lock{MMutex};
if (MGraph.expired() && !MDefaultGraphDeps.LastEventPtr)
if (MEmpty) {
return std::nullopt;
if (MDiscardEvents)
return createDiscardedEvent();
if (!MGraph.expired() && MExtGraphDeps.LastEventPtr)
return detail::createSyclObjFromImpl<event>(MExtGraphDeps.LastEventPtr);
return detail::createSyclObjFromImpl<event>(MDefaultGraphDeps.LastEventPtr);
}

if (LastHostTaskEvent) {
return detail::createSyclObjFromImpl<event>(LastHostTaskEvent);
}

// We insert a marker to represent an event at end.
return detail::createSyclObjFromImpl<event>(insertMarkerEvent(Self));
}

void queue_impl::addEvent(const event &Event) {
Expand Down Expand Up @@ -375,11 +377,11 @@ event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,
};
detail::type_erased_cgfo_ty CGF{L};

if (!CallerNeedsEvent && supportsDiscardingPiEvents()) {
submit_without_event(CGF, Self, SI,
/*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
return createDiscardedEvent();
}
// if (!CallerNeedsEvent && supportsDiscardingPiEvents()) {
// submit_without_event(CGF, Self, SI,
// /*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
// return createDiscardedEvent();
// }
return submit_with_event(CGF, Self, SI,
/*CodeLoc*/ {}, /*IsTopCodeLoc*/ true);
}
Expand All @@ -396,6 +398,32 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
{
std::unique_lock<std::mutex> Lock(MMutex, std::defer_lock);

if (isInOrder()) {
Lock.lock();
std::optional<event> ExternalEvent = popExternalEvent();

if (LastHostTaskEvent) {
// TODO: this should be in finalize?
LastHostTaskEvent->wait(LastHostTaskEvent);
LastHostTaskEvent = nullptr;
}

std::vector<event> WaitEvents;
if (ExternalEvent)
WaitEvents.emplace_back(std::move(*ExternalEvent));

event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
const auto &EventImpl = detail::getSyclObjImpl(ResEvent);
{
NestedCallsTracker tracker;
ur_event_handle_t UREvent = nullptr;
MemOpFunc(MemOpArgs..., getUrEvents(WaitEvents), &UREvent, EventImpl);
EventImpl->setHandle(UREvent);
EventImpl->setEnqueued();
}
return discard_or_return(ResEvent);
}

std::vector<event> MutableDepEvents;
const std::vector<event> &ExpandedDepEvents =
getExtendDependencyList(DepEvents, MutableDepEvents, Lock);
Expand All @@ -404,22 +432,22 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
// handler rather than by-passing the scheduler.
if (MGraph.expired() && Scheduler::areEventsSafeForSchedulerBypass(
ExpandedDepEvents, MContext)) {
if ((MDiscardEvents || !CallerNeedsEvent) &&
supportsDiscardingPiEvents()) {
NestedCallsTracker tracker;
MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
/*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);

event DiscardedEvent = createDiscardedEvent();
if (isInOrder()) {
// Store the discarded event for proper in-order dependency tracking.
auto &EventToStoreIn = MGraph.expired()
? MDefaultGraphDeps.LastEventPtr
: MExtGraphDeps.LastEventPtr;
EventToStoreIn = detail::getSyclObjImpl(DiscardedEvent);
}
return DiscardedEvent;
}
// if ((MDiscardEvents || !CallerNeedsEvent) &&
// supportsDiscardingPiEvents()) {
// NestedCallsTracker tracker;
// MemOpFunc(MemOpArgs..., getUrEvents(ExpandedDepEvents),
// /*PiEvent*/ nullptr, /*EventImplPtr*/ nullptr);

// event DiscardedEvent = createDiscardedEvent();
// if (isInOrder()) {
// // Store the discarded event for proper in-order dependency
// tracking. auto &EventToStoreIn = MGraph.expired()
// ? MDefaultGraphDeps.LastEventPtr
// : MExtGraphDeps.LastEventPtr;
// EventToStoreIn = detail::getSyclObjImpl(DiscardedEvent);
// }
// return DiscardedEvent;
// }

event ResEvent = prepareSYCLEventAssociatedWithQueue(Self);
const auto &EventImpl = detail::getSyclObjImpl(ResEvent);
Expand All @@ -443,12 +471,6 @@ event queue_impl::submitMemOpHelper(const std::shared_ptr<queue_impl> &Self,
}
}

if (isInOrder()) {
auto &EventToStoreIn = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
: MExtGraphDeps.LastEventPtr;
EventToStoreIn = EventImpl;
}

return discard_or_return(ResEvent);
}
}
Expand Down Expand Up @@ -569,6 +591,11 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
std::vector<std::weak_ptr<event_impl>> WeakEvents;
{
std::lock_guard<std::mutex> Lock(MMutex);
if (LastHostTaskEvent) {
LastHostTaskEvent->wait(LastHostTaskEvent);
LastHostTaskEvent = nullptr;
}

WeakEvents.swap(MEventsWeak);

MMissedCleanupRequests.unset(
Expand Down Expand Up @@ -684,23 +711,6 @@ ur_native_handle_t queue_impl::getNative(int32_t &NativeHandleDesc) const {
}

bool queue_impl::ext_oneapi_empty() const {
// If we have in-order queue where events are not discarded then just check
// the status of the last event.
if (isInOrder() && !MDiscardEvents) {
std::lock_guard<std::mutex> Lock(MMutex);
// If there is no last event we know that no work has been submitted, so it
// must be trivially empty.
if (!MDefaultGraphDeps.LastEventPtr)
return true;
// Otherwise, check if the last event is finished.
// Note that we fall back to the backend query if the event was discarded,
// which may happend despite the queue not being a discard event queue.
if (!MDefaultGraphDeps.LastEventPtr->isDiscarded())
return MDefaultGraphDeps.LastEventPtr
->get_info<info::event::command_execution_status>() ==
info::event_command_status::complete;
}

// Check the status of the backend queue if this is not a host queue.
ur_bool_t IsReady = false;
getAdapter()->call<UrApiKind::urQueueGetInfo>(
Expand Down
59 changes: 27 additions & 32 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,6 @@ class queue_impl {
#endif
}

sycl::detail::optional<event> getLastEvent();

private:
void queue_impl_interop(ur_queue_handle_t UrQueue) {
if (has_property<ext::oneapi::property::queue::discard_events>() &&
Expand Down Expand Up @@ -676,6 +674,9 @@ class queue_impl {
return Result;
}

sycl::detail::optional<event>
getLastEvent(const std::shared_ptr<queue_impl> &Self);

const std::vector<event> &
getExtendDependencyList(const std::vector<event> &DepEvents,
std::vector<event> &MutableVec,
Expand Down Expand Up @@ -742,44 +743,32 @@ class queue_impl {
// Hence, here is the lock for thread-safety.
std::lock_guard<std::mutex> Lock{MMutex};

auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
: MExtGraphDeps.LastEventPtr;

// This dependency is needed for the following purposes:
// - host tasks are handled by the runtime and cannot be implicitly
// synchronized by the backend.
// - to prevent the 2nd kernel enqueue when the 1st kernel is blocked
// by a host task. This dependency allows to build the enqueue order in
// the RT but will not be passed to the backend. See getPIEvents in
// Command.
if (EventToBuildDeps) {
// In the case where the last event was discarded and we are to run a
// host_task, we insert a barrier into the queue and use the resulting
// event as the dependency for the host_task.
// Note that host_task events can never be discarded, so this will not
// insert barriers between host_task enqueues.
if (EventToBuildDeps->isDiscarded() &&
Handler.getType() == CGType::CodeplayHostTask)
EventToBuildDeps = insertHelperBarrier(Handler);

// depends_on after an async alloc is explicitly disallowed. Async alloc
// handles in order queue dependencies preemptively, so we skip them.
// Note: This could be improved by moving the handling of dependencies
// to before calling the CGF.
if (!EventToBuildDeps->isDiscarded() &&
!(Handler.getType() == CGType::AsyncAlloc))
Handler.depends_on(EventToBuildDeps);
}

// If there is an external event set, add it as a dependency and clear it.
// We do not need to hold the lock as MLastEventMtx will ensure the last
// event reflects the corresponding external event dependence as well.
std::optional<event> ExternalEvent = popExternalEvent();
if (ExternalEvent)
Handler.depends_on(*ExternalEvent);

if (LastHostTaskEvent && Handler.getType() == CGType::CodeplayHostTask) {
// is this needed?
Handler.depends_on(
detail::createSyclObjFromImpl<event>(LastHostTaskEvent));
LastHostTaskEvent = nullptr;
} else if (!LastHostTaskEvent &&
Handler.getType() == CGType::CodeplayHostTask) {
auto Event = insertHelperBarrier(Handler);
Handler.depends_on(Event);
} else if (LastHostTaskEvent) {
LastHostTaskEvent->wait(LastHostTaskEvent);
LastHostTaskEvent = nullptr;
}

auto EventRet = Handler.finalize();
EventToBuildDeps = getSyclObjImpl(EventRet);

if (getSyclObjImpl(EventRet)->isHost()) {
LastHostTaskEvent = getSyclObjImpl(EventRet);
}

return EventRet;
}
Expand Down Expand Up @@ -849,6 +838,7 @@ class queue_impl {
template <typename HandlerType = handler>
event finalizeHandler(HandlerType &Handler,
const optional<SubmitPostProcessF> &PostProcessorFunc) {
MEmpty = false;
if (PostProcessorFunc) {
return finalizeHandlerPostProcess(Handler, PostProcessorFunc);
} else {
Expand Down Expand Up @@ -956,6 +946,11 @@ class queue_impl {
/// List of queues created for FPGA device from a single SYCL queue.
ur_queue_handle_t MQueue;

// TODO: this is for in-order queue only. Move it to separate struct.
EventImplPtr LastHostTaskEvent = nullptr;

bool MEmpty = true;

// Access should be guarded with MMutex
struct DependencyTrackingItems {
// This event is employed for enhanced dependency tracking with in-order
Expand Down
45 changes: 4 additions & 41 deletions sycl/source/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,24 +293,6 @@ void queue::wait_and_throw_proxy(const detail::code_location &CodeLoc) {
impl->wait_and_throw(CodeLoc);
}

static event
getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
// This function should not be called when a queue is recording to a graph,
// as a graph can record from multiple queues and we cannot guarantee the
// last node added by an in-order queue will be the last node added to the
// graph.
assert(!QueueImpl->hasCommandGraph() &&
"Should not be called in on graph recording.");

sycl::detail::optional<event> LastEvent = QueueImpl->getLastEvent();
if (LastEvent)
return *LastEvent;

// If there was no last event, we create an empty one.
return detail::createSyclObjFromImpl<event>(
std::make_shared<detail::event_impl>(std::nullopt));
}

/// Prevents any commands submitted afterward to this queue from executing
/// until all commands previously submitted to this queue have entered the
/// complete state.
Expand All @@ -321,10 +303,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) {
if (is_in_order() && !impl->hasCommandGraph() && !impl->MDiscardEvents &&
!impl->MIsProfilingEnabled) {
event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
// If the last event was discarded, fall back to enqueuing a barrier.
if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
return InOrderLastEvent;
return detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));
}

return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc);
Expand All @@ -346,13 +325,10 @@ event queue::ext_oneapi_submit_barrier(const std::vector<event> &WaitList,
auto EventImpl = detail::getSyclObjImpl(Event);
return (EventImpl->isDefaultConstructed() || EventImpl->isNOP()) &&
!EventImpl->hasCommandGraph();
});
}); // TODO: is this needed?
if (is_in_order() && !impl->hasCommandGraph() && !impl->MDiscardEvents &&
!impl->MIsProfilingEnabled && AllEventsEmptyOrNop) {
event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
// If the last event was discarded, fall back to enqueuing a barrier.
if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
return InOrderLastEvent;
return detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));
}

return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); },
Expand Down Expand Up @@ -432,20 +408,7 @@ sycl::detail::optional<event> queue::ext_oneapi_get_last_event_impl() const {
make_error_code(errc::invalid),
"ext_oneapi_get_last_event() can only be called on in-order queues.");

sycl::detail::optional<event> LastEvent = impl->getLastEvent();

// If there was no last event, the queue is yet to have any work submitted and
// we return a std::nullopt.
if (!LastEvent)
return std::nullopt;

// If the last event was discarded or a NOP, we insert a marker to represent
// an event at end.
auto LastEventImpl = detail::getSyclObjImpl(*LastEvent);
if (LastEventImpl->isDiscarded() || LastEventImpl->isNOP())
LastEvent =
detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));
return LastEvent;
return impl->getLastEvent(impl);
}

void queue::ext_oneapi_set_external_event(const event &external_event) {
Expand Down
4 changes: 0 additions & 4 deletions sycl/test-e2e/InOrderEventsExt/get_last_event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,6 @@ int Check(const sycl::queue &Q, const char *CheckName, const F &CheckFunc) {
<< std::endl;
return 1;
}
if (*E != *LastEvent) {
std::cout << "Failed " << CheckName << std::endl;
return 1;
}
return 0;
}

Expand Down
Loading
Loading