Skip to content

Commit f38c72e

Browse files
committed
[SYCL] do not store last event as a dendency for in-order queues
For in-order queues, the ordering is guaranteed by UR. The only expception is Host Task - if the last event is a host task, then enqueue a barrier.
1 parent 07d8bab commit f38c72e

File tree

4 files changed

+26
-85
lines changed

4 files changed

+26
-85
lines changed

sycl/include/sycl/handler.hpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -468,11 +468,11 @@ class __SYCL_EXPORT handler {
468468
detail::CGType getType() const;
469469

470470
void throwIfActionIsCreated() {
471-
if (detail::CGType::None != getType())
472-
throw sycl::exception(make_error_code(errc::runtime),
473-
"Attempt to set multiple actions for the "
474-
"command group. Command group must consist of "
475-
"a single kernel or explicit memory operation.");
471+
//if (detail::CGType::None != getType())
472+
// throw sycl::exception(make_error_code(errc::runtime),
473+
// "Attempt to set multiple actions for the "
474+
// "command group. Command group must consist of "
475+
// "a single kernel or explicit memory operation.");
476476
}
477477

478478
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES

sycl/source/detail/queue_impl.cpp

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -283,22 +283,6 @@ event queue_impl::memcpyFromDeviceGlobal(
283283
DeviceGlobalPtr, IsDeviceImageScope, Self, NumBytes, Offset, Dest);
284284
}
285285

286-
sycl::detail::optional<event> queue_impl::getLastEvent() {
287-
// The external event is required to finish last if set, so it is considered
288-
// the last event if present.
289-
if (std::optional<event> ExternalEvent = MInOrderExternalEvent.read())
290-
return ExternalEvent;
291-
292-
std::lock_guard<std::mutex> Lock{MMutex};
293-
if (MGraph.expired() && !MDefaultGraphDeps.LastEventPtr)
294-
return std::nullopt;
295-
if (MDiscardEvents)
296-
return createDiscardedEvent();
297-
if (!MGraph.expired() && MExtGraphDeps.LastEventPtr)
298-
return detail::createSyclObjFromImpl<event>(MExtGraphDeps.LastEventPtr);
299-
return detail::createSyclObjFromImpl<event>(MDefaultGraphDeps.LastEventPtr);
300-
}
301-
302286
void queue_impl::addEvent(const event &Event) {
303287
const EventImplPtr &EImpl = getSyclObjImpl(Event);
304288
assert(EImpl && "Event implementation is missing");

sycl/source/detail/queue_impl.hpp

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,6 @@ class queue_impl {
180180
#endif
181181
}
182182

183-
sycl::detail::optional<event> getLastEvent();
184-
185183
private:
186184
void queue_impl_interop(ur_queue_handle_t UrQueue) {
187185
if (has_property<ext::oneapi::property::queue::discard_events>() &&
@@ -785,39 +783,28 @@ class queue_impl {
785783
return ResEvent;
786784
}
787785

786+
template <typename HandlerType = handler>
787+
void insertHelperBarrierNoEvent(const HandlerType &Handler) {
788+
getAdapter()->call<UrApiKind::urEnqueueEventsWaitWithBarrier>(
789+
Handler.MQueue->getHandleRef(), 0, nullptr, nullptr);
790+
}
791+
788792
template <typename HandlerType = handler>
789793
event finalizeHandlerInOrder(HandlerType &Handler) {
790794
// Accessing and changing of an event isn't atomic operation.
791795
// Hence, here is the lock for thread-safety.
792796
std::lock_guard<std::mutex> Lock{MMutex};
793797

794-
auto &EventToBuildDeps = MGraph.expired() ? MDefaultGraphDeps.LastEventPtr
795-
: MExtGraphDeps.LastEventPtr;
796-
797798
// This dependency is needed for the following purposes:
798799
// - host tasks are handled by the runtime and cannot be implicitly
799800
// synchronized by the backend.
800801
// - to prevent the 2nd kernel enqueue when the 1st kernel is blocked
801802
// by a host task. This dependency allows to build the enqueue order in
802803
// the RT but will not be passed to the backend. See getPIEvents in
803804
// Command.
804-
if (EventToBuildDeps) {
805-
// In the case where the last event was discarded and we are to run a
806-
// host_task, we insert a barrier into the queue and use the resulting
807-
// event as the dependency for the host_task.
808-
// Note that host_task events can never be discarded, so this will not
809-
// insert barriers between host_task enqueues.
810-
if (EventToBuildDeps->isDiscarded() &&
811-
Handler.getType() == CGType::CodeplayHostTask)
812-
EventToBuildDeps = insertHelperBarrier(Handler);
813-
814-
// depends_on after an async alloc is explicitly disallowed. Async alloc
815-
// handles in order queue dependencies preemptively, so we skip them.
816-
// Note: This could be improved by moving the handling of dependencies
817-
// to before calling the CGF.
818-
if (!EventToBuildDeps->isDiscarded() &&
819-
!(Handler.getType() == CGType::AsyncAlloc))
820-
Handler.depends_on(EventToBuildDeps);
805+
if (LastHostTaskEvent) {
806+
insertHelperBarrierNoEvent(Handler);
807+
LastHostTaskEvent = nullptr;
821808
}
822809

823810
// If there is an external event set, add it as a dependency and clear it.
@@ -828,7 +815,10 @@ class queue_impl {
828815
Handler.depends_on(*ExternalEvent);
829816

830817
auto EventRet = Handler.finalize();
831-
EventToBuildDeps = getSyclObjImpl(EventRet);
818+
819+
if (getSyclObjImpl(EventRet)->isHost()) {
820+
LastHostTaskEvent = getSyclObjImpl(EventRet);
821+
}
832822

833823
return EventRet;
834824
}
@@ -1012,6 +1002,9 @@ class queue_impl {
10121002
/// need to emulate it with multiple native in-order queues.
10131003
bool MEmulateOOO = false;
10141004

1005+
// TODO: this is for in-order queue only. Move it to separate struct.
1006+
EventImplPtr LastHostTaskEvent = nullptr;
1007+
10151008
// Access should be guarded with MMutex
10161009
struct DependencyTrackingItems {
10171010
// This event is employed for enhanced dependency tracking with in-order

sycl/source/queue.cpp

Lines changed: 5 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -293,24 +293,6 @@ void queue::wait_and_throw_proxy(const detail::code_location &CodeLoc) {
293293
impl->wait_and_throw(CodeLoc);
294294
}
295295

296-
static event
297-
getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
298-
// This function should not be called when a queue is recording to a graph,
299-
// as a graph can record from multiple queues and we cannot guarantee the
300-
// last node added by an in-order queue will be the last node added to the
301-
// graph.
302-
assert(!QueueImpl->hasCommandGraph() &&
303-
"Should not be called in on graph recording.");
304-
305-
sycl::detail::optional<event> LastEvent = QueueImpl->getLastEvent();
306-
if (LastEvent)
307-
return *LastEvent;
308-
309-
// If there was no last event, we create an empty one.
310-
return detail::createSyclObjFromImpl<event>(
311-
std::make_shared<detail::event_impl>(std::nullopt));
312-
}
313-
314296
/// Prevents any commands submitted afterward to this queue from executing
315297
/// until all commands previously submitted to this queue have entered the
316298
/// complete state.
@@ -321,10 +303,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
321303
event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) {
322304
if (is_in_order() && !impl->hasCommandGraph() && !impl->MDiscardEvents &&
323305
!impl->MIsProfilingEnabled) {
324-
event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
325-
// If the last event was discarded, fall back to enqueuing a barrier.
326-
if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
327-
return InOrderLastEvent;
306+
return detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));
328307
}
329308

330309
return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc);
@@ -346,13 +325,10 @@ event queue::ext_oneapi_submit_barrier(const std::vector<event> &WaitList,
346325
auto EventImpl = detail::getSyclObjImpl(Event);
347326
return (EventImpl->isDefaultConstructed() || EventImpl->isNOP()) &&
348327
!EventImpl->hasCommandGraph();
349-
});
328+
}); // TODO: is this needed?
350329
if (is_in_order() && !impl->hasCommandGraph() && !impl->MDiscardEvents &&
351330
!impl->MIsProfilingEnabled && AllEventsEmptyOrNop) {
352-
event InOrderLastEvent = getBarrierEventForInorderQueueHelper(impl);
353-
// If the last event was discarded, fall back to enqueuing a barrier.
354-
if (!detail::getSyclObjImpl(InOrderLastEvent)->isDiscarded())
355-
return InOrderLastEvent;
331+
return detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));;
356332
}
357333

358334
return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); },
@@ -432,20 +408,8 @@ sycl::detail::optional<event> queue::ext_oneapi_get_last_event_impl() const {
432408
make_error_code(errc::invalid),
433409
"ext_oneapi_get_last_event() can only be called on in-order queues.");
434410

435-
sycl::detail::optional<event> LastEvent = impl->getLastEvent();
436-
437-
// If there was no last event, the queue is yet to have any work submitted and
438-
// we return a std::nullopt.
439-
if (!LastEvent)
440-
return std::nullopt;
441-
442-
// If the last event was discarded or a NOP, we insert a marker to represent
443-
// an event at end.
444-
auto LastEventImpl = detail::getSyclObjImpl(*LastEvent);
445-
if (LastEventImpl->isDiscarded() || LastEventImpl->isNOP())
446-
LastEvent =
447-
detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));
448-
return LastEvent;
411+
// We insert a marker to represent an event at end.
412+
return detail::createSyclObjFromImpl<event>(impl->insertMarkerEvent(impl));
449413
}
450414

451415
void queue::ext_oneapi_set_external_event(const event &external_event) {

0 commit comments

Comments
 (0)