Skip to content

Commit d6340b6

Browse files
EwanCori-sky
andauthored
[SYCL][Graph] in-order queue barrier fix (#13193)
Fix for #13066 The special case for using barriers on an in-order queue is that the last event/node submitted to the queue is used as an event for the barrier to depend on. Looking at the last command submitted to the queue isn't correct for a graph, because previous commands submitted to a graph could have been added explicitly or from recording another queue. Therefore, there is not guaranteed that the last command submitted by the in-order queue is correct dependency for the barrier node in the graph. --------- Co-authored-by: Ori Sky <[email protected]>
1 parent 3288a66 commit d6340b6

File tree

5 files changed

+364
-20
lines changed

5 files changed

+364
-20
lines changed

sycl/doc/extensions/experimental/sycl_ext_oneapi_graph.asciidoc

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1703,11 +1703,15 @@ passed an invalid event.
17031703
The new handler methods, and queue shortcuts, defined by
17041704
link:../supported/sycl_ext_oneapi_enqueue_barrier.asciidoc[sycl_ext_oneapi_enqueue_barrier]
17051705
can only be used in graph nodes created using the Record & Replay API, as
1706-
barriers rely on events to enforce dependencies. A synchronous exception will be
1707-
thrown with error code `invalid` if a user tries to add them to a graph using
1708-
the Explicit API. Empty nodes created with the `node::depends_on_all_leaves`
1709-
property can be used instead of barriers when a user is building a graph with
1710-
the explicit API.
1706+
barriers rely on events to enforce dependencies. For barriers with an empty
1707+
wait list parameter, the semantics are that the barrier node being added to
1708+
will depend on all the existing graph leaf nodes, not only the leaf nodes
1709+
that were added from the queue being recorded.
1710+
1711+
A synchronous exception will be thrown with error code `invalid` if a user
1712+
tries to add them to a graph using the Explicit API. Empty nodes created with
1713+
the `node::depends_on_all_leaves` property can be used instead of barriers when
1714+
a user is building a graph with the explicit API.
17111715

17121716
==== sycl_ext_oneapi_memcpy2d
17131717

sycl/source/queue.cpp

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -207,14 +207,13 @@ void queue::wait_and_throw_proxy(const detail::code_location &CodeLoc) {
207207

208208
static event
209209
getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
210-
// The last command recorded in the graph is not tracked by the queue but by
211-
// the graph itself. We must therefore search for the last node/event in the
210+
// This function should not be called when a queue is recording to a graph,
211+
// as a graph can record from multiple queues and we cannot guarantee the
212+
// last node added by an in-order queue will be the last node added to the
212213
// graph.
213-
if (auto Graph = QueueImpl->getCommandGraph()) {
214-
auto LastEvent =
215-
Graph->getEventForNode(Graph->getLastInorderNode(QueueImpl));
216-
return sycl::detail::createSyclObjFromImpl<event>(LastEvent);
217-
}
214+
assert(!QueueImpl->getCommandGraph() &&
215+
"Should not be called in on graph recording.");
216+
218217
auto LastEvent = QueueImpl->getLastEvent();
219218
if (QueueImpl->MDiscardEvents) {
220219
std::cout << "Discard event enabled" << std::endl;
@@ -241,7 +240,7 @@ getBarrierEventForInorderQueueHelper(const detail::QueueImplPtr QueueImpl) {
241240
/// \return a SYCL event object, which corresponds to the queue the command
242241
/// group is being enqueued on.
243242
event queue::ext_oneapi_submit_barrier(const detail::code_location &CodeLoc) {
244-
if (is_in_order())
243+
if (is_in_order() && !impl->getCommandGraph())
245244
return getBarrierEventForInorderQueueHelper(impl);
246245

247246
return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(); }, CodeLoc);
@@ -260,10 +259,10 @@ event queue::ext_oneapi_submit_barrier(const std::vector<event> &WaitList,
260259
const detail::code_location &CodeLoc) {
261260
bool AllEventsEmptyOrNop = std::all_of(
262261
begin(WaitList), end(WaitList), [&](const event &Event) -> bool {
263-
return !detail::getSyclObjImpl(Event)->isContextInitialized() ||
264-
detail::getSyclObjImpl(Event)->isNOP();
262+
auto EventImpl = detail::getSyclObjImpl(Event);
263+
return !EventImpl->isContextInitialized() || EventImpl->isNOP();
265264
});
266-
if (is_in_order() && AllEventsEmptyOrNop)
265+
if (is_in_order() && !impl->getCommandGraph() && AllEventsEmptyOrNop)
267266
return getBarrierEventForInorderQueueHelper(impl);
268267

269268
return submit([=](handler &CGH) { CGH.ext_oneapi_barrier(WaitList); },
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
//
8+
9+
#include "../graph_common.hpp"
10+
11+
int main() {
12+
queue Queue1{{sycl::property::queue::in_order()}};
13+
queue Queue2{Queue1.get_context(),
14+
Queue1.get_device(),
15+
{sycl::property::queue::in_order()}};
16+
17+
int *PtrA = malloc_device<int>(Size, Queue1);
18+
int *PtrB = malloc_device<int>(Size, Queue1);
19+
20+
exp_ext::command_graph Graph{Queue1};
21+
Graph.begin_recording({Queue1, Queue2});
22+
23+
auto EventA = Queue1.submit([&](handler &CGH) {
24+
CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrA[it] = it; });
25+
});
26+
27+
Queue2.ext_oneapi_submit_barrier({EventA});
28+
29+
auto EventB = Queue2.copy(PtrA, PtrB, Size);
30+
Graph.end_recording();
31+
32+
auto ExecGraph = Graph.finalize();
33+
Queue1.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraph); });
34+
35+
std::array<int, Size> Output;
36+
Queue1.memcpy(Output.data(), PtrB, sizeof(int) * Size).wait();
37+
38+
for (int i = 0; i < Size; i++) {
39+
assert(Output[i] == i);
40+
}
41+
42+
free(PtrA, Queue1);
43+
free(PtrB, Queue1);
44+
return 0;
45+
}

sycl/test-e2e/InorderQueue/in_order_ext_oneapi_submit_barrier.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,7 @@ int main() {
7070

7171
{
7272
// Test cast 4 - graph.
73-
sycl::queue GQueue{
74-
{sycl::property::queue::in_order{},
75-
sycl::ext::intel::property::queue::no_immediate_command_list{}}};
73+
sycl::queue GQueue{sycl::property::queue::in_order{}};
7674

7775
if (GQueue.get_device().has(sycl::aspect::ext_oneapi_graph)) {
7876
std::cout << "Test 4" << std::endl;
@@ -84,7 +82,6 @@ int main() {
8482
cgh.single_task<class kernel3>([=]() { *Res += 9; });
8583
});
8684
auto Barrier = GQueue.ext_oneapi_submit_barrier();
87-
assert(Barrier == BeforeBarrierEvent);
8885
GQueue.submit([&](sycl::handler &cgh) {
8986
cgh.single_task<class kernel4>([=]() { *Res *= 2; });
9087
});

0 commit comments

Comments
 (0)