Skip to content

Commit 82f77d1

Browse files
authored
[SYCL][Graph] Fix queue recording barrier to different graphs (intel#14212)
Recording barrier submissions from the same queue to a different graph currently produces the following error with the added regression test: ``` Terminate called after throwing an instance of 'sycl::_V1::exception' what(): Graph nodes cannot depend on events from another graph. ``` This is because the queue implementation doesn't clear all the state around what the last queue submission was between graph recordings. Fixed by clearing all members of the barrier bookkeeping struct in the queue.
1 parent 8e3b8ce commit 82f77d1

File tree

3 files changed

+92
-1
lines changed

3 files changed

+92
-1
lines changed

sycl/source/detail/queue_impl.hpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -732,7 +732,7 @@ class queue_impl {
732732
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph) {
733733
std::lock_guard<std::mutex> Lock(MMutex);
734734
MGraph = Graph;
735-
MExtGraphDeps.LastEventPtr = nullptr;
735+
MExtGraphDeps.reset();
736736
}
737737

738738
std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
@@ -938,6 +938,12 @@ class queue_impl {
938938
// ordering
939939
std::vector<EventImplPtr> UnenqueuedCmdEvents;
940940
EventImplPtr LastBarrier;
941+
942+
// Clears every piece of dependency bookkeeping held by this struct: the last
// event pointer, the pending unenqueued command events, and the last barrier.
// queue_impl calls this on MExtGraphDeps when its graph is assigned, so that
// no state from an earlier graph recording is carried into the next one.
void reset() {
943+
LastEventPtr = nullptr;
944+
UnenqueuedCmdEvents.clear();
945+
LastBarrier = nullptr;
946+
}
941947
} MDefaultGraphDeps, MExtGraphDeps;
942948

943949
const bool MIsInorder;
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
//
8+
9+
#include "../graph_common.hpp"
10+
11+
// Regression test: records a kernel, a barrier and a copy from the same queue
// into two different graphs, then executes both graphs and checks the data
// copied into PtrB after each execution.
int main() {
12+
queue Queue{};
13+
14+
int *PtrA = malloc_device<int>(Size, Queue);
15+
int *PtrB = malloc_device<int>(Size, Queue);
16+
17+
exp_ext::command_graph GraphA{Queue};
18+
exp_ext::command_graph GraphB{Queue};
19+
20+
// GraphA: the kernel writes PtrA[i] = i, the barrier is submitted with
// EventA in its event list, and PtrA is then copied to PtrB.
GraphA.begin_recording(Queue);
21+
auto EventA = Queue.submit([&](handler &CGH) {
22+
CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrA[it] = it; });
23+
});
24+
Queue.ext_oneapi_submit_barrier({EventA});
25+
Queue.copy(PtrA, PtrB, Size);
26+
GraphA.end_recording();
27+
28+
// GraphB: same shape recorded from the same queue, but the kernel writes
// PtrA[i] = 2 * i and the barrier is submitted without an event list.
// EventB is not referenced again after this submission.
GraphB.begin_recording(Queue);
29+
auto EventB = Queue.submit([&](handler &CGH) {
30+
CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrA[it] = it * 2; });
31+
});
32+
Queue.ext_oneapi_submit_barrier();
33+
Queue.copy(PtrA, PtrB, Size);
34+
GraphB.end_recording();
35+
36+
auto ExecGraphA = GraphA.finalize();
37+
auto ExecGraphB = GraphB.finalize();
38+
39+
// Run GraphA and wait for it before reading PtrB back to the host.
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraphA); }).wait();
40+
41+
std::array<int, Size> Output;
42+
Queue.memcpy(Output.data(), PtrB, sizeof(int) * Size).wait();
43+
44+
// After GraphA, PtrB must hold the values written by GraphA's kernel.
for (int i = 0; i < Size; i++) {
45+
assert(Output[i] == i);
46+
}
47+
48+
// Run GraphB and read PtrB back again into the same host buffer.
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraphB); }).wait();
49+
Queue.memcpy(Output.data(), PtrB, sizeof(int) * Size).wait();
50+
51+
// After GraphB, PtrB must hold the doubled values written by GraphB's kernel.
for (int i = 0; i < Size; i++) {
52+
assert(Output[i] == 2 * i);
53+
}
54+
55+
free(PtrA, Queue);
56+
free(PtrB, Queue);
57+
return 0;
58+
}

sycl/unittests/Extensions/CommandGraph/Regressions.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,30 @@ TEST_F(CommandGraphTest, AccessorModeRegression) {
5858
EXPECT_EQ(NodeC.get_predecessors().size(), 0ul);
5959
EXPECT_EQ(NodeC.get_successors().size(), 0ul);
6060
}
61+
62+
// Records a kernel followed by a barrier from one queue into three different
// graphs in turn. The body contains no EXPECT/ASSERT checks.
// NOTE(review): presumably the test passes when none of the recordings
// throws, and Graph / Queue are members of the CommandGraphTest fixture --
// confirm against the fixture definition.
TEST_F(CommandGraphTest, QueueRecordBarrierMultipleGraph) {
63+
// Test that barriers can be recorded from the same queue to
64+
// different graphs.
65+
66+
// First recording: the barrier is submitted with the kernel's event.
Graph.begin_recording(Queue);
67+
auto NodeKernel = Queue.submit(
68+
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
69+
Queue.ext_oneapi_submit_barrier({NodeKernel});
70+
Graph.end_recording(Queue);
71+
72+
// Second graph, same queue: the barrier again takes the kernel's event.
experimental::command_graph<experimental::graph_state::modifiable> GraphB{
73+
Queue};
74+
GraphB.begin_recording(Queue);
75+
auto NodeKernelB = Queue.submit(
76+
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
77+
Queue.ext_oneapi_submit_barrier({NodeKernelB});
78+
GraphB.end_recording(Queue);
79+
80+
// Third graph, same queue: the barrier is submitted without an event list.
experimental::command_graph<experimental::graph_state::modifiable> GraphC{
81+
Queue};
82+
GraphC.begin_recording(Queue);
83+
auto NodeKernelC = Queue.submit(
84+
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
85+
Queue.ext_oneapi_submit_barrier();
86+
GraphC.end_recording(Queue);
87+
}

0 commit comments

Comments
 (0)