From 9c8002dc9ad242c9665a3a58e84b9069aa718c6e Mon Sep 17 00:00:00 2001 From: Ewan Crawford Date: Tue, 18 Jun 2024 14:14:52 +0100 Subject: [PATCH] [SYCL][Graph] Fix queue recording barrier to different graphs Recording barrier submissions to from the same queue to a different graph current produces the following error with added regression tests: ``` Terminate called after throwing an instance of 'sycl::_V1::exception' what(): Graph nodes cannot depend on events from another graph. ``` This is because the queue implementation doesn't clear all the state around what the last queue submission was between graph recordings. Fixed by clearing all members of the barrier book keeping struct in the queue. --- sycl/source/detail/queue_impl.hpp | 8 ++- .../RecordReplay/barrier_multi_graph.cpp | 58 +++++++++++++++++++ .../Extensions/CommandGraph/Regressions.cpp | 27 +++++++++ 3 files changed, 92 insertions(+), 1 deletion(-) create mode 100644 sycl/test-e2e/Graph/RecordReplay/barrier_multi_graph.cpp diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 82334e6467dfd..52d01dc2923a6 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -732,7 +732,7 @@ class queue_impl { std::shared_ptr Graph) { std::lock_guard Lock(MMutex); MGraph = Graph; - MExtGraphDeps.LastEventPtr = nullptr; + MExtGraphDeps.reset(); } std::shared_ptr @@ -938,6 +938,12 @@ class queue_impl { // ordering std::vector UnenqueuedCmdEvents; EventImplPtr LastBarrier; + + void reset() { + LastEventPtr = nullptr; + UnenqueuedCmdEvents.clear(); + LastBarrier = nullptr; + } } MDefaultGraphDeps, MExtGraphDeps; const bool MIsInorder; diff --git a/sycl/test-e2e/Graph/RecordReplay/barrier_multi_graph.cpp b/sycl/test-e2e/Graph/RecordReplay/barrier_multi_graph.cpp new file mode 100644 index 0000000000000..deee2abc9f390 --- /dev/null +++ b/sycl/test-e2e/Graph/RecordReplay/barrier_multi_graph.cpp @@ -0,0 +1,58 @@ +// RUN: %{build} -o %t.out +// RUN: %{run} %t.out +// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// Extra run to check for immediate-command-list in Level Zero +// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} +// + +#include "../graph_common.hpp" + +int main() { + queue Queue{}; + + int *PtrA = malloc_device(Size, Queue); + int *PtrB = malloc_device(Size, Queue); + + exp_ext::command_graph GraphA{Queue}; + exp_ext::command_graph GraphB{Queue}; + + GraphA.begin_recording(Queue); + auto EventA = Queue.submit([&](handler &CGH) { + CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrA[it] = it; }); + }); + Queue.ext_oneapi_submit_barrier({EventA}); + Queue.copy(PtrA, PtrB, Size); + GraphA.end_recording(); + + GraphB.begin_recording(Queue); + auto EventB = Queue.submit([&](handler &CGH) { + CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrA[it] = it * 2; }); + }); + Queue.ext_oneapi_submit_barrier(); + Queue.copy(PtrA, PtrB, Size); + GraphB.end_recording(); + + auto ExecGraphA = GraphA.finalize(); + auto ExecGraphB = GraphB.finalize(); + + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraphA); }).wait(); + + std::array Output; + Queue.memcpy(Output.data(), PtrB, sizeof(int) * Size).wait(); + + for (int i = 0; i < Size; i++) { + assert(Output[i] == i); + } + + Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraphB); }).wait(); + Queue.memcpy(Output.data(), PtrB, sizeof(int) * Size).wait(); + + for (int i = 0; i < Size; i++) { + assert(Output[i] == 2 * i); + } + + free(PtrA, Queue); + free(PtrB, Queue); + return 0; +} diff --git a/sycl/unittests/Extensions/CommandGraph/Regressions.cpp b/sycl/unittests/Extensions/CommandGraph/Regressions.cpp index 17b58f542d760..94b8549ed7c04 100644 --- a/sycl/unittests/Extensions/CommandGraph/Regressions.cpp +++ b/sycl/unittests/Extensions/CommandGraph/Regressions.cpp @@ -58,3 +58,30 @@ TEST_F(CommandGraphTest, AccessorModeRegression) { EXPECT_EQ(NodeC.get_predecessors().size(), 0ul); EXPECT_EQ(NodeC.get_successors().size(), 0ul); } + +TEST_F(CommandGraphTest, QueueRecordBarrierMultipleGraph) { + // Test that using barriers recorded from the same queue to + // different graphs. + + Graph.begin_recording(Queue); + auto NodeKernel = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Queue.ext_oneapi_submit_barrier({NodeKernel}); + Graph.end_recording(Queue); + + experimental::command_graph GraphB{ + Queue}; + GraphB.begin_recording(Queue); + auto NodeKernelB = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Queue.ext_oneapi_submit_barrier({NodeKernelB}); + GraphB.end_recording(Queue); + + experimental::command_graph GraphC{ + Queue}; + GraphC.begin_recording(Queue); + auto NodeKernelC = Queue.submit( + [&](sycl::handler &cgh) { cgh.single_task>([]() {}); }); + Queue.ext_oneapi_submit_barrier(); + GraphC.end_recording(Queue); +}