diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp
index 82334e6467dfd..52d01dc2923a6 100644
--- a/sycl/source/detail/queue_impl.hpp
+++ b/sycl/source/detail/queue_impl.hpp
@@ -732,7 +732,7 @@ class queue_impl {
       std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> Graph) {
     std::lock_guard<std::mutex> Lock(MMutex);
     MGraph = Graph;
-    MExtGraphDeps.LastEventPtr = nullptr;
+    MExtGraphDeps.reset();
   }
 
   std::shared_ptr<ext::oneapi::experimental::detail::graph_impl>
@@ -938,6 +938,12 @@ class queue_impl {
     // ordering
     std::vector<EventImplPtr> UnenqueuedCmdEvents;
     EventImplPtr LastBarrier;
+
+    void reset() {
+      LastEventPtr = nullptr;
+      UnenqueuedCmdEvents.clear();
+      LastBarrier = nullptr;
+    }
   } MDefaultGraphDeps, MExtGraphDeps;
 
   const bool MIsInorder;
diff --git a/sycl/test-e2e/Graph/RecordReplay/barrier_multi_graph.cpp b/sycl/test-e2e/Graph/RecordReplay/barrier_multi_graph.cpp
new file mode 100644
index 0000000000000..deee2abc9f390
--- /dev/null
+++ b/sycl/test-e2e/Graph/RecordReplay/barrier_multi_graph.cpp
@@ -0,0 +1,58 @@
+// RUN: %{build} -o %t.out
+// RUN: %{run} %t.out
+// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+// Extra run to check for immediate-command-list in Level Zero
+// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
+//
+
+#include "../graph_common.hpp"
+
+int main() {
+  queue Queue{};
+
+  int *PtrA = malloc_device<int>(Size, Queue);
+  int *PtrB = malloc_device<int>(Size, Queue);
+
+  exp_ext::command_graph GraphA{Queue};
+  exp_ext::command_graph GraphB{Queue};
+
+  GraphA.begin_recording(Queue);
+  auto EventA = Queue.submit([&](handler &CGH) {
+    CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrA[it] = it; });
+  });
+  Queue.ext_oneapi_submit_barrier({EventA});
+  Queue.copy(PtrA, PtrB, Size);
+  GraphA.end_recording();
+
+  GraphB.begin_recording(Queue);
+  auto EventB = Queue.submit([&](handler &CGH) {
+    CGH.parallel_for(range<1>{Size}, [=](id<1> it) { PtrA[it] = it * 2; });
+  });
+  Queue.ext_oneapi_submit_barrier();
+  Queue.copy(PtrA, PtrB, Size);
+  GraphB.end_recording();
+
+  auto ExecGraphA = GraphA.finalize();
+  auto ExecGraphB = GraphB.finalize();
+
+  Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraphA); }).wait();
+
+  std::array<int, Size> Output;
+  Queue.memcpy(Output.data(), PtrB, sizeof(int) * Size).wait();
+
+  for (int i = 0; i < Size; i++) {
+    assert(Output[i] == i);
+  }
+
+  Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(ExecGraphB); }).wait();
+  Queue.memcpy(Output.data(), PtrB, sizeof(int) * Size).wait();
+
+  for (int i = 0; i < Size; i++) {
+    assert(Output[i] == 2 * i);
+  }
+
+  free(PtrA, Queue);
+  free(PtrB, Queue);
+  return 0;
+}
diff --git a/sycl/unittests/Extensions/CommandGraph/Regressions.cpp b/sycl/unittests/Extensions/CommandGraph/Regressions.cpp
index 17b58f542d760..94b8549ed7c04 100644
--- a/sycl/unittests/Extensions/CommandGraph/Regressions.cpp
+++ b/sycl/unittests/Extensions/CommandGraph/Regressions.cpp
@@ -58,3 +58,30 @@ TEST_F(CommandGraphTest, AccessorModeRegression) {
   EXPECT_EQ(NodeC.get_predecessors().size(), 0ul);
   EXPECT_EQ(NodeC.get_successors().size(), 0ul);
 }
+
+TEST_F(CommandGraphTest, QueueRecordBarrierMultipleGraph) {
+  // Test that using barriers recorded from the same queue works with
+  // different graphs.
+
+  Graph.begin_recording(Queue);
+  auto NodeKernel = Queue.submit(
+      [&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
+  Queue.ext_oneapi_submit_barrier({NodeKernel});
+  Graph.end_recording(Queue);
+
+  experimental::command_graph<experimental::graph_state::modifiable> GraphB{
+      Queue};
+  GraphB.begin_recording(Queue);
+  auto NodeKernelB = Queue.submit(
+      [&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
+  Queue.ext_oneapi_submit_barrier({NodeKernelB});
+  GraphB.end_recording(Queue);
+
+  experimental::command_graph<experimental::graph_state::modifiable> GraphC{
+      Queue};
+  GraphC.begin_recording(Queue);
+  auto NodeKernelC = Queue.submit(
+      [&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
+  Queue.ext_oneapi_submit_barrier();
+  GraphC.end_recording(Queue);
+}