From 38a11439d8f1927bf2525a14ab39f88f3df03ea7 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Wed, 17 Jul 2024 08:21:02 -0700 Subject: [PATCH 1/4] [SYCL] Fix exception duplication for copy back command Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/scheduler.cpp | 25 ++++++++++---- sycl/unittests/scheduler/FailedCommands.cpp | 38 +++++++++++++++++++++ 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 258ab3f6d6a54..0e89de9b011b1 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -214,27 +214,40 @@ EventImplPtr Scheduler::addCopyBack(Requirement *Req) { } std::vector ToCleanUp; + // EnqueueCommand will try to enqueue dependencies (previous operations on the + // buffer). If any dep kernel failed it would be reported as sync exception or + // async exception on host task completion and enqueue attempt. + // No need to report those failures again in copy back submission. So report + // only copy back auxiliary and main command failures. + bool CopyBackCmdsFailed = false; try { ReadLockT Lock = acquireReadLock(); EnqueueResultT Res; - bool Enqueued; + bool Enqueued = false; for (Command *Cmd : AuxiliaryCmds) { Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) { + CopyBackCmdsFailed |= Res.MCmd == Cmd; throw exception(make_error_code(errc::runtime), "Enqueue process failed."); + } } Enqueued = GraphProcessor::enqueueCommand(NewCmd, Lock, Res, ToCleanUp, NewCmd); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) { + CopyBackCmdsFailed |= Res.MCmd == NewCmd; throw exception(make_error_code(errc::runtime), "Enqueue process failed."); + } } catch (...) { - auto WorkerQueue = NewCmd->getEvent()->getWorkerQueue(); - assert(WorkerQueue && "WorkerQueue for CopyBack command must be not null"); - WorkerQueue->reportAsyncException(std::current_exception()); + if (CopyBackCmdsFailed) { + auto WorkerQueue = NewCmd->getEvent()->getWorkerQueue(); + assert(WorkerQueue && + "WorkerQueue for CopyBack command must be not null"); + WorkerQueue->reportAsyncException(std::current_exception()); + } } EventImplPtr NewEvent = NewCmd->getEvent(); cleanupCommands(ToCleanUp); diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 6e3014ce79179..24d01e7859ede 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -10,6 +10,7 @@ #include "SchedulerTestUtils.hpp" #include +#include using namespace sycl; @@ -42,3 +43,40 @@ TEST_F(SchedulerTest, FailedDependency) { ASSERT_EQ(MDep.MEnqueueStatus, detail::EnqueueResultT::SyclEnqueueFailed) << "MDep should be marked as failed\n"; } + +inline pi_result customEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, + const size_t *, const size_t *, + const size_t *, + pi_uint32 EventsCount, + const pi_event *, pi_event *) { + return PI_ERROR_UNKNOWN; +} + +TEST_F(SchedulerTest, FailedKernelException) { + unittest::PiMock Mock; + Mock.redefineBefore( + customEnqueueKernelLaunch); + platform Plt = Mock.getPlatform(); + int ExceptionListSize = 0; + sycl::async_handler AsyncHandler = + [&ExceptionListSize](sycl::exception_list ExceptionList) { + ExceptionListSize = ExceptionList.size(); + }; + bool ExceptionThrown = false; + queue Queue(context(Plt), default_selector_v, AsyncHandler); + { + int initVal = 0; + sycl::buffer Buf(&initVal, 1); + try { + Queue.submit([&](sycl::handler &CGH) { + Buf.get_access(CGH); + CGH.single_task>([]() {}); + }); + } catch (...) { + ExceptionThrown = true; + } + } + EXPECT_TRUE(ExceptionThrown); + Queue.wait_and_throw(); + EXPECT_EQ(ExceptionListSize, 0); +} \ No newline at end of file From 361b74314e7e5af1f9ffc101d151d47aa1ce854e Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 18 Jul 2024 03:47:31 -0700 Subject: [PATCH 2/4] fix unit tests Signed-off-by: Tikhomirova, Kseniya --- sycl/unittests/scheduler/FailedCommands.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 24d01e7859ede..13b52ef3b3187 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -44,18 +44,18 @@ TEST_F(SchedulerTest, FailedDependency) { << "MDep should be marked as failed\n"; } -inline pi_result customEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, - const size_t *, const size_t *, - const size_t *, - pi_uint32 EventsCount, - const pi_event *, pi_event *) { +inline pi_result failingEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, + const size_t *, const size_t *, + const size_t *, + pi_uint32 EventsCount, + const pi_event *, pi_event *) { return PI_ERROR_UNKNOWN; } TEST_F(SchedulerTest, FailedKernelException) { unittest::PiMock Mock; Mock.redefineBefore( - customEnqueueKernelLaunch); + failingEnqueueKernelLaunch); platform Plt = Mock.getPlatform(); int ExceptionListSize = 0; sycl::async_handler AsyncHandler = From cbd36a1a47c58a05d1a0dfb1d18215c621d500b8 Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 18 Jul 2024 04:22:05 -0700 Subject: [PATCH 3/4] add more tests Signed-off-by: Tikhomirova, Kseniya --- sycl/source/detail/scheduler/commands.cpp | 17 +++++---- sycl/unittests/scheduler/FailedCommands.cpp | 39 ++++++++++++++++++++- 2 files changed, 49 insertions(+), 7 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index e8ec2d9764afe..5635b76f93ba1 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1719,12 +1719,17 @@ pi_int32 MemCpyCommandHost::enqueueImp() { } flushCrossQueueDeps(EventImpls, MWorkerQueue); - MemoryManager::copy( - MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), - MSrcQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, - MSrcReq.MOffset, MSrcReq.MElemSize, *MDstPtr, MQueue, MDstReq.MDims, - MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset, - MDstReq.MElemSize, std::move(RawEvents), MEvent->getHandleRef(), MEvent); + try { + MemoryManager::copy( + MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), + MSrcQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, + MSrcReq.MOffset, MSrcReq.MElemSize, *MDstPtr, MQueue, MDstReq.MDims, + MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset, + MDstReq.MElemSize, std::move(RawEvents), MEvent->getHandleRef(), + MEvent); + } catch (sycl::exception &e) { + return get_pi_error(e); + } return PI_SUCCESS; } diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 13b52ef3b3187..df54619f0d18d 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -79,4 +79,41 @@ TEST_F(SchedulerTest, FailedKernelException) { EXPECT_TRUE(ExceptionThrown); Queue.wait_and_throw(); EXPECT_EQ(ExceptionListSize, 0); -} \ No newline at end of file +} + +inline pi_result +failingEnqueueMemBufferRead(pi_queue queue, pi_mem buffer, + pi_bool blocking_read, size_t offset, size_t size, + void *ptr, pi_uint32 num_events_in_wait_list, + const pi_event *event_wait_list, pi_event *event) { + return PI_ERROR_UNKNOWN; +} + +TEST_F(SchedulerTest, FailedCopyBackException) { + unittest::PiMock Mock; + Mock.redefineBefore( + failingEnqueueMemBufferRead); + platform Plt = Mock.getPlatform(); + int ExceptionListSize = 0; + sycl::async_handler AsyncHandler = + [&ExceptionListSize](sycl::exception_list ExceptionList) { + ExceptionListSize = ExceptionList.size(); + }; + bool ExceptionThrown = false; + queue Queue(context(Plt), default_selector_v, AsyncHandler); + { + int initVal = 0; + sycl::buffer Buf(&initVal, 1); + try { + Queue.submit([&](sycl::handler &CGH) { + Buf.get_access(CGH); + CGH.single_task>([]() {}); + }); + } catch (...) { + ExceptionThrown = true; + } + } + EXPECT_FALSE(ExceptionThrown); + Queue.wait_and_throw(); + EXPECT_EQ(ExceptionListSize, 1); +} From 84cbe46c13d55344b5bb5bdcd23b142ca8912f4e Mon Sep 17 00:00:00 2001 From: "Tikhomirova, Kseniya" Date: Thu, 18 Jul 2024 04:26:01 -0700 Subject: [PATCH 4/4] extrcat common from unittests Signed-off-by: Tikhomirova, Kseniya --- sycl/unittests/scheduler/FailedCommands.cpp | 58 ++++++++------------- 1 file changed, 21 insertions(+), 37 deletions(-) diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index df54619f0d18d..b396184c34922 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -44,18 +44,9 @@ TEST_F(SchedulerTest, FailedDependency) { << "MDep should be marked as failed\n"; } -inline pi_result failingEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, - const size_t *, const size_t *, - const size_t *, - pi_uint32 EventsCount, - const pi_event *, pi_event *) { - return PI_ERROR_UNKNOWN; -} - -TEST_F(SchedulerTest, FailedKernelException) { - unittest::PiMock Mock; - Mock.redefineBefore( - failingEnqueueKernelLaunch); +void RunWithFailedCommandsAndCheck(unittest::PiMock &Mock, + bool SyncExceptionExpected, + int AsyncExceptionCountExpected) { platform Plt = Mock.getPlatform(); int ExceptionListSize = 0; sycl::async_handler AsyncHandler = @@ -76,9 +67,24 @@ TEST_F(SchedulerTest, FailedKernelException) { ExceptionThrown = true; } } - EXPECT_TRUE(ExceptionThrown); + EXPECT_EQ(ExceptionThrown, SyncExceptionExpected); Queue.wait_and_throw(); - EXPECT_EQ(ExceptionListSize, 0); + EXPECT_EQ(ExceptionListSize, AsyncExceptionCountExpected); +} + +inline pi_result failingEnqueueKernelLaunch(pi_queue, pi_kernel, pi_uint32, + const size_t *, const size_t *, + const size_t *, + pi_uint32 EventsCount, + const pi_event *, pi_event *) { + return PI_ERROR_UNKNOWN; +} + +TEST_F(SchedulerTest, FailedKernelException) { + unittest::PiMock Mock; + Mock.redefineBefore( + failingEnqueueKernelLaunch); + RunWithFailedCommandsAndCheck(Mock, true, 0); } inline pi_result @@ -93,27 +99,5 @@ TEST_F(SchedulerTest, FailedCopyBackException) { unittest::PiMock Mock; Mock.redefineBefore( failingEnqueueMemBufferRead); - platform Plt = Mock.getPlatform(); - int ExceptionListSize = 0; - sycl::async_handler AsyncHandler = - [&ExceptionListSize](sycl::exception_list ExceptionList) { - ExceptionListSize = ExceptionList.size(); - }; - bool ExceptionThrown = false; - queue Queue(context(Plt), default_selector_v, AsyncHandler); - { - int initVal = 0; - sycl::buffer Buf(&initVal, 1); - try { - Queue.submit([&](sycl::handler &CGH) { - Buf.get_access(CGH); - CGH.single_task>([]() {}); - }); - } catch (...) { - ExceptionThrown = true; - } - } - EXPECT_FALSE(ExceptionThrown); - Queue.wait_and_throw(); - EXPECT_EQ(ExceptionListSize, 1); + RunWithFailedCommandsAndCheck(Mock, false, 1); }