diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f2ac3963b76c6..75b5060f03403 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1722,12 +1722,19 @@ ur_result_t MemCpyCommandHost::enqueueImp() { } flushCrossQueueDeps(EventImpls, MWorkerQueue); - MemoryManager::copy( - MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), - MSrcQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, - MSrcReq.MOffset, MSrcReq.MElemSize, *MDstPtr, MQueue, MDstReq.MDims, - MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset, - MDstReq.MElemSize, std::move(RawEvents), MEvent->getHandleRef(), MEvent); + + try { + MemoryManager::copy( + MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(), + MSrcQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange, + MSrcReq.MOffset, MSrcReq.MElemSize, *MDstPtr, MQueue, MDstReq.MDims, + MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset, + MDstReq.MElemSize, std::move(RawEvents), MEvent->getHandleRef(), + MEvent); + } catch (sycl::exception &e) { + return static_cast(get_ur_error(e)); + } + return UR_RESULT_SUCCESS; } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 5f04439b7ea0a..aafe58b4f8ffe 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -218,27 +218,40 @@ EventImplPtr Scheduler::addCopyBack(Requirement *Req) { } std::vector ToCleanUp; + // EnqueueCommand will try to enqueue dependencies (previous operations on the + // buffer). If any dep kernel failed it would be reported as sync exception or + // async exception on host task completion and enqueue attempt. + // No need to report those failures again in copy back submission. So report + // only copy back auxiliary and main command failures. + bool CopyBackCmdsFailed = false; try { ReadLockT Lock = acquireReadLock(); EnqueueResultT Res; - bool Enqueued; + bool Enqueued = false; for (Command *Cmd : AuxiliaryCmds) { Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) { + CopyBackCmdsFailed |= Res.MCmd == Cmd; throw exception(make_error_code(errc::runtime), "Enqueue process failed."); + } } Enqueued = GraphProcessor::enqueueCommand(NewCmd, Lock, Res, ToCleanUp, NewCmd); - if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) { + CopyBackCmdsFailed |= Res.MCmd == NewCmd; throw exception(make_error_code(errc::runtime), "Enqueue process failed."); + } } catch (...) { - auto WorkerQueue = NewCmd->getEvent()->getWorkerQueue(); - assert(WorkerQueue && "WorkerQueue for CopyBack command must be not null"); - WorkerQueue->reportAsyncException(std::current_exception()); + if (CopyBackCmdsFailed) { + auto WorkerQueue = NewCmd->getEvent()->getWorkerQueue(); + assert(WorkerQueue && + "WorkerQueue for CopyBack command must be not null"); + WorkerQueue->reportAsyncException(std::current_exception()); + } } EventImplPtr NewEvent = NewCmd->getEvent(); cleanupCommands(ToCleanUp); diff --git a/sycl/unittests/scheduler/FailedCommands.cpp b/sycl/unittests/scheduler/FailedCommands.cpp index 5443099b790f0..9273194d41344 100644 --- a/sycl/unittests/scheduler/FailedCommands.cpp +++ b/sycl/unittests/scheduler/FailedCommands.cpp @@ -9,6 +9,7 @@ #include "SchedulerTest.hpp" #include "SchedulerTestUtils.hpp" +#include #include using namespace sycl; @@ -42,3 +43,46 @@ TEST_F(SchedulerTest, FailedDependency) { ASSERT_EQ(MDep.MEnqueueStatus, detail::EnqueueResultT::SyclEnqueueFailed) << "MDep should be marked as failed\n"; } + +void RunWithFailedCommandsAndCheck(bool SyncExceptionExpected, + int AsyncExceptionCountExpected) { + platform Plt = sycl::platform(); + int ExceptionListSize = 0; + sycl::async_handler AsyncHandler = + [&ExceptionListSize](sycl::exception_list ExceptionList) { + ExceptionListSize = ExceptionList.size(); + }; + bool ExceptionThrown = false; + queue Queue(context(Plt), default_selector_v, AsyncHandler); + { + int initVal = 0; + sycl::buffer Buf(&initVal, 1); + try { + Queue.submit([&](sycl::handler &CGH) { + Buf.get_access(CGH); + CGH.single_task>([]() {}); + }); + } catch (...) { + ExceptionThrown = true; + } + } + EXPECT_EQ(ExceptionThrown, SyncExceptionExpected); + Queue.wait_and_throw(); + EXPECT_EQ(ExceptionListSize, AsyncExceptionCountExpected); +} + +ur_result_t failingUrCall(void *) { return UR_RESULT_ERROR_UNKNOWN; } + +TEST_F(SchedulerTest, FailedKernelException) { + unittest::UrMock<> Mock; + mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch", + &failingUrCall); + RunWithFailedCommandsAndCheck(true, 0); +} + +TEST_F(SchedulerTest, FailedCopyBackException) { + unittest::UrMock<> Mock; + mock::getCallbacks().set_before_callback("urEnqueueMemBufferRead", + &failingUrCall); + RunWithFailedCommandsAndCheck(false, 1); +}