Skip to content

Commit a3d33b2

Browse files
[SYCL] Fix exception duplication for copy back command (#14622)
kernel_features_device_has_exceptions fails due to duplication of sync and async exceptions when command before copy back (copy back dependency) fails. This commit adds check if failed command is really some of copy back related commands or something already reported. Except from this it makes enqueueImp for mem copy host command handle exception instead of ignoring and bypassing (MemoryManager operations return void and throws exceptions) to enable proper enqueueCommand results handling. --------- Signed-off-by: Tikhomirova, Kseniya <[email protected]>
1 parent 2578b75 commit a3d33b2

File tree

3 files changed

+76
-12
lines changed

3 files changed

+76
-12
lines changed

sycl/source/detail/scheduler/commands.cpp

+13-6
Original file line numberDiff line numberDiff line change
@@ -1722,12 +1722,19 @@ ur_result_t MemCpyCommandHost::enqueueImp() {
17221722
}
17231723

17241724
flushCrossQueueDeps(EventImpls, MWorkerQueue);
1725-
MemoryManager::copy(
1726-
MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(),
1727-
MSrcQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange,
1728-
MSrcReq.MOffset, MSrcReq.MElemSize, *MDstPtr, MQueue, MDstReq.MDims,
1729-
MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset,
1730-
MDstReq.MElemSize, std::move(RawEvents), MEvent->getHandleRef(), MEvent);
1725+
1726+
try {
1727+
MemoryManager::copy(
1728+
MSrcAllocaCmd->getSYCLMemObj(), MSrcAllocaCmd->getMemAllocation(),
1729+
MSrcQueue, MSrcReq.MDims, MSrcReq.MMemoryRange, MSrcReq.MAccessRange,
1730+
MSrcReq.MOffset, MSrcReq.MElemSize, *MDstPtr, MQueue, MDstReq.MDims,
1731+
MDstReq.MMemoryRange, MDstReq.MAccessRange, MDstReq.MOffset,
1732+
MDstReq.MElemSize, std::move(RawEvents), MEvent->getHandleRef(),
1733+
MEvent);
1734+
} catch (sycl::exception &e) {
1735+
return static_cast<ur_result_t>(get_ur_error(e));
1736+
}
1737+
17311738
return UR_RESULT_SUCCESS;
17321739
}
17331740

sycl/source/detail/scheduler/scheduler.cpp

+19-6
Original file line numberDiff line numberDiff line change
@@ -218,27 +218,40 @@ EventImplPtr Scheduler::addCopyBack(Requirement *Req) {
218218
}
219219

220220
std::vector<Command *> ToCleanUp;
221+
// EnqueueCommand will try to enqueue dependencies (previous operations on the
222+
// buffer). If any dep kernel failed it would be reported as sync exception or
223+
// async exception on host task completion and enqueue attempt.
224+
// No need to report those failures again in copy back submission. So report
225+
// only copy back auxiliary and main command failures.
226+
bool CopyBackCmdsFailed = false;
221227
try {
222228
ReadLockT Lock = acquireReadLock();
223229
EnqueueResultT Res;
224-
bool Enqueued;
230+
bool Enqueued = false;
225231

226232
for (Command *Cmd : AuxiliaryCmds) {
227233
Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd);
228-
if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
234+
if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) {
235+
CopyBackCmdsFailed |= Res.MCmd == Cmd;
229236
throw exception(make_error_code(errc::runtime),
230237
"Enqueue process failed.");
238+
}
231239
}
232240

233241
Enqueued =
234242
GraphProcessor::enqueueCommand(NewCmd, Lock, Res, ToCleanUp, NewCmd);
235-
if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
243+
if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) {
244+
CopyBackCmdsFailed |= Res.MCmd == NewCmd;
236245
throw exception(make_error_code(errc::runtime),
237246
"Enqueue process failed.");
247+
}
238248
} catch (...) {
239-
auto WorkerQueue = NewCmd->getEvent()->getWorkerQueue();
240-
assert(WorkerQueue && "WorkerQueue for CopyBack command must be not null");
241-
WorkerQueue->reportAsyncException(std::current_exception());
249+
if (CopyBackCmdsFailed) {
250+
auto WorkerQueue = NewCmd->getEvent()->getWorkerQueue();
251+
assert(WorkerQueue &&
252+
"WorkerQueue for CopyBack command must be not null");
253+
WorkerQueue->reportAsyncException(std::current_exception());
254+
}
242255
}
243256
EventImplPtr NewEvent = NewCmd->getEvent();
244257
cleanupCommands(ToCleanUp);

sycl/unittests/scheduler/FailedCommands.cpp

+44
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "SchedulerTest.hpp"
1010
#include "SchedulerTestUtils.hpp"
1111

12+
#include <helpers/TestKernel.hpp>
1213
#include <helpers/UrMock.hpp>
1314

1415
using namespace sycl;
@@ -42,3 +43,46 @@ TEST_F(SchedulerTest, FailedDependency) {
4243
ASSERT_EQ(MDep.MEnqueueStatus, detail::EnqueueResultT::SyclEnqueueFailed)
4344
<< "MDep should be marked as failed\n";
4445
}
46+
47+
void RunWithFailedCommandsAndCheck(bool SyncExceptionExpected,
48+
int AsyncExceptionCountExpected) {
49+
platform Plt = sycl::platform();
50+
int ExceptionListSize = 0;
51+
sycl::async_handler AsyncHandler =
52+
[&ExceptionListSize](sycl::exception_list ExceptionList) {
53+
ExceptionListSize = ExceptionList.size();
54+
};
55+
bool ExceptionThrown = false;
56+
queue Queue(context(Plt), default_selector_v, AsyncHandler);
57+
{
58+
int initVal = 0;
59+
sycl::buffer<int, 1> Buf(&initVal, 1);
60+
try {
61+
Queue.submit([&](sycl::handler &CGH) {
62+
Buf.get_access<sycl::access::mode::write>(CGH);
63+
CGH.single_task<TestKernel<1>>([]() {});
64+
});
65+
} catch (...) {
66+
ExceptionThrown = true;
67+
}
68+
}
69+
EXPECT_EQ(ExceptionThrown, SyncExceptionExpected);
70+
Queue.wait_and_throw();
71+
EXPECT_EQ(ExceptionListSize, AsyncExceptionCountExpected);
72+
}
73+
74+
ur_result_t failingUrCall(void *) { return UR_RESULT_ERROR_UNKNOWN; }
75+
76+
TEST_F(SchedulerTest, FailedKernelException) {
77+
unittest::UrMock<> Mock;
78+
mock::getCallbacks().set_before_callback("urEnqueueKernelLaunch",
79+
&failingUrCall);
80+
RunWithFailedCommandsAndCheck(true, 0);
81+
}
82+
83+
TEST_F(SchedulerTest, FailedCopyBackException) {
84+
unittest::UrMock<> Mock;
85+
mock::getCallbacks().set_before_callback("urEnqueueMemBufferRead",
86+
&failingUrCall);
87+
RunWithFailedCommandsAndCheck(false, 1);
88+
}

0 commit comments

Comments
 (0)