Skip to content

Commit 00f3de9

Browse files
cperkinsinteladamfidel
authored andcommitted
[SYCL] cmd mem leak fix (intel#17125)
When enqueueing a command and its dependencies, an exception might be thrown. In that case, the command will have a failed EnqueueStatus and not stored in graph_builder DAG. But we need to make sure that dependencies also check so they are not stored there. Otherwise there will be leaks. During the clean up, we don't want to reenqueue it if we know it has failed before. This is broken out from intel#16618
1 parent 60afe61 commit 00f3de9

File tree

3 files changed

+80
-2
lines changed

3 files changed

+80
-2
lines changed

sycl/source/detail/scheduler/graph_builder.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,9 @@ Scheduler::GraphBuilder::addCopyBack(Requirement *Req,
486486

487487
std::vector<Command *> ToCleanUp;
488488
for (Command *Dep : Deps) {
489+
if (Dep->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed)
490+
continue;
491+
489492
Command *ConnCmd = MemCpyCmd->addDep(
490493
DepDesc{Dep, MemCpyCmd->getRequirement(), SrcAllocaCmd}, ToCleanUp);
491494
if (ConnCmd)

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record,
5252
#endif
5353
std::vector<Command *> ToCleanUp;
5454
for (Command *Cmd : Record->MReadLeaves) {
55+
if (Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed)
56+
continue;
57+
5558
EnqueueResultT Res;
5659
bool Enqueued =
5760
GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd);
@@ -65,6 +68,9 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record,
6568
GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp);
6669
}
6770
for (Command *Cmd : Record->MWriteLeaves) {
71+
if (Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed)
72+
continue;
73+
6874
EnqueueResultT Res;
6975
bool Enqueued =
7076
GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd);
@@ -156,12 +162,14 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent,
156162
}
157163
delete NewCmd;
158164
}
165+
cleanupCommands(ToCleanUp);
159166
};
160167

161168
for (Command *Cmd : AuxiliaryCmds) {
162-
Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd,
163-
Blocking);
164169
try {
170+
Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp,
171+
Cmd, Blocking);
172+
165173
if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult)
166174
throw exception(make_error_code(errc::runtime),
167175
"Auxiliary enqueue process failed.");
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
//==------------------- DeleteCmdException.cpp ----------------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
// REQUIRES: level_zero
10+
11+
// UNSUPPORTED: windows
12+
// UNSUPPORTED-TRACKER: CMPLRLLVM-44705
13+
14+
// RUN: %{build} -o %t.out
15+
// RUN: %{l0_leak_check} %{run} %t.out
16+
17+
#include <sycl/detail/core.hpp>
18+
19+
void test_exception(sycl::queue &q, sycl::buffer<int, 1> &buf,
20+
size_t workGroupSize) {
21+
22+
try {
23+
// Illegal nd_range
24+
auto illegal_range = sycl::nd_range<1>{sycl::range<1>{workGroupSize * 2},
25+
sycl::range<1>{workGroupSize + 32}};
26+
27+
// Will throw when submitted
28+
q.submit([&](sycl::handler &cgh) {
29+
auto acc = buf.get_access<sycl::access::mode::read_write>(cgh);
30+
cgh.parallel_for(illegal_range, [=](sycl::nd_item<1> nd_item) {
31+
acc[nd_item.get_global_linear_id()] = 42; // will not be reached
32+
});
33+
}).wait_and_throw();
34+
} catch (const sycl::exception &e) {
35+
std::cout << "exception caught: " << e.code() << ":\t";
36+
std::cout << e.what() << std::endl;
37+
}
38+
}
39+
40+
int main() {
41+
sycl::queue q;
42+
sycl::device dev = q.get_device();
43+
int maxWorkGroupSize =
44+
dev.get_info<sycl::info::device::max_work_group_size>();
45+
46+
constexpr size_t NumWorkItems =
47+
2048; // this value is arbitrary since kernel is never run.
48+
std::vector<int> source(NumWorkItems, 0);
49+
{
50+
// Buffers with their own memory will have their memory release deferred,
51+
// while buffers backstopped by host memory will release when the buffer is
52+
// destroyed. This means there are two different paths we need to check to
53+
// ensure we are not leaking resources when encountering exceptions.
54+
55+
// buffer with own memory
56+
sycl::buffer<int, 1> buf{sycl::range<1>{NumWorkItems}};
57+
58+
// buffer backstopped by host memory
59+
sycl::buffer<int, 1> buf2{source.data(), sycl::range<1>{NumWorkItems}};
60+
61+
test_exception(q, buf, maxWorkGroupSize);
62+
63+
test_exception(q, buf2, maxWorkGroupSize);
64+
}
65+
66+
return 0;
67+
}

0 commit comments

Comments
 (0)