Skip to content

Commit a00be43

Browse files
committed
[SYCL][Graph] Permit empty & barrier nodes in WGU
In order to enable the minimum viable real life usecase for the Whole Graph Update feature. Allow graphs to contain empty nodes and barrier nodes during update. See discussion thread intel#13253 (comment) on SYCL-Graph spec PR for publicizing the availability of the Whole Graph Update feature.
1 parent d4f2fe5 commit a00be43

File tree

5 files changed

+315
-11
lines changed

5 files changed

+315
-11
lines changed

sycl/source/detail/graph_impl.cpp

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -372,7 +372,6 @@ graph_impl::add(const std::shared_ptr<graph_impl> &Impl,
372372
(void)Args;
373373
sycl::handler Handler{Impl};
374374
CGF(Handler);
375-
Handler.finalize();
376375

377376
if (Handler.MCGType == sycl::detail::CG::Barrier) {
378377
throw sycl::exception(
@@ -381,6 +380,8 @@ graph_impl::add(const std::shared_ptr<graph_impl> &Impl,
381380
"SYCL Graph Explicit API. Please use empty nodes instead.");
382381
}
383382

383+
Handler.finalize();
384+
384385
node_type NodeType =
385386
Handler.MImpl->MUserFacingNodeType !=
386387
ext::oneapi::experimental::node_type::empty
@@ -1236,18 +1237,22 @@ void exec_graph_impl::update(
12361237
sycl::make_error_code(errc::invalid),
12371238
"Node passed to update() is not part of the graph.");
12381239
}
1239-
if (Node->MCGType != sycl::detail::CG::Kernel) {
1240-
throw sycl::exception(errc::invalid, "Cannot update non-kernel nodes");
1241-
}
12421240

1243-
if (Node->MCommandGroup->getRequirements().size() == 0) {
1244-
continue;
1241+
if (!(Node->isEmpty() || Node->MCGType == sycl::detail::CG::Kernel ||
1242+
Node->MCGType == sycl::detail::CG::Barrier)) {
1243+
throw sycl::exception(errc::invalid,
1244+
"Unsupported node type for update. Only kernel, "
1245+
"barrier and empty nodes are supported.");
12451246
}
1246-
NeedScheduledUpdate = true;
12471247

1248-
UpdateRequirements.insert(UpdateRequirements.end(),
1249-
Node->MCommandGroup->getRequirements().begin(),
1250-
Node->MCommandGroup->getRequirements().end());
1248+
if (const auto &CG = Node->MCommandGroup;
1249+
CG && CG->getRequirements().size() != 0) {
1250+
NeedScheduledUpdate = true;
1251+
1252+
UpdateRequirements.insert(UpdateRequirements.end(),
1253+
Node->MCommandGroup->getRequirements().begin(),
1254+
Node->MCommandGroup->getRequirements().end());
1255+
}
12511256
}
12521257

12531258
// Clean up any execution events which have finished so we don't pass them to
@@ -1290,6 +1295,11 @@ void exec_graph_impl::update(
12901295
}
12911296

12921297
void exec_graph_impl::updateImpl(std::shared_ptr<node_impl> Node) {
1298+
// Kernel node update is the only command type supported in UR for update.
1299+
// Updating any other types of nodes, e.g. empty & barrier nodes is a no-op.
1300+
if (Node->MCGType != sycl::detail::CG::Kernel) {
1301+
return;
1302+
}
12931303
auto ContextImpl = sycl::detail::getSyclObjImpl(MContext);
12941304
const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin();
12951305
auto DeviceImpl = sycl::detail::getSyclObjImpl(MGraphImpl->getDevice());
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
// REQUIRES: aspect-usm_shared_allocations
9+
10+
// Tests that whole graph update works when a graph contains a barrier node.
11+
12+
#include "../graph_common.hpp"
13+
14+
// Queue submissions that can be recorded to a graph, with a barrier node
15+
// separating initialization and computation kernel nodes
16+
template <class T>
17+
void RecordGraph(queue &Queue, size_t Size, T *Input1, T *Input2, T *Output) {
18+
Queue.submit([&](handler &CGH) {
19+
CGH.single_task([=]() {
20+
for (int i = 0; i < Size; i++) {
21+
Input1[i] += i;
22+
}
23+
});
24+
});
25+
26+
Queue.submit([&](handler &CGH) {
27+
CGH.single_task([=]() {
28+
for (int i = 0; i < Size; i++) {
29+
Input2[i] += i;
30+
}
31+
});
32+
});
33+
34+
Queue.ext_oneapi_submit_barrier();
35+
36+
Queue.submit([&](handler &CGH) {
37+
CGH.single_task([=]() {
38+
for (int i = 0; i < Size; i++) {
39+
Output[i] = Input1[i] * Input2[i];
40+
}
41+
});
42+
});
43+
}
44+
45+
int main() {
46+
queue Queue{};
47+
48+
using T = int;
49+
50+
// USM allocations for GraphA
51+
T *InputA1 = malloc_shared<T>(Size, Queue);
52+
T *InputA2 = malloc_shared<T>(Size, Queue);
53+
T *OutputA = malloc_shared<T>(Size, Queue);
54+
55+
// Initialize USM allocations
56+
T Pattern1 = 0xA;
57+
T Pattern2 = 0x42;
58+
T PatternZero = 0;
59+
60+
Queue.fill(InputA1, Pattern1, Size);
61+
Queue.fill(InputA2, Pattern2, Size);
62+
Queue.fill(OutputA, PatternZero, Size);
63+
Queue.wait();
64+
65+
// Define GraphA
66+
exp_ext::command_graph GraphA{Queue};
67+
GraphA.begin_recording(Queue);
68+
RecordGraph(Queue, Size, InputA1, InputA2, OutputA);
69+
GraphA.end_recording();
70+
71+
// Finalize, run, and validate GraphA
72+
auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
73+
Queue.ext_oneapi_graph(GraphExecA).wait();
74+
75+
for (int i = 0; i < Size; i++) {
76+
T Ref = (Pattern1 + i) * (Pattern2 + i);
77+
assert(check_value(i, Ref, OutputA[i], "OutputA"));
78+
}
79+
80+
// Create GraphB which will be used to update GraphA
81+
exp_ext::command_graph GraphB{Queue};
82+
83+
// USM allocations for GraphB
84+
T *InputB1 = malloc_shared<T>(Size, Queue);
85+
T *InputB2 = malloc_shared<T>(Size, Queue);
86+
T *OutputB = malloc_shared<T>(Size, Queue);
87+
88+
// Initialize GraphB allocations
89+
Pattern1 = -42;
90+
Pattern2 = 0xF;
91+
92+
Queue.fill(InputB1, Pattern1, Size);
93+
Queue.fill(InputB2, Pattern2, Size);
94+
Queue.fill(OutputB, PatternZero, Size);
95+
Queue.wait();
96+
97+
// Create GraphB
98+
GraphB.begin_recording(Queue);
99+
RecordGraph(Queue, Size, InputB1, InputB2, OutputB);
100+
GraphB.end_recording();
101+
102+
// Update executable GraphA with GraphB, run, and validate
103+
GraphExecA.update(GraphB);
104+
Queue.ext_oneapi_graph(GraphExecA).wait();
105+
106+
for (int i = 0; i < Size; i++) {
107+
T Ref = (Pattern1 + i) * (Pattern2 + i);
108+
assert(check_value(i, Ref, OutputB[i], "OutputB"));
109+
}
110+
111+
free(InputA1, Queue);
112+
free(InputA2, Queue);
113+
free(OutputA, Queue);
114+
115+
free(InputB1, Queue);
116+
free(InputB2, Queue);
117+
free(OutputB, Queue);
118+
return 0;
119+
}
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
// Tests that whole graph update works when a graph contain an empty node.
9+
10+
#include "../graph_common.hpp"
11+
12+
// Creates a graph with an empty node separating initialization and computation
13+
// kernel nodes
14+
template <class T>
15+
void CreateGraph(
16+
exp_ext::command_graph<exp_ext::graph_state::modifiable> &Graph,
17+
size_t Size, T *Input1, T *Input2, T *Output) {
18+
Graph.add([&](handler &CGH) {
19+
CGH.single_task([=]() {
20+
for (int i = 0; i < Size; i++) {
21+
Input1[i] += i;
22+
}
23+
});
24+
});
25+
26+
Graph.add([&](handler &CGH) {
27+
CGH.single_task([=]() {
28+
for (int i = 0; i < Size; i++) {
29+
Input2[i] += i;
30+
}
31+
});
32+
});
33+
34+
auto EmptyNodeA =
35+
Graph.add({exp_ext::property::node::depends_on_all_leaves()});
36+
37+
Graph.add(
38+
[&](handler &CGH) {
39+
CGH.single_task([=]() {
40+
for (int i = 0; i < Size; i++) {
41+
Output[i] = Input1[i] * Input2[i];
42+
}
43+
});
44+
},
45+
{exp_ext::property::node::depends_on(EmptyNodeA)});
46+
}
47+
48+
int main() {
49+
queue Queue{};
50+
51+
using T = int;
52+
53+
// USM allocations for GraphA
54+
T *InputA1 = malloc_device<T>(Size, Queue);
55+
T *InputA2 = malloc_device<T>(Size, Queue);
56+
T *OutputA = malloc_device<T>(Size, Queue);
57+
58+
// Initialize USM allocations
59+
T Pattern1 = 0xA;
60+
T Pattern2 = 0x42;
61+
T PatternZero = 0;
62+
63+
Queue.fill(InputA1, Pattern1, Size);
64+
Queue.fill(InputA2, Pattern2, Size);
65+
Queue.fill(OutputA, PatternZero, Size);
66+
Queue.wait();
67+
68+
// Construct GraphA
69+
exp_ext::command_graph GraphA{Queue};
70+
CreateGraph(GraphA, Size, InputA1, InputA2, OutputA);
71+
72+
// Finalize, run, and validate GraphA
73+
auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
74+
Queue.ext_oneapi_graph(GraphExecA).wait();
75+
76+
std::vector<T> HostOutput(Size);
77+
Queue.copy(OutputA, HostOutput.data(), Size).wait();
78+
79+
for (int i = 0; i < Size; i++) {
80+
T Ref = (Pattern1 + i) * (Pattern2 + i);
81+
assert(check_value(i, Ref, HostOutput[i], "OutputA"));
82+
}
83+
84+
// Create GraphB which will be used to update GraphA
85+
exp_ext::command_graph GraphB{Queue};
86+
87+
// USM allocations for GraphB
88+
T *InputB1 = malloc_device<T>(Size, Queue);
89+
T *InputB2 = malloc_device<T>(Size, Queue);
90+
T *OutputB = malloc_device<T>(Size, Queue);
91+
92+
// Initialize GraphB
93+
Pattern1 = -42;
94+
Pattern2 = 0xF;
95+
96+
Queue.fill(InputB1, Pattern1, Size);
97+
Queue.fill(InputB2, Pattern2, Size);
98+
Queue.fill(OutputB, PatternZero, Size);
99+
Queue.wait();
100+
101+
// Construct GraphB
102+
CreateGraph(GraphB, Size, InputB1, InputB2, OutputB);
103+
104+
// Update executable GraphA with GraphB, run, and validate
105+
GraphExecA.update(GraphB);
106+
Queue.ext_oneapi_graph(GraphExecA).wait();
107+
108+
Queue.copy(OutputB, HostOutput.data(), Size).wait();
109+
110+
for (int i = 0; i < Size; i++) {
111+
T Ref = (Pattern1 + i) * (Pattern2 + i);
112+
assert(check_value(i, Ref, HostOutput[i], "OutputB"));
113+
}
114+
115+
free(InputA1, Queue);
116+
free(InputA2, Queue);
117+
free(OutputA, Queue);
118+
119+
free(InputB1, Queue);
120+
free(InputB2, Queue);
121+
free(OutputB, Queue);
122+
return 0;
123+
}

sycl/unittests/Extensions/CommandGraph/Exceptions.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,15 +216,25 @@ void addImagesCopies(experimental::detail::modifiable_command_graph &G,
216216
} // anonymous namespace
217217

218218
TEST_F(CommandGraphTest, ExplicitBarrierException) {
219-
219+
bool Success = true;
220220
std::error_code ExceptionCode = make_error_code(sycl::errc::success);
221221
try {
222222
auto Barrier =
223223
Graph.add([&](sycl::handler &cgh) { cgh.ext_oneapi_barrier(); });
224224
} catch (exception &Exception) {
225225
ExceptionCode = Exception.code();
226+
std::string ErrorStr =
227+
"The sycl_ext_oneapi_enqueue_barrier feature is "
228+
"not available with SYCL Graph Explicit API. Please use empty nodes "
229+
"instead.";
230+
std::cout << Exception.what() << std::endl;
231+
std::cout << ErrorStr << std::endl;
232+
ASSERT_FALSE(std::string(Exception.what()).find(ErrorStr) ==
233+
std::string::npos);
234+
Success = false;
226235
}
227236
ASSERT_EQ(ExceptionCode, sycl::errc::invalid);
237+
ASSERT_EQ(Success, false);
228238
}
229239

230240
TEST_F(CommandGraphTest, FusionExtensionExceptionCheck) {

sycl/unittests/Extensions/CommandGraph/Update.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,18 @@ TEST_F(CommandGraphTest, UpdateNodeTypeExceptions) {
109109
cgh.host_task([]() {});
110110
}));
111111

112+
ASSERT_ANY_THROW(auto NodeBarreriTask = Graph.add([&](sycl::handler &cgh) {
113+
cgh.set_arg(0, DynamicParam);
114+
cgh.ext_oneapi_barrier();
115+
}));
116+
117+
Graph.begin_recording(Queue);
118+
ASSERT_ANY_THROW(auto NodeBarrierTask = Graph.add([&](sycl::handler &cgh) {
119+
cgh.set_arg(0, DynamicParam);
120+
cgh.ext_oneapi_barrier();
121+
}));
122+
Graph.end_recording(Queue);
123+
112124
auto NodeEmpty = Graph.add();
113125

114126
experimental::command_graph Subgraph(Queue.get_context(), Dev);
@@ -375,3 +387,33 @@ TEST_F(WholeGraphUpdateTest, MissingUpdatableProperty) {
375387
auto GraphExec = Graph.finalize();
376388
EXPECT_THROW(GraphExec.update(UpdateGraph), sycl::exception);
377389
}
390+
391+
TEST_F(WholeGraphUpdateTest, EmptyNode) {
392+
// Test that updating a graph that has an empty node is not an error
393+
auto NodeEmpty = Graph.add();
394+
auto UpdateNodeEmpty = UpdateGraph.add();
395+
396+
auto NodeKernel = Graph.add(EmptyKernel);
397+
auto UpdateNodeKernel = UpdateGraph.add(EmptyKernel);
398+
399+
auto GraphExec = Graph.finalize(experimental::property::graph::updatable{});
400+
GraphExec.update(UpdateGraph);
401+
}
402+
403+
TEST_F(WholeGraphUpdateTest, BarrierNode) {
404+
// Test that updating a graph that has a barrier node is not an error
405+
Graph.begin_recording(Queue);
406+
auto NodeKernel = Queue.submit(
407+
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
408+
Queue.ext_oneapi_submit_barrier({NodeKernel});
409+
Graph.end_recording(Queue);
410+
411+
UpdateGraph.begin_recording(Queue);
412+
auto UpdateNodeKernel = Queue.submit(
413+
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
414+
Queue.ext_oneapi_submit_barrier({UpdateNodeKernel});
415+
UpdateGraph.end_recording(Queue);
416+
417+
auto GraphExec = Graph.finalize(experimental::property::graph::updatable{});
418+
GraphExec.update(UpdateGraph);
419+
}

0 commit comments

Comments
 (0)