Skip to content

Commit 07bdfb2

Browse files
committed
[SYCL][Graph] Permit empty & barrier nodes in WGU
In order to enable the minimum viable real life usecase for the Whole Graph Update feature. Allow graphs to contain empty nodes and barrier nodes during update. Depends on PR intel#14212
1 parent 7b85fb2 commit 07bdfb2

File tree

4 files changed

+282
-9
lines changed

4 files changed

+282
-9
lines changed

sycl/source/detail/graph_impl.cpp

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1236,18 +1236,22 @@ void exec_graph_impl::update(
12361236
sycl::make_error_code(errc::invalid),
12371237
"Node passed to update() is not part of the graph.");
12381238
}
1239-
if (Node->MCGType != sycl::detail::CG::Kernel) {
1240-
throw sycl::exception(errc::invalid, "Cannot update non-kernel nodes");
1241-
}
12421239

1243-
if (Node->MCommandGroup->getRequirements().size() == 0) {
1244-
continue;
1240+
if (!(Node->isEmpty() || Node->MCGType == sycl::detail::CG::Kernel ||
1241+
Node->MCGType == sycl::detail::CG::Barrier)) {
1242+
throw sycl::exception(errc::invalid,
1243+
"Cannot update node type. Node must be be of "
1244+
"kernel, empty, or barrier type.");
12451245
}
1246-
NeedScheduledUpdate = true;
12471246

1248-
UpdateRequirements.insert(UpdateRequirements.end(),
1249-
Node->MCommandGroup->getRequirements().begin(),
1250-
Node->MCommandGroup->getRequirements().end());
1247+
if (Node->MCommandGroup &&
1248+
Node->MCommandGroup->getRequirements().size() != 0) {
1249+
NeedScheduledUpdate = true;
1250+
1251+
UpdateRequirements.insert(UpdateRequirements.end(),
1252+
Node->MCommandGroup->getRequirements().begin(),
1253+
Node->MCommandGroup->getRequirements().end());
1254+
}
12511255
}
12521256

12531257
// Clean up any execution events which have finished so we don't pass them to
@@ -1290,6 +1294,9 @@ void exec_graph_impl::update(
12901294
}
12911295

12921296
void exec_graph_impl::updateImpl(std::shared_ptr<node_impl> Node) {
1297+
if (Node->MCGType != sycl::detail::CG::Kernel) {
1298+
return;
1299+
}
12931300
auto ContextImpl = sycl::detail::getSyclObjImpl(MContext);
12941301
const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin();
12951302
auto DeviceImpl = sycl::detail::getSyclObjImpl(MGraphImpl->getDevice());
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
// REQUIRES: aspect-usm_shared_allocations
9+
10+
// Tests that whole graph update works when a graph contains barrier nodes.
11+
12+
#include "../graph_common.hpp"
13+
14+
// Queue submissions that can be recorded to a graph with a barrier node
15+
// separating initialization and computation kernel nodes
16+
template <class T>
17+
void RecordGraph(queue &Queue, size_t Size, T *Input1, T *Input2, T *Output) {
18+
Queue.submit([&](handler &CGH) {
19+
CGH.single_task([=]() {
20+
for (int i = 0; i < Size; i++) {
21+
Input1[i] += i;
22+
}
23+
});
24+
});
25+
26+
Queue.submit([&](handler &CGH) {
27+
CGH.single_task([=]() {
28+
for (int i = 0; i < Size; i++) {
29+
Input2[i] += i;
30+
}
31+
});
32+
});
33+
34+
Queue.ext_oneapi_submit_barrier();
35+
36+
Queue.submit([&](handler &CGH) {
37+
CGH.single_task([=]() {
38+
for (int i = 0; i < Size; i++) {
39+
Output[i] = Input1[i] * Input2[i];
40+
}
41+
});
42+
});
43+
}
44+
45+
int main() {
46+
queue Queue{};
47+
48+
using T = int;
49+
50+
// USM allocations for Graph A
51+
T *InputA1 = malloc_shared<T>(Size, Queue);
52+
T *InputA2 = malloc_shared<T>(Size, Queue);
53+
T *OutputA = malloc_shared<T>(Size, Queue);
54+
55+
T Pattern1 = 0xA;
56+
T Pattern2 = 0x42;
57+
T PatternZero = 0;
58+
59+
Queue.fill(InputA1, Pattern1, Size);
60+
Queue.fill(InputA2, Pattern2, Size);
61+
Queue.fill(OutputA, PatternZero, Size);
62+
Queue.wait();
63+
64+
exp_ext::command_graph GraphA{Queue};
65+
GraphA.begin_recording(Queue);
66+
RecordGraph(Queue, Size, InputA1, InputA2, OutputA);
67+
GraphA.end_recording();
68+
69+
// Finalize, run, and validate GraphA
70+
auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
71+
Queue.ext_oneapi_graph(GraphExecA).wait();
72+
73+
for (int i = 0; i < Size; i++) {
74+
T Ref = (Pattern1 + i) * (Pattern2 + i);
75+
assert(check_value(i, Ref, OutputA[i], "OutputA"));
76+
}
77+
78+
// Create GraphB which will be used to update GraphA
79+
exp_ext::command_graph GraphB{Queue};
80+
81+
// USM allocations for GraphB
82+
T *InputB1 = malloc_shared<T>(Size, Queue);
83+
T *InputB2 = malloc_shared<T>(Size, Queue);
84+
T *OutputB = malloc_shared<T>(Size, Queue);
85+
86+
Pattern1 = -42;
87+
Pattern2 = 0xF;
88+
89+
Queue.fill(InputB1, Pattern1, Size);
90+
Queue.fill(InputB2, Pattern2, Size);
91+
Queue.fill(OutputB, PatternZero, Size);
92+
Queue.wait();
93+
94+
GraphB.begin_recording(Queue);
95+
RecordGraph(Queue, Size, InputB1, InputB2, OutputB);
96+
GraphB.end_recording();
97+
98+
// Update executable GraphA with GraphB, run, and validate
99+
GraphExecA.update(GraphB);
100+
Queue.ext_oneapi_graph(GraphExecA).wait();
101+
102+
for (int i = 0; i < Size; i++) {
103+
T Ref = (Pattern1 + i) * (Pattern2 + i);
104+
assert(check_value(i, Ref, OutputB[i], "OutputB"));
105+
}
106+
107+
free(InputA1, Queue);
108+
free(InputA2, Queue);
109+
free(OutputA, Queue);
110+
111+
free(InputB1, Queue);
112+
free(InputB2, Queue);
113+
free(OutputB, Queue);
114+
return 0;
115+
}
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
// REQUIRES: aspect-usm_shared_allocations
9+
10+
// Tests that whole graph update works when a graph contains empty nodes.
11+
12+
#include "../graph_common.hpp"
13+
14+
// Creates a Graph with an empty node separating initialization and computation
15+
// kernel nodes
16+
template <class T>
17+
void CreateGraph(
18+
exp_ext::command_graph<exp_ext::graph_state::modifiable> &Graph,
19+
size_t Size, T *Input1, T *Input2, T *Output) {
20+
Graph.add([&](handler &CGH) {
21+
CGH.single_task([=]() {
22+
for (int i = 0; i < Size; i++) {
23+
Input1[i] += i;
24+
}
25+
});
26+
});
27+
28+
Graph.add([&](handler &CGH) {
29+
CGH.single_task([=]() {
30+
for (int i = 0; i < Size; i++) {
31+
Input2[i] += i;
32+
}
33+
});
34+
});
35+
36+
auto EmptyNodeA =
37+
Graph.add({exp_ext::property::node::depends_on_all_leaves()});
38+
39+
Graph.add(
40+
[&](handler &CGH) {
41+
CGH.single_task([=]() {
42+
for (int i = 0; i < Size; i++) {
43+
Output[i] = Input1[i] * Input2[i];
44+
}
45+
});
46+
},
47+
{exp_ext::property::node::depends_on(EmptyNodeA)});
48+
}
49+
50+
int main() {
51+
queue Queue{};
52+
53+
using T = int;
54+
55+
// USM allocations for Graph A
56+
T *InputA1 = malloc_shared<T>(Size, Queue);
57+
T *InputA2 = malloc_shared<T>(Size, Queue);
58+
T *OutputA = malloc_shared<T>(Size, Queue);
59+
60+
T Pattern1 = 0xA;
61+
T Pattern2 = 0x42;
62+
T PatternZero = 0;
63+
64+
Queue.fill(InputA1, Pattern1, Size);
65+
Queue.fill(InputA2, Pattern2, Size);
66+
Queue.fill(OutputA, PatternZero, Size);
67+
Queue.wait();
68+
69+
exp_ext::command_graph GraphA{Queue};
70+
CreateGraph(GraphA, Size, InputA1, InputA2, OutputA);
71+
72+
// Finalize, run, and validate GraphA
73+
auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
74+
Queue.ext_oneapi_graph(GraphExecA).wait();
75+
76+
for (int i = 0; i < Size; i++) {
77+
T Ref = (Pattern1 + i) * (Pattern2 + i);
78+
assert(check_value(i, Ref, OutputA[i], "OutputA"));
79+
}
80+
81+
// Create GraphB which will be used to update GraphA
82+
exp_ext::command_graph GraphB{Queue};
83+
84+
// USM allocations for GraphB
85+
T *InputB1 = malloc_shared<T>(Size, Queue);
86+
T *InputB2 = malloc_shared<T>(Size, Queue);
87+
T *OutputB = malloc_shared<T>(Size, Queue);
88+
89+
Pattern1 = -42;
90+
Pattern2 = 0xF;
91+
92+
Queue.fill(InputB1, Pattern1, Size);
93+
Queue.fill(InputB2, Pattern2, Size);
94+
Queue.fill(OutputB, PatternZero, Size);
95+
Queue.wait();
96+
97+
CreateGraph(GraphB, Size, InputB1, InputB2, OutputB);
98+
99+
// Update executable GraphA with GraphB, run, and validate
100+
GraphExecA.update(GraphB);
101+
Queue.ext_oneapi_graph(GraphExecA).wait();
102+
103+
for (int i = 0; i < Size; i++) {
104+
T Ref = (Pattern1 + i) * (Pattern2 + i);
105+
assert(check_value(i, Ref, OutputB[i], "OutputB"));
106+
}
107+
108+
free(InputA1, Queue);
109+
free(InputA2, Queue);
110+
free(OutputA, Queue);
111+
112+
free(InputB1, Queue);
113+
free(InputB2, Queue);
114+
free(OutputB, Queue);
115+
return 0;
116+
}

sycl/unittests/Extensions/CommandGraph/Update.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ TEST_F(CommandGraphTest, UpdateNodeTypeExceptions) {
109109
cgh.host_task([]() {});
110110
}));
111111

112+
ASSERT_ANY_THROW(auto NodeBarrier = Graph.add([&](sycl::handler &cgh) {
113+
cgh.set_arg(0, DynamicParam);
114+
cgh.ext_oneapi_barrier();
115+
}));
116+
112117
auto NodeEmpty = Graph.add();
113118

114119
experimental::command_graph Subgraph(Queue.get_context(), Dev);
@@ -375,3 +380,33 @@ TEST_F(WholeGraphUpdateTest, MissingUpdatableProperty) {
375380
auto GraphExec = Graph.finalize();
376381
EXPECT_THROW(GraphExec.update(UpdateGraph), sycl::exception);
377382
}
383+
384+
TEST_F(WholeGraphUpdateTest, EmptyNode) {
385+
// Test that updating a graph that has an empty node is not an error
386+
auto NodeEmpty = Graph.add();
387+
auto UpdateNodeEmpty = UpdateGraph.add();
388+
389+
auto NodeKernel = Graph.add(EmptyKernel);
390+
auto UpdateNodeKernel = UpdateGraph.add(EmptyKernel);
391+
392+
auto GraphExec = Graph.finalize(experimental::property::graph::updatable{});
393+
GraphExec.update(UpdateGraph);
394+
}
395+
396+
TEST_F(WholeGraphUpdateTest, BarrierNode) {
397+
// Test that updating a graph that has a barrier node is not an error
398+
Graph.begin_recording(Queue);
399+
auto NodeKernel = Queue.submit(
400+
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
401+
Queue.ext_oneapi_submit_barrier({NodeKernel});
402+
Graph.end_recording(Queue);
403+
404+
UpdateGraph.begin_recording(Queue);
405+
auto UpdateNodeKernel = Queue.submit(
406+
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
407+
Queue.ext_oneapi_submit_barrier({UpdateNodeKernel});
408+
UpdateGraph.end_recording(Queue);
409+
410+
auto GraphExec = Graph.finalize(experimental::property::graph::updatable{});
411+
GraphExec.update(UpdateGraph);
412+
}

0 commit comments

Comments
 (0)