Skip to content

Commit e4d49f7

Browse files
committed
[SYCL][Graph] Permit empty & barrier nodes in WGU
In order to enable the minimum viable real life usecase for the Whole Graph Update feature. Allow graphs to contain empty nodes and barrier nodes during update. See discussion thread intel#13253 (comment) on SYCL-Graph spec PR for publicizing the availability of the Whole Graph Update feature.
1 parent 82f77d1 commit e4d49f7

File tree

4 files changed

+292
-9
lines changed

4 files changed

+292
-9
lines changed

sycl/source/detail/graph_impl.cpp

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1236,18 +1236,22 @@ void exec_graph_impl::update(
12361236
sycl::make_error_code(errc::invalid),
12371237
"Node passed to update() is not part of the graph.");
12381238
}
1239-
if (Node->MCGType != sycl::detail::CG::Kernel) {
1240-
throw sycl::exception(errc::invalid, "Cannot update non-kernel nodes");
1241-
}
12421239

1243-
if (Node->MCommandGroup->getRequirements().size() == 0) {
1244-
continue;
1240+
if (!(Node->isEmpty() || Node->MCGType == sycl::detail::CG::Kernel ||
1241+
Node->MCGType == sycl::detail::CG::Barrier)) {
1242+
throw sycl::exception(errc::invalid,
1243+
"Cannot update node type. Node must be be of "
1244+
"kernel, empty, or barrier type.");
12451245
}
1246-
NeedScheduledUpdate = true;
12471246

1248-
UpdateRequirements.insert(UpdateRequirements.end(),
1249-
Node->MCommandGroup->getRequirements().begin(),
1250-
Node->MCommandGroup->getRequirements().end());
1247+
if (const auto &CG = Node->MCommandGroup;
1248+
CG && CG->getRequirements().size() != 0) {
1249+
NeedScheduledUpdate = true;
1250+
1251+
UpdateRequirements.insert(UpdateRequirements.end(),
1252+
Node->MCommandGroup->getRequirements().begin(),
1253+
Node->MCommandGroup->getRequirements().end());
1254+
}
12511255
}
12521256

12531257
// Clean up any execution events which have finished so we don't pass them to
@@ -1290,6 +1294,11 @@ void exec_graph_impl::update(
12901294
}
12911295

12921296
void exec_graph_impl::updateImpl(std::shared_ptr<node_impl> Node) {
1297+
// Kernel node update is the only command type supported in UR for update.
1298+
// Updating any other types of nodes, e.g. empty & barrier nodes is a no-op.
1299+
if (Node->MCGType != sycl::detail::CG::Kernel) {
1300+
return;
1301+
}
12931302
auto ContextImpl = sycl::detail::getSyclObjImpl(MContext);
12941303
const sycl::detail::PluginPtr &Plugin = ContextImpl->getPlugin();
12951304
auto DeviceImpl = sycl::detail::getSyclObjImpl(MGraphImpl->getDevice());
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
// REQUIRES: aspect-usm_shared_allocations
9+
10+
// Tests that whole graph update works when a graph contains a barrier node.
11+
12+
#include "../graph_common.hpp"
13+
14+
// Queue submissions that can be recorded to a graph, with a barrier node
15+
// separating initialization and computation kernel nodes
16+
template <class T>
17+
void RecordGraph(queue &Queue, size_t Size, T *Input1, T *Input2, T *Output) {
18+
Queue.submit([&](handler &CGH) {
19+
CGH.single_task([=]() {
20+
for (int i = 0; i < Size; i++) {
21+
Input1[i] += i;
22+
}
23+
});
24+
});
25+
26+
Queue.submit([&](handler &CGH) {
27+
CGH.single_task([=]() {
28+
for (int i = 0; i < Size; i++) {
29+
Input2[i] += i;
30+
}
31+
});
32+
});
33+
34+
Queue.ext_oneapi_submit_barrier();
35+
36+
Queue.submit([&](handler &CGH) {
37+
CGH.single_task([=]() {
38+
for (int i = 0; i < Size; i++) {
39+
Output[i] = Input1[i] * Input2[i];
40+
}
41+
});
42+
});
43+
}
44+
45+
int main() {
46+
queue Queue{};
47+
48+
using T = int;
49+
50+
// USM allocations for GraphA
51+
T *InputA1 = malloc_shared<T>(Size, Queue);
52+
T *InputA2 = malloc_shared<T>(Size, Queue);
53+
T *OutputA = malloc_shared<T>(Size, Queue);
54+
55+
// Initialize USM allocations
56+
T Pattern1 = 0xA;
57+
T Pattern2 = 0x42;
58+
T PatternZero = 0;
59+
60+
Queue.fill(InputA1, Pattern1, Size);
61+
Queue.fill(InputA2, Pattern2, Size);
62+
Queue.fill(OutputA, PatternZero, Size);
63+
Queue.wait();
64+
65+
// Define GraphA
66+
exp_ext::command_graph GraphA{Queue};
67+
GraphA.begin_recording(Queue);
68+
RecordGraph(Queue, Size, InputA1, InputA2, OutputA);
69+
GraphA.end_recording();
70+
71+
// Finalize, run, and validate GraphA
72+
auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
73+
Queue.ext_oneapi_graph(GraphExecA).wait();
74+
75+
for (int i = 0; i < Size; i++) {
76+
T Ref = (Pattern1 + i) * (Pattern2 + i);
77+
assert(check_value(i, Ref, OutputA[i], "OutputA"));
78+
}
79+
80+
// Create GraphB which will be used to update GraphA
81+
exp_ext::command_graph GraphB{Queue};
82+
83+
// USM allocations for GraphB
84+
T *InputB1 = malloc_shared<T>(Size, Queue);
85+
T *InputB2 = malloc_shared<T>(Size, Queue);
86+
T *OutputB = malloc_shared<T>(Size, Queue);
87+
88+
// Initialize GraphB allocations
89+
Pattern1 = -42;
90+
Pattern2 = 0xF;
91+
92+
Queue.fill(InputB1, Pattern1, Size);
93+
Queue.fill(InputB2, Pattern2, Size);
94+
Queue.fill(OutputB, PatternZero, Size);
95+
Queue.wait();
96+
97+
// Create GraphB
98+
GraphB.begin_recording(Queue);
99+
RecordGraph(Queue, Size, InputB1, InputB2, OutputB);
100+
GraphB.end_recording();
101+
102+
// Update executable GraphA with GraphB, run, and validate
103+
GraphExecA.update(GraphB);
104+
Queue.ext_oneapi_graph(GraphExecA).wait();
105+
106+
for (int i = 0; i < Size; i++) {
107+
T Ref = (Pattern1 + i) * (Pattern2 + i);
108+
assert(check_value(i, Ref, OutputB[i], "OutputB"));
109+
}
110+
111+
free(InputA1, Queue);
112+
free(InputA2, Queue);
113+
free(OutputA, Queue);
114+
115+
free(InputB1, Queue);
116+
free(InputB2, Queue);
117+
free(OutputB, Queue);
118+
return 0;
119+
}
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
5+
// Extra run to check for immediate-command-list in Level Zero
6+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
7+
8+
// REQUIRES: aspect-usm_shared_allocations
9+
10+
// Tests that whole graph update works when a graph contain an empty node.
11+
12+
#include "../graph_common.hpp"
13+
14+
// Creates a graph with an empty node separating initialization and computation
15+
// kernel nodes
16+
template <class T>
17+
void CreateGraph(
18+
exp_ext::command_graph<exp_ext::graph_state::modifiable> &Graph,
19+
size_t Size, T *Input1, T *Input2, T *Output) {
20+
Graph.add([&](handler &CGH) {
21+
CGH.single_task([=]() {
22+
for (int i = 0; i < Size; i++) {
23+
Input1[i] += i;
24+
}
25+
});
26+
});
27+
28+
Graph.add([&](handler &CGH) {
29+
CGH.single_task([=]() {
30+
for (int i = 0; i < Size; i++) {
31+
Input2[i] += i;
32+
}
33+
});
34+
});
35+
36+
auto EmptyNodeA =
37+
Graph.add({exp_ext::property::node::depends_on_all_leaves()});
38+
39+
Graph.add(
40+
[&](handler &CGH) {
41+
CGH.single_task([=]() {
42+
for (int i = 0; i < Size; i++) {
43+
Output[i] = Input1[i] * Input2[i];
44+
}
45+
});
46+
},
47+
{exp_ext::property::node::depends_on(EmptyNodeA)});
48+
}
49+
50+
int main() {
51+
queue Queue{};
52+
53+
using T = int;
54+
55+
// USM allocations for GraphA
56+
T *InputA1 = malloc_shared<T>(Size, Queue);
57+
T *InputA2 = malloc_shared<T>(Size, Queue);
58+
T *OutputA = malloc_shared<T>(Size, Queue);
59+
60+
// Initialize USM allocations
61+
T Pattern1 = 0xA;
62+
T Pattern2 = 0x42;
63+
T PatternZero = 0;
64+
65+
Queue.fill(InputA1, Pattern1, Size);
66+
Queue.fill(InputA2, Pattern2, Size);
67+
Queue.fill(OutputA, PatternZero, Size);
68+
Queue.wait();
69+
70+
// Construct GraphA
71+
exp_ext::command_graph GraphA{Queue};
72+
CreateGraph(GraphA, Size, InputA1, InputA2, OutputA);
73+
74+
// Finalize, run, and validate GraphA
75+
auto GraphExecA = GraphA.finalize(exp_ext::property::graph::updatable{});
76+
Queue.ext_oneapi_graph(GraphExecA).wait();
77+
78+
for (int i = 0; i < Size; i++) {
79+
T Ref = (Pattern1 + i) * (Pattern2 + i);
80+
assert(check_value(i, Ref, OutputA[i], "OutputA"));
81+
}
82+
83+
// Create GraphB which will be used to update GraphA
84+
exp_ext::command_graph GraphB{Queue};
85+
86+
// USM allocations for GraphB
87+
T *InputB1 = malloc_shared<T>(Size, Queue);
88+
T *InputB2 = malloc_shared<T>(Size, Queue);
89+
T *OutputB = malloc_shared<T>(Size, Queue);
90+
91+
// Initialize GraphB
92+
Pattern1 = -42;
93+
Pattern2 = 0xF;
94+
95+
Queue.fill(InputB1, Pattern1, Size);
96+
Queue.fill(InputB2, Pattern2, Size);
97+
Queue.fill(OutputB, PatternZero, Size);
98+
Queue.wait();
99+
100+
// Construct GraphB
101+
CreateGraph(GraphB, Size, InputB1, InputB2, OutputB);
102+
103+
// Update executable GraphA with GraphB, run, and validate
104+
GraphExecA.update(GraphB);
105+
Queue.ext_oneapi_graph(GraphExecA).wait();
106+
107+
for (int i = 0; i < Size; i++) {
108+
T Ref = (Pattern1 + i) * (Pattern2 + i);
109+
assert(check_value(i, Ref, OutputB[i], "OutputB"));
110+
}
111+
112+
free(InputA1, Queue);
113+
free(InputA2, Queue);
114+
free(OutputA, Queue);
115+
116+
free(InputB1, Queue);
117+
free(InputB2, Queue);
118+
free(OutputB, Queue);
119+
return 0;
120+
}

sycl/unittests/Extensions/CommandGraph/Update.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,11 @@ TEST_F(CommandGraphTest, UpdateNodeTypeExceptions) {
109109
cgh.host_task([]() {});
110110
}));
111111

112+
ASSERT_ANY_THROW(auto NodeBarrier = Graph.add([&](sycl::handler &cgh) {
113+
cgh.set_arg(0, DynamicParam);
114+
cgh.ext_oneapi_barrier();
115+
}));
116+
112117
auto NodeEmpty = Graph.add();
113118

114119
experimental::command_graph Subgraph(Queue.get_context(), Dev);
@@ -375,3 +380,33 @@ TEST_F(WholeGraphUpdateTest, MissingUpdatableProperty) {
375380
auto GraphExec = Graph.finalize();
376381
EXPECT_THROW(GraphExec.update(UpdateGraph), sycl::exception);
377382
}
383+
384+
TEST_F(WholeGraphUpdateTest, EmptyNode) {
385+
// Test that updating a graph that has an empty node is not an error
386+
auto NodeEmpty = Graph.add();
387+
auto UpdateNodeEmpty = UpdateGraph.add();
388+
389+
auto NodeKernel = Graph.add(EmptyKernel);
390+
auto UpdateNodeKernel = UpdateGraph.add(EmptyKernel);
391+
392+
auto GraphExec = Graph.finalize(experimental::property::graph::updatable{});
393+
GraphExec.update(UpdateGraph);
394+
}
395+
396+
TEST_F(WholeGraphUpdateTest, BarrierNode) {
397+
// Test that updating a graph that has a barrier node is not an error
398+
Graph.begin_recording(Queue);
399+
auto NodeKernel = Queue.submit(
400+
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
401+
Queue.ext_oneapi_submit_barrier({NodeKernel});
402+
Graph.end_recording(Queue);
403+
404+
UpdateGraph.begin_recording(Queue);
405+
auto UpdateNodeKernel = Queue.submit(
406+
[&](sycl::handler &cgh) { cgh.single_task<TestKernel<>>([]() {}); });
407+
Queue.ext_oneapi_submit_barrier({UpdateNodeKernel});
408+
UpdateGraph.end_recording(Queue);
409+
410+
auto GraphExec = Graph.finalize(experimental::property::graph::updatable{});
411+
GraphExec.update(UpdateGraph);
412+
}

0 commit comments

Comments
 (0)