Skip to content

Commit 653f9b3

Browse files
authored
[SYCL][Graph] submit.cpp: check result according to spec (#12777)
1 parent d6dfd0c commit 653f9b3

File tree

1 file changed

+24
-17
lines changed

1 file changed

+24
-17
lines changed

sycl/test-e2e/Graph/Threading/submit.cpp

Lines changed: 24 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -4,11 +4,11 @@
44
// Extra run to check for immediate-command-list in Level Zero
55
// RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
66

7-
// Test submitting a graph in a threaded situation.
8-
// Note that we do not check the outputs because multiple concurrent executions
9-
// is indeterministic (and depends on the backend command management).
10-
// However, this test verifies that concurrent graph submissions do not trigger
11-
// errors nor memory leaks.
7+
// Test submitting a graph multiple times in a threaded situation.
8+
// According to the spec: if a graph is submitted multiple times, dependencies are
9+
// automatically added by the runtime to prevent concurrent executions of an
10+
// identical graph, and so the result is deterministic and we can check the
11+
// results.
1212

1313
#include "../graph_common.hpp"
1414

@@ -20,15 +20,16 @@ int main() {
2020
using T = int;
2121

2222
const unsigned NumThreads = std::thread::hardware_concurrency();
23+
const unsigned SubmitsPerThread = 128;
2324
std::vector<T> DataA(Size), DataB(Size), DataC(Size);
2425

2526
std::iota(DataA.begin(), DataA.end(), 1);
2627
std::iota(DataB.begin(), DataB.end(), 10);
2728
std::iota(DataC.begin(), DataC.end(), 1000);
2829

2930
std::vector<T> ReferenceA(DataA), ReferenceB(DataB), ReferenceC(DataC);
30-
calculate_reference_data(NumThreads, Size, ReferenceA, ReferenceB,
31-
ReferenceC);
31+
calculate_reference_data(NumThreads * SubmitsPerThread, Size, ReferenceA,
32+
ReferenceB, ReferenceC);
3233

3334
exp_ext::command_graph Graph{Queue.get_context(), Queue.get_device()};
3435

@@ -45,34 +46,40 @@ int main() {
4546
run_kernels_usm(Queue, Size, PtrA, PtrB, PtrC);
4647
Graph.end_recording();
4748

48-
std::vector<exp_ext::command_graph<exp_ext::graph_state::executable>>
49-
GraphExecs;
50-
for (unsigned i = 0; i < NumThreads; ++i) {
51-
GraphExecs.push_back(Graph.finalize());
52-
}
49+
auto GraphExec = Graph.finalize();
5350

5451
Barrier SyncPoint{NumThreads};
5552

56-
auto SubmitGraph = [&](int ThreadNum) {
53+
auto SubmitGraph = [&]() {
5754
SyncPoint.wait();
58-
Queue.submit([&](sycl::handler &CGH) {
59-
CGH.ext_oneapi_graph(GraphExecs[ThreadNum]);
60-
});
55+
for (unsigned i = 0; i < SubmitsPerThread; ++i) {
56+
Queue.submit(
57+
[&](sycl::handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
58+
}
6159
};
6260

6361
std::vector<std::thread> Threads;
6462
Threads.reserve(NumThreads);
6563

6664
for (unsigned i = 0; i < NumThreads; ++i) {
67-
Threads.emplace_back(SubmitGraph, i);
65+
Threads.emplace_back(SubmitGraph);
6866
}
6967

7068
for (unsigned i = 0; i < NumThreads; ++i) {
7169
Threads[i].join();
7270
}
7371

72+
Queue.copy(PtrA, DataA.data(), Size);
73+
Queue.copy(PtrB, DataB.data(), Size);
74+
Queue.copy(PtrC, DataC.data(), Size);
7475
Queue.wait_and_throw();
7576

77+
for (int i = 0; i < Size; ++i) {
78+
check_value(i, ReferenceA[i], DataA[i], "A");
79+
check_value(i, ReferenceB[i], DataB[i], "B");
80+
check_value(i, ReferenceC[i], DataC[i], "C");
81+
}
82+
7683
free(PtrA, Queue);
7784
free(PtrB, Queue);
7885
free(PtrC, Queue);

0 commit comments

Comments
 (0)