4
4
// Extra run to check for immediate-command-list in Level Zero
5
5
// RUN: %if level_zero && linux %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
6
6
7
- // Test submitting a graph in a threaded situation.
8
- // Note that we do not check the outputs because multiple concurrent executions
9
- // are non-deterministic (and depend on the backend command management).
10
- // However, this test verifies that concurrent graph submissions do not trigger
11
- // errors or memory leaks.
7
+ // Test submitting a graph multiple times in a threaded situation.
8
+ // According to the spec, if a graph is submitted multiple times, dependencies are
9
+ // automatically added by the runtime to prevent concurrent executions of an
10
+ // identical graph, and so the result is deterministic and we can check the
11
+ // results.
12
12
13
13
#include " ../graph_common.hpp"
14
14
@@ -20,15 +20,16 @@ int main() {
20
20
using T = int ;
21
21
22
22
const unsigned NumThreads = std::thread::hardware_concurrency ();
23
+ const unsigned SubmitsPerThread = 128 ;
23
24
std::vector<T> DataA (Size ), DataB (Size ), DataC (Size );
24
25
25
26
std::iota (DataA.begin (), DataA.end (), 1 );
26
27
std::iota (DataB.begin (), DataB.end (), 10 );
27
28
std::iota (DataC.begin (), DataC.end (), 1000 );
28
29
29
30
std::vector<T> ReferenceA (DataA), ReferenceB (DataB), ReferenceC (DataC);
30
- calculate_reference_data (NumThreads, Size , ReferenceA, ReferenceB ,
31
- ReferenceC);
31
+ calculate_reference_data (NumThreads * SubmitsPerThread , Size , ReferenceA,
32
+ ReferenceB, ReferenceC);
32
33
33
34
exp_ext::command_graph Graph{Queue.get_context (), Queue.get_device ()};
34
35
@@ -45,34 +46,40 @@ int main() {
45
46
run_kernels_usm (Queue, Size , PtrA, PtrB, PtrC);
46
47
Graph.end_recording ();
47
48
48
- std::vector<exp_ext::command_graph<exp_ext::graph_state::executable>>
49
- GraphExecs;
50
- for (unsigned i = 0 ; i < NumThreads; ++i) {
51
- GraphExecs.push_back (Graph.finalize ());
52
- }
49
+ auto GraphExec = Graph.finalize ();
53
50
54
51
Barrier SyncPoint{NumThreads};
55
52
56
- auto SubmitGraph = [&](int ThreadNum ) {
53
+ auto SubmitGraph = [&]() {
57
54
SyncPoint.wait ();
58
- Queue.submit ([&](sycl::handler &CGH) {
59
- CGH.ext_oneapi_graph (GraphExecs[ThreadNum]);
60
- });
55
+ for (unsigned i = 0 ; i < SubmitsPerThread; ++i) {
56
+ Queue.submit (
57
+ [&](sycl::handler &CGH) { CGH.ext_oneapi_graph (GraphExec); });
58
+ }
61
59
};
62
60
63
61
std::vector<std::thread> Threads;
64
62
Threads.reserve (NumThreads);
65
63
66
64
for (unsigned i = 0 ; i < NumThreads; ++i) {
67
- Threads.emplace_back (SubmitGraph, i );
65
+ Threads.emplace_back (SubmitGraph);
68
66
}
69
67
70
68
for (unsigned i = 0 ; i < NumThreads; ++i) {
71
69
Threads[i].join ();
72
70
}
73
71
72
+ Queue.copy (PtrA, DataA.data (), Size );
73
+ Queue.copy (PtrB, DataB.data (), Size );
74
+ Queue.copy (PtrC, DataC.data (), Size );
74
75
Queue.wait_and_throw ();
75
76
77
+ for (int i = 0 ; i < Size ; ++i) {
78
+ check_value (i, ReferenceA[i], DataA[i], " A" );
79
+ check_value (i, ReferenceB[i], DataB[i], " B" );
80
+ check_value (i, ReferenceC[i], DataC[i], " C" );
81
+ }
82
+
76
83
free (PtrA, Queue);
77
84
free (PtrB, Queue);
78
85
free (PtrC, Queue);
0 commit comments