Skip to content

Commit 92dfb42

Browse files
Added extra test to invoke copy engine with D2H copies
1 parent 7f5da80 commit 92dfb42

File tree

2 files changed

+127
-8
lines changed

2 files changed

+127
-8
lines changed

sycl/test-e2e/Graph/ValidUsage/linear_graph_copy.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
// Extra run to check for immediate-command-list in Level Zero
66
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1 %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %}
77
//
8-
98
// Tests that the optimization to use the L0 Copy Engine for memory commands
109
// does not interfere with the linear graph optimization
1110

@@ -47,7 +46,6 @@ int main() {
4746
Queue.copy(DataA.data(), PtrA, Size);
4847
Queue.copy(DataB.data(), PtrB, Size);
4948
Queue.copy(DataC.data(), PtrC, Size);
50-
Queue.wait_and_throw();
5149

5250
Graph.begin_recording(Queue);
5351
Queue.submit([&](handler &CGH) {
@@ -79,15 +77,13 @@ int main() {
7977

8078
auto GraphExec = Graph.finalize();
8179

82-
event Event;
8380
for (unsigned n = 0; n < Iterations; n++) {
84-
Event =
85-
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
81+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
8682
}
8783

88-
Queue.copy(PtrA, DataA.data(), Size, Event);
89-
Queue.copy(PtrB, DataB.data(), Size, Event);
90-
Queue.copy(PtrC, DataC.data(), Size, Event);
84+
Queue.copy(PtrA, DataA.data(), Size);
85+
Queue.copy(PtrB, DataB.data(), Size);
86+
Queue.copy(PtrC, DataC.data(), Size);
9187
Queue.wait_and_throw();
9288

9389
free(PtrA, Queue);
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// RUN: %{build} -o %t.out
2+
// RUN: %{run} %t.out
3+
// Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG
4+
// RUN: %if level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0
5+
// %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK
6+
// %} Extra run to check for immediate-command-list in Level Zero RUN: %if
7+
// level_zero %{env SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
8+
// %{l0_leak_check} %{run} %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK
9+
// %}
10+
//
11+
// Extra runs to test in-order command lists path
12+
// RUN: %if level_zero %{env UR_L0_USE_DRIVER_INORDER_LISTS=1
13+
// SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=0 %{l0_leak_check} %{run}
14+
// %t.out 2>&1 | FileCheck %s --implicit-check-not=LEAK %} RUN: %if level_zero
15+
// %{env UR_L0_USE_DRIVER_INORDER_LISTS=1
16+
// SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
17+
// UR_L0_USE_DRIVER_COUNTER_BASED_EVENTS=0 %{l0_leak_check} %{run} %t.out 2>&1 |
18+
// FileCheck %s --implicit-check-not=LEAK %}
19+
//
20+
// Tests that the optimization to use the L0 Copy Engine for memory commands
21+
// does not interfere with the linear graph optimization
22+
//
23+
// REQUIRES: aspect-usm_host_allocations
24+
25+
#include "../graph_common.hpp"
26+
27+
#include <sycl/properties/queue_properties.hpp>
28+
29+
int main() {
30+
queue Queue{{sycl::property::queue::in_order{}}};
31+
32+
using T = int;
33+
34+
const T ModValue = 7;
35+
std::vector<T> DataA(Size), DataB(Size), DataC(Size);
36+
37+
std::iota(DataA.begin(), DataA.end(), 1);
38+
std::iota(DataB.begin(), DataB.end(), 10);
39+
std::iota(DataC.begin(), DataC.end(), 1000);
40+
41+
// Create reference data for output
42+
std::vector<T> ReferenceA(DataA), ReferenceB(DataB), ReferenceC(DataC);
43+
for (size_t i = 0; i < Iterations; i++) {
44+
for (size_t j = 0; j < Size; j++) {
45+
ReferenceA[j] += ModValue;
46+
ReferenceB[j] = ReferenceA[j];
47+
ReferenceB[j] -= ModValue;
48+
ReferenceC[j] = ReferenceB[j];
49+
ReferenceC[j] += ModValue;
50+
}
51+
}
52+
53+
ext::oneapi::experimental::command_graph Graph{Queue.get_context(),
54+
Queue.get_device()};
55+
56+
T *PtrA = malloc_device<T>(Size, Queue);
57+
T *PtrB = malloc_device<T>(Size, Queue);
58+
T *PtrC = malloc_device<T>(Size, Queue);
59+
60+
// L0 copy engine is disabled for D2D copies so we need to create additional
61+
// D2H copy events in between to invoke it.
62+
T *PtrBHost = malloc_host<T>(Size, Queue);
63+
T *PtrCHost = malloc_host<T>(Size, Queue);
64+
65+
Queue.copy(DataA.data(), PtrA, Size);
66+
Queue.copy(DataB.data(), PtrB, Size);
67+
Queue.copy(DataC.data(), PtrC, Size);
68+
69+
Graph.begin_recording(Queue);
70+
Queue.submit([&](handler &CGH) {
71+
CGH.parallel_for(range<1>(Size), [=](item<1> id) {
72+
auto LinID = id.get_linear_id();
73+
PtrA[LinID] += ModValue;
74+
});
75+
});
76+
77+
Queue.submit(
78+
[&](handler &CGH) { CGH.memcpy(PtrBHost, PtrA, Size * sizeof(T)); });
79+
Queue.submit(
80+
[&](handler &CGH) { CGH.memcpy(PtrB, PtrBHost, Size * sizeof(T)); });
81+
82+
Queue.submit([&](handler &CGH) {
83+
CGH.parallel_for(range<1>(Size), [=](item<1> id) {
84+
auto LinID = id.get_linear_id();
85+
PtrB[LinID] -= ModValue;
86+
});
87+
});
88+
89+
Queue.submit(
90+
[&](handler &CGH) { CGH.memcpy(PtrCHost, PtrB, Size * sizeof(T)); });
91+
Queue.submit(
92+
[&](handler &CGH) { CGH.memcpy(PtrC, PtrCHost, Size * sizeof(T)); });
93+
94+
Queue.submit([&](handler &CGH) {
95+
CGH.parallel_for(range<1>(Size), [=](item<1> id) {
96+
auto LinID = id.get_linear_id();
97+
PtrC[LinID] += ModValue;
98+
});
99+
});
100+
101+
Graph.end_recording();
102+
103+
auto GraphExec = Graph.finalize();
104+
105+
for (unsigned n = 0; n < Iterations; n++) {
106+
Queue.submit([&](handler &CGH) { CGH.ext_oneapi_graph(GraphExec); });
107+
}
108+
109+
Queue.copy(PtrA, DataA.data(), Size);
110+
Queue.copy(PtrB, DataB.data(), Size);
111+
Queue.copy(PtrC, DataC.data(), Size);
112+
Queue.wait_and_throw();
113+
114+
free(PtrA, Queue);
115+
free(PtrB, Queue);
116+
free(PtrC, Queue);
117+
118+
for (size_t i = 0; i < Size; i++) {
119+
assert(check_value(i, ReferenceA[i], DataA[i], "DataA"));
120+
assert(check_value(i, ReferenceB[i], DataB[i], "DataB"));
121+
assert(check_value(i, ReferenceC[i], DataC[i], "DataC"));
122+
}
123+
}

0 commit comments

Comments
 (0)