Skip to content

Commit a7ad8b8

Browse files
authored
[SYCL] Improve testing of host task (#2551)
A follow-up on 7b4672d. This patch adds a bit more sophisticated test cases. The added tests are disabled and should be enabled within patch appropriate issues fix. Signed-off-by: Sergey Kanaev <[email protected]>
1 parent 7ce5776 commit a7ad8b8

File tree

4 files changed

+254
-71
lines changed

4 files changed

+254
-71
lines changed
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
3+
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
6+
7+
// RUNx: %CPU_RUN_PLACEHOLDER %t.out 10
8+
// RUNx: %GPU_RUN_PLACEHOLDER %t.out 10
9+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out 10
10+
11+
#include <CL/sycl.hpp>
12+
#include <iostream>
13+
14+
using namespace cl::sycl;
15+
using namespace cl::sycl::access;
16+
17+
static constexpr size_t BUFFER_SIZE = 1024;
18+
19+
static auto EH = [](exception_list EL) {
20+
for (const std::exception_ptr &E : EL) {
21+
throw E;
22+
}
23+
};
24+
25+
// Host-task depending on another host-task via handler::depends_on() only
26+
// should not hang
27+
void test(size_t Count) {
28+
queue Q(EH);
29+
30+
static constexpr size_t BufferSize = 10 * 1024;
31+
32+
buffer<int, 1> B0{range<1>{BufferSize}};
33+
buffer<int, 1> B1{range<1>{BufferSize}};
34+
buffer<int, 1> B2{range<1>{BufferSize}};
35+
buffer<int, 1> B3{range<1>{BufferSize}};
36+
buffer<int, 1> B4{range<1>{BufferSize}};
37+
buffer<int, 1> B5{range<1>{BufferSize}};
38+
39+
for (size_t Idx = 1; Idx <= Count; ++Idx) {
40+
// This host task should be submitted without hesitation
41+
event E1 = Q.submit([&](handler &CGH) {
42+
std::cout << "Submit 1" << std::endl;
43+
44+
auto Acc0 = B0.get_access<mode::read_write, target::host_buffer>(CGH);
45+
auto Acc1 = B1.get_access<mode::read_write, target::host_buffer>(CGH);
46+
auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);
47+
48+
CGH.codeplay_host_task([=] {
49+
Acc0[0] = 1 * Idx;
50+
Acc1[0] = 2 * Idx;
51+
Acc2[0] = 3 * Idx;
52+
});
53+
});
54+
55+
// This host task is going to depend on blocked empty node of the first
56+
// host-task (via buffer #2). Still this one should be enqueued.
57+
event E2 = Q.submit([&](handler &CGH) {
58+
std::cout << "Submit 2" << std::endl;
59+
60+
auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);
61+
auto Acc3 = B3.get_access<mode::read_write, target::host_buffer>(CGH);
62+
63+
CGH.codeplay_host_task([=] {
64+
Acc2[1] = 1 * Idx;
65+
Acc3[1] = 2 * Idx;
66+
});
67+
});
68+
69+
// This host-task only depends on the second host-task via
70+
// handler::depends_on(). This one should not hang and should be eexecuted
71+
// after host-task #2.
72+
event E3 = Q.submit([&](handler &CGH) {
73+
CGH.depends_on(E2);
74+
75+
std::cout << "Submit 3" << std::endl;
76+
77+
auto Acc4 = B4.get_access<mode::read_write, target::host_buffer>(CGH);
78+
auto Acc5 = B5.get_access<mode::read_write, target::host_buffer>(CGH);
79+
80+
CGH.codeplay_host_task([=] {
81+
Acc4[2] = 1 * Idx;
82+
Acc5[2] = 2 * Idx;
83+
});
84+
});
85+
}
86+
87+
Q.wait_and_throw();
88+
}
89+
90+
int main(int Argc, const char *Argv[]) {
91+
size_t Count = 1;
92+
if (Argc > 1)
93+
Count = std::stoi(Argv[1]);
94+
95+
test(Count);
96+
return 0;
97+
}
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
3+
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
6+
7+
// RUNx: %CPU_RUN_PLACEHOLDER %t.out 10
8+
// RUNx: %GPU_RUN_PLACEHOLDER %t.out 10
9+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out 10
10+
11+
#include <CL/sycl.hpp>
12+
#include <chrono>
13+
#include <iostream>
14+
#include <thread>
15+
16+
using namespace cl::sycl;
17+
using namespace cl::sycl::access;
18+
19+
static constexpr size_t BUFFER_SIZE = 1024;
20+
21+
static auto EH = [](exception_list EL) {
22+
for (const std::exception_ptr &E : EL) {
23+
throw E;
24+
}
25+
};
26+
27+
// Host-task depending on another host-task via handler::depends_on() only
28+
// should not hang. A bit more complicated case with kernels depending on
29+
// host-task being involved.
30+
void test(size_t Count) {
31+
queue Q(EH);
32+
33+
static constexpr size_t BufferSize = 10 * 1024;
34+
35+
buffer<int, 1> B0{range<1>{BufferSize}};
36+
buffer<int, 1> B1{range<1>{BufferSize}};
37+
buffer<int, 1> B2{range<1>{BufferSize}};
38+
buffer<int, 1> B3{range<1>{BufferSize}};
39+
buffer<int, 1> B4{range<1>{BufferSize}};
40+
buffer<int, 1> B5{range<1>{BufferSize}};
41+
42+
using namespace std::chrono_literals;
43+
constexpr auto SleepFor = 1s;
44+
45+
for (size_t Idx = 1; Idx <= Count; ++Idx) {
46+
// This host task should be submitted without hesitation
47+
Q.submit([&](handler &CGH) {
48+
std::cout << "Submit HT-1" << std::endl;
49+
50+
auto Acc0 = B0.get_access<mode::read_write, target::host_buffer>(CGH);
51+
52+
CGH.codeplay_host_task([=] {
53+
std::this_thread::sleep_for(SleepFor);
54+
Acc0[0] = 1 * Idx;
55+
});
56+
});
57+
58+
Q.submit([&](handler &CGH) {
59+
std::cout << "Submit Kernel-1" << std::endl;
60+
61+
auto Acc0 = B0.get_access<mode::read_write>(CGH);
62+
63+
CGH.single_task<class Test5_Kernel1>([=] { Acc0[1] = 1 * Idx; });
64+
});
65+
66+
Q.submit([&](handler &CGH) {
67+
std::cout << "Submit Kernel-2" << std::endl;
68+
69+
auto Acc1 = B1.get_access<mode::read_write>(CGH);
70+
71+
CGH.single_task<class Test5_Kernel2>([=] { Acc1[2] = 1 * Idx; });
72+
});
73+
74+
Q.submit([&](handler &CGH) {
75+
std::cout << "Submit HT-2" << std::endl;
76+
77+
auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);
78+
79+
CGH.codeplay_host_task([=] {
80+
std::this_thread::sleep_for(SleepFor);
81+
Acc2[3] = 1 * Idx;
82+
});
83+
});
84+
85+
// This host task is going to depend on blocked empty node of the second
86+
// host-task (via buffer #0). Still this one should be enqueued.
87+
event EHT3 = Q.submit([&](handler &CGH) {
88+
std::cout << "Submit HT-3" << std::endl;
89+
90+
auto Acc0 = B0.get_access<mode::read_write, target::host_buffer>(CGH);
91+
auto Acc1 = B1.get_access<mode::read_write, target::host_buffer>(CGH);
92+
auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);
93+
94+
CGH.codeplay_host_task([=] {
95+
std::this_thread::sleep_for(SleepFor);
96+
Acc0[4] = 1 * Idx;
97+
Acc1[4] = 2 * Idx;
98+
Acc2[4] = 3 * Idx;
99+
});
100+
});
101+
102+
// This host-task only depends on the third host-task via
103+
// handler::depends_on(). This one should not hang and should be executed
104+
// after host-task #3.
105+
Q.submit([&](handler &CGH) {
106+
std::cout << "Submit HT-4" << std::endl;
107+
108+
CGH.depends_on(EHT3);
109+
110+
auto Acc5 = B5.get_access<mode::read_write, target::host_buffer>(CGH);
111+
112+
CGH.codeplay_host_task([=] { Acc5[5] = 1 * Idx; });
113+
});
114+
}
115+
116+
Q.wait_and_throw();
117+
}
118+
119+
int main(int Argc, const char *Argv[]) {
120+
size_t Count = 1;
121+
if (Argc > 1)
122+
Count = std::stoi(Argv[1]);
123+
124+
test(Count);
125+
return 0;
126+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
3+
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
4+
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
5+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out
6+
7+
#include <CL/sycl.hpp>
8+
9+
cl::sycl::event submit(cl::sycl::queue &Q, cl::sycl::buffer<int> &B) {
10+
return Q.submit([&](cl::sycl::handler &CGH) {
11+
auto A = B.template get_access<cl::sycl::access::mode::read_write>(CGH);
12+
CGH.codeplay_host_task([=]() { (void)A; });
13+
});
14+
}
15+
16+
int main() {
17+
cl::sycl::queue Q;
18+
int Status = 0;
19+
cl::sycl::buffer<int> A{&Status, 1};
20+
cl::sycl::vector_class<cl::sycl::event> Events;
21+
22+
Events.push_back(submit(Q, A));
23+
Events.push_back(submit(Q, A));
24+
Q.submit([&](sycl::handler &CGH) {
25+
CGH.depends_on(Events);
26+
CGH.codeplay_host_task([&] { printf("all done\n"); });
27+
}).wait_and_throw();
28+
29+
return 0;
30+
}

sycl/test/host-interop-task/host-task.cpp

Lines changed: 1 addition & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,6 @@
1111
// RUN: %GPU_RUN_PLACEHOLDER %t.out 3
1212
// RUN: %ACC_RUN_PLACEHOLDER %t.out 3
1313

14-
// RUNx: %CPU_RUN_PLACEHOLDER %t.out 4
15-
// RUNx: %GPU_RUN_PLACEHOLDER %t.out 4
16-
// RUNx: %ACC_RUN_PLACEHOLDER %t.out 4
17-
1814
#include <CL/sycl.hpp>
1915
#include <chrono>
2016
#include <iostream>
@@ -103,7 +99,6 @@ void test3() {
10399

104100
std::vector<event> Deps;
105101

106-
using namespace std::chrono_literals;
107102
static constexpr size_t Count = 10;
108103

109104
auto Start = std::chrono::steady_clock::now();
@@ -146,74 +141,12 @@ void test3() {
146141
Q.wait_and_throw();
147142
auto End = std::chrono::steady_clock::now();
148143

144+
using namespace std::chrono_literals;
149145
constexpr auto Threshold = 2s;
150146

151147
assert(End - Start < Threshold && "Host tasks were waiting for too long");
152148
}
153149

154-
// Host-task depending on another host-task via handler::depends_on() only
155-
// should not hang
156-
void test4() {
157-
queue Q(EH);
158-
159-
static constexpr size_t BufferSize = 10 * 1024;
160-
161-
buffer<int, 1> B0{range<1>{BufferSize}};
162-
buffer<int, 1> B1{range<1>{BufferSize}};
163-
buffer<int, 1> B2{range<1>{BufferSize}};
164-
buffer<int, 1> B3{range<1>{BufferSize}};
165-
buffer<int, 1> B4{range<1>{BufferSize}};
166-
buffer<int, 1> B5{range<1>{BufferSize}};
167-
168-
// This host task should be submitted without hesitation
169-
event E1 = Q.submit([&](handler &CGH) {
170-
std::cout << "Submit 1" << std::endl;
171-
172-
auto Acc0 = B0.get_access<mode::read_write, target::host_buffer>(CGH);
173-
auto Acc1 = B1.get_access<mode::read_write, target::host_buffer>(CGH);
174-
auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);
175-
176-
CGH.codeplay_host_task([=] {
177-
Acc0[0] = 1;
178-
Acc1[0] = 2;
179-
Acc2[0] = 3;
180-
});
181-
});
182-
183-
// This host task is going to depend on blocked empty node of the first
184-
// host-task (via buffer #2). Still this one should be enqueued.
185-
event E2 = Q.submit([&](handler &CGH) {
186-
std::cout << "Submit 2" << std::endl;
187-
188-
auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);
189-
auto Acc3 = B3.get_access<mode::read_write, target::host_buffer>(CGH);
190-
191-
CGH.codeplay_host_task([=] {
192-
Acc2[1] = 1;
193-
Acc3[1] = 2;
194-
});
195-
});
196-
197-
// This host-task only depends on the second host-task via
198-
// handler::depends_on(). This one should not hang and should be enqueued
199-
// after host-task #2.
200-
event E3 = Q.submit([&](handler &CGH) {
201-
CGH.depends_on(E2);
202-
203-
std::cout << "Submit 3" << std::endl;
204-
205-
auto Acc4 = B4.get_access<mode::read_write, target::host_buffer>(CGH);
206-
auto Acc5 = B5.get_access<mode::read_write, target::host_buffer>(CGH);
207-
208-
CGH.codeplay_host_task([=] {
209-
Acc4[2] = 1;
210-
Acc5[2] = 2;
211-
});
212-
});
213-
214-
Q.wait_and_throw();
215-
}
216-
217150
int main(int Argc, const char *Argv[]) {
218151
if (Argc < 2)
219152
return 1;
@@ -230,9 +163,6 @@ int main(int Argc, const char *Argv[]) {
230163
case 3:
231164
test3();
232165
break;
233-
case 4:
234-
test4();
235-
break;
236166
default:
237167
return 1;
238168
}

0 commit comments

Comments
 (0)