Skip to content

Commit 7b4672d

Browse files
authored
[SYCL] Improve testing of host-task (#2540)
This patch introduces two use-cases for host-task implementation. The issue of the first use-case (`test3`) was about performance and a long-run/wait of host-taske depening on another host-task. This issue rooted in lots of enqueue calls for blocked command and was fixed in bc8f0a4 (#2347). The second use-case shows a deadlock situation when host-task `A` depends on the blocked one `B` via call `handler::depends_on()`. The `A` host-task won't be enqueued as it's `EmptyCommand` is in blocked state. The test for this use-case is disable now and will be enabled when the deadlock is fixed. The deadlock will be fixed in follow-up patch.
1 parent 421860a commit 7b4672d

File tree

1 file changed

+186
-9
lines changed

1 file changed

+186
-9
lines changed

sycl/test/host-interop-task/host-task.cpp

Lines changed: 186 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,58 @@
1-
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %threads_lib -lOpenCL
2-
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3-
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4-
// RUN: %ACC_RUN_PLACEHOLDER %t.out
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out 1
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out 1
4+
// RUN: %ACC_RUN_PLACEHOLDER %t.out 1
5+
6+
// RUN: %CPU_RUN_PLACEHOLDER %t.out 2
7+
// RUN: %GPU_RUN_PLACEHOLDER %t.out 2
8+
// RUN: %ACC_RUN_PLACEHOLDER %t.out 2
9+
10+
// RUN: %CPU_RUN_PLACEHOLDER %t.out 3
11+
// RUN: %GPU_RUN_PLACEHOLDER %t.out 3
12+
// RUN: %ACC_RUN_PLACEHOLDER %t.out 3
13+
14+
// RUNx: %CPU_RUN_PLACEHOLDER %t.out 4
15+
// RUNx: %GPU_RUN_PLACEHOLDER %t.out 4
16+
// RUNx: %ACC_RUN_PLACEHOLDER %t.out 4
517

618
#include <CL/sycl.hpp>
19+
#include <chrono>
20+
#include <iostream>
21+
#include <vector>
722

823
using namespace cl::sycl;
924
using namespace cl::sycl::access;
1025

1126
static constexpr size_t BUFFER_SIZE = 1024;
1227

28+
static auto EH = [](exception_list EL) {
29+
for (const std::exception_ptr &E : EL) {
30+
throw E;
31+
}
32+
};
33+
1334
// Check that a single host-task with a buffer will work
1435
void test1() {
1536
buffer<int, 1> Buffer{BUFFER_SIZE};
1637

17-
queue Q;
38+
queue Q(EH);
1839

1940
Q.submit([&](handler &CGH) {
2041
auto Acc = Buffer.get_access<mode::write>(CGH);
2142
CGH.codeplay_host_task([=] {
2243
// A no-op
2344
});
2445
});
46+
47+
Q.wait_and_throw();
2548
}
2649

50+
// Check that a host task after the kernel (deps via buffer) will work
2751
void test2() {
2852
buffer<int, 1> Buffer1{BUFFER_SIZE};
2953
buffer<int, 1> Buffer2{BUFFER_SIZE};
3054

31-
queue Q;
55+
queue Q(EH);
3256

3357
Q.submit([&](handler &CGH) {
3458
auto Acc = Buffer1.template get_access<mode::write>(CGH);
@@ -55,10 +79,163 @@ void test2() {
5579
assert(Acc[Idx] == 123);
5680
}
5781
}
82+
83+
Q.wait_and_throw();
84+
}
85+
86+
// Host-task depending on another host-task via both buffers and
87+
// handler::depends_on() should not hang
88+
void test3() {
89+
queue Q(EH);
90+
91+
static constexpr size_t BufferSize = 10 * 1024;
92+
93+
buffer<int, 1> B0{range<1>{BufferSize}};
94+
buffer<int, 1> B1{range<1>{BufferSize}};
95+
buffer<int, 1> B2{range<1>{BufferSize}};
96+
buffer<int, 1> B3{range<1>{BufferSize}};
97+
buffer<int, 1> B4{range<1>{BufferSize}};
98+
buffer<int, 1> B5{range<1>{BufferSize}};
99+
buffer<int, 1> B6{range<1>{BufferSize}};
100+
buffer<int, 1> B7{range<1>{BufferSize}};
101+
buffer<int, 1> B8{range<1>{BufferSize}};
102+
buffer<int, 1> B9{range<1>{BufferSize}};
103+
104+
std::vector<event> Deps;
105+
106+
using namespace std::chrono_literals;
107+
static constexpr size_t Count = 10;
108+
109+
auto Start = std::chrono::steady_clock::now();
110+
for (size_t Idx = 0; Idx < Count; ++Idx) {
111+
event E = Q.submit([&](handler &CGH) {
112+
CGH.depends_on(Deps);
113+
114+
std::cout << "Submit: " << Idx << std::endl;
115+
116+
auto Acc0 = B0.get_access<mode::read_write, target::host_buffer>(CGH);
117+
auto Acc1 = B1.get_access<mode::read_write, target::host_buffer>(CGH);
118+
auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);
119+
auto Acc3 = B3.get_access<mode::read_write, target::host_buffer>(CGH);
120+
auto Acc4 = B4.get_access<mode::read_write, target::host_buffer>(CGH);
121+
auto Acc5 = B5.get_access<mode::read_write, target::host_buffer>(CGH);
122+
auto Acc6 = B6.get_access<mode::read_write, target::host_buffer>(CGH);
123+
auto Acc7 = B7.get_access<mode::read_write, target::host_buffer>(CGH);
124+
auto Acc8 = B8.get_access<mode::read_write, target::host_buffer>(CGH);
125+
auto Acc9 = B9.get_access<mode::read_write, target::host_buffer>(CGH);
126+
127+
CGH.codeplay_host_task([=] {
128+
uint64_t X = 0;
129+
130+
X ^= reinterpret_cast<uint64_t>(&Acc0[Idx + 0]);
131+
X ^= reinterpret_cast<uint64_t>(&Acc1[Idx + 1]);
132+
X ^= reinterpret_cast<uint64_t>(&Acc2[Idx + 2]);
133+
X ^= reinterpret_cast<uint64_t>(&Acc3[Idx + 3]);
134+
X ^= reinterpret_cast<uint64_t>(&Acc4[Idx + 4]);
135+
X ^= reinterpret_cast<uint64_t>(&Acc5[Idx + 5]);
136+
X ^= reinterpret_cast<uint64_t>(&Acc6[Idx + 6]);
137+
X ^= reinterpret_cast<uint64_t>(&Acc7[Idx + 7]);
138+
X ^= reinterpret_cast<uint64_t>(&Acc8[Idx + 8]);
139+
X ^= reinterpret_cast<uint64_t>(&Acc9[Idx + 9]);
140+
});
141+
});
142+
143+
Deps = {E};
144+
}
145+
146+
Q.wait_and_throw();
147+
auto End = std::chrono::steady_clock::now();
148+
149+
constexpr auto Threshold = 2s;
150+
151+
assert(End - Start < Threshold && "Host tasks were waiting for too long");
152+
}
153+
154+
// Host-task depending on another host-task via handler::depends_on() only
155+
// should not hang
156+
void test4() {
157+
queue Q(EH);
158+
159+
static constexpr size_t BufferSize = 10 * 1024;
160+
161+
buffer<int, 1> B0{range<1>{BufferSize}};
162+
buffer<int, 1> B1{range<1>{BufferSize}};
163+
buffer<int, 1> B2{range<1>{BufferSize}};
164+
buffer<int, 1> B3{range<1>{BufferSize}};
165+
buffer<int, 1> B4{range<1>{BufferSize}};
166+
buffer<int, 1> B5{range<1>{BufferSize}};
167+
168+
// This host task should be submitted without hesitation
169+
event E1 = Q.submit([&](handler &CGH) {
170+
std::cout << "Submit 1" << std::endl;
171+
172+
auto Acc0 = B0.get_access<mode::read_write, target::host_buffer>(CGH);
173+
auto Acc1 = B1.get_access<mode::read_write, target::host_buffer>(CGH);
174+
auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);
175+
176+
CGH.codeplay_host_task([=] {
177+
Acc0[0] = 1;
178+
Acc1[0] = 2;
179+
Acc2[0] = 3;
180+
});
181+
});
182+
183+
// This host task is going to depend on blocked empty node of the first
184+
// host-task (via buffer #2). Still this one should be enqueued.
185+
event E2 = Q.submit([&](handler &CGH) {
186+
std::cout << "Submit 2" << std::endl;
187+
188+
auto Acc2 = B2.get_access<mode::read_write, target::host_buffer>(CGH);
189+
auto Acc3 = B3.get_access<mode::read_write, target::host_buffer>(CGH);
190+
191+
CGH.codeplay_host_task([=] {
192+
Acc2[1] = 1;
193+
Acc3[1] = 2;
194+
});
195+
});
196+
197+
// This host-task only depends on the second host-task via
198+
// handler::depends_on(). This one should not hang and should be enqueued
199+
// after host-task #2.
200+
event E3 = Q.submit([&](handler &CGH) {
201+
CGH.depends_on(E2);
202+
203+
std::cout << "Submit 3" << std::endl;
204+
205+
auto Acc4 = B4.get_access<mode::read_write, target::host_buffer>(CGH);
206+
auto Acc5 = B5.get_access<mode::read_write, target::host_buffer>(CGH);
207+
208+
CGH.codeplay_host_task([=] {
209+
Acc4[2] = 1;
210+
Acc5[2] = 2;
211+
});
212+
});
213+
214+
Q.wait_and_throw();
58215
}
59216

60-
int main() {
61-
test1();
62-
test2();
217+
int main(int Argc, const char *Argv[]) {
218+
if (Argc < 2)
219+
return 1;
220+
221+
int TestIdx = std::stoi(Argv[1]);
222+
223+
switch (TestIdx) {
224+
case 1:
225+
test1();
226+
break;
227+
case 2:
228+
test2();
229+
break;
230+
case 3:
231+
test3();
232+
break;
233+
case 4:
234+
test4();
235+
break;
236+
default:
237+
return 1;
238+
}
239+
63240
return 0;
64241
}

0 commit comments

Comments
 (0)