1
- // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %threads_lib -lOpenCL
2
- // RUN: %CPU_RUN_PLACEHOLDER %t.out
3
- // RUN: %GPU_RUN_PLACEHOLDER %t.out
4
- // RUN: %ACC_RUN_PLACEHOLDER %t.out
1
+ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
2
+ // RUN: %CPU_RUN_PLACEHOLDER %t.out 1
3
+ // RUN: %GPU_RUN_PLACEHOLDER %t.out 1
4
+ // RUN: %ACC_RUN_PLACEHOLDER %t.out 1
5
+
6
+ // RUN: %CPU_RUN_PLACEHOLDER %t.out 2
7
+ // RUN: %GPU_RUN_PLACEHOLDER %t.out 2
8
+ // RUN: %ACC_RUN_PLACEHOLDER %t.out 2
9
+
10
+ // RUN: %CPU_RUN_PLACEHOLDER %t.out 3
11
+ // RUN: %GPU_RUN_PLACEHOLDER %t.out 3
12
+ // RUN: %ACC_RUN_PLACEHOLDER %t.out 3
13
+
14
+ // RUNx: %CPU_RUN_PLACEHOLDER %t.out 4
15
+ // RUNx: %GPU_RUN_PLACEHOLDER %t.out 4
16
+ // RUNx: %ACC_RUN_PLACEHOLDER %t.out 4
5
17
6
18
#include <CL/sycl.hpp>

#include <chrono>
#include <exception>
#include <iostream>
#include <string>
#include <vector>
7
22
8
23
using namespace cl ::sycl;
9
24
using namespace cl ::sycl::access;
10
25
11
26
static constexpr size_t BUFFER_SIZE = 1024 ;
12
27
28
+ static auto EH = [](exception_list EL) {
29
+ for (const std::exception_ptr &E : EL) {
30
+ throw E;
31
+ }
32
+ };
33
+
13
34
// Check that a single host-task with a buffer will work
14
35
void test1 () {
15
36
buffer<int , 1 > Buffer{BUFFER_SIZE};
16
37
17
- queue Q;
38
+ queue Q (EH) ;
18
39
19
40
Q.submit ([&](handler &CGH) {
20
41
auto Acc = Buffer.get_access <mode::write >(CGH);
21
42
CGH.codeplay_host_task ([=] {
22
43
// A no-op
23
44
});
24
45
});
46
+
47
+ Q.wait_and_throw ();
25
48
}
26
49
50
+ // Check that a host task after the kernel (deps via buffer) will work
27
51
void test2 () {
28
52
buffer<int , 1 > Buffer1{BUFFER_SIZE};
29
53
buffer<int , 1 > Buffer2{BUFFER_SIZE};
30
54
31
- queue Q;
55
+ queue Q (EH) ;
32
56
33
57
Q.submit ([&](handler &CGH) {
34
58
auto Acc = Buffer1.template get_access <mode::write >(CGH);
@@ -55,10 +79,163 @@ void test2() {
55
79
assert (Acc[Idx] == 123 );
56
80
}
57
81
}
82
+
83
+ Q.wait_and_throw ();
84
+ }
85
+
86
+ // Host-task depending on another host-task via both buffers and
87
+ // handler::depends_on() should not hang
88
+ void test3 () {
89
+ queue Q (EH);
90
+
91
+ static constexpr size_t BufferSize = 10 * 1024 ;
92
+
93
+ buffer<int , 1 > B0{range<1 >{BufferSize}};
94
+ buffer<int , 1 > B1{range<1 >{BufferSize}};
95
+ buffer<int , 1 > B2{range<1 >{BufferSize}};
96
+ buffer<int , 1 > B3{range<1 >{BufferSize}};
97
+ buffer<int , 1 > B4{range<1 >{BufferSize}};
98
+ buffer<int , 1 > B5{range<1 >{BufferSize}};
99
+ buffer<int , 1 > B6{range<1 >{BufferSize}};
100
+ buffer<int , 1 > B7{range<1 >{BufferSize}};
101
+ buffer<int , 1 > B8{range<1 >{BufferSize}};
102
+ buffer<int , 1 > B9{range<1 >{BufferSize}};
103
+
104
+ std::vector<event> Deps;
105
+
106
+ using namespace std ::chrono_literals;
107
+ static constexpr size_t Count = 10 ;
108
+
109
+ auto Start = std::chrono::steady_clock::now ();
110
+ for (size_t Idx = 0 ; Idx < Count; ++Idx) {
111
+ event E = Q.submit ([&](handler &CGH) {
112
+ CGH.depends_on (Deps);
113
+
114
+ std::cout << " Submit: " << Idx << std::endl;
115
+
116
+ auto Acc0 = B0.get_access <mode::read_write, target::host_buffer>(CGH);
117
+ auto Acc1 = B1.get_access <mode::read_write, target::host_buffer>(CGH);
118
+ auto Acc2 = B2.get_access <mode::read_write, target::host_buffer>(CGH);
119
+ auto Acc3 = B3.get_access <mode::read_write, target::host_buffer>(CGH);
120
+ auto Acc4 = B4.get_access <mode::read_write, target::host_buffer>(CGH);
121
+ auto Acc5 = B5.get_access <mode::read_write, target::host_buffer>(CGH);
122
+ auto Acc6 = B6.get_access <mode::read_write, target::host_buffer>(CGH);
123
+ auto Acc7 = B7.get_access <mode::read_write, target::host_buffer>(CGH);
124
+ auto Acc8 = B8.get_access <mode::read_write, target::host_buffer>(CGH);
125
+ auto Acc9 = B9.get_access <mode::read_write, target::host_buffer>(CGH);
126
+
127
+ CGH.codeplay_host_task ([=] {
128
+ uint64_t X = 0 ;
129
+
130
+ X ^= reinterpret_cast <uint64_t >(&Acc0[Idx + 0 ]);
131
+ X ^= reinterpret_cast <uint64_t >(&Acc1[Idx + 1 ]);
132
+ X ^= reinterpret_cast <uint64_t >(&Acc2[Idx + 2 ]);
133
+ X ^= reinterpret_cast <uint64_t >(&Acc3[Idx + 3 ]);
134
+ X ^= reinterpret_cast <uint64_t >(&Acc4[Idx + 4 ]);
135
+ X ^= reinterpret_cast <uint64_t >(&Acc5[Idx + 5 ]);
136
+ X ^= reinterpret_cast <uint64_t >(&Acc6[Idx + 6 ]);
137
+ X ^= reinterpret_cast <uint64_t >(&Acc7[Idx + 7 ]);
138
+ X ^= reinterpret_cast <uint64_t >(&Acc8[Idx + 8 ]);
139
+ X ^= reinterpret_cast <uint64_t >(&Acc9[Idx + 9 ]);
140
+ });
141
+ });
142
+
143
+ Deps = {E};
144
+ }
145
+
146
+ Q.wait_and_throw ();
147
+ auto End = std::chrono::steady_clock::now ();
148
+
149
+ constexpr auto Threshold = 2s;
150
+
151
+ assert (End - Start < Threshold && " Host tasks were waiting for too long" );
152
+ }
153
+
154
+ // Host-task depending on another host-task via handler::depends_on() only
155
+ // should not hang
156
+ void test4 () {
157
+ queue Q (EH);
158
+
159
+ static constexpr size_t BufferSize = 10 * 1024 ;
160
+
161
+ buffer<int , 1 > B0{range<1 >{BufferSize}};
162
+ buffer<int , 1 > B1{range<1 >{BufferSize}};
163
+ buffer<int , 1 > B2{range<1 >{BufferSize}};
164
+ buffer<int , 1 > B3{range<1 >{BufferSize}};
165
+ buffer<int , 1 > B4{range<1 >{BufferSize}};
166
+ buffer<int , 1 > B5{range<1 >{BufferSize}};
167
+
168
+ // This host task should be submitted without hesitation
169
+ event E1 = Q.submit ([&](handler &CGH) {
170
+ std::cout << " Submit 1" << std::endl;
171
+
172
+ auto Acc0 = B0.get_access <mode::read_write, target::host_buffer>(CGH);
173
+ auto Acc1 = B1.get_access <mode::read_write, target::host_buffer>(CGH);
174
+ auto Acc2 = B2.get_access <mode::read_write, target::host_buffer>(CGH);
175
+
176
+ CGH.codeplay_host_task ([=] {
177
+ Acc0[0 ] = 1 ;
178
+ Acc1[0 ] = 2 ;
179
+ Acc2[0 ] = 3 ;
180
+ });
181
+ });
182
+
183
+ // This host task is going to depend on blocked empty node of the first
184
+ // host-task (via buffer #2). Still this one should be enqueued.
185
+ event E2 = Q.submit ([&](handler &CGH) {
186
+ std::cout << " Submit 2" << std::endl;
187
+
188
+ auto Acc2 = B2.get_access <mode::read_write, target::host_buffer>(CGH);
189
+ auto Acc3 = B3.get_access <mode::read_write, target::host_buffer>(CGH);
190
+
191
+ CGH.codeplay_host_task ([=] {
192
+ Acc2[1 ] = 1 ;
193
+ Acc3[1 ] = 2 ;
194
+ });
195
+ });
196
+
197
+ // This host-task only depends on the second host-task via
198
+ // handler::depends_on(). This one should not hang and should be enqueued
199
+ // after host-task #2.
200
+ event E3 = Q.submit ([&](handler &CGH) {
201
+ CGH.depends_on (E2 );
202
+
203
+ std::cout << " Submit 3" << std::endl;
204
+
205
+ auto Acc4 = B4.get_access <mode::read_write, target::host_buffer>(CGH);
206
+ auto Acc5 = B5.get_access <mode::read_write, target::host_buffer>(CGH);
207
+
208
+ CGH.codeplay_host_task ([=] {
209
+ Acc4[2 ] = 1 ;
210
+ Acc5[2 ] = 2 ;
211
+ });
212
+ });
213
+
214
+ Q.wait_and_throw ();
58
215
}
59
216
60
- int main () {
61
- test1 ();
62
- test2 ();
217
+ int main (int Argc, const char *Argv[]) {
218
+ if (Argc < 2 )
219
+ return 1 ;
220
+
221
+ int TestIdx = std::stoi (Argv[1 ]);
222
+
223
+ switch (TestIdx) {
224
+ case 1 :
225
+ test1 ();
226
+ break ;
227
+ case 2 :
228
+ test2 ();
229
+ break ;
230
+ case 3 :
231
+ test3 ();
232
+ break ;
233
+ case 4 :
234
+ test4 ();
235
+ break ;
236
+ default :
237
+ return 1 ;
238
+ }
239
+
63
240
return 0 ;
64
241
}
0 commit comments