Skip to content

Commit 20aa83e

Browse files
authored
[SYCL] Fix undefined symbols in async_work_group_copy (#1243)
Ensure proper name mangling by casting from SYCL types (namely cl::sycl::vec) to cl_* when calling OpenCL. Signed-off-by: Mokhov, Dmitri N <[email protected]>
1 parent 8ddf584 commit 20aa83e

File tree

2 files changed

+115
-16
lines changed

2 files changed

+115
-16
lines changed

sycl/include/CL/sycl/group.hpp

+29-16
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <CL/__spirv/spirv_types.hpp>
1313
#include <CL/__spirv/spirv_vars.hpp>
1414
#include <CL/sycl/detail/common.hpp>
15+
#include <CL/sycl/detail/generic_type_traits.hpp>
1516
#include <CL/sycl/detail/helpers.hpp>
1617
#include <CL/sycl/device_event.hpp>
1718
#include <CL/sycl/h_item.hpp>
@@ -271,21 +272,27 @@ template <int Dimensions = 1> class group {
271272
device_event async_work_group_copy(local_ptr<dataT> dest,
272273
global_ptr<dataT> src,
273274
size_t numElements) const {
274-
__ocl_event_t e =
275-
OpGroupAsyncCopyGlobalToLocal<dataT>(
276-
__spv::Scope::Workgroup,
277-
dest.get(), src.get(), numElements, 1, 0);
275+
using T = detail::ConvertToOpenCLType_t<dataT>;
276+
using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
277+
using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
278+
279+
__ocl_event_t e = OpGroupAsyncCopyGlobalToLocal<T>(
280+
__spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
281+
numElements, 1, 0);
278282
return device_event(&e);
279283
}
280284

281285
template <typename dataT>
282286
device_event async_work_group_copy(global_ptr<dataT> dest,
283287
local_ptr<dataT> src,
284288
size_t numElements) const {
285-
__ocl_event_t e =
286-
OpGroupAsyncCopyLocalToGlobal<dataT>(
287-
__spv::Scope::Workgroup,
288-
dest.get(), src.get(), numElements, 1, 0);
289+
using T = detail::ConvertToOpenCLType_t<dataT>;
290+
using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
291+
using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
292+
293+
__ocl_event_t e = OpGroupAsyncCopyLocalToGlobal<T>(
294+
__spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
295+
numElements, 1, 0);
289296
return device_event(&e);
290297
}
291298

@@ -294,10 +301,13 @@ template <int Dimensions = 1> class group {
294301
global_ptr<dataT> src,
295302
size_t numElements,
296303
size_t srcStride) const {
297-
__ocl_event_t e =
298-
OpGroupAsyncCopyGlobalToLocal<dataT>(
299-
__spv::Scope::Workgroup,
300-
dest.get(), src.get(), numElements, srcStride, 0);
304+
using T = detail::ConvertToOpenCLType_t<dataT>;
305+
using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
306+
using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
307+
308+
__ocl_event_t e = OpGroupAsyncCopyGlobalToLocal<T>(
309+
__spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
310+
numElements, srcStride, 0);
301311
return device_event(&e);
302312
}
303313

@@ -306,10 +316,13 @@ template <int Dimensions = 1> class group {
306316
local_ptr<dataT> src,
307317
size_t numElements,
308318
size_t destStride) const {
309-
__ocl_event_t e =
310-
OpGroupAsyncCopyLocalToGlobal<dataT>(
311-
__spv::Scope::Workgroup,
312-
dest.get(), src.get(), numElements, destStride, 0);
319+
using T = detail::ConvertToOpenCLType_t<dataT>;
320+
using DestT = detail::ConvertToOpenCLType_t<decltype(dest)>;
321+
using SrcT = detail::ConvertToOpenCLType_t<decltype(src)>;
322+
323+
__ocl_event_t e = OpGroupAsyncCopyLocalToGlobal<T>(
324+
__spv::Scope::Workgroup, DestT(dest.get()), SrcT(src.get()),
325+
numElements, destStride, 0);
313326
return device_event(&e);
314327
}
315328

sycl/test/regression/group.cpp

+86
Original file line numberDiff line numberDiff line change
@@ -162,10 +162,96 @@ bool group__get_linear_id() {
162162
return Pass;
163163
}
164164

165+
// Tests group::async_work_group_copy()
166+
bool group__async_work_group_copy() {
167+
std::cout << "+++ Running group::async_work_group_copy() test...\n";
168+
constexpr int DIMS = 2;
169+
bool Pass = true;
170+
171+
std::vector<std::pair<range<DIMS>, range<DIMS>>> ranges;
172+
ranges.push_back({{3, 1}, {2, 3}});
173+
ranges.push_back({{1, 3}, {3, 2}});
174+
175+
for (const auto &i : ranges) {
176+
const auto LocalRange = i.first;
177+
const auto GroupRange = i.second;
178+
const range<DIMS> GlobalRange = LocalRange * GroupRange;
179+
using DataType = vec<size_t, DIMS>;
180+
const int DataLen = GlobalRange.size();
181+
std::unique_ptr<DataType[]> Data(new DataType[DataLen]);
182+
std::memset(Data.get(), 0, DataLen * sizeof(DataType));
183+
184+
try {
185+
buffer<DataType, 1> Buf(Data.get(), DataLen);
186+
queue Q(AsyncHandler{});
187+
188+
Q.submit([&](handler &cgh) {
189+
auto AccGlobal = Buf.get_access<access::mode::read_write>(cgh);
190+
accessor<DataType, DIMS, access::mode::read_write,
191+
access::target::local>
192+
AccLocal(LocalRange, cgh);
193+
194+
cgh.parallel_for<class group__async_work_group_copy>(
195+
nd_range<2>{GlobalRange, LocalRange},
196+
[=](nd_item<DIMS> I) {
197+
const auto Group = I.get_group();
198+
const auto NumElem = AccLocal.get_count();
199+
const auto Off = Group[0] * I.get_group_range(1) * NumElem +
200+
Group[1] * I.get_local_range(1);
201+
auto PtrGlobal = AccGlobal.get_pointer() + Off;
202+
auto PtrLocal = AccLocal.get_pointer();
203+
if (I.get_local_range(0) == 1) {
204+
Group.async_work_group_copy(PtrLocal, PtrGlobal, NumElem);
205+
} else {
206+
Group.async_work_group_copy(PtrLocal, PtrGlobal, NumElem,
207+
I.get_global_range(1));
208+
}
209+
AccLocal[I.get_local_id()][0] += I.get_global_id(0);
210+
AccLocal[I.get_local_id()][1] += I.get_global_id(1);
211+
if (I.get_local_range(0) == 1) {
212+
Group.async_work_group_copy(PtrGlobal, PtrLocal, NumElem);
213+
} else {
214+
Group.async_work_group_copy(PtrGlobal, PtrLocal, NumElem,
215+
I.get_global_range(1));
216+
}
217+
});
218+
});
219+
} catch (cl::sycl::exception const &E) {
220+
std::cout << "SYCL exception caught: " << E.what() << '\n';
221+
return 2;
222+
}
223+
const size_t SIZE_Y = GlobalRange.get(0);
224+
const size_t SIZE_X = GlobalRange.get(1);
225+
int ErrCnt = 0;
226+
227+
for (size_t Y = 0; Y < SIZE_Y; Y++) {
228+
for (size_t X = 0; X < SIZE_X; X++) {
229+
const size_t Ind = Y * SIZE_X + X;
230+
const auto Test0 = Data[Ind][0];
231+
const auto Test1 = Data[Ind][1];
232+
const auto Gold0 = Y;
233+
const auto Gold1 = X;
234+
const bool Ok = (Test0 == Gold0 && Test1 == Gold1);
235+
Pass &= Ok;
236+
237+
if (!Ok && ErrCnt++ < 10) {
238+
std::cout << "*** ERROR at [" << Y << "][" << X << "]: ";
239+
std::cout << Test0 << " " << Test1 << " != ";
240+
std::cout << Gold0 << " " << Gold1 << "\n";
241+
}
242+
}
243+
}
244+
}
245+
if (Pass)
246+
std::cout << " pass\n";
247+
return Pass;
248+
}
249+
165250
int main() {
166251
bool Pass = 1;
167252
Pass &= group__get_group_range();
168253
Pass &= group__get_linear_id();
254+
Pass &= group__async_work_group_copy();
169255

170256
if (!Pass) {
171257
std::cout << "FAILED\n";

0 commit comments

Comments
 (0)