Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit a8567cf

Browse files
authored
[SYCL] Add a test for group sorting algorithm (#438)
Signed-off-by: Fedorov, Andrey <[email protected]>
1 parent 7aaeec5 commit a8567cf

File tree

1 file changed

+374
-0
lines changed

1 file changed

+374
-0
lines changed

SYCL/GroupAlgorithm/SYCL2020/sort.cpp

+374
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,374 @@
1+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I . -o %t.out
2+
// RUN: %CPU_RUN_PLACEHOLDER %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
// RUN: %ACC_RUN_PLACEHOLDER %t.out
5+
//
6+
// RUNx: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -I . -o %t13.out
7+
8+
#include "support.h"
9+
#include <CL/sycl.hpp>
10+
11+
#include <algorithm>
12+
#include <iostream>
13+
#include <random>
14+
#include <vector>
15+
16+
namespace my_sycl = sycl::ext::oneapi;
17+
18+
auto async_handler_ = [](sycl::exception_list ex_list) {
19+
for (auto &ex : ex_list) {
20+
try {
21+
std::rethrow_exception(ex);
22+
} catch (sycl::exception &ex) {
23+
std::cerr << ex.what() << std::endl;
24+
std::exit(EXIT_FAILURE);
25+
}
26+
}
27+
};
28+
29+
constexpr uint32_t items_per_work_item = 4;
30+
31+
struct CustomType {
32+
int x;
33+
};
34+
35+
struct CustomFunctor {
36+
bool operator()(const CustomType &lhs, const CustomType &rhs) const {
37+
return lhs.x < rhs.x;
38+
}
39+
};
40+
41+
// we need it since using std::abs leads to compilation error
42+
template <typename T> T my_abs(T x) { return x >= 0 ? x : -x; }
43+
44+
template <typename T> bool check(T lhs, T rhs, float epsilon) {
45+
return my_abs(lhs - rhs) > epsilon;
46+
}
47+
bool check(CustomType lhs, CustomType rhs, float epsilon) {
48+
return my_abs(lhs.x - rhs.x) > epsilon;
49+
}
50+
51+
template <typename T>
52+
bool verify(T *expected, T *got, std::size_t n, float epsilon) {
53+
for (std::size_t i = 0; i < n; ++i) {
54+
if (check(expected[i], got[i], epsilon)) {
55+
return false;
56+
}
57+
}
58+
return true;
59+
}
60+
61+
// forward declared classes to name kernels
62+
template <typename... Args> class sort_over_group_kernel_name;
63+
template <typename... Args> class joint_sort_kernel_name;
64+
template <typename... Args> class custom_sorter_kernel_name;
65+
66+
// this class is needed to pass dimension value to aforementioned classes
67+
template <int dim> class int_wrapper;
68+
69+
// custom sorter
70+
template <typename Compare> struct bubble_sorter {
71+
Compare comp;
72+
size_t idx;
73+
74+
template <typename Group, typename Ptr>
75+
void operator()(Group g, Ptr begin, Ptr end) {
76+
size_t n = end - begin;
77+
if (idx == 0)
78+
for (size_t i = 0; i < n; ++i)
79+
for (size_t j = i + 1; j < n; ++j)
80+
if (comp(begin[j], begin[i]))
81+
std::swap(begin[i], begin[j]);
82+
}
83+
};
84+
85+
template <int dim> sycl::range<dim> get_range(const std::size_t local);
86+
87+
template <> sycl::range<1> get_range<1>(const std::size_t local) {
88+
return sycl::range<1>(local);
89+
}
90+
91+
template <> sycl::range<2> get_range<2>(const std::size_t local) {
92+
return sycl::range<2>(local, 1);
93+
}
94+
95+
template <> sycl::range<3> get_range<3>(const std::size_t local) {
96+
return sycl::range<3>(local, 1, 1);
97+
}
98+
99+
template <int dim, typename T, typename Compare>
100+
int test_sort_over_group(sycl::queue &q, std::size_t local,
101+
sycl::buffer<T> &bufI1, Compare comp, int test_case) {
102+
auto n = bufI1.size();
103+
if (n > local)
104+
return -1;
105+
106+
sycl::range<dim> local_range = get_range<dim>(local);
107+
108+
std::size_t local_memory_size =
109+
my_sycl::experimental::default_sorter<>::memory_required<T>(
110+
sycl::memory_scope::work_group, local_range);
111+
112+
if (local_memory_size >
113+
q.get_device().template get_info<sycl::info::device::local_mem_size>())
114+
std::cout << "local_memory_size = " << local_memory_size << ", available = "
115+
<< q.get_device()
116+
.template get_info<sycl::info::device::local_mem_size>()
117+
<< std::endl;
118+
q.submit([&](sycl::handler &h) {
119+
auto aI1 = sycl::accessor(bufI1, h);
120+
sycl::accessor<std::byte, 1, sycl::access_mode::read_write,
121+
sycl::access::target::local>
122+
scratch({local_memory_size}, h);
123+
124+
h.parallel_for<sort_over_group_kernel_name<int_wrapper<dim>, T, Compare>>(
125+
sycl::nd_range<dim>(local_range, local_range),
126+
[=](sycl::nd_item<dim> id) {
127+
scratch[0] = std::byte{};
128+
auto local_id = id.get_local_linear_id();
129+
switch (test_case) {
130+
case 0:
131+
if constexpr (std::is_same_v<Compare, std::less<T>> &&
132+
!std::is_same_v<T, CustomType>)
133+
aI1[local_id] = my_sycl::sort_over_group(
134+
my_sycl::experimental::group_with_scratchpad(
135+
id.get_group(),
136+
sycl::span{&scratch[0], local_memory_size}),
137+
aI1[local_id]);
138+
break;
139+
case 1:
140+
aI1[local_id] = my_sycl::sort_over_group(
141+
my_sycl::experimental::group_with_scratchpad(
142+
id.get_group(),
143+
sycl::span{&scratch[0], local_memory_size}),
144+
aI1[local_id], comp);
145+
break;
146+
case 2:
147+
aI1[local_id] = my_sycl::sort_over_group(
148+
id.get_group(), aI1[local_id],
149+
my_sycl::experimental::default_sorter<Compare>(
150+
sycl::span{&scratch[0], local_memory_size}));
151+
break;
152+
}
153+
});
154+
}).wait_and_throw();
155+
return 1;
156+
}
157+
158+
template <typename T, typename Compare>
159+
int test_joint_sort(sycl::queue &q, std::size_t n_items, std::size_t local,
160+
sycl::buffer<T> &bufI1, Compare comp, int test_case) {
161+
auto n = bufI1.size();
162+
auto n_groups = (n - 1) / n_items + 1;
163+
164+
std::size_t local_memory_size =
165+
my_sycl::experimental::default_sorter<>::memory_required<T>(
166+
sycl::memory_scope::work_group, n);
167+
if (local_memory_size >
168+
q.get_device().template get_info<sycl::info::device::local_mem_size>())
169+
std::cout << "local_memory_size = " << local_memory_size << ", available = "
170+
<< q.get_device()
171+
.template get_info<sycl::info::device::local_mem_size>()
172+
<< std::endl;
173+
q.submit([&](sycl::handler &h) {
174+
auto aI1 = sycl::accessor(bufI1, h);
175+
sycl::accessor<std::byte, 1, sycl::access_mode::read_write,
176+
sycl::access::target::local>
177+
scratch({local_memory_size}, h);
178+
179+
h.parallel_for<joint_sort_kernel_name<T, Compare>>(
180+
sycl::nd_range<1>{{n_groups * local}, {local}},
181+
[=](sycl::nd_item<1> id) {
182+
auto group_id = id.get_group(0);
183+
auto ptr_keys = &aI1[group_id * n_items];
184+
// Replacing the line above with the line below also works
185+
// auto ptr_keys = aI1.get_pointer() + group_id * n_items;
186+
187+
scratch[0] = std::byte{};
188+
switch (test_case) {
189+
case 0:
190+
if constexpr (std::is_same_v<Compare, std::less<T>> &&
191+
!std::is_same_v<T, CustomType>)
192+
my_sycl::joint_sort(
193+
my_sycl::experimental::group_with_scratchpad(
194+
id.get_group(),
195+
sycl::span{&scratch[0], local_memory_size}),
196+
ptr_keys,
197+
ptr_keys + sycl::min(n_items, n - group_id * n_items));
198+
break;
199+
case 1:
200+
my_sycl::joint_sort(
201+
my_sycl::experimental::group_with_scratchpad(
202+
id.get_group(),
203+
sycl::span{&scratch[0], local_memory_size}),
204+
ptr_keys,
205+
ptr_keys + sycl::min(n_items, n - group_id * n_items), comp);
206+
break;
207+
case 2:
208+
my_sycl::joint_sort(
209+
id.get_group(), ptr_keys,
210+
ptr_keys + sycl::min(n_items, n - group_id * n_items),
211+
my_sycl::experimental::default_sorter<Compare>(
212+
sycl::span{&scratch[0], local_memory_size}));
213+
break;
214+
}
215+
});
216+
}).wait_and_throw();
217+
return n_groups;
218+
}
219+
220+
template <typename T, typename Compare>
221+
int test_custom_sorter(sycl::queue &q, sycl::buffer<T> &bufI1, Compare comp) {
222+
std::size_t local = 256;
223+
auto n = bufI1.size();
224+
if (n > local)
225+
return -1;
226+
local = std::min(local, n);
227+
228+
q.submit([&](sycl::handler &h) {
229+
auto aI1 = sycl::accessor(bufI1, h);
230+
231+
h.parallel_for<custom_sorter_kernel_name<T, Compare>>(
232+
sycl::nd_range<2>({local, 1}, {local, 1}), [=](sycl::nd_item<2> id) {
233+
auto ptr = aI1.get_pointer();
234+
235+
my_sycl::joint_sort(
236+
id.get_group(), ptr, ptr + n,
237+
bubble_sorter<Compare>{comp, id.get_local_linear_id()});
238+
});
239+
}).wait_and_throw();
240+
return 1;
241+
}
242+
243+
template <typename T, typename Compare>
244+
void run_sort(sycl::queue &q, std::vector<T> &in, std::size_t size,
245+
Compare comp, int test_case, int sort_case) {
246+
std::vector<T> in2(in.begin(), in.begin() + size);
247+
std::vector<T> expected(in.begin(), in.begin() + size);
248+
std::size_t local =
249+
q.get_device()
250+
.template get_info<sycl::info::device::max_work_group_size>();
251+
local = std::min(local, size);
252+
auto n_items = items_per_work_item * local;
253+
254+
int n_groups = 1;
255+
{ // scope to destruct buffers
256+
sycl::buffer<T> bufKeys(in2.data(), size);
257+
{
258+
switch (sort_case) {
259+
case 0:
260+
// this case is just to check the compilation
261+
n_groups = test_sort_over_group<1>(q, local, bufKeys, comp, test_case);
262+
263+
n_groups = test_sort_over_group<2>(q, local, bufKeys, comp, test_case);
264+
break;
265+
case 1:
266+
n_groups = test_joint_sort(q, n_items, local, bufKeys, comp, test_case);
267+
break;
268+
case 2:
269+
n_groups = test_custom_sorter(q, bufKeys, comp);
270+
break;
271+
}
272+
}
273+
}
274+
275+
// check results
276+
for (int i_group = 0; i_group < n_groups; ++i_group) {
277+
std::sort(expected.begin() + i_group * n_items,
278+
expected.begin() + std::min((i_group + 1) * n_items, size), comp);
279+
}
280+
if (n_groups != -1 &&
281+
(test_case != 0 ||
282+
test_case == 0 && std::is_same_v<Compare, std::less<T>> &&
283+
!std::is_same_v<T, CustomType>)&&!verify(expected.data(), in2.data(),
284+
size, 0.001f)) {
285+
std::cerr << "Verification failed \n";
286+
exit(1);
287+
}
288+
}
289+
290+
template <typename T> struct test_sort_cases {
291+
template <typename Generator, typename Compare>
292+
void operator()(sycl::queue &q, std::size_t dataSize, Compare comp,
293+
Generator generate) {
294+
std::vector<T> stationaryData(dataSize);
295+
// fill data
296+
for (std::size_t i = 0; i < dataSize; ++i)
297+
stationaryData[i] = generate(i);
298+
299+
// run test
300+
for (int test_case = 0; test_case < 3; ++test_case) {
301+
for (int sort_case = 0; sort_case < 3; ++sort_case) {
302+
run_sort(q, stationaryData, dataSize, comp, test_case, sort_case);
303+
}
304+
}
305+
}
306+
};
307+
308+
void test_custom_type(sycl::queue &q, std::size_t dataSize) {
309+
std::vector<CustomType> stationaryData(dataSize, CustomType{0});
310+
// fill data
311+
for (std::size_t i = 0; i < dataSize; ++i)
312+
stationaryData[i] = CustomType{int(i)};
313+
314+
// run test
315+
for (int test_case = 0; test_case < 1; ++test_case) {
316+
for (int sort_case = 0; sort_case < 3; ++sort_case) {
317+
run_sort(q, stationaryData, dataSize, CustomFunctor{}, test_case,
318+
sort_case);
319+
}
320+
}
321+
}
322+
323+
template <typename T, typename Compare>
324+
void test_sort_by_comp(sycl::queue &q, std::size_t dataSize) {
325+
std::default_random_engine generator;
326+
std::normal_distribution<float> distribution((10.0), (2.0));
327+
328+
T max_size = std::numeric_limits<T>::max();
329+
std::size_t to_fill = dataSize;
330+
if (dataSize > max_size)
331+
to_fill = max_size;
332+
333+
// reversed order
334+
test_sort_cases<T>()(q, to_fill, Compare{},
335+
[to_fill](std::size_t i) { return T(to_fill - i - 1); });
336+
// filled by 1
337+
test_sort_cases<T>()(q, dataSize, Compare{},
338+
[](std::size_t) { return T(1); });
339+
// random distribution
340+
test_sort_cases<T>()(q, dataSize, Compare{},
341+
[&distribution, &generator](std::size_t) {
342+
return T(distribution(generator));
343+
});
344+
}
345+
346+
template <typename T>
347+
void test_sort_by_type(sycl::queue &q, std::size_t dataSize) {
348+
test_sort_by_comp<T, std::less<T>>(q, dataSize);
349+
test_sort_by_comp<T, std::greater<T>>(q, dataSize);
350+
}
351+
352+
int main(int argc, char *argv[]) {
353+
sycl::queue q(sycl::default_selector{}, async_handler_);
354+
if (!isSupportedDevice(q.get_device())) {
355+
std::cout << "Skipping test\n";
356+
return 0;
357+
}
358+
359+
std::vector<int> sizes{1, 2, 64, 256, 1024, 2048, 4096};
360+
361+
for (int i = 0; i < sizes.size(); ++i) {
362+
test_sort_by_type<std::int8_t>(q, sizes[i]);
363+
test_sort_by_type<std::uint16_t>(q, sizes[i]);
364+
test_sort_by_type<std::int32_t>(q, sizes[i]);
365+
test_sort_by_type<std::uint32_t>(q, sizes[i]);
366+
test_sort_by_type<float>(q, sizes[i]);
367+
test_sort_by_type<sycl::half>(q, sizes[i]);
368+
test_sort_by_type<double>(q, sizes[i]);
369+
test_sort_by_type<std::size_t>(q, sizes[i]);
370+
371+
test_custom_type(q, sizes[i]);
372+
}
373+
std::cout << "Test passed." << std::endl;
374+
}

0 commit comments

Comments
 (0)