Skip to content
This repository was archived by the owner on Mar 28, 2023. It is now read-only.

Commit e928221

Browse files
committed
[SYCL] Add exclusive scan over group test
1 parent afc48b2 commit e928221

File tree

1 file changed

+58
-0
lines changed

1 file changed

+58
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
// UNSUPPORTED: cuda || hip
2+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
3+
// RUN: %GPU_RUN_PLACEHOLDER %t.out
4+
5+
#include <CL/sycl.hpp>
6+
#include <algorithm>
7+
#include <iostream>
8+
9+
template <typename T>
10+
cl::sycl::event compiler_group_scan_impl(cl::sycl::queue *queue, T *in_data,
11+
T *out_data, int num_wg,
12+
int group_size) {
13+
cl::sycl::nd_range<1> thread_range(num_wg * group_size, group_size);
14+
cl::sycl::event event = queue->submit([&](cl::sycl::handler &cgh) {
15+
cgh.parallel_for(thread_range, [=](cl::sycl::nd_item<1> item) {
16+
auto id = item.get_global_linear_id();
17+
auto group = item.get_group();
18+
T data = in_data[id];
19+
20+
T updated_data = cl::sycl::exclusive_scan_over_group(
21+
group, data, cl::sycl::multiplies<T>());
22+
out_data[id] = updated_data;
23+
});
24+
});
25+
return event;
26+
}
27+
28+
template <typename T>
29+
void test_compiler_group_scan(cl::sycl::queue *queue, T *in_data, T *out_data,
30+
int num_wg, int group_size) {
31+
compiler_group_scan_impl(queue, in_data, out_data, num_wg, group_size);
32+
}
33+
34+
int main(int argc, const char **argv) {
35+
int num_wg = 1;
36+
int group_size = 16;
37+
38+
cl::sycl::queue queue{
39+
cl::sycl::gpu_selector{},
40+
cl::sycl::property_list{cl::sycl::property::queue::enable_profiling(),
41+
cl::sycl::property::queue::in_order()}};
42+
43+
typedef int T;
44+
size_t nelems = num_wg * group_size;
45+
T *data = cl::sycl::malloc_shared<T>(nelems, queue);
46+
T *result = cl::sycl::malloc_shared<T>(nelems, queue);
47+
queue.fill<T>(data, T(2), nelems).wait();
48+
queue.memset(result, 0, nelems * sizeof(T)).wait();
49+
50+
test_compiler_group_scan(&queue, data, result, num_wg, group_size);
51+
queue.wait();
52+
T expected[] = {1, 2, 4, 8, 16, 32, 64, 128,
53+
256, 512, 1024, 2048, 4096, 8192, 16384, 32768};
54+
for (int i = 0; i < sizeof(expected) / sizeof(T); ++i) {
55+
assert(result[i] == expected[i]);
56+
}
57+
return 0;
58+
}

0 commit comments

Comments
 (0)