16
16
template <typename T, int N> class sycl_subgr ;
17
17
18
18
using namespace cl ::sycl;
19
- // TODO remove this workaround when integration header will support correct
20
- // half generation
21
- struct wa_half ;
22
- typedef half aligned_half __attribute__ ((aligned(16 )));
23
19
24
20
template <typename T, int N> void check (queue &Queue) {
25
21
const int G = 1024 , L = 64 ;
@@ -34,12 +30,10 @@ template <typename T, int N> void check(queue &Queue) {
34
30
acc[i] += 0.1 ; // Check that floating point types are not casted to int
35
31
}
36
32
}
37
- using TT = typename std::conditional<std::is_same<T, aligned_half>::value,
38
- wa_half, T>::type;
39
33
Queue.submit ([&](handler &cgh) {
40
34
auto acc = syclbuf.template get_access <access ::mode::read_write>(cgh);
41
35
auto sgsizeacc = sgsizebuf.get_access <access ::mode::read_write>(cgh);
42
- cgh.parallel_for <sycl_subgr<TT , N>>(NdRange, [=](nd_item<1 > NdItem) {
36
+ cgh.parallel_for <sycl_subgr<T , N>>(NdRange, [=](nd_item<1 > NdItem) {
43
37
intel::sub_group SG = NdItem.get_sub_group ();
44
38
if (SG.get_group_id ().get (0 ) % N == 0 ) {
45
39
size_t WGSGoffset =
@@ -103,12 +97,10 @@ template <typename T> void check(queue &Queue) {
103
97
}
104
98
}
105
99
106
- using TT = typename std::conditional<std::is_same<T, aligned_half>::value,
107
- wa_half, T>::type;
108
100
Queue.submit ([&](handler &cgh) {
109
101
auto acc = syclbuf.template get_access <access ::mode::read_write>(cgh);
110
102
auto sgsizeacc = sgsizebuf.get_access <access ::mode::read_write>(cgh);
111
- cgh.parallel_for <sycl_subgr<TT , 0 >>(NdRange, [=](nd_item<1 > NdItem) {
103
+ cgh.parallel_for <sycl_subgr<T , 0 >>(NdRange, [=](nd_item<1 > NdItem) {
112
104
intel::sub_group SG = NdItem.get_sub_group ();
113
105
if (NdItem.get_global_id (0 ) == 0 )
114
106
sgsizeacc[0 ] = SG.get_max_local_range ()[0 ];
@@ -180,6 +172,7 @@ int main() {
180
172
check<aligned_short, 4 >(Queue);
181
173
check<aligned_short, 8 >(Queue);
182
174
if (Queue.get_device ().has_extension (" cl_khr_fp16" )) {
175
+ typedef half aligned_half __attribute__ ((aligned (16 )));
183
176
check<aligned_half>(Queue);
184
177
check<aligned_half, 1 >(Queue);
185
178
check<aligned_half, 2 >(Queue);
0 commit comments