Skip to content

Commit 8d186a3

Browse files
Fznamznonbader
authored andcommitted
[SYCL] Remove _Float16 from integration header
The main problem that half type is defined as _Float16 on the device and as manually implemented type on host so compiler could add _Float16 to the integration header and it will produce errors in host compilation. Signed-off-by: Mariya Podchishchaeva <[email protected]>
1 parent 4d847c0 commit 8d186a3

File tree

3 files changed

+19
-16
lines changed

3 files changed

+19
-16
lines changed

sycl/include/CL/sycl/detail/kernel_desc.hpp

+14
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,20 @@ namespace cl {
1515
namespace sycl {
1616
namespace detail {
1717

18+
namespace half_impl {
19+
20+
class half;
21+
// Half type is defined as _Float16 on device and as manually implemented half
22+
// type on host. Integration header is generated by device compiler so it sees
23+
// half type as _Float16 and it will add _Float16 to integration header if it
24+
// is used in kernel name template parameters. To avoid errors in host
25+
// compilation we remove _Float16 from integration header using following macro.
26+
#ifndef __SYCL_DEVICE_ONLY__
27+
#define _Float16 cl::sycl::detail::half_impl::half
28+
#endif
29+
30+
} // namespace half_impl
31+
1832
// kernel parameter kinds
1933
enum class kernel_param_kind_t {
2034
kind_accessor,

sycl/test/sub_group/load_store.cpp

+3-10
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,6 @@
1616
template <typename T, int N> class sycl_subgr;
1717

1818
using namespace cl::sycl;
19-
// TODO remove this workaround when integration header will support correct
20-
// half generation
21-
struct wa_half;
22-
typedef half aligned_half __attribute__((aligned(16)));
2319

2420
template <typename T, int N> void check(queue &Queue) {
2521
const int G = 1024, L = 64;
@@ -34,12 +30,10 @@ template <typename T, int N> void check(queue &Queue) {
3430
acc[i] += 0.1; // Check that floating point types are not casted to int
3531
}
3632
}
37-
using TT = typename std::conditional<std::is_same<T, aligned_half>::value,
38-
wa_half, T>::type;
3933
Queue.submit([&](handler &cgh) {
4034
auto acc = syclbuf.template get_access<access::mode::read_write>(cgh);
4135
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
42-
cgh.parallel_for<sycl_subgr<TT, N>>(NdRange, [=](nd_item<1> NdItem) {
36+
cgh.parallel_for<sycl_subgr<T, N>>(NdRange, [=](nd_item<1> NdItem) {
4337
intel::sub_group SG = NdItem.get_sub_group();
4438
if (SG.get_group_id().get(0) % N == 0) {
4539
size_t WGSGoffset =
@@ -103,12 +97,10 @@ template <typename T> void check(queue &Queue) {
10397
}
10498
}
10599

106-
using TT = typename std::conditional<std::is_same<T, aligned_half>::value,
107-
wa_half, T>::type;
108100
Queue.submit([&](handler &cgh) {
109101
auto acc = syclbuf.template get_access<access::mode::read_write>(cgh);
110102
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
111-
cgh.parallel_for<sycl_subgr<TT, 0>>(NdRange, [=](nd_item<1> NdItem) {
103+
cgh.parallel_for<sycl_subgr<T, 0>>(NdRange, [=](nd_item<1> NdItem) {
112104
intel::sub_group SG = NdItem.get_sub_group();
113105
if (NdItem.get_global_id(0) == 0)
114106
sgsizeacc[0] = SG.get_max_local_range()[0];
@@ -180,6 +172,7 @@ int main() {
180172
check<aligned_short, 4>(Queue);
181173
check<aligned_short, 8>(Queue);
182174
if (Queue.get_device().has_extension("cl_khr_fp16")) {
175+
typedef half aligned_half __attribute__((aligned(16)));
183176
check<aligned_half>(Queue);
184177
check<aligned_half, 1>(Queue);
185178
check<aligned_half, 2>(Queue);

sycl/test/sub_group/shuffle.cpp

+2-6
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
3333
buffer<vec<T, N>> buf_down(G);
3434
buffer<vec<T, N>> buf_xor(G);
3535
buffer<size_t> sgsizebuf(1);
36-
using TT = typename std::conditional<std::is_same<T, half>::value, wa_half,
37-
T>::type;
3836
Queue.submit([&](handler &cgh) {
3937
auto acc2 = buf2.template get_access<access::mode::read_write>(cgh);
4038
auto acc2_up = buf2_up.template get_access<access::mode::read_write>(cgh);
@@ -48,7 +46,7 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
4846
auto acc_xor = buf_xor.template get_access<access::mode::read_write>(cgh);
4947
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
5048

51-
cgh.parallel_for<sycl_subgr<TT, N>>(NdRange, [=](nd_item<1> NdItem) {
49+
cgh.parallel_for<sycl_subgr<T, N>>(NdRange, [=](nd_item<1> NdItem) {
5250
intel::sub_group SG = NdItem.get_sub_group();
5351
uint32_t wggid = NdItem.get_global_id(0);
5452
uint32_t sgid = SG.get_group_id().get(0);
@@ -141,8 +139,6 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
141139
buffer<T> buf_down(G);
142140
buffer<T> buf_xor(G);
143141
buffer<size_t> sgsizebuf(1);
144-
using TT = typename std::conditional<std::is_same<T, half>::value, wa_half,
145-
T>::type;
146142
Queue.submit([&](handler &cgh) {
147143
auto acc2 = buf2.template get_access<access::mode::read_write>(cgh);
148144
auto acc2_up = buf2_up.template get_access<access::mode::read_write>(cgh);
@@ -156,7 +152,7 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
156152
auto acc_xor = buf_xor.template get_access<access::mode::read_write>(cgh);
157153
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
158154

159-
cgh.parallel_for<sycl_subgr<TT, 0>>(NdRange, [=](nd_item<1> NdItem) {
155+
cgh.parallel_for<sycl_subgr<T, 0>>(NdRange, [=](nd_item<1> NdItem) {
160156
intel::sub_group SG = NdItem.get_sub_group();
161157
uint32_t wggid = NdItem.get_global_id(0);
162158
uint32_t sgid = SG.get_group_id().get(0);

0 commit comments

Comments
 (0)