Skip to content

[SYCL] Remove _Float16 from integration header #185

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions sycl/include/CL/sycl/detail/kernel_desc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,20 @@ namespace cl {
namespace sycl {
namespace detail {

namespace half_impl {

// Declaration of the host-side half class that the macro below refers to.
class half;

// On device the half type is _Float16; on host it is a manually implemented
// class. The integration header is produced by the device compiler, which
// sees half as _Float16 and therefore writes _Float16 into the integration
// header whenever half appears in kernel name template parameters. For host
// compilation, redirect _Float16 to the host half class so that the
// integration header compiles without errors. Note that a macro is not scoped
// by the enclosing namespace, hence the qualified name in the expansion.
#if !defined(__SYCL_DEVICE_ONLY__)
#define _Float16 cl::sycl::detail::half_impl::half
#endif // !defined(__SYCL_DEVICE_ONLY__)

} // namespace half_impl

// kernel parameter kinds
enum class kernel_param_kind_t {
kind_accessor,
Expand Down
13 changes: 3 additions & 10 deletions sycl/test/sub_group/load_store.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,6 @@
template <typename T, int N> class sycl_subgr;

using namespace cl::sycl;
// TODO remove this workaround when integration header will support correct
// half generation
struct wa_half;
typedef half aligned_half __attribute__((aligned(16)));

template <typename T, int N> void check(queue &Queue) {
const int G = 1024, L = 64;
Expand All @@ -34,12 +30,10 @@ template <typename T, int N> void check(queue &Queue) {
acc[i] += 0.1; // Check that floating point types are not casted to int
}
}
using TT = typename std::conditional<std::is_same<T, aligned_half>::value,
wa_half, T>::type;
Queue.submit([&](handler &cgh) {
auto acc = syclbuf.template get_access<access::mode::read_write>(cgh);
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
cgh.parallel_for<sycl_subgr<TT, N>>(NdRange, [=](nd_item<1> NdItem) {
cgh.parallel_for<sycl_subgr<T, N>>(NdRange, [=](nd_item<1> NdItem) {
intel::sub_group SG = NdItem.get_sub_group();
if (SG.get_group_id().get(0) % N == 0) {
size_t WGSGoffset =
Expand Down Expand Up @@ -103,12 +97,10 @@ template <typename T> void check(queue &Queue) {
}
}

using TT = typename std::conditional<std::is_same<T, aligned_half>::value,
wa_half, T>::type;
Queue.submit([&](handler &cgh) {
auto acc = syclbuf.template get_access<access::mode::read_write>(cgh);
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);
cgh.parallel_for<sycl_subgr<TT, 0>>(NdRange, [=](nd_item<1> NdItem) {
cgh.parallel_for<sycl_subgr<T, 0>>(NdRange, [=](nd_item<1> NdItem) {
intel::sub_group SG = NdItem.get_sub_group();
if (NdItem.get_global_id(0) == 0)
sgsizeacc[0] = SG.get_max_local_range()[0];
Expand Down Expand Up @@ -180,6 +172,7 @@ int main() {
check<aligned_short, 4>(Queue);
check<aligned_short, 8>(Queue);
if (Queue.get_device().has_extension("cl_khr_fp16")) {
typedef half aligned_half __attribute__((aligned(16)));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about using `using` instead of `typedef` in 2019?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test uses typedef everywhere, so I used it here to keep the style consistent. I think changing all typedefs to usings is not in the scope of this PR :)

check<aligned_half>(Queue);
check<aligned_half, 1>(Queue);
check<aligned_half, 2>(Queue);
Expand Down
8 changes: 2 additions & 6 deletions sycl/test/sub_group/shuffle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
buffer<vec<T, N>> buf_down(G);
buffer<vec<T, N>> buf_xor(G);
buffer<size_t> sgsizebuf(1);
using TT = typename std::conditional<std::is_same<T, half>::value, wa_half,
T>::type;
Queue.submit([&](handler &cgh) {
auto acc2 = buf2.template get_access<access::mode::read_write>(cgh);
auto acc2_up = buf2_up.template get_access<access::mode::read_write>(cgh);
Expand All @@ -48,7 +46,7 @@ void check(queue &Queue, size_t G = 240, size_t L = 60) {
auto acc_xor = buf_xor.template get_access<access::mode::read_write>(cgh);
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);

cgh.parallel_for<sycl_subgr<TT, N>>(NdRange, [=](nd_item<1> NdItem) {
cgh.parallel_for<sycl_subgr<T, N>>(NdRange, [=](nd_item<1> NdItem) {
intel::sub_group SG = NdItem.get_sub_group();
uint32_t wggid = NdItem.get_global_id(0);
uint32_t sgid = SG.get_group_id().get(0);
Expand Down Expand Up @@ -141,8 +139,6 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
buffer<T> buf_down(G);
buffer<T> buf_xor(G);
buffer<size_t> sgsizebuf(1);
using TT = typename std::conditional<std::is_same<T, half>::value, wa_half,
T>::type;
Queue.submit([&](handler &cgh) {
auto acc2 = buf2.template get_access<access::mode::read_write>(cgh);
auto acc2_up = buf2_up.template get_access<access::mode::read_write>(cgh);
Expand All @@ -156,7 +152,7 @@ template <typename T> void check(queue &Queue, size_t G = 240, size_t L = 60) {
auto acc_xor = buf_xor.template get_access<access::mode::read_write>(cgh);
auto sgsizeacc = sgsizebuf.get_access<access::mode::read_write>(cgh);

cgh.parallel_for<sycl_subgr<TT, 0>>(NdRange, [=](nd_item<1> NdItem) {
cgh.parallel_for<sycl_subgr<T, 0>>(NdRange, [=](nd_item<1> NdItem) {
intel::sub_group SG = NdItem.get_sub_group();
uint32_t wggid = NdItem.get_global_id(0);
uint32_t sgid = SG.get_group_id().get(0);
Expand Down