diff --git a/SYCL/BFloat16/bfloat16_type.cpp b/SYCL/BFloat16/bfloat16_type.cpp
index 7b473517c9..25bb8ac15c 100644
--- a/SYCL/BFloat16/bfloat16_type.cpp
+++ b/SYCL/BFloat16/bfloat16_type.cpp
@@ -15,193 +15,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include <CL/sycl.hpp>
-#include <iostream>
+#include "bfloat16_type.hpp"
 
-#include <cmath>
-
-using namespace cl::sycl;
-
-constexpr size_t N = 100;
-
-template <typename T> void assert_close(const T &C, const float ref) {
-  for (size_t i = 0; i < N; i++) {
-    auto diff = C[i] - ref;
-    assert(std::fabs(static_cast<float>(diff)) <
-           std::numeric_limits<float>::epsilon());
-  }
-}
-
-void verify_conv_implicit(queue &q, buffer<float, 1> &a, range<1> &r,
-                          const float ref) {
-  q.submit([&](handler &cgh) {
-    auto A = a.get_access<access::mode::read_write>(cgh);
-    cgh.parallel_for(r, [=](id<1> index) {
-      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
-      A[index] = AVal;
-    });
-  });
-
-  assert_close(a.get_access<access::mode::read>(), ref);
-}
-
-void verify_conv_explicit(queue &q, buffer<float, 1> &a, range<1> &r,
-                          const float ref) {
-  q.submit([&](handler &cgh) {
-    auto A = a.get_access<access::mode::read_write>(cgh);
-    cgh.parallel_for(r, [=](id<1> index) {
-      uint16_t AVal =
-          cl::sycl::ext::intel::experimental::bfloat16::from_float(A[index]);
-      A[index] = cl::sycl::ext::intel::experimental::bfloat16::to_float(AVal);
-    });
-  });
-
-  assert_close(a.get_access<access::mode::read>(), ref);
-}
-
-void verify_add(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
-                const float ref) {
-  buffer<float, 1> c{r};
-
-  q.submit([&](handler &cgh) {
-    auto A = a.get_access<access::mode::read>(cgh);
-    auto B = b.get_access<access::mode::read>(cgh);
-    auto C = c.get_access<access::mode::write>(cgh);
-    cgh.parallel_for(r, [=](id<1> index) {
-      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
-      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
-      cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal + BVal;
-      C[index] = CVal;
-    });
-  });
-
-  assert_close(c.get_access<access::mode::read>(), ref);
-}
-
-void verify_sub(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
-                const float ref) {
-  buffer<float, 1> c{r};
-
-  q.submit([&](handler &cgh) {
-    auto A = a.get_access<access::mode::read>(cgh);
-    auto B = b.get_access<access::mode::read>(cgh);
-    auto C = c.get_access<access::mode::write>(cgh);
-    cgh.parallel_for(r, [=](id<1> index) {
-      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
-      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
-      cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal - BVal;
-      C[index] = CVal;
-    });
-  });
-
-  assert_close(c.get_access<access::mode::read>(), ref);
-}
-
-void verify_mul(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
-                const float ref) {
-  buffer<float, 1> c{r};
-
-  q.submit([&](handler &cgh) {
-    auto A = a.get_access<access::mode::read>(cgh);
-    auto B = b.get_access<access::mode::read>(cgh);
-    auto C = c.get_access<access::mode::write>(cgh);
-    cgh.parallel_for(r, [=](id<1> index) {
-      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
-      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
-      cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal * BVal;
-      C[index] = CVal;
-    });
-  });
-
-  assert_close(c.get_access<access::mode::read>(), ref);
-}
-
-void verify_div(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
-                const float ref) {
-  buffer<float, 1> c{r};
-
-  q.submit([&](handler &cgh) {
-    auto A = a.get_access<access::mode::read>(cgh);
-    auto B = b.get_access<access::mode::read>(cgh);
-    auto C = c.get_access<access::mode::write>(cgh);
-    cgh.parallel_for(r, [=](id<1> index) {
-      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
-      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
-      cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal / BVal;
-      C[index] = CVal;
-    });
-  });
-
-  assert_close(c.get_access<access::mode::read>(), ref);
-}
-
-void verify_logic(queue &q, buffer<float, 1> &a, buffer<float, 1> &b,
-                  range<1> &r, const float ref) {
-  buffer<float, 1> c{r};
-
-  q.submit([&](handler &cgh) {
-    auto A = a.get_access<access::mode::read>(cgh);
-    auto B = b.get_access<access::mode::read>(cgh);
-    auto C = c.get_access<access::mode::write>(cgh);
-    cgh.parallel_for(r, [=](id<1> index) {
-      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
-      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
-      if (AVal) {
-        if (AVal > BVal || AVal >= BVal || AVal < BVal || AVal <= BVal ||
-            !BVal) {
-          cl::sycl::ext::intel::experimental::bfloat16 CVal =
-              AVal != BVal ? AVal : BVal;
-          CVal--;
-          CVal++;
-          if (AVal == BVal) {
-            CVal -= AVal;
-            CVal *= 3.0;
-            CVal /= 2.0;
-          } else
-            CVal += BVal;
-          C[index] = CVal;
-        }
-      }
-    });
-  });
-
-  assert_close(c.get_access<access::mode::read>(), ref);
-}
-
-int main() {
-  device dev{default_selector()};
-
-  // TODO: replace is_gpu check with extension check when the appropriate part
-  // of implementation ready (aspect)
-  if (!dev.is_gpu() && !dev.is_cpu()) {
-    std::cout << "This device doesn't support bfloat16 conversion feature"
-              << std::endl;
-    return 0;
-  }
-
-  std::vector<float> vec_a(N, 5.0);
-  std::vector<float> vec_b(N, 2.0);
-  std::vector<float> vec_b_neg(N, -2.0);
-
-  range<1> r(N);
-  buffer<float, 1> a{vec_a.data(), r};
-  buffer<float, 1> b{vec_b.data(), r};
-  buffer<float, 1> b_neg{vec_b_neg.data(), r};
-
-  queue q{dev};
-
-  verify_conv_implicit(q, a, r, 5.0);
-  verify_conv_explicit(q, a, r, 5.0);
-  verify_add(q, a, b, r, 7.0);
-  verify_sub(q, a, b, r, 3.0);
-  verify_mul(q, a, b, r, 10.0);
-  verify_div(q, a, b, r, 2.5);
-  verify_logic(q, a, b, r, 7.0);
-  verify_add(q, a, b_neg, r, 3.0);
-  verify_sub(q, a, b_neg, r, 7.0);
-  verify_mul(q, a, b_neg, r, -10.0);
-  verify_div(q, a, b_neg, r, -2.5);
-  verify_logic(q, a, b_neg, r, 3.0);
-
-  return 0;
-}
+int main() { return run_tests(); }
diff --git a/SYCL/BFloat16/bfloat16_type.hpp b/SYCL/BFloat16/bfloat16_type.hpp
new file mode 100644
index 0000000000..230757b3be
--- /dev/null
+++ b/SYCL/BFloat16/bfloat16_type.hpp
@@ -0,0 +1,207 @@
+#include <CL/sycl.hpp>
+#include <iostream>
+
+#include <cmath>
+
+using namespace cl::sycl;
+
+constexpr size_t N = 100;
+
+template <typename T> void assert_close(const T &C, const float ref) {
+  for (size_t i = 0; i < N; i++) {
+    auto diff = C[i] - ref;
+    assert(std::fabs(static_cast<float>(diff)) <
+           std::numeric_limits<float>::epsilon());
+  }
+}
+
+void verify_conv_implicit(queue &q, buffer<float, 1> &a, range<1> &r,
+                          const float ref) {
+  q.submit([&](handler &cgh) {
+    auto A = a.get_access<access::mode::read_write>(cgh);
+    cgh.parallel_for(r, [=](id<1> index) {
+      cl::sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
+      A[index] = AVal;
+    });
+  });
+
+  assert_close(a.get_access<access::mode::read>(), ref);
+}
+
+void verify_conv_explicit(queue &q, buffer<float, 1> &a, range<1> &r,
+                          const float ref) {
+  q.submit([&](handler &cgh) {
+    auto A = a.get_access<access::mode::read_write>(cgh);
+    cgh.parallel_for(r, [=](id<1> index) {
+      uint16_t AVal =
+          cl::sycl::ext::oneapi::experimental::bfloat16::from_float(A[index]);
+      A[index] = cl::sycl::ext::oneapi::experimental::bfloat16::to_float(AVal);
+    });
+  });
+
+  assert_close(a.get_access<access::mode::read>(), ref);
+}
+
+void verify_add(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
+                const float ref) {
+  buffer<float, 1> c{r};
+
+  q.submit([&](handler &cgh) {
+    auto A = a.get_access<access::mode::read>(cgh);
+    auto B = b.get_access<access::mode::read>(cgh);
+    auto C = c.get_access<access::mode::write>(cgh);
+    cgh.parallel_for(r, [=](id<1> index) {
+      cl::sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
+      cl::sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
+      cl::sycl::ext::oneapi::experimental::bfloat16 CVal = AVal + BVal;
+      C[index] = CVal;
+    });
+  });
+
+  assert_close(c.get_access<access::mode::read>(), ref);
+}
+
+void verify_sub(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
+                const float ref) {
+  buffer<float, 1> c{r};
+
+  q.submit([&](handler &cgh) {
+    auto A = a.get_access<access::mode::read>(cgh);
+    auto B = b.get_access<access::mode::read>(cgh);
+    auto C = c.get_access<access::mode::write>(cgh);
+    cgh.parallel_for(r, [=](id<1> index) {
+      cl::sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
+      cl::sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
+      cl::sycl::ext::oneapi::experimental::bfloat16 CVal = AVal - BVal;
+      C[index] = CVal;
+    });
+  });
+
+  assert_close(c.get_access<access::mode::read>(), ref);
+}
+
+void verify_minus(queue &q, buffer<float, 1> &a, range<1> &r, const float ref) {
+  buffer<float, 1> c{r};
+
+  q.submit([&](handler &cgh) {
+    auto A = a.get_access<access::mode::read>(cgh);
+    auto C = c.get_access<access::mode::write>(cgh);
+    cgh.parallel_for(r, [=](id<1> index) {
+      cl::sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
+      cl::sycl::ext::oneapi::experimental::bfloat16 CVal = -AVal;
+      C[index] = CVal;
+    });
+  });
+
+  assert_close(c.get_access<access::mode::read>(), ref);
+}
+
+void verify_mul(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
+                const float ref) {
+  buffer<float, 1> c{r};
+
+  q.submit([&](handler &cgh) {
+    auto A = a.get_access<access::mode::read>(cgh);
+    auto B = b.get_access<access::mode::read>(cgh);
+    auto C = c.get_access<access::mode::write>(cgh);
+    cgh.parallel_for(r, [=](id<1> index) {
+      cl::sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
+      cl::sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
+      cl::sycl::ext::oneapi::experimental::bfloat16 CVal = AVal * BVal;
+      C[index] = CVal;
+    });
+  });
+
+  assert_close(c.get_access<access::mode::read>(), ref);
+}
+
+void verify_div(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
+                const float ref) {
+  buffer<float, 1> c{r};
+
+  q.submit([&](handler &cgh) {
+    auto A = a.get_access<access::mode::read>(cgh);
+    auto B = b.get_access<access::mode::read>(cgh);
+    auto C = c.get_access<access::mode::write>(cgh);
+    cgh.parallel_for(r, [=](id<1> index) {
+      cl::sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
+      cl::sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
+      cl::sycl::ext::oneapi::experimental::bfloat16 CVal = AVal / BVal;
+      C[index] = CVal;
+    });
+  });
+
+  assert_close(c.get_access<access::mode::read>(), ref);
+}
+
+void verify_logic(queue &q, buffer<float, 1> &a, buffer<float, 1> &b,
+                  range<1> &r, const float ref) {
+  buffer<float, 1> c{r};
+
+  q.submit([&](handler &cgh) {
+    auto A = a.get_access<access::mode::read>(cgh);
+    auto B = b.get_access<access::mode::read>(cgh);
+    auto C = c.get_access<access::mode::write>(cgh);
+    cgh.parallel_for(r, [=](id<1> index) {
+      cl::sycl::ext::oneapi::experimental::bfloat16 AVal{A[index]};
+      cl::sycl::ext::oneapi::experimental::bfloat16 BVal{B[index]};
+      if (AVal) {
+        if (AVal > BVal || AVal >= BVal || AVal < BVal || AVal <= BVal ||
+            !BVal) {
+          cl::sycl::ext::oneapi::experimental::bfloat16 CVal =
+              AVal != BVal ? AVal : BVal;
+          CVal--;
+          CVal++;
+          if (AVal == BVal) {
+            CVal -= AVal;
+            CVal *= 3.0;
+            CVal /= 2.0;
+          } else
+            CVal += BVal;
+          C[index] = CVal;
+        }
+      }
+    });
+  });
+
+  assert_close(c.get_access<access::mode::read>(), ref);
+}
+
+int run_tests() {
+  device dev{default_selector()};
+
+  // TODO: replace is_gpu check with extension check when the appropriate part
+  // of implementation ready (aspect)
+  if (!dev.is_gpu() && !dev.is_cpu()) {
+    std::cout << "This device doesn't support bfloat16 conversion feature"
+              << std::endl;
+    return 0;
+  }
+
+  std::vector<float> vec_a(N, 5.0);
+  std::vector<float> vec_b(N, 2.0);
+  std::vector<float> vec_b_neg(N, -2.0);
+
+  range<1> r(N);
+  buffer<float, 1> a{vec_a.data(), r};
+  buffer<float, 1> b{vec_b.data(), r};
+  buffer<float, 1> b_neg{vec_b_neg.data(), r};
+
+  queue q{dev};
+
+  verify_conv_implicit(q, a, r, 5.0);
+  verify_conv_explicit(q, a, r, 5.0);
+  verify_add(q, a, b, r, 7.0);
+  verify_sub(q, a, b, r, 3.0);
+  verify_mul(q, a, b, r, 10.0);
+  verify_div(q, a, b, r, 2.5);
+  verify_logic(q, a, b, r, 7.0);
+  verify_add(q, a, b_neg, r, 3.0);
+  verify_sub(q, a, b_neg, r, 7.0);
+  verify_minus(q, a, r, -5.0);
+  verify_mul(q, a, b_neg, r, -10.0);
+  verify_div(q, a, b_neg, r, -2.5);
+  verify_logic(q, a, b_neg, r, 3.0);
+
+  return 0;
+}
diff --git a/SYCL/BFloat16/bfloat16_type_cuda.cpp b/SYCL/BFloat16/bfloat16_type_cuda.cpp
new file mode 100644
index 0000000000..89e46884b4
--- /dev/null
+++ b/SYCL/BFloat16/bfloat16_type_cuda.cpp
@@ -0,0 +1,17 @@
+// REQUIRES: gpu, cuda
+// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -Xsycl-target-backend --cuda-gpu-arch=sm_80 %s -o %t.out
+// TODO: Currently the CI does not have a sm_80 capable machine. Enable the test
+// execution once it does.
+// RUNx: %t.out
+
+//==--------- bfloat16_type_cuda.cpp - SYCL bfloat16 type test -------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "bfloat16_type.hpp"
+
+int main() { return run_tests(); }