Commit 84dab2d

[SYCL] Add BFloat16 feature end-to-end test (intel/llvm-test-suite#390)
Spec: intel#4237
Implementation: intel#4213

Signed-off-by: Dmitry Sidorov <[email protected]>
Parent: e37a346

1 file changed, +206 -0
SYCL/BFloat16/bfloat16_type.cpp

@@ -0,0 +1,206 @@
// UNSUPPORTED: cuda
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
// TODO: the feature is currently unsupported on most devices; enable the
// run lines below once the aspect and device_if features are introduced.
// RUNx: %CPU_RUN_PLACEHOLDER %t.out
// RUNx: %GPU_RUN_PLACEHOLDER %t.out
// RUNx: %ACC_RUN_PLACEHOLDER %t.out

//==----------- bfloat16_type.cpp - SYCL bfloat16 type test ----------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <CL/sycl.hpp>
#include <sycl/ext/intel/experimental/bfloat16.hpp>

#include <cassert>
#include <cmath>
#include <iostream>
#include <limits>
#include <vector>

using namespace cl::sycl;

constexpr size_t N = 100;

// Check that every element of C matches the reference value to within
// float epsilon.
template <typename T> void assert_close(const T &C, const float ref) {
  for (size_t i = 0; i < N; i++) {
    auto diff = C[i] - ref;
    assert(std::fabs(static_cast<float>(diff)) <
           std::numeric_limits<float>::epsilon());
  }
}

// Round-trip float -> bfloat16 -> float through implicit conversions.
void verify_conv_implicit(queue &q, buffer<float, 1> &a, range<1> &r,
                          const float ref) {
  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read_write>(cgh);
    cgh.parallel_for<class calc_conv>(r, [=](id<1> index) {
      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
      A[index] = AVal;
    });
  });

  assert_close(a.get_access<access::mode::read>(), ref);
}

// Round-trip float -> bfloat16 -> float through the explicit
// from_float/to_float conversions on the raw uint16_t storage.
void verify_conv_explicit(queue &q, buffer<float, 1> &a, range<1> &r,
                          const float ref) {
  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read_write>(cgh);
    cgh.parallel_for<class calc_conv_impl>(r, [=](id<1> index) {
      uint16_t AVal =
          cl::sycl::ext::intel::experimental::bfloat16::from_float(A[index]);
      A[index] = cl::sycl::ext::intel::experimental::bfloat16::to_float(AVal);
    });
  });

  assert_close(a.get_access<access::mode::read>(), ref);
}

void verify_add(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  buffer<float, 1> c{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = c.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_add_expl>(r, [=](id<1> index) {
      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
      cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal + BVal;
      C[index] = CVal;
    });
  });

  assert_close(c.get_access<access::mode::read>(), ref);
}

void verify_sub(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  buffer<float, 1> c{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = c.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_sub>(r, [=](id<1> index) {
      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
      cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal - BVal;
      C[index] = CVal;
    });
  });

  assert_close(c.get_access<access::mode::read>(), ref);
}

void verify_mul(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  buffer<float, 1> c{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = c.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_mul>(r, [=](id<1> index) {
      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
      cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal * BVal;
      C[index] = CVal;
    });
  });

  assert_close(c.get_access<access::mode::read>(), ref);
}

void verify_div(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  buffer<float, 1> c{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = c.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_div>(r, [=](id<1> index) {
      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
      cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal / BVal;
      C[index] = CVal;
    });
  });

  assert_close(c.get_access<access::mode::read>(), ref);
}

// Exercise comparison, logical, and compound-assignment operators.
void verify_logic(queue &q, buffer<float, 1> &a, buffer<float, 1> &b,
                  range<1> &r, const float ref) {
  buffer<float, 1> c{r};

  q.submit([&](handler &cgh) {
    auto A = a.get_access<access::mode::read>(cgh);
    auto B = b.get_access<access::mode::read>(cgh);
    auto C = c.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class logic>(r, [=](id<1> index) {
      cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]};
      cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]};
      if (AVal) {
        if (AVal > BVal || AVal >= BVal || AVal < BVal || AVal <= BVal ||
            !BVal) {
          cl::sycl::ext::intel::experimental::bfloat16 CVal =
              AVal != BVal ? AVal : BVal;
          CVal--;
          CVal++;
          if (AVal == BVal) {
            CVal -= AVal;
            CVal *= 3.0;
            CVal /= 2.0;
          } else
            CVal += BVal;
          // Store the result; without this write the buffer checked by
          // assert_close below would be left uninitialized.
          C[index] = CVal;
        }
      }
    });
  });

  assert_close(c.get_access<access::mode::read>(), ref);
}

int main() {
  device dev{default_selector()};

  // TODO: replace the is_gpu check with an extension check once the
  // corresponding part of the implementation (the aspect) is ready.
  if (!dev.is_gpu()) {
    std::cout << "This device doesn't support the bfloat16 conversion feature"
              << std::endl;
    return 0;
  }

  std::vector<float> vec_a(N, 5.0);
  std::vector<float> vec_b(N, 2.0);
  std::vector<float> vec_b_neg(N, -2.0);

  range<1> r(N);
  buffer<float, 1> a{vec_a.data(), r};
  buffer<float, 1> b{vec_b.data(), r};
  buffer<float, 1> b_neg{vec_b_neg.data(), r};

  queue q{dev};

  verify_conv_implicit(q, a, r, 5.0);
  verify_conv_explicit(q, a, r, 5.0);
  verify_add(q, a, b, r, 7.0);
  verify_sub(q, a, b, r, 3.0);
  verify_mul(q, a, b, r, 10.0);
  verify_div(q, a, b, r, 2.5);
  verify_logic(q, a, b, r, 7.0);
  verify_add(q, a, b_neg, r, 3.0);
  verify_sub(q, a, b_neg, r, 7.0);
  verify_mul(q, a, b_neg, r, -10.0);
  verify_div(q, a, b_neg, r, -2.5);
  verify_logic(q, a, b_neg, r, 3.0);

  return 0;
}
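
For context on the conversions exercised in verify_conv_explicit: a bfloat16
value keeps a float's sign bit, all 8 exponent bits, and the top 7 mantissa
bits, which is why values like 5.0, 2.0, and 7.0 round-trip exactly and the
epsilon-based assert_close holds. Below is a minimal host-only sketch of that
storage format; the helper names are hypothetical, and it assumes plain
truncation, whereas the extension's from_float may round to nearest even on
real hardware.

#include <cstdint>
#include <cstring>
#include <iostream>

// Keep the sign, exponent, and top 7 mantissa bits of a float.
uint16_t bf16_from_float(float f) {
  uint32_t bits;
  std::memcpy(&bits, &f, sizeof(bits)); // bit-cast without undefined behavior
  return static_cast<uint16_t>(bits >> 16);
}

// Widen back to float; the discarded low mantissa bits become zero.
float bf16_to_float(uint16_t h) {
  uint32_t bits = static_cast<uint32_t>(h) << 16;
  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

int main() {
  // 5.0f is 1.25 * 2^2 and needs only two mantissa bits, so the round
  // trip is exact -- the property this test's assert_close relies on.
  uint16_t h = bf16_from_float(5.0f);
  std::cout << bf16_to_float(h) << "\n"; // prints 5
}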
