|
15 | 15 | //
|
16 | 16 | //===----------------------------------------------------------------------===//
|
17 | 17 |
|
18 |
| -#include <CL/sycl.hpp> |
19 |
| -#include <sycl/ext/intel/experimental/bfloat16.hpp> |
| 18 | +#include "bfloat16_type.hpp" |
20 | 19 |
|
21 |
| -#include <cmath> |
22 |
| - |
23 |
| -using namespace cl::sycl; |
24 |
| - |
25 |
| -constexpr size_t N = 100; |
26 |
| - |
27 |
| -template <typename T> void assert_close(const T &C, const float ref) { |
28 |
| - for (size_t i = 0; i < N; i++) { |
29 |
| - auto diff = C[i] - ref; |
30 |
| - assert(std::fabs(static_cast<float>(diff)) < |
31 |
| - std::numeric_limits<float>::epsilon()); |
32 |
| - } |
33 |
| -} |
34 |
| - |
35 |
| -void verify_conv_implicit(queue &q, buffer<float, 1> &a, range<1> &r, |
36 |
| - const float ref) { |
37 |
| - q.submit([&](handler &cgh) { |
38 |
| - auto A = a.get_access<access::mode::read_write>(cgh); |
39 |
| - cgh.parallel_for<class calc_conv>(r, [=](id<1> index) { |
40 |
| - cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]}; |
41 |
| - A[index] = AVal; |
42 |
| - }); |
43 |
| - }); |
44 |
| - |
45 |
| - assert_close(a.get_access<access::mode::read>(), ref); |
46 |
| -} |
47 |
| - |
48 |
| -void verify_conv_explicit(queue &q, buffer<float, 1> &a, range<1> &r, |
49 |
| - const float ref) { |
50 |
| - q.submit([&](handler &cgh) { |
51 |
| - auto A = a.get_access<access::mode::read_write>(cgh); |
52 |
| - cgh.parallel_for<class calc_conv_impl>(r, [=](id<1> index) { |
53 |
| - uint16_t AVal = |
54 |
| - cl::sycl::ext::intel::experimental::bfloat16::from_float(A[index]); |
55 |
| - A[index] = cl::sycl::ext::intel::experimental::bfloat16::to_float(AVal); |
56 |
| - }); |
57 |
| - }); |
58 |
| - |
59 |
| - assert_close(a.get_access<access::mode::read>(), ref); |
60 |
| -} |
61 |
| - |
62 |
| -void verify_add(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r, |
63 |
| - const float ref) { |
64 |
| - buffer<float, 1> c{r}; |
65 |
| - |
66 |
| - q.submit([&](handler &cgh) { |
67 |
| - auto A = a.get_access<access::mode::read>(cgh); |
68 |
| - auto B = b.get_access<access::mode::read>(cgh); |
69 |
| - auto C = c.get_access<access::mode::write>(cgh); |
70 |
| - cgh.parallel_for<class calc_add_expl>(r, [=](id<1> index) { |
71 |
| - cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]}; |
72 |
| - cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]}; |
73 |
| - cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal + BVal; |
74 |
| - C[index] = CVal; |
75 |
| - }); |
76 |
| - }); |
77 |
| - |
78 |
| - assert_close(c.get_access<access::mode::read>(), ref); |
79 |
| -} |
80 |
| - |
81 |
| -void verify_sub(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r, |
82 |
| - const float ref) { |
83 |
| - buffer<float, 1> c{r}; |
84 |
| - |
85 |
| - q.submit([&](handler &cgh) { |
86 |
| - auto A = a.get_access<access::mode::read>(cgh); |
87 |
| - auto B = b.get_access<access::mode::read>(cgh); |
88 |
| - auto C = c.get_access<access::mode::write>(cgh); |
89 |
| - cgh.parallel_for<class calc_sub>(r, [=](id<1> index) { |
90 |
| - cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]}; |
91 |
| - cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]}; |
92 |
| - cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal - BVal; |
93 |
| - C[index] = CVal; |
94 |
| - }); |
95 |
| - }); |
96 |
| - |
97 |
| - assert_close(c.get_access<access::mode::read>(), ref); |
98 |
| -} |
99 |
| - |
100 |
| -void verify_mul(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r, |
101 |
| - const float ref) { |
102 |
| - buffer<float, 1> c{r}; |
103 |
| - |
104 |
| - q.submit([&](handler &cgh) { |
105 |
| - auto A = a.get_access<access::mode::read>(cgh); |
106 |
| - auto B = b.get_access<access::mode::read>(cgh); |
107 |
| - auto C = c.get_access<access::mode::write>(cgh); |
108 |
| - cgh.parallel_for<class calc_mul>(r, [=](id<1> index) { |
109 |
| - cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]}; |
110 |
| - cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]}; |
111 |
| - cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal * BVal; |
112 |
| - C[index] = CVal; |
113 |
| - }); |
114 |
| - }); |
115 |
| - |
116 |
| - assert_close(c.get_access<access::mode::read>(), ref); |
117 |
| -} |
118 |
| - |
119 |
| -void verify_div(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r, |
120 |
| - const float ref) { |
121 |
| - buffer<float, 1> c{r}; |
122 |
| - |
123 |
| - q.submit([&](handler &cgh) { |
124 |
| - auto A = a.get_access<access::mode::read>(cgh); |
125 |
| - auto B = b.get_access<access::mode::read>(cgh); |
126 |
| - auto C = c.get_access<access::mode::write>(cgh); |
127 |
| - cgh.parallel_for<class calc_div>(r, [=](id<1> index) { |
128 |
| - cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]}; |
129 |
| - cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]}; |
130 |
| - cl::sycl::ext::intel::experimental::bfloat16 CVal = AVal / BVal; |
131 |
| - C[index] = CVal; |
132 |
| - }); |
133 |
| - }); |
134 |
| - |
135 |
| - assert_close(c.get_access<access::mode::read>(), ref); |
136 |
| -} |
137 |
| - |
138 |
| -void verify_logic(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, |
139 |
| - range<1> &r, const float ref) { |
140 |
| - buffer<float, 1> c{r}; |
141 |
| - |
142 |
| - q.submit([&](handler &cgh) { |
143 |
| - auto A = a.get_access<access::mode::read>(cgh); |
144 |
| - auto B = b.get_access<access::mode::read>(cgh); |
145 |
| - auto C = c.get_access<access::mode::write>(cgh); |
146 |
| - cgh.parallel_for<class logic>(r, [=](id<1> index) { |
147 |
| - cl::sycl::ext::intel::experimental::bfloat16 AVal{A[index]}; |
148 |
| - cl::sycl::ext::intel::experimental::bfloat16 BVal{B[index]}; |
149 |
| - if (AVal) { |
150 |
| - if (AVal > BVal || AVal >= BVal || AVal < BVal || AVal <= BVal || |
151 |
| - !BVal) { |
152 |
| - cl::sycl::ext::intel::experimental::bfloat16 CVal = |
153 |
| - AVal != BVal ? AVal : BVal; |
154 |
| - CVal--; |
155 |
| - CVal++; |
156 |
| - if (AVal == BVal) { |
157 |
| - CVal -= AVal; |
158 |
| - CVal *= 3.0; |
159 |
| - CVal /= 2.0; |
160 |
| - } else |
161 |
| - CVal += BVal; |
162 |
| - C[index] = CVal; |
163 |
| - } |
164 |
| - } |
165 |
| - }); |
166 |
| - }); |
167 |
| - |
168 |
| - assert_close(c.get_access<access::mode::read>(), ref); |
169 |
| -} |
170 |
| - |
171 |
| -int main() { |
172 |
| - device dev{default_selector()}; |
173 |
| - |
174 |
| - // TODO: replace is_gpu check with extension check when the appropriate part |
175 |
| - // of implementation ready (aspect) |
176 |
| - if (!dev.is_gpu() && !dev.is_cpu()) { |
177 |
| - std::cout << "This device doesn't support bfloat16 conversion feature" |
178 |
| - << std::endl; |
179 |
| - return 0; |
180 |
| - } |
181 |
| - |
182 |
| - std::vector<float> vec_a(N, 5.0); |
183 |
| - std::vector<float> vec_b(N, 2.0); |
184 |
| - std::vector<float> vec_b_neg(N, -2.0); |
185 |
| - |
186 |
| - range<1> r(N); |
187 |
| - buffer<float, 1> a{vec_a.data(), r}; |
188 |
| - buffer<float, 1> b{vec_b.data(), r}; |
189 |
| - buffer<float, 1> b_neg{vec_b_neg.data(), r}; |
190 |
| - |
191 |
| - queue q{dev}; |
192 |
| - |
193 |
| - verify_conv_implicit(q, a, r, 5.0); |
194 |
| - verify_conv_explicit(q, a, r, 5.0); |
195 |
| - verify_add(q, a, b, r, 7.0); |
196 |
| - verify_sub(q, a, b, r, 3.0); |
197 |
| - verify_mul(q, a, b, r, 10.0); |
198 |
| - verify_div(q, a, b, r, 2.5); |
199 |
| - verify_logic(q, a, b, r, 7.0); |
200 |
| - verify_add(q, a, b_neg, r, 3.0); |
201 |
| - verify_sub(q, a, b_neg, r, 7.0); |
202 |
| - verify_mul(q, a, b_neg, r, -10.0); |
203 |
| - verify_div(q, a, b_neg, r, -2.5); |
204 |
| - verify_logic(q, a, b_neg, r, 3.0); |
205 |
| - |
206 |
| - return 0; |
207 |
| -} |
| 20 | +int main() { return run_tests(); } |
0 commit comments