Skip to content

Commit 4028aab

Browse files
JackAKirk authored and bb-sycl committed
[SYCL][CUDA][BFLOAT16] Temp oneapi test file adds unary coverage (intel#889)
Fixes the broken verify_logic test. Once a backend-agnostic ext_oneapi_bf16 aspect is added, we can add the patches from this file to the existing bfloat16 test. Until then, the CUDA backend requires the target to be provided at compile time in order to use the Target_Builtins.
1 parent 2dda512 commit 4028aab

File tree

3 files changed

+226
-189
lines changed

3 files changed

+226
-189
lines changed

SYCL/BFloat16/bfloat16_type.cpp

+2-189
Original file line numberDiff line numberDiff line change
@@ -15,193 +15,6 @@
1515
//
1616
//===----------------------------------------------------------------------===//
1717

18-
#include <CL/sycl.hpp>
19-
#include <sycl/ext/intel/experimental/bfloat16.hpp>
18+
#include "bfloat16_type.hpp"
2019

21-
#include <cmath>
22-
23-
using namespace cl::sycl;
24-
25-
constexpr size_t N = 100;
26-
27-
// Check that all N elements of C match the scalar reference value to
// within float machine epsilon. C only needs operator[] (typically a
// host accessor over one of the test buffers).
template <typename T> void assert_close(const T &C, const float ref) {
  for (size_t idx = 0; idx < N; idx++) {
    const auto delta = C[idx] - ref;
    assert(std::fabs(static_cast<float>(delta)) <
           std::numeric_limits<float>::epsilon());
  }
}
34-
35-
// Round-trip every element of `a` through an implicit
// float -> bfloat16 -> float conversion on the device, then check the
// result against `ref` on the host.
void verify_conv_implicit(queue &q, buffer<float, 1> &a, range<1> &r,
                          const float ref) {
  q.submit([&](handler &cgh) {
    auto acc = a.get_access<access::mode::read_write>(cgh);
    cgh.parallel_for<class calc_conv>(r, [=](id<1> index) {
      // Construction converts float -> bfloat16; the store back
      // converts bfloat16 -> float implicitly.
      cl::sycl::ext::intel::experimental::bfloat16 val{acc[index]};
      acc[index] = val;
    });
  });

  assert_close(a.get_access<access::mode::read>(), ref);
}
47-
48-
// Same round trip as verify_conv_implicit, but through the explicit
// from_float()/to_float() entry points using the raw uint16_t storage
// representation.
void verify_conv_explicit(queue &q, buffer<float, 1> &a, range<1> &r,
                          const float ref) {
  q.submit([&](handler &cgh) {
    auto acc = a.get_access<access::mode::read_write>(cgh);
    cgh.parallel_for<class calc_conv_impl>(r, [=](id<1> index) {
      const uint16_t raw =
          cl::sycl::ext::intel::experimental::bfloat16::from_float(acc[index]);
      acc[index] = cl::sycl::ext::intel::experimental::bfloat16::to_float(raw);
    });
  });

  assert_close(a.get_access<access::mode::read>(), ref);
}
61-
62-
// Elementwise bfloat16 addition of `a` and `b` on the device; every
// result is checked against the scalar `ref` on the host.
void verify_add(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  using bf16 = cl::sycl::ext::intel::experimental::bfloat16;
  buffer<float, 1> result{r};

  q.submit([&](handler &cgh) {
    auto accA = a.get_access<access::mode::read>(cgh);
    auto accB = b.get_access<access::mode::read>(cgh);
    auto accOut = result.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_add_expl>(r, [=](id<1> index) {
      const bf16 lhs{accA[index]};
      const bf16 rhs{accB[index]};
      const bf16 sum = lhs + rhs;
      accOut[index] = sum; // implicit bfloat16 -> float on store
    });
  });

  assert_close(result.get_access<access::mode::read>(), ref);
}
80-
81-
// Elementwise bfloat16 subtraction (a - b) on the device; every result
// is checked against the scalar `ref` on the host.
void verify_sub(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  using bf16 = cl::sycl::ext::intel::experimental::bfloat16;
  buffer<float, 1> result{r};

  q.submit([&](handler &cgh) {
    auto accA = a.get_access<access::mode::read>(cgh);
    auto accB = b.get_access<access::mode::read>(cgh);
    auto accOut = result.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_sub>(r, [=](id<1> index) {
      const bf16 lhs{accA[index]};
      const bf16 rhs{accB[index]};
      const bf16 diff = lhs - rhs;
      accOut[index] = diff; // implicit bfloat16 -> float on store
    });
  });

  assert_close(result.get_access<access::mode::read>(), ref);
}
99-
100-
// Elementwise bfloat16 multiplication of `a` and `b` on the device;
// every result is checked against the scalar `ref` on the host.
void verify_mul(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  using bf16 = cl::sycl::ext::intel::experimental::bfloat16;
  buffer<float, 1> result{r};

  q.submit([&](handler &cgh) {
    auto accA = a.get_access<access::mode::read>(cgh);
    auto accB = b.get_access<access::mode::read>(cgh);
    auto accOut = result.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_mul>(r, [=](id<1> index) {
      const bf16 lhs{accA[index]};
      const bf16 rhs{accB[index]};
      const bf16 prod = lhs * rhs;
      accOut[index] = prod; // implicit bfloat16 -> float on store
    });
  });

  assert_close(result.get_access<access::mode::read>(), ref);
}
118-
119-
// Elementwise bfloat16 division (a / b) on the device; every result is
// checked against the scalar `ref` on the host.
void verify_div(queue &q, buffer<float, 1> &a, buffer<float, 1> &b, range<1> &r,
                const float ref) {
  using bf16 = cl::sycl::ext::intel::experimental::bfloat16;
  buffer<float, 1> result{r};

  q.submit([&](handler &cgh) {
    auto accA = a.get_access<access::mode::read>(cgh);
    auto accB = b.get_access<access::mode::read>(cgh);
    auto accOut = result.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class calc_div>(r, [=](id<1> index) {
      const bf16 lhs{accA[index]};
      const bf16 rhs{accB[index]};
      const bf16 quot = lhs / rhs;
      accOut[index] = quot; // implicit bfloat16 -> float on store
    });
  });

  assert_close(result.get_access<access::mode::read>(), ref);
}
137-
138-
// Exercise the bfloat16 comparison, boolean-conversion and compound
// assignment operators. With equal operands the result is
// (x - x - x) * 3 / 2 after a --/++ round trip; with unequal operands it
// is (lhs != rhs ? lhs : rhs) + rhs. The result buffer is only written
// when the (always-true for the test inputs) guard conditions hold.
void verify_logic(queue &q, buffer<float, 1> &a, buffer<float, 1> &b,
                  range<1> &r, const float ref) {
  using bf16 = cl::sycl::ext::intel::experimental::bfloat16;
  buffer<float, 1> result{r};

  q.submit([&](handler &cgh) {
    auto accA = a.get_access<access::mode::read>(cgh);
    auto accB = b.get_access<access::mode::read>(cgh);
    auto accOut = result.get_access<access::mode::write>(cgh);
    cgh.parallel_for<class logic>(r, [=](id<1> index) {
      bf16 lhs{accA[index]};
      bf16 rhs{accB[index]};
      if (lhs) {
        if (lhs > rhs || lhs >= rhs || lhs < rhs || lhs <= rhs || !rhs) {
          bf16 out = lhs != rhs ? lhs : rhs;
          // Decrement/increment cancel out but exercise both operators.
          out--;
          out++;
          if (lhs == rhs) {
            out -= lhs;
            out *= 3.0;
            out /= 2.0;
          } else
            out += rhs;
          accOut[index] = out;
        }
      }
    });
  });

  assert_close(result.get_access<access::mode::read>(), ref);
}
170-
171-
int main() {
172-
device dev{default_selector()};
173-
174-
// TODO: replace is_gpu check with extension check when the appropriate part
175-
// of implementation ready (aspect)
176-
if (!dev.is_gpu() && !dev.is_cpu()) {
177-
std::cout << "This device doesn't support bfloat16 conversion feature"
178-
<< std::endl;
179-
return 0;
180-
}
181-
182-
std::vector<float> vec_a(N, 5.0);
183-
std::vector<float> vec_b(N, 2.0);
184-
std::vector<float> vec_b_neg(N, -2.0);
185-
186-
range<1> r(N);
187-
buffer<float, 1> a{vec_a.data(), r};
188-
buffer<float, 1> b{vec_b.data(), r};
189-
buffer<float, 1> b_neg{vec_b_neg.data(), r};
190-
191-
queue q{dev};
192-
193-
verify_conv_implicit(q, a, r, 5.0);
194-
verify_conv_explicit(q, a, r, 5.0);
195-
verify_add(q, a, b, r, 7.0);
196-
verify_sub(q, a, b, r, 3.0);
197-
verify_mul(q, a, b, r, 10.0);
198-
verify_div(q, a, b, r, 2.5);
199-
verify_logic(q, a, b, r, 7.0);
200-
verify_add(q, a, b_neg, r, 3.0);
201-
verify_sub(q, a, b_neg, r, 7.0);
202-
verify_mul(q, a, b_neg, r, -10.0);
203-
verify_div(q, a, b_neg, r, -2.5);
204-
verify_logic(q, a, b_neg, r, 3.0);
205-
206-
return 0;
207-
}
20+
int main() { return run_tests(); }

0 commit comments

Comments
 (0)