Skip to content

Commit 597619c

Browse files
authored
[SYCL][E2E] Ensuring lowering of llvm.bitreverse is functionally correct (#12774)
Ensure that lowering of llvm.bitreverse* intrinsics by llvm-spirv is functionally correct. --------- Signed-off-by: Lu, John <[email protected]>
1 parent d89ca59 commit 597619c

File tree

2 files changed

+263
-0
lines changed

2 files changed

+263
-0
lines changed
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
// Test that llvm.bitreverse is lowered correctly by llvm-spirv.
2+
3+
// UNSUPPORTED: hip || cuda
4+
5+
// Make dump directory.
6+
// RUN: rm -rf %t.spvdir && mkdir %t.spvdir
7+
8+
// Ensure that SPV_KHR_bit_instructions is disabled so that translator
9+
// will lower llvm.bitreverse.* intrinsics instead of relying on SPIRV
10+
// BitReverse instruction.
11+
// Also build executable with SPV dump.
12+
// RUN: %{build} -o %t.out -O2 -Xspirv-translator --spirv-ext=-SPV_KHR_bit_instructions -fsycl-dump-device-code=%t.spvdir
13+
14+
// Rename SPV file to explicitly known filename.
15+
// RUN: mv %t.spvdir/*.spv %t.spvdir/dump.spv
16+
17+
// Convert to text.
18+
// RUN: llvm-spirv -to-text %t.spvdir/dump.spv
19+
20+
// Check that all lowerings are done by llvm-spirv.
21+
// RUN: cat %t.spvdir/dump.spt | FileCheck %s --check-prefix CHECK-SPV --implicit-check-not=BitReverse
22+
23+
// Execute to ensure lowering has correct functionality.
24+
// RUN: %{run} %t.out
25+
26+
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
27+
28+
// TODO FIXME Change NOT_READY to RUN when llvm.bitreverse.* is supported.
29+
30+
// Build without lowering explicitly disabled.
31+
// NOT_READY: %{build} -o %t.bitinstructions.out
32+
33+
// Execution should still be correct.
34+
// NOT_READY: %{run} %t.bitinstructions.out
35+
36+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_i8"
37+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_i16"
38+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_i32"
39+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_i64"
40+
41+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v2i8"
42+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v2i16"
43+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v2i32"
44+
45+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v3i8"
46+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v3i16"
47+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v3i32"
48+
49+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v4i8"
50+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v4i16"
51+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v4i32"
52+
53+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v8i8"
54+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v8i16"
55+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v8i32"
56+
57+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v16i8"
58+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v16i16"
59+
// CHECK-SPV: Name {{[0-9]+}} "llvm_bitreverse_v16i32"
60+
61+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_i8" Export
62+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_i16" Export
63+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_i32" Export
64+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_i64" Export
65+
66+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v2i8" Export
67+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v2i16" Export
68+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v2i32" Export
69+
70+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v3i8" Export
71+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v3i16" Export
72+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v3i32" Export
73+
74+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v4i8" Export
75+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v4i16" Export
76+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v4i32" Export
77+
78+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v8i8" Export
79+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v8i16" Export
80+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v8i32" Export
81+
82+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v16i8" Export
83+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v16i16" Export
84+
// CHECK-SPV: LinkageAttributes "llvm_bitreverse_v16i32" Export
85+
86+
#include <string.h>
87+
#include <sycl/sycl.hpp>
88+
#include <iostream>
89+
#include "common.hpp"
90+
91+
using namespace sycl;
92+
93+
// Reference bit reversal: reverses the low `bitlength` bits of `a` one bit at
// a time.
//
// Each iteration shifts the accumulated result left by one, appends the
// current least-significant bit of `a`, and then shifts `a` right by one.
// Only shifts, bitwise AND and bitwise OR are applied to TYPE.
//
// The function is marked optnone/noinline, so it is compiled without
// optimization and is never inlined into its callers.
// NOTE(review): presumably this keeps the loop from being recognized and
// replaced by a bitreverse intrinsic - confirm.
//
// \param a          value whose bits are reversed.
// \param bitlength  number of bits to reverse (loop trip count).
// \returns          the bit-reversed value.
template <typename TYPE>
__attribute__((optnone, noinline)) TYPE reference_reverse(TYPE a, const int bitlength) {
  TYPE ret = 0;
  for (auto i = 0; i < bitlength; i++) {
    ret <<= 1;       // Make room for the next bit.
    ret |= a & 0x1;  // Append the current lowest bit of the input.
    a >>= 1;         // Advance to the next input bit.
  }
  return ret;
}
103+
104+
// Bit reversal under test.
//
// For bitlength 8 and 16 the reversal is computed with explicit mask-and-shift
// swaps (adjacent bits, then pairs, then nibbles, then bytes); for every other
// bitlength the value is handed to __builtin_elementwise_bitreverse.
//
// `bitlength` is an ordinary runtime parameter, so the builtin call in the
// final branch is compiled for every TYPE this template is instantiated with,
// even when that branch is not taken at run time.
// NOTE(review): presumably this is intentional so that the bitreverse
// intrinsic is still emitted for the 8- and 16-bit instantiations - confirm
// before turning this into `if constexpr`.
//
// \param a          value whose bits are reversed.
// \param bitlength  width in bits of (each element of) `a`.
// \returns          the bit-reversed value.
template <typename TYPE>
__attribute__((noinline)) TYPE reverse(TYPE a, int bitlength) {
  if (bitlength == 8) {
    // Avoid bug with __builtin_elementwise_bitreverse(a) on scalar 8-bit types.
    a = ((0x55 & a) << 1) | (0x55 & (a >> 1));  // Swap adjacent bits.
    a = ((0x33 & a) << 2) | (0x33 & (a >> 2));  // Swap adjacent bit pairs.
    return (a << 4) | (a >> 4);                 // Swap the two nibbles.
  } else if (bitlength == 16) {
    // Avoid bug with __builtin_elementwise_bitreverse(a) on scalar 16-bit types.
    a = ((0x5555 & a) << 1) | (0x5555 & (a >> 1));  // Swap adjacent bits.
    a = ((0x3333 & a) << 2) | (0x3333 & (a >> 2));  // Swap adjacent bit pairs.
    a = ((0x0F0F & a) << 4) | (0x0F0F & (a >> 4));  // Swap adjacent nibbles.
    return (a << 8) | (a >> 8);                     // Swap the two bytes.
  } else {
    return __builtin_elementwise_bitreverse(a);
  }
}
120+
121+
// Kernel name type; instantiated with the tested type so that every test
// instantiation submits a distinctly named kernel.
template <class T> class BitreverseTest;

// Number of random input values processed by each test instantiation.
#define NUM_TESTS 1024
124+
125+
template <typename TYPE>
126+
void do_scalar_bitreverse_test() {
127+
queue q;
128+
129+
TYPE *Input = (TYPE *) malloc_shared(sizeof(TYPE) * NUM_TESTS, q.get_device(), q.get_context());
130+
TYPE *Output = (TYPE *) malloc_shared(sizeof(TYPE) * NUM_TESTS, q.get_device(), q.get_context());
131+
132+
for (unsigned i=0; i<NUM_TESTS; i++)
133+
Input[i] = get_rand<TYPE>();
134+
q.submit([=](handler &cgh) {
135+
cgh.single_task<BitreverseTest<TYPE>> ([=]() {
136+
for (unsigned i=0; i<NUM_TESTS; i++)
137+
Output[i] = reverse(Input[i],sizeof(TYPE)*8);
138+
});
139+
});
140+
q.wait();
141+
for (unsigned i=0; i<NUM_TESTS; i++)
142+
if (Output[i]!=reference_reverse(Input[i],sizeof(TYPE)*8)) {
143+
std::cerr << "Failed for scalar " << std::hex << Input[i] << " sizeof=" << sizeof(TYPE) << "\n";
144+
exit(-1);
145+
}
146+
147+
free(Input, q.get_context());
148+
free(Output, q.get_context());
149+
}
150+
151+
template <typename VTYPE>
152+
void do_vector_bitreverse_test() {
153+
queue q;
154+
155+
VTYPE *Input = (VTYPE *) malloc_shared(sizeof(VTYPE) * NUM_TESTS, q.get_device(), q.get_context());
156+
VTYPE *Output = (VTYPE *) malloc_shared(sizeof(VTYPE) * NUM_TESTS, q.get_device(), q.get_context());
157+
158+
for (unsigned i=0; i<NUM_TESTS; i++)
159+
for (unsigned j=0; j<__builtin_vectorelements(VTYPE); j++)
160+
Input[i][j] = get_rand<typename std::decay<decltype(Input[0][0])>::type>();
161+
162+
q.submit([=](handler &cgh) {
163+
cgh.single_task<BitreverseTest<VTYPE>> ([=]() {
164+
for (unsigned i=0; i<NUM_TESTS; i++)
165+
Output[i] = reverse(Input[i],sizeof(Input[0][0])*8);
166+
});
167+
});
168+
q.wait();
169+
for (unsigned i=0; i<NUM_TESTS; i++) {
170+
auto Reference=reference_reverse(Input[i],sizeof(Input[0][0])*8);
171+
for (unsigned j=0; j<__builtin_vectorelements(VTYPE); j++)
172+
if (Output[i][j]!=Reference[j]) {
173+
std::cerr << "Failed for vector " << std::hex << Input[i][j] << " sizeof=" << sizeof(Input[0][0]) << " elements=" << __builtin_vectorelements(VTYPE) << "\n";
174+
exit(-1);
175+
}
176+
}
177+
free(Input, q.get_context());
178+
free(Output, q.get_context());
179+
}
180+
181+
// Vector aliases built with the ext_vector_type attribute, named
// uint<element bits>_t<element count>, for 2, 3, 4, 8 and 16 elements of each
// unsigned element width.

// 2-element vectors.
using uint8_t2 = uint8_t __attribute__((ext_vector_type(2)));
using uint16_t2 = uint16_t __attribute__((ext_vector_type(2)));
using uint32_t2 = uint32_t __attribute__((ext_vector_type(2)));
using uint64_t2 = uint64_t __attribute__((ext_vector_type(2)));

// 3-element vectors.
using uint8_t3 = uint8_t __attribute__((ext_vector_type(3)));
using uint16_t3 = uint16_t __attribute__((ext_vector_type(3)));
using uint32_t3 = uint32_t __attribute__((ext_vector_type(3)));
using uint64_t3 = uint64_t __attribute__((ext_vector_type(3)));

// 4-element vectors.
using uint8_t4 = uint8_t __attribute__((ext_vector_type(4)));
using uint16_t4 = uint16_t __attribute__((ext_vector_type(4)));
using uint32_t4 = uint32_t __attribute__((ext_vector_type(4)));
using uint64_t4 = uint64_t __attribute__((ext_vector_type(4)));

// 8-element vectors.
using uint8_t8 = uint8_t __attribute__((ext_vector_type(8)));
using uint16_t8 = uint16_t __attribute__((ext_vector_type(8)));
using uint32_t8 = uint32_t __attribute__((ext_vector_type(8)));
using uint64_t8 = uint64_t __attribute__((ext_vector_type(8)));

// 16-element vectors.
using uint8_t16 = uint8_t __attribute__((ext_vector_type(16)));
using uint16_t16 = uint16_t __attribute__((ext_vector_type(16)));
using uint32_t16 = uint32_t __attribute__((ext_vector_type(16)));
using uint64_t16 = uint64_t __attribute__((ext_vector_type(16)));
205+
206+
// Test driver: seeds rand() with a fixed value so the generated inputs are
// reproducible, then runs the scalar test for 8/16/32/64-bit unsigned integers
// and the vector test for 2-, 3-, 4-, 8- and 16-element vectors of
// 8/16/32-bit unsigned integers.  The 64-bit vector types are not exercised
// here.  Returns 0 when every test returns; a failing test terminates the
// process itself.
int main() {
  srand(2024);

  // Scalars.
  do_scalar_bitreverse_test<uint8_t>();
  do_scalar_bitreverse_test<uint16_t>();
  do_scalar_bitreverse_test<uint32_t>();
  do_scalar_bitreverse_test<uint64_t>();

  // 2-element vectors.
  do_vector_bitreverse_test<uint8_t2>();
  do_vector_bitreverse_test<uint16_t2>();
  do_vector_bitreverse_test<uint32_t2>();

  // 3-element vectors.
  do_vector_bitreverse_test<uint8_t3>();
  do_vector_bitreverse_test<uint16_t3>();
  do_vector_bitreverse_test<uint32_t3>();

  // 4-element vectors.
  do_vector_bitreverse_test<uint8_t4>();
  do_vector_bitreverse_test<uint16_t4>();
  do_vector_bitreverse_test<uint32_t4>();

  // 8-element vectors.
  do_vector_bitreverse_test<uint8_t8>();
  do_vector_bitreverse_test<uint16_t8>();
  do_vector_bitreverse_test<uint32_t8>();

  // 16-element vectors.
  do_vector_bitreverse_test<uint8_t16>();
  do_vector_bitreverse_test<uint16_t16>();
  do_vector_bitreverse_test<uint32_t16>();

  return 0;
}
236+
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
//==------- common.hpp - DPC++ ESIMD on-device test ------------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include <stdlib.h>
12+
#include <sycl/bit_cast.hpp>
13+
14+
// Forward declaration of a class template parameterized by an integer case
// number.  NOTE(review): presumably intended as a kernel name type; nothing
// visible in this header uses it - confirm whether it is still needed.
template <int case_num> class KernelID;
15+
16+
template <typename T> T get_rand() {
17+
using Tuint = std::conditional_t<
18+
sizeof(T) == 1, uint8_t,
19+
std::conditional_t<
20+
sizeof(T) == 2, uint16_t,
21+
std::conditional_t<sizeof(T) == 4, uint32_t,
22+
std::conditional_t<sizeof(T) == 8, uint64_t, T>>>>;
23+
Tuint v = rand();
24+
if constexpr (sizeof(Tuint) > 4)
25+
v = (v << 32) | rand();
26+
return sycl::bit_cast<T>(v);
27+
}

0 commit comments

Comments
 (0)