Skip to content

Commit 719c9ed

Browse files
committed
[AArch64][FMV] Add a non-comprehensive test for ACLE Function Multi Versioning
1 parent 72a946a commit 719c9ed

File tree

4 files changed

+322
-0
lines changed

4 files changed

+322
-0
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Register the AArch64 single-source tests only when building with Clang for
# an AArch64 target.
if(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND ARCH STREQUAL "AArch64")
  llvm_singlesource(PREFIX "aarch64-")
endif()
Lines changed: 288 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,288 @@
1+
#include <stdbool.h>
2+
#include <stdio.h>
3+
#include <stdint.h>
4+
#include <stdlib.h>
5+
#include <sys/types.h>
6+
#include <sys/wait.h>
7+
#include <unistd.h>
8+
9+
#ifdef __APPLE__
10+
#include <sys/sysctl.h>
11+
#endif
12+
13+
static bool safe_try_feature(bool (*try_feature)(void));
14+
15+
static bool any_fails = false;
16+
17+
/*
 * CHECK(X, BODY...) defines the probe and the checker for feature X.
 *
 * When __HAVE_FUNCTION_MULTI_VERSIONING is set it emits:
 *   - try_X():   built with target("X"); runs BODY inside do/while(0) and
 *                returns true unless BODY returns early.
 *   - check_X(): two function versions.
 *       target_version("X")       -> prints X, runs try_X through
 *                                    safe_try_feature() and reports "FAIL"
 *                                    if the probe does not succeed.
 *       target_version("default") -> prints X, runs the same probe and
 *                                    reports "UPASS" (unexpected pass) if it
 *                                    does succeed.
 *     Either mismatch sets any_fails. stdout is flushed before the probe
 *     because safe_try_feature() forks.
 *
 * Otherwise check_X() only prints the feature name and BODY is discarded.
 *
 * BODY is accepted variadically so that commas at its top level (for
 * example `int a, b;`) do not split it into several macro arguments.
 */
#if __HAVE_FUNCTION_MULTI_VERSIONING
#define CHECK(X, ...)                                                          \
  __attribute__((target(#X)))                                                  \
  static bool try_##X(void) {                                                  \
    do                                                                         \
      __VA_ARGS__                                                              \
    while (0);                                                                 \
    return true;                                                               \
  }                                                                            \
  __attribute__((target_version(#X)))                                          \
  static void check_##X(void) {                                                \
    printf("%s\n", #X);                                                        \
    fflush(stdout);                                                            \
    if (!safe_try_feature(try_##X)) {                                          \
      printf("\tFAIL\n");                                                      \
      any_fails = true;                                                        \
    }                                                                          \
  }                                                                            \
  __attribute__((target_version("default")))                                   \
  static void check_##X(void) {                                                \
    printf("%s\n", #X);                                                        \
    fflush(stdout);                                                            \
    if (safe_try_feature(try_##X)) {                                           \
      printf("\tUPASS\n");                                                     \
      any_fails = true;                                                        \
    }                                                                          \
  }
#else
#define CHECK(X, ...)                                                          \
  static void check_##X(void) {                                                \
    printf("%s\n", #X);                                                        \
  }
#endif
50+
51+
CHECK(flagm, {
  // CFINV inverts the carry flag; issuing it twice restores the original
  // value. The flags are still written, so declare them as clobbered.
  asm volatile (
    "cfinv" "\n"
    "cfinv" "\n"
    : : : "cc"
  );
})
57+
CHECK(flagm2, {
  // AXFLAG and XAFLAG rewrite the condition flags, so they must be listed
  // as clobbered.
  asm volatile (
    "axflag" "\n"
    "xaflag" "\n"
    : : : "cc"
  );
})
63+
CHECK(dotprod, {
  // Probe with UDOT; the destination register v0 is overwritten.
  asm volatile ("udot v0.4S,v1.16B,v2.16B" : : : "v0");
})
69+
CHECK(sha3, {
  // Both d0 and d1 are written before the EOR3 probe, so both vector
  // registers have to appear in the clobber list.
  asm volatile (
    "fmov d0, #0" "\n"
    "fmov d1, #0" "\n"
    "eor3 v0.16b, v0.16b, v0.16b, v0.16b" "\n"
    : : : "v0", "v1"
  );
})
77+
CHECK(rdm, {
  // Probe with SQRDMLAH; the destination s0 is overwritten.
  asm volatile ("sqrdmlah s0, s1, s2" : : : "s0");
})
83+
CHECK(lse, {
  uint64_t pointee = 0;
  // SWP stores to *pointee behind the compiler's back, so declare memory
  // as modified.
  asm volatile (
    "swp xzr, xzr, [%[pointee]]"
    : : [pointee]"r"(&pointee)
    : "memory"
  );
})
90+
CHECK(sha2, {
  // Both d0 and d1 are written before the SHA256H probe, so both vector
  // registers have to appear in the clobber list.
  asm volatile (
    "fmov d0, #0" "\n"
    "fmov d1, #0" "\n"
    "sha256h q0, q0, v0.4s" "\n"
    : : : "v0", "v1"
  );
})
98+
CHECK(sha1, {
  // Zero s0, then probe with SHA1H; only v0 is touched.
  asm volatile (
    "fmov s0, #0\n"
    "sha1h s0, s0\n"
    : : : "v0"
  );
})
105+
CHECK(aes, {
  // Both d0 and d1 are written before the AESD probe, so both vector
  // registers have to appear in the clobber list.
  asm volatile (
    "fmov d0, #0" "\n"
    "fmov d1, #0" "\n"
    "aesd v0.16B, v0.16B" "\n"
    : : : "v0", "v1"
  );
})
113+
CHECK(pmull, {
  // Zero d0, then probe with the 64-bit PMULL form; only v0 is touched.
  asm volatile (
    "fmov d0, #0\n"
    "pmull v0.1q, v0.1d, v0.1d\n"
    : : : "v0"
  );
})
120+
CHECK(rcpc, {
  // LDAPRB loads from x, so give it a defined value, and use the "memory"
  // clobber so the compiler knows the asm accesses memory.
  int x = 0;
  asm volatile (
    "ldaprb w0, [%0]"
    : : "r" (&x) : "w0", "memory"
  );
})
127+
CHECK(rcpc2, {
  // LDAPURB loads from x, so give it a defined value, and use the "memory"
  // clobber so the compiler knows the asm accesses memory.
  int x = 0;
  asm volatile (
    "ldapurb w0, [%0]"
    : : "r" (&x) : "w0", "memory"
  );
})
134+
CHECK(fcma, {
  // Zero d0, then probe with FCADD (90 degree rotation); only v0 is touched.
  asm volatile (
    "fmov d0, #0\n"
    "fcadd v0.2s, v0.2s, v0.2s, #90\n"
    : : : "v0"
  );
})
141+
CHECK(jscvt, {
  // Zero d0, then probe with FJCVTZS; w1 receives the converted value.
  asm volatile (
    "fmov d0, #0\n"
    "fjcvtzs w1, d0\n"
    : : : "w1", "d0"
  );
})
148+
CHECK(dpb, {
  // Probe with DC CVAP on the address of a stack variable.
  int target;
  asm volatile ("dc cvap, %[addr]" : : [addr] "r" (&target));
})
155+
CHECK(dpb2, {
  // Probe with DC CVADP on the address of a stack variable.
  int target;
  asm volatile ("dc cvadp, %[addr]" : : [addr] "r" (&target));
})
162+
CHECK(bf16, {
  // Probe with BFDOT; the destination register v0 is overwritten.
  asm volatile ("bfdot v0.4S,v1.8H,v2.8H" : : : "v0");
})
168+
CHECK(i8mm, {
  // Probe with SUDOT (by element); the destination v0 is overwritten.
  asm volatile ("sudot v0.4S,v1.16B,v2.4B[0]" : : : "v0");
})
174+
CHECK(dit, {
  // Read DIT and write the same value back, as the ssbs2 probe does for
  // SSBS. This avoids writing whatever happens to be in x0 into DIT, and
  // makes the x0 clobber accurate because x0 is now written by the MRS.
  asm volatile (
    "mrs x0, DIT" "\n"
    "msr DIT, x0" "\n"
    : : : "x0"
  );
})
180+
CHECK(fp16, {
  // Probe with a half-precision FMOV into h0.
  asm volatile ("fmov h0, #0\n" : : : "v0");
})
186+
CHECK(ssbs2, {
  // Read SSBS through x0 and write the same value straight back.
  asm volatile (
    "mrs x0, SSBS\n"
    "msr SSBS, x0\n"
    : : : "x0"
  );
})
193+
CHECK(bti, {
  // The only test for this requires reading a register that is only
  // accessible to EL1.
  #ifdef __linux__
    // On Linux, the kernel emulates this system register read in a trap
    // handler, so we can just do the read as you would in EL1.
    // MRS transfers a full 64-bit X register, so the destination is 64 bits.
    uint64_t val = 0;
    asm volatile(
      "mrs %0, ID_AA64PFR1_EL1"
      : "=r"(val)
    );
    // https://developer.arm.com/documentation/ddi0601/2023-12/AArch64-Registers/ID-AA64PFR1-EL1--AArch64-Processor-Feature-Register-1?lang=en#fieldset_0-3_0
    // Compare the low four bits (the BT field) with 1. The parentheses are
    // required: `!=` binds tighter than `&`, so the unparenthesized form
    // would only test bit 0.
    if ((val & 0xF) != 0x1)
      return false;
  #elif defined(__APPLE__)
    // On Apple platforms, we need to check a sysctl.
    int32_t val = 0;
    size_t size = sizeof(val);
    if (sysctlbyname("hw.optional.arm.FEAT_BTI", &val, &size, NULL, 0) || val != 1)
      return false;
  #else
    // TODO: implement me on your platform to fix this test!
  #endif
})
217+
CHECK(simd, {
  // The element insert writes v0, so v0 must be declared as clobbered.
  asm volatile (
    "mov v0.B[0], w0"
    : : : "v0"
  );
})
223+
CHECK(fp, {
  // Probe with a single-precision FMOV into s0.
  asm volatile ("fmov s0, #0\n" : : : "v0");
})
229+
CHECK(crc, {
  // Probe with CRC32B on the zero register only, so nothing is clobbered.
  asm volatile (
    "crc32b wzr, wzr, wzr"
  );
})
232+
CHECK(sme, {
  // Probe with RDSVL; the result is written to x0.
  asm volatile ("rdsvl x0, #1" : : : "x0");
})
238+
CHECK(sme2, {
  // Enable ZA, zero ZT0 as the probe, then disable ZA again.
  asm volatile (
    "smstart za\n"
    "zero { zt0 }\n"
    "smstop za\n"
  );
})
245+
246+
/*
 * Runs try_feature() in a forked child process so that a fault raised by the
 * probe terminates only the child.
 *
 * Returns true only when the child exits normally with status 0, i.e. when
 * try_feature() returned true. Returns false when the probe returned false,
 * when the child was terminated abnormally, or when fork()/waitpid() failed.
 */
static bool safe_try_feature(bool (*try_feature)(void)) {
  pid_t child = fork();
  if (child < 0)
    return false; // No child was created, so there is nothing to wait for.

  if (child == 0) {
    // _exit() leaves the child without flushing stdio buffers inherited
    // from the parent, so pending output cannot be written twice.
    _exit(try_feature() ? 0 : 1);
  }

  int status = 0;
  if (waitpid(child, &status, 0) != child)
    return false;

  // Decode the wait status instead of comparing the raw value with 0.
  return WIFEXITED(status) && WEXITSTATUS(status) == 0;
}
257+
258+
int main(int, const char **) {
259+
check_flagm();
260+
check_flagm2();
261+
check_dotprod();
262+
check_sha3();
263+
check_rdm();
264+
check_lse();
265+
check_sha2();
266+
check_sha1();
267+
check_aes();
268+
check_pmull();
269+
check_rcpc();
270+
check_rcpc2();
271+
check_fcma();
272+
check_jscvt();
273+
check_dpb();
274+
check_dpb2();
275+
check_bf16();
276+
check_i8mm();
277+
check_dit();
278+
check_fp16();
279+
check_ssbs2();
280+
check_bti();
281+
check_simd();
282+
check_fp();
283+
check_crc();
284+
check_sme();
285+
check_sme2();
286+
287+
return any_fails ? -1 : 0;
288+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
flagm
2+
flagm2
3+
dotprod
4+
sha3
5+
rdm
6+
lse
7+
sha2
8+
sha1
9+
aes
10+
pmull
11+
rcpc
12+
rcpc2
13+
fcma
14+
jscvt
15+
dpb
16+
dpb2
17+
bf16
18+
i8mm
19+
dit
20+
fp16
21+
ssbs2
22+
bti
23+
simd
24+
fp
25+
crc
26+
sme
27+
sme2
28+
exit 0

SingleSource/UnitTests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ add_subdirectory(Threads)
88
add_subdirectory(Vector)
99
add_subdirectory(Vectorizer)
1010
add_subdirectory(X86)
11+
add_subdirectory(AArch64)
1112

1213
list(APPEND CFLAGS -Wno-implicit-function-declaration -Wno-implicit-int)
1314

0 commit comments

Comments
 (0)