|
| 1 | +#include <stdbool.h> |
| 2 | +#include <stdio.h> |
| 3 | +#include <stdint.h> |
| 4 | +#include <stdlib.h> |
| 5 | +#include <sys/types.h> |
| 6 | +#include <sys/wait.h> |
| 7 | +#include <unistd.h> |
| 8 | + |
| 9 | +#ifdef __APPLE__ |
| 10 | +#include <sys/sysctl.h> |
| 11 | +#endif |
| 12 | + |
// Latched to true by any check_<feature>() whose probe result disagrees with
// the function version that was selected; main() derives the exit status
// from it.
static bool any_fails = false;

// Runs a probe in a child process and reports whether it succeeded. Declared
// up front because the CHECK() expansions call it before its definition.
static bool safe_try_feature(bool (*try_feature)(void));
| 16 | + |
#if __HAVE_FUNCTION_MULTI_VERSIONING
// Common reporting path for both versions of check_<feature>().
//
// Prints the feature name, flushes stdout so the forked child does not
// inherit pending output, runs the probe through safe_try_feature(), and
// when the outcome differs from want_success prints verdict and records the
// failure in any_fails.
static void report_check(const char *feature, bool (*probe)(void),
                         bool want_success, const char *verdict) {
  printf("%s\n", feature);
  fflush(stdout);
  if (safe_try_feature(probe) != want_success) {
    printf("%s", verdict);
    any_fails = true;
  }
}

// CHECK(X, BODY) generates three functions for feature X:
//   try_X()             - compiled with target(X); runs BODY and returns true
//                         unless BODY itself returns earlier.
//   check_X() [X]       - the target_version(X) variant; the probe is expected
//                         to succeed, otherwise "FAIL" is reported.
//   check_X() [default] - the fallback variant; the probe is expected NOT to
//                         succeed, otherwise "UPASS" is reported.
#define CHECK(X, BODY) \
  __attribute__((target(#X))) \
  static bool try_##X(void) { \
    do \
      BODY \
    while (0); \
    return true; \
  } \
  __attribute__((target_version(#X))) \
  static void check_##X(void) { \
    report_check(#X, try_##X, true, "\tFAIL\n"); \
  } \
  __attribute__((target_version("default"))) \
  static void check_##X(void) { \
    report_check(#X, try_##X, false, "\tUPASS\n"); \
  }
#else
// Without function multi-versioning there is nothing to verify: BODY is
// dropped and check_X() only prints the feature name.
#define CHECK(X, BODY) \
  static void check_##X(void) { \
    puts(#X); \
  }
#endif
| 50 | + |
| 51 | +CHECK(flagm, { |
| 52 | + asm volatile ( |
| 53 | + "cfinv" "\n" |
| 54 | + "cfinv" "\n" |
| 55 | + ); |
| 56 | +}) |
| 57 | +CHECK(flagm2, { |
| 58 | + asm volatile ( |
| 59 | + "axflag" "\n" |
| 60 | + "xaflag" "\n" |
| 61 | + ); |
| 62 | +}) |
| 63 | +CHECK(dotprod, { |
| 64 | + asm volatile ( |
| 65 | + "udot v0.4S,v1.16B,v2.16B" |
| 66 | + : : : "v0" |
| 67 | + ); |
| 68 | +}) |
| 69 | +CHECK(sha3, { |
| 70 | + asm volatile ( |
| 71 | + "fmov d0, #0" "\n" |
| 72 | + "fmov d1, #0" "\n" |
| 73 | + "eor3 v0.16b, v0.16b, v0.16b, v0.16b" "\n" |
| 74 | + : : : "v0" |
| 75 | + ); |
| 76 | +}) |
| 77 | +CHECK(rdm, { |
| 78 | + asm volatile ( |
| 79 | + "sqrdmlah s0, s1, s2" |
| 80 | + : : : "s0" |
| 81 | + ); |
| 82 | +}) |
| 83 | +CHECK(lse, { |
| 84 | + uint64_t pointee = 0; |
| 85 | + asm volatile ( |
| 86 | + "swp xzr, xzr, [%[pointee]]" |
| 87 | + : : [pointee]"r"(&pointee) |
| 88 | + ); |
| 89 | +}) |
| 90 | +CHECK(sha2, { |
| 91 | + asm volatile ( |
| 92 | + "fmov d0, #0" "\n" |
| 93 | + "fmov d1, #0" "\n" |
| 94 | + "sha256h q0, q0, v0.4s" "\n" |
| 95 | + : : : "v0" |
| 96 | + ); |
| 97 | +}) |
| 98 | +CHECK(sha1, { |
| 99 | + asm volatile ( |
| 100 | + "fmov s0, #0" "\n" |
| 101 | + // FIXME: sha1h is under +sha2 in clang, and +sha1 doesn't exist yet. |
| 102 | + ".inst 0x5e280800" "\n" // sha1h s0, s0 |
| 103 | + : : : "v0" |
| 104 | + ); |
| 105 | +}) |
| 106 | +CHECK(aes, { |
| 107 | + asm volatile ( |
| 108 | + "fmov d0, #0" "\n" |
| 109 | + "fmov d1, #0" "\n" |
| 110 | + "aesd v0.16B, v0.16B" "\n" |
| 111 | + : : : "v0" |
| 112 | + ); |
| 113 | +}) |
| 114 | +CHECK(pmull, { |
| 115 | + asm volatile ( |
| 116 | + "fmov d0, #0" "\n" |
| 117 | + "pmull v0.1q, v0.1d, v0.1d" "\n" |
| 118 | + : : : "v0" |
| 119 | + ); |
| 120 | +}) |
| 121 | +CHECK(rcpc, { |
| 122 | + int x; |
| 123 | + asm volatile ( |
| 124 | + "ldaprb w0, [%0]" |
| 125 | + : : "r" (&x) : "w0" |
| 126 | + ); |
| 127 | +}) |
| 128 | +CHECK(rcpc2, { |
| 129 | + int x; |
| 130 | + asm volatile ( |
| 131 | + "mov x1, %0" "\n" |
| 132 | + // FIXME: rcpc2 instructions are under +rcpc-immo in clang, and not +rcpc2. |
| 133 | + ".inst 0x19400020" // ldapurb w0, [x1] |
| 134 | + : : "r" (&x) : "w0" |
| 135 | + ); |
| 136 | +}) |
| 137 | +CHECK(fcma, { |
| 138 | + asm volatile ( |
| 139 | + "fmov d0, #0" "\n" |
| 140 | + "fcadd v0.2s, v0.2s, v0.2s, #90" "\n" |
| 141 | + : : : "v0" |
| 142 | + ); |
| 143 | +}) |
| 144 | +CHECK(jscvt, { |
| 145 | + asm volatile ( |
| 146 | + "fmov d0, #0" "\n" |
| 147 | + "fjcvtzs w1, d0" "\n" |
| 148 | + : : : "w1", "d0" |
| 149 | + ); |
| 150 | +}) |
| 151 | +CHECK(dpb, { |
| 152 | + int x; |
| 153 | + asm volatile ( |
| 154 | + "dc cvap, %0" |
| 155 | + : : "r" (&x) |
| 156 | + ); |
| 157 | +}) |
| 158 | +CHECK(dpb2, { |
| 159 | + int x; |
| 160 | + asm volatile ( |
| 161 | + "dc cvadp, %0" |
| 162 | + : : "r" (&x) |
| 163 | + ); |
| 164 | +}) |
| 165 | +CHECK(bf16, { |
| 166 | + asm volatile ( |
| 167 | + "bfdot v0.4S,v1.8H,v2.8H" |
| 168 | + : : : "v0" |
| 169 | + ); |
| 170 | +}) |
| 171 | +CHECK(i8mm, { |
| 172 | + asm volatile ( |
| 173 | + "sudot v0.4S,v1.16B,v2.4B[0]" |
| 174 | + : : : "v0" |
| 175 | + ); |
| 176 | +}) |
| 177 | +CHECK(dit, { |
| 178 | + asm volatile ( |
| 179 | + "msr DIT, x0" |
| 180 | + : : : "x0" |
| 181 | + ); |
| 182 | +}) |
| 183 | +CHECK(fp16, { |
| 184 | + asm volatile ( |
| 185 | + "fmov h0, #0" |
| 186 | + : : : "v0" |
| 187 | + ); |
| 188 | +}) |
| 189 | +CHECK(ssbs2, { |
| 190 | + asm volatile ( |
| 191 | + "mrs x0, SSBS" "\n" |
| 192 | + "msr SSBS, x0" "\n" |
| 193 | + : : : "x0" |
| 194 | + ); |
| 195 | +}) |
| 196 | +CHECK(bti, { |
| 197 | + // The only test for this requires reading a register that is only |
| 198 | + // accessible to EL1. |
| 199 | + #if defined(__APPLE__) |
| 200 | + // On Apple platforms, we need to check a sysctl. |
| 201 | + int32_t val = 0; |
| 202 | + size_t size = sizeof(val); |
| 203 | + if (sysctlbyname("hw.optional.arm.FEAT_BTI", &val, &size, NULL, 0) || val != 1) |
| 204 | + return false; |
| 205 | + #else |
| 206 | + // TODO: implement me on your platform to fix this test! |
| 207 | + #endif |
| 208 | +}) |
| 209 | +CHECK(simd, { |
| 210 | + asm volatile ( |
| 211 | + "mov v0.B[0], w0" |
| 212 | + : : : |
| 213 | + ); |
| 214 | +}) |
| 215 | +CHECK(fp, { |
| 216 | + asm volatile ( |
| 217 | + "fmov s0, #0" |
| 218 | + : : : "v0" |
| 219 | + ); |
| 220 | +}) |
| 221 | +CHECK(crc, { |
| 222 | + asm volatile ( "crc32b wzr, wzr, wzr"); |
| 223 | +}) |
| 224 | +CHECK(sme, { |
| 225 | + asm volatile ( |
| 226 | + "rdsvl x0, #1" |
| 227 | + : : : "x0" |
| 228 | + ); |
| 229 | +}) |
| 230 | +CHECK(sme2, { |
| 231 | + asm volatile ( |
| 232 | + "smstart za" "\n" |
| 233 | + "zero { zt0 }" "\n" |
| 234 | + "smstop za" "\n" |
| 235 | + ); |
| 236 | +}) |
| 237 | + |
// Runs try_feature() in a forked child so that a crash inside the probe
// (for example from executing an unsupported instruction) cannot take down
// the test driver itself.
//
// Returns true only when the child exited normally with status 0, i.e. when
// try_feature() ran to completion and returned true. Returns false when the
// child could not be created or waited for, when it was terminated by a
// signal, or when try_feature() returned false.
static bool safe_try_feature(bool (*try_feature)(void)) {
  pid_t child = fork();
  if (child < 0) {
    // fork() failed: the probe never ran. Previously -1 fell into the parent
    // path and ended up in waitpid(-1, ...), which waits for *any* child.
    perror("fork");
    return false;
  }

  if (child == 0) {
    // Child: translate the probe result into the exit status and stop here so
    // the child never returns into the caller's code.
    exit(try_feature() ? 0 : 1);
  }

  // Parent: collect the child and decode its status with the POSIX macros
  // rather than comparing the raw status word against 0.
  int exit_status = -1;
  if (waitpid(child, &exit_status, 0) != child)
    return false;
  return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == 0;
}
| 249 | + |
| 250 | +int main(int, const char **) { |
| 251 | + check_flagm(); |
| 252 | + check_flagm2(); |
| 253 | + check_dotprod(); |
| 254 | + check_sha3(); |
| 255 | + check_rdm(); |
| 256 | + check_lse(); |
| 257 | + check_sha2(); |
| 258 | + check_sha1(); |
| 259 | + check_aes(); |
| 260 | + check_pmull(); |
| 261 | + check_rcpc(); |
| 262 | + check_rcpc2(); |
| 263 | + check_fcma(); |
| 264 | + check_jscvt(); |
| 265 | + check_dpb(); |
| 266 | + check_dpb2(); |
| 267 | + check_bf16(); |
| 268 | + check_i8mm(); |
| 269 | + check_dit(); |
| 270 | + check_fp16(); |
| 271 | + check_ssbs2(); |
| 272 | + check_bti(); |
| 273 | + check_simd(); |
| 274 | + check_fp(); |
| 275 | + check_crc(); |
| 276 | + check_sme(); |
| 277 | + check_sme2(); |
| 278 | + |
| 279 | + return any_fails ? -1 : 0; |
| 280 | +} |
0 commit comments