From b0941c3aa12534f49a784842c8ba5daad585799b Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Wed, 31 Jan 2024 18:09:27 -0800 Subject: [PATCH 1/4] [AArch64][FMV] Add a non-comprehensive test for ACLE Function Multi Versioning --- SingleSource/UnitTests/AArch64/CMakeLists.txt | 5 + .../AArch64/acle-function-multi-versioning.c | 288 ++++++++++++++++++ ...function-multi-versioning.reference_output | 28 ++ SingleSource/UnitTests/CMakeLists.txt | 1 + 4 files changed, 322 insertions(+) create mode 100644 SingleSource/UnitTests/AArch64/CMakeLists.txt create mode 100644 SingleSource/UnitTests/AArch64/acle-function-multi-versioning.c create mode 100644 SingleSource/UnitTests/AArch64/acle-function-multi-versioning.reference_output diff --git a/SingleSource/UnitTests/AArch64/CMakeLists.txt b/SingleSource/UnitTests/AArch64/CMakeLists.txt new file mode 100644 index 0000000000..77b2f6caeb --- /dev/null +++ b/SingleSource/UnitTests/AArch64/CMakeLists.txt @@ -0,0 +1,5 @@ +if(CMAKE_C_COMPILER_ID STREQUAL "Clang") + if(ARCH STREQUAL "AArch64") + llvm_singlesource(PREFIX "aarch64-") + endif() +endif() diff --git a/SingleSource/UnitTests/AArch64/acle-function-multi-versioning.c b/SingleSource/UnitTests/AArch64/acle-function-multi-versioning.c new file mode 100644 index 0000000000..a83da4e92a --- /dev/null +++ b/SingleSource/UnitTests/AArch64/acle-function-multi-versioning.c @@ -0,0 +1,288 @@ +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/wait.h> +#include <unistd.h> + +#ifdef __APPLE__ +#include <sys/sysctl.h> +#endif + +static bool safe_try_feature(bool (*try_feature)(void)); + +static bool any_fails = false; + +#if __HAVE_FUNCTION_MULTI_VERSIONING +#define CHECK(X, BODY) \ + __attribute__((target(#X))) \ + static bool try_##X(void) { \ + do \ + BODY \ + while (0); \ + return true; \ + } \ + __attribute__((target_version(#X))) \ + static void check_##X(void) { \ + printf("%s\n", #X); \ + fflush(stdout); \ + if (!safe_try_feature(try_##X)) { \ + printf("\tFAIL\n"); \ + any_fails = true; \ + } \ + } \ +
__attribute__((target_version("default"))) \ + static void check_##X(void) { \ + printf("%s\n", #X); \ + fflush(stdout); \ + if (safe_try_feature(try_##X)) { \ + printf("\tUPASS\n"); \ + any_fails = true; \ + } \ + } +#else +#define CHECK(X, BODY) \ + static void check_##X(void) { \ + printf("%s\n", #X); \ + } +#endif + +CHECK(flagm, { + asm volatile ( + "cfinv" "\n" + "cfinv" "\n" + ); +}) +CHECK(flagm2, { + asm volatile ( + "axflag" "\n" + "xaflag" "\n" + ); +}) +CHECK(dotprod, { + asm volatile ( + "udot v0.4S,v1.16B,v2.16B" + : : : "v0" + ); +}) +CHECK(sha3, { + asm volatile ( + "fmov d0, #0" "\n" + "fmov d1, #0" "\n" + "eor3 v0.16b, v0.16b, v0.16b, v0.16b" "\n" + : : : "v0" + ); +}) +CHECK(rdm, { + asm volatile ( + "sqrdmlah s0, s1, s2" + : : : "s0" + ); +}) +CHECK(lse, { + uint64_t pointee = 0; + asm volatile ( + "swp xzr, xzr, [%[pointee]]" + : : [pointee]"r"(&pointee) + ); +}) +CHECK(sha2, { + asm volatile ( + "fmov d0, #0" "\n" + "fmov d1, #0" "\n" + "sha256h q0, q0, v0.4s" "\n" + : : : "v0" + ); +}) +CHECK(sha1, { + asm volatile ( + "fmov s0, #0" "\n" + "sha1h s0, s0" "\n" + : : : "v0" + ); +}) +CHECK(aes, { + asm volatile ( + "fmov d0, #0" "\n" + "fmov d1, #0" "\n" + "aesd v0.16B, v0.16B" "\n" + : : : "v0" + ); +}) +CHECK(pmull, { + asm volatile ( + "fmov d0, #0" "\n" + "pmull v0.1q, v0.1d, v0.1d" "\n" + : : : "v0" + ); +}) +CHECK(rcpc, { + int x; + asm volatile ( + "ldaprb w0, [%0]" + : : "r" (&x) : "w0" + ); +}) +CHECK(rcpc2, { + int x; + asm volatile ( + "ldapurb w0, [%0]" + : : "r" (&x) : "w0" + ); +}) +CHECK(fcma, { + asm volatile ( + "fmov d0, #0" "\n" + "fcadd v0.2s, v0.2s, v0.2s, #90" "\n" + : : : "v0" + ); +}) +CHECK(jscvt, { + asm volatile ( + "fmov d0, #0" "\n" + "fjcvtzs w1, d0" "\n" + : : : "w1", "d0" + ); +}) +CHECK(dpb, { + int x; + asm volatile ( + "dc cvap, %0" + : : "r" (&x) + ); +}) +CHECK(dpb2, { + int x; + asm volatile ( + "dc cvadp, %0" + : : "r" (&x) + ); +}) +CHECK(bf16, { + asm volatile ( + "bfdot v0.4S,v1.8H,v2.8H" + : : : "v0" + 
); +}) +CHECK(i8mm, { + asm volatile ( + "sudot v0.4S,v1.16B,v2.4B[0]" + : : : "v0" + ); +}) +CHECK(dit, { + asm volatile ( + "msr DIT, x0" + : : : "x0" + ); +}) +CHECK(fp16, { + asm volatile ( + "fmov h0, #0" + : : : "v0" + ); +}) +CHECK(ssbs2, { + asm volatile ( + "mrs x0, SSBS" "\n" + "msr SSBS, x0" "\n" + : : : "x0" + ); +}) +CHECK(bti, { + // The only test for this requires reading a register that is only + // accessible to EL1. + #ifdef __linux__ + // On Linux, the kernel emulates this system register read in a trap + // handler, so we can just do the read as you would in EL1. + int val = 0; + asm volatile ( + "mrs %0, ID_AA64PFR1_EL1" + : "=r"(val) + ); + // https://developer.arm.com/documentation/ddi0601/2023-12/AArch64-Registers/ID-AA64PFR1-EL1--AArch64-Processor-Feature-Register-1?lang=en#fieldset_0-3_0 + if (val & 0xF != 0x1) + return false; + #elif defined(__APPLE__) + // On Apple platforms, we need to check a sysctl. + int32_t val = 0; + size_t size = sizeof(val); + if (sysctlbyname("hw.optional.arm.FEAT_BTI", &val, &size, NULL, 0) || val != 1) + return false; + #else + // TODO: implement me on your platform to fix this test! + #endif +}) +CHECK(simd, { + asm volatile ( + "mov v0.B[0], w0" + : : : + ); +}) +CHECK(fp, { + asm volatile ( + "fmov s0, #0" + : : : "v0" + ); +}) +CHECK(crc, { + asm volatile ( "crc32b wzr, wzr, wzr"); +}) +CHECK(sme, { + asm volatile ( + "rdsvl x0, #1" + : : : "x0" + ); +}) +CHECK(sme2, { + asm volatile ( + "smstart za" "\n" + "zero { zt0 }" "\n" + "smstop za" "\n" + ); +}) + +static bool safe_try_feature(bool (*try_feature)(void)) { + int child = fork(); + if (child) { + int exit_status = -1; + if (child != waitpid(child, &exit_status, 0)) + return false; + return exit_status == 0; + } else { + exit(try_feature() ? 
0 : 1); + } +} + +int main(int, const char **) { + check_flagm(); + check_flagm2(); + check_dotprod(); + check_sha3(); + check_rdm(); + check_lse(); + check_sha2(); + check_sha1(); + check_aes(); + check_pmull(); + check_rcpc(); + check_rcpc2(); + check_fcma(); + check_jscvt(); + check_dpb(); + check_dpb2(); + check_bf16(); + check_i8mm(); + check_dit(); + check_fp16(); + check_ssbs2(); + check_bti(); + check_simd(); + check_fp(); + check_crc(); + check_sme(); + check_sme2(); + + return any_fails ? -1 : 0; +} diff --git a/SingleSource/UnitTests/AArch64/acle-function-multi-versioning.reference_output b/SingleSource/UnitTests/AArch64/acle-function-multi-versioning.reference_output new file mode 100644 index 0000000000..003c65ddf7 --- /dev/null +++ b/SingleSource/UnitTests/AArch64/acle-function-multi-versioning.reference_output @@ -0,0 +1,28 @@ +flagm +flagm2 +dotprod +sha3 +rdm +lse +sha2 +sha1 +aes +pmull +rcpc +rcpc2 +fcma +jscvt +dpb +dpb2 +bf16 +i8mm +dit +fp16 +ssbs2 +bti +simd +fp +crc +sme +sme2 +exit 0 diff --git a/SingleSource/UnitTests/CMakeLists.txt b/SingleSource/UnitTests/CMakeLists.txt index e70fcbd37b..c04bd2347b 100644 --- a/SingleSource/UnitTests/CMakeLists.txt +++ b/SingleSource/UnitTests/CMakeLists.txt @@ -8,6 +8,7 @@ add_subdirectory(Threads) add_subdirectory(Vector) add_subdirectory(Vectorizer) add_subdirectory(X86) +add_subdirectory(AArch64) list(APPEND CFLAGS -Wno-implicit-function-declaration -Wno-implicit-int) From 339a75700a3967390137da5faed8414c5e9c4fa3 Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Thu, 1 Feb 2024 13:26:36 -0800 Subject: [PATCH 2/4] rename the new test to leave more room for future sibling tests --- .../{acle-function-multi-versioning.c => acle-fmv-features.c} | 0 ...ioning.reference_output => acle-fmv-features.reference_output} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename SingleSource/UnitTests/AArch64/{acle-function-multi-versioning.c => acle-fmv-features.c} (100%) rename 
SingleSource/UnitTests/AArch64/{acle-function-multi-versioning.reference_output => acle-fmv-features.reference_output} (100%) diff --git a/SingleSource/UnitTests/AArch64/acle-function-multi-versioning.c b/SingleSource/UnitTests/AArch64/acle-fmv-features.c similarity index 100% rename from SingleSource/UnitTests/AArch64/acle-function-multi-versioning.c rename to SingleSource/UnitTests/AArch64/acle-fmv-features.c diff --git a/SingleSource/UnitTests/AArch64/acle-function-multi-versioning.reference_output b/SingleSource/UnitTests/AArch64/acle-fmv-features.reference_output similarity index 100% rename from SingleSource/UnitTests/AArch64/acle-function-multi-versioning.reference_output rename to SingleSource/UnitTests/AArch64/acle-fmv-features.reference_output From aa039f6a2e6610bda4fc98a9b3db14436fabb7c9 Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Fri, 2 Feb 2024 14:04:24 -0800 Subject: [PATCH 3/4] apply ilinpv's feedback: drop linux support, and add missing target() features --- SingleSource/UnitTests/AArch64/CMakeLists.txt | 7 ++++++- .../UnitTests/AArch64/acle-fmv-features.c | 17 +++++------------ 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/SingleSource/UnitTests/AArch64/CMakeLists.txt b/SingleSource/UnitTests/AArch64/CMakeLists.txt index 77b2f6caeb..ff9be4842f 100644 --- a/SingleSource/UnitTests/AArch64/CMakeLists.txt +++ b/SingleSource/UnitTests/AArch64/CMakeLists.txt @@ -1,5 +1,10 @@ if(CMAKE_C_COMPILER_ID STREQUAL "Clang") if(ARCH STREQUAL "AArch64") - llvm_singlesource(PREFIX "aarch64-") + set(Source ) + # TODO: this test is currently only supported on Darwin platforms: + if (CMAKE_SYSTEM_NAME MATCHES "Darwin") + list(APPEND Source acle-fmv-features.c) + endif() + llvm_singlesource(PREFIX "aarch64-" Source) endif() endif() diff --git a/SingleSource/UnitTests/AArch64/acle-fmv-features.c b/SingleSource/UnitTests/AArch64/acle-fmv-features.c index a83da4e92a..6add1b3695 100644 --- a/SingleSource/UnitTests/AArch64/acle-fmv-features.c +++ 
b/SingleSource/UnitTests/AArch64/acle-fmv-features.c @@ -95,6 +95,8 @@ CHECK(sha2, { : : : "v0" ); }) +// FIXME: sha1h is under +sha2 in clang, and +sha1 doesn't exist yet. +__attribute__((target("sha2"))) CHECK(sha1, { asm volatile ( "fmov s0, #0" "\n" @@ -124,6 +126,8 @@ CHECK(rcpc, { : : "r" (&x) : "w0" ); }) +// FIXME: rcpc2 instructions are under +rcpc-immo in clang, and not +rcpc2. +__attribute__((target("rcpc-immo"))) CHECK(rcpc2, { int x; asm volatile ( @@ -193,18 +197,7 @@ CHECK(ssbs2, { CHECK(bti, { // The only test for this requires reading a register that is only // accessible to EL1. - #ifdef __linux__ - // On Linux, the kernel emulates this system register read in a trap - // handler, so we can just do the read as you would in EL1. - int val = 0; - asm volatile ( - "mrs %0, ID_AA64PFR1_EL1" - : "=r"(val) - ); - // https://developer.arm.com/documentation/ddi0601/2023-12/AArch64-Registers/ID-AA64PFR1-EL1--AArch64-Processor-Feature-Register-1?lang=en#fieldset_0-3_0 - if (val & 0xF != 0x1) - return false; - #elif defined(__APPLE__) + #if defined(__APPLE__) // On Apple platforms, we need to check a sysctl. int32_t val = 0; size_t size = sizeof(val); From 7386c798e399a6e5d8102cdf816dfeba607ce6fa Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Fri, 2 Feb 2024 14:10:22 -0800 Subject: [PATCH 4/4] on second thought, use .inst instead --- SingleSource/UnitTests/AArch64/acle-fmv-features.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/SingleSource/UnitTests/AArch64/acle-fmv-features.c b/SingleSource/UnitTests/AArch64/acle-fmv-features.c index 6add1b3695..97870670a1 100644 --- a/SingleSource/UnitTests/AArch64/acle-fmv-features.c +++ b/SingleSource/UnitTests/AArch64/acle-fmv-features.c @@ -95,12 +95,11 @@ CHECK(sha2, { : : : "v0" ); }) -// FIXME: sha1h is under +sha2 in clang, and +sha1 doesn't exist yet. 
-__attribute__((target("sha2"))) CHECK(sha1, { asm volatile ( "fmov s0, #0" "\n" - "sha1h s0, s0" "\n" + // FIXME: sha1h is under +sha2 in clang, and +sha1 doesn't exist yet. + ".inst 0x5e280800" "\n" // sha1h s0, s0 : : : "v0" ); }) @@ -126,12 +125,12 @@ CHECK(rcpc, { : : "r" (&x) : "w0" ); }) -// FIXME: rcpc2 instructions are under +rcpc-immo in clang, and not +rcpc2. -__attribute__((target("rcpc-immo"))) CHECK(rcpc2, { int x; asm volatile ( - "ldapurb w0, [%0]" + "mov x1, %0" "\n" + // FIXME: rcpc2 instructions are under +rcpc-immo in clang, and not +rcpc2. + ".inst 0x19400020" // ldapurb w0, [x1] : : "r" (&x) : "w0" ); })