diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 4700a183e67b..1efd532377a6 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -4,7 +4,6 @@ on: push: branches: - master - - llvm19 concurrency: # Cancels pending runs when a PR gets updated. group: ${{ github.head_ref || github.run_id }}-${{ github.actor }} diff --git a/CMakeLists.txt b/CMakeLists.txt index ea25212fec73..1b7a5779ebc1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -132,9 +132,9 @@ else() set(ZIG_SYSTEM_LIBCXX "stdc++" CACHE STRING "system libcxx name for build.zig") endif() -find_package(llvm 19) -find_package(clang 19) -find_package(lld 19) +find_package(llvm 20) +find_package(clang 20) +find_package(lld 20) if(ZIG_STATIC_ZLIB) if (MSVC) @@ -809,7 +809,7 @@ if(MSVC) else() set(ZIG_WASM2C_COMPILE_FLAGS "-std=c99 -O2") set(ZIG1_COMPILE_FLAGS "-std=c99 -Os") - set(ZIG2_COMPILE_FLAGS "-std=c99 -O0 -fno-stack-protector") + set(ZIG2_COMPILE_FLAGS "-std=c99 -O0 -fno-sanitize=undefined -fno-stack-protector") if(APPLE) set(ZIG2_LINK_FLAGS "-Wl,-stack_size,0x10000000") elseif(MINGW) diff --git a/build.zig b/build.zig index 375cb6df0974..830cae270214 100644 --- a/build.zig +++ b/build.zig @@ -376,7 +376,7 @@ pub fn build(b: *std.Build) !void { const test_filters = b.option([]const []const u8, "test-filter", "Skip tests that do not match any filter") orelse &[0][]const u8{}; const test_target_filters = b.option([]const []const u8, "test-target-filter", "Skip tests whose target triple do not match any filter") orelse &[0][]const u8{}; - const test_slow_targets = b.option(bool, "test-slow-targets", "Enable running module tests for targets that have a slow compiler backend") orelse false; + const test_extra_targets = b.option(bool, "test-extra-targets", "Enable running module tests for additional targets") orelse false; var chosen_opt_modes_buf: [4]builtin.OptimizeMode = undefined; var chosen_mode_index: usize = 0; @@ -433,7 +433,7 @@ pub fn build(b: *std.Build) 
!void { test_modules_step.dependOn(tests.addModuleTests(b, .{ .test_filters = test_filters, .test_target_filters = test_target_filters, - .test_slow_targets = test_slow_targets, + .test_extra_targets = test_extra_targets, .root_src = "test/behavior.zig", .name = "behavior", .desc = "Run the behavior tests", @@ -449,7 +449,7 @@ pub fn build(b: *std.Build) !void { test_modules_step.dependOn(tests.addModuleTests(b, .{ .test_filters = test_filters, .test_target_filters = test_target_filters, - .test_slow_targets = test_slow_targets, + .test_extra_targets = test_extra_targets, .root_src = "test/c_import.zig", .name = "c-import", .desc = "Run the @cImport tests", @@ -464,7 +464,7 @@ pub fn build(b: *std.Build) !void { test_modules_step.dependOn(tests.addModuleTests(b, .{ .test_filters = test_filters, .test_target_filters = test_target_filters, - .test_slow_targets = test_slow_targets, + .test_extra_targets = test_extra_targets, .root_src = "lib/compiler_rt.zig", .name = "compiler-rt", .desc = "Run the compiler_rt tests", @@ -480,7 +480,7 @@ pub fn build(b: *std.Build) !void { test_modules_step.dependOn(tests.addModuleTests(b, .{ .test_filters = test_filters, .test_target_filters = test_target_filters, - .test_slow_targets = test_slow_targets, + .test_extra_targets = test_extra_targets, .root_src = "lib/c.zig", .name = "universal-libc", .desc = "Run the universal libc tests", @@ -496,7 +496,7 @@ pub fn build(b: *std.Build) !void { test_modules_step.dependOn(tests.addModuleTests(b, .{ .test_filters = test_filters, .test_target_filters = test_target_filters, - .test_slow_targets = test_slow_targets, + .test_extra_targets = test_extra_targets, .root_src = "lib/std/std.zig", .name = "std", .desc = "Run the standard library tests", @@ -1166,10 +1166,10 @@ const llvm_libs = [_][]const u8{ "LLVMXRay", "LLVMLibDriver", "LLVMDlltoolDriver", + "LLVMTelemetry", "LLVMTextAPIBinaryReader", "LLVMCoverage", "LLVMLineEditor", - "LLVMSandboxIR", "LLVMXCoreDisassembler", 
"LLVMXCoreCodeGen", "LLVMXCoreDesc", @@ -1196,6 +1196,10 @@ const llvm_libs = [_][]const u8{ "LLVMSystemZCodeGen", "LLVMSystemZDesc", "LLVMSystemZInfo", + "LLVMSPIRVCodeGen", + "LLVMSPIRVDesc", + "LLVMSPIRVInfo", + "LLVMSPIRVAnalysis", "LLVMSparcDisassembler", "LLVMSparcAsmParser", "LLVMSparcCodeGen", @@ -1294,6 +1298,7 @@ const llvm_libs = [_][]const u8{ "LLVMCoroutines", "LLVMipo", "LLVMVectorize", + "LLVMSandboxIR", "LLVMLinker", "LLVMInstrumentation", "LLVMFrontendOpenMP", @@ -1301,11 +1306,11 @@ const llvm_libs = [_][]const u8{ "LLVMFrontendOpenACC", "LLVMFrontendHLSL", "LLVMFrontendDriver", + "LLVMFrontendAtomic", "LLVMExtensions", "LLVMDWARFLinkerParallel", "LLVMDWARFLinkerClassic", "LLVMDWARFLinker", - "LLVMCodeGenData", "LLVMGlobalISel", "LLVMMIRParser", "LLVMAsmPrinter", @@ -1314,6 +1319,7 @@ const llvm_libs = [_][]const u8{ "LLVMTarget", "LLVMObjCARCOpts", "LLVMCodeGenTypes", + "LLVMCGData", "LLVMIRPrinter", "LLVMInterfaceStub", "LLVMFileCheck", @@ -1329,6 +1335,7 @@ const llvm_libs = [_][]const u8{ "LLVMDebugInfoBTF", "LLVMDebugInfoPDB", "LLVMDebugInfoMSF", + "LLVMDebugInfoCodeView", "LLVMDebugInfoDWARF", "LLVMObject", "LLVMTextAPI", @@ -1336,7 +1343,6 @@ const llvm_libs = [_][]const u8{ "LLVMIRReader", "LLVMAsmParser", "LLVMMC", - "LLVMDebugInfoCodeView", "LLVMBitReader", "LLVMFuzzerCLI", "LLVMCore", diff --git a/ci/aarch64-linux-debug.sh b/ci/aarch64-linux-debug.sh index 143dd7c1c9bc..588e198bce64 100755 --- a/ci/aarch64-linux-debug.sh +++ b/ci/aarch64-linux-debug.sh @@ -8,7 +8,7 @@ set -e ARCH="$(uname -m)" TARGET="$ARCH-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.15.0-dev.233+7c85dc460" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/aarch64-linux-release.sh b/ci/aarch64-linux-release.sh index f6602850a9be..c47bbf3e429b 100755 --- a/ci/aarch64-linux-release.sh +++ b/ci/aarch64-linux-release.sh @@ -8,7 +8,7 @@ set -e 
ARCH="$(uname -m)" TARGET="$ARCH-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.15.0-dev.233+7c85dc460" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/aarch64-macos-debug.sh b/ci/aarch64-macos-debug.sh index 474f0c5cea28..9649f69b91fa 100755 --- a/ci/aarch64-macos-debug.sh +++ b/ci/aarch64-macos-debug.sh @@ -9,7 +9,7 @@ set -e ZIGDIR="$PWD" TARGET="$ARCH-macos-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.15.0-dev.233+7c85dc460" PREFIX="$HOME/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/aarch64-macos-release.sh b/ci/aarch64-macos-release.sh index dac793075a6b..7b512813a5d3 100755 --- a/ci/aarch64-macos-release.sh +++ b/ci/aarch64-macos-release.sh @@ -9,7 +9,7 @@ set -e ZIGDIR="$PWD" TARGET="$ARCH-macos-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.15.0-dev.233+7c85dc460" PREFIX="$HOME/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/aarch64-windows.ps1 b/ci/aarch64-windows.ps1 index 44140506a743..f27ba0bec698 100644 --- a/ci/aarch64-windows.ps1 +++ b/ci/aarch64-windows.ps1 @@ -1,5 +1,5 @@ $TARGET = "$($Env:ARCH)-windows-gnu" -$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" +$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.15.0-dev.233+7c85dc460" $MCPU = "baseline" $ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip" $PREFIX_PATH = "$(Get-Location)\..\$ZIG_LLVM_CLANG_LLD_NAME" diff --git a/ci/x86_64-linux-debug.sh b/ci/x86_64-linux-debug.sh index 1ef474577959..3974f07a9550 100755 --- a/ci/x86_64-linux-debug.sh +++ b/ci/x86_64-linux-debug.sh @@ -8,7 +8,7 @@ set -e ARCH="$(uname -m)" TARGET="$ARCH-linux-musl" MCPU="baseline" 
-CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.15.0-dev.233+7c85dc460" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/x86_64-linux-release.sh b/ci/x86_64-linux-release.sh index d6c0cc5701b3..b468f65493a8 100755 --- a/ci/x86_64-linux-release.sh +++ b/ci/x86_64-linux-release.sh @@ -8,7 +8,7 @@ set -e ARCH="$(uname -m)" TARGET="$ARCH-linux-musl" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.15.0-dev.233+7c85dc460" PREFIX="$HOME/deps/$CACHE_BASENAME" ZIG="$PREFIX/bin/zig" diff --git a/ci/x86_64-macos-release.sh b/ci/x86_64-macos-release.sh index 30b37819155e..15c80c3d8a38 100755 --- a/ci/x86_64-macos-release.sh +++ b/ci/x86_64-macos-release.sh @@ -6,7 +6,7 @@ set -e ZIGDIR="$PWD" TARGET="$ARCH-macos-none" MCPU="baseline" -CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" +CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.15.0-dev.233+7c85dc460" PREFIX="$HOME/$CACHE_BASENAME" JOBS="-j3" ZIG="$PREFIX/bin/zig" diff --git a/ci/x86_64-windows-debug.ps1 b/ci/x86_64-windows-debug.ps1 index 048ad3f15fd3..8a5e0b42da8a 100644 --- a/ci/x86_64-windows-debug.ps1 +++ b/ci/x86_64-windows-debug.ps1 @@ -1,5 +1,5 @@ $TARGET = "$($Env:ARCH)-windows-gnu" -$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" +$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.15.0-dev.233+7c85dc460" $MCPU = "baseline" $ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip" $PREFIX_PATH = "$($Env:USERPROFILE)\$ZIG_LLVM_CLANG_LLD_NAME" diff --git a/ci/x86_64-windows-release.ps1 b/ci/x86_64-windows-release.ps1 index 965fc4b578f5..77ef64ccadf1 100644 --- a/ci/x86_64-windows-release.ps1 +++ b/ci/x86_64-windows-release.ps1 @@ -1,5 +1,5 @@ $TARGET = "$($Env:ARCH)-windows-gnu" -$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388" 
+$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.15.0-dev.233+7c85dc460" $MCPU = "baseline" $ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip" $PREFIX_PATH = "$($Env:USERPROFILE)\$ZIG_LLVM_CLANG_LLD_NAME" diff --git a/cmake/Findclang.cmake b/cmake/Findclang.cmake index a95a8c903b67..e47d9a0b48f7 100644 --- a/cmake/Findclang.cmake +++ b/cmake/Findclang.cmake @@ -17,10 +17,10 @@ find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h if(${LLVM_LINK_MODE} STREQUAL "shared") find_library(CLANG_LIBRARIES NAMES - libclang-cpp.so.19 - libclang-cpp.so.19.1 - clang-cpp-19.0 - clang-cpp190 + libclang-cpp.so.20 + libclang-cpp.so.20.1 + clang-cpp-20.0 + clang-cpp200 clang-cpp NAMES_PER_DIR HINTS "${LLVM_LIBDIRS}" diff --git a/cmake/Findlld.cmake b/cmake/Findlld.cmake index c3de9d411773..a16ab1bb7b60 100644 --- a/cmake/Findlld.cmake +++ b/cmake/Findlld.cmake @@ -9,21 +9,21 @@ find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h HINTS ${LLVM_INCLUDE_DIRS} PATHS - /usr/lib/llvm-19/include - /usr/local/llvm190/include - /usr/local/llvm19/include - /usr/local/opt/lld@19/include - /opt/homebrew/opt/lld@19/include + /usr/lib/llvm-20/include + /usr/local/llvm200/include + /usr/local/llvm20/include + /usr/local/opt/lld@20/include + /opt/homebrew/opt/lld@20/include /mingw64/include) -find_library(LLD_LIBRARY NAMES lld-19.0 lld190 lld NAMES_PER_DIR +find_library(LLD_LIBRARY NAMES lld-20.0 lld200 lld NAMES_PER_DIR HINTS ${LLVM_LIBDIRS} PATHS - /usr/lib/llvm-19/lib - /usr/local/llvm190/lib - /usr/local/llvm19/lib - /usr/local/opt/lld@19/lib - /opt/homebrew/opt/lld@19/lib + /usr/lib/llvm-20/lib + /usr/local/llvm200/lib + /usr/local/llvm20/lib + /usr/local/opt/lld@20/lib + /opt/homebrew/opt/lld@20/lib ) if(EXISTS ${LLD_LIBRARY}) set(LLD_LIBRARIES ${LLD_LIBRARY}) @@ -34,11 +34,11 @@ else() HINTS ${LLVM_LIBDIRS} PATHS ${LLD_LIBDIRS} - /usr/lib/llvm-19/lib - /usr/local/llvm190/lib - /usr/local/llvm19/lib - /usr/local/opt/lld@19/lib - 
/opt/homebrew/opt/lld@19/lib + /usr/lib/llvm-20/lib + /usr/local/llvm200/lib + /usr/local/llvm20/lib + /usr/local/opt/lld@20/lib + /opt/homebrew/opt/lld@20/lib /mingw64/lib /c/msys64/mingw64/lib c:/msys64/mingw64/lib) diff --git a/cmake/Findllvm.cmake b/cmake/Findllvm.cmake index 5bc874ec4279..5ff5de869c93 100644 --- a/cmake/Findllvm.cmake +++ b/cmake/Findllvm.cmake @@ -17,12 +17,12 @@ if(ZIG_USE_LLVM_CONFIG) # terminate when the right LLVM version is not found. unset(LLVM_CONFIG_EXE CACHE) find_program(LLVM_CONFIG_EXE - NAMES llvm-config-19 llvm-config-19.0 llvm-config190 llvm-config19 llvm-config NAMES_PER_DIR + NAMES llvm-config-20 llvm-config-20.0 llvm-config200 llvm-config20 llvm-config NAMES_PER_DIR PATHS "/mingw64/bin" "/c/msys64/mingw64/bin" "c:/msys64/mingw64/bin" - "C:/Libraries/llvm-19.0.0/bin") + "C:/Libraries/llvm-20.0.0/bin") if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND") if (NOT LLVM_CONFIG_ERROR_MESSAGES STREQUAL "") @@ -40,9 +40,9 @@ if(ZIG_USE_LLVM_CONFIG) OUTPUT_STRIP_TRAILING_WHITESPACE) get_filename_component(LLVM_CONFIG_DIR "${LLVM_CONFIG_EXE}" DIRECTORY) - if("${LLVM_CONFIG_VERSION}" VERSION_LESS 19 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 20 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 20) + if("${LLVM_CONFIG_VERSION}" VERSION_LESS 20 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 21 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 21) # Save the error message, in case this is the last llvm-config we find - list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 19.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") + list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 20.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") # Ignore this directory and try the search again list(APPEND CMAKE_IGNORE_PATH "${LLVM_CONFIG_DIR}") @@ -66,9 +66,9 @@ if(ZIG_USE_LLVM_CONFIG) if (LLVM_CONFIG_ERROR) # Save the error message, in case this is the last llvm-config we find if (ZIG_SHARED_LLVM) - list(APPEND LLVM_CONFIG_ERROR_MESSAGES 
"LLVM 19.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library") + list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 20.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library") else() - list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 19.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library") + list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 20.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library") endif() # Ignore this directory and try the search again @@ -197,10 +197,10 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMXRay) FIND_AND_ADD_LLVM_LIB(LLVMLibDriver) FIND_AND_ADD_LLVM_LIB(LLVMDlltoolDriver) + FIND_AND_ADD_LLVM_LIB(LLVMTelemetry) FIND_AND_ADD_LLVM_LIB(LLVMTextAPIBinaryReader) FIND_AND_ADD_LLVM_LIB(LLVMCoverage) FIND_AND_ADD_LLVM_LIB(LLVMLineEditor) - FIND_AND_ADD_LLVM_LIB(LLVMSandboxIR) FIND_AND_ADD_LLVM_LIB(LLVMXCoreDisassembler) FIND_AND_ADD_LLVM_LIB(LLVMXCoreCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMXCoreDesc) @@ -227,6 +227,10 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMSystemZCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMSystemZDesc) FIND_AND_ADD_LLVM_LIB(LLVMSystemZInfo) + FIND_AND_ADD_LLVM_LIB(LLVMSPIRVCodeGen) + FIND_AND_ADD_LLVM_LIB(LLVMSPIRVDesc) + FIND_AND_ADD_LLVM_LIB(LLVMSPIRVInfo) + FIND_AND_ADD_LLVM_LIB(LLVMSPIRVAnalysis) FIND_AND_ADD_LLVM_LIB(LLVMSparcDisassembler) FIND_AND_ADD_LLVM_LIB(LLVMSparcAsmParser) FIND_AND_ADD_LLVM_LIB(LLVMSparcCodeGen) @@ -325,6 +329,7 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMCoroutines) FIND_AND_ADD_LLVM_LIB(LLVMipo) FIND_AND_ADD_LLVM_LIB(LLVMVectorize) + FIND_AND_ADD_LLVM_LIB(LLVMSandboxIR) FIND_AND_ADD_LLVM_LIB(LLVMLinker) FIND_AND_ADD_LLVM_LIB(LLVMInstrumentation) FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenMP) @@ -332,11 +337,11 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenACC) FIND_AND_ADD_LLVM_LIB(LLVMFrontendHLSL) FIND_AND_ADD_LLVM_LIB(LLVMFrontendDriver) + FIND_AND_ADD_LLVM_LIB(LLVMFrontendAtomic) FIND_AND_ADD_LLVM_LIB(LLVMExtensions) 
FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinkerParallel) FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinkerClassic) FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinker) - FIND_AND_ADD_LLVM_LIB(LLVMCodeGenData) FIND_AND_ADD_LLVM_LIB(LLVMGlobalISel) FIND_AND_ADD_LLVM_LIB(LLVMMIRParser) FIND_AND_ADD_LLVM_LIB(LLVMAsmPrinter) @@ -345,6 +350,7 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMTarget) FIND_AND_ADD_LLVM_LIB(LLVMObjCARCOpts) FIND_AND_ADD_LLVM_LIB(LLVMCodeGenTypes) + FIND_AND_ADD_LLVM_LIB(LLVMCGData) FIND_AND_ADD_LLVM_LIB(LLVMIRPrinter) FIND_AND_ADD_LLVM_LIB(LLVMInterfaceStub) FIND_AND_ADD_LLVM_LIB(LLVMFileCheck) @@ -360,6 +366,7 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoBTF) FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoPDB) FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoMSF) + FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoCodeView) FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoDWARF) FIND_AND_ADD_LLVM_LIB(LLVMObject) FIND_AND_ADD_LLVM_LIB(LLVMTextAPI) @@ -367,7 +374,6 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMIRReader) FIND_AND_ADD_LLVM_LIB(LLVMAsmParser) FIND_AND_ADD_LLVM_LIB(LLVMMC) - FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoCodeView) FIND_AND_ADD_LLVM_LIB(LLVMBitReader) FIND_AND_ADD_LLVM_LIB(LLVMFuzzerCLI) FIND_AND_ADD_LLVM_LIB(LLVMCore) diff --git a/lib/compiler/aro/aro/target.zig b/lib/compiler/aro/aro/target.zig index bea982daa2d3..1f5262445ab4 100644 --- a/lib/compiler/aro/aro/target.zig +++ b/lib/compiler/aro/aro/target.zig @@ -698,6 +698,8 @@ pub fn toLLVMTriple(target: std.Target, buf: []u8) []const u8 { .muslabi64 => "muslabi64", .musleabi => "musleabi", .musleabihf => "musleabihf", + .muslf32 => "muslf32", + .muslsf => "muslsf", .muslx32 => "muslx32", .msvc => "msvc", .itanium => "itanium", diff --git a/lib/compiler_rt/common.zig b/lib/compiler_rt/common.zig index f200b2a3e154..28707d720a78 100644 --- a/lib/compiler_rt/common.zig +++ b/lib/compiler_rt/common.zig @@ -96,21 +96,25 @@ pub const want_sparc_abi = builtin.cpu.arch.isSPARC(); // we're trying to test compiler-rt. 
pub const panic = if (builtin.is_test) std.debug.FullPanic(std.debug.defaultPanic) else std.debug.no_panic; -/// AArch64 is the only ABI (at the moment) to support f16 arguments without the -/// need for extending them to wider fp types. -/// TODO remove this; do this type selection in the language rather than -/// here in compiler-rt. +/// This seems to mostly correspond to `clang::TargetInfo::HasFloat16`. pub fn F16T(comptime OtherType: type) type { return switch (builtin.cpu.arch) { - .arm, .armeb, .thumb, .thumbeb => if (std.Target.arm.featureSetHas(builtin.cpu.features, .has_v8)) - switch (builtin.abi.float()) { - .soft => u16, - .hard => f16, - } - else - u16, - .aarch64, .aarch64_be => f16, - .riscv32, .riscv64 => f16, + .amdgcn, + .arm, + .armeb, + .thumb, + .thumbeb, + .aarch64, + .aarch64_be, + .nvptx, + .nvptx64, + .riscv32, + .riscv64, + .spirv, + .spirv32, + .spirv64, + => f16, + .hexagon => if (std.Target.hexagon.featureSetHas(builtin.target.cpu.features, .v68)) f16 else u16, .x86, .x86_64 => if (builtin.target.os.tag.isDarwin()) switch (OtherType) { // Starting with LLVM 16, Darwin uses different abi for f16 // depending on the type of the other return/argument..??? 
diff --git a/lib/compiler_rt/fixdfti.zig b/lib/compiler_rt/fixdfti.zig index 889e31db53e5..46b17505fc5f 100644 --- a/lib/compiler_rt/fixdfti.zig +++ b/lib/compiler_rt/fixdfti.zig @@ -8,10 +8,7 @@ comptime { if (common.want_windows_v2u64_abi) { @export(&__fixdfti_windows_x86_64, .{ .name = "__fixdfti", .linkage = common.linkage, .visibility = common.visibility }); } else { - @export(&__fixdfti, .{ .name = switch (builtin.cpu.arch) { - .hexagon => "__hexagon", - else => "_", - } ++ "_fixdfti", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__fixdfti, .{ .name = "__fixdfti", .linkage = common.linkage, .visibility = common.visibility }); } } diff --git a/lib/compiler_rt/fixsfti.zig b/lib/compiler_rt/fixsfti.zig index ae37f0f0e6fb..e1d4e7188caf 100644 --- a/lib/compiler_rt/fixsfti.zig +++ b/lib/compiler_rt/fixsfti.zig @@ -8,10 +8,7 @@ comptime { if (common.want_windows_v2u64_abi) { @export(&__fixsfti_windows_x86_64, .{ .name = "__fixsfti", .linkage = common.linkage, .visibility = common.visibility }); } else { - @export(&__fixsfti, .{ .name = switch (builtin.cpu.arch) { - .hexagon => "__hexagon", - else => "_", - } ++ "_fixsfti", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__fixsfti, .{ .name = "__fixsfti", .linkage = common.linkage, .visibility = common.visibility }); } } diff --git a/lib/compiler_rt/fixunsdfti.zig b/lib/compiler_rt/fixunsdfti.zig index ddaa45bc6649..b4429a9d7622 100644 --- a/lib/compiler_rt/fixunsdfti.zig +++ b/lib/compiler_rt/fixunsdfti.zig @@ -8,10 +8,7 @@ comptime { if (common.want_windows_v2u64_abi) { @export(&__fixunsdfti_windows_x86_64, .{ .name = "__fixunsdfti", .linkage = common.linkage, .visibility = common.visibility }); } else { - @export(&__fixunsdfti, .{ .name = switch (builtin.cpu.arch) { - .hexagon => "__hexagon", - else => "_", - } ++ "_fixunsdfti", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__fixunsdfti, .{ .name = "__fixunsdfti", .linkage = 
common.linkage, .visibility = common.visibility }); } } diff --git a/lib/compiler_rt/fixunssfti.zig b/lib/compiler_rt/fixunssfti.zig index 0b3ac65fe947..3137fb3bc616 100644 --- a/lib/compiler_rt/fixunssfti.zig +++ b/lib/compiler_rt/fixunssfti.zig @@ -8,10 +8,7 @@ comptime { if (common.want_windows_v2u64_abi) { @export(&__fixunssfti_windows_x86_64, .{ .name = "__fixunssfti", .linkage = common.linkage, .visibility = common.visibility }); } else { - @export(&__fixunssfti, .{ .name = switch (builtin.cpu.arch) { - .hexagon => "__hexagon", - else => "_", - } ++ "_fixunssfti", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__fixunssfti, .{ .name = "__fixunssfti", .linkage = common.linkage, .visibility = common.visibility }); } } diff --git a/lib/compiler_rt/floattidf.zig b/lib/compiler_rt/floattidf.zig index 78cef508a06b..420ef9b20148 100644 --- a/lib/compiler_rt/floattidf.zig +++ b/lib/compiler_rt/floattidf.zig @@ -8,10 +8,7 @@ comptime { if (common.want_windows_v2u64_abi) { @export(&__floattidf_windows_x86_64, .{ .name = "__floattidf", .linkage = common.linkage, .visibility = common.visibility }); } else { - @export(&__floattidf, .{ .name = switch (builtin.cpu.arch) { - .hexagon => "__hexagon", - else => "_", - } ++ "_floattidf", .linkage = common.linkage, .visibility = common.visibility }); + @export(&__floattidf, .{ .name = "__floattidf", .linkage = common.linkage, .visibility = common.visibility }); } } diff --git a/lib/compiler_rt/floattisf.zig b/lib/compiler_rt/floattisf.zig index 4483643d453a..284580c4b2cc 100644 --- a/lib/compiler_rt/floattisf.zig +++ b/lib/compiler_rt/floattisf.zig @@ -8,10 +8,7 @@ comptime { if (common.want_windows_v2u64_abi) { @export(&__floattisf_windows_x86_64, .{ .name = "__floattisf", .linkage = common.linkage, .visibility = common.visibility }); } else { - @export(&__floattisf, .{ .name = switch (builtin.cpu.arch) { - .hexagon => "__hexagon", - else => "_", - } ++ "_floattisf", .linkage = common.linkage, 
.visibility = common.visibility }); + @export(&__floattisf, .{ .name = "__floattisf", .linkage = common.linkage, .visibility = common.visibility }); } } diff --git a/lib/include/adcintrin.h b/lib/include/adcintrin.h index 0065a1b543f8..5c68fce9370b 100644 --- a/lib/include/adcintrin.h +++ b/lib/include/adcintrin.h @@ -15,7 +15,12 @@ #endif /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__)) constexpr +#else #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#endif /* Use C++ inline semantics in C++, GNU inline for C mode. */ #if defined(__cplusplus) diff --git a/lib/include/adxintrin.h b/lib/include/adxintrin.h index bc6a4caf3533..055e91f8e2b3 100644 --- a/lib/include/adxintrin.h +++ b/lib/include/adxintrin.h @@ -15,8 +15,13 @@ #define __ADXINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("adx"))) constexpr +#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("adx"))) +#endif /* Use C++ inline semantics in C++, GNU inline for C mode. 
*/ #if defined(__cplusplus) diff --git a/lib/include/altivec.h b/lib/include/altivec.h index 4971631c50f4..8da65055012f 100644 --- a/lib/include/altivec.h +++ b/lib/include/altivec.h @@ -2502,37 +2502,37 @@ vec_cmplt(vector unsigned long long __a, vector unsigned long long __b) { static __inline__ vector unsigned char __ATTRS_o_ai vec_popcnt(vector signed char __a) { - return (vector unsigned char)__builtin_altivec_vpopcntb( + return (vector unsigned char)__builtin_elementwise_popcount( (vector unsigned char)__a); } static __inline__ vector unsigned char __ATTRS_o_ai vec_popcnt(vector unsigned char __a) { - return __builtin_altivec_vpopcntb(__a); + return __builtin_elementwise_popcount(__a); } static __inline__ vector unsigned short __ATTRS_o_ai vec_popcnt(vector signed short __a) { - return (vector unsigned short)__builtin_altivec_vpopcnth( + return (vector unsigned short)__builtin_elementwise_popcount( (vector unsigned short)__a); } static __inline__ vector unsigned short __ATTRS_o_ai vec_popcnt(vector unsigned short __a) { - return __builtin_altivec_vpopcnth(__a); + return __builtin_elementwise_popcount(__a); } static __inline__ vector unsigned int __ATTRS_o_ai vec_popcnt(vector signed int __a) { - return __builtin_altivec_vpopcntw((vector unsigned int)__a); + return __builtin_elementwise_popcount((vector unsigned int)__a); } static __inline__ vector unsigned int __ATTRS_o_ai vec_popcnt(vector unsigned int __a) { - return __builtin_altivec_vpopcntw(__a); + return __builtin_elementwise_popcount(__a); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_popcnt(vector signed long long __a) { - return __builtin_altivec_vpopcntd((vector unsigned long long)__a); + return __builtin_elementwise_popcount((vector unsigned long long)__a); } static __inline__ vector unsigned long long __ATTRS_o_ai vec_popcnt(vector unsigned long long __a) { - return __builtin_altivec_vpopcntd(__a); + return __builtin_elementwise_popcount(__a); } #define vec_vclz vec_cntlz diff 
--git a/lib/include/amxavx512intrin.h b/lib/include/amxavx512intrin.h new file mode 100644 index 000000000000..a158983482d5 --- /dev/null +++ b/lib/include/amxavx512intrin.h @@ -0,0 +1,382 @@ +/*===--------------------- amxavx512intrin.h - AMXAVX512 --------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===------------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif // __IMMINTRIN_H + +#ifndef __AMX_AVX512INTRIN_H +#define __AMX_AVX512INTRIN_H +#if defined(__x86_64__) && defined(__SSE2__) + +#define __DEFAULT_FN_ATTRS_AVX512 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("amx-avx512,avx10.2-512"))) + +/// Moves a row from a tile register to a zmm destination register, converting +/// the int32 source elements to fp32. The row of the tile is selected by a +/// 32b GPR. +/// +/// \headerfile +/// +/// \code +/// __m512i _tile_cvtrowd2ps(__tile tsrc, unsigned int row); +/// \endcode +/// +/// \code{.operation} +/// VL := 512 +/// VL_bytes := VL >> 3 +/// row_index := row & 0xffff +/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes +/// FOR i := 0 TO (VL_bytes / 4) - 1 +/// IF i + row_chunk / 4 >= tsrc.colsb / 4 +/// dst.dword[i] := 0 +/// ELSE +/// dst.f32[i] := CONVERT_INT32_TO_FP32(tsrc.row[row_index].dword[row_chunk/4+i], RNE) +/// FI +/// ENDFOR +/// dst[MAX_VL-1:VL] := 0 +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TCVTROWD2PS instruction. +/// +/// \param tsrc +/// The source tile. Max size is 1024 Bytes. 
+/// \param row +/// The row of the source tile +#define _tile_cvtrowd2ps(tsrc, row) __builtin_ia32_tcvtrowd2ps(tsrc, row) + +/// Moves a row from a tile register to a zmm destination register, converting +/// the fp32 source elements to bf16. It places the resulting bf16 elements +/// in the high 16 bits within each dword. The row of the tile is selected +/// by a 32b GPR. +/// +/// \headerfile +/// +/// \code +/// __m512i _tile_cvtrowps2bf16h(__tile tsrc, unsigned int row); +/// \endcode +/// +/// \code{.operation} +/// VL := 512 +/// VL_bytes := VL >> 3 +/// row_index := row & 0xffff +/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes +/// FOR i := 0 TO (VL_bytes / 4) - 1 +/// IF i + row_chunk / 4 >= tsrc.colsb / 4 +/// dst.dword[i] := 0 +/// ELSE +/// dst.word[2*i+0] := 0 +/// dst.bf16[2*i+1] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) +/// FI +/// ENDFOR +/// dst[MAX_VL-1:VL] := 0 +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TCVTROWPS2BF16H instruction. +/// +/// \param tsrc +/// The source tile. Max size is 1024 Bytes. +/// \param row +/// The the row of the source tile. +#define _tile_cvtrowps2bf16h(tsrc, row) \ + __builtin_ia32_tcvtrowps2bf16h(tsrc, row) + +/// Moves a row from a tile register to a zmm destination register, converting +/// the fp32 source elements to bf16. It places the resulting bf16 elements +/// in the low 16 bits within each dword. The row of the tile is selected +/// by a 32b GPR. 
+/// +/// \headerfile +/// +/// \code +/// __m512i _tile_cvtrowps2bf16l(__tile tsrc, unsigned int row); +/// \endcode +/// +/// \code{.operation} +/// VL := 512 +/// VL_bytes := VL >> 3 +/// row_index := row & 0xffff +/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes +/// FOR i := 0 TO (VL_bytes / 4) - 1 +/// IF i + row_chunk / 4 >= tsrc.colsb / 4 +/// dst.dword[i] := 0 +/// ELSE +/// dst.word[2*i+1] := 0 +/// dst.bf16[2*i+0] := CONVERT_FP32_TO_BF16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) +/// FI +/// ENDFOR +/// dst[MAX_VL-1:VL] := 0 +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TCVTROWPS2BF16L instruction. +/// +/// \param tsrc +/// The source tile. Max size is 1024 Bytes. +/// \param row +/// The the row of the source tile. +#define _tile_cvtrowps2bf16l(tsrc, row) \ + __builtin_ia32_tcvtrowps2bf16l(tsrc, row) + +/// Moves a row from a tile register to a zmm destination register, converting +/// the fp32 source elements to fp16. It places the resulting fp16 elements +/// in the high 16 bits within each dword. The row of the tile is selected +/// by a 32b GPR. +/// +/// \headerfile +/// +/// \code +/// __m512i _tile_cvtrowps2phh(__tile tsrc, unsigned int row); +/// \endcode +/// +/// \code{.operation} +/// VL := 512 +/// VL_bytes := VL >> 3 +/// row_index := row & 0xffff +/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes +/// FOR i := 0 TO (VL_bytes / 4) - 1 +/// IF i + row_chunk / 4 >= tsrc.colsb / 4 +/// dst.dword[i] := 0 +/// ELSE +/// dst.word[2*i+0] := 0 +/// dst.fp16[2*i+1] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) +/// FI +/// ENDFOR +/// dst[MAX_VL-1:VL] := 0 +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TCVTROWPS2PHH instruction. +/// +/// \param tsrc +/// The source tile. Max size is 1024 Bytes. +/// \param row +/// The the row of the source tile. 
+#define _tile_cvtrowps2phh(tsrc, row) __builtin_ia32_tcvtrowps2phh(tsrc, row) + +/// Moves a row from a tile register to a zmm destination register, converting +/// the fp32 source elements to fp16. It places the resulting fp16 elements +/// in the low 16 bits within each dword. The row of the tile is selected +/// by a 32b GPR. +/// +/// \headerfile +/// +/// \code +/// __m512i _tile_cvtrowps2phl(__tile tsrc, unsigned int row); +/// \endcode +/// +/// \code{.operation} +/// VL := 512 +/// VL_bytes := VL >> 3 +/// row_index := row & 0xffff +/// row_chunk := ((row >> 16) & 0xffff) * VL_bytes +/// FOR i := 0 TO (VL_bytes / 4) - 1 +/// IF i + row_chunk / 4 >= tsrc.colsb / 4 +/// dst.dword[i] := 0 +/// ELSE +/// dst.word[2*i+1] := 0 +/// dst.fp16[2*i+0] := CONVERT_FP32_TO_FP16(tsrc.row[row_index].fp32[row_chunk/4+i], RNE) +/// FI +/// ENDFOR +/// dst[MAX_VL-1:VL] := 0 +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TCVTROWPS2PHL instruction. +/// +/// \param tsrc +/// The source tile. Max size is 1024 Bytes. +/// \param row +/// The the row of the source tile. +#define _tile_cvtrowps2phl(tsrc, row) __builtin_ia32_tcvtrowps2phl(tsrc, row) + +/// Move one row of a tile data to a v16f32 data. +/// The row of the tile is selected by a 32b GPR. +/// +/// \headerfile +/// +/// \code +/// __m512 _tile_movrow(__tile a, unsigned b); +/// \endcode +/// +/// This intrinsic corresponds to the TILEMOVROW instruction. +/// +/// \param a +/// The 1st source tile. Max size is 1024 Bytes. +/// \param b +/// The 2nd source r32. Size is 4 Bytes. +/// \returns +/// The destination v16f32 data. Size is 64 Bytes. 
+/// +/// \code{.operation} +/// VL := 512 +/// VL_bytes := VL>>3 +/// row_index := b&0xffff +/// row_chunk := ((b>>16)&0xffff) * VL_bytes +/// FOR i := 0 TO (VL_bytes-1) +/// IF (row_chunk + i >= a.colsb) +/// dst.byte[i] := 0 +/// ELSE +/// dst.byte[i] := a.row[row_index].byte[row_chunk+i] +/// ENDFOR +/// \endcode +#define _tile_movrow(a, b) __builtin_ia32_tilemovrow(a, b) + +/// This is internal intrinsic. C/C++ user should avoid calling it directly. + +static __inline__ __m512 __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowd2ps_internal( + unsigned short m, unsigned short n, _tile1024i src, unsigned u) { + return __builtin_ia32_tcvtrowd2ps_internal(m, n, src, u); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512 +_tile_cvtrowps2bf16h_internal(unsigned short m, unsigned short n, + _tile1024i src, unsigned u) { + return __builtin_ia32_tcvtrowps2bf16h_internal(m, n, src, u); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS_AVX512 +_tile_cvtrowps2bf16l_internal(unsigned short m, unsigned short n, + _tile1024i src, unsigned u) { + return __builtin_ia32_tcvtrowps2bf16l_internal(m, n, src, u); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phh_internal( + unsigned short m, unsigned short n, _tile1024i src, unsigned u) { + return __builtin_ia32_tcvtrowps2phh_internal(m, n, src, u); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS_AVX512 _tile_cvtrowps2phl_internal( + unsigned short m, unsigned short n, _tile1024i src, unsigned u) { + return __builtin_ia32_tcvtrowps2phl_internal(m, n, src, u); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_AVX512 _tile_movrow_internal( + unsigned short m, unsigned short n, _tile1024i src, unsigned u) { + return (__m512i)__builtin_ia32_tilemovrow_internal(m, n, src, u); +} + +/// Move a row from a tile (src0) to a v16f32 dst, converting the int32 source +/// elements to fp32. No SIMD exceptions are generated. Rounding is done as if +/// MXCSR.RC=RNE. Embedded rounding is not supported. 
+/// The row and chunk elements of tile is fetched from 32bit src1. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TCVTROWD2PS instruction. +/// +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source r32. Size is 4 Bytes. +/// \returns +/// The destination v16f32 data. Size is 64 Bytes. +__DEFAULT_FN_ATTRS_AVX512 +static __m512 __tile_cvtrowd2ps(__tile1024i src0, unsigned src1) { + return _tile_cvtrowd2ps_internal(src0.row, src0.col, src0.tile, src1); +} + +/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source +/// elements to bf16 at high 16-bits of each dword. +/// The row and chunk elements of tile is fetched from 32bit src1. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TCVTROWPS2BF16H instruction. +/// +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source r32. Size is 4 Bytes. +/// \returns +/// The destination v32bf16 data. Size is 64 Bytes. +__DEFAULT_FN_ATTRS_AVX512 +static __m512bh __tile_cvtrowps2bf16h(__tile1024i src0, unsigned src1) { + return _tile_cvtrowps2bf16h_internal(src0.row, src0.col, src0.tile, src1); +} + +/// Move a row from a tile (src0) to a v32bf16 dst, converting the fp32 source +/// elements to bf16 at low 16-bits of each dword. +/// The row and chunk elements of tile is fetched from 32bit src1. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TCVTROWPS2BF16L instruction. +/// +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source r32. Size is 4 Bytes. +/// \returns +/// The destination v32bf16 data. Size is 64 Bytes. 
+__DEFAULT_FN_ATTRS_AVX512 +static __m512bh __tile_cvtrowps2bf16l(__tile1024i src0, unsigned src1) { + return _tile_cvtrowps2bf16l_internal(src0.row, src0.col, src0.tile, src1); +} + +/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source +/// elements to fp16 at high 16-bits of each dword. +/// The row and chunk elements of tile is fetched from 32bit src1. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TCVTROWPS2PHH instruction. +/// +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source r32. Size is 4 Bytes. +/// \returns +/// The destination v32fp16 data. Size is 64 Bytes. +__DEFAULT_FN_ATTRS_AVX512 +static __m512h __tile_cvtrowps2phh(__tile1024i src0, unsigned src1) { + return _tile_cvtrowps2phh_internal(src0.row, src0.col, src0.tile, src1); +} + +/// Move a row from a tile (src0) to a v32fp16 dst, converting the fp32 source +/// elements to fp16 at low 16-bits of each dword. +/// The row and chunk elements of tile is fetched from 32bit src1. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TCVTROWPS2PHL instruction. +/// +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source r32. Size is 4 Bytes. +/// \returns +/// The destination v32fp16 data. Size is 64 Bytes. +__DEFAULT_FN_ATTRS_AVX512 +static __m512h __tile_cvtrowps2phl(__tile1024i src0, unsigned src1) { + return _tile_cvtrowps2phl_internal(src0.row, src0.col, src0.tile, src1); +} + +/// Move one row of a tile data to a v16f32 data. +/// The row of the tile is selected by a 32b GPR. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TILEMOVROW instruction. +/// +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source r32. Size is 4 Bytes. +/// \returns +/// The destination v16i32 data. Size is 64 Bytes. 
+__DEFAULT_FN_ATTRS_AVX512 +static __m512i __tile_movrow(__tile1024i src0, unsigned src1) { + return (__m512i)_tile_movrow_internal(src0.row, src0.col, src0.tile, src1); +} + +#endif // __x86_64__ && __SSE2__ +#endif // __AMX_AVX512INTRIN_H diff --git a/lib/include/amxbf16transposeintrin.h b/lib/include/amxbf16transposeintrin.h new file mode 100644 index 000000000000..86f09f2ad8db --- /dev/null +++ b/lib/include/amxbf16transposeintrin.h @@ -0,0 +1,94 @@ +/*===----- amxbf16transposeintrin.h - AMX-BF16 and AMX-TRANSPOSE ------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; use instead." +#endif /* __IMMINTRIN_H */ + +#ifndef __AMX_BF16TRANSPOSEINTRIN_H +#define __AMX_BF16TRANSPOSEINTRIN_H +#ifdef __x86_64__ + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("amx-bf16,amx-transpose"))) + +/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in +/// tiles \a a and \a b, accumulating the intermediate single-precision +/// (32-bit) floating-point elements with elements in \a dst, and store the +/// 32-bit result back to tile \a dst. 
+/// +/// \headerfile +/// +/// \code +/// void _tile_tdpbf16ps (__tile dst, __tile a, __tile b) +/// \endcode +/// +/// \code{.operation} +/// FOR m := 0 TO dst.rows - 1 +/// tmp := dst.row[m] +/// FOR k := 0 TO (a.colsb / 4) - 1 +/// FOR n := 0 TO (dst.colsb / 4) - 1 +/// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+0]) * +/// FP32(b.row[k].bf16[2*n+0]) +/// tmp.bf32[n] += FP32(a.row[m].bf16[2*k+1]) * +/// FP32(b.row[k].bf16[2*n+1]) +/// ENDFOR +/// ENDFOR +/// write_row_and_zero(dst, m, tmp, dst.colsb) +/// ENDFOR +/// zero_upper_rows(dst, dst.rows) +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TTDPBF16PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param a +/// The 1st source tile. Max size is 1024 Bytes. +/// \param b +/// The 2nd source tile. Max size is 1024 Bytes. +#define _tile_tdpbf16ps(dst, a, b) __builtin_ia32_ttdpbf16ps((dst), (a), (b)) + +/// This is internal intrinsic. C/C++ user should avoid calling it directly. +static __inline__ _tile1024i __DEFAULT_FN_ATTRS +_tile_tdpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k, + _tile1024i dst, _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_ttdpbf16ps_internal(m, n, k, dst, src1, src2); +} + +/// Compute transpose and dot-product of BF16 (16-bit) floating-point pairs in +/// tiles src0 and src1, accumulating the intermediate single-precision +/// (32-bit) floating-point elements with elements in "dst", and store the +/// 32-bit result back to tile "dst". +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TTDPBF16PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source tile. Max size is 1024 Bytes. 
+__DEFAULT_FN_ATTRS +static __inline__ void __tile_tdpbf16ps(__tile1024i *dst, __tile1024i src0, + __tile1024i src1) { + dst->tile = _tile_tdpbf16ps_internal(src0.row, src1.col, src0.col, dst->tile, + src0.tile, src1.tile); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* __x86_64__ */ +#endif /* __AMX_BF16TRANSPOSEINTRIN_H */ diff --git a/lib/include/amxcomplextransposeintrin.h b/lib/include/amxcomplextransposeintrin.h new file mode 100644 index 000000000000..11abaf98e937 --- /dev/null +++ b/lib/include/amxcomplextransposeintrin.h @@ -0,0 +1,303 @@ +/*===----- amxcomplextransposeintrin.h - AMX-COMPLEX and AMX-TRANSPOSE ------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." +#endif // __IMMINTRIN_H + +#ifndef __AMX_COMPLEXTRANSPOSEINTRIN_H +#define __AMX_COMPLEXTRANSPOSEINTRIN_H +#ifdef __x86_64__ + +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("amx-complex,amx-transpose"))) + +/// Perform matrix multiplication of two tiles containing complex elements and +/// accumulate the results into a packed single precision tile. Each dword +/// element in input tiles \a a and \a b is interpreted as a complex number +/// with FP16 real part and FP16 imaginary part. +/// Calculates the imaginary part of the result. For each possible combination +/// of (transposed column of \a a, column of \a b), it performs a set of +/// multiplication and accumulations on all corresponding complex numbers +/// (one from \a a and one from \a b). 
The imaginary part of the \a a element
+/// is multiplied with the real part of the corresponding \a b element, and
+/// the real part of the \a a element is multiplied with the imaginary part
+/// of the corresponding \a b elements. The two accumulated results are
+/// added, and then accumulated into the corresponding row and column of
+/// \a dst.
+///
+/// \headerfile <immintrin.h>
+///
+/// \code
+/// void _tile_tcmmimfp16ps(__tile dst, __tile a, __tile b);
+/// \endcode
+///
+/// \code{.operation}
+/// FOR m := 0 TO dst.rows - 1
+///	tmp := dst.row[m]
+///	FOR k := 0 TO a.rows - 1
+///		FOR n := 0 TO (dst.colsb / 4) - 1
+///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1])
+///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0])
+///		ENDFOR
+///	ENDFOR
+///	write_row_and_zero(dst, m, tmp, dst.colsb)
+/// ENDFOR
+/// zero_upper_rows(dst, dst.rows)
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TTCMMIMFP16PS instruction.
+///
+/// \param dst
+///    The destination tile. Max size is 1024 Bytes.
+/// \param a
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param b
+///    The 2nd source tile. Max size is 1024 Bytes.
+#define _tile_tcmmimfp16ps(dst, a, b)                                          \
+  __builtin_ia32_ttcmmimfp16ps((dst), (a), (b))
+
+/// Perform matrix multiplication of two tiles containing complex elements and
+/// accumulate the results into a packed single precision tile. Each dword
+/// element in input tiles \a a and \a b is interpreted as a complex number
+/// with FP16 real part and FP16 imaginary part.
+/// Calculates the real part of the result. For each possible combination
+/// of (transposed column of \a a, column of \a b), it performs a set of
+/// multiplication and accumulations on all corresponding complex numbers
+/// (one from \a a and one from \a b). The real part of the \a a element is
+/// multiplied with the real part of the corresponding \a b element, and the
+/// negated imaginary part of the \a a element is multiplied with the
+/// imaginary part of the corresponding \a b elements. The two accumulated
+/// results are added, and then accumulated into the corresponding row and
+/// column of \a dst.
+///
+/// \headerfile <immintrin.h>
+///
+/// \code
+/// void _tile_tcmmrlfp16ps(__tile dst, __tile a, __tile b);
+/// \endcode
+///
+/// \code{.operation}
+/// FOR m := 0 TO dst.rows - 1
+///	tmp := dst.row[m]
+///	FOR k := 0 TO a.rows - 1
+///		FOR n := 0 TO (dst.colsb / 4) - 1
+///			tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+0])
+///			tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+1])
+///		ENDFOR
+///	ENDFOR
+///	write_row_and_zero(dst, m, tmp, dst.colsb)
+/// ENDFOR
+/// zero_upper_rows(dst, dst.rows)
+/// zero_tileconfig_start()
+/// \endcode
+///
+/// This intrinsic corresponds to the \c TTCMMRLFP16PS instruction.
+///
+/// \param dst
+///    The destination tile. Max size is 1024 Bytes.
+/// \param a
+///    The 1st source tile. Max size is 1024 Bytes.
+/// \param b
+///    The 2nd source tile. Max size is 1024 Bytes.
+#define _tile_tcmmrlfp16ps(dst, a, b)                                          \
+  __builtin_ia32_ttcmmrlfp16ps((dst), (a), (b))
+
+/// Perform matrix conjugate transpose and multiplication of two tiles
+/// containing complex elements and accumulate the results into a packed
+/// single precision tile. Each dword element in input tiles \a a and \a b
+/// is interpreted as a complex number with FP16 real part and FP16 imaginary
+/// part.
+/// Calculates the imaginary part of the result. For each possible combination
+/// of (transposed column of \a a, column of \a b), it performs a set of
+/// multiplication and accumulations on all corresponding complex numbers
+/// (one from \a a and one from \a b). 
The negated imaginary part of the \a a +/// element is multiplied with the real part of the corresponding \a b +/// element, and the real part of the \a a element is multiplied with the +/// imaginary part of the corresponding \a b elements. The two accumulated +/// results are added, and then accumulated into the corresponding row and +/// column of \a dst. +/// +/// \headerfile +/// +/// \code +/// void _tile_conjtcmmimfp16ps(__tile dst, __tile a, __tile b); +/// \endcode +/// +/// \code{.operation} +/// FOR m := 0 TO dst.rows - 1 +/// tmp := dst.row[m] +/// FOR k := 0 TO a.rows - 1 +/// FOR n := 0 TO (dst.colsb / 4) - 1 +/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * FP32(b.row[k].fp16[2*n+1]) +/// tmp.fp32[n] += FP32(-a.row[m].fp16[2*k+1]) * FP32(b.row[k].fp16[2*n+0]) +/// ENDFOR +/// ENDFOR +/// write_row_and_zero(dst, m, tmp, dst.colsb) +/// ENDFOR +/// zero_upper_rows(dst, dst.rows) +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TCONJTCMMIMFP16PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param a +/// The 1st source tile. Max size is 1024 Bytes. +/// \param b +/// The 2nd source tile. Max size is 1024 Bytes. +#define _tile_conjtcmmimfp16ps(dst, a, b) \ + __builtin_ia32_tconjtcmmimfp16ps((dst), (a), (b)) + +/// Perform conjugate transpose of an FP16-pair of complex elements from \a a +/// and writes the result to \a dst. +/// +/// \headerfile +/// +/// \code +/// void _tile_conjtfp16(__tile dst, __tile a); +/// \endcode +/// +/// \code{.operation} +/// FOR i := 0 TO dst.rows - 1 +/// FOR j := 0 TO (dst.colsb / 4) - 1 +/// tmp.fp16[2*j+0] := a.row[j].fp16[2*i+0] +/// tmp.fp16[2*j+1] := -a.row[j].fp16[2*i+1] +/// ENDFOR +/// write_row_and_zero(dst, i, tmp, dst.colsb) +/// ENDFOR +/// zero_upper_rows(dst, dst.rows) +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TCONJTFP16 instruction. 
+/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param a +/// The source tile. Max size is 1024 Bytes. +#define _tile_conjtfp16(dst, a) __builtin_ia32_tconjtfp16((dst), (a)) + +static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmimfp16ps_internal( + unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, + _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_ttcmmimfp16ps_internal(m, n, k, dst, src1, src2); +} + +static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_tcmmrlfp16ps_internal( + unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, + _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_ttcmmrlfp16ps_internal(m, n, k, dst, src1, src2); +} + +static __inline__ _tile1024i __DEFAULT_FN_ATTRS _tile_conjtcmmimfp16ps_internal( + unsigned short m, unsigned short n, unsigned short k, _tile1024i dst, + _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_tconjtcmmimfp16ps_internal(m, n, k, dst, src1, src2); +} + +static __inline__ _tile1024i __DEFAULT_FN_ATTRS +_tile_conjtfp16_internal(unsigned short m, unsigned short n, _tile1024i src) { + return __builtin_ia32_tconjtfp16_internal(m, n, src); +} + +/// Perform matrix multiplication of two tiles containing complex elements and +/// accumulate the results into a packed single precision tile. Each dword +/// element in input tiles src0 and src1 is interpreted as a complex number +/// with FP16 real part and FP16 imaginary part. +/// This function calculates the imaginary part of the result. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TTCMMIMFP16PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source tile. Max size is 1024 Bytes. 
+__DEFAULT_FN_ATTRS +static void __tile_tcmmimfp16ps(__tile1024i *dst, __tile1024i src0, + __tile1024i src1) { + dst->tile = _tile_tcmmimfp16ps_internal(src0.row, src1.col, src0.col, + dst->tile, src0.tile, src1.tile); +} + +/// Perform matrix multiplication of two tiles containing complex elements and +/// accumulate the results into a packed single precision tile. Each dword +/// element in input tiles src0 and src1 is interpreted as a complex number +/// with FP16 real part and FP16 imaginary part. +/// This function calculates the real part of the result. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TTCMMRLFP16PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source tile. Max size is 1024 Bytes. +__DEFAULT_FN_ATTRS +static void __tile_tcmmrlfp16ps(__tile1024i *dst, __tile1024i src0, + __tile1024i src1) { + dst->tile = _tile_tcmmrlfp16ps_internal(src0.row, src1.col, src0.col, + dst->tile, src0.tile, src1.tile); +} + +/// Perform matrix conjugate transpose and multiplication of two tiles +/// containing complex elements and accumulate the results into a packed +/// single precision tile. Each dword element in input tiles src0 and src1 +/// is interpreted as a complex number with FP16 real part and FP16 imaginary +/// part. +/// This function calculates the imaginary part of the result. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TCONJTCMMIMFP16PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source tile. Max size is 1024 Bytes. 
+__DEFAULT_FN_ATTRS +static void __tile_conjtcmmimfp16ps(__tile1024i *dst, __tile1024i src0, + __tile1024i src1) { + dst->tile = _tile_conjtcmmimfp16ps_internal(src0.row, src1.col, src0.col, + dst->tile, src0.tile, src1.tile); +} + +/// Perform conjugate transpose of an FP16-pair of complex elements from src and +/// writes the result to dst. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TCONJTFP16 instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src +/// The source tile. Max size is 1024 Bytes. +__DEFAULT_FN_ATTRS +static void __tile_conjtfp16(__tile1024i *dst, __tile1024i src) { + dst->tile = _tile_conjtfp16_internal(src.row, src.col, src.tile); +} + +#undef __DEFAULT_FN_ATTRS + +#endif // __x86_64__ +#endif // __AMX_COMPLEXTRANSPOSEINTRIN_H diff --git a/lib/include/amxfp16intrin.h b/lib/include/amxfp16intrin.h index ed798245d41e..bb4bc31fdafd 100644 --- a/lib/include/amxfp16intrin.h +++ b/lib/include/amxfp16intrin.h @@ -15,6 +15,10 @@ #define __AMX_FP16INTRIN_H #ifdef __x86_64__ +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("amx-fp16"))) + /// Compute dot-product of FP16 (16-bit) floating-point pairs in tiles \a a /// and \a b, accumulating the intermediate single-precision (32-bit) /// floating-point elements with elements in \a dst, and store the 32-bit @@ -54,5 +58,36 @@ #define _tile_dpfp16ps(dst, a, b) \ __builtin_ia32_tdpfp16ps(dst, a, b) +/// This is internal intrinsic. C/C++ user should avoid calling it directly. 
+static __inline__ _tile1024i __DEFAULT_FN_ATTRS +_tile_dpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k, + _tile1024i dst, _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_tdpfp16ps_internal(m, n, k, dst, src1, src2); +} + +/// Compute dot-product of FP16 (16-bit) floating-point pairs in tiles src0 and +/// src1, accumulating the intermediate single-precision (32-bit) floating-point +/// elements with elements in "dst", and store the 32-bit result back to tile +/// "dst". +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TDPFP16PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source tile. Max size is 1024 Bytes. +__DEFAULT_FN_ATTRS +static __inline__ void __tile_dpfp16ps(__tile1024i *dst, __tile1024i src0, + __tile1024i src1) { + dst->tile = _tile_dpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile, + src0.tile, src1.tile); +} + +#undef __DEFAULT_FN_ATTRS + #endif /* __x86_64__ */ #endif /* __AMX_FP16INTRIN_H */ diff --git a/lib/include/amxfp16transposeintrin.h b/lib/include/amxfp16transposeintrin.h new file mode 100644 index 000000000000..191f8c6097a2 --- /dev/null +++ b/lib/include/amxfp16transposeintrin.h @@ -0,0 +1,94 @@ +/*===----- amxfp16transposeintrin.h - AMX-FP16 and AMX-TRANSPOSE ------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; use instead." +#endif /* __IMMINTRIN_H */ + +#ifndef __AMX_FP16TRANSPOSEINTRIN_H +#define __AMX_FP16TRANSPOSEINTRIN_H +#ifdef __x86_64__ + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("amx-fp16,amx-transpose"))) + +/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in +/// tiles \a a and \a b, accumulating the intermediate single-precision +/// (32-bit) floating-point elements with elements in \a dst, and store the +/// 32-bit result back to tile \a dst. +/// +/// \headerfile +/// +/// \code +/// void _tile_tdpfp16ps (__tile dst, __tile a, __tile b) +/// \endcode +/// +/// \code{.operation} +/// FOR m := 0 TO dst.rows - 1 +/// tmp := dst.row[m] +/// FOR k := 0 TO (a.colsb / 4) - 1 +/// FOR n := 0 TO (dst.colsb / 4) - 1 +/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+0]) * +/// FP32(b.row[k].fp16[2*n+0]) +/// tmp.fp32[n] += FP32(a.row[m].fp16[2*k+1]) * +/// FP32(b.row[k].fp16[2*n+1]) +/// ENDFOR +/// ENDFOR +/// write_row_and_zero(dst, m, tmp, dst.colsb) +/// ENDFOR +/// zero_upper_rows(dst, dst.rows) +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TTDPFP16PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param a +/// The 1st source tile. Max size is 1024 Bytes. +/// \param b +/// The 2nd source tile. Max size is 1024 Bytes. +#define _tile_tdpfp16ps(dst, a, b) __builtin_ia32_ttdpfp16ps((dst), (a), (b)) + +/// This is internal intrinsic. C/C++ user should avoid calling it directly. +static __inline__ _tile1024i __DEFAULT_FN_ATTRS +_tile_tdpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k, + _tile1024i dst, _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_ttdpfp16ps_internal(m, n, k, dst, src1, src2); +} + +/// Compute transpose and dot-product of FP16 (16-bit) floating-point pairs in +/// tiles src0 and src1, accumulating the intermediate single-precision +/// (32-bit) floating-point elements with elements in "dst", and store the +/// 32-bit result back to tile "dst". 
+/// +/// \headerfile +/// +/// This intrinsic corresponds to the TTDPFP16PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source tile. Max size is 1024 Bytes. +__DEFAULT_FN_ATTRS +static __inline__ void __tile_tdpfp16ps(__tile1024i *dst, __tile1024i src0, + __tile1024i src1) { + dst->tile = _tile_tdpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile, + src0.tile, src1.tile); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* __x86_64__ */ +#endif /* __AMX_FP16TRANSPOSEINTRIN_H */ diff --git a/lib/include/amxfp8intrin.h b/lib/include/amxfp8intrin.h new file mode 100644 index 000000000000..92e7989974e7 --- /dev/null +++ b/lib/include/amxfp8intrin.h @@ -0,0 +1,230 @@ +/*===------------- amxfp8intrin.h - AMX intrinsics -*- C++ -*----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif /* __IMMINTRIN_H */ + +#ifndef __AMXFP8INTRIN_H +#define __AMXFP8INTRIN_H +#ifdef __x86_64__ + +#define __DEFAULT_FN_ATTRS_FP8 \ + __attribute__((__always_inline__, __nodebug__, __target__("amx-fp8"))) + +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_FP8 +_tile_dpbf8ps_internal(unsigned short m, unsigned short n, unsigned short k, + _tile1024i dst, _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_tdpbf8ps_internal(m, n, k, dst, src1, src2); +} + +/// Perform the dot product of a BF8 value \a src1 by a BF8 value \a src2 +/// accumulating into a Single Precision (FP32) source/dest \a dst. 
+/// +/// \headerfile +/// +/// \code +/// void __tile_dpbf8ps (__tile1024i *dst, __tile1024i src1, __tile1024i src2) +/// \endcode +/// +/// \code{.operation} +/// FOR m := 0 TO dst.rows - 1 +/// temp1[(dst.colsb / 4 - 1) : 0] = 0 +/// FOR k := 0 TO src1.colsb / 4 - 1 +/// FOR n := 0 TO dst.colsb / 4 - 1 +/// temp1[n] += +/// INT64(src1.row[m].float8[4*k+0]) * INT64(src2.row[k].float8[4*n+0]) +/// + INT64(src1.row[m].float8[4*k+1]) * INT64(src2.row[k].float8[4*n+1]) +/// + INT64(src1.row[m].float8[4*k+2]) * INT64(src2.row[k].float8[4*n+2]) +/// + INT64(src1.row[m].float8[4*k+3]) * INT64(src2.row[k].float8[4*n+3]) +/// ENDFOR +/// ENDFOR +/// FOR n := 0 TO dst.colsb / 4 - 1 +/// tmp.row[m].fp32[n] = dst.row[m].fp32[n] + FP32(temp1[n]) +/// ENDFOR +/// write_row_and_zero(dst, m, tmp, dst.colsb) +/// zero_upper_rows(dst, dst.rows) +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TDPBF8PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src1 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src2 +/// The 2nd source tile. Max size is 1024 Bytes. +__DEFAULT_FN_ATTRS_FP8 static void +__tile_dpbf8ps(__tile1024i *dst, __tile1024i src1, __tile1024i src2) { + dst->tile = _tile_dpbf8ps_internal(src1.row, src2.col, src1.col, dst->tile, + src1.tile, src2.tile); +} + +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_FP8 +_tile_dpbhf8ps_internal(unsigned short m, unsigned short n, unsigned short k, + _tile1024i dst, _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_tdpbhf8ps_internal(m, n, k, dst, src1, src2); +} + +/// Perform the dot product of a BF8 value \a src1 by an HF8 value \a src2 +/// accumulating into a Single Precision (FP32) source/dest \a dst. 
+/// +/// \headerfile +/// +/// \code +/// void __tile_dpbhf8ps (__tile1024i dst, __tile1024i src1, __tile1024i src2) +/// \endcode +/// +/// \code{.operation} +/// FOR m := 0 TO dst.rows - 1 +/// temp1[(dst.colsb / 4 - 1) : 0] = 0 +/// FOR k := 0 TO src1.colsb / 4 - 1 +/// FOR n := 0 TO dst.colsb / 4 - 1 +/// temp1[n] += +/// INT64(src1.row[m].float8[4*k+0]) * INT64(src2.row[k].float8[4*n+0]) +/// + INT64(src1.row[m].float8[4*k+1]) * INT64(src2.row[k].float8[4*n+1]) +/// + INT64(src1.row[m].float8[4*k+2]) * INT64(src2.row[k].float8[4*n+2]) +/// + INT64(src1.row[m].float8[4*k+3]) * INT64(src2.row[k].float8[4*n+3]) +/// ENDFOR +/// ENDFOR +/// FOR n := 0 TO dst.colsb / 4 - 1 +/// tmp.row[m].fp32[n] = dst.row[m].fp32[n] + FP32(temp1[n]) +/// ENDFOR +/// write_row_and_zero(dst, m, tmp, dst.colsb) +/// zero_upper_rows(dst, dst.rows) +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TDPBHF8PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src1 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src2 +/// The 2nd source tile. Max size is 1024 Bytes. +__DEFAULT_FN_ATTRS_FP8 static void +__tile_dpbhf8ps(__tile1024i *dst, __tile1024i src1, __tile1024i src2) { + dst->tile = _tile_dpbhf8ps_internal(src1.row, src2.col, src1.col, dst->tile, + src1.tile, src2.tile); +} + +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_FP8 +_tile_dphbf8ps_internal(unsigned short m, unsigned short n, unsigned short k, + _tile1024i dst, _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_tdphbf8ps_internal(m, n, k, dst, src1, src2); +} + +/// Perform the dot product of an HF8 value \a src1 by a BF8 value \a src2 +/// accumulating into a Single Precision (FP32) source/dest \a dst. 
+/// +/// \headerfile +/// +/// \code +/// void __tile_dphbf8ps (__tile1024i dst, __tile1024i src1, __tile1024i src2) +/// \endcode +/// +/// \code{.operation} +/// FOR m := 0 TO dst.rows - 1 +/// temp1[(dst.colsb / 4 - 1) : 0] = 0 +/// FOR k := 0 TO src1.colsb / 4 - 1 +/// FOR n := 0 TO dst.colsb / 4 - 1 +/// temp1[n] += +/// INT64(src1.row[m].float8[4*k+0]) * INT64(src2.row[k].float8[4*n+0]) +/// + INT64(src1.row[m].float8[4*k+1]) * INT64(src2.row[k].float8[4*n+1]) +/// + INT64(src1.row[m].float8[4*k+2]) * INT64(src2.row[k].float8[4*n+2]) +/// + INT64(src1.row[m].float8[4*k+3]) * INT64(src2.row[k].float8[4*n+3]) +/// ENDFOR +/// ENDFOR +/// FOR n := 0 TO dst.colsb / 4 - 1 +/// tmp.row[m].fp32[n] = dst.row[m].fp32[n] + FP32(temp1[n]) +/// ENDFOR +/// write_row_and_zero(dst, m, tmp, dst.colsb) +/// zero_upper_rows(dst, dst.rows) +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TDPHBF8PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src1 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src2 +/// The 2nd source tile. Max size is 1024 Bytes. + +__DEFAULT_FN_ATTRS_FP8 static void +__tile_dphbf8ps(__tile1024i *dst, __tile1024i src1, __tile1024i src2) { + dst->tile = _tile_dphbf8ps_internal(src1.row, src2.col, src1.col, dst->tile, + src1.tile, src2.tile); +} + +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_FP8 +_tile_dphf8ps_internal(unsigned short m, unsigned short n, unsigned short k, + _tile1024i dst, _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_tdphf8ps_internal(m, n, k, dst, src1, src2); +} + +/// Perform the dot product of an HF8 value \a src1 by an HF8 value \a src2 +/// accumulating into a Single Precision (FP32) source/dest \a dst. 
+/// +/// \headerfile +/// +/// \code +/// void __tile_dphf8ps (__tile1024i dst, __tile1024i src1, __tile1024i src2) +/// \endcode +/// +/// \code{.operation} +/// FOR m := 0 TO dst.rows - 1 +/// temp1[(dst.colsb / 4 - 1) : 0] = 0 +/// FOR k := 0 TO src1.colsb / 4 - 1 +/// FOR n := 0 TO dst.colsb / 4 - 1 +/// temp1[n] += +/// INT64(src1.row[m].float8[4*k+0]) * INT64(src2.row[k].float8[4*n+0]) +/// + INT64(src1.row[m].float8[4*k+1]) * INT64(src2.row[k].float8[4*n+1]) +/// + INT64(src1.row[m].float8[4*k+2]) * INT64(src2.row[k].float8[4*n+2]) +/// + INT64(src1.row[m].float8[4*k+3]) * INT64(src2.row[k].float8[4*n+3]) +/// ENDFOR +/// ENDFOR +/// FOR n := 0 TO dst.colsb / 4 - 1 +/// tmp.row[m].fp32[n] = dst.row[m].fp32[n] + FP32(temp1[n]) +/// ENDFOR +/// write_row_and_zero(dst, m, tmp, dst.colsb) +/// zero_upper_rows(dst, dst.rows) +/// zero_tileconfig_start() +/// \endcode +/// +/// This intrinsic corresponds to the \c TDPHF8PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src1 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src2 +/// The 2nd source tile. Max size is 1024 Bytes. 
+__DEFAULT_FN_ATTRS_FP8 static void +__tile_dphf8ps(__tile1024i *dst, __tile1024i src1, __tile1024i src2) { + dst->tile = _tile_dphf8ps_internal(src1.row, src2.col, src1.col, dst->tile, + src1.tile, src2.tile); +} + +#define _tile_dpbf8ps(dst, src1, src2) \ + __builtin_ia32_tdpbf8ps((dst), (src1), (src2)) +#define _tile_dpbhf8ps(dst, src1, src2) \ + __builtin_ia32_tdpbhf8ps((dst), (src1), (src2)) +#define _tile_dphbf8ps(dst, src1, src2) \ + __builtin_ia32_tdphbf8ps((dst), (src1), (src2)) +#define _tile_dphf8ps(dst, src1, src2) \ + __builtin_ia32_tdphf8ps((dst), (src1), (src2)) + +#undef __DEFAULT_FN_ATTRS_FP8 + +#endif /* __x86_64__ */ +#endif /* __AMXFP8INTRIN_H */ diff --git a/lib/include/amxintrin.h b/lib/include/amxintrin.h index baa56f5b28e8..a7da10d9951e 100644 --- a/lib/include/amxintrin.h +++ b/lib/include/amxintrin.h @@ -22,8 +22,6 @@ __attribute__((__always_inline__, __nodebug__, __target__("amx-int8"))) #define __DEFAULT_FN_ATTRS_BF16 \ __attribute__((__always_inline__, __nodebug__, __target__("amx-bf16"))) -#define __DEFAULT_FN_ATTRS_FP16 \ - __attribute__((__always_inline__, __nodebug__, __target__("amx-fp16"))) /// Load tile configuration from a 64-byte memory location specified by /// "mem_addr". The tile configuration includes the tile type palette, the @@ -232,9 +230,11 @@ static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_release(void) { /// bytes. Since there is no 2D type in llvm IR, we use vector type to /// represent 2D tile and the fixed size is maximum amx tile register size. typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64))); +typedef int _tile1024i_1024a + __attribute__((__vector_size__(1024), __aligned__(1024))); /// This is internal intrinsic. C/C++ user should avoid calling it directly. 
-static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE _tile_loadd_internal(unsigned short m, unsigned short n, const void *base, __SIZE_TYPE__ stride) { return __builtin_ia32_tileloadd64_internal(m, n, base, @@ -242,7 +242,7 @@ _tile_loadd_internal(unsigned short m, unsigned short n, const void *base, } /// This is internal intrinsic. C/C++ user should avoid calling it directly. -static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TILE _tile_loaddt1_internal(unsigned short m, unsigned short n, const void *base, __SIZE_TYPE__ stride) { return __builtin_ia32_tileloaddt164_internal(m, n, base, @@ -278,7 +278,7 @@ _tile_dpbuud_internal(unsigned short m, unsigned short n, unsigned short k, } /// This is internal intrinsic. C/C++ user should avoid calling it directly. -static __inline__ void __DEFAULT_FN_ATTRS_INT8 +static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_stored_internal(unsigned short m, unsigned short n, void *base, __SIZE_TYPE__ stride, _tile1024i tile) { return __builtin_ia32_tilestored64_internal(m, n, base, @@ -292,13 +292,6 @@ _tile_dpbf16ps_internal(unsigned short m, unsigned short n, unsigned short k, return __builtin_ia32_tdpbf16ps_internal(m, n, k, dst, src1, src2); } -/// This is internal intrinsic. C/C++ user should avoid calling it directly. -static __inline__ _tile1024i __DEFAULT_FN_ATTRS_FP16 -_tile_dpfp16ps_internal(unsigned short m, unsigned short n, unsigned short k, - _tile1024i dst, _tile1024i src1, _tile1024i src2) { - return __builtin_ia32_tdpfp16ps_internal(m, n, k, dst, src1, src2); -} - /// This struct pack the shape and tile data together for user. We suggest /// initializing the struct as early as possible, because compiler depends /// on the shape information to do configure. 
The constant value is preferred @@ -493,32 +486,9 @@ static __inline__ void __tile_dpbf16ps(__tile1024i *dst, __tile1024i src0, src0.tile, src1.tile); } -/// Compute dot-product of FP16 (16-bit) floating-point pairs in tiles src0 and -/// src1, accumulating the intermediate single-precision (32-bit) floating-point -/// elements with elements in "dst", and store the 32-bit result back to tile -/// "dst". -/// -/// \headerfile -/// -/// This intrinsic corresponds to the TDPFP16PS instruction. -/// -/// \param dst -/// The destination tile. Max size is 1024 Bytes. -/// \param src0 -/// The 1st source tile. Max size is 1024 Bytes. -/// \param src1 -/// The 2nd source tile. Max size is 1024 Bytes. -__DEFAULT_FN_ATTRS_FP16 -static __inline__ void __tile_dpfp16ps(__tile1024i *dst, __tile1024i src0, - __tile1024i src1) { - dst->tile = _tile_dpfp16ps_internal(src0.row, src1.col, src0.col, dst->tile, - src0.tile, src1.tile); -} - #undef __DEFAULT_FN_ATTRS_TILE #undef __DEFAULT_FN_ATTRS_INT8 #undef __DEFAULT_FN_ATTRS_BF16 -#undef __DEFAULT_FN_ATTRS_FP16 #endif /* __x86_64__ */ #endif /* __AMXINTRIN_H */ diff --git a/lib/include/amxmovrsintrin.h b/lib/include/amxmovrsintrin.h new file mode 100644 index 000000000000..5fe2fdecb8b5 --- /dev/null +++ b/lib/include/amxmovrsintrin.h @@ -0,0 +1,48 @@ +/*===-------- amxmovrsintrin.h - AMX MOVRS intrinsics -*- C++ -*---------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * ===-------------------------------------------------------------------=== */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." 
+#endif /* __IMMINTRIN_H */ + +#ifndef __AMXMOVRSINTRIN_H +#define __AMXMOVRSINTRIN_H +#ifdef __x86_64__ + +#define __DEFAULT_FN_ATTRS_MOVRS \ + __attribute__((__always_inline__, __nodebug__, __target__("amx-movrs"))) + +#define _tile_loaddrs(dst, base, stride) \ + __builtin_ia32_tileloaddrs64((dst), ((const void *)(base)), \ + (__SIZE_TYPE__)(stride)) +#define _tile_stream_loaddrs(dst, base, stride) \ + __builtin_ia32_tileloaddrst164((dst), ((const void *)(base)), \ + (__SIZE_TYPE__)(stride)) +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_MOVRS +_tile_loaddrs_internal(unsigned short m, unsigned short n, const void *base, + __SIZE_TYPE__ stride) { + return __builtin_ia32_tileloaddrs64_internal(m, n, base, + (__SIZE_TYPE__)(stride)); +} +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_MOVRS +_tile_loaddrst1_internal(unsigned short m, unsigned short n, const void *base, + __SIZE_TYPE__ stride) { + return __builtin_ia32_tileloaddrst164_internal(m, n, base, + (__SIZE_TYPE__)(stride)); +} +static __inline__ void __DEFAULT_FN_ATTRS_MOVRS +__tile_loaddrs(__tile1024i *dst, const void *base, __SIZE_TYPE__ stride) { + dst->tile = _tile_loaddrs_internal(dst->row, dst->col, base, stride); +} +static __inline__ void __DEFAULT_FN_ATTRS_MOVRS __tile_stream_loaddrs( + __tile1024i *dst, const void *base, __SIZE_TYPE__ stride) { + dst->tile = _tile_loaddrst1_internal(dst->row, dst->col, base, stride); +} +#undef __DEFAULT_FN_ATTRS_MOVRS +#endif /* __x86_64__ */ +#endif /* __AMXMOVRSINTRIN_H */ diff --git a/lib/include/amxmovrstransposeintrin.h b/lib/include/amxmovrstransposeintrin.h new file mode 100644 index 000000000000..17a9f7506a04 --- /dev/null +++ b/lib/include/amxmovrstransposeintrin.h @@ -0,0 +1,200 @@ +/* ===--- amxmovrstransposeintrin.h - AMX_MOVRS_TRANSPOSE intrinsics --------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * ===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; use instead." +#endif /* __IMMINTRIN_H */ + +#ifndef __AMX_MOVRS_TRANSPOSEINTRIN_H +#define __AMX_MOVRS_TRANSPOSEINTRIN_H +#ifdef __x86_64__ + +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("amx-transpose,amx-movrs"))) + +#define _tile_2rpntlvwz0rs(tdst, base, stride) \ + __builtin_ia32_t2rpntlvwz0rs(tdst, base, stride) +#define _tile_2rpntlvwz0rst1(tdst, base, stride) \ + __builtin_ia32_t2rpntlvwz0rst1(tdst, base, stride) +#define _tile_2rpntlvwz1rs(tdst, base, stride) \ + __builtin_ia32_t2rpntlvwz1rs(tdst, base, stride) +#define _tile_2rpntlvwz1rst1(tdst, base, stride) \ + __builtin_ia32_t2rpntlvwz1rst1(tdst, base, stride) + +static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rs_internal( + unsigned short row, unsigned short col0, unsigned short col1, + _tile1024i *dst0, _tile1024i *dst1, const void *base, + __SIZE_TYPE__ stride) { + // Use __tile1024i_1024a* to escape the alignment check in + // clang/test/Headers/x86-intrinsics-headers-clean.cpp + __builtin_ia32_t2rpntlvwz0rs_internal( + row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, + (__SIZE_TYPE__)(stride)); +} + +static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz0rst1_internal( + unsigned short row, unsigned short col0, unsigned short col1, + _tile1024i *dst0, _tile1024i *dst1, const void *base, + __SIZE_TYPE__ stride) { + __builtin_ia32_t2rpntlvwz0rst1_internal( + row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, + (__SIZE_TYPE__)(stride)); +} + +static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rs_internal( + unsigned short row, unsigned short col0, unsigned short col1, + _tile1024i *dst0, _tile1024i *dst1, const void *base, + __SIZE_TYPE__ stride) { + 
__builtin_ia32_t2rpntlvwz1rs_internal( + row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, + (__SIZE_TYPE__)(stride)); +} + +static __inline__ void __DEFAULT_FN_ATTRS _tile_2rpntlvwz1rst1_internal( + unsigned short row, unsigned short col0, unsigned short col1, + _tile1024i *dst0, _tile1024i *dst1, const void *base, + __SIZE_TYPE__ stride) { + __builtin_ia32_t2rpntlvwz1rst1_internal( + row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, + (__SIZE_TYPE__)(stride)); +} + +/// Converts a pair of tiles from memory into VNNI format, and places the +/// results in a pair of destinations specified by dst. The pair of tiles +/// in memory is specified via a tsib; the second tile is after the first +/// one, separated by the same stride that separates each row. +/// The tile configuration for the destination tiles indicates the amount +/// of data to read from memory. The instruction will load a number of rows +/// that is equal to twice the number of rows in tmm1. The size of each row +/// is equal to the average width of the destination tiles. If the second +/// tile is configured with zero rows and columns, only the first tile will +/// be written. +/// Provides a hint to the implementation that the data will likely become +/// read shared in the near future and the data caching can be optimized. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the T2RPNTLVWZ0RS instruction. +/// +/// \param dst0 +/// First tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param dst1 +/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param base +/// A pointer to base address. +/// \param stride +/// The stride between the rows' data to be loaded in memory. 
+__DEFAULT_FN_ATTRS +static void __tile_2rpntlvwz0rs(__tile1024i *dst0, __tile1024i *dst1, + const void *base, __SIZE_TYPE__ stride) { + _tile_2rpntlvwz0rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, + &dst1->tile, base, stride); +} + +/// Converts a pair of tiles from memory into VNNI format, and places the +/// results in a pair of destinations specified by dst. The pair of tiles +/// in memory is specified via a tsib; the second tile is after the first +/// one, separated by the same stride that separates each row. +/// The tile configuration for the destination tiles indicates the amount +/// of data to read from memory. The instruction will load a number of rows +/// that is equal to twice the number of rows in tmm1. The size of each row +/// is equal to the average width of the destination tiles. If the second +/// tile is configured with zero rows and columns, only the first tile will +/// be written. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the T2RPNTLVWZ0T1RS instruction. +/// +/// \param dst0 +/// First tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param dst1 +/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param base +/// A pointer to base address. +/// \param stride +/// The stride between the rows' data to be loaded in memory. +__DEFAULT_FN_ATTRS +static void __tile_2rpntlvwz0rst1(__tile1024i *dst0, __tile1024i *dst1, + const void *base, __SIZE_TYPE__ stride) { + _tile_2rpntlvwz0rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, + &dst1->tile, base, stride); +} + +/// Converts a pair of tiles from memory into VNNI format, and places the +/// results in a pair of destinations specified by dst. The pair of tiles +/// in memory is specified via a tsib; the second tile is after the first +/// one, separated by the same stride that separates each row. +/// The tile configuration for the destination tiles indicates the amount +/// of data to read from memory. 
The instruction will load a number of rows +/// that is equal to twice the number of rows in tmm1. The size of each row +/// is equal to the average width of the destination tiles. If the second +/// tile is configured with zero rows and columns, only the first tile will +/// be written. The last row will be not be read from memory but instead +/// filled with zeros. +/// Provides a hint to the implementation that the data will likely become +/// read shared in the near future and the data caching can be optimized. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the T2RPNTLVWZ1 instruction. +/// +/// \param dst0 +/// First tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param dst1 +/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param base +/// A pointer to base address. +/// \param stride +/// The stride between the rows' data to be loaded in memory. +__DEFAULT_FN_ATTRS +static void __tile_2rpntlvwz1rs(__tile1024i *dst0, __tile1024i *dst1, + const void *base, __SIZE_TYPE__ stride) { + _tile_2rpntlvwz1rs_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, + &dst1->tile, base, stride); +} + +/// Converts a pair of tiles from memory into VNNI format, and places the +/// results in a pair of destinations specified by dst. The pair of tiles +/// in memory is specified via a tsib; the second tile is after the first +/// one, separated by the same stride that separates each row. +/// The tile configuration for the destination tiles indicates the amount +/// of data to read from memory. The instruction will load a number of rows +/// that is equal to twice the number of rows in tmm1. The size of each row +/// is equal to the average width of the destination tiles. If the second +/// tile is configured with zero rows and columns, only the first tile will +/// be written. The last row will be not be read from memory but instead +/// filled with zeros. 
+/// Provides a hint to the implementation that the data will likely become +/// read shared in the near future and the data caching can be optimized. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the T2RPNTLVWZ1T1RS instruction. +/// +/// \param dst0 +/// First tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param dst1 +/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param base +/// A pointer to base address. +/// \param stride +/// The stride between the rows' data to be loaded in memory. +__DEFAULT_FN_ATTRS +static void __tile_2rpntlvwz1rst1(__tile1024i *dst0, __tile1024i *dst1, + const void *base, __SIZE_TYPE__ stride) { + _tile_2rpntlvwz1rst1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, + &dst1->tile, base, stride); +} + +#undef __DEFAULT_FN_ATTRS +#endif /* __x86_64__ */ +#endif /* __AMX_MOVRS_TRANSPOSEINTRIN_H */ \ No newline at end of file diff --git a/lib/include/amxtf32intrin.h b/lib/include/amxtf32intrin.h new file mode 100644 index 000000000000..44d002c6600d --- /dev/null +++ b/lib/include/amxtf32intrin.h @@ -0,0 +1,108 @@ +/*===------------- amxtf32intrin.h - AMX_TF32 intrinsics -*- C++ -*---------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===------------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif // __IMMINTRIN_H + +#ifndef __AMX_TF32INTRIN_H +#define __AMX_TF32INTRIN_H +#ifdef __x86_64__ + +#define __DEFAULT_FN_ATTRS_TF32 \ + __attribute__((__always_inline__, __nodebug__, __target__("amx-tf32"))) + +/// Do Matrix Multiplication of \a a and \a b, and then do Matrix Plus +/// with \a srcdst. +/// All the calculation is base on float32 but with the lower 13-bit set to 0. 
+/// +/// \headerfile +/// +/// \code +/// void _tile_mmultf32ps(constexpr int srcdst, constexpr int a, \ +/// constexpr int b); +/// \endcode +/// +/// This intrinsic corresponds to the TMMULTF32PS instruction. +/// +/// \param srcdst +/// The destination tile. Max size is 1024 Bytes. +/// \param a +/// The 1st source tile. Max size is 1024 Bytes. +/// \param b +/// The 2nd source tile. Max size is 1024 Bytes. +/// +/// \code{.operation} +/// DEFINE zero_lower_mantissa_bits_fp32(x[31:0]) { +/// dword[12:0] := 0 +/// dword[31:13] := x[31:13] +/// return dword +/// } +/// +/// DEFINE silence_snan_fp32(x[31:0]) { +/// IF (x.exponent == 255 and x.fraction != 0 and x.fraction[22] == 0) +/// x.fraction[22] := 1 +/// return x +/// } +/// +/// elements_a := a.colsb / 4 +/// elements_dest := srcdst.colsb / 4 +/// +/// FOR m = 0 TO (srcdst.rows-1) +/// tmp[511:0] := 0 +/// FOR k = 0 TO (elements_a-1) +/// FOR n = 0 TO (elements_dest-1) +/// af := silence_snan_fp32(a.row[m].fp32[k]) +/// bf := silence_snan_fp32(b.row[k].fp32[n]) +/// tmp.fp32[n] += zero_lower_mantissa_bits_fp32(af) +/// * zero_lower_mantissa_bits_fp32(bf) +/// ENDFOR +/// ENDFOR +/// +/// FOR n = 0 TO (elements_dest-1) +/// tmp.fp32[n] += srcdst.row[m].fp32[n] +/// ENDFOR +/// write_row_and_zero(srcdst, m, tmp, srcdst.colsb) +/// +/// ENDFOR +/// +/// zero_upper_rows(srcdst, srcdst.rows) +/// zero_tileconfig_start() +/// \endcode +#define _tile_mmultf32ps(srcdst, a, b) \ + __builtin_ia32_tmmultf32ps((srcdst), (a), (b)) + +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TF32 +_tile_mmultf32ps_internal(unsigned short m, unsigned short n, unsigned short k, + _tile1024i dst, _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_tmmultf32ps_internal(m, n, k, dst, src1, src2); +} + +/// Do Matrix Multiplication of src0 and src1, and then do Matrix Plus with dst. +/// All the calculation is base on float32 but with the lower 13-bit set to 0. 
+/// +/// \headerfile +/// +/// This intrinsic corresponds to the TMMULTF32PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source tile. Max size is 1024 Bytes. +__DEFAULT_FN_ATTRS_TF32 +static void __tile_mmultf32ps(__tile1024i *dst, __tile1024i src0, + __tile1024i src1) { + dst->tile = _tile_mmultf32ps_internal(src0.row, src1.col, src0.col, dst->tile, + src0.tile, src1.tile); +} + +#endif // __x86_64__ +#endif // __AMX_TF32INTRIN_H diff --git a/lib/include/amxtf32transposeintrin.h b/lib/include/amxtf32transposeintrin.h new file mode 100644 index 000000000000..60336f953ecb --- /dev/null +++ b/lib/include/amxtf32transposeintrin.h @@ -0,0 +1,105 @@ +/*===--------- amxtf32transposeintrin.h - AMX-TF32 and AMX-TRANSPOSE --------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===------------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." +#endif // __IMMINTRIN_H + +#ifndef __AMX_TF32TRANSPOSEINTRIN_H +#define __AMX_TF32TRANSPOSEINTRIN_H +#ifdef __x86_64__ + +#define __DEFAULT_FN_ATTRS_TF32_TRANSPOSE \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("amx-tf32,amx-transpose"))) + +/// \code +/// void _tile_tmmultf32ps(constexpr int srcdst, constexpr int a, \ +/// constexpr int b); +/// \endcode +/// +/// This intrinsic corresponds to the TTMMULTF32PS instruction. +/// +/// \param srcdst +/// The destination tile. Max size is 1024 Bytes. +/// \param a +/// The 1st source tile. Max size is 1024 Bytes. +/// \param b +/// The 2nd source tile. Max size is 1024 Bytes. 
+/// +/// \code{.operation} +/// DEFINE zero_lower_mantissa_bits_fp32(x[31:0]) { +/// dword[12:0] := 0 +/// dword[31:13] := x[31:13] +/// return dword +/// } +/// +/// DEFINE silence_snan_fp32(x[31:0]) { +/// IF (x.exponent == 255 and x.fraction != 0 and x.fraction[22] == 0) +/// x.fraction[22] := 1 +/// return x +/// } +/// +/// elements_dest:= srcdst.colsb/4 +/// +/// FOR m := 0 TO (srcdst.rows-1) +/// tmp[511:0] := 0 +/// FOR k := 0 TO (a.rows-1) +/// FOR n := 0 TO (elements_dest-1) +/// a1e := silence_snan_fp32(a.row[k].fp32[m]) +/// a2e := silence_snan_fp32(b.row[k].fp32[n]) +/// s1e := zero_lower_mantissa_bits_fp32(a1e) +/// s2e := zero_lower_mantissa_bits_fp32(a2e) +/// tmp.fp32[n] += s1e * s2e +/// ENDFOR +/// ENDFOR +/// +/// FOR n := 0 TO (elements_dest-1) +/// tmp.fp32[n] += srcdst.row[m].fp32[n] +/// ENDFOR +/// write_row_and_zero(srcdst, m, tmp, srcdst.colsb) +/// +/// ENDFOR +/// +/// zero_upper_rows(srcdst, srcdst.rows) +/// zero_tileconfig_start() +/// \endcode +#define _tile_tmmultf32ps(srcdst, a, b) \ + __builtin_ia32_ttmmultf32ps((srcdst), (a), (b)) + +// dst = m x n (srcdest), src1 = k x m, src2 = k x n +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TF32_TRANSPOSE +_tile_tmmultf32ps_internal(unsigned short m, unsigned short n, unsigned short k, + _tile1024i dst, _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_ttmmultf32ps_internal(m, n, k, dst, src1, src2); +} + +/// Compute transpose and do Matrix Multiplication of src0 and src1, and then do +/// Matrix Plus with dst. All the calculation is base on float32 but with the +/// lower 13-bit set to 0. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TTMMULTF32PS instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src0 +/// The 1st source tile. Max size is 1024 Bytes. +/// \param src1 +/// The 2nd source tile. Max size is 1024 Bytes. 
+__DEFAULT_FN_ATTRS_TF32_TRANSPOSE +static void __tile_tmmultf32ps(__tile1024i *dst, __tile1024i src0, + __tile1024i src1) { + dst->tile = _tile_tmmultf32ps_internal(src0.row, src1.col, src0.col, + dst->tile, src0.tile, src1.tile); +} + +#endif // __x86_64__ +#endif // __AMX_TF32TRANSPOSEINTRIN_H diff --git a/lib/include/amxtransposeintrin.h b/lib/include/amxtransposeintrin.h new file mode 100644 index 000000000000..b3fa37d766c4 --- /dev/null +++ b/lib/include/amxtransposeintrin.h @@ -0,0 +1,248 @@ +/* ===--- amxtransposeintrin.h - AMX_TRANSPOSE intrinsics -*- C++ -*---------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * ===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; use instead." +#endif /* __IMMINTRIN_H */ + +#ifndef __AMX_TRANSPOSEINTRIN_H +#define __AMX_TRANSPOSEINTRIN_H +#ifdef __x86_64__ + +#define __DEFAULT_FN_ATTRS_TRANSPOSE \ + __attribute__((__always_inline__, __nodebug__, __target__("amx-transpose"))) + +#define _tile_2rpntlvwz0(tdst, base, stride) \ + __builtin_ia32_t2rpntlvwz0(tdst, base, stride) +#define _tile_2rpntlvwz0t1(tdst, base, stride) \ + __builtin_ia32_t2rpntlvwz0t1(tdst, base, stride) +#define _tile_2rpntlvwz1(tdst, base, stride) \ + __builtin_ia32_t2rpntlvwz1(tdst, base, stride) +#define _tile_2rpntlvwz1t1(tdst, base, stride) \ + __builtin_ia32_t2rpntlvwz1t1(tdst, base, stride) + +/// Transpose 32-bit elements from \a src and write the result to \a dst. +/// +/// \headerfile +/// +/// \code +/// void _tile_transposed(__tile dst, __tile src); +/// \endcode +/// +/// This intrinsic corresponds to the TTRANSPOSED instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src +/// The source tile. Max size is 1024 Bytes. 
+/// +/// \code{.operation} +/// +/// FOR i := 0 TO (dst.rows-1) +/// tmp[511:0] := 0 +/// FOR j := 0 TO (dst.colsb/4-1) +/// tmp.dword[j] := src.row[j].dword[i] +/// ENDFOR +/// dst.row[i] := tmp +/// ENDFOR +/// +/// zero_upper_rows(dst, dst.rows) +/// zero_tileconfig_start() +/// \endcode +#define _tile_transposed(dst, src) __builtin_ia32_ttransposed(dst, src) + +static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0_internal( + unsigned short row, unsigned short col0, unsigned short col1, + _tile1024i *dst0, _tile1024i *dst1, const void *base, + __SIZE_TYPE__ stride) { + // Use __tile1024i_1024a* to escape the alignment check in + // clang/test/Headers/x86-intrinsics-headers-clean.cpp + __builtin_ia32_t2rpntlvwz0_internal(row, col0, col1, (_tile1024i_1024a *)dst0, + (_tile1024i_1024a *)dst1, base, + (__SIZE_TYPE__)(stride)); +} + +static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz0t1_internal( + unsigned short row, unsigned short col0, unsigned short col1, + _tile1024i *dst0, _tile1024i *dst1, const void *base, + __SIZE_TYPE__ stride) { + __builtin_ia32_t2rpntlvwz0t1_internal( + row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, + (__SIZE_TYPE__)(stride)); +} + +static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1_internal( + unsigned short row, unsigned short col0, unsigned short col1, + _tile1024i *dst0, _tile1024i *dst1, const void *base, + __SIZE_TYPE__ stride) { + __builtin_ia32_t2rpntlvwz1_internal(row, col0, col1, (_tile1024i_1024a *)dst0, + (_tile1024i_1024a *)dst1, base, + (__SIZE_TYPE__)(stride)); +} + +static __inline__ void __DEFAULT_FN_ATTRS_TRANSPOSE _tile_2rpntlvwz1t1_internal( + unsigned short row, unsigned short col0, unsigned short col1, + _tile1024i *dst0, _tile1024i *dst1, const void *base, + __SIZE_TYPE__ stride) { + __builtin_ia32_t2rpntlvwz1t1_internal( + row, col0, col1, (_tile1024i_1024a *)dst0, (_tile1024i_1024a *)dst1, base, + (__SIZE_TYPE__)(stride)); +} + +// 
This is internal intrinsic. C/C++ user should avoid calling it directly. +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_TRANSPOSE +_tile_transposed_internal(unsigned short m, unsigned short n, _tile1024i src) { + return __builtin_ia32_ttransposed_internal(m, n, src); +} + +/// Converts a pair of tiles from memory into VNNI format, and places the +/// results in a pair of destinations specified by dst. The pair of tiles +/// in memory is specified via a tsib; the second tile is after the first +/// one, separated by the same stride that separates each row. +/// The tile configuration for the destination tiles indicates the amount +/// of data to read from memory. The instruction will load a number of rows +/// that is equal to twice the number of rows in tmm1. The size of each row +/// is equal to the average width of the destination tiles. If the second +/// tile is configured with zero rows and columns, only the first tile will +/// be written. +/// Provides a hint to the implementation that the data will likely not be +/// reused in the near future and the data caching can be optimized. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the T2RPNTLVWZ0 instruction. +/// +/// \param dst0 +/// First tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param dst1 +/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param base +/// A pointer to base address. +/// \param stride +/// The stride between the rows' data to be loaded in memory. +__DEFAULT_FN_ATTRS_TRANSPOSE +static void __tile_2rpntlvwz0(__tile1024i *dst0, __tile1024i *dst1, + const void *base, __SIZE_TYPE__ stride) { + _tile_2rpntlvwz0_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, + &dst1->tile, base, stride); +} + +/// Converts a pair of tiles from memory into VNNI format, and places the +/// results in a pair of destinations specified by dst. 
The pair of tiles +/// in memory is specified via a tsib; the second tile is after the first +/// one, separated by the same stride that separates each row. +/// The tile configuration for the destination tiles indicates the amount +/// of data to read from memory. The instruction will load a number of rows +/// that is equal to twice the number of rows in tmm1. The size of each row +/// is equal to the average width of the destination tiles. If the second +/// tile is configured with zero rows and columns, only the first tile will +/// be written. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the T2RPNTLVWZ0T1 instruction. +/// +/// \param dst0 +/// First tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param dst1 +/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param base +/// A pointer to base address. +/// \param stride +/// The stride between the rows' data to be loaded in memory. +__DEFAULT_FN_ATTRS_TRANSPOSE +static void __tile_2rpntlvwz0t1(__tile1024i *dst0, __tile1024i *dst1, + const void *base, __SIZE_TYPE__ stride) { + _tile_2rpntlvwz0t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, + &dst1->tile, base, stride); +} + +/// Converts a pair of tiles from memory into VNNI format, and places the +/// results in a pair of destinations specified by dst. The pair of tiles +/// in memory is specified via a tsib; the second tile is after the first +/// one, separated by the same stride that separates each row. +/// The tile configuration for the destination tiles indicates the amount +/// of data to read from memory. The instruction will load a number of rows +/// that is equal to twice the number of rows in tmm1. The size of each row +/// is equal to the average width of the destination tiles. If the second +/// tile is configured with zero rows and columns, only the first tile will +/// be written. The last row will be not be read from memory but instead +/// filled with zeros. 
+/// Provides a hint to the implementation that the data will likely not be +/// reused in the near future and the data caching can be optimized. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the T2RPNTLVWZ1 instruction. +/// +/// \param dst0 +/// First tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param dst1 +/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param base +/// A pointer to base address. +/// \param stride +/// The stride between the rows' data to be loaded in memory. +__DEFAULT_FN_ATTRS_TRANSPOSE +static void __tile_2rpntlvwz1(__tile1024i *dst0, __tile1024i *dst1, + const void *base, __SIZE_TYPE__ stride) { + _tile_2rpntlvwz1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, + &dst1->tile, base, stride); +} + +/// Converts a pair of tiles from memory into VNNI format, and places the +/// results in a pair of destinations specified by dst. The pair of tiles +/// in memory is specified via a tsib; the second tile is after the first +/// one, separated by the same stride that separates each row. +/// The tile configuration for the destination tiles indicates the amount +/// of data to read from memory. The instruction will load a number of rows +/// that is equal to twice the number of rows in tmm1. The size of each row +/// is equal to the average width of the destination tiles. If the second +/// tile is configured with zero rows and columns, only the first tile will +/// be written. The last row will be not be read from memory but instead +/// filled with zeros. +/// Provides a hint to the implementation that the data will likely not be +/// reused in the near future and the data caching can be optimized. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the T2RPNTLVWZ1T1 instruction. +/// +/// \param dst0 +/// First tile of destination tile pair. Max size is 1024i*2 Bytes. +/// \param dst1 +/// Second tile of destination tile pair. Max size is 1024i*2 Bytes. 
+/// \param base +/// A pointer to base address. +/// \param stride +/// The stride between the rows' data to be loaded in memory. +__DEFAULT_FN_ATTRS_TRANSPOSE +static void __tile_2rpntlvwz1t1(__tile1024i *dst0, __tile1024i *dst1, + const void *base, __SIZE_TYPE__ stride) { + _tile_2rpntlvwz1t1_internal(dst0->row, dst0->col, dst1->col, &dst0->tile, + &dst1->tile, base, stride); +} + +/// Transpose 32-bit elements from src and write the result to dst. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TTRANSPOSED instruction. +/// +/// \param dst +/// The destination tile. Max size is 1024 Bytes. +/// \param src +/// The source tile. Max size is 1024 Bytes. +__DEFAULT_FN_ATTRS_TRANSPOSE +static void __tile_transposed(__tile1024i *dst, __tile1024i src) { + dst->tile = _tile_transposed_internal(dst->row, dst->col, src.tile); +} + +#endif /* __x86_64__ */ +#endif /* __AMX_TRANSPOSEINTRIN_H */ diff --git a/lib/include/arm_acle.h b/lib/include/arm_acle.h index 1518b0c4c842..b1dc90f84ad3 100644 --- a/lib/include/arm_acle.h +++ b/lib/include/arm_acle.h @@ -264,28 +264,28 @@ __rbitl(unsigned long __t) { } /* 8.3 16-bit multiplications */ -#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP -static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) +#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE +static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp"))) __smulbb(int32_t __a, int32_t __b) { return __builtin_arm_smulbb(__a, __b); } -static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp"))) __smulbt(int32_t __a, int32_t __b) { return __builtin_arm_smulbt(__a, __b); } -static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp"))) __smultb(int32_t __a, int32_t __b) { return __builtin_arm_smultb(__a, __b); } -static 
__inline__ int32_t __attribute__((__always_inline__,__nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp"))) __smultt(int32_t __a, int32_t __b) { return __builtin_arm_smultt(__a, __b); } -static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp"))) __smulwb(int32_t __a, int32_t __b) { return __builtin_arm_smulwb(__a, __b); } -static __inline__ int32_t __attribute__((__always_inline__,__nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__,__nodebug__, target("dsp"))) __smulwt(int32_t __a, int32_t __b) { return __builtin_arm_smulwt(__a, __b); } @@ -304,46 +304,46 @@ __smulwt(int32_t __a, int32_t __b) { #endif /* 8.4.2 Saturating addition and subtraction intrinsics */ -#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __qadd(int32_t __t, int32_t __v) { return __builtin_arm_qadd(__t, __v); } -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __qsub(int32_t __t, int32_t __v) { return __builtin_arm_qsub(__t, __v); } -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __qdbl(int32_t __t) { return __builtin_arm_qadd(__t, __t); } #endif /* 8.4.3 Accumulating multiplications */ -#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +#if defined(__ARM_32BIT_STATE) && __ARM_32BIT_STATE +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __smlabb(int32_t __a, int32_t 
__b, int32_t __c) { return __builtin_arm_smlabb(__a, __b, __c); } -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __smlabt(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlabt(__a, __b, __c); } -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __smlatb(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlatb(__a, __b, __c); } -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __smlatt(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlatt(__a, __b, __c); } -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __smlawb(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlawb(__a, __b, __c); } -static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__, target("dsp"))) __smlawt(int32_t __a, int32_t __b, int32_t __c) { return __builtin_arm_smlawt(__a, __b, __c); } @@ -621,8 +621,6 @@ __rintnf(float __a) { #endif /* 8.8 CRC32 intrinsics */ -#if (defined(__ARM_FEATURE_CRC32) && __ARM_FEATURE_CRC32) || \ - (defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE) static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target("crc"))) __crc32b(uint32_t __a, uint8_t __b) { return __builtin_arm_crc32b(__a, __b); @@ -662,7 +660,6 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__, target __crc32cd(uint32_t __a, uint64_t __b) { return __builtin_arm_crc32cd(__a, __b); } -#endif /* 8.6 Floating-point data-processing intrinsics */ /* Armv8.3-A Javascript 
conversion intrinsic */ diff --git a/lib/include/arm_neon.h b/lib/include/arm_neon.h index b67616134b88..ab28e839e4cb 100644 --- a/lib/include/arm_neon.h +++ b/lib/include/arm_neon.h @@ -359,9 +359,7 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t __noswap_vcombine_bf16(bf }) __ai __attribute__((target("bf16,neon"))) float32_t vcvtah_f32_bf16(bfloat16_t __p0) { float32_t __ret; -bfloat16_t __reint = __p0; -int32_t __reint1 = (int32_t)(*(int16_t *) &__reint) << 16; - __ret = *(float32_t *) &__reint1; + __ret = __builtin_bit_cast(float32_t, (uint32_t)(__builtin_bit_cast(uint16_t, __p0)) << 16); return __ret; } __ai __attribute__((target("bf16,neon"))) bfloat16_t vcvth_bf16_f32(float32_t __p0) { @@ -35841,9 +35839,7 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_f float16x4_t __s0_150 = __p0_150; \ float16x4_t __s1_150 = __p1_150; \ float16x4_t __s2_150 = __p2_150; \ -float16x4_t __reint_150 = __s2_150; \ -uint32x2_t __reint1_150 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_150, __p3_150), vget_lane_u32(*(uint32x2_t *) &__reint_150, __p3_150)}; \ - __ret_150 = vcmla_f16(__s0_150, __s1_150, *(float16x4_t *) &__reint1_150); \ + __ret_150 = vcmla_f16(__s0_150, __s1_150, __builtin_bit_cast(float16x4_t, (uint32x2_t) {vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_150), __p3_150), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_150), __p3_150)})); \ __ret_150; \ }) #else @@ -35855,9 +35851,7 @@ uint32x2_t __reint1_150 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_1 float16x4_t __rev0_151; __rev0_151 = __builtin_shufflevector(__s0_151, __s0_151, 3, 2, 1, 0); \ float16x4_t __rev1_151; __rev1_151 = __builtin_shufflevector(__s1_151, __s1_151, 3, 2, 1, 0); \ float16x4_t __rev2_151; __rev2_151 = __builtin_shufflevector(__s2_151, __s2_151, 3, 2, 1, 0); \ -float16x4_t __reint_151 = __rev2_151; \ -uint32x2_t __reint1_151 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_151, __p3_151), 
__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_151, __p3_151)}; \ - __ret_151 = __noswap_vcmla_f16(__rev0_151, __rev1_151, *(float16x4_t *) &__reint1_151); \ + __ret_151 = __noswap_vcmla_f16(__rev0_151, __rev1_151, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_151), __p3_151), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_151), __p3_151)})); \ __ret_151 = __builtin_shufflevector(__ret_151, __ret_151, 3, 2, 1, 0); \ __ret_151; \ }) @@ -35869,9 +35863,7 @@ uint32x2_t __reint1_151 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) & float16x8_t __s0_152 = __p0_152; \ float16x8_t __s1_152 = __p1_152; \ float16x4_t __s2_152 = __p2_152; \ -float16x4_t __reint_152 = __s2_152; \ -uint32x4_t __reint1_152 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_152, __p3_152), vget_lane_u32(*(uint32x2_t *) &__reint_152, __p3_152), vget_lane_u32(*(uint32x2_t *) &__reint_152, __p3_152), vget_lane_u32(*(uint32x2_t *) &__reint_152, __p3_152)}; \ - __ret_152 = vcmlaq_f16(__s0_152, __s1_152, *(float16x8_t *) &__reint1_152); \ + __ret_152 = vcmlaq_f16(__s0_152, __s1_152, __builtin_bit_cast(float16x8_t, (uint32x4_t) {vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_152), __p3_152), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_152), __p3_152), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_152), __p3_152), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_152), __p3_152)})); \ __ret_152; \ }) #else @@ -35883,9 +35875,7 @@ uint32x4_t __reint1_152 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_1 float16x8_t __rev0_153; __rev0_153 = __builtin_shufflevector(__s0_153, __s0_153, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_153; __rev1_153 = __builtin_shufflevector(__s1_153, __s1_153, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_153; __rev2_153 = __builtin_shufflevector(__s2_153, __s2_153, 3, 2, 1, 0); \ -float16x4_t __reint_153 = __rev2_153; \ -uint32x4_t __reint1_153 = (uint32x4_t) 
{__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_153, __p3_153), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_153, __p3_153), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_153, __p3_153), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_153, __p3_153)}; \ - __ret_153 = __noswap_vcmlaq_f16(__rev0_153, __rev1_153, *(float16x8_t *) &__reint1_153); \ + __ret_153 = __noswap_vcmlaq_f16(__rev0_153, __rev1_153, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_153), __p3_153), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_153), __p3_153), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_153), __p3_153), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_153), __p3_153)})); \ __ret_153 = __builtin_shufflevector(__ret_153, __ret_153, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_153; \ }) @@ -35897,9 +35887,7 @@ uint32x4_t __reint1_153 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) & float16x4_t __s0_154 = __p0_154; \ float16x4_t __s1_154 = __p1_154; \ float16x8_t __s2_154 = __p2_154; \ -float16x8_t __reint_154 = __s2_154; \ -uint32x2_t __reint1_154 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_154, __p3_154), vgetq_lane_u32(*(uint32x4_t *) &__reint_154, __p3_154)}; \ - __ret_154 = vcmla_f16(__s0_154, __s1_154, *(float16x4_t *) &__reint1_154); \ + __ret_154 = vcmla_f16(__s0_154, __s1_154, __builtin_bit_cast(float16x4_t, (uint32x2_t) {vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_154), __p3_154), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_154), __p3_154)})); \ __ret_154; \ }) #else @@ -35911,9 +35899,7 @@ uint32x2_t __reint1_154 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_ float16x4_t __rev0_155; __rev0_155 = __builtin_shufflevector(__s0_155, __s0_155, 3, 2, 1, 0); \ float16x4_t __rev1_155; __rev1_155 = __builtin_shufflevector(__s1_155, __s1_155, 3, 2, 1, 0); \ float16x8_t __rev2_155; __rev2_155 = __builtin_shufflevector(__s2_155, __s2_155, 7, 6, 5, 
4, 3, 2, 1, 0); \ -float16x8_t __reint_155 = __rev2_155; \ -uint32x2_t __reint1_155 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_155, __p3_155), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_155, __p3_155)}; \ - __ret_155 = __noswap_vcmla_f16(__rev0_155, __rev1_155, *(float16x4_t *) &__reint1_155); \ + __ret_155 = __noswap_vcmla_f16(__rev0_155, __rev1_155, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_155), __p3_155), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_155), __p3_155)})); \ __ret_155 = __builtin_shufflevector(__ret_155, __ret_155, 3, 2, 1, 0); \ __ret_155; \ }) @@ -35925,9 +35911,7 @@ uint32x2_t __reint1_155 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) float16x8_t __s0_156 = __p0_156; \ float16x8_t __s1_156 = __p1_156; \ float16x8_t __s2_156 = __p2_156; \ -float16x8_t __reint_156 = __s2_156; \ -uint32x4_t __reint1_156 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_156, __p3_156), vgetq_lane_u32(*(uint32x4_t *) &__reint_156, __p3_156), vgetq_lane_u32(*(uint32x4_t *) &__reint_156, __p3_156), vgetq_lane_u32(*(uint32x4_t *) &__reint_156, __p3_156)}; \ - __ret_156 = vcmlaq_f16(__s0_156, __s1_156, *(float16x8_t *) &__reint1_156); \ + __ret_156 = vcmlaq_f16(__s0_156, __s1_156, __builtin_bit_cast(float16x8_t, (uint32x4_t) {vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_156), __p3_156), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_156), __p3_156), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_156), __p3_156), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_156), __p3_156)})); \ __ret_156; \ }) #else @@ -35939,9 +35923,7 @@ uint32x4_t __reint1_156 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_ float16x8_t __rev0_157; __rev0_157 = __builtin_shufflevector(__s0_157, __s0_157, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_157; __rev1_157 = __builtin_shufflevector(__s1_157, __s1_157, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t 
__rev2_157; __rev2_157 = __builtin_shufflevector(__s2_157, __s2_157, 7, 6, 5, 4, 3, 2, 1, 0); \ -float16x8_t __reint_157 = __rev2_157; \ -uint32x4_t __reint1_157 = (uint32x4_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_157, __p3_157), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_157, __p3_157), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_157, __p3_157), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_157, __p3_157)}; \ - __ret_157 = __noswap_vcmlaq_f16(__rev0_157, __rev1_157, *(float16x8_t *) &__reint1_157); \ + __ret_157 = __noswap_vcmlaq_f16(__rev0_157, __rev1_157, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_157), __p3_157), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_157), __p3_157), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_157), __p3_157), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_157), __p3_157)})); \ __ret_157 = __builtin_shufflevector(__ret_157, __ret_157, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_157; \ }) @@ -35999,9 +35981,7 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x4_t __s0_158 = __p0_158; \ float16x4_t __s1_158 = __p1_158; \ float16x4_t __s2_158 = __p2_158; \ -float16x4_t __reint_158 = __s2_158; \ -uint32x2_t __reint1_158 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_158, __p3_158), vget_lane_u32(*(uint32x2_t *) &__reint_158, __p3_158)}; \ - __ret_158 = vcmla_rot180_f16(__s0_158, __s1_158, *(float16x4_t *) &__reint1_158); \ + __ret_158 = vcmla_rot180_f16(__s0_158, __s1_158, __builtin_bit_cast(float16x4_t, (uint32x2_t) {vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_158), __p3_158), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_158), __p3_158)})); \ __ret_158; \ }) #else @@ -36013,9 +35993,7 @@ uint32x2_t __reint1_158 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_1 float16x4_t __rev0_159; __rev0_159 = __builtin_shufflevector(__s0_159, __s0_159, 3, 2, 
1, 0); \ float16x4_t __rev1_159; __rev1_159 = __builtin_shufflevector(__s1_159, __s1_159, 3, 2, 1, 0); \ float16x4_t __rev2_159; __rev2_159 = __builtin_shufflevector(__s2_159, __s2_159, 3, 2, 1, 0); \ -float16x4_t __reint_159 = __rev2_159; \ -uint32x2_t __reint1_159 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_159, __p3_159), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_159, __p3_159)}; \ - __ret_159 = __noswap_vcmla_rot180_f16(__rev0_159, __rev1_159, *(float16x4_t *) &__reint1_159); \ + __ret_159 = __noswap_vcmla_rot180_f16(__rev0_159, __rev1_159, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_159), __p3_159), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_159), __p3_159)})); \ __ret_159 = __builtin_shufflevector(__ret_159, __ret_159, 3, 2, 1, 0); \ __ret_159; \ }) @@ -36027,9 +36005,7 @@ uint32x2_t __reint1_159 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) & float16x8_t __s0_160 = __p0_160; \ float16x8_t __s1_160 = __p1_160; \ float16x4_t __s2_160 = __p2_160; \ -float16x4_t __reint_160 = __s2_160; \ -uint32x4_t __reint1_160 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_160, __p3_160), vget_lane_u32(*(uint32x2_t *) &__reint_160, __p3_160), vget_lane_u32(*(uint32x2_t *) &__reint_160, __p3_160), vget_lane_u32(*(uint32x2_t *) &__reint_160, __p3_160)}; \ - __ret_160 = vcmlaq_rot180_f16(__s0_160, __s1_160, *(float16x8_t *) &__reint1_160); \ + __ret_160 = vcmlaq_rot180_f16(__s0_160, __s1_160, __builtin_bit_cast(float16x8_t, (uint32x4_t) {vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_160), __p3_160), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_160), __p3_160), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_160), __p3_160), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_160), __p3_160)})); \ __ret_160; \ }) #else @@ -36041,9 +36017,7 @@ uint32x4_t __reint1_160 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_1 float16x8_t __rev0_161; 
__rev0_161 = __builtin_shufflevector(__s0_161, __s0_161, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_161; __rev1_161 = __builtin_shufflevector(__s1_161, __s1_161, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_161; __rev2_161 = __builtin_shufflevector(__s2_161, __s2_161, 3, 2, 1, 0); \ -float16x4_t __reint_161 = __rev2_161; \ -uint32x4_t __reint1_161 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_161, __p3_161), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_161, __p3_161), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_161, __p3_161), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_161, __p3_161)}; \ - __ret_161 = __noswap_vcmlaq_rot180_f16(__rev0_161, __rev1_161, *(float16x8_t *) &__reint1_161); \ + __ret_161 = __noswap_vcmlaq_rot180_f16(__rev0_161, __rev1_161, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_161), __p3_161), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_161), __p3_161), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_161), __p3_161), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_161), __p3_161)})); \ __ret_161 = __builtin_shufflevector(__ret_161, __ret_161, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_161; \ }) @@ -36055,9 +36029,7 @@ uint32x4_t __reint1_161 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) & float16x4_t __s0_162 = __p0_162; \ float16x4_t __s1_162 = __p1_162; \ float16x8_t __s2_162 = __p2_162; \ -float16x8_t __reint_162 = __s2_162; \ -uint32x2_t __reint1_162 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_162, __p3_162), vgetq_lane_u32(*(uint32x4_t *) &__reint_162, __p3_162)}; \ - __ret_162 = vcmla_rot180_f16(__s0_162, __s1_162, *(float16x4_t *) &__reint1_162); \ + __ret_162 = vcmla_rot180_f16(__s0_162, __s1_162, __builtin_bit_cast(float16x4_t, (uint32x2_t) {vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_162), __p3_162), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_162), __p3_162)})); \ __ret_162; \ 
}) #else @@ -36069,9 +36041,7 @@ uint32x2_t __reint1_162 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_ float16x4_t __rev0_163; __rev0_163 = __builtin_shufflevector(__s0_163, __s0_163, 3, 2, 1, 0); \ float16x4_t __rev1_163; __rev1_163 = __builtin_shufflevector(__s1_163, __s1_163, 3, 2, 1, 0); \ float16x8_t __rev2_163; __rev2_163 = __builtin_shufflevector(__s2_163, __s2_163, 7, 6, 5, 4, 3, 2, 1, 0); \ -float16x8_t __reint_163 = __rev2_163; \ -uint32x2_t __reint1_163 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_163, __p3_163), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_163, __p3_163)}; \ - __ret_163 = __noswap_vcmla_rot180_f16(__rev0_163, __rev1_163, *(float16x4_t *) &__reint1_163); \ + __ret_163 = __noswap_vcmla_rot180_f16(__rev0_163, __rev1_163, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_163), __p3_163), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_163), __p3_163)})); \ __ret_163 = __builtin_shufflevector(__ret_163, __ret_163, 3, 2, 1, 0); \ __ret_163; \ }) @@ -36083,9 +36053,7 @@ uint32x2_t __reint1_163 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) float16x8_t __s0_164 = __p0_164; \ float16x8_t __s1_164 = __p1_164; \ float16x8_t __s2_164 = __p2_164; \ -float16x8_t __reint_164 = __s2_164; \ -uint32x4_t __reint1_164 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_164, __p3_164), vgetq_lane_u32(*(uint32x4_t *) &__reint_164, __p3_164), vgetq_lane_u32(*(uint32x4_t *) &__reint_164, __p3_164), vgetq_lane_u32(*(uint32x4_t *) &__reint_164, __p3_164)}; \ - __ret_164 = vcmlaq_rot180_f16(__s0_164, __s1_164, *(float16x8_t *) &__reint1_164); \ + __ret_164 = vcmlaq_rot180_f16(__s0_164, __s1_164, __builtin_bit_cast(float16x8_t, (uint32x4_t) {vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_164), __p3_164), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_164), __p3_164), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_164), __p3_164), 
vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_164), __p3_164)})); \ __ret_164; \ }) #else @@ -36097,9 +36065,7 @@ uint32x4_t __reint1_164 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_ float16x8_t __rev0_165; __rev0_165 = __builtin_shufflevector(__s0_165, __s0_165, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_165; __rev1_165 = __builtin_shufflevector(__s1_165, __s1_165, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_165; __rev2_165 = __builtin_shufflevector(__s2_165, __s2_165, 7, 6, 5, 4, 3, 2, 1, 0); \ -float16x8_t __reint_165 = __rev2_165; \ -uint32x4_t __reint1_165 = (uint32x4_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_165, __p3_165), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_165, __p3_165), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_165, __p3_165), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_165, __p3_165)}; \ - __ret_165 = __noswap_vcmlaq_rot180_f16(__rev0_165, __rev1_165, *(float16x8_t *) &__reint1_165); \ + __ret_165 = __noswap_vcmlaq_rot180_f16(__rev0_165, __rev1_165, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_165), __p3_165), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_165), __p3_165), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_165), __p3_165), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_165), __p3_165)})); \ __ret_165 = __builtin_shufflevector(__ret_165, __ret_165, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_165; \ }) @@ -36157,9 +36123,7 @@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x4_t __s0_166 = __p0_166; \ float16x4_t __s1_166 = __p1_166; \ float16x4_t __s2_166 = __p2_166; \ -float16x4_t __reint_166 = __s2_166; \ -uint32x2_t __reint1_166 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_166, __p3_166), vget_lane_u32(*(uint32x2_t *) &__reint_166, __p3_166)}; \ - __ret_166 = vcmla_rot270_f16(__s0_166, __s1_166, *(float16x4_t *) &__reint1_166); \ + 
__ret_166 = vcmla_rot270_f16(__s0_166, __s1_166, __builtin_bit_cast(float16x4_t, (uint32x2_t) {vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_166), __p3_166), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_166), __p3_166)})); \ __ret_166; \ }) #else @@ -36171,9 +36135,7 @@ uint32x2_t __reint1_166 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_1 float16x4_t __rev0_167; __rev0_167 = __builtin_shufflevector(__s0_167, __s0_167, 3, 2, 1, 0); \ float16x4_t __rev1_167; __rev1_167 = __builtin_shufflevector(__s1_167, __s1_167, 3, 2, 1, 0); \ float16x4_t __rev2_167; __rev2_167 = __builtin_shufflevector(__s2_167, __s2_167, 3, 2, 1, 0); \ -float16x4_t __reint_167 = __rev2_167; \ -uint32x2_t __reint1_167 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_167, __p3_167), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_167, __p3_167)}; \ - __ret_167 = __noswap_vcmla_rot270_f16(__rev0_167, __rev1_167, *(float16x4_t *) &__reint1_167); \ + __ret_167 = __noswap_vcmla_rot270_f16(__rev0_167, __rev1_167, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_167), __p3_167), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_167), __p3_167)})); \ __ret_167 = __builtin_shufflevector(__ret_167, __ret_167, 3, 2, 1, 0); \ __ret_167; \ }) @@ -36185,9 +36147,7 @@ uint32x2_t __reint1_167 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) & float16x8_t __s0_168 = __p0_168; \ float16x8_t __s1_168 = __p1_168; \ float16x4_t __s2_168 = __p2_168; \ -float16x4_t __reint_168 = __s2_168; \ -uint32x4_t __reint1_168 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_168, __p3_168), vget_lane_u32(*(uint32x2_t *) &__reint_168, __p3_168), vget_lane_u32(*(uint32x2_t *) &__reint_168, __p3_168), vget_lane_u32(*(uint32x2_t *) &__reint_168, __p3_168)}; \ - __ret_168 = vcmlaq_rot270_f16(__s0_168, __s1_168, *(float16x8_t *) &__reint1_168); \ + __ret_168 = vcmlaq_rot270_f16(__s0_168, __s1_168, 
__builtin_bit_cast(float16x8_t, (uint32x4_t) {vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_168), __p3_168), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_168), __p3_168), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_168), __p3_168), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_168), __p3_168)})); \ __ret_168; \ }) #else @@ -36199,9 +36159,7 @@ uint32x4_t __reint1_168 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_1 float16x8_t __rev0_169; __rev0_169 = __builtin_shufflevector(__s0_169, __s0_169, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_169; __rev1_169 = __builtin_shufflevector(__s1_169, __s1_169, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_169; __rev2_169 = __builtin_shufflevector(__s2_169, __s2_169, 3, 2, 1, 0); \ -float16x4_t __reint_169 = __rev2_169; \ -uint32x4_t __reint1_169 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_169, __p3_169), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_169, __p3_169), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_169, __p3_169), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_169, __p3_169)}; \ - __ret_169 = __noswap_vcmlaq_rot270_f16(__rev0_169, __rev1_169, *(float16x8_t *) &__reint1_169); \ + __ret_169 = __noswap_vcmlaq_rot270_f16(__rev0_169, __rev1_169, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_169), __p3_169), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_169), __p3_169), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_169), __p3_169), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_169), __p3_169)})); \ __ret_169 = __builtin_shufflevector(__ret_169, __ret_169, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_169; \ }) @@ -36213,9 +36171,7 @@ uint32x4_t __reint1_169 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) & float16x4_t __s0_170 = __p0_170; \ float16x4_t __s1_170 = __p1_170; \ float16x8_t __s2_170 = __p2_170; \ -float16x8_t __reint_170 = __s2_170; \ -uint32x2_t 
__reint1_170 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_170, __p3_170), vgetq_lane_u32(*(uint32x4_t *) &__reint_170, __p3_170)}; \ - __ret_170 = vcmla_rot270_f16(__s0_170, __s1_170, *(float16x4_t *) &__reint1_170); \ + __ret_170 = vcmla_rot270_f16(__s0_170, __s1_170, __builtin_bit_cast(float16x4_t, (uint32x2_t) {vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_170), __p3_170), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_170), __p3_170)})); \ __ret_170; \ }) #else @@ -36227,9 +36183,7 @@ uint32x2_t __reint1_170 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_ float16x4_t __rev0_171; __rev0_171 = __builtin_shufflevector(__s0_171, __s0_171, 3, 2, 1, 0); \ float16x4_t __rev1_171; __rev1_171 = __builtin_shufflevector(__s1_171, __s1_171, 3, 2, 1, 0); \ float16x8_t __rev2_171; __rev2_171 = __builtin_shufflevector(__s2_171, __s2_171, 7, 6, 5, 4, 3, 2, 1, 0); \ -float16x8_t __reint_171 = __rev2_171; \ -uint32x2_t __reint1_171 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_171, __p3_171), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_171, __p3_171)}; \ - __ret_171 = __noswap_vcmla_rot270_f16(__rev0_171, __rev1_171, *(float16x4_t *) &__reint1_171); \ + __ret_171 = __noswap_vcmla_rot270_f16(__rev0_171, __rev1_171, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_171), __p3_171), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_171), __p3_171)})); \ __ret_171 = __builtin_shufflevector(__ret_171, __ret_171, 3, 2, 1, 0); \ __ret_171; \ }) @@ -36241,9 +36195,7 @@ uint32x2_t __reint1_171 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) float16x8_t __s0_172 = __p0_172; \ float16x8_t __s1_172 = __p1_172; \ float16x8_t __s2_172 = __p2_172; \ -float16x8_t __reint_172 = __s2_172; \ -uint32x4_t __reint1_172 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_172, __p3_172), vgetq_lane_u32(*(uint32x4_t *) &__reint_172, __p3_172), vgetq_lane_u32(*(uint32x4_t 
*) &__reint_172, __p3_172), vgetq_lane_u32(*(uint32x4_t *) &__reint_172, __p3_172)}; \ - __ret_172 = vcmlaq_rot270_f16(__s0_172, __s1_172, *(float16x8_t *) &__reint1_172); \ + __ret_172 = vcmlaq_rot270_f16(__s0_172, __s1_172, __builtin_bit_cast(float16x8_t, (uint32x4_t) {vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_172), __p3_172), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_172), __p3_172), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_172), __p3_172), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_172), __p3_172)})); \ __ret_172; \ }) #else @@ -36255,9 +36207,7 @@ uint32x4_t __reint1_172 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_ float16x8_t __rev0_173; __rev0_173 = __builtin_shufflevector(__s0_173, __s0_173, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_173; __rev1_173 = __builtin_shufflevector(__s1_173, __s1_173, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_173; __rev2_173 = __builtin_shufflevector(__s2_173, __s2_173, 7, 6, 5, 4, 3, 2, 1, 0); \ -float16x8_t __reint_173 = __rev2_173; \ -uint32x4_t __reint1_173 = (uint32x4_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_173, __p3_173), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_173, __p3_173), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_173, __p3_173), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_173, __p3_173)}; \ - __ret_173 = __noswap_vcmlaq_rot270_f16(__rev0_173, __rev1_173, *(float16x8_t *) &__reint1_173); \ + __ret_173 = __noswap_vcmlaq_rot270_f16(__rev0_173, __rev1_173, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_173), __p3_173), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_173), __p3_173), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_173), __p3_173), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_173), __p3_173)})); \ __ret_173 = __builtin_shufflevector(__ret_173, __ret_173, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_173; \ }) @@ -36315,9 +36265,7 
@@ __ai __attribute__((target("v8.3a,fullfp16,neon"))) float16x4_t __noswap_vcmla_r float16x4_t __s0_174 = __p0_174; \ float16x4_t __s1_174 = __p1_174; \ float16x4_t __s2_174 = __p2_174; \ -float16x4_t __reint_174 = __s2_174; \ -uint32x2_t __reint1_174 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_174, __p3_174), vget_lane_u32(*(uint32x2_t *) &__reint_174, __p3_174)}; \ - __ret_174 = vcmla_rot90_f16(__s0_174, __s1_174, *(float16x4_t *) &__reint1_174); \ + __ret_174 = vcmla_rot90_f16(__s0_174, __s1_174, __builtin_bit_cast(float16x4_t, (uint32x2_t) {vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_174), __p3_174), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_174), __p3_174)})); \ __ret_174; \ }) #else @@ -36329,9 +36277,7 @@ uint32x2_t __reint1_174 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_1 float16x4_t __rev0_175; __rev0_175 = __builtin_shufflevector(__s0_175, __s0_175, 3, 2, 1, 0); \ float16x4_t __rev1_175; __rev1_175 = __builtin_shufflevector(__s1_175, __s1_175, 3, 2, 1, 0); \ float16x4_t __rev2_175; __rev2_175 = __builtin_shufflevector(__s2_175, __s2_175, 3, 2, 1, 0); \ -float16x4_t __reint_175 = __rev2_175; \ -uint32x2_t __reint1_175 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_175, __p3_175), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_175, __p3_175)}; \ - __ret_175 = __noswap_vcmla_rot90_f16(__rev0_175, __rev1_175, *(float16x4_t *) &__reint1_175); \ + __ret_175 = __noswap_vcmla_rot90_f16(__rev0_175, __rev1_175, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_175), __p3_175), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_175), __p3_175)})); \ __ret_175 = __builtin_shufflevector(__ret_175, __ret_175, 3, 2, 1, 0); \ __ret_175; \ }) @@ -36343,9 +36289,7 @@ uint32x2_t __reint1_175 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) & float16x8_t __s0_176 = __p0_176; \ float16x8_t __s1_176 = __p1_176; \ float16x4_t __s2_176 = __p2_176; \ 
-float16x4_t __reint_176 = __s2_176; \ -uint32x4_t __reint1_176 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_176, __p3_176), vget_lane_u32(*(uint32x2_t *) &__reint_176, __p3_176), vget_lane_u32(*(uint32x2_t *) &__reint_176, __p3_176), vget_lane_u32(*(uint32x2_t *) &__reint_176, __p3_176)}; \ - __ret_176 = vcmlaq_rot90_f16(__s0_176, __s1_176, *(float16x8_t *) &__reint1_176); \ + __ret_176 = vcmlaq_rot90_f16(__s0_176, __s1_176, __builtin_bit_cast(float16x8_t, (uint32x4_t) {vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_176), __p3_176), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_176), __p3_176), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_176), __p3_176), vget_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_176), __p3_176)})); \ __ret_176; \ }) #else @@ -36357,9 +36301,7 @@ uint32x4_t __reint1_176 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_1 float16x8_t __rev0_177; __rev0_177 = __builtin_shufflevector(__s0_177, __s0_177, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_177; __rev1_177 = __builtin_shufflevector(__s1_177, __s1_177, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_177; __rev2_177 = __builtin_shufflevector(__s2_177, __s2_177, 3, 2, 1, 0); \ -float16x4_t __reint_177 = __rev2_177; \ -uint32x4_t __reint1_177 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_177, __p3_177), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_177, __p3_177), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_177, __p3_177), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_177, __p3_177)}; \ - __ret_177 = __noswap_vcmlaq_rot90_f16(__rev0_177, __rev1_177, *(float16x8_t *) &__reint1_177); \ + __ret_177 = __noswap_vcmlaq_rot90_f16(__rev0_177, __rev1_177, __builtin_bit_cast(float16x8_t, (uint32x4_t) {__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_177), __p3_177), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_177), __p3_177), __noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_177), __p3_177), 
__noswap_vget_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_177), __p3_177)})); \ __ret_177 = __builtin_shufflevector(__ret_177, __ret_177, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_177; \ }) @@ -36371,9 +36313,7 @@ uint32x4_t __reint1_177 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) & float16x4_t __s0_178 = __p0_178; \ float16x4_t __s1_178 = __p1_178; \ float16x8_t __s2_178 = __p2_178; \ -float16x8_t __reint_178 = __s2_178; \ -uint32x2_t __reint1_178 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_178, __p3_178), vgetq_lane_u32(*(uint32x4_t *) &__reint_178, __p3_178)}; \ - __ret_178 = vcmla_rot90_f16(__s0_178, __s1_178, *(float16x4_t *) &__reint1_178); \ + __ret_178 = vcmla_rot90_f16(__s0_178, __s1_178, __builtin_bit_cast(float16x4_t, (uint32x2_t) {vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_178), __p3_178), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_178), __p3_178)})); \ __ret_178; \ }) #else @@ -36385,9 +36325,7 @@ uint32x2_t __reint1_178 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_ float16x4_t __rev0_179; __rev0_179 = __builtin_shufflevector(__s0_179, __s0_179, 3, 2, 1, 0); \ float16x4_t __rev1_179; __rev1_179 = __builtin_shufflevector(__s1_179, __s1_179, 3, 2, 1, 0); \ float16x8_t __rev2_179; __rev2_179 = __builtin_shufflevector(__s2_179, __s2_179, 7, 6, 5, 4, 3, 2, 1, 0); \ -float16x8_t __reint_179 = __rev2_179; \ -uint32x2_t __reint1_179 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_179, __p3_179), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_179, __p3_179)}; \ - __ret_179 = __noswap_vcmla_rot90_f16(__rev0_179, __rev1_179, *(float16x4_t *) &__reint1_179); \ + __ret_179 = __noswap_vcmla_rot90_f16(__rev0_179, __rev1_179, __builtin_bit_cast(float16x4_t, (uint32x2_t) {__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_179), __p3_179), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_179), __p3_179)})); \ __ret_179 = __builtin_shufflevector(__ret_179, __ret_179, 3, 2, 1, 0); \ 
__ret_179; \ }) @@ -36399,9 +36337,7 @@ uint32x2_t __reint1_179 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) float16x8_t __s0_180 = __p0_180; \ float16x8_t __s1_180 = __p1_180; \ float16x8_t __s2_180 = __p2_180; \ -float16x8_t __reint_180 = __s2_180; \ -uint32x4_t __reint1_180 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_180, __p3_180), vgetq_lane_u32(*(uint32x4_t *) &__reint_180, __p3_180), vgetq_lane_u32(*(uint32x4_t *) &__reint_180, __p3_180), vgetq_lane_u32(*(uint32x4_t *) &__reint_180, __p3_180)}; \ - __ret_180 = vcmlaq_rot90_f16(__s0_180, __s1_180, *(float16x8_t *) &__reint1_180); \ + __ret_180 = vcmlaq_rot90_f16(__s0_180, __s1_180, __builtin_bit_cast(float16x8_t, (uint32x4_t) {vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_180), __p3_180), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_180), __p3_180), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_180), __p3_180), vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __s2_180), __p3_180)})); \ __ret_180; \ }) #else @@ -36413,9 +36349,7 @@ uint32x4_t __reint1_180 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_ float16x8_t __rev0_181; __rev0_181 = __builtin_shufflevector(__s0_181, __s0_181, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev1_181; __rev1_181 = __builtin_shufflevector(__s1_181, __s1_181, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_181; __rev2_181 = __builtin_shufflevector(__s2_181, __s2_181, 7, 6, 5, 4, 3, 2, 1, 0); \ -float16x8_t __reint_181 = __rev2_181; \ -uint32x4_t __reint1_181 = (uint32x4_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_181, __p3_181), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_181, __p3_181), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_181, __p3_181), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_181, __p3_181)}; \ - __ret_181 = __noswap_vcmlaq_rot90_f16(__rev0_181, __rev1_181, *(float16x8_t *) &__reint1_181); \ + __ret_181 = __noswap_vcmlaq_rot90_f16(__rev0_181, __rev1_181, __builtin_bit_cast(float16x8_t, (uint32x4_t) 
{__noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_181), __p3_181), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_181), __p3_181), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_181), __p3_181), __noswap_vgetq_lane_u32(__builtin_bit_cast(uint32x4_t, __rev2_181), __p3_181)})); \ __ret_181 = __builtin_shufflevector(__ret_181, __ret_181, 7, 6, 5, 4, 3, 2, 1, 0); \ __ret_181; \ }) @@ -36541,9 +36475,7 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_f32(float3 float32x2_t __s0_182 = __p0_182; \ float32x2_t __s1_182 = __p1_182; \ float32x2_t __s2_182 = __p2_182; \ -float32x2_t __reint_182 = __s2_182; \ -uint64x1_t __reint1_182 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_182, __p3_182)}; \ - __ret_182 = vcmla_f32(__s0_182, __s1_182, *(float32x2_t *) &__reint1_182); \ + __ret_182 = vcmla_f32(__s0_182, __s1_182, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __s2_182), __p3_182)})); \ __ret_182; \ }) #else @@ -36555,9 +36487,7 @@ uint64x1_t __reint1_182 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_1 float32x2_t __rev0_183; __rev0_183 = __builtin_shufflevector(__s0_183, __s0_183, 1, 0); \ float32x2_t __rev1_183; __rev1_183 = __builtin_shufflevector(__s1_183, __s1_183, 1, 0); \ float32x2_t __rev2_183; __rev2_183 = __builtin_shufflevector(__s2_183, __s2_183, 1, 0); \ -float32x2_t __reint_183 = __rev2_183; \ -uint64x1_t __reint1_183 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_183, __p3_183)}; \ - __ret_183 = __noswap_vcmla_f32(__rev0_183, __rev1_183, *(float32x2_t *) &__reint1_183); \ + __ret_183 = __noswap_vcmla_f32(__rev0_183, __rev1_183, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_183), __p3_183)})); \ __ret_183 = __builtin_shufflevector(__ret_183, __ret_183, 1, 0); \ __ret_183; \ }) @@ -36569,9 +36499,7 @@ uint64x1_t __reint1_183 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) 
&__reint_1 float32x4_t __s0_184 = __p0_184; \ float32x4_t __s1_184 = __p1_184; \ float32x2_t __s2_184 = __p2_184; \ -float32x2_t __reint_184 = __s2_184; \ -uint64x2_t __reint1_184 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_184, __p3_184), vget_lane_u64(*(uint64x1_t *) &__reint_184, __p3_184)}; \ - __ret_184 = vcmlaq_f32(__s0_184, __s1_184, *(float32x4_t *) &__reint1_184); \ + __ret_184 = vcmlaq_f32(__s0_184, __s1_184, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __s2_184), __p3_184), vget_lane_u64(__builtin_bit_cast(uint64x1_t, __s2_184), __p3_184)})); \ __ret_184; \ }) #else @@ -36583,9 +36511,7 @@ uint64x2_t __reint1_184 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_1 float32x4_t __rev0_185; __rev0_185 = __builtin_shufflevector(__s0_185, __s0_185, 3, 2, 1, 0); \ float32x4_t __rev1_185; __rev1_185 = __builtin_shufflevector(__s1_185, __s1_185, 3, 2, 1, 0); \ float32x2_t __rev2_185; __rev2_185 = __builtin_shufflevector(__s2_185, __s2_185, 1, 0); \ -float32x2_t __reint_185 = __rev2_185; \ -uint64x2_t __reint1_185 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_185, __p3_185), vget_lane_u64(*(uint64x1_t *) &__reint_185, __p3_185)}; \ - __ret_185 = __noswap_vcmlaq_f32(__rev0_185, __rev1_185, *(float32x4_t *) &__reint1_185); \ + __ret_185 = __noswap_vcmlaq_f32(__rev0_185, __rev1_185, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_185), __p3_185), vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_185), __p3_185)})); \ __ret_185 = __builtin_shufflevector(__ret_185, __ret_185, 3, 2, 1, 0); \ __ret_185; \ }) @@ -36597,9 +36523,7 @@ uint64x2_t __reint1_185 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_1 float32x2_t __s0_186 = __p0_186; \ float32x2_t __s1_186 = __p1_186; \ float32x4_t __s2_186 = __p2_186; \ -float32x4_t __reint_186 = __s2_186; \ -uint64x1_t __reint1_186 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_186, __p3_186)}; 
\ - __ret_186 = vcmla_f32(__s0_186, __s1_186, *(float32x2_t *) &__reint1_186); \ + __ret_186 = vcmla_f32(__s0_186, __s1_186, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __s2_186), __p3_186)})); \ __ret_186; \ }) #else @@ -36611,9 +36535,7 @@ uint64x1_t __reint1_186 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_ float32x2_t __rev0_187; __rev0_187 = __builtin_shufflevector(__s0_187, __s0_187, 1, 0); \ float32x2_t __rev1_187; __rev1_187 = __builtin_shufflevector(__s1_187, __s1_187, 1, 0); \ float32x4_t __rev2_187; __rev2_187 = __builtin_shufflevector(__s2_187, __s2_187, 3, 2, 1, 0); \ -float32x4_t __reint_187 = __rev2_187; \ -uint64x1_t __reint1_187 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_187, __p3_187)}; \ - __ret_187 = __noswap_vcmla_f32(__rev0_187, __rev1_187, *(float32x2_t *) &__reint1_187); \ + __ret_187 = __noswap_vcmla_f32(__rev0_187, __rev1_187, __builtin_bit_cast(float32x2_t, (uint64x1_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_187), __p3_187)})); \ __ret_187 = __builtin_shufflevector(__ret_187, __ret_187, 1, 0); \ __ret_187; \ }) @@ -36625,9 +36547,7 @@ uint64x1_t __reint1_187 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) float32x4_t __s0_188 = __p0_188; \ float32x4_t __s1_188 = __p1_188; \ float32x4_t __s2_188 = __p2_188; \ -float32x4_t __reint_188 = __s2_188; \ -uint64x2_t __reint1_188 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_188, __p3_188), vgetq_lane_u64(*(uint64x2_t *) &__reint_188, __p3_188)}; \ - __ret_188 = vcmlaq_f32(__s0_188, __s1_188, *(float32x4_t *) &__reint1_188); \ + __ret_188 = vcmlaq_f32(__s0_188, __s1_188, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __s2_188), __p3_188), vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __s2_188), __p3_188)})); \ __ret_188; \ }) #else @@ -36639,9 +36559,7 @@ uint64x2_t __reint1_188 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) 
&__reint_ float32x4_t __rev0_189; __rev0_189 = __builtin_shufflevector(__s0_189, __s0_189, 3, 2, 1, 0); \ float32x4_t __rev1_189; __rev1_189 = __builtin_shufflevector(__s1_189, __s1_189, 3, 2, 1, 0); \ float32x4_t __rev2_189; __rev2_189 = __builtin_shufflevector(__s2_189, __s2_189, 3, 2, 1, 0); \ -float32x4_t __reint_189 = __rev2_189; \ -uint64x2_t __reint1_189 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_189, __p3_189), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_189, __p3_189)}; \ - __ret_189 = __noswap_vcmlaq_f32(__rev0_189, __rev1_189, *(float32x4_t *) &__reint1_189); \ + __ret_189 = __noswap_vcmlaq_f32(__rev0_189, __rev1_189, __builtin_bit_cast(float32x4_t, (uint64x2_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_189), __p3_189), __noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_189), __p3_189)})); \ __ret_189 = __builtin_shufflevector(__ret_189, __ret_189, 3, 2, 1, 0); \ __ret_189; \ }) @@ -36699,9 +36617,7 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot180_f32 float32x2_t __s0_190 = __p0_190; \ float32x2_t __s1_190 = __p1_190; \ float32x2_t __s2_190 = __p2_190; \ -float32x2_t __reint_190 = __s2_190; \ -uint64x1_t __reint1_190 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_190, __p3_190)}; \ - __ret_190 = vcmla_rot180_f32(__s0_190, __s1_190, *(float32x2_t *) &__reint1_190); \ + __ret_190 = vcmla_rot180_f32(__s0_190, __s1_190, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __s2_190), __p3_190)})); \ __ret_190; \ }) #else @@ -36713,9 +36629,7 @@ uint64x1_t __reint1_190 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_1 float32x2_t __rev0_191; __rev0_191 = __builtin_shufflevector(__s0_191, __s0_191, 1, 0); \ float32x2_t __rev1_191; __rev1_191 = __builtin_shufflevector(__s1_191, __s1_191, 1, 0); \ float32x2_t __rev2_191; __rev2_191 = __builtin_shufflevector(__s2_191, __s2_191, 1, 0); \ -float32x2_t __reint_191 = 
__rev2_191; \ -uint64x1_t __reint1_191 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_191, __p3_191)}; \ - __ret_191 = __noswap_vcmla_rot180_f32(__rev0_191, __rev1_191, *(float32x2_t *) &__reint1_191); \ + __ret_191 = __noswap_vcmla_rot180_f32(__rev0_191, __rev1_191, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_191), __p3_191)})); \ __ret_191 = __builtin_shufflevector(__ret_191, __ret_191, 1, 0); \ __ret_191; \ }) @@ -36727,9 +36641,7 @@ uint64x1_t __reint1_191 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_1 float32x4_t __s0_192 = __p0_192; \ float32x4_t __s1_192 = __p1_192; \ float32x2_t __s2_192 = __p2_192; \ -float32x2_t __reint_192 = __s2_192; \ -uint64x2_t __reint1_192 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_192, __p3_192), vget_lane_u64(*(uint64x1_t *) &__reint_192, __p3_192)}; \ - __ret_192 = vcmlaq_rot180_f32(__s0_192, __s1_192, *(float32x4_t *) &__reint1_192); \ + __ret_192 = vcmlaq_rot180_f32(__s0_192, __s1_192, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __s2_192), __p3_192), vget_lane_u64(__builtin_bit_cast(uint64x1_t, __s2_192), __p3_192)})); \ __ret_192; \ }) #else @@ -36741,9 +36653,7 @@ uint64x2_t __reint1_192 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_1 float32x4_t __rev0_193; __rev0_193 = __builtin_shufflevector(__s0_193, __s0_193, 3, 2, 1, 0); \ float32x4_t __rev1_193; __rev1_193 = __builtin_shufflevector(__s1_193, __s1_193, 3, 2, 1, 0); \ float32x2_t __rev2_193; __rev2_193 = __builtin_shufflevector(__s2_193, __s2_193, 1, 0); \ -float32x2_t __reint_193 = __rev2_193; \ -uint64x2_t __reint1_193 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_193, __p3_193), vget_lane_u64(*(uint64x1_t *) &__reint_193, __p3_193)}; \ - __ret_193 = __noswap_vcmlaq_rot180_f32(__rev0_193, __rev1_193, *(float32x4_t *) &__reint1_193); \ + __ret_193 = __noswap_vcmlaq_rot180_f32(__rev0_193, __rev1_193, 
__builtin_bit_cast(float32x4_t, (uint64x2_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_193), __p3_193), vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_193), __p3_193)})); \ __ret_193 = __builtin_shufflevector(__ret_193, __ret_193, 3, 2, 1, 0); \ __ret_193; \ }) @@ -36755,9 +36665,7 @@ uint64x2_t __reint1_193 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_1 float32x2_t __s0_194 = __p0_194; \ float32x2_t __s1_194 = __p1_194; \ float32x4_t __s2_194 = __p2_194; \ -float32x4_t __reint_194 = __s2_194; \ -uint64x1_t __reint1_194 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_194, __p3_194)}; \ - __ret_194 = vcmla_rot180_f32(__s0_194, __s1_194, *(float32x2_t *) &__reint1_194); \ + __ret_194 = vcmla_rot180_f32(__s0_194, __s1_194, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __s2_194), __p3_194)})); \ __ret_194; \ }) #else @@ -36769,9 +36677,7 @@ uint64x1_t __reint1_194 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_ float32x2_t __rev0_195; __rev0_195 = __builtin_shufflevector(__s0_195, __s0_195, 1, 0); \ float32x2_t __rev1_195; __rev1_195 = __builtin_shufflevector(__s1_195, __s1_195, 1, 0); \ float32x4_t __rev2_195; __rev2_195 = __builtin_shufflevector(__s2_195, __s2_195, 3, 2, 1, 0); \ -float32x4_t __reint_195 = __rev2_195; \ -uint64x1_t __reint1_195 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_195, __p3_195)}; \ - __ret_195 = __noswap_vcmla_rot180_f32(__rev0_195, __rev1_195, *(float32x2_t *) &__reint1_195); \ + __ret_195 = __noswap_vcmla_rot180_f32(__rev0_195, __rev1_195, __builtin_bit_cast(float32x2_t, (uint64x1_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_195), __p3_195)})); \ __ret_195 = __builtin_shufflevector(__ret_195, __ret_195, 1, 0); \ __ret_195; \ }) @@ -36783,9 +36689,7 @@ uint64x1_t __reint1_195 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) float32x4_t __s0_196 = __p0_196; \ float32x4_t __s1_196 = __p1_196; \ float32x4_t 
__s2_196 = __p2_196; \ -float32x4_t __reint_196 = __s2_196; \ -uint64x2_t __reint1_196 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_196, __p3_196), vgetq_lane_u64(*(uint64x2_t *) &__reint_196, __p3_196)}; \ - __ret_196 = vcmlaq_rot180_f32(__s0_196, __s1_196, *(float32x4_t *) &__reint1_196); \ + __ret_196 = vcmlaq_rot180_f32(__s0_196, __s1_196, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __s2_196), __p3_196), vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __s2_196), __p3_196)})); \ __ret_196; \ }) #else @@ -36797,9 +36701,7 @@ uint64x2_t __reint1_196 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_ float32x4_t __rev0_197; __rev0_197 = __builtin_shufflevector(__s0_197, __s0_197, 3, 2, 1, 0); \ float32x4_t __rev1_197; __rev1_197 = __builtin_shufflevector(__s1_197, __s1_197, 3, 2, 1, 0); \ float32x4_t __rev2_197; __rev2_197 = __builtin_shufflevector(__s2_197, __s2_197, 3, 2, 1, 0); \ -float32x4_t __reint_197 = __rev2_197; \ -uint64x2_t __reint1_197 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_197, __p3_197), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_197, __p3_197)}; \ - __ret_197 = __noswap_vcmlaq_rot180_f32(__rev0_197, __rev1_197, *(float32x4_t *) &__reint1_197); \ + __ret_197 = __noswap_vcmlaq_rot180_f32(__rev0_197, __rev1_197, __builtin_bit_cast(float32x4_t, (uint64x2_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_197), __p3_197), __noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_197), __p3_197)})); \ __ret_197 = __builtin_shufflevector(__ret_197, __ret_197, 3, 2, 1, 0); \ __ret_197; \ }) @@ -36857,9 +36759,7 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot270_f32 float32x2_t __s0_198 = __p0_198; \ float32x2_t __s1_198 = __p1_198; \ float32x2_t __s2_198 = __p2_198; \ -float32x2_t __reint_198 = __s2_198; \ -uint64x1_t __reint1_198 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_198, __p3_198)}; \ - __ret_198 = 
vcmla_rot270_f32(__s0_198, __s1_198, *(float32x2_t *) &__reint1_198); \ + __ret_198 = vcmla_rot270_f32(__s0_198, __s1_198, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __s2_198), __p3_198)})); \ __ret_198; \ }) #else @@ -36871,9 +36771,7 @@ uint64x1_t __reint1_198 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_1 float32x2_t __rev0_199; __rev0_199 = __builtin_shufflevector(__s0_199, __s0_199, 1, 0); \ float32x2_t __rev1_199; __rev1_199 = __builtin_shufflevector(__s1_199, __s1_199, 1, 0); \ float32x2_t __rev2_199; __rev2_199 = __builtin_shufflevector(__s2_199, __s2_199, 1, 0); \ -float32x2_t __reint_199 = __rev2_199; \ -uint64x1_t __reint1_199 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_199, __p3_199)}; \ - __ret_199 = __noswap_vcmla_rot270_f32(__rev0_199, __rev1_199, *(float32x2_t *) &__reint1_199); \ + __ret_199 = __noswap_vcmla_rot270_f32(__rev0_199, __rev1_199, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_199), __p3_199)})); \ __ret_199 = __builtin_shufflevector(__ret_199, __ret_199, 1, 0); \ __ret_199; \ }) @@ -36885,9 +36783,7 @@ uint64x1_t __reint1_199 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_1 float32x4_t __s0_200 = __p0_200; \ float32x4_t __s1_200 = __p1_200; \ float32x2_t __s2_200 = __p2_200; \ -float32x2_t __reint_200 = __s2_200; \ -uint64x2_t __reint1_200 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_200, __p3_200), vget_lane_u64(*(uint64x1_t *) &__reint_200, __p3_200)}; \ - __ret_200 = vcmlaq_rot270_f32(__s0_200, __s1_200, *(float32x4_t *) &__reint1_200); \ + __ret_200 = vcmlaq_rot270_f32(__s0_200, __s1_200, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __s2_200), __p3_200), vget_lane_u64(__builtin_bit_cast(uint64x1_t, __s2_200), __p3_200)})); \ __ret_200; \ }) #else @@ -36899,9 +36795,7 @@ uint64x2_t __reint1_200 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_2 
float32x4_t __rev0_201; __rev0_201 = __builtin_shufflevector(__s0_201, __s0_201, 3, 2, 1, 0); \ float32x4_t __rev1_201; __rev1_201 = __builtin_shufflevector(__s1_201, __s1_201, 3, 2, 1, 0); \ float32x2_t __rev2_201; __rev2_201 = __builtin_shufflevector(__s2_201, __s2_201, 1, 0); \ -float32x2_t __reint_201 = __rev2_201; \ -uint64x2_t __reint1_201 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_201, __p3_201), vget_lane_u64(*(uint64x1_t *) &__reint_201, __p3_201)}; \ - __ret_201 = __noswap_vcmlaq_rot270_f32(__rev0_201, __rev1_201, *(float32x4_t *) &__reint1_201); \ + __ret_201 = __noswap_vcmlaq_rot270_f32(__rev0_201, __rev1_201, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_201), __p3_201), vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_201), __p3_201)})); \ __ret_201 = __builtin_shufflevector(__ret_201, __ret_201, 3, 2, 1, 0); \ __ret_201; \ }) @@ -36913,9 +36807,7 @@ uint64x2_t __reint1_201 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_2 float32x2_t __s0_202 = __p0_202; \ float32x2_t __s1_202 = __p1_202; \ float32x4_t __s2_202 = __p2_202; \ -float32x4_t __reint_202 = __s2_202; \ -uint64x1_t __reint1_202 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_202, __p3_202)}; \ - __ret_202 = vcmla_rot270_f32(__s0_202, __s1_202, *(float32x2_t *) &__reint1_202); \ + __ret_202 = vcmla_rot270_f32(__s0_202, __s1_202, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __s2_202), __p3_202)})); \ __ret_202; \ }) #else @@ -36927,9 +36819,7 @@ uint64x1_t __reint1_202 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_ float32x2_t __rev0_203; __rev0_203 = __builtin_shufflevector(__s0_203, __s0_203, 1, 0); \ float32x2_t __rev1_203; __rev1_203 = __builtin_shufflevector(__s1_203, __s1_203, 1, 0); \ float32x4_t __rev2_203; __rev2_203 = __builtin_shufflevector(__s2_203, __s2_203, 3, 2, 1, 0); \ -float32x4_t __reint_203 = __rev2_203; \ -uint64x1_t __reint1_203 = 
(uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_203, __p3_203)}; \ - __ret_203 = __noswap_vcmla_rot270_f32(__rev0_203, __rev1_203, *(float32x2_t *) &__reint1_203); \ + __ret_203 = __noswap_vcmla_rot270_f32(__rev0_203, __rev1_203, __builtin_bit_cast(float32x2_t, (uint64x1_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_203), __p3_203)})); \ __ret_203 = __builtin_shufflevector(__ret_203, __ret_203, 1, 0); \ __ret_203; \ }) @@ -36941,9 +36831,7 @@ uint64x1_t __reint1_203 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) float32x4_t __s0_204 = __p0_204; \ float32x4_t __s1_204 = __p1_204; \ float32x4_t __s2_204 = __p2_204; \ -float32x4_t __reint_204 = __s2_204; \ -uint64x2_t __reint1_204 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_204, __p3_204), vgetq_lane_u64(*(uint64x2_t *) &__reint_204, __p3_204)}; \ - __ret_204 = vcmlaq_rot270_f32(__s0_204, __s1_204, *(float32x4_t *) &__reint1_204); \ + __ret_204 = vcmlaq_rot270_f32(__s0_204, __s1_204, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __s2_204), __p3_204), vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __s2_204), __p3_204)})); \ __ret_204; \ }) #else @@ -36955,9 +36843,7 @@ uint64x2_t __reint1_204 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_ float32x4_t __rev0_205; __rev0_205 = __builtin_shufflevector(__s0_205, __s0_205, 3, 2, 1, 0); \ float32x4_t __rev1_205; __rev1_205 = __builtin_shufflevector(__s1_205, __s1_205, 3, 2, 1, 0); \ float32x4_t __rev2_205; __rev2_205 = __builtin_shufflevector(__s2_205, __s2_205, 3, 2, 1, 0); \ -float32x4_t __reint_205 = __rev2_205; \ -uint64x2_t __reint1_205 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_205, __p3_205), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_205, __p3_205)}; \ - __ret_205 = __noswap_vcmlaq_rot270_f32(__rev0_205, __rev1_205, *(float32x4_t *) &__reint1_205); \ + __ret_205 = __noswap_vcmlaq_rot270_f32(__rev0_205, __rev1_205, 
__builtin_bit_cast(float32x4_t, (uint64x2_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_205), __p3_205), __noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_205), __p3_205)})); \ __ret_205 = __builtin_shufflevector(__ret_205, __ret_205, 3, 2, 1, 0); \ __ret_205; \ }) @@ -37015,9 +36901,7 @@ __ai __attribute__((target("v8.3a,neon"))) float32x2_t __noswap_vcmla_rot90_f32( float32x2_t __s0_206 = __p0_206; \ float32x2_t __s1_206 = __p1_206; \ float32x2_t __s2_206 = __p2_206; \ -float32x2_t __reint_206 = __s2_206; \ -uint64x1_t __reint1_206 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_206, __p3_206)}; \ - __ret_206 = vcmla_rot90_f32(__s0_206, __s1_206, *(float32x2_t *) &__reint1_206); \ + __ret_206 = vcmla_rot90_f32(__s0_206, __s1_206, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __s2_206), __p3_206)})); \ __ret_206; \ }) #else @@ -37029,9 +36913,7 @@ uint64x1_t __reint1_206 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_2 float32x2_t __rev0_207; __rev0_207 = __builtin_shufflevector(__s0_207, __s0_207, 1, 0); \ float32x2_t __rev1_207; __rev1_207 = __builtin_shufflevector(__s1_207, __s1_207, 1, 0); \ float32x2_t __rev2_207; __rev2_207 = __builtin_shufflevector(__s2_207, __s2_207, 1, 0); \ -float32x2_t __reint_207 = __rev2_207; \ -uint64x1_t __reint1_207 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_207, __p3_207)}; \ - __ret_207 = __noswap_vcmla_rot90_f32(__rev0_207, __rev1_207, *(float32x2_t *) &__reint1_207); \ + __ret_207 = __noswap_vcmla_rot90_f32(__rev0_207, __rev1_207, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_207), __p3_207)})); \ __ret_207 = __builtin_shufflevector(__ret_207, __ret_207, 1, 0); \ __ret_207; \ }) @@ -37043,9 +36925,7 @@ uint64x1_t __reint1_207 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_2 float32x4_t __s0_208 = __p0_208; \ float32x4_t __s1_208 = __p1_208; \ float32x2_t __s2_208 = 
__p2_208; \ -float32x2_t __reint_208 = __s2_208; \ -uint64x2_t __reint1_208 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_208, __p3_208), vget_lane_u64(*(uint64x1_t *) &__reint_208, __p3_208)}; \ - __ret_208 = vcmlaq_rot90_f32(__s0_208, __s1_208, *(float32x4_t *) &__reint1_208); \ + __ret_208 = vcmlaq_rot90_f32(__s0_208, __s1_208, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __s2_208), __p3_208), vget_lane_u64(__builtin_bit_cast(uint64x1_t, __s2_208), __p3_208)})); \ __ret_208; \ }) #else @@ -37057,9 +36937,7 @@ uint64x2_t __reint1_208 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_2 float32x4_t __rev0_209; __rev0_209 = __builtin_shufflevector(__s0_209, __s0_209, 3, 2, 1, 0); \ float32x4_t __rev1_209; __rev1_209 = __builtin_shufflevector(__s1_209, __s1_209, 3, 2, 1, 0); \ float32x2_t __rev2_209; __rev2_209 = __builtin_shufflevector(__s2_209, __s2_209, 1, 0); \ -float32x2_t __reint_209 = __rev2_209; \ -uint64x2_t __reint1_209 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_209, __p3_209), vget_lane_u64(*(uint64x1_t *) &__reint_209, __p3_209)}; \ - __ret_209 = __noswap_vcmlaq_rot90_f32(__rev0_209, __rev1_209, *(float32x4_t *) &__reint1_209); \ + __ret_209 = __noswap_vcmlaq_rot90_f32(__rev0_209, __rev1_209, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_209), __p3_209), vget_lane_u64(__builtin_bit_cast(uint64x1_t, __rev2_209), __p3_209)})); \ __ret_209 = __builtin_shufflevector(__ret_209, __ret_209, 3, 2, 1, 0); \ __ret_209; \ }) @@ -37071,9 +36949,7 @@ uint64x2_t __reint1_209 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_2 float32x2_t __s0_210 = __p0_210; \ float32x2_t __s1_210 = __p1_210; \ float32x4_t __s2_210 = __p2_210; \ -float32x4_t __reint_210 = __s2_210; \ -uint64x1_t __reint1_210 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_210, __p3_210)}; \ - __ret_210 = vcmla_rot90_f32(__s0_210, __s1_210, *(float32x2_t *) 
&__reint1_210); \ + __ret_210 = vcmla_rot90_f32(__s0_210, __s1_210, __builtin_bit_cast(float32x2_t, (uint64x1_t) {vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __s2_210), __p3_210)})); \ __ret_210; \ }) #else @@ -37085,9 +36961,7 @@ uint64x1_t __reint1_210 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_ float32x2_t __rev0_211; __rev0_211 = __builtin_shufflevector(__s0_211, __s0_211, 1, 0); \ float32x2_t __rev1_211; __rev1_211 = __builtin_shufflevector(__s1_211, __s1_211, 1, 0); \ float32x4_t __rev2_211; __rev2_211 = __builtin_shufflevector(__s2_211, __s2_211, 3, 2, 1, 0); \ -float32x4_t __reint_211 = __rev2_211; \ -uint64x1_t __reint1_211 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_211, __p3_211)}; \ - __ret_211 = __noswap_vcmla_rot90_f32(__rev0_211, __rev1_211, *(float32x2_t *) &__reint1_211); \ + __ret_211 = __noswap_vcmla_rot90_f32(__rev0_211, __rev1_211, __builtin_bit_cast(float32x2_t, (uint64x1_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_211), __p3_211)})); \ __ret_211 = __builtin_shufflevector(__ret_211, __ret_211, 1, 0); \ __ret_211; \ }) @@ -37099,9 +36973,7 @@ uint64x1_t __reint1_211 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) float32x4_t __s0_212 = __p0_212; \ float32x4_t __s1_212 = __p1_212; \ float32x4_t __s2_212 = __p2_212; \ -float32x4_t __reint_212 = __s2_212; \ -uint64x2_t __reint1_212 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_212, __p3_212), vgetq_lane_u64(*(uint64x2_t *) &__reint_212, __p3_212)}; \ - __ret_212 = vcmlaq_rot90_f32(__s0_212, __s1_212, *(float32x4_t *) &__reint1_212); \ + __ret_212 = vcmlaq_rot90_f32(__s0_212, __s1_212, __builtin_bit_cast(float32x4_t, (uint64x2_t) {vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __s2_212), __p3_212), vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __s2_212), __p3_212)})); \ __ret_212; \ }) #else @@ -37113,9 +36985,7 @@ uint64x2_t __reint1_212 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_ float32x4_t __rev0_213; 
__rev0_213 = __builtin_shufflevector(__s0_213, __s0_213, 3, 2, 1, 0); \ float32x4_t __rev1_213; __rev1_213 = __builtin_shufflevector(__s1_213, __s1_213, 3, 2, 1, 0); \ float32x4_t __rev2_213; __rev2_213 = __builtin_shufflevector(__s2_213, __s2_213, 3, 2, 1, 0); \ -float32x4_t __reint_213 = __rev2_213; \ -uint64x2_t __reint1_213 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_213, __p3_213), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_213, __p3_213)}; \ - __ret_213 = __noswap_vcmlaq_rot90_f32(__rev0_213, __rev1_213, *(float32x4_t *) &__reint1_213); \ + __ret_213 = __noswap_vcmlaq_rot90_f32(__rev0_213, __rev1_213, __builtin_bit_cast(float32x4_t, (uint64x2_t) {__noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_213), __p3_213), __noswap_vgetq_lane_u64(__builtin_bit_cast(uint64x2_t, __rev2_213), __p3_213)})); \ __ret_213 = __builtin_shufflevector(__ret_213, __ret_213, 3, 2, 1, 0); \ __ret_213; \ }) @@ -41249,33 +41119,2258 @@ __ai __attribute__((target("neon"))) float32x2_t vfms_f32(float32x2_t __p0, floa #endif #endif -#if defined(__aarch64__) || defined(__arm64ec__) -__ai __attribute__((target("aes,neon"))) poly128_t vmull_p64(poly64_t __p0, poly64_t __p1) { - poly128_t __ret; - __ret = (poly128_t) __builtin_neon_vmull_p64(__p0, __p1); +#if defined(__aarch64__) +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_bf16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { + bfloat16x8_t __ret; + __ret = (bfloat16x8_t) __builtin_neon_vcvt1_bf16_mf8_fpm(__p0, __p1); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_bf16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { + bfloat16x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (bfloat16x8_t) __builtin_neon_vcvt1_bf16_mf8_fpm(__rev0, __p1); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai 
__attribute__((target("fp8,neon"))) float16x8_t vcvt1_f16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcvt1_f16_mf8_fpm(__p0, __p1); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_f16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { + float16x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float16x8_t) __builtin_neon_vcvt1_f16_mf8_fpm(__rev0, __p1); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_high_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + bfloat16x8_t __ret; + __ret = (bfloat16x8_t) __builtin_neon_vcvt1_high_bf16_mf8_fpm(__p0, __p1); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_high_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + bfloat16x8_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (bfloat16x8_t) __builtin_neon_vcvt1_high_bf16_mf8_fpm(__rev0, __p1); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_high_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcvt1_high_f16_mf8_fpm(__p0, __p1); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_high_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + float16x8_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float16x8_t) __builtin_neon_vcvt1_high_f16_mf8_fpm(__rev0, __p1); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + 
+#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_low_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + bfloat16x8_t __ret; + __ret = (bfloat16x8_t) __builtin_neon_vcvt1_low_bf16_mf8_fpm(__p0, __p1); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt1_low_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + bfloat16x8_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (bfloat16x8_t) __builtin_neon_vcvt1_low_bf16_mf8_fpm(__rev0, __p1); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_low_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcvt1_low_f16_mf8_fpm(__p0, __p1); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) float16x8_t vcvt1_low_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + float16x8_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float16x8_t) __builtin_neon_vcvt1_low_f16_mf8_fpm(__rev0, __p1); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_bf16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { + bfloat16x8_t __ret; + __ret = (bfloat16x8_t) __builtin_neon_vcvt2_bf16_mf8_fpm(__p0, __p1); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_bf16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { + bfloat16x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (bfloat16x8_t) __builtin_neon_vcvt2_bf16_mf8_fpm(__rev0, __p1); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 
0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_f16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcvt2_f16_mf8_fpm(__p0, __p1); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_f16_mf8_fpm(mfloat8x8_t __p0, fpm_t __p1) { + float16x8_t __ret; + mfloat8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float16x8_t) __builtin_neon_vcvt2_f16_mf8_fpm(__rev0, __p1); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_high_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + bfloat16x8_t __ret; + __ret = (bfloat16x8_t) __builtin_neon_vcvt2_high_bf16_mf8_fpm(__p0, __p1); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_high_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + bfloat16x8_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (bfloat16x8_t) __builtin_neon_vcvt2_high_bf16_mf8_fpm(__rev0, __p1); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_high_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcvt2_high_f16_mf8_fpm(__p0, __p1); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_high_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + float16x8_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float16x8_t) __builtin_neon_vcvt2_high_f16_mf8_fpm(__rev0, __p1); + __ret = 
__builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_low_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + bfloat16x8_t __ret; + __ret = (bfloat16x8_t) __builtin_neon_vcvt2_low_bf16_mf8_fpm(__p0, __p1); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) bfloat16x8_t vcvt2_low_bf16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + bfloat16x8_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (bfloat16x8_t) __builtin_neon_vcvt2_low_bf16_mf8_fpm(__rev0, __p1); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_low_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcvt2_low_f16_mf8_fpm(__p0, __p1); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) float16x8_t vcvt2_low_f16_mf8_fpm(mfloat8x16_t __p0, fpm_t __p1) { + float16x8_t __ret; + mfloat8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float16x8_t) __builtin_neon_vcvt2_low_f16_mf8_fpm(__rev0, __p1); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) mfloat8x16_t vcvt_high_mf8_f32_fpm(mfloat8x8_t __p0, float32x4_t __p1, float32x4_t __p2, fpm_t __p3) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t) __builtin_neon_vcvt_high_mf8_f32_fpm(__p0, __p1, __p2, __p3); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) mfloat8x16_t vcvt_high_mf8_f32_fpm(mfloat8x8_t __p0, float32x4_t __p1, float32x4_t __p2, fpm_t __p3) { + mfloat8x16_t __ret; + mfloat8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + __ret = (mfloat8x16_t) __builtin_neon_vcvt_high_mf8_f32_fpm(__rev0, __rev1, __rev2, __p3); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) mfloat8x16_t vcvtq_mf8_f16_fpm(float16x8_t __p0, float16x8_t __p1, fpm_t __p2) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t) __builtin_neon_vcvtq_mf8_f16_fpm((int8x16_t)__p0, (int8x16_t)__p1, __p2); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) mfloat8x16_t vcvtq_mf8_f16_fpm(float16x8_t __p0, float16x8_t __p1, fpm_t __p2) { + mfloat8x16_t __ret; + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (mfloat8x16_t) __builtin_neon_vcvtq_mf8_f16_fpm((int8x16_t)__rev0, (int8x16_t)__rev1, __p2); + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) mfloat8x8_t vcvt_mf8_f16_fpm(float16x4_t __p0, float16x4_t __p1, fpm_t __p2) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t) __builtin_neon_vcvt_mf8_f16_fpm((int8x8_t)__p0, (int8x8_t)__p1, __p2); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) mfloat8x8_t vcvt_mf8_f16_fpm(float16x4_t __p0, float16x4_t __p1, fpm_t __p2) { + mfloat8x8_t __ret; + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + __ret = (mfloat8x8_t) __builtin_neon_vcvt_mf8_f16_fpm((int8x8_t)__rev0, (int8x8_t)__rev1, __p2); + 
__ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) mfloat8x8_t vcvt_mf8_f32_fpm(float32x4_t __p0, float32x4_t __p1, fpm_t __p2) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t) __builtin_neon_vcvt_mf8_f32_fpm(__p0, __p1, __p2); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) mfloat8x8_t vcvt_mf8_f32_fpm(float32x4_t __p0, float32x4_t __p1, fpm_t __p2) { + mfloat8x8_t __ret; + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + __ret = (mfloat8x8_t) __builtin_neon_vcvt_mf8_f32_fpm(__rev0, __rev1, __p2); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) float32x2_t vscale_f32(float32x2_t __p0, int32x2_t __p1) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vscale_f32((int8x8_t)__p0, (int8x8_t)__p1, 9); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) float32x2_t vscale_f32(float32x2_t __p0, int32x2_t __p1) { + float32x2_t __ret; + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + __ret = (float32x2_t) __builtin_neon_vscale_f32((int8x8_t)__rev0, (int8x8_t)__rev1, 9); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) float16x4_t vscale_f16(float16x4_t __p0, int16x4_t __p1) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vscale_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) float16x4_t vscale_f16(float16x4_t __p0, int16x4_t __p1) { + float16x4_t __ret; + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, 
__p0, 3, 2, 1, 0); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + __ret = (float16x4_t) __builtin_neon_vscale_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) float64x2_t vscaleq_f64(float64x2_t __p0, int64x2_t __p1) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vscaleq_f64((int8x16_t)__p0, (int8x16_t)__p1, 42); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) float64x2_t vscaleq_f64(float64x2_t __p0, int64x2_t __p1) { + float64x2_t __ret; + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + __ret = (float64x2_t) __builtin_neon_vscaleq_f64((int8x16_t)__rev0, (int8x16_t)__rev1, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) float32x4_t vscaleq_f32(float32x4_t __p0, int32x4_t __p1) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vscaleq_f32((int8x16_t)__p0, (int8x16_t)__p1, 41); + return __ret; +} +#else +__ai __attribute__((target("fp8,neon"))) float32x4_t vscaleq_f32(float32x4_t __p0, int32x4_t __p1) { + float32x4_t __ret; + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + __ret = (float32x4_t) __builtin_neon_vscaleq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, 41); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8,neon"))) float16x8_t vscaleq_f16(float16x8_t __p0, int16x8_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vscaleq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + return __ret; +} +#else +__ai 
__attribute__((target("fp8,neon"))) float16x8_t vscaleq_f16(float16x8_t __p0, int16x8_t __p1) { + float16x8_t __ret; + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float16x8_t) __builtin_neon_vscaleq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8dot2,neon"))) float16x8_t vdotq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vdotq_f16_mf8_fpm((int8x16_t)__p0, __p1, __p2, __p3); + return __ret; +} +#else +__ai __attribute__((target("fp8dot2,neon"))) float16x8_t vdotq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float16x8_t __ret; + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float16x8_t) __builtin_neon_vdotq_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8dot2,neon"))) float16x4_t vdot_f16_mf8_fpm(float16x4_t __p0, mfloat8x8_t __p1, mfloat8x8_t __p2, fpm_t __p3) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vdot_f16_mf8_fpm((int8x8_t)__p0, __p1, __p2, __p3); + return __ret; +} +#else +__ai __attribute__((target("fp8dot2,neon"))) float16x4_t vdot_f16_mf8_fpm(float16x4_t __p0, mfloat8x8_t __p1, mfloat8x8_t __p2, fpm_t __p3) { + float16x4_t __ret; + float16x4_t 
__rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float16x4_t) __builtin_neon_vdot_f16_mf8_fpm((int8x8_t)__rev0, __rev1, __rev2, __p3); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdotq_lane_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float16x8_t) __builtin_neon_vdotq_lane_f16_mf8_fpm((int8x16_t)__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vdotq_lane_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float16x8_t) __builtin_neon_vdotq_lane_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdot_lane_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x4_t __ret; \ + float16x4_t __s0 = __p0; \ + mfloat8x8_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float16x4_t) __builtin_neon_vdot_lane_f16_mf8_fpm((int8x8_t)__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vdot_lane_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + 
float16x4_t __ret; \ + float16x4_t __s0 = __p0; \ + mfloat8x8_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float16x4_t) __builtin_neon_vdot_lane_f16_mf8_fpm((int8x8_t)__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdotq_laneq_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float16x8_t) __builtin_neon_vdotq_laneq_f16_mf8_fpm((int8x16_t)__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vdotq_laneq_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float16x8_t) __builtin_neon_vdotq_laneq_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdot_laneq_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x4_t __ret; \ + float16x4_t __s0 = __p0; \ + mfloat8x8_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = 
(float16x4_t) __builtin_neon_vdot_laneq_f16_mf8_fpm((int8x8_t)__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vdot_laneq_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x4_t __ret; \ + float16x4_t __s0 = __p0; \ + mfloat8x8_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float16x4_t) __builtin_neon_vdot_laneq_f16_mf8_fpm((int8x8_t)__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8dot4,neon"))) float32x4_t vdotq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vdotq_f32_mf8_fpm(__p0, __p1, __p2, __p3); + return __ret; +} +#else +__ai __attribute__((target("fp8dot4,neon"))) float32x4_t vdotq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float32x4_t __ret; + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float32x4_t) __builtin_neon_vdotq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8dot4,neon"))) float32x2_t vdot_f32_mf8_fpm(float32x2_t __p0, mfloat8x8_t __p1, mfloat8x8_t __p2, fpm_t __p3) { + float32x2_t __ret; + 
__ret = (float32x2_t) __builtin_neon_vdot_f32_mf8_fpm(__p0, __p1, __p2, __p3); + return __ret; +} +#else +__ai __attribute__((target("fp8dot4,neon"))) float32x2_t vdot_f32_mf8_fpm(float32x2_t __p0, mfloat8x8_t __p1, mfloat8x8_t __p2, fpm_t __p3) { + float32x2_t __ret; + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float32x2_t) __builtin_neon_vdot_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdotq_lane_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float32x4_t) __builtin_neon_vdotq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vdotq_lane_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float32x4_t) __builtin_neon_vdotq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdot_lane_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x2_t __ret; \ + float32x2_t __s0 = __p0; \ + mfloat8x8_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ 
+ __ret = (float32x2_t) __builtin_neon_vdot_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vdot_lane_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x2_t __ret; \ + float32x2_t __s0 = __p0; \ + mfloat8x8_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float32x2_t) __builtin_neon_vdot_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdotq_laneq_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float32x4_t) __builtin_neon_vdotq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vdotq_laneq_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float32x4_t) __builtin_neon_vdotq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vdot_laneq_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x2_t 
__ret; \ + float32x2_t __s0 = __p0; \ + mfloat8x8_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float32x2_t) __builtin_neon_vdot_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vdot_laneq_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x2_t __ret; \ + float32x2_t __s0 = __p0; \ + mfloat8x8_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 1, 0); \ + mfloat8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float32x2_t) __builtin_neon_vdot_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vmlalbq_f16_mf8_fpm((int8x16_t)__p0, __p1, __p2, __p3); + return __ret; +} +#else +__ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float16x8_t __ret; + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float16x8_t) __builtin_neon_vmlalbq_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define 
vmlalbq_lane_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float16x8_t) __builtin_neon_vmlalbq_lane_f16_mf8_fpm((int8x16_t)__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vmlalbq_lane_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float16x8_t) __builtin_neon_vmlalbq_lane_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlalbq_laneq_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float16x8_t) __builtin_neon_vmlalbq_laneq_f16_mf8_fpm((int8x16_t)__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vmlalbq_laneq_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 
5, 4, 3, 2, 1, 0); \ + __ret = (float16x8_t) __builtin_neon_vmlalbq_laneq_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vmlallbbq_f32_mf8_fpm(__p0, __p1, __p2, __p3); + return __ret; +} +#else +__ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float32x4_t __ret; + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float32x4_t) __builtin_neon_vmlallbbq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlallbbq_lane_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float32x4_t) __builtin_neon_vmlallbbq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vmlallbbq_lane_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 
3, 2, 1, 0); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float32x4_t) __builtin_neon_vmlallbbq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlallbbq_laneq_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float32x4_t) __builtin_neon_vmlallbbq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vmlallbbq_laneq_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float32x4_t) __builtin_neon_vmlallbbq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vmlallbtq_f32_mf8_fpm(__p0, __p1, __p2, __p3); + return __ret; +} +#else +__ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float32x4_t __ret; + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + mfloat8x16_t __rev1; 
__rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float32x4_t) __builtin_neon_vmlallbtq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlallbtq_lane_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float32x4_t) __builtin_neon_vmlallbtq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vmlallbtq_lane_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float32x4_t) __builtin_neon_vmlallbtq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlallbtq_laneq_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float32x4_t) __builtin_neon_vmlallbtq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vmlallbtq_laneq_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + 
mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float32x4_t) __builtin_neon_vmlallbtq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vmlalltbq_f32_mf8_fpm(__p0, __p1, __p2, __p3); + return __ret; +} +#else +__ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float32x4_t __ret; + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float32x4_t) __builtin_neon_vmlalltbq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlalltbq_lane_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float32x4_t) __builtin_neon_vmlalltbq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define 
vmlalltbq_lane_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float32x4_t) __builtin_neon_vmlalltbq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlalltbq_laneq_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float32x4_t) __builtin_neon_vmlalltbq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vmlalltbq_laneq_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float32x4_t) __builtin_neon_vmlalltbq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + 
float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vmlallttq_f32_mf8_fpm(__p0, __p1, __p2, __p3); + return __ret; +} +#else +__ai __attribute__((target("fp8fma,neon"))) float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float32x4_t __ret; + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float32x4_t) __builtin_neon_vmlallttq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlallttq_lane_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float32x4_t) __builtin_neon_vmlallttq_lane_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vmlallttq_lane_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float32x4_t) __builtin_neon_vmlallttq_lane_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlallttq_laneq_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ 
+ float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float32x4_t) __builtin_neon_vmlallttq_laneq_f32_mf8_fpm(__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vmlallttq_laneq_f32_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float32x4_t __ret; \ + float32x4_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float32x4_t) __builtin_neon_vmlallttq_laneq_f32_mf8_fpm(__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vmlaltq_f16_mf8_fpm((int8x16_t)__p0, __p1, __p2, __p3); + return __ret; +} +#else +__ai __attribute__((target("fp8fma,neon"))) float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t __p0, mfloat8x16_t __p1, mfloat8x16_t __p2, fpm_t __p3) { + float16x8_t __ret; + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + mfloat8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float16x8_t) __builtin_neon_vmlaltq_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); 
+ return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlaltq_lane_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float16x8_t) __builtin_neon_vmlaltq_lane_f16_mf8_fpm((int8x16_t)__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vmlaltq_lane_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x8_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x8_t __rev2; __rev2 = __builtin_shufflevector(__s2, __s2, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float16x8_t) __builtin_neon_vmlaltq_lane_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vmlaltq_laneq_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + __ret = (float16x8_t) __builtin_neon_vmlaltq_laneq_f16_mf8_fpm((int8x16_t)__s0, __s1, __s2, __p3, __s4); \ + __ret; \ +}) +#else +#define vmlaltq_laneq_f16_mf8_fpm(__p0, __p1, __p2, __p3, __p4) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + mfloat8x16_t __s1 = __p1; \ + mfloat8x16_t __s2 = __p2; \ + fpm_t __s4 = __p4; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + mfloat8x16_t __rev2; __rev2 = 
__builtin_shufflevector(__s2, __s2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float16x8_t) __builtin_neon_vmlaltq_laneq_f16_mf8_fpm((int8x16_t)__rev0, __rev1, __rev2, __p3, __s4); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x16_t __ret; \ + poly8x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (poly8x16_t) __builtin_neon_vluti2_lane_p8((int8x8_t)__s0, (int8x8_t)__s1, __p2, 36); \ + __ret; \ +}) +#else +#define vluti2_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x16_t __ret; \ + poly8x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (poly8x16_t) __builtin_neon_vluti2_lane_p8((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 36); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x16_t __ret; \ + poly8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (poly8x16_t) __builtin_neon_vluti2q_lane_p8((int8x16_t)__s0, (int8x8_t)__s1, __p2, 36); \ + __ret; \ +}) +#else +#define vluti2q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x16_t __ret; \ + poly8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (poly8x16_t) __builtin_neon_vluti2q_lane_p8((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 36); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + 
__ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __ret; \ + uint8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (uint8x16_t) __builtin_neon_vluti2q_lane_u8((int8x16_t)__s0, (int8x8_t)__s1, __p2, 48); \ + __ret; \ +}) +#else +#define vluti2q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __ret; \ + uint8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (uint8x16_t) __builtin_neon_vluti2q_lane_u8((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 48); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __ret; \ + int8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (int8x16_t) __builtin_neon_vluti2q_lane_s8((int8x16_t)__s0, (int8x8_t)__s1, __p2, 32); \ + __ret; \ +}) +#else +#define vluti2q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __ret; \ + int8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (int8x16_t) __builtin_neon_vluti2q_lane_s8((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 32); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __ret; \ + uint8x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (uint8x16_t) __builtin_neon_vluti2_lane_u8((int8x8_t)__s0, 
(int8x8_t)__s1, __p2, 48); \ + __ret; \ +}) +#else +#define vluti2_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __ret; \ + uint8x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (uint8x16_t) __builtin_neon_vluti2_lane_u8((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 48); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __ret; \ + int8x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (int8x16_t) __builtin_neon_vluti2_lane_s8((int8x8_t)__s0, (int8x8_t)__s1, __p2, 32); \ + __ret; \ +}) +#else +#define vluti2_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __ret; \ + int8x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (int8x16_t) __builtin_neon_vluti2_lane_s8((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 32); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __ret; \ + poly16x4_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (poly16x8_t) __builtin_neon_vluti2_lane_p16((int8x8_t)__s0, (int8x8_t)__s1, __p2, 37); \ + __ret; \ +}) +#else +#define vluti2_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __ret; \ + poly16x4_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, 
__s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (poly16x8_t) __builtin_neon_vluti2_lane_p16((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 37); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __ret; \ + poly16x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (poly16x8_t) __builtin_neon_vluti2q_lane_p16((int8x16_t)__s0, (int8x8_t)__s1, __p2, 37); \ + __ret; \ +}) +#else +#define vluti2q_lane_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __ret; \ + poly16x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (poly16x8_t) __builtin_neon_vluti2q_lane_p16((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 37); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __ret; \ + uint16x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (uint16x8_t) __builtin_neon_vluti2q_lane_u16((int8x16_t)__s0, (int8x8_t)__s1, __p2, 49); \ + __ret; \ +}) +#else +#define vluti2q_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __ret; \ + uint16x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (uint16x8_t) __builtin_neon_vluti2q_lane_u16((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 49); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t 
__s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (float16x8_t) __builtin_neon_vluti2q_lane_f16((int8x16_t)__s0, (int8x8_t)__s1, __p2, 40); \ + __ret; \ +}) +#else +#define vluti2q_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float16x8_t) __builtin_neon_vluti2q_lane_f16((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 40); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __ret; \ + int16x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (int16x8_t) __builtin_neon_vluti2q_lane_s16((int8x16_t)__s0, (int8x8_t)__s1, __p2, 33); \ + __ret; \ +}) +#else +#define vluti2q_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __ret; \ + int16x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (int16x8_t) __builtin_neon_vluti2q_lane_s16((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 33); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __ret; \ + uint16x4_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (uint16x8_t) __builtin_neon_vluti2_lane_u16((int8x8_t)__s0, (int8x8_t)__s1, __p2, 49); \ + __ret; \ +}) +#else +#define vluti2_lane_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __ret; \ + uint16x4_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, 
__s0, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (uint16x8_t) __builtin_neon_vluti2_lane_u16((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 49); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __ret; \ + float16x4_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (float16x8_t) __builtin_neon_vluti2_lane_f16((int8x8_t)__s0, (int8x8_t)__s1, __p2, 40); \ + __ret; \ +}) +#else +#define vluti2_lane_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __ret; \ + float16x4_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float16x8_t) __builtin_neon_vluti2_lane_f16((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 40); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __ret; \ + int16x4_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (int16x8_t) __builtin_neon_vluti2_lane_s16((int8x8_t)__s0, (int8x8_t)__s1, __p2, 33); \ + __ret; \ +}) +#else +#define vluti2_lane_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __ret; \ + int16x4_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (int16x8_t) __builtin_neon_vluti2_lane_s16((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 33); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_laneq_p8(__p0, __p1, __p2) 
__extension__ ({ \ + poly8x16_t __ret; \ + poly8x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (poly8x16_t) __builtin_neon_vluti2_laneq_p8((int8x8_t)__s0, (int8x16_t)__s1, __p2, 36); \ + __ret; \ +}) +#else +#define vluti2_laneq_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x16_t __ret; \ + poly8x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + poly8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (poly8x16_t) __builtin_neon_vluti2_laneq_p8((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_laneq_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x16_t __ret; \ + poly8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (poly8x16_t) __builtin_neon_vluti2q_laneq_p8((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \ + __ret; \ +}) +#else +#define vluti2q_laneq_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x16_t __ret; \ + poly8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (poly8x16_t) __builtin_neon_vluti2q_laneq_p8((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_laneq_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __ret; \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (uint8x16_t) __builtin_neon_vluti2q_laneq_u8((int8x16_t)__s0, (int8x16_t)__s1, __p2, 48); \ + __ret; \ 
+}) +#else +#define vluti2q_laneq_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __ret; \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (uint8x16_t) __builtin_neon_vluti2q_laneq_u8((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_laneq_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __ret; \ + int8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (int8x16_t) __builtin_neon_vluti2q_laneq_s8((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ + __ret; \ +}) +#else +#define vluti2q_laneq_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __ret; \ + int8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (int8x16_t) __builtin_neon_vluti2q_laneq_s8((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_laneq_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __ret; \ + uint8x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (uint8x16_t) __builtin_neon_vluti2_laneq_u8((int8x8_t)__s0, (int8x16_t)__s1, __p2, 48); \ + __ret; \ +}) +#else +#define vluti2_laneq_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __ret; \ + uint8x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + uint8x8_t __rev0; __rev0 = 
__builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (uint8x16_t) __builtin_neon_vluti2_laneq_u8((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_laneq_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __ret; \ + int8x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (int8x16_t) __builtin_neon_vluti2_laneq_s8((int8x8_t)__s0, (int8x16_t)__s1, __p2, 32); \ + __ret; \ +}) +#else +#define vluti2_laneq_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __ret; \ + int8x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + int8x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (int8x16_t) __builtin_neon_vluti2_laneq_s8((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_laneq_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __ret; \ + poly16x4_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (poly16x8_t) __builtin_neon_vluti2_laneq_p16((int8x8_t)__s0, (int8x16_t)__s1, __p2, 37); \ + __ret; \ +}) +#else +#define vluti2_laneq_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __ret; \ + poly16x4_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (poly16x8_t) __builtin_neon_vluti2_laneq_p16((int8x8_t)__rev0, 
(int8x16_t)__rev1, __p2, 37); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_laneq_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __ret; \ + poly16x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (poly16x8_t) __builtin_neon_vluti2q_laneq_p16((int8x16_t)__s0, (int8x16_t)__s1, __p2, 37); \ + __ret; \ +}) +#else +#define vluti2q_laneq_p16(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __ret; \ + poly16x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (poly16x8_t) __builtin_neon_vluti2q_laneq_p16((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 37); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __ret; \ + uint16x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (uint16x8_t) __builtin_neon_vluti2q_laneq_u16((int8x16_t)__s0, (int8x16_t)__s1, __p2, 49); \ + __ret; \ +}) +#else +#define vluti2q_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __ret; \ + uint16x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (uint16x8_t) __builtin_neon_vluti2q_laneq_u16((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + 
uint8x16_t __s1 = __p1; \ + __ret = (float16x8_t) __builtin_neon_vluti2q_laneq_f16((int8x16_t)__s0, (int8x16_t)__s1, __p2, 40); \ + __ret; \ +}) +#else +#define vluti2q_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __ret; \ + float16x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float16x8_t) __builtin_neon_vluti2q_laneq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 40); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __ret; \ + int16x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (int16x8_t) __builtin_neon_vluti2q_laneq_s16((int8x16_t)__s0, (int8x16_t)__s1, __p2, 33); \ + __ret; \ +}) +#else +#define vluti2q_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __ret; \ + int16x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + int16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (int16x8_t) __builtin_neon_vluti2q_laneq_s16((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __ret; \ + uint16x4_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (uint16x8_t) __builtin_neon_vluti2_laneq_u16((int8x8_t)__s0, (int8x16_t)__s1, __p2, 49); \ + __ret; \ +}) +#else +#define vluti2_laneq_u16(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __ret; \ + uint16x4_t __s0 = __p0; \ + uint8x16_t __s1 = 
__p1; \ + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (uint16x8_t) __builtin_neon_vluti2_laneq_u16((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 49); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __ret; \ + float16x4_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (float16x8_t) __builtin_neon_vluti2_laneq_f16((int8x8_t)__s0, (int8x16_t)__s1, __p2, 40); \ + __ret; \ +}) +#else +#define vluti2_laneq_f16(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __ret; \ + float16x4_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float16x8_t) __builtin_neon_vluti2_laneq_f16((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 40); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __ret; \ + int16x4_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (int16x8_t) __builtin_neon_vluti2_laneq_s16((int8x8_t)__s0, (int8x16_t)__s1, __p2, 33); \ + __ret; \ +}) +#else +#define vluti2_laneq_s16(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __ret; \ + int16x4_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + int16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (int16x8_t) __builtin_neon_vluti2_laneq_s16((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 33); \ + 
__ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x16_t __ret; \ + poly8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (poly8x16_t) __builtin_neon_vluti4q_lane_p8((int8x16_t)__s0, (int8x8_t)__s1, __p2, 36); \ + __ret; \ +}) +#else +#define vluti4q_lane_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x16_t __ret; \ + poly8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (poly8x16_t) __builtin_neon_vluti4q_lane_p8((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 36); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __ret; \ + uint8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (uint8x16_t) __builtin_neon_vluti4q_lane_u8((int8x16_t)__s0, (int8x8_t)__s1, __p2, 48); \ + __ret; \ +}) +#else +#define vluti4q_lane_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __ret; \ + uint8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (uint8x16_t) __builtin_neon_vluti4q_lane_u8((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 48); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __ret; \ + int8x16_t __s0 = __p0; \ + uint8x8_t 
__s1 = __p1; \ + __ret = (int8x16_t) __builtin_neon_vluti4q_lane_s8((int8x16_t)__s0, (int8x8_t)__s1, __p2, 32); \ + __ret; \ +}) +#else +#define vluti4q_lane_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __ret; \ + int8x16_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (int8x16_t) __builtin_neon_vluti4q_lane_s8((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 32); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_lane_p16_x2(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __ret; \ + poly16x8x2_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (poly16x8_t) __builtin_neon_vluti4q_lane_p16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x8_t)__s1, __p2, 37); \ + __ret; \ +}) +#else +#define vluti4q_lane_p16_x2(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __ret; \ + poly16x8x2_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + poly16x8x2_t __rev0; \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (poly16x8_t) __builtin_neon_vluti4q_lane_p16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, __p2, 37); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_lane_u16_x2(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __ret; \ + uint16x8x2_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (uint16x8_t) __builtin_neon_vluti4q_lane_u16_x2((int8x16_t)__s0.val[0], 
(int8x16_t)__s0.val[1], (int8x8_t)__s1, __p2, 49); \ + __ret; \ +}) +#else +#define vluti4q_lane_u16_x2(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __ret; \ + uint16x8x2_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + uint16x8x2_t __rev0; \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (uint16x8_t) __builtin_neon_vluti4q_lane_u16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, __p2, 49); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_lane_f16_x2(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __ret; \ + float16x8x2_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (float16x8_t) __builtin_neon_vluti4q_lane_f16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x8_t)__s1, __p2, 40); \ + __ret; \ +}) +#else +#define vluti4q_lane_f16_x2(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __ret; \ + float16x8x2_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + float16x8x2_t __rev0; \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float16x8_t) __builtin_neon_vluti4q_lane_f16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, __p2, 40); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_lane_s16_x2(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __ret; \ + int16x8x2_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (int16x8_t) 
__builtin_neon_vluti4q_lane_s16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x8_t)__s1, __p2, 33); \ + __ret; \ +}) +#else +#define vluti4q_lane_s16_x2(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __ret; \ + int16x8x2_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + int16x8x2_t __rev0; \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (int16x8_t) __builtin_neon_vluti4q_lane_s16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, __p2, 33); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_laneq_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x16_t __ret; \ + poly8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (poly8x16_t) __builtin_neon_vluti4q_laneq_p8((int8x16_t)__s0, (int8x16_t)__s1, __p2, 36); \ + __ret; \ +}) +#else +#define vluti4q_laneq_p8(__p0, __p1, __p2) __extension__ ({ \ + poly8x16_t __ret; \ + poly8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + poly8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (poly8x16_t) __builtin_neon_vluti4q_laneq_p8((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 36); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_laneq_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __ret; \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (uint8x16_t) __builtin_neon_vluti4q_laneq_u8((int8x16_t)__s0, (int8x16_t)__s1, __p2, 
48); \ + __ret; \ +}) +#else +#define vluti4q_laneq_u8(__p0, __p1, __p2) __extension__ ({ \ + uint8x16_t __ret; \ + uint8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (uint8x16_t) __builtin_neon_vluti4q_laneq_u8((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 48); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_laneq_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __ret; \ + int8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (int8x16_t) __builtin_neon_vluti4q_laneq_s8((int8x16_t)__s0, (int8x16_t)__s1, __p2, 32); \ + __ret; \ +}) +#else +#define vluti4q_laneq_s8(__p0, __p1, __p2) __extension__ ({ \ + int8x16_t __ret; \ + int8x16_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + int8x16_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (int8x16_t) __builtin_neon_vluti4q_laneq_s8((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 32); \ + __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_laneq_p16_x2(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __ret; \ + poly16x8x2_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (poly16x8_t) __builtin_neon_vluti4q_laneq_p16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x16_t)__s1, __p2, 37); \ + __ret; \ +}) +#else +#define vluti4q_laneq_p16_x2(__p0, __p1, __p2) __extension__ ({ \ + poly16x8_t __ret; \ + poly16x8x2_t __s0 
= __p0; \ + uint8x16_t __s1 = __p1; \ + poly16x8x2_t __rev0; \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (poly16x8_t) __builtin_neon_vluti4q_laneq_p16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, __p2, 37); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_laneq_u16_x2(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __ret; \ + uint16x8x2_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (uint16x8_t) __builtin_neon_vluti4q_laneq_u16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x16_t)__s1, __p2, 49); \ + __ret; \ +}) +#else +#define vluti4q_laneq_u16_x2(__p0, __p1, __p2) __extension__ ({ \ + uint16x8_t __ret; \ + uint16x8x2_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + uint16x8x2_t __rev0; \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (uint16x8_t) __builtin_neon_vluti4q_laneq_u16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, __p2, 49); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_laneq_f16_x2(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __ret; \ + float16x8x2_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (float16x8_t) __builtin_neon_vluti4q_laneq_f16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x16_t)__s1, __p2, 40); \ + 
__ret; \ +}) +#else +#define vluti4q_laneq_f16_x2(__p0, __p1, __p2) __extension__ ({ \ + float16x8_t __ret; \ + float16x8x2_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + float16x8x2_t __rev0; \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (float16x8_t) __builtin_neon_vluti4q_laneq_f16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, __p2, 40); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_laneq_s16_x2(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __ret; \ + int16x8x2_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (int16x8_t) __builtin_neon_vluti4q_laneq_s16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x16_t)__s1, __p2, 33); \ + __ret; \ +}) +#else +#define vluti4q_laneq_s16_x2(__p0, __p1, __p2) __extension__ ({ \ + int16x8_t __ret; \ + int16x8x2_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + int16x8x2_t __rev0; \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (int16x8_t) __builtin_neon_vluti4q_laneq_s16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, __p2, 33); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ + bfloat16x8_t __ret; \ + bfloat16x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = 
(bfloat16x8_t) __builtin_neon_vluti2q_lane_bf16((int8x16_t)__s0, (int8x8_t)__s1, __p2, 43); \ + __ret; \ +}) +#else +#define vluti2q_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ + bfloat16x8_t __ret; \ + bfloat16x8_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (bfloat16x8_t) __builtin_neon_vluti2q_lane_bf16((int8x16_t)__rev0, (int8x8_t)__rev1, __p2, 43); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ + bfloat16x8_t __ret; \ + bfloat16x4_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (bfloat16x8_t) __builtin_neon_vluti2_lane_bf16((int8x8_t)__s0, (int8x8_t)__s1, __p2, 43); \ + __ret; \ +}) +#else +#define vluti2_lane_bf16(__p0, __p1, __p2) __extension__ ({ \ + bfloat16x8_t __ret; \ + bfloat16x4_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (bfloat16x8_t) __builtin_neon_vluti2_lane_bf16((int8x8_t)__rev0, (int8x8_t)__rev1, __p2, 43); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2q_laneq_bf16(__p0, __p1, __p2) __extension__ ({ \ + bfloat16x8_t __ret; \ + bfloat16x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (bfloat16x8_t) __builtin_neon_vluti2q_laneq_bf16((int8x16_t)__s0, (int8x16_t)__s1, __p2, 43); \ + __ret; \ +}) +#else +#define vluti2q_laneq_bf16(__p0, __p1, __p2) __extension__ ({ \ + bfloat16x8_t __ret; \ + bfloat16x8_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + bfloat16x8_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 7, 6, 5, 
4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (bfloat16x8_t) __builtin_neon_vluti2q_laneq_bf16((int8x16_t)__rev0, (int8x16_t)__rev1, __p2, 43); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti2_laneq_bf16(__p0, __p1, __p2) __extension__ ({ \ + bfloat16x8_t __ret; \ + bfloat16x4_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (bfloat16x8_t) __builtin_neon_vluti2_laneq_bf16((int8x8_t)__s0, (int8x16_t)__s1, __p2, 43); \ + __ret; \ +}) +#else +#define vluti2_laneq_bf16(__p0, __p1, __p2) __extension__ ({ \ + bfloat16x8_t __ret; \ + bfloat16x4_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + bfloat16x4_t __rev0; __rev0 = __builtin_shufflevector(__s0, __s0, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (bfloat16x8_t) __builtin_neon_vluti2_laneq_bf16((int8x8_t)__rev0, (int8x16_t)__rev1, __p2, 43); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_lane_bf16_x2(__p0, __p1, __p2) __extension__ ({ \ + bfloat16x8_t __ret; \ + bfloat16x8x2_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + __ret = (bfloat16x8_t) __builtin_neon_vluti4q_lane_bf16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x8_t)__s1, __p2, 43); \ + __ret; \ +}) +#else +#define vluti4q_lane_bf16_x2(__p0, __p1, __p2) __extension__ ({ \ + bfloat16x8_t __ret; \ + bfloat16x8x2_t __s0 = __p0; \ + uint8x8_t __s1 = __p1; \ + bfloat16x8x2_t __rev0; \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 7, 6, 5, 4, 3, 2, 1, 
0); \ + __ret = (bfloat16x8_t) __builtin_neon_vluti4q_lane_bf16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, __p2, 43); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vluti4q_laneq_bf16_x2(__p0, __p1, __p2) __extension__ ({ \ + bfloat16x8_t __ret; \ + bfloat16x8x2_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + __ret = (bfloat16x8_t) __builtin_neon_vluti4q_laneq_bf16_x2((int8x16_t)__s0.val[0], (int8x16_t)__s0.val[1], (int8x16_t)__s1, __p2, 43); \ + __ret; \ +}) +#else +#define vluti4q_laneq_bf16_x2(__p0, __p1, __p2) __extension__ ({ \ + bfloat16x8_t __ret; \ + bfloat16x8x2_t __s0 = __p0; \ + uint8x16_t __s1 = __p1; \ + bfloat16x8x2_t __rev0; \ + __rev0.val[0] = __builtin_shufflevector(__s0.val[0], __s0.val[0], 7, 6, 5, 4, 3, 2, 1, 0); \ + __rev0.val[1] = __builtin_shufflevector(__s0.val[1], __s0.val[1], 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__s1, __s1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret = (bfloat16x8_t) __builtin_neon_vluti4q_laneq_bf16_x2((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, __p2, 43); \ + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon,faminmax"))) float64x2_t vamaxq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vamaxq_f64((int8x16_t)__p0, (int8x16_t)__p1, 42); return __ret; } +#else +__ai __attribute__((target("neon,faminmax"))) float64x2_t vamaxq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __ret; + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + __ret = (float64x2_t) __builtin_neon_vamaxq_f64((int8x16_t)__rev0, (int8x16_t)__rev1, 42); + __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + #ifdef __LITTLE_ENDIAN__ -__ai __attribute__((target("bf16,neon"))) bfloat16x8_t __a64_vcvtq_low_bf16_f32(float32x4_t __p0) { - bfloat16x8_t __ret; - __ret = (bfloat16x8_t) __builtin_neon___a64_vcvtq_low_bf16_f32((int8x16_t)__p0, 43); +__ai __attribute__((target("neon,faminmax"))) float32x4_t vamaxq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vamaxq_f32((int8x16_t)__p0, (int8x16_t)__p1, 41); return __ret; } #else -__ai __attribute__((target("bf16,neon"))) bfloat16x8_t __a64_vcvtq_low_bf16_f32(float32x4_t __p0) { - bfloat16x8_t __ret; +__ai __attribute__((target("neon,faminmax"))) float32x4_t vamaxq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = (bfloat16x8_t) __builtin_neon___a64_vcvtq_low_bf16_f32((int8x16_t)__rev0, 43); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + __ret = (float32x4_t) __builtin_neon_vamaxq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, 41); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon,faminmax"))) float16x8_t vamaxq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vamaxq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + return __ret; +} +#else +__ai __attribute__((target("neon,faminmax"))) float16x8_t vamaxq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float16x8_t) __builtin_neon_vamaxq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } -__ai 
__attribute__((target("bf16,neon"))) bfloat16x8_t __noswap___a64_vcvtq_low_bf16_f32(float32x4_t __p0) { - bfloat16x8_t __ret; - __ret = (bfloat16x8_t) __builtin_neon___a64_vcvtq_low_bf16_f32((int8x16_t)__p0, 43); +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon,faminmax"))) float32x2_t vamax_f32(float32x2_t __p0, float32x2_t __p1) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vamax_f32((int8x8_t)__p0, (int8x8_t)__p1, 9); + return __ret; +} +#else +__ai __attribute__((target("neon,faminmax"))) float32x2_t vamax_f32(float32x2_t __p0, float32x2_t __p1) { + float32x2_t __ret; + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + __ret = (float32x2_t) __builtin_neon_vamax_f32((int8x8_t)__rev0, (int8x8_t)__rev1, 9); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon,faminmax"))) float16x4_t vamax_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vamax_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + return __ret; +} +#else +__ai __attribute__((target("neon,faminmax"))) float16x4_t vamax_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + __ret = (float16x4_t) __builtin_neon_vamax_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon,faminmax"))) float64x2_t vaminq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vaminq_f64((int8x16_t)__p0, (int8x16_t)__p1, 42); + return __ret; +} +#else +__ai __attribute__((target("neon,faminmax"))) float64x2_t 
vaminq_f64(float64x2_t __p0, float64x2_t __p1) { + float64x2_t __ret; + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + __ret = (float64x2_t) __builtin_neon_vaminq_f64((int8x16_t)__rev0, (int8x16_t)__rev1, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon,faminmax"))) float32x4_t vaminq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vaminq_f32((int8x16_t)__p0, (int8x16_t)__p1, 41); + return __ret; +} +#else +__ai __attribute__((target("neon,faminmax"))) float32x4_t vaminq_f32(float32x4_t __p0, float32x4_t __p1) { + float32x4_t __ret; + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + __ret = (float32x4_t) __builtin_neon_vaminq_f32((int8x16_t)__rev0, (int8x16_t)__rev1, 41); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon,faminmax"))) float16x8_t vaminq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vaminq_f16((int8x16_t)__p0, (int8x16_t)__p1, 40); + return __ret; +} +#else +__ai __attribute__((target("neon,faminmax"))) float16x8_t vaminq_f16(float16x8_t __p0, float16x8_t __p1) { + float16x8_t __ret; + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + __ret = (float16x8_t) __builtin_neon_vaminq_f16((int8x16_t)__rev0, (int8x16_t)__rev1, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon,faminmax"))) float32x2_t 
vamin_f32(float32x2_t __p0, float32x2_t __p1) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vamin_f32((int8x8_t)__p0, (int8x8_t)__p1, 9); + return __ret; +} +#else +__ai __attribute__((target("neon,faminmax"))) float32x2_t vamin_f32(float32x2_t __p0, float32x2_t __p1) { + float32x2_t __ret; + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + __ret = (float32x2_t) __builtin_neon_vamin_f32((int8x8_t)__rev0, (int8x8_t)__rev1, 9); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } #endif +#ifdef __LITTLE_ENDIAN__ +__ai __attribute__((target("neon,faminmax"))) float16x4_t vamin_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vamin_f16((int8x8_t)__p0, (int8x8_t)__p1, 8); + return __ret; +} +#else +__ai __attribute__((target("neon,faminmax"))) float16x4_t vamin_f16(float16x4_t __p0, float16x4_t __p1) { + float16x4_t __ret; + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + __ret = (float16x4_t) __builtin_neon_vamin_f16((int8x8_t)__rev0, (int8x8_t)__rev1, 8); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +#endif + +#endif +#if defined(__aarch64__) || defined(__arm64ec__) +__ai __attribute__((target("aes,neon"))) poly128_t vmull_p64(poly64_t __p0, poly64_t __p1) { + poly128_t __ret; + __ret = (poly128_t) __builtin_neon_vmull_p64(__p0, __p1); + return __ret; +} #ifdef __LITTLE_ENDIAN__ #define vcopyq_lane_bf16(__p0_230, __p1_230, __p2_230, __p3_230) __extension__ ({ \ bfloat16x8_t __ret_230; \ @@ -41363,14 +43458,14 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t __noswap___a64_vcvtq_low_ #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; - __ret = 
vget_low_bf16(__a64_vcvtq_low_bf16_f32(__p0)); + __ret = (bfloat16x4_t) __builtin_neon_vcvt_bf16_f32((int8x16_t)__p0, 11); return __ret; } #else __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vcvt_bf16_f32(float32x4_t __p0) { bfloat16x4_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = __noswap_vget_low_bf16(__noswap___a64_vcvtq_low_bf16_f32(__rev0)); + __ret = (bfloat16x4_t) __builtin_neon_vcvt_bf16_f32((int8x16_t)__rev0, 11); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } @@ -41396,14 +43491,14 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcvtq_high_bf16_f32(bfloa #ifdef __LITTLE_ENDIAN__ __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; - __ret = __a64_vcvtq_low_bf16_f32(__p0); + __ret = (bfloat16x8_t) __builtin_neon_vcvtq_low_bf16_f32((int8x16_t)__p0, 43); return __ret; } #else __ai __attribute__((target("bf16,neon"))) bfloat16x8_t vcvtq_low_bf16_f32(float32x4_t __p0) { bfloat16x8_t __ret; float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - __ret = __noswap___a64_vcvtq_low_bf16_f32(__rev0); + __ret = (bfloat16x8_t) __builtin_neon_vcvtq_low_bf16_f32((int8x16_t)__rev0, 43); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); return __ret; } @@ -41705,9 +43800,7 @@ __ai __attribute__((target("bf16,neon"))) bfloat16x4_t vreinterpret_bf16_s16(int uint32x4_t __s0_238 = __p0_238; \ uint8x16_t __s1_238 = __p1_238; \ uint8x16_t __s2_238 = __p2_238; \ -uint8x16_t __reint_238 = __s2_238; \ -uint32x4_t __reint1_238 = splatq_laneq_u32(*(uint32x4_t *) &__reint_238, __p3_238); \ - __ret_238 = vdotq_u32(__s0_238, __s1_238, *(uint8x16_t *) &__reint1_238); \ + __ret_238 = vdotq_u32(__s0_238, __s1_238, __builtin_bit_cast(uint8x16_t, splatq_laneq_u32(__builtin_bit_cast(uint32x4_t, __s2_238), __p3_238))); \ __ret_238; \ }) #else @@ -41719,9 +43812,7 @@ uint32x4_t __reint1_238 = 
splatq_laneq_u32(*(uint32x4_t *) &__reint_238, __p3_23 uint32x4_t __rev0_239; __rev0_239 = __builtin_shufflevector(__s0_239, __s0_239, 3, 2, 1, 0); \ uint8x16_t __rev1_239; __rev1_239 = __builtin_shufflevector(__s1_239, __s1_239, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev2_239; __rev2_239 = __builtin_shufflevector(__s2_239, __s2_239, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ -uint8x16_t __reint_239 = __rev2_239; \ -uint32x4_t __reint1_239 = __noswap_splatq_laneq_u32(*(uint32x4_t *) &__reint_239, __p3_239); \ - __ret_239 = __noswap_vdotq_u32(__rev0_239, __rev1_239, *(uint8x16_t *) &__reint1_239); \ + __ret_239 = __noswap_vdotq_u32(__rev0_239, __rev1_239, __builtin_bit_cast(uint8x16_t, __noswap_splatq_laneq_u32(__builtin_bit_cast(uint32x4_t, __rev2_239), __p3_239))); \ __ret_239 = __builtin_shufflevector(__ret_239, __ret_239, 3, 2, 1, 0); \ __ret_239; \ }) @@ -41733,9 +43824,7 @@ uint32x4_t __reint1_239 = __noswap_splatq_laneq_u32(*(uint32x4_t *) &__reint_239 int32x4_t __s0_240 = __p0_240; \ int8x16_t __s1_240 = __p1_240; \ int8x16_t __s2_240 = __p2_240; \ -int8x16_t __reint_240 = __s2_240; \ -int32x4_t __reint1_240 = splatq_laneq_s32(*(int32x4_t *) &__reint_240, __p3_240); \ - __ret_240 = vdotq_s32(__s0_240, __s1_240, *(int8x16_t *) &__reint1_240); \ + __ret_240 = vdotq_s32(__s0_240, __s1_240, __builtin_bit_cast(int8x16_t, splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_240), __p3_240))); \ __ret_240; \ }) #else @@ -41747,9 +43836,7 @@ int32x4_t __reint1_240 = splatq_laneq_s32(*(int32x4_t *) &__reint_240, __p3_240) int32x4_t __rev0_241; __rev0_241 = __builtin_shufflevector(__s0_241, __s0_241, 3, 2, 1, 0); \ int8x16_t __rev1_241; __rev1_241 = __builtin_shufflevector(__s1_241, __s1_241, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev2_241; __rev2_241 = __builtin_shufflevector(__s2_241, __s2_241, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ -int8x16_t __reint_241 = __rev2_241; \ 
-int32x4_t __reint1_241 = __noswap_splatq_laneq_s32(*(int32x4_t *) &__reint_241, __p3_241); \ - __ret_241 = __noswap_vdotq_s32(__rev0_241, __rev1_241, *(int8x16_t *) &__reint1_241); \ + __ret_241 = __noswap_vdotq_s32(__rev0_241, __rev1_241, __builtin_bit_cast(int8x16_t, __noswap_splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_241), __p3_241))); \ __ret_241 = __builtin_shufflevector(__ret_241, __ret_241, 3, 2, 1, 0); \ __ret_241; \ }) @@ -41761,9 +43848,7 @@ int32x4_t __reint1_241 = __noswap_splatq_laneq_s32(*(int32x4_t *) &__reint_241, uint32x2_t __s0_242 = __p0_242; \ uint8x8_t __s1_242 = __p1_242; \ uint8x16_t __s2_242 = __p2_242; \ -uint8x16_t __reint_242 = __s2_242; \ -uint32x2_t __reint1_242 = splat_laneq_u32(*(uint32x4_t *) &__reint_242, __p3_242); \ - __ret_242 = vdot_u32(__s0_242, __s1_242, *(uint8x8_t *) &__reint1_242); \ + __ret_242 = vdot_u32(__s0_242, __s1_242, __builtin_bit_cast(uint8x8_t, splat_laneq_u32(__builtin_bit_cast(uint32x4_t, __s2_242), __p3_242))); \ __ret_242; \ }) #else @@ -41775,9 +43860,7 @@ uint32x2_t __reint1_242 = splat_laneq_u32(*(uint32x4_t *) &__reint_242, __p3_242 uint32x2_t __rev0_243; __rev0_243 = __builtin_shufflevector(__s0_243, __s0_243, 1, 0); \ uint8x8_t __rev1_243; __rev1_243 = __builtin_shufflevector(__s1_243, __s1_243, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev2_243; __rev2_243 = __builtin_shufflevector(__s2_243, __s2_243, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ -uint8x16_t __reint_243 = __rev2_243; \ -uint32x2_t __reint1_243 = __noswap_splat_laneq_u32(*(uint32x4_t *) &__reint_243, __p3_243); \ - __ret_243 = __noswap_vdot_u32(__rev0_243, __rev1_243, *(uint8x8_t *) &__reint1_243); \ + __ret_243 = __noswap_vdot_u32(__rev0_243, __rev1_243, __builtin_bit_cast(uint8x8_t, __noswap_splat_laneq_u32(__builtin_bit_cast(uint32x4_t, __rev2_243), __p3_243))); \ __ret_243 = __builtin_shufflevector(__ret_243, __ret_243, 1, 0); \ __ret_243; \ }) @@ -41789,9 +43872,7 @@ uint32x2_t __reint1_243 = 
__noswap_splat_laneq_u32(*(uint32x4_t *) &__reint_243, int32x2_t __s0_244 = __p0_244; \ int8x8_t __s1_244 = __p1_244; \ int8x16_t __s2_244 = __p2_244; \ -int8x16_t __reint_244 = __s2_244; \ -int32x2_t __reint1_244 = splat_laneq_s32(*(int32x4_t *) &__reint_244, __p3_244); \ - __ret_244 = vdot_s32(__s0_244, __s1_244, *(int8x8_t *) &__reint1_244); \ + __ret_244 = vdot_s32(__s0_244, __s1_244, __builtin_bit_cast(int8x8_t, splat_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_244), __p3_244))); \ __ret_244; \ }) #else @@ -41803,9 +43884,7 @@ int32x2_t __reint1_244 = splat_laneq_s32(*(int32x4_t *) &__reint_244, __p3_244); int32x2_t __rev0_245; __rev0_245 = __builtin_shufflevector(__s0_245, __s0_245, 1, 0); \ int8x8_t __rev1_245; __rev1_245 = __builtin_shufflevector(__s1_245, __s1_245, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev2_245; __rev2_245 = __builtin_shufflevector(__s2_245, __s2_245, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ -int8x16_t __reint_245 = __rev2_245; \ -int32x2_t __reint1_245 = __noswap_splat_laneq_s32(*(int32x4_t *) &__reint_245, __p3_245); \ - __ret_245 = __noswap_vdot_s32(__rev0_245, __rev1_245, *(int8x8_t *) &__reint1_245); \ + __ret_245 = __noswap_vdot_s32(__rev0_245, __rev1_245, __builtin_bit_cast(int8x8_t, __noswap_splat_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_245), __p3_245))); \ __ret_245 = __builtin_shufflevector(__ret_245, __ret_245, 1, 0); \ __ret_245; \ }) @@ -43018,8 +45097,7 @@ __ai __attribute__((target("fullfp16,neon"))) float16x4_t vsqrt_f16(float16x4_t int32x4_t __s0_270 = __p0_270; \ int8x16_t __s1_270 = __p1_270; \ uint8x16_t __s2_270 = __p2_270; \ -uint8x16_t __reint_270 = __s2_270; \ - __ret_270 = vusdotq_s32(__s0_270, (uint8x16_t)(splatq_laneq_s32(*(int32x4_t *) &__reint_270, __p3_270)), __s1_270); \ + __ret_270 = vusdotq_s32(__s0_270, (uint8x16_t)(splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_270), __p3_270)), __s1_270); \ __ret_270; \ }) #else @@ -43031,8 +45109,7 @@ uint8x16_t __reint_270 = __s2_270; \ 
int32x4_t __rev0_271; __rev0_271 = __builtin_shufflevector(__s0_271, __s0_271, 3, 2, 1, 0); \ int8x16_t __rev1_271; __rev1_271 = __builtin_shufflevector(__s1_271, __s1_271, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev2_271; __rev2_271 = __builtin_shufflevector(__s2_271, __s2_271, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ -uint8x16_t __reint_271 = __rev2_271; \ - __ret_271 = __noswap_vusdotq_s32(__rev0_271, (uint8x16_t)(__noswap_splatq_laneq_s32(*(int32x4_t *) &__reint_271, __p3_271)), __rev1_271); \ + __ret_271 = __noswap_vusdotq_s32(__rev0_271, (uint8x16_t)(__noswap_splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_271), __p3_271)), __rev1_271); \ __ret_271 = __builtin_shufflevector(__ret_271, __ret_271, 3, 2, 1, 0); \ __ret_271; \ }) @@ -43044,8 +45121,7 @@ uint8x16_t __reint_271 = __rev2_271; \ int32x2_t __s0_272 = __p0_272; \ int8x8_t __s1_272 = __p1_272; \ uint8x16_t __s2_272 = __p2_272; \ -uint8x16_t __reint_272 = __s2_272; \ - __ret_272 = vusdot_s32(__s0_272, (uint8x8_t)(splat_laneq_s32(*(int32x4_t *) &__reint_272, __p3_272)), __s1_272); \ + __ret_272 = vusdot_s32(__s0_272, (uint8x8_t)(splat_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_272), __p3_272)), __s1_272); \ __ret_272; \ }) #else @@ -43057,8 +45133,7 @@ uint8x16_t __reint_272 = __s2_272; \ int32x2_t __rev0_273; __rev0_273 = __builtin_shufflevector(__s0_273, __s0_273, 1, 0); \ int8x8_t __rev1_273; __rev1_273 = __builtin_shufflevector(__s1_273, __s1_273, 7, 6, 5, 4, 3, 2, 1, 0); \ uint8x16_t __rev2_273; __rev2_273 = __builtin_shufflevector(__s2_273, __s2_273, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ -uint8x16_t __reint_273 = __rev2_273; \ - __ret_273 = __noswap_vusdot_s32(__rev0_273, (uint8x8_t)(__noswap_splat_laneq_s32(*(int32x4_t *) &__reint_273, __p3_273)), __rev1_273); \ + __ret_273 = __noswap_vusdot_s32(__rev0_273, (uint8x8_t)(__noswap_splat_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_273), __p3_273)), __rev1_273); \ __ret_273 = 
__builtin_shufflevector(__ret_273, __ret_273, 1, 0); \ __ret_273; \ }) @@ -43070,8 +45145,7 @@ uint8x16_t __reint_273 = __rev2_273; \ int32x4_t __s0_274 = __p0_274; \ uint8x16_t __s1_274 = __p1_274; \ int8x16_t __s2_274 = __p2_274; \ -int8x16_t __reint_274 = __s2_274; \ - __ret_274 = vusdotq_s32(__s0_274, __s1_274, (int8x16_t)(splatq_laneq_s32(*(int32x4_t *) &__reint_274, __p3_274))); \ + __ret_274 = vusdotq_s32(__s0_274, __s1_274, (int8x16_t)(splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_274), __p3_274))); \ __ret_274; \ }) #else @@ -43083,8 +45157,7 @@ int8x16_t __reint_274 = __s2_274; \ int32x4_t __rev0_275; __rev0_275 = __builtin_shufflevector(__s0_275, __s0_275, 3, 2, 1, 0); \ uint8x16_t __rev1_275; __rev1_275 = __builtin_shufflevector(__s1_275, __s1_275, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev2_275; __rev2_275 = __builtin_shufflevector(__s2_275, __s2_275, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ -int8x16_t __reint_275 = __rev2_275; \ - __ret_275 = __noswap_vusdotq_s32(__rev0_275, __rev1_275, (int8x16_t)(__noswap_splatq_laneq_s32(*(int32x4_t *) &__reint_275, __p3_275))); \ + __ret_275 = __noswap_vusdotq_s32(__rev0_275, __rev1_275, (int8x16_t)(__noswap_splatq_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_275), __p3_275))); \ __ret_275 = __builtin_shufflevector(__ret_275, __ret_275, 3, 2, 1, 0); \ __ret_275; \ }) @@ -43096,8 +45169,7 @@ int8x16_t __reint_275 = __rev2_275; \ int32x2_t __s0_276 = __p0_276; \ uint8x8_t __s1_276 = __p1_276; \ int8x16_t __s2_276 = __p2_276; \ -int8x16_t __reint_276 = __s2_276; \ - __ret_276 = vusdot_s32(__s0_276, __s1_276, (int8x8_t)(splat_laneq_s32(*(int32x4_t *) &__reint_276, __p3_276))); \ + __ret_276 = vusdot_s32(__s0_276, __s1_276, (int8x8_t)(splat_laneq_s32(__builtin_bit_cast(int32x4_t, __s2_276), __p3_276))); \ __ret_276; \ }) #else @@ -43109,8 +45181,7 @@ int8x16_t __reint_276 = __s2_276; \ int32x2_t __rev0_277; __rev0_277 = __builtin_shufflevector(__s0_277, __s0_277, 1, 
0); \ uint8x8_t __rev1_277; __rev1_277 = __builtin_shufflevector(__s1_277, __s1_277, 7, 6, 5, 4, 3, 2, 1, 0); \ int8x16_t __rev2_277; __rev2_277 = __builtin_shufflevector(__s2_277, __s2_277, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ -int8x16_t __reint_277 = __rev2_277; \ - __ret_277 = __noswap_vusdot_s32(__rev0_277, __rev1_277, (int8x8_t)(__noswap_splat_laneq_s32(*(int32x4_t *) &__reint_277, __p3_277))); \ + __ret_277 = __noswap_vusdot_s32(__rev0_277, __rev1_277, (int8x8_t)(__noswap_splat_laneq_s32(__builtin_bit_cast(int32x4_t, __rev2_277), __p3_277))); \ __ret_277 = __builtin_shufflevector(__ret_277, __ret_277, 1, 0); \ __ret_277; \ }) @@ -57773,6 +59844,11 @@ __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_s64(int64x1_t __p __ret = (poly8x8_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_mf8(mfloat8x8_t __p0) { + poly8x8_t __ret; + __ret = (poly8x8_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) poly8x8_t vreinterpret_p8_s16(int16x4_t __p0) { poly8x8_t __ret; __ret = (poly8x8_t)(__p0); @@ -57838,6 +59914,11 @@ __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_s64(int64x1_t _ __ret = (poly64x1_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_mf8(mfloat8x8_t __p0) { + poly64x1_t __ret; + __ret = (poly64x1_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) poly64x1_t vreinterpret_p64_s16(int16x4_t __p0) { poly64x1_t __ret; __ret = (poly64x1_t)(__p0); @@ -57903,6 +59984,11 @@ __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_s64(int64x1_t _ __ret = (poly16x4_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_mf8(mfloat8x8_t __p0) { + poly16x4_t __ret; + __ret = (poly16x4_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) poly16x4_t vreinterpret_p16_s16(int16x4_t __p0) { poly16x4_t __ret; __ret = (poly16x4_t)(__p0); @@ -57973,6 +60059,11 
@@ __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_s64(int64x2_t _ __ret = (poly8x16_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_mf8(mfloat8x16_t __p0) { + poly8x16_t __ret; + __ret = (poly8x16_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) poly8x16_t vreinterpretq_p8_s16(int16x8_t __p0) { poly8x16_t __ret; __ret = (poly8x16_t)(__p0); @@ -58043,6 +60134,11 @@ __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_s64(int64x2_t __ret = (poly128_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_mf8(mfloat8x16_t __p0) { + poly128_t __ret; + __ret = (poly128_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) poly128_t vreinterpretq_p128_s16(int16x8_t __p0) { poly128_t __ret; __ret = (poly128_t)(__p0); @@ -58113,6 +60209,11 @@ __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_s64(int64x2_t __ret = (poly64x2_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_mf8(mfloat8x16_t __p0) { + poly64x2_t __ret; + __ret = (poly64x2_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) poly64x2_t vreinterpretq_p64_s16(int16x8_t __p0) { poly64x2_t __ret; __ret = (poly64x2_t)(__p0); @@ -58183,6 +60284,11 @@ __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_s64(int64x2_t __ret = (poly16x8_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_mf8(mfloat8x16_t __p0) { + poly16x8_t __ret; + __ret = (poly16x8_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) poly16x8_t vreinterpretq_p16_s16(int16x8_t __p0) { poly16x8_t __ret; __ret = (poly16x8_t)(__p0); @@ -58253,6 +60359,11 @@ __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_s64(int64x2_t _ __ret = (uint8x16_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_mf8(mfloat8x16_t __p0) { + 
uint8x16_t __ret; + __ret = (uint8x16_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) uint8x16_t vreinterpretq_u8_s16(int16x8_t __p0) { uint8x16_t __ret; __ret = (uint8x16_t)(__p0); @@ -58323,6 +60434,11 @@ __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_s64(int64x2_t __ret = (uint32x4_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_mf8(mfloat8x16_t __p0) { + uint32x4_t __ret; + __ret = (uint32x4_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) uint32x4_t vreinterpretq_u32_s16(int16x8_t __p0) { uint32x4_t __ret; __ret = (uint32x4_t)(__p0); @@ -58393,6 +60509,11 @@ __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_s64(int64x2_t __ret = (uint64x2_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_mf8(mfloat8x16_t __p0) { + uint64x2_t __ret; + __ret = (uint64x2_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) uint64x2_t vreinterpretq_u64_s16(int16x8_t __p0) { uint64x2_t __ret; __ret = (uint64x2_t)(__p0); @@ -58463,6 +60584,11 @@ __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_s64(int64x2_t __ret = (uint16x8_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_mf8(mfloat8x16_t __p0) { + uint16x8_t __ret; + __ret = (uint16x8_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) uint16x8_t vreinterpretq_u16_s16(int16x8_t __p0) { uint16x8_t __ret; __ret = (uint16x8_t)(__p0); @@ -58533,6 +60659,11 @@ __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_s64(int64x2_t __ __ret = (int8x16_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_mf8(mfloat8x16_t __p0) { + int8x16_t __ret; + __ret = (int8x16_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) int8x16_t vreinterpretq_s8_s16(int16x8_t __p0) { int8x16_t __ret; __ret = (int8x16_t)(__p0); @@ -58603,6 +60734,11 @@ __ai 
__attribute__((target("neon"))) float64x2_t vreinterpretq_f64_s64(int64x2_t __ret = (float64x2_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_mf8(mfloat8x16_t __p0) { + float64x2_t __ret; + __ret = (float64x2_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) float64x2_t vreinterpretq_f64_s16(int16x8_t __p0) { float64x2_t __ret; __ret = (float64x2_t)(__p0); @@ -58673,6 +60809,11 @@ __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_s64(int64x2_t __ret = (float32x4_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_mf8(mfloat8x16_t __p0) { + float32x4_t __ret; + __ret = (float32x4_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) float32x4_t vreinterpretq_f32_s16(int16x8_t __p0) { float32x4_t __ret; __ret = (float32x4_t)(__p0); @@ -58743,6 +60884,11 @@ __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_s64(int64x2_t __ret = (float16x8_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_mf8(mfloat8x16_t __p0) { + float16x8_t __ret; + __ret = (float16x8_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) float16x8_t vreinterpretq_f16_s16(int16x8_t __p0) { float16x8_t __ret; __ret = (float16x8_t)(__p0); @@ -58813,6 +60959,11 @@ __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_s64(int64x2_t _ __ret = (int32x4_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_mf8(mfloat8x16_t __p0) { + int32x4_t __ret; + __ret = (int32x4_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) int32x4_t vreinterpretq_s32_s16(int16x8_t __p0) { int32x4_t __ret; __ret = (int32x4_t)(__p0); @@ -58883,11 +61034,91 @@ __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_s32(int32x4_t _ __ret = (int64x2_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_mf8(mfloat8x16_t 
__p0) { + int64x2_t __ret; + __ret = (int64x2_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) int64x2_t vreinterpretq_s64_s16(int16x8_t __p0) { int64x2_t __ret; __ret = (int64x2_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_p8(poly8x16_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_p128(poly128_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_p64(poly64x2_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_p16(poly16x8_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_u8(uint8x16_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_u32(uint32x4_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_u64(uint64x2_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_u16(uint16x8_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_s8(int8x16_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_f64(float64x2_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_f32(float32x4_t __p0) { + mfloat8x16_t __ret; + __ret = 
(mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_f16(float16x8_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_s32(int32x4_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_s64(int64x2_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x16_t vreinterpretq_mf8_s16(int16x8_t __p0) { + mfloat8x16_t __ret; + __ret = (mfloat8x16_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_p8(poly8x16_t __p0) { int16x8_t __ret; __ret = (int16x8_t)(__p0); @@ -58958,6 +61189,11 @@ __ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_s64(int64x2_t _ __ret = (int16x8_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) int16x8_t vreinterpretq_s16_mf8(mfloat8x16_t __p0) { + int16x8_t __ret; + __ret = (int16x8_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_p8(poly8x8_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); @@ -59018,6 +61254,11 @@ __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_s64(int64x1_t __p __ret = (uint8x8_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_mf8(mfloat8x8_t __p0) { + uint8x8_t __ret; + __ret = (uint8x8_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) uint8x8_t vreinterpret_u8_s16(int16x4_t __p0) { uint8x8_t __ret; __ret = (uint8x8_t)(__p0); @@ -59083,6 +61324,11 @@ __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_s64(int64x1_t _ __ret = (uint32x2_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_mf8(mfloat8x8_t __p0) { + uint32x2_t __ret; + __ret = (uint32x2_t)(__p0); + return 
__ret; +} __ai __attribute__((target("neon"))) uint32x2_t vreinterpret_u32_s16(int16x4_t __p0) { uint32x2_t __ret; __ret = (uint32x2_t)(__p0); @@ -59148,6 +61394,11 @@ __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_s64(int64x1_t _ __ret = (uint64x1_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_mf8(mfloat8x8_t __p0) { + uint64x1_t __ret; + __ret = (uint64x1_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) uint64x1_t vreinterpret_u64_s16(int16x4_t __p0) { uint64x1_t __ret; __ret = (uint64x1_t)(__p0); @@ -59213,6 +61464,11 @@ __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_s64(int64x1_t _ __ret = (uint16x4_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_mf8(mfloat8x8_t __p0) { + uint16x4_t __ret; + __ret = (uint16x4_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) uint16x4_t vreinterpret_u16_s16(int16x4_t __p0) { uint16x4_t __ret; __ret = (uint16x4_t)(__p0); @@ -59278,6 +61534,11 @@ __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_s64(int64x1_t __p0 __ret = (int8x8_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_mf8(mfloat8x8_t __p0) { + int8x8_t __ret; + __ret = (int8x8_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) int8x8_t vreinterpret_s8_s16(int16x4_t __p0) { int8x8_t __ret; __ret = (int8x8_t)(__p0); @@ -59343,6 +61604,11 @@ __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_s64(int64x1_t __ret = (float64x1_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_mf8(mfloat8x8_t __p0) { + float64x1_t __ret; + __ret = (float64x1_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) float64x1_t vreinterpret_f64_s16(int16x4_t __p0) { float64x1_t __ret; __ret = (float64x1_t)(__p0); @@ -59408,6 +61674,11 @@ __ai __attribute__((target("neon"))) float32x2_t 
vreinterpret_f32_s64(int64x1_t __ret = (float32x2_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_mf8(mfloat8x8_t __p0) { + float32x2_t __ret; + __ret = (float32x2_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) float32x2_t vreinterpret_f32_s16(int16x4_t __p0) { float32x2_t __ret; __ret = (float32x2_t)(__p0); @@ -59473,6 +61744,11 @@ __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_s64(int64x1_t __ret = (float16x4_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_mf8(mfloat8x8_t __p0) { + float16x4_t __ret; + __ret = (float16x4_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) float16x4_t vreinterpret_f16_s16(int16x4_t __p0) { float16x4_t __ret; __ret = (float16x4_t)(__p0); @@ -59538,6 +61814,11 @@ __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_s64(int64x1_t __ __ret = (int32x2_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_mf8(mfloat8x8_t __p0) { + int32x2_t __ret; + __ret = (int32x2_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) int32x2_t vreinterpret_s32_s16(int16x4_t __p0) { int32x2_t __ret; __ret = (int32x2_t)(__p0); @@ -59603,11 +61884,86 @@ __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_s32(int32x2_t __ __ret = (int64x1_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_mf8(mfloat8x8_t __p0) { + int64x1_t __ret; + __ret = (int64x1_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) int64x1_t vreinterpret_s64_s16(int16x4_t __p0) { int64x1_t __ret; __ret = (int64x1_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_p8(poly8x8_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_p64(poly64x1_t __p0) { + mfloat8x8_t __ret; + __ret = 
(mfloat8x8_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_p16(poly16x4_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_u8(uint8x8_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_u32(uint32x2_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_u64(uint64x1_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_u16(uint16x4_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_s8(int8x8_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_f64(float64x1_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_f32(float32x2_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_f16(float16x4_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_s32(int32x2_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_s64(int64x1_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t)(__p0); + return __ret; +} +__ai __attribute__((target("neon"))) mfloat8x8_t vreinterpret_mf8_s16(int16x4_t __p0) { + mfloat8x8_t __ret; + __ret = (mfloat8x8_t)(__p0); + return __ret; +} __ai 
__attribute__((target("neon"))) int16x4_t vreinterpret_s16_p8(poly8x8_t __p0) { int16x4_t __ret; __ret = (int16x4_t)(__p0); @@ -59673,6 +62029,11 @@ __ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_s64(int64x1_t __ __ret = (int16x4_t)(__p0); return __ret; } +__ai __attribute__((target("neon"))) int16x4_t vreinterpret_s16_mf8(mfloat8x8_t __p0) { + int16x4_t __ret; + __ret = (int16x4_t)(__p0); + return __ret; +} __ai __attribute__((target("neon"))) uint64_t vrshld_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vrshld_u64(__p0, __p1); @@ -65616,106 +67977,6 @@ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_f64(float64x2_t __ __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } -__ai __attribute__((target("v8.3a,neon"))) float64x2_t __noswap_vcmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcmlaq_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); - return __ret; -} -#endif - -__ai __attribute__((target("v8.3a,neon"))) float64x1_t vcmla_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vcmla_f64((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); - return __ret; -} -#define vcmla_lane_f64(__p0_792, __p1_792, __p2_792, __p3_792) __extension__ ({ \ - float64x1_t __ret_792; \ - float64x1_t __s0_792 = __p0_792; \ - float64x1_t __s1_792 = __p1_792; \ - float64x1_t __s2_792 = __p2_792; \ -float64x1_t __reint_792 = __s2_792; \ -uint64x2_t __reint1_792 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_792, __p3_792), vgetq_lane_u64(*(uint64x2_t *) &__reint_792, __p3_792)}; \ - __ret_792 = vcmla_f64(__s0_792, __s1_792, *(float64x1_t *) &__reint1_792); \ - __ret_792; \ -}) -#ifdef __LITTLE_ENDIAN__ -#define vcmlaq_lane_f64(__p0_793, __p1_793, __p2_793, __p3_793) __extension__ ({ \ - float64x2_t __ret_793; \ - float64x2_t __s0_793 = 
__p0_793; \ - float64x2_t __s1_793 = __p1_793; \ - float64x1_t __s2_793 = __p2_793; \ -float64x1_t __reint_793 = __s2_793; \ -uint64x2_t __reint1_793 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_793, __p3_793), vgetq_lane_u64(*(uint64x2_t *) &__reint_793, __p3_793)}; \ - __ret_793 = vcmlaq_f64(__s0_793, __s1_793, *(float64x2_t *) &__reint1_793); \ - __ret_793; \ -}) -#else -#define vcmlaq_lane_f64(__p0_794, __p1_794, __p2_794, __p3_794) __extension__ ({ \ - float64x2_t __ret_794; \ - float64x2_t __s0_794 = __p0_794; \ - float64x2_t __s1_794 = __p1_794; \ - float64x1_t __s2_794 = __p2_794; \ - float64x2_t __rev0_794; __rev0_794 = __builtin_shufflevector(__s0_794, __s0_794, 1, 0); \ - float64x2_t __rev1_794; __rev1_794 = __builtin_shufflevector(__s1_794, __s1_794, 1, 0); \ -float64x1_t __reint_794 = __s2_794; \ -uint64x2_t __reint1_794 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_794, __p3_794), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_794, __p3_794)}; \ - __ret_794 = __noswap_vcmlaq_f64(__rev0_794, __rev1_794, *(float64x2_t *) &__reint1_794); \ - __ret_794 = __builtin_shufflevector(__ret_794, __ret_794, 1, 0); \ - __ret_794; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcmla_laneq_f64(__p0_795, __p1_795, __p2_795, __p3_795) __extension__ ({ \ - float64x1_t __ret_795; \ - float64x1_t __s0_795 = __p0_795; \ - float64x1_t __s1_795 = __p1_795; \ - float64x2_t __s2_795 = __p2_795; \ -float64x2_t __reint_795 = __s2_795; \ -uint64x2_t __reint1_795 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_795, __p3_795), vgetq_lane_u64(*(uint64x2_t *) &__reint_795, __p3_795)}; \ - __ret_795 = vcmla_f64(__s0_795, __s1_795, *(float64x1_t *) &__reint1_795); \ - __ret_795; \ -}) -#else -#define vcmla_laneq_f64(__p0_796, __p1_796, __p2_796, __p3_796) __extension__ ({ \ - float64x1_t __ret_796; \ - float64x1_t __s0_796 = __p0_796; \ - float64x1_t __s1_796 = __p1_796; \ - float64x2_t __s2_796 = __p2_796; \ - float64x2_t __rev2_796; 
__rev2_796 = __builtin_shufflevector(__s2_796, __s2_796, 1, 0); \ -float64x2_t __reint_796 = __rev2_796; \ -uint64x2_t __reint1_796 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_796, __p3_796), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_796, __p3_796)}; \ - __ret_796 = vcmla_f64(__s0_796, __s1_796, *(float64x1_t *) &__reint1_796); \ - __ret_796; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcmlaq_laneq_f64(__p0_797, __p1_797, __p2_797, __p3_797) __extension__ ({ \ - float64x2_t __ret_797; \ - float64x2_t __s0_797 = __p0_797; \ - float64x2_t __s1_797 = __p1_797; \ - float64x2_t __s2_797 = __p2_797; \ -float64x2_t __reint_797 = __s2_797; \ -uint64x2_t __reint1_797 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_797, __p3_797), vgetq_lane_u64(*(uint64x2_t *) &__reint_797, __p3_797)}; \ - __ret_797 = vcmlaq_f64(__s0_797, __s1_797, *(float64x2_t *) &__reint1_797); \ - __ret_797; \ -}) -#else -#define vcmlaq_laneq_f64(__p0_798, __p1_798, __p2_798, __p3_798) __extension__ ({ \ - float64x2_t __ret_798; \ - float64x2_t __s0_798 = __p0_798; \ - float64x2_t __s1_798 = __p1_798; \ - float64x2_t __s2_798 = __p2_798; \ - float64x2_t __rev0_798; __rev0_798 = __builtin_shufflevector(__s0_798, __s0_798, 1, 0); \ - float64x2_t __rev1_798; __rev1_798 = __builtin_shufflevector(__s1_798, __s1_798, 1, 0); \ - float64x2_t __rev2_798; __rev2_798 = __builtin_shufflevector(__s2_798, __s2_798, 1, 0); \ -float64x2_t __reint_798 = __rev2_798; \ -uint64x2_t __reint1_798 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_798, __p3_798), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_798, __p3_798)}; \ - __ret_798 = __noswap_vcmlaq_f64(__rev0_798, __rev1_798, *(float64x2_t *) &__reint1_798); \ - __ret_798 = __builtin_shufflevector(__ret_798, __ret_798, 1, 0); \ - __ret_798; \ -}) #endif #ifdef __LITTLE_ENDIAN__ @@ -65734,106 +67995,6 @@ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_rot180_f64(float64 __ret = 
__builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } -__ai __attribute__((target("v8.3a,neon"))) float64x2_t __noswap_vcmlaq_rot180_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcmlaq_rot180_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); - return __ret; -} -#endif - -__ai __attribute__((target("v8.3a,neon"))) float64x1_t vcmla_rot180_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vcmla_rot180_f64((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); - return __ret; -} -#define vcmla_rot180_lane_f64(__p0_799, __p1_799, __p2_799, __p3_799) __extension__ ({ \ - float64x1_t __ret_799; \ - float64x1_t __s0_799 = __p0_799; \ - float64x1_t __s1_799 = __p1_799; \ - float64x1_t __s2_799 = __p2_799; \ -float64x1_t __reint_799 = __s2_799; \ -uint64x2_t __reint1_799 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_799, __p3_799), vgetq_lane_u64(*(uint64x2_t *) &__reint_799, __p3_799)}; \ - __ret_799 = vcmla_rot180_f64(__s0_799, __s1_799, *(float64x1_t *) &__reint1_799); \ - __ret_799; \ -}) -#ifdef __LITTLE_ENDIAN__ -#define vcmlaq_rot180_lane_f64(__p0_800, __p1_800, __p2_800, __p3_800) __extension__ ({ \ - float64x2_t __ret_800; \ - float64x2_t __s0_800 = __p0_800; \ - float64x2_t __s1_800 = __p1_800; \ - float64x1_t __s2_800 = __p2_800; \ -float64x1_t __reint_800 = __s2_800; \ -uint64x2_t __reint1_800 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_800, __p3_800), vgetq_lane_u64(*(uint64x2_t *) &__reint_800, __p3_800)}; \ - __ret_800 = vcmlaq_rot180_f64(__s0_800, __s1_800, *(float64x2_t *) &__reint1_800); \ - __ret_800; \ -}) -#else -#define vcmlaq_rot180_lane_f64(__p0_801, __p1_801, __p2_801, __p3_801) __extension__ ({ \ - float64x2_t __ret_801; \ - float64x2_t __s0_801 = __p0_801; \ - float64x2_t __s1_801 = __p1_801; \ - float64x1_t __s2_801 = __p2_801; \ - float64x2_t __rev0_801; __rev0_801 = 
__builtin_shufflevector(__s0_801, __s0_801, 1, 0); \ - float64x2_t __rev1_801; __rev1_801 = __builtin_shufflevector(__s1_801, __s1_801, 1, 0); \ -float64x1_t __reint_801 = __s2_801; \ -uint64x2_t __reint1_801 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_801, __p3_801), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_801, __p3_801)}; \ - __ret_801 = __noswap_vcmlaq_rot180_f64(__rev0_801, __rev1_801, *(float64x2_t *) &__reint1_801); \ - __ret_801 = __builtin_shufflevector(__ret_801, __ret_801, 1, 0); \ - __ret_801; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcmla_rot180_laneq_f64(__p0_802, __p1_802, __p2_802, __p3_802) __extension__ ({ \ - float64x1_t __ret_802; \ - float64x1_t __s0_802 = __p0_802; \ - float64x1_t __s1_802 = __p1_802; \ - float64x2_t __s2_802 = __p2_802; \ -float64x2_t __reint_802 = __s2_802; \ -uint64x2_t __reint1_802 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_802, __p3_802), vgetq_lane_u64(*(uint64x2_t *) &__reint_802, __p3_802)}; \ - __ret_802 = vcmla_rot180_f64(__s0_802, __s1_802, *(float64x1_t *) &__reint1_802); \ - __ret_802; \ -}) -#else -#define vcmla_rot180_laneq_f64(__p0_803, __p1_803, __p2_803, __p3_803) __extension__ ({ \ - float64x1_t __ret_803; \ - float64x1_t __s0_803 = __p0_803; \ - float64x1_t __s1_803 = __p1_803; \ - float64x2_t __s2_803 = __p2_803; \ - float64x2_t __rev2_803; __rev2_803 = __builtin_shufflevector(__s2_803, __s2_803, 1, 0); \ -float64x2_t __reint_803 = __rev2_803; \ -uint64x2_t __reint1_803 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_803, __p3_803), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_803, __p3_803)}; \ - __ret_803 = vcmla_rot180_f64(__s0_803, __s1_803, *(float64x1_t *) &__reint1_803); \ - __ret_803; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcmlaq_rot180_laneq_f64(__p0_804, __p1_804, __p2_804, __p3_804) __extension__ ({ \ - float64x2_t __ret_804; \ - float64x2_t __s0_804 = __p0_804; \ - float64x2_t __s1_804 = __p1_804; \ - 
float64x2_t __s2_804 = __p2_804; \ -float64x2_t __reint_804 = __s2_804; \ -uint64x2_t __reint1_804 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_804, __p3_804), vgetq_lane_u64(*(uint64x2_t *) &__reint_804, __p3_804)}; \ - __ret_804 = vcmlaq_rot180_f64(__s0_804, __s1_804, *(float64x2_t *) &__reint1_804); \ - __ret_804; \ -}) -#else -#define vcmlaq_rot180_laneq_f64(__p0_805, __p1_805, __p2_805, __p3_805) __extension__ ({ \ - float64x2_t __ret_805; \ - float64x2_t __s0_805 = __p0_805; \ - float64x2_t __s1_805 = __p1_805; \ - float64x2_t __s2_805 = __p2_805; \ - float64x2_t __rev0_805; __rev0_805 = __builtin_shufflevector(__s0_805, __s0_805, 1, 0); \ - float64x2_t __rev1_805; __rev1_805 = __builtin_shufflevector(__s1_805, __s1_805, 1, 0); \ - float64x2_t __rev2_805; __rev2_805 = __builtin_shufflevector(__s2_805, __s2_805, 1, 0); \ -float64x2_t __reint_805 = __rev2_805; \ -uint64x2_t __reint1_805 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_805, __p3_805), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_805, __p3_805)}; \ - __ret_805 = __noswap_vcmlaq_rot180_f64(__rev0_805, __rev1_805, *(float64x2_t *) &__reint1_805); \ - __ret_805 = __builtin_shufflevector(__ret_805, __ret_805, 1, 0); \ - __ret_805; \ -}) #endif #ifdef __LITTLE_ENDIAN__ @@ -65852,106 +68013,6 @@ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_rot270_f64(float64 __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } -__ai __attribute__((target("v8.3a,neon"))) float64x2_t __noswap_vcmlaq_rot270_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcmlaq_rot270_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); - return __ret; -} -#endif - -__ai __attribute__((target("v8.3a,neon"))) float64x1_t vcmla_rot270_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vcmla_rot270_f64((int8x8_t)__p0, (int8x8_t)__p1, 
(int8x8_t)__p2, 10); - return __ret; -} -#define vcmla_rot270_lane_f64(__p0_806, __p1_806, __p2_806, __p3_806) __extension__ ({ \ - float64x1_t __ret_806; \ - float64x1_t __s0_806 = __p0_806; \ - float64x1_t __s1_806 = __p1_806; \ - float64x1_t __s2_806 = __p2_806; \ -float64x1_t __reint_806 = __s2_806; \ -uint64x2_t __reint1_806 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_806, __p3_806), vgetq_lane_u64(*(uint64x2_t *) &__reint_806, __p3_806)}; \ - __ret_806 = vcmla_rot270_f64(__s0_806, __s1_806, *(float64x1_t *) &__reint1_806); \ - __ret_806; \ -}) -#ifdef __LITTLE_ENDIAN__ -#define vcmlaq_rot270_lane_f64(__p0_807, __p1_807, __p2_807, __p3_807) __extension__ ({ \ - float64x2_t __ret_807; \ - float64x2_t __s0_807 = __p0_807; \ - float64x2_t __s1_807 = __p1_807; \ - float64x1_t __s2_807 = __p2_807; \ -float64x1_t __reint_807 = __s2_807; \ -uint64x2_t __reint1_807 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_807, __p3_807), vgetq_lane_u64(*(uint64x2_t *) &__reint_807, __p3_807)}; \ - __ret_807 = vcmlaq_rot270_f64(__s0_807, __s1_807, *(float64x2_t *) &__reint1_807); \ - __ret_807; \ -}) -#else -#define vcmlaq_rot270_lane_f64(__p0_808, __p1_808, __p2_808, __p3_808) __extension__ ({ \ - float64x2_t __ret_808; \ - float64x2_t __s0_808 = __p0_808; \ - float64x2_t __s1_808 = __p1_808; \ - float64x1_t __s2_808 = __p2_808; \ - float64x2_t __rev0_808; __rev0_808 = __builtin_shufflevector(__s0_808, __s0_808, 1, 0); \ - float64x2_t __rev1_808; __rev1_808 = __builtin_shufflevector(__s1_808, __s1_808, 1, 0); \ -float64x1_t __reint_808 = __s2_808; \ -uint64x2_t __reint1_808 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_808, __p3_808), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_808, __p3_808)}; \ - __ret_808 = __noswap_vcmlaq_rot270_f64(__rev0_808, __rev1_808, *(float64x2_t *) &__reint1_808); \ - __ret_808 = __builtin_shufflevector(__ret_808, __ret_808, 1, 0); \ - __ret_808; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define 
vcmla_rot270_laneq_f64(__p0_809, __p1_809, __p2_809, __p3_809) __extension__ ({ \ - float64x1_t __ret_809; \ - float64x1_t __s0_809 = __p0_809; \ - float64x1_t __s1_809 = __p1_809; \ - float64x2_t __s2_809 = __p2_809; \ -float64x2_t __reint_809 = __s2_809; \ -uint64x2_t __reint1_809 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_809, __p3_809), vgetq_lane_u64(*(uint64x2_t *) &__reint_809, __p3_809)}; \ - __ret_809 = vcmla_rot270_f64(__s0_809, __s1_809, *(float64x1_t *) &__reint1_809); \ - __ret_809; \ -}) -#else -#define vcmla_rot270_laneq_f64(__p0_810, __p1_810, __p2_810, __p3_810) __extension__ ({ \ - float64x1_t __ret_810; \ - float64x1_t __s0_810 = __p0_810; \ - float64x1_t __s1_810 = __p1_810; \ - float64x2_t __s2_810 = __p2_810; \ - float64x2_t __rev2_810; __rev2_810 = __builtin_shufflevector(__s2_810, __s2_810, 1, 0); \ -float64x2_t __reint_810 = __rev2_810; \ -uint64x2_t __reint1_810 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_810, __p3_810), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_810, __p3_810)}; \ - __ret_810 = vcmla_rot270_f64(__s0_810, __s1_810, *(float64x1_t *) &__reint1_810); \ - __ret_810; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcmlaq_rot270_laneq_f64(__p0_811, __p1_811, __p2_811, __p3_811) __extension__ ({ \ - float64x2_t __ret_811; \ - float64x2_t __s0_811 = __p0_811; \ - float64x2_t __s1_811 = __p1_811; \ - float64x2_t __s2_811 = __p2_811; \ -float64x2_t __reint_811 = __s2_811; \ -uint64x2_t __reint1_811 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_811, __p3_811), vgetq_lane_u64(*(uint64x2_t *) &__reint_811, __p3_811)}; \ - __ret_811 = vcmlaq_rot270_f64(__s0_811, __s1_811, *(float64x2_t *) &__reint1_811); \ - __ret_811; \ -}) -#else -#define vcmlaq_rot270_laneq_f64(__p0_812, __p1_812, __p2_812, __p3_812) __extension__ ({ \ - float64x2_t __ret_812; \ - float64x2_t __s0_812 = __p0_812; \ - float64x2_t __s1_812 = __p1_812; \ - float64x2_t __s2_812 = __p2_812; \ - float64x2_t __rev0_812; 
__rev0_812 = __builtin_shufflevector(__s0_812, __s0_812, 1, 0); \ - float64x2_t __rev1_812; __rev1_812 = __builtin_shufflevector(__s1_812, __s1_812, 1, 0); \ - float64x2_t __rev2_812; __rev2_812 = __builtin_shufflevector(__s2_812, __s2_812, 1, 0); \ -float64x2_t __reint_812 = __rev2_812; \ -uint64x2_t __reint1_812 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_812, __p3_812), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_812, __p3_812)}; \ - __ret_812 = __noswap_vcmlaq_rot270_f64(__rev0_812, __rev1_812, *(float64x2_t *) &__reint1_812); \ - __ret_812 = __builtin_shufflevector(__ret_812, __ret_812, 1, 0); \ - __ret_812; \ -}) #endif #ifdef __LITTLE_ENDIAN__ @@ -65970,106 +68031,6 @@ __ai __attribute__((target("v8.3a,neon"))) float64x2_t vcmlaq_rot90_f64(float64x __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } -__ai __attribute__((target("v8.3a,neon"))) float64x2_t __noswap_vcmlaq_rot90_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { - float64x2_t __ret; - __ret = (float64x2_t) __builtin_neon_vcmlaq_rot90_f64((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); - return __ret; -} -#endif - -__ai __attribute__((target("v8.3a,neon"))) float64x1_t vcmla_rot90_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { - float64x1_t __ret; - __ret = (float64x1_t) __builtin_neon_vcmla_rot90_f64((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); - return __ret; -} -#define vcmla_rot90_lane_f64(__p0_813, __p1_813, __p2_813, __p3_813) __extension__ ({ \ - float64x1_t __ret_813; \ - float64x1_t __s0_813 = __p0_813; \ - float64x1_t __s1_813 = __p1_813; \ - float64x1_t __s2_813 = __p2_813; \ -float64x1_t __reint_813 = __s2_813; \ -uint64x2_t __reint1_813 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_813, __p3_813), vgetq_lane_u64(*(uint64x2_t *) &__reint_813, __p3_813)}; \ - __ret_813 = vcmla_rot90_f64(__s0_813, __s1_813, *(float64x1_t *) &__reint1_813); \ - __ret_813; \ -}) -#ifdef __LITTLE_ENDIAN__ -#define 
vcmlaq_rot90_lane_f64(__p0_814, __p1_814, __p2_814, __p3_814) __extension__ ({ \ - float64x2_t __ret_814; \ - float64x2_t __s0_814 = __p0_814; \ - float64x2_t __s1_814 = __p1_814; \ - float64x1_t __s2_814 = __p2_814; \ -float64x1_t __reint_814 = __s2_814; \ -uint64x2_t __reint1_814 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_814, __p3_814), vgetq_lane_u64(*(uint64x2_t *) &__reint_814, __p3_814)}; \ - __ret_814 = vcmlaq_rot90_f64(__s0_814, __s1_814, *(float64x2_t *) &__reint1_814); \ - __ret_814; \ -}) -#else -#define vcmlaq_rot90_lane_f64(__p0_815, __p1_815, __p2_815, __p3_815) __extension__ ({ \ - float64x2_t __ret_815; \ - float64x2_t __s0_815 = __p0_815; \ - float64x2_t __s1_815 = __p1_815; \ - float64x1_t __s2_815 = __p2_815; \ - float64x2_t __rev0_815; __rev0_815 = __builtin_shufflevector(__s0_815, __s0_815, 1, 0); \ - float64x2_t __rev1_815; __rev1_815 = __builtin_shufflevector(__s1_815, __s1_815, 1, 0); \ -float64x1_t __reint_815 = __s2_815; \ -uint64x2_t __reint1_815 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_815, __p3_815), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_815, __p3_815)}; \ - __ret_815 = __noswap_vcmlaq_rot90_f64(__rev0_815, __rev1_815, *(float64x2_t *) &__reint1_815); \ - __ret_815 = __builtin_shufflevector(__ret_815, __ret_815, 1, 0); \ - __ret_815; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcmla_rot90_laneq_f64(__p0_816, __p1_816, __p2_816, __p3_816) __extension__ ({ \ - float64x1_t __ret_816; \ - float64x1_t __s0_816 = __p0_816; \ - float64x1_t __s1_816 = __p1_816; \ - float64x2_t __s2_816 = __p2_816; \ -float64x2_t __reint_816 = __s2_816; \ -uint64x2_t __reint1_816 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_816, __p3_816), vgetq_lane_u64(*(uint64x2_t *) &__reint_816, __p3_816)}; \ - __ret_816 = vcmla_rot90_f64(__s0_816, __s1_816, *(float64x1_t *) &__reint1_816); \ - __ret_816; \ -}) -#else -#define vcmla_rot90_laneq_f64(__p0_817, __p1_817, __p2_817, __p3_817) __extension__ ({ \ - 
float64x1_t __ret_817; \ - float64x1_t __s0_817 = __p0_817; \ - float64x1_t __s1_817 = __p1_817; \ - float64x2_t __s2_817 = __p2_817; \ - float64x2_t __rev2_817; __rev2_817 = __builtin_shufflevector(__s2_817, __s2_817, 1, 0); \ -float64x2_t __reint_817 = __rev2_817; \ -uint64x2_t __reint1_817 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_817, __p3_817), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_817, __p3_817)}; \ - __ret_817 = vcmla_rot90_f64(__s0_817, __s1_817, *(float64x1_t *) &__reint1_817); \ - __ret_817; \ -}) -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vcmlaq_rot90_laneq_f64(__p0_818, __p1_818, __p2_818, __p3_818) __extension__ ({ \ - float64x2_t __ret_818; \ - float64x2_t __s0_818 = __p0_818; \ - float64x2_t __s1_818 = __p1_818; \ - float64x2_t __s2_818 = __p2_818; \ -float64x2_t __reint_818 = __s2_818; \ -uint64x2_t __reint1_818 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_818, __p3_818), vgetq_lane_u64(*(uint64x2_t *) &__reint_818, __p3_818)}; \ - __ret_818 = vcmlaq_rot90_f64(__s0_818, __s1_818, *(float64x2_t *) &__reint1_818); \ - __ret_818; \ -}) -#else -#define vcmlaq_rot90_laneq_f64(__p0_819, __p1_819, __p2_819, __p3_819) __extension__ ({ \ - float64x2_t __ret_819; \ - float64x2_t __s0_819 = __p0_819; \ - float64x2_t __s1_819 = __p1_819; \ - float64x2_t __s2_819 = __p2_819; \ - float64x2_t __rev0_819; __rev0_819 = __builtin_shufflevector(__s0_819, __s0_819, 1, 0); \ - float64x2_t __rev1_819; __rev1_819 = __builtin_shufflevector(__s1_819, __s1_819, 1, 0); \ - float64x2_t __rev2_819; __rev2_819 = __builtin_shufflevector(__s2_819, __s2_819, 1, 0); \ -float64x2_t __reint_819 = __rev2_819; \ -uint64x2_t __reint1_819 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_819, __p3_819), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_819, __p3_819)}; \ - __ret_819 = __noswap_vcmlaq_rot90_f64(__rev0_819, __rev1_819, *(float64x2_t *) &__reint1_819); \ - __ret_819 = __builtin_shufflevector(__ret_819, __ret_819, 
1, 0); \ - __ret_819; \ -}) #endif #ifdef __LITTLE_ENDIAN__ @@ -66286,237 +68247,215 @@ __ai __attribute__((target("v8.5a,neon"))) float64x1_t vrnd64z_f64(float64x1_t _ } #endif #ifdef __LITTLE_ENDIAN__ -#define vbfdotq_lane_f32(__p0_820, __p1_820, __p2_820, __p3_820) __extension__ ({ \ - float32x4_t __ret_820; \ - float32x4_t __s0_820 = __p0_820; \ - bfloat16x8_t __s1_820 = __p1_820; \ - bfloat16x4_t __s2_820 = __p2_820; \ -bfloat16x4_t __reint_820 = __s2_820; \ -float32x4_t __reint1_820 = splatq_lane_f32(*(float32x2_t *) &__reint_820, __p3_820); \ - __ret_820 = vbfdotq_f32(__s0_820, __s1_820, *(bfloat16x8_t *) &__reint1_820); \ - __ret_820; \ +#define vbfdotq_lane_f32(__p0_792, __p1_792, __p2_792, __p3_792) __extension__ ({ \ + float32x4_t __ret_792; \ + float32x4_t __s0_792 = __p0_792; \ + bfloat16x8_t __s1_792 = __p1_792; \ + bfloat16x4_t __s2_792 = __p2_792; \ + __ret_792 = vbfdotq_f32(__s0_792, __s1_792, __builtin_bit_cast(bfloat16x8_t, splatq_lane_f32(__builtin_bit_cast(float32x2_t, __s2_792), __p3_792))); \ + __ret_792; \ }) #else -#define vbfdotq_lane_f32(__p0_821, __p1_821, __p2_821, __p3_821) __extension__ ({ \ - float32x4_t __ret_821; \ - float32x4_t __s0_821 = __p0_821; \ - bfloat16x8_t __s1_821 = __p1_821; \ - bfloat16x4_t __s2_821 = __p2_821; \ - float32x4_t __rev0_821; __rev0_821 = __builtin_shufflevector(__s0_821, __s0_821, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_821; __rev1_821 = __builtin_shufflevector(__s1_821, __s1_821, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x4_t __rev2_821; __rev2_821 = __builtin_shufflevector(__s2_821, __s2_821, 3, 2, 1, 0); \ -bfloat16x4_t __reint_821 = __rev2_821; \ -float32x4_t __reint1_821 = __noswap_splatq_lane_f32(*(float32x2_t *) &__reint_821, __p3_821); \ - __ret_821 = __noswap_vbfdotq_f32(__rev0_821, __rev1_821, *(bfloat16x8_t *) &__reint1_821); \ - __ret_821 = __builtin_shufflevector(__ret_821, __ret_821, 3, 2, 1, 0); \ - __ret_821; \ +#define vbfdotq_lane_f32(__p0_793, __p1_793, __p2_793, __p3_793) __extension__ ({ \ + 
float32x4_t __ret_793; \ + float32x4_t __s0_793 = __p0_793; \ + bfloat16x8_t __s1_793 = __p1_793; \ + bfloat16x4_t __s2_793 = __p2_793; \ + float32x4_t __rev0_793; __rev0_793 = __builtin_shufflevector(__s0_793, __s0_793, 3, 2, 1, 0); \ + bfloat16x8_t __rev1_793; __rev1_793 = __builtin_shufflevector(__s1_793, __s1_793, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x4_t __rev2_793; __rev2_793 = __builtin_shufflevector(__s2_793, __s2_793, 3, 2, 1, 0); \ + __ret_793 = __noswap_vbfdotq_f32(__rev0_793, __rev1_793, __builtin_bit_cast(bfloat16x8_t, __noswap_splatq_lane_f32(__builtin_bit_cast(float32x2_t, __rev2_793), __p3_793))); \ + __ret_793 = __builtin_shufflevector(__ret_793, __ret_793, 3, 2, 1, 0); \ + __ret_793; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfdot_lane_f32(__p0_822, __p1_822, __p2_822, __p3_822) __extension__ ({ \ - float32x2_t __ret_822; \ - float32x2_t __s0_822 = __p0_822; \ - bfloat16x4_t __s1_822 = __p1_822; \ - bfloat16x4_t __s2_822 = __p2_822; \ -bfloat16x4_t __reint_822 = __s2_822; \ -float32x2_t __reint1_822 = splat_lane_f32(*(float32x2_t *) &__reint_822, __p3_822); \ - __ret_822 = vbfdot_f32(__s0_822, __s1_822, *(bfloat16x4_t *) &__reint1_822); \ - __ret_822; \ +#define vbfdot_lane_f32(__p0_794, __p1_794, __p2_794, __p3_794) __extension__ ({ \ + float32x2_t __ret_794; \ + float32x2_t __s0_794 = __p0_794; \ + bfloat16x4_t __s1_794 = __p1_794; \ + bfloat16x4_t __s2_794 = __p2_794; \ + __ret_794 = vbfdot_f32(__s0_794, __s1_794, __builtin_bit_cast(bfloat16x4_t, splat_lane_f32(__builtin_bit_cast(float32x2_t, __s2_794), __p3_794))); \ + __ret_794; \ }) #else -#define vbfdot_lane_f32(__p0_823, __p1_823, __p2_823, __p3_823) __extension__ ({ \ - float32x2_t __ret_823; \ - float32x2_t __s0_823 = __p0_823; \ - bfloat16x4_t __s1_823 = __p1_823; \ - bfloat16x4_t __s2_823 = __p2_823; \ - float32x2_t __rev0_823; __rev0_823 = __builtin_shufflevector(__s0_823, __s0_823, 1, 0); \ - bfloat16x4_t __rev1_823; __rev1_823 = __builtin_shufflevector(__s1_823, __s1_823, 3, 2, 
1, 0); \ - bfloat16x4_t __rev2_823; __rev2_823 = __builtin_shufflevector(__s2_823, __s2_823, 3, 2, 1, 0); \ -bfloat16x4_t __reint_823 = __rev2_823; \ -float32x2_t __reint1_823 = __noswap_splat_lane_f32(*(float32x2_t *) &__reint_823, __p3_823); \ - __ret_823 = __noswap_vbfdot_f32(__rev0_823, __rev1_823, *(bfloat16x4_t *) &__reint1_823); \ - __ret_823 = __builtin_shufflevector(__ret_823, __ret_823, 1, 0); \ - __ret_823; \ +#define vbfdot_lane_f32(__p0_795, __p1_795, __p2_795, __p3_795) __extension__ ({ \ + float32x2_t __ret_795; \ + float32x2_t __s0_795 = __p0_795; \ + bfloat16x4_t __s1_795 = __p1_795; \ + bfloat16x4_t __s2_795 = __p2_795; \ + float32x2_t __rev0_795; __rev0_795 = __builtin_shufflevector(__s0_795, __s0_795, 1, 0); \ + bfloat16x4_t __rev1_795; __rev1_795 = __builtin_shufflevector(__s1_795, __s1_795, 3, 2, 1, 0); \ + bfloat16x4_t __rev2_795; __rev2_795 = __builtin_shufflevector(__s2_795, __s2_795, 3, 2, 1, 0); \ + __ret_795 = __noswap_vbfdot_f32(__rev0_795, __rev1_795, __builtin_bit_cast(bfloat16x4_t, __noswap_splat_lane_f32(__builtin_bit_cast(float32x2_t, __rev2_795), __p3_795))); \ + __ret_795 = __builtin_shufflevector(__ret_795, __ret_795, 1, 0); \ + __ret_795; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfdotq_laneq_f32(__p0_824, __p1_824, __p2_824, __p3_824) __extension__ ({ \ - float32x4_t __ret_824; \ - float32x4_t __s0_824 = __p0_824; \ - bfloat16x8_t __s1_824 = __p1_824; \ - bfloat16x8_t __s2_824 = __p2_824; \ -bfloat16x8_t __reint_824 = __s2_824; \ -float32x4_t __reint1_824 = splatq_laneq_f32(*(float32x4_t *) &__reint_824, __p3_824); \ - __ret_824 = vbfdotq_f32(__s0_824, __s1_824, *(bfloat16x8_t *) &__reint1_824); \ - __ret_824; \ +#define vbfdotq_laneq_f32(__p0_796, __p1_796, __p2_796, __p3_796) __extension__ ({ \ + float32x4_t __ret_796; \ + float32x4_t __s0_796 = __p0_796; \ + bfloat16x8_t __s1_796 = __p1_796; \ + bfloat16x8_t __s2_796 = __p2_796; \ + __ret_796 = vbfdotq_f32(__s0_796, __s1_796, __builtin_bit_cast(bfloat16x8_t, 
splatq_laneq_f32(__builtin_bit_cast(float32x4_t, __s2_796), __p3_796))); \ + __ret_796; \ }) #else -#define vbfdotq_laneq_f32(__p0_825, __p1_825, __p2_825, __p3_825) __extension__ ({ \ - float32x4_t __ret_825; \ - float32x4_t __s0_825 = __p0_825; \ - bfloat16x8_t __s1_825 = __p1_825; \ - bfloat16x8_t __s2_825 = __p2_825; \ - float32x4_t __rev0_825; __rev0_825 = __builtin_shufflevector(__s0_825, __s0_825, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_825; __rev1_825 = __builtin_shufflevector(__s1_825, __s1_825, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x8_t __rev2_825; __rev2_825 = __builtin_shufflevector(__s2_825, __s2_825, 7, 6, 5, 4, 3, 2, 1, 0); \ -bfloat16x8_t __reint_825 = __rev2_825; \ -float32x4_t __reint1_825 = __noswap_splatq_laneq_f32(*(float32x4_t *) &__reint_825, __p3_825); \ - __ret_825 = __noswap_vbfdotq_f32(__rev0_825, __rev1_825, *(bfloat16x8_t *) &__reint1_825); \ - __ret_825 = __builtin_shufflevector(__ret_825, __ret_825, 3, 2, 1, 0); \ - __ret_825; \ +#define vbfdotq_laneq_f32(__p0_797, __p1_797, __p2_797, __p3_797) __extension__ ({ \ + float32x4_t __ret_797; \ + float32x4_t __s0_797 = __p0_797; \ + bfloat16x8_t __s1_797 = __p1_797; \ + bfloat16x8_t __s2_797 = __p2_797; \ + float32x4_t __rev0_797; __rev0_797 = __builtin_shufflevector(__s0_797, __s0_797, 3, 2, 1, 0); \ + bfloat16x8_t __rev1_797; __rev1_797 = __builtin_shufflevector(__s1_797, __s1_797, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x8_t __rev2_797; __rev2_797 = __builtin_shufflevector(__s2_797, __s2_797, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_797 = __noswap_vbfdotq_f32(__rev0_797, __rev1_797, __builtin_bit_cast(bfloat16x8_t, __noswap_splatq_laneq_f32(__builtin_bit_cast(float32x4_t, __rev2_797), __p3_797))); \ + __ret_797 = __builtin_shufflevector(__ret_797, __ret_797, 3, 2, 1, 0); \ + __ret_797; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfdot_laneq_f32(__p0_826, __p1_826, __p2_826, __p3_826) __extension__ ({ \ - float32x2_t __ret_826; \ - float32x2_t __s0_826 = __p0_826; \ - bfloat16x4_t __s1_826 = 
__p1_826; \ - bfloat16x8_t __s2_826 = __p2_826; \ -bfloat16x8_t __reint_826 = __s2_826; \ -float32x2_t __reint1_826 = splat_laneq_f32(*(float32x4_t *) &__reint_826, __p3_826); \ - __ret_826 = vbfdot_f32(__s0_826, __s1_826, *(bfloat16x4_t *) &__reint1_826); \ - __ret_826; \ +#define vbfdot_laneq_f32(__p0_798, __p1_798, __p2_798, __p3_798) __extension__ ({ \ + float32x2_t __ret_798; \ + float32x2_t __s0_798 = __p0_798; \ + bfloat16x4_t __s1_798 = __p1_798; \ + bfloat16x8_t __s2_798 = __p2_798; \ + __ret_798 = vbfdot_f32(__s0_798, __s1_798, __builtin_bit_cast(bfloat16x4_t, splat_laneq_f32(__builtin_bit_cast(float32x4_t, __s2_798), __p3_798))); \ + __ret_798; \ }) #else -#define vbfdot_laneq_f32(__p0_827, __p1_827, __p2_827, __p3_827) __extension__ ({ \ - float32x2_t __ret_827; \ - float32x2_t __s0_827 = __p0_827; \ - bfloat16x4_t __s1_827 = __p1_827; \ - bfloat16x8_t __s2_827 = __p2_827; \ - float32x2_t __rev0_827; __rev0_827 = __builtin_shufflevector(__s0_827, __s0_827, 1, 0); \ - bfloat16x4_t __rev1_827; __rev1_827 = __builtin_shufflevector(__s1_827, __s1_827, 3, 2, 1, 0); \ - bfloat16x8_t __rev2_827; __rev2_827 = __builtin_shufflevector(__s2_827, __s2_827, 7, 6, 5, 4, 3, 2, 1, 0); \ -bfloat16x8_t __reint_827 = __rev2_827; \ -float32x2_t __reint1_827 = __noswap_splat_laneq_f32(*(float32x4_t *) &__reint_827, __p3_827); \ - __ret_827 = __noswap_vbfdot_f32(__rev0_827, __rev1_827, *(bfloat16x4_t *) &__reint1_827); \ - __ret_827 = __builtin_shufflevector(__ret_827, __ret_827, 1, 0); \ - __ret_827; \ +#define vbfdot_laneq_f32(__p0_799, __p1_799, __p2_799, __p3_799) __extension__ ({ \ + float32x2_t __ret_799; \ + float32x2_t __s0_799 = __p0_799; \ + bfloat16x4_t __s1_799 = __p1_799; \ + bfloat16x8_t __s2_799 = __p2_799; \ + float32x2_t __rev0_799; __rev0_799 = __builtin_shufflevector(__s0_799, __s0_799, 1, 0); \ + bfloat16x4_t __rev1_799; __rev1_799 = __builtin_shufflevector(__s1_799, __s1_799, 3, 2, 1, 0); \ + bfloat16x8_t __rev2_799; __rev2_799 = 
__builtin_shufflevector(__s2_799, __s2_799, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_799 = __noswap_vbfdot_f32(__rev0_799, __rev1_799, __builtin_bit_cast(bfloat16x4_t, __noswap_splat_laneq_f32(__builtin_bit_cast(float32x4_t, __rev2_799), __p3_799))); \ + __ret_799 = __builtin_shufflevector(__ret_799, __ret_799, 1, 0); \ + __ret_799; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfmlalbq_lane_f32(__p0_828, __p1_828, __p2_828, __p3_828) __extension__ ({ \ - float32x4_t __ret_828; \ - float32x4_t __s0_828 = __p0_828; \ - bfloat16x8_t __s1_828 = __p1_828; \ - bfloat16x4_t __s2_828 = __p2_828; \ - __ret_828 = vbfmlalbq_f32(__s0_828, __s1_828, (bfloat16x8_t) {vget_lane_bf16(__s2_828, __p3_828), vget_lane_bf16(__s2_828, __p3_828), vget_lane_bf16(__s2_828, __p3_828), vget_lane_bf16(__s2_828, __p3_828), vget_lane_bf16(__s2_828, __p3_828), vget_lane_bf16(__s2_828, __p3_828), vget_lane_bf16(__s2_828, __p3_828), vget_lane_bf16(__s2_828, __p3_828)}); \ - __ret_828; \ +#define vbfmlalbq_lane_f32(__p0_800, __p1_800, __p2_800, __p3_800) __extension__ ({ \ + float32x4_t __ret_800; \ + float32x4_t __s0_800 = __p0_800; \ + bfloat16x8_t __s1_800 = __p1_800; \ + bfloat16x4_t __s2_800 = __p2_800; \ + __ret_800 = vbfmlalbq_f32(__s0_800, __s1_800, (bfloat16x8_t) {vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800), vget_lane_bf16(__s2_800, __p3_800)}); \ + __ret_800; \ }) #else -#define vbfmlalbq_lane_f32(__p0_829, __p1_829, __p2_829, __p3_829) __extension__ ({ \ - float32x4_t __ret_829; \ - float32x4_t __s0_829 = __p0_829; \ - bfloat16x8_t __s1_829 = __p1_829; \ - bfloat16x4_t __s2_829 = __p2_829; \ - float32x4_t __rev0_829; __rev0_829 = __builtin_shufflevector(__s0_829, __s0_829, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_829; __rev1_829 = __builtin_shufflevector(__s1_829, __s1_829, 7, 6, 5, 4, 3, 2, 
1, 0); \ - bfloat16x4_t __rev2_829; __rev2_829 = __builtin_shufflevector(__s2_829, __s2_829, 3, 2, 1, 0); \ - __ret_829 = __noswap_vbfmlalbq_f32(__rev0_829, __rev1_829, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_829, __p3_829), __noswap_vget_lane_bf16(__rev2_829, __p3_829), __noswap_vget_lane_bf16(__rev2_829, __p3_829), __noswap_vget_lane_bf16(__rev2_829, __p3_829), __noswap_vget_lane_bf16(__rev2_829, __p3_829), __noswap_vget_lane_bf16(__rev2_829, __p3_829), __noswap_vget_lane_bf16(__rev2_829, __p3_829), __noswap_vget_lane_bf16(__rev2_829, __p3_829)}); \ - __ret_829 = __builtin_shufflevector(__ret_829, __ret_829, 3, 2, 1, 0); \ - __ret_829; \ +#define vbfmlalbq_lane_f32(__p0_801, __p1_801, __p2_801, __p3_801) __extension__ ({ \ + float32x4_t __ret_801; \ + float32x4_t __s0_801 = __p0_801; \ + bfloat16x8_t __s1_801 = __p1_801; \ + bfloat16x4_t __s2_801 = __p2_801; \ + float32x4_t __rev0_801; __rev0_801 = __builtin_shufflevector(__s0_801, __s0_801, 3, 2, 1, 0); \ + bfloat16x8_t __rev1_801; __rev1_801 = __builtin_shufflevector(__s1_801, __s1_801, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x4_t __rev2_801; __rev2_801 = __builtin_shufflevector(__s2_801, __s2_801, 3, 2, 1, 0); \ + __ret_801 = __noswap_vbfmlalbq_f32(__rev0_801, __rev1_801, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801), __noswap_vget_lane_bf16(__rev2_801, __p3_801)}); \ + __ret_801 = __builtin_shufflevector(__ret_801, __ret_801, 3, 2, 1, 0); \ + __ret_801; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfmlalbq_laneq_f32(__p0_830, __p1_830, __p2_830, __p3_830) __extension__ ({ \ - float32x4_t __ret_830; \ - float32x4_t __s0_830 = __p0_830; \ - bfloat16x8_t __s1_830 = __p1_830; \ - bfloat16x8_t __s2_830 = __p2_830; \ 
- __ret_830 = vbfmlalbq_f32(__s0_830, __s1_830, (bfloat16x8_t) {vgetq_lane_bf16(__s2_830, __p3_830), vgetq_lane_bf16(__s2_830, __p3_830), vgetq_lane_bf16(__s2_830, __p3_830), vgetq_lane_bf16(__s2_830, __p3_830), vgetq_lane_bf16(__s2_830, __p3_830), vgetq_lane_bf16(__s2_830, __p3_830), vgetq_lane_bf16(__s2_830, __p3_830), vgetq_lane_bf16(__s2_830, __p3_830)}); \ - __ret_830; \ +#define vbfmlalbq_laneq_f32(__p0_802, __p1_802, __p2_802, __p3_802) __extension__ ({ \ + float32x4_t __ret_802; \ + float32x4_t __s0_802 = __p0_802; \ + bfloat16x8_t __s1_802 = __p1_802; \ + bfloat16x8_t __s2_802 = __p2_802; \ + __ret_802 = vbfmlalbq_f32(__s0_802, __s1_802, (bfloat16x8_t) {vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802), vgetq_lane_bf16(__s2_802, __p3_802)}); \ + __ret_802; \ }) #else -#define vbfmlalbq_laneq_f32(__p0_831, __p1_831, __p2_831, __p3_831) __extension__ ({ \ - float32x4_t __ret_831; \ - float32x4_t __s0_831 = __p0_831; \ - bfloat16x8_t __s1_831 = __p1_831; \ - bfloat16x8_t __s2_831 = __p2_831; \ - float32x4_t __rev0_831; __rev0_831 = __builtin_shufflevector(__s0_831, __s0_831, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_831; __rev1_831 = __builtin_shufflevector(__s1_831, __s1_831, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x8_t __rev2_831; __rev2_831 = __builtin_shufflevector(__s2_831, __s2_831, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_831 = __noswap_vbfmlalbq_f32(__rev0_831, __rev1_831, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_831, __p3_831), __noswap_vgetq_lane_bf16(__rev2_831, __p3_831), __noswap_vgetq_lane_bf16(__rev2_831, __p3_831), __noswap_vgetq_lane_bf16(__rev2_831, __p3_831), __noswap_vgetq_lane_bf16(__rev2_831, __p3_831), __noswap_vgetq_lane_bf16(__rev2_831, __p3_831), __noswap_vgetq_lane_bf16(__rev2_831, __p3_831), __noswap_vgetq_lane_bf16(__rev2_831, 
__p3_831)}); \ - __ret_831 = __builtin_shufflevector(__ret_831, __ret_831, 3, 2, 1, 0); \ - __ret_831; \ +#define vbfmlalbq_laneq_f32(__p0_803, __p1_803, __p2_803, __p3_803) __extension__ ({ \ + float32x4_t __ret_803; \ + float32x4_t __s0_803 = __p0_803; \ + bfloat16x8_t __s1_803 = __p1_803; \ + bfloat16x8_t __s2_803 = __p2_803; \ + float32x4_t __rev0_803; __rev0_803 = __builtin_shufflevector(__s0_803, __s0_803, 3, 2, 1, 0); \ + bfloat16x8_t __rev1_803; __rev1_803 = __builtin_shufflevector(__s1_803, __s1_803, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x8_t __rev2_803; __rev2_803 = __builtin_shufflevector(__s2_803, __s2_803, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_803 = __noswap_vbfmlalbq_f32(__rev0_803, __rev1_803, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803), __noswap_vgetq_lane_bf16(__rev2_803, __p3_803)}); \ + __ret_803 = __builtin_shufflevector(__ret_803, __ret_803, 3, 2, 1, 0); \ + __ret_803; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfmlaltq_lane_f32(__p0_832, __p1_832, __p2_832, __p3_832) __extension__ ({ \ - float32x4_t __ret_832; \ - float32x4_t __s0_832 = __p0_832; \ - bfloat16x8_t __s1_832 = __p1_832; \ - bfloat16x4_t __s2_832 = __p2_832; \ - __ret_832 = vbfmlaltq_f32(__s0_832, __s1_832, (bfloat16x8_t) {vget_lane_bf16(__s2_832, __p3_832), vget_lane_bf16(__s2_832, __p3_832), vget_lane_bf16(__s2_832, __p3_832), vget_lane_bf16(__s2_832, __p3_832), vget_lane_bf16(__s2_832, __p3_832), vget_lane_bf16(__s2_832, __p3_832), vget_lane_bf16(__s2_832, __p3_832), vget_lane_bf16(__s2_832, __p3_832)}); \ - __ret_832; \ +#define vbfmlaltq_lane_f32(__p0_804, __p1_804, __p2_804, __p3_804) __extension__ ({ \ + float32x4_t __ret_804; \ + float32x4_t __s0_804 = __p0_804; \ + 
bfloat16x8_t __s1_804 = __p1_804; \ + bfloat16x4_t __s2_804 = __p2_804; \ + __ret_804 = vbfmlaltq_f32(__s0_804, __s1_804, (bfloat16x8_t) {vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804), vget_lane_bf16(__s2_804, __p3_804)}); \ + __ret_804; \ }) #else -#define vbfmlaltq_lane_f32(__p0_833, __p1_833, __p2_833, __p3_833) __extension__ ({ \ - float32x4_t __ret_833; \ - float32x4_t __s0_833 = __p0_833; \ - bfloat16x8_t __s1_833 = __p1_833; \ - bfloat16x4_t __s2_833 = __p2_833; \ - float32x4_t __rev0_833; __rev0_833 = __builtin_shufflevector(__s0_833, __s0_833, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_833; __rev1_833 = __builtin_shufflevector(__s1_833, __s1_833, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x4_t __rev2_833; __rev2_833 = __builtin_shufflevector(__s2_833, __s2_833, 3, 2, 1, 0); \ - __ret_833 = __noswap_vbfmlaltq_f32(__rev0_833, __rev1_833, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_833, __p3_833), __noswap_vget_lane_bf16(__rev2_833, __p3_833), __noswap_vget_lane_bf16(__rev2_833, __p3_833), __noswap_vget_lane_bf16(__rev2_833, __p3_833), __noswap_vget_lane_bf16(__rev2_833, __p3_833), __noswap_vget_lane_bf16(__rev2_833, __p3_833), __noswap_vget_lane_bf16(__rev2_833, __p3_833), __noswap_vget_lane_bf16(__rev2_833, __p3_833)}); \ - __ret_833 = __builtin_shufflevector(__ret_833, __ret_833, 3, 2, 1, 0); \ - __ret_833; \ +#define vbfmlaltq_lane_f32(__p0_805, __p1_805, __p2_805, __p3_805) __extension__ ({ \ + float32x4_t __ret_805; \ + float32x4_t __s0_805 = __p0_805; \ + bfloat16x8_t __s1_805 = __p1_805; \ + bfloat16x4_t __s2_805 = __p2_805; \ + float32x4_t __rev0_805; __rev0_805 = __builtin_shufflevector(__s0_805, __s0_805, 3, 2, 1, 0); \ + bfloat16x8_t __rev1_805; __rev1_805 = __builtin_shufflevector(__s1_805, __s1_805, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x4_t __rev2_805; 
__rev2_805 = __builtin_shufflevector(__s2_805, __s2_805, 3, 2, 1, 0); \ + __ret_805 = __noswap_vbfmlaltq_f32(__rev0_805, __rev1_805, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805), __noswap_vget_lane_bf16(__rev2_805, __p3_805)}); \ + __ret_805 = __builtin_shufflevector(__ret_805, __ret_805, 3, 2, 1, 0); \ + __ret_805; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfmlaltq_laneq_f32(__p0_834, __p1_834, __p2_834, __p3_834) __extension__ ({ \ - float32x4_t __ret_834; \ - float32x4_t __s0_834 = __p0_834; \ - bfloat16x8_t __s1_834 = __p1_834; \ - bfloat16x8_t __s2_834 = __p2_834; \ - __ret_834 = vbfmlaltq_f32(__s0_834, __s1_834, (bfloat16x8_t) {vgetq_lane_bf16(__s2_834, __p3_834), vgetq_lane_bf16(__s2_834, __p3_834), vgetq_lane_bf16(__s2_834, __p3_834), vgetq_lane_bf16(__s2_834, __p3_834), vgetq_lane_bf16(__s2_834, __p3_834), vgetq_lane_bf16(__s2_834, __p3_834), vgetq_lane_bf16(__s2_834, __p3_834), vgetq_lane_bf16(__s2_834, __p3_834)}); \ - __ret_834; \ +#define vbfmlaltq_laneq_f32(__p0_806, __p1_806, __p2_806, __p3_806) __extension__ ({ \ + float32x4_t __ret_806; \ + float32x4_t __s0_806 = __p0_806; \ + bfloat16x8_t __s1_806 = __p1_806; \ + bfloat16x8_t __s2_806 = __p2_806; \ + __ret_806 = vbfmlaltq_f32(__s0_806, __s1_806, (bfloat16x8_t) {vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806), vgetq_lane_bf16(__s2_806, __p3_806)}); \ + __ret_806; \ }) #else -#define vbfmlaltq_laneq_f32(__p0_835, __p1_835, __p2_835, __p3_835) __extension__ ({ \ - float32x4_t __ret_835; \ - 
float32x4_t __s0_835 = __p0_835; \ - bfloat16x8_t __s1_835 = __p1_835; \ - bfloat16x8_t __s2_835 = __p2_835; \ - float32x4_t __rev0_835; __rev0_835 = __builtin_shufflevector(__s0_835, __s0_835, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_835; __rev1_835 = __builtin_shufflevector(__s1_835, __s1_835, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x8_t __rev2_835; __rev2_835 = __builtin_shufflevector(__s2_835, __s2_835, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_835 = __noswap_vbfmlaltq_f32(__rev0_835, __rev1_835, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_835, __p3_835), __noswap_vgetq_lane_bf16(__rev2_835, __p3_835), __noswap_vgetq_lane_bf16(__rev2_835, __p3_835), __noswap_vgetq_lane_bf16(__rev2_835, __p3_835), __noswap_vgetq_lane_bf16(__rev2_835, __p3_835), __noswap_vgetq_lane_bf16(__rev2_835, __p3_835), __noswap_vgetq_lane_bf16(__rev2_835, __p3_835), __noswap_vgetq_lane_bf16(__rev2_835, __p3_835)}); \ - __ret_835 = __builtin_shufflevector(__ret_835, __ret_835, 3, 2, 1, 0); \ - __ret_835; \ +#define vbfmlaltq_laneq_f32(__p0_807, __p1_807, __p2_807, __p3_807) __extension__ ({ \ + float32x4_t __ret_807; \ + float32x4_t __s0_807 = __p0_807; \ + bfloat16x8_t __s1_807 = __p1_807; \ + bfloat16x8_t __s2_807 = __p2_807; \ + float32x4_t __rev0_807; __rev0_807 = __builtin_shufflevector(__s0_807, __s0_807, 3, 2, 1, 0); \ + bfloat16x8_t __rev1_807; __rev1_807 = __builtin_shufflevector(__s1_807, __s1_807, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x8_t __rev2_807; __rev2_807 = __builtin_shufflevector(__s2_807, __s2_807, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_807 = __noswap_vbfmlaltq_f32(__rev0_807, __rev1_807, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807), __noswap_vgetq_lane_bf16(__rev2_807, __p3_807)}); \ 
+ __ret_807 = __builtin_shufflevector(__ret_807, __ret_807, 3, 2, 1, 0); \ + __ret_807; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai __attribute__((target("bf16,neon"))) float32x4_t vcvt_f32_bf16(bfloat16x4_t __p0_836) { - float32x4_t __ret_836; -bfloat16x4_t __reint_836 = __p0_836; -int32x4_t __reint1_836 = vshll_n_s16(*(int16x4_t *) &__reint_836, 16); - __ret_836 = *(float32x4_t *) &__reint1_836; - return __ret_836; +__ai __attribute__((target("bf16,neon"))) float32x4_t vcvt_f32_bf16(bfloat16x4_t __p0_808) { + float32x4_t __ret_808; + __ret_808 = __builtin_bit_cast(float32x4_t, vshll_n_u16(__builtin_bit_cast(uint16x4_t, __p0_808), 16)); + return __ret_808; } #else -__ai __attribute__((target("bf16,neon"))) float32x4_t vcvt_f32_bf16(bfloat16x4_t __p0_837) { - float32x4_t __ret_837; - bfloat16x4_t __rev0_837; __rev0_837 = __builtin_shufflevector(__p0_837, __p0_837, 3, 2, 1, 0); -bfloat16x4_t __reint_837 = __rev0_837; -int32x4_t __reint1_837 = __noswap_vshll_n_s16(*(int16x4_t *) &__reint_837, 16); - __ret_837 = *(float32x4_t *) &__reint1_837; - __ret_837 = __builtin_shufflevector(__ret_837, __ret_837, 3, 2, 1, 0); - return __ret_837; +__ai __attribute__((target("bf16,neon"))) float32x4_t vcvt_f32_bf16(bfloat16x4_t __p0_809) { + float32x4_t __ret_809; + bfloat16x4_t __rev0_809; __rev0_809 = __builtin_shufflevector(__p0_809, __p0_809, 3, 2, 1, 0); + __ret_809 = __builtin_bit_cast(float32x4_t, __noswap_vshll_n_u16(__builtin_bit_cast(uint16x4_t, __rev0_809), 16)); + __ret_809 = __builtin_shufflevector(__ret_809, __ret_809, 3, 2, 1, 0); + return __ret_809; } -__ai __attribute__((target("bf16,neon"))) float32x4_t __noswap_vcvt_f32_bf16(bfloat16x4_t __p0_838) { - float32x4_t __ret_838; -bfloat16x4_t __reint_838 = __p0_838; -int32x4_t __reint1_838 = __noswap_vshll_n_s16(*(int16x4_t *) &__reint_838, 16); - __ret_838 = *(float32x4_t *) &__reint1_838; - return __ret_838; +__ai __attribute__((target("bf16,neon"))) float32x4_t __noswap_vcvt_f32_bf16(bfloat16x4_t __p0_810) { + 
float32x4_t __ret_810; + __ret_810 = __builtin_bit_cast(float32x4_t, __noswap_vshll_n_u16(__builtin_bit_cast(uint16x4_t, __p0_810), 16)); + return __ret_810; } #endif @@ -66553,260 +68492,236 @@ __ai __attribute__((target("bf16,neon"))) float32x4_t vcvtq_low_f32_bf16(bfloat1 #endif #ifdef __LITTLE_ENDIAN__ -#define vdotq_lane_u32(__p0_839, __p1_839, __p2_839, __p3_839) __extension__ ({ \ - uint32x4_t __ret_839; \ - uint32x4_t __s0_839 = __p0_839; \ - uint8x16_t __s1_839 = __p1_839; \ - uint8x8_t __s2_839 = __p2_839; \ -uint8x8_t __reint_839 = __s2_839; \ -uint32x4_t __reint1_839 = splatq_lane_u32(*(uint32x2_t *) &__reint_839, __p3_839); \ - __ret_839 = vdotq_u32(__s0_839, __s1_839, *(uint8x16_t *) &__reint1_839); \ - __ret_839; \ +#define vdotq_lane_u32(__p0_811, __p1_811, __p2_811, __p3_811) __extension__ ({ \ + uint32x4_t __ret_811; \ + uint32x4_t __s0_811 = __p0_811; \ + uint8x16_t __s1_811 = __p1_811; \ + uint8x8_t __s2_811 = __p2_811; \ + __ret_811 = vdotq_u32(__s0_811, __s1_811, __builtin_bit_cast(uint8x16_t, splatq_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_811), __p3_811))); \ + __ret_811; \ }) #else -#define vdotq_lane_u32(__p0_840, __p1_840, __p2_840, __p3_840) __extension__ ({ \ - uint32x4_t __ret_840; \ - uint32x4_t __s0_840 = __p0_840; \ - uint8x16_t __s1_840 = __p1_840; \ - uint8x8_t __s2_840 = __p2_840; \ - uint32x4_t __rev0_840; __rev0_840 = __builtin_shufflevector(__s0_840, __s0_840, 3, 2, 1, 0); \ - uint8x16_t __rev1_840; __rev1_840 = __builtin_shufflevector(__s1_840, __s1_840, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_840; __rev2_840 = __builtin_shufflevector(__s2_840, __s2_840, 7, 6, 5, 4, 3, 2, 1, 0); \ -uint8x8_t __reint_840 = __rev2_840; \ -uint32x4_t __reint1_840 = __noswap_splatq_lane_u32(*(uint32x2_t *) &__reint_840, __p3_840); \ - __ret_840 = __noswap_vdotq_u32(__rev0_840, __rev1_840, *(uint8x16_t *) &__reint1_840); \ - __ret_840 = __builtin_shufflevector(__ret_840, __ret_840, 3, 2, 1, 0); \ - 
__ret_840; \ +#define vdotq_lane_u32(__p0_812, __p1_812, __p2_812, __p3_812) __extension__ ({ \ + uint32x4_t __ret_812; \ + uint32x4_t __s0_812 = __p0_812; \ + uint8x16_t __s1_812 = __p1_812; \ + uint8x8_t __s2_812 = __p2_812; \ + uint32x4_t __rev0_812; __rev0_812 = __builtin_shufflevector(__s0_812, __s0_812, 3, 2, 1, 0); \ + uint8x16_t __rev1_812; __rev1_812 = __builtin_shufflevector(__s1_812, __s1_812, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev2_812; __rev2_812 = __builtin_shufflevector(__s2_812, __s2_812, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_812 = __noswap_vdotq_u32(__rev0_812, __rev1_812, __builtin_bit_cast(uint8x16_t, __noswap_splatq_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_812), __p3_812))); \ + __ret_812 = __builtin_shufflevector(__ret_812, __ret_812, 3, 2, 1, 0); \ + __ret_812; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdotq_lane_s32(__p0_841, __p1_841, __p2_841, __p3_841) __extension__ ({ \ - int32x4_t __ret_841; \ - int32x4_t __s0_841 = __p0_841; \ - int8x16_t __s1_841 = __p1_841; \ - int8x8_t __s2_841 = __p2_841; \ -int8x8_t __reint_841 = __s2_841; \ -int32x4_t __reint1_841 = splatq_lane_s32(*(int32x2_t *) &__reint_841, __p3_841); \ - __ret_841 = vdotq_s32(__s0_841, __s1_841, *(int8x16_t *) &__reint1_841); \ - __ret_841; \ +#define vdotq_lane_s32(__p0_813, __p1_813, __p2_813, __p3_813) __extension__ ({ \ + int32x4_t __ret_813; \ + int32x4_t __s0_813 = __p0_813; \ + int8x16_t __s1_813 = __p1_813; \ + int8x8_t __s2_813 = __p2_813; \ + __ret_813 = vdotq_s32(__s0_813, __s1_813, __builtin_bit_cast(int8x16_t, splatq_lane_s32(__builtin_bit_cast(int32x2_t, __s2_813), __p3_813))); \ + __ret_813; \ }) #else -#define vdotq_lane_s32(__p0_842, __p1_842, __p2_842, __p3_842) __extension__ ({ \ - int32x4_t __ret_842; \ - int32x4_t __s0_842 = __p0_842; \ - int8x16_t __s1_842 = __p1_842; \ - int8x8_t __s2_842 = __p2_842; \ - int32x4_t __rev0_842; __rev0_842 = __builtin_shufflevector(__s0_842, __s0_842, 3, 2, 1, 0); \ - int8x16_t 
__rev1_842; __rev1_842 = __builtin_shufflevector(__s1_842, __s1_842, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_842; __rev2_842 = __builtin_shufflevector(__s2_842, __s2_842, 7, 6, 5, 4, 3, 2, 1, 0); \ -int8x8_t __reint_842 = __rev2_842; \ -int32x4_t __reint1_842 = __noswap_splatq_lane_s32(*(int32x2_t *) &__reint_842, __p3_842); \ - __ret_842 = __noswap_vdotq_s32(__rev0_842, __rev1_842, *(int8x16_t *) &__reint1_842); \ - __ret_842 = __builtin_shufflevector(__ret_842, __ret_842, 3, 2, 1, 0); \ - __ret_842; \ +#define vdotq_lane_s32(__p0_814, __p1_814, __p2_814, __p3_814) __extension__ ({ \ + int32x4_t __ret_814; \ + int32x4_t __s0_814 = __p0_814; \ + int8x16_t __s1_814 = __p1_814; \ + int8x8_t __s2_814 = __p2_814; \ + int32x4_t __rev0_814; __rev0_814 = __builtin_shufflevector(__s0_814, __s0_814, 3, 2, 1, 0); \ + int8x16_t __rev1_814; __rev1_814 = __builtin_shufflevector(__s1_814, __s1_814, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev2_814; __rev2_814 = __builtin_shufflevector(__s2_814, __s2_814, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_814 = __noswap_vdotq_s32(__rev0_814, __rev1_814, __builtin_bit_cast(int8x16_t, __noswap_splatq_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_814), __p3_814))); \ + __ret_814 = __builtin_shufflevector(__ret_814, __ret_814, 3, 2, 1, 0); \ + __ret_814; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdot_lane_u32(__p0_843, __p1_843, __p2_843, __p3_843) __extension__ ({ \ - uint32x2_t __ret_843; \ - uint32x2_t __s0_843 = __p0_843; \ - uint8x8_t __s1_843 = __p1_843; \ - uint8x8_t __s2_843 = __p2_843; \ -uint8x8_t __reint_843 = __s2_843; \ -uint32x2_t __reint1_843 = splat_lane_u32(*(uint32x2_t *) &__reint_843, __p3_843); \ - __ret_843 = vdot_u32(__s0_843, __s1_843, *(uint8x8_t *) &__reint1_843); \ - __ret_843; \ +#define vdot_lane_u32(__p0_815, __p1_815, __p2_815, __p3_815) __extension__ ({ \ + uint32x2_t __ret_815; \ + uint32x2_t __s0_815 = __p0_815; \ + uint8x8_t __s1_815 = __p1_815; \ + 
uint8x8_t __s2_815 = __p2_815; \ + __ret_815 = vdot_u32(__s0_815, __s1_815, __builtin_bit_cast(uint8x8_t, splat_lane_u32(__builtin_bit_cast(uint32x2_t, __s2_815), __p3_815))); \ + __ret_815; \ }) #else -#define vdot_lane_u32(__p0_844, __p1_844, __p2_844, __p3_844) __extension__ ({ \ - uint32x2_t __ret_844; \ - uint32x2_t __s0_844 = __p0_844; \ - uint8x8_t __s1_844 = __p1_844; \ - uint8x8_t __s2_844 = __p2_844; \ - uint32x2_t __rev0_844; __rev0_844 = __builtin_shufflevector(__s0_844, __s0_844, 1, 0); \ - uint8x8_t __rev1_844; __rev1_844 = __builtin_shufflevector(__s1_844, __s1_844, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_844; __rev2_844 = __builtin_shufflevector(__s2_844, __s2_844, 7, 6, 5, 4, 3, 2, 1, 0); \ -uint8x8_t __reint_844 = __rev2_844; \ -uint32x2_t __reint1_844 = __noswap_splat_lane_u32(*(uint32x2_t *) &__reint_844, __p3_844); \ - __ret_844 = __noswap_vdot_u32(__rev0_844, __rev1_844, *(uint8x8_t *) &__reint1_844); \ - __ret_844 = __builtin_shufflevector(__ret_844, __ret_844, 1, 0); \ - __ret_844; \ +#define vdot_lane_u32(__p0_816, __p1_816, __p2_816, __p3_816) __extension__ ({ \ + uint32x2_t __ret_816; \ + uint32x2_t __s0_816 = __p0_816; \ + uint8x8_t __s1_816 = __p1_816; \ + uint8x8_t __s2_816 = __p2_816; \ + uint32x2_t __rev0_816; __rev0_816 = __builtin_shufflevector(__s0_816, __s0_816, 1, 0); \ + uint8x8_t __rev1_816; __rev1_816 = __builtin_shufflevector(__s1_816, __s1_816, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev2_816; __rev2_816 = __builtin_shufflevector(__s2_816, __s2_816, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_816 = __noswap_vdot_u32(__rev0_816, __rev1_816, __builtin_bit_cast(uint8x8_t, __noswap_splat_lane_u32(__builtin_bit_cast(uint32x2_t, __rev2_816), __p3_816))); \ + __ret_816 = __builtin_shufflevector(__ret_816, __ret_816, 1, 0); \ + __ret_816; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdot_lane_s32(__p0_845, __p1_845, __p2_845, __p3_845) __extension__ ({ \ - int32x2_t __ret_845; \ - int32x2_t __s0_845 = __p0_845; \ - int8x8_t __s1_845 
= __p1_845; \ - int8x8_t __s2_845 = __p2_845; \ -int8x8_t __reint_845 = __s2_845; \ -int32x2_t __reint1_845 = splat_lane_s32(*(int32x2_t *) &__reint_845, __p3_845); \ - __ret_845 = vdot_s32(__s0_845, __s1_845, *(int8x8_t *) &__reint1_845); \ - __ret_845; \ +#define vdot_lane_s32(__p0_817, __p1_817, __p2_817, __p3_817) __extension__ ({ \ + int32x2_t __ret_817; \ + int32x2_t __s0_817 = __p0_817; \ + int8x8_t __s1_817 = __p1_817; \ + int8x8_t __s2_817 = __p2_817; \ + __ret_817 = vdot_s32(__s0_817, __s1_817, __builtin_bit_cast(int8x8_t, splat_lane_s32(__builtin_bit_cast(int32x2_t, __s2_817), __p3_817))); \ + __ret_817; \ }) #else -#define vdot_lane_s32(__p0_846, __p1_846, __p2_846, __p3_846) __extension__ ({ \ - int32x2_t __ret_846; \ - int32x2_t __s0_846 = __p0_846; \ - int8x8_t __s1_846 = __p1_846; \ - int8x8_t __s2_846 = __p2_846; \ - int32x2_t __rev0_846; __rev0_846 = __builtin_shufflevector(__s0_846, __s0_846, 1, 0); \ - int8x8_t __rev1_846; __rev1_846 = __builtin_shufflevector(__s1_846, __s1_846, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_846; __rev2_846 = __builtin_shufflevector(__s2_846, __s2_846, 7, 6, 5, 4, 3, 2, 1, 0); \ -int8x8_t __reint_846 = __rev2_846; \ -int32x2_t __reint1_846 = __noswap_splat_lane_s32(*(int32x2_t *) &__reint_846, __p3_846); \ - __ret_846 = __noswap_vdot_s32(__rev0_846, __rev1_846, *(int8x8_t *) &__reint1_846); \ - __ret_846 = __builtin_shufflevector(__ret_846, __ret_846, 1, 0); \ - __ret_846; \ +#define vdot_lane_s32(__p0_818, __p1_818, __p2_818, __p3_818) __extension__ ({ \ + int32x2_t __ret_818; \ + int32x2_t __s0_818 = __p0_818; \ + int8x8_t __s1_818 = __p1_818; \ + int8x8_t __s2_818 = __p2_818; \ + int32x2_t __rev0_818; __rev0_818 = __builtin_shufflevector(__s0_818, __s0_818, 1, 0); \ + int8x8_t __rev1_818; __rev1_818 = __builtin_shufflevector(__s1_818, __s1_818, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev2_818; __rev2_818 = __builtin_shufflevector(__s2_818, __s2_818, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_818 = 
__noswap_vdot_s32(__rev0_818, __rev1_818, __builtin_bit_cast(int8x8_t, __noswap_splat_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_818), __p3_818))); \ + __ret_818 = __builtin_shufflevector(__ret_818, __ret_818, 1, 0); \ + __ret_818; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_lane_f16(__p0_847, __p1_847, __p2_847) __extension__ ({ \ - float16x8_t __ret_847; \ - float16x8_t __s0_847 = __p0_847; \ - float16x4_t __s1_847 = __p1_847; \ - __ret_847 = __s0_847 * splatq_lane_f16(__s1_847, __p2_847); \ - __ret_847; \ +#define vmulq_lane_f16(__p0_819, __p1_819, __p2_819) __extension__ ({ \ + float16x8_t __ret_819; \ + float16x8_t __s0_819 = __p0_819; \ + float16x4_t __s1_819 = __p1_819; \ + __ret_819 = __s0_819 * splatq_lane_f16(__s1_819, __p2_819); \ + __ret_819; \ }) #else -#define vmulq_lane_f16(__p0_848, __p1_848, __p2_848) __extension__ ({ \ - float16x8_t __ret_848; \ - float16x8_t __s0_848 = __p0_848; \ - float16x4_t __s1_848 = __p1_848; \ - float16x8_t __rev0_848; __rev0_848 = __builtin_shufflevector(__s0_848, __s0_848, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev1_848; __rev1_848 = __builtin_shufflevector(__s1_848, __s1_848, 3, 2, 1, 0); \ - __ret_848 = __rev0_848 * __noswap_splatq_lane_f16(__rev1_848, __p2_848); \ - __ret_848 = __builtin_shufflevector(__ret_848, __ret_848, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_848; \ +#define vmulq_lane_f16(__p0_820, __p1_820, __p2_820) __extension__ ({ \ + float16x8_t __ret_820; \ + float16x8_t __s0_820 = __p0_820; \ + float16x4_t __s1_820 = __p1_820; \ + float16x8_t __rev0_820; __rev0_820 = __builtin_shufflevector(__s0_820, __s0_820, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev1_820; __rev1_820 = __builtin_shufflevector(__s1_820, __s1_820, 3, 2, 1, 0); \ + __ret_820 = __rev0_820 * __noswap_splatq_lane_f16(__rev1_820, __p2_820); \ + __ret_820 = __builtin_shufflevector(__ret_820, __ret_820, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_820; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_lane_f16(__p0_849, __p1_849, __p2_849) 
__extension__ ({ \ - float16x4_t __ret_849; \ - float16x4_t __s0_849 = __p0_849; \ - float16x4_t __s1_849 = __p1_849; \ - __ret_849 = __s0_849 * splat_lane_f16(__s1_849, __p2_849); \ - __ret_849; \ +#define vmul_lane_f16(__p0_821, __p1_821, __p2_821) __extension__ ({ \ + float16x4_t __ret_821; \ + float16x4_t __s0_821 = __p0_821; \ + float16x4_t __s1_821 = __p1_821; \ + __ret_821 = __s0_821 * splat_lane_f16(__s1_821, __p2_821); \ + __ret_821; \ }) #else -#define vmul_lane_f16(__p0_850, __p1_850, __p2_850) __extension__ ({ \ - float16x4_t __ret_850; \ - float16x4_t __s0_850 = __p0_850; \ - float16x4_t __s1_850 = __p1_850; \ - float16x4_t __rev0_850; __rev0_850 = __builtin_shufflevector(__s0_850, __s0_850, 3, 2, 1, 0); \ - float16x4_t __rev1_850; __rev1_850 = __builtin_shufflevector(__s1_850, __s1_850, 3, 2, 1, 0); \ - __ret_850 = __rev0_850 * __noswap_splat_lane_f16(__rev1_850, __p2_850); \ - __ret_850 = __builtin_shufflevector(__ret_850, __ret_850, 3, 2, 1, 0); \ - __ret_850; \ +#define vmul_lane_f16(__p0_822, __p1_822, __p2_822) __extension__ ({ \ + float16x4_t __ret_822; \ + float16x4_t __s0_822 = __p0_822; \ + float16x4_t __s1_822 = __p1_822; \ + float16x4_t __rev0_822; __rev0_822 = __builtin_shufflevector(__s0_822, __s0_822, 3, 2, 1, 0); \ + float16x4_t __rev1_822; __rev1_822 = __builtin_shufflevector(__s1_822, __s1_822, 3, 2, 1, 0); \ + __ret_822 = __rev0_822 * __noswap_splat_lane_f16(__rev1_822, __p2_822); \ + __ret_822 = __builtin_shufflevector(__ret_822, __ret_822, 3, 2, 1, 0); \ + __ret_822; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsudotq_lane_s32(__p0_851, __p1_851, __p2_851, __p3_851) __extension__ ({ \ - int32x4_t __ret_851; \ - int32x4_t __s0_851 = __p0_851; \ - int8x16_t __s1_851 = __p1_851; \ - uint8x8_t __s2_851 = __p2_851; \ -uint8x8_t __reint_851 = __s2_851; \ - __ret_851 = vusdotq_s32(__s0_851, (uint8x16_t)(splatq_lane_s32(*(int32x2_t *) &__reint_851, __p3_851)), __s1_851); \ - __ret_851; \ +#define vsudotq_lane_s32(__p0_823, __p1_823, 
__p2_823, __p3_823) __extension__ ({ \ + int32x4_t __ret_823; \ + int32x4_t __s0_823 = __p0_823; \ + int8x16_t __s1_823 = __p1_823; \ + uint8x8_t __s2_823 = __p2_823; \ + __ret_823 = vusdotq_s32(__s0_823, (uint8x16_t)(splatq_lane_s32(__builtin_bit_cast(int32x2_t, __s2_823), __p3_823)), __s1_823); \ + __ret_823; \ }) #else -#define vsudotq_lane_s32(__p0_852, __p1_852, __p2_852, __p3_852) __extension__ ({ \ - int32x4_t __ret_852; \ - int32x4_t __s0_852 = __p0_852; \ - int8x16_t __s1_852 = __p1_852; \ - uint8x8_t __s2_852 = __p2_852; \ - int32x4_t __rev0_852; __rev0_852 = __builtin_shufflevector(__s0_852, __s0_852, 3, 2, 1, 0); \ - int8x16_t __rev1_852; __rev1_852 = __builtin_shufflevector(__s1_852, __s1_852, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_852; __rev2_852 = __builtin_shufflevector(__s2_852, __s2_852, 7, 6, 5, 4, 3, 2, 1, 0); \ -uint8x8_t __reint_852 = __rev2_852; \ - __ret_852 = __noswap_vusdotq_s32(__rev0_852, (uint8x16_t)(__noswap_splatq_lane_s32(*(int32x2_t *) &__reint_852, __p3_852)), __rev1_852); \ - __ret_852 = __builtin_shufflevector(__ret_852, __ret_852, 3, 2, 1, 0); \ - __ret_852; \ +#define vsudotq_lane_s32(__p0_824, __p1_824, __p2_824, __p3_824) __extension__ ({ \ + int32x4_t __ret_824; \ + int32x4_t __s0_824 = __p0_824; \ + int8x16_t __s1_824 = __p1_824; \ + uint8x8_t __s2_824 = __p2_824; \ + int32x4_t __rev0_824; __rev0_824 = __builtin_shufflevector(__s0_824, __s0_824, 3, 2, 1, 0); \ + int8x16_t __rev1_824; __rev1_824 = __builtin_shufflevector(__s1_824, __s1_824, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev2_824; __rev2_824 = __builtin_shufflevector(__s2_824, __s2_824, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_824 = __noswap_vusdotq_s32(__rev0_824, (uint8x16_t)(__noswap_splatq_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_824), __p3_824)), __rev1_824); \ + __ret_824 = __builtin_shufflevector(__ret_824, __ret_824, 3, 2, 1, 0); \ + __ret_824; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define 
vsudot_lane_s32(__p0_853, __p1_853, __p2_853, __p3_853) __extension__ ({ \ - int32x2_t __ret_853; \ - int32x2_t __s0_853 = __p0_853; \ - int8x8_t __s1_853 = __p1_853; \ - uint8x8_t __s2_853 = __p2_853; \ -uint8x8_t __reint_853 = __s2_853; \ - __ret_853 = vusdot_s32(__s0_853, (uint8x8_t)(splat_lane_s32(*(int32x2_t *) &__reint_853, __p3_853)), __s1_853); \ - __ret_853; \ +#define vsudot_lane_s32(__p0_825, __p1_825, __p2_825, __p3_825) __extension__ ({ \ + int32x2_t __ret_825; \ + int32x2_t __s0_825 = __p0_825; \ + int8x8_t __s1_825 = __p1_825; \ + uint8x8_t __s2_825 = __p2_825; \ + __ret_825 = vusdot_s32(__s0_825, (uint8x8_t)(splat_lane_s32(__builtin_bit_cast(int32x2_t, __s2_825), __p3_825)), __s1_825); \ + __ret_825; \ }) #else -#define vsudot_lane_s32(__p0_854, __p1_854, __p2_854, __p3_854) __extension__ ({ \ - int32x2_t __ret_854; \ - int32x2_t __s0_854 = __p0_854; \ - int8x8_t __s1_854 = __p1_854; \ - uint8x8_t __s2_854 = __p2_854; \ - int32x2_t __rev0_854; __rev0_854 = __builtin_shufflevector(__s0_854, __s0_854, 1, 0); \ - int8x8_t __rev1_854; __rev1_854 = __builtin_shufflevector(__s1_854, __s1_854, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_854; __rev2_854 = __builtin_shufflevector(__s2_854, __s2_854, 7, 6, 5, 4, 3, 2, 1, 0); \ -uint8x8_t __reint_854 = __rev2_854; \ - __ret_854 = __noswap_vusdot_s32(__rev0_854, (uint8x8_t)(__noswap_splat_lane_s32(*(int32x2_t *) &__reint_854, __p3_854)), __rev1_854); \ - __ret_854 = __builtin_shufflevector(__ret_854, __ret_854, 1, 0); \ - __ret_854; \ +#define vsudot_lane_s32(__p0_826, __p1_826, __p2_826, __p3_826) __extension__ ({ \ + int32x2_t __ret_826; \ + int32x2_t __s0_826 = __p0_826; \ + int8x8_t __s1_826 = __p1_826; \ + uint8x8_t __s2_826 = __p2_826; \ + int32x2_t __rev0_826; __rev0_826 = __builtin_shufflevector(__s0_826, __s0_826, 1, 0); \ + int8x8_t __rev1_826; __rev1_826 = __builtin_shufflevector(__s1_826, __s1_826, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev2_826; __rev2_826 = __builtin_shufflevector(__s2_826, 
__s2_826, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_826 = __noswap_vusdot_s32(__rev0_826, (uint8x8_t)(__noswap_splat_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_826), __p3_826)), __rev1_826); \ + __ret_826 = __builtin_shufflevector(__ret_826, __ret_826, 1, 0); \ + __ret_826; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vusdotq_lane_s32(__p0_855, __p1_855, __p2_855, __p3_855) __extension__ ({ \ - int32x4_t __ret_855; \ - int32x4_t __s0_855 = __p0_855; \ - uint8x16_t __s1_855 = __p1_855; \ - int8x8_t __s2_855 = __p2_855; \ -int8x8_t __reint_855 = __s2_855; \ - __ret_855 = vusdotq_s32(__s0_855, __s1_855, (int8x16_t)(splatq_lane_s32(*(int32x2_t *) &__reint_855, __p3_855))); \ - __ret_855; \ +#define vusdotq_lane_s32(__p0_827, __p1_827, __p2_827, __p3_827) __extension__ ({ \ + int32x4_t __ret_827; \ + int32x4_t __s0_827 = __p0_827; \ + uint8x16_t __s1_827 = __p1_827; \ + int8x8_t __s2_827 = __p2_827; \ + __ret_827 = vusdotq_s32(__s0_827, __s1_827, (int8x16_t)(splatq_lane_s32(__builtin_bit_cast(int32x2_t, __s2_827), __p3_827))); \ + __ret_827; \ }) #else -#define vusdotq_lane_s32(__p0_856, __p1_856, __p2_856, __p3_856) __extension__ ({ \ - int32x4_t __ret_856; \ - int32x4_t __s0_856 = __p0_856; \ - uint8x16_t __s1_856 = __p1_856; \ - int8x8_t __s2_856 = __p2_856; \ - int32x4_t __rev0_856; __rev0_856 = __builtin_shufflevector(__s0_856, __s0_856, 3, 2, 1, 0); \ - uint8x16_t __rev1_856; __rev1_856 = __builtin_shufflevector(__s1_856, __s1_856, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_856; __rev2_856 = __builtin_shufflevector(__s2_856, __s2_856, 7, 6, 5, 4, 3, 2, 1, 0); \ -int8x8_t __reint_856 = __rev2_856; \ - __ret_856 = __noswap_vusdotq_s32(__rev0_856, __rev1_856, (int8x16_t)(__noswap_splatq_lane_s32(*(int32x2_t *) &__reint_856, __p3_856))); \ - __ret_856 = __builtin_shufflevector(__ret_856, __ret_856, 3, 2, 1, 0); \ - __ret_856; \ +#define vusdotq_lane_s32(__p0_828, __p1_828, __p2_828, __p3_828) __extension__ ({ \ + int32x4_t __ret_828; \ + 
int32x4_t __s0_828 = __p0_828; \ + uint8x16_t __s1_828 = __p1_828; \ + int8x8_t __s2_828 = __p2_828; \ + int32x4_t __rev0_828; __rev0_828 = __builtin_shufflevector(__s0_828, __s0_828, 3, 2, 1, 0); \ + uint8x16_t __rev1_828; __rev1_828 = __builtin_shufflevector(__s1_828, __s1_828, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev2_828; __rev2_828 = __builtin_shufflevector(__s2_828, __s2_828, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_828 = __noswap_vusdotq_s32(__rev0_828, __rev1_828, (int8x16_t)(__noswap_splatq_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_828), __p3_828))); \ + __ret_828 = __builtin_shufflevector(__ret_828, __ret_828, 3, 2, 1, 0); \ + __ret_828; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vusdot_lane_s32(__p0_857, __p1_857, __p2_857, __p3_857) __extension__ ({ \ - int32x2_t __ret_857; \ - int32x2_t __s0_857 = __p0_857; \ - uint8x8_t __s1_857 = __p1_857; \ - int8x8_t __s2_857 = __p2_857; \ -int8x8_t __reint_857 = __s2_857; \ - __ret_857 = vusdot_s32(__s0_857, __s1_857, (int8x8_t)(splat_lane_s32(*(int32x2_t *) &__reint_857, __p3_857))); \ - __ret_857; \ +#define vusdot_lane_s32(__p0_829, __p1_829, __p2_829, __p3_829) __extension__ ({ \ + int32x2_t __ret_829; \ + int32x2_t __s0_829 = __p0_829; \ + uint8x8_t __s1_829 = __p1_829; \ + int8x8_t __s2_829 = __p2_829; \ + __ret_829 = vusdot_s32(__s0_829, __s1_829, (int8x8_t)(splat_lane_s32(__builtin_bit_cast(int32x2_t, __s2_829), __p3_829))); \ + __ret_829; \ }) #else -#define vusdot_lane_s32(__p0_858, __p1_858, __p2_858, __p3_858) __extension__ ({ \ - int32x2_t __ret_858; \ - int32x2_t __s0_858 = __p0_858; \ - uint8x8_t __s1_858 = __p1_858; \ - int8x8_t __s2_858 = __p2_858; \ - int32x2_t __rev0_858; __rev0_858 = __builtin_shufflevector(__s0_858, __s0_858, 1, 0); \ - uint8x8_t __rev1_858; __rev1_858 = __builtin_shufflevector(__s1_858, __s1_858, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_858; __rev2_858 = __builtin_shufflevector(__s2_858, __s2_858, 7, 6, 5, 4, 3, 2, 1, 0); \ -int8x8_t 
__reint_858 = __rev2_858; \ - __ret_858 = __noswap_vusdot_s32(__rev0_858, __rev1_858, (int8x8_t)(__noswap_splat_lane_s32(*(int32x2_t *) &__reint_858, __p3_858))); \ - __ret_858 = __builtin_shufflevector(__ret_858, __ret_858, 1, 0); \ - __ret_858; \ +#define vusdot_lane_s32(__p0_830, __p1_830, __p2_830, __p3_830) __extension__ ({ \ + int32x2_t __ret_830; \ + int32x2_t __s0_830 = __p0_830; \ + uint8x8_t __s1_830 = __p1_830; \ + int8x8_t __s2_830 = __p2_830; \ + int32x2_t __rev0_830; __rev0_830 = __builtin_shufflevector(__s0_830, __s0_830, 1, 0); \ + uint8x8_t __rev1_830; __rev1_830 = __builtin_shufflevector(__s1_830, __s1_830, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev2_830; __rev2_830 = __builtin_shufflevector(__s2_830, __s2_830, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_830 = __noswap_vusdot_s32(__rev0_830, __rev1_830, (int8x8_t)(__noswap_splat_lane_s32(__builtin_bit_cast(int32x2_t, __rev2_830), __p3_830))); \ + __ret_830 = __builtin_shufflevector(__ret_830, __ret_830, 1, 0); \ + __ret_830; \ }) #endif @@ -67363,60 +69278,48 @@ __ai __attribute__((target("neon"))) int32x4_t vaddw_s16(int32x4_t __p0, int16x4 #endif #ifdef __LITTLE_ENDIAN__ -#define vget_lane_f16(__p0_859, __p1_859) __extension__ ({ \ - float16_t __ret_859; \ - float16x4_t __s0_859 = __p0_859; \ -float16x4_t __reint_859 = __s0_859; \ -int16_t __reint1_859 = vget_lane_s16(*(int16x4_t *) &__reint_859, __p1_859); \ - __ret_859 = *(float16_t *) &__reint1_859; \ - __ret_859; \ +#define vget_lane_f16(__p0_831, __p1_831) __extension__ ({ \ + float16_t __ret_831; \ + float16x4_t __s0_831 = __p0_831; \ + __ret_831 = __builtin_bit_cast(float16_t, vget_lane_s16(__builtin_bit_cast(int16x4_t, __s0_831), __p1_831)); \ + __ret_831; \ }) #else -#define vget_lane_f16(__p0_860, __p1_860) __extension__ ({ \ - float16_t __ret_860; \ - float16x4_t __s0_860 = __p0_860; \ - float16x4_t __rev0_860; __rev0_860 = __builtin_shufflevector(__s0_860, __s0_860, 3, 2, 1, 0); \ -float16x4_t __reint_860 = __rev0_860; \ -int16_t 
__reint1_860 = __noswap_vget_lane_s16(*(int16x4_t *) &__reint_860, __p1_860); \ - __ret_860 = *(float16_t *) &__reint1_860; \ - __ret_860; \ +#define vget_lane_f16(__p0_832, __p1_832) __extension__ ({ \ + float16_t __ret_832; \ + float16x4_t __s0_832 = __p0_832; \ + float16x4_t __rev0_832; __rev0_832 = __builtin_shufflevector(__s0_832, __s0_832, 3, 2, 1, 0); \ + __ret_832 = __builtin_bit_cast(float16_t, __noswap_vget_lane_s16(__builtin_bit_cast(int16x4_t, __rev0_832), __p1_832)); \ + __ret_832; \ }) -#define __noswap_vget_lane_f16(__p0_861, __p1_861) __extension__ ({ \ - float16_t __ret_861; \ - float16x4_t __s0_861 = __p0_861; \ -float16x4_t __reint_861 = __s0_861; \ -int16_t __reint1_861 = __noswap_vget_lane_s16(*(int16x4_t *) &__reint_861, __p1_861); \ - __ret_861 = *(float16_t *) &__reint1_861; \ - __ret_861; \ +#define __noswap_vget_lane_f16(__p0_833, __p1_833) __extension__ ({ \ + float16_t __ret_833; \ + float16x4_t __s0_833 = __p0_833; \ + __ret_833 = __builtin_bit_cast(float16_t, __noswap_vget_lane_s16(__builtin_bit_cast(int16x4_t, __s0_833), __p1_833)); \ + __ret_833; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vgetq_lane_f16(__p0_862, __p1_862) __extension__ ({ \ - float16_t __ret_862; \ - float16x8_t __s0_862 = __p0_862; \ -float16x8_t __reint_862 = __s0_862; \ -int16_t __reint1_862 = vgetq_lane_s16(*(int16x8_t *) &__reint_862, __p1_862); \ - __ret_862 = *(float16_t *) &__reint1_862; \ - __ret_862; \ +#define vgetq_lane_f16(__p0_834, __p1_834) __extension__ ({ \ + float16_t __ret_834; \ + float16x8_t __s0_834 = __p0_834; \ + __ret_834 = __builtin_bit_cast(float16_t, vgetq_lane_s16(__builtin_bit_cast(int16x8_t, __s0_834), __p1_834)); \ + __ret_834; \ }) #else -#define vgetq_lane_f16(__p0_863, __p1_863) __extension__ ({ \ - float16_t __ret_863; \ - float16x8_t __s0_863 = __p0_863; \ - float16x8_t __rev0_863; __rev0_863 = __builtin_shufflevector(__s0_863, __s0_863, 7, 6, 5, 4, 3, 2, 1, 0); \ -float16x8_t __reint_863 = __rev0_863; \ -int16_t __reint1_863 
= __noswap_vgetq_lane_s16(*(int16x8_t *) &__reint_863, __p1_863); \ - __ret_863 = *(float16_t *) &__reint1_863; \ - __ret_863; \ +#define vgetq_lane_f16(__p0_835, __p1_835) __extension__ ({ \ + float16_t __ret_835; \ + float16x8_t __s0_835 = __p0_835; \ + float16x8_t __rev0_835; __rev0_835 = __builtin_shufflevector(__s0_835, __s0_835, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_835 = __builtin_bit_cast(float16_t, __noswap_vgetq_lane_s16(__builtin_bit_cast(int16x8_t, __rev0_835), __p1_835)); \ + __ret_835; \ }) -#define __noswap_vgetq_lane_f16(__p0_864, __p1_864) __extension__ ({ \ - float16_t __ret_864; \ - float16x8_t __s0_864 = __p0_864; \ -float16x8_t __reint_864 = __s0_864; \ -int16_t __reint1_864 = __noswap_vgetq_lane_s16(*(int16x8_t *) &__reint_864, __p1_864); \ - __ret_864 = *(float16_t *) &__reint1_864; \ - __ret_864; \ +#define __noswap_vgetq_lane_f16(__p0_836, __p1_836) __extension__ ({ \ + float16_t __ret_836; \ + float16x8_t __s0_836 = __p0_836; \ + __ret_836 = __builtin_bit_cast(float16_t, __noswap_vgetq_lane_s16(__builtin_bit_cast(int16x8_t, __s0_836), __p1_836)); \ + __ret_836; \ }) #endif @@ -67559,98 +69462,98 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vmlal_s16(int32x4_t __p0 #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_lane_u32(__p0_865, __p1_865, __p2_865, __p3_865) __extension__ ({ \ - uint64x2_t __ret_865; \ - uint64x2_t __s0_865 = __p0_865; \ - uint32x2_t __s1_865 = __p1_865; \ - uint32x2_t __s2_865 = __p2_865; \ - __ret_865 = __s0_865 + vmull_u32(__s1_865, splat_lane_u32(__s2_865, __p3_865)); \ - __ret_865; \ +#define vmlal_lane_u32(__p0_837, __p1_837, __p2_837, __p3_837) __extension__ ({ \ + uint64x2_t __ret_837; \ + uint64x2_t __s0_837 = __p0_837; \ + uint32x2_t __s1_837 = __p1_837; \ + uint32x2_t __s2_837 = __p2_837; \ + __ret_837 = __s0_837 + vmull_u32(__s1_837, splat_lane_u32(__s2_837, __p3_837)); \ + __ret_837; \ }) #else -#define vmlal_lane_u32(__p0_866, __p1_866, __p2_866, __p3_866) __extension__ ({ \ - uint64x2_t __ret_866; 
\ - uint64x2_t __s0_866 = __p0_866; \ - uint32x2_t __s1_866 = __p1_866; \ - uint32x2_t __s2_866 = __p2_866; \ - uint64x2_t __rev0_866; __rev0_866 = __builtin_shufflevector(__s0_866, __s0_866, 1, 0); \ - uint32x2_t __rev1_866; __rev1_866 = __builtin_shufflevector(__s1_866, __s1_866, 1, 0); \ - uint32x2_t __rev2_866; __rev2_866 = __builtin_shufflevector(__s2_866, __s2_866, 1, 0); \ - __ret_866 = __rev0_866 + __noswap_vmull_u32(__rev1_866, __noswap_splat_lane_u32(__rev2_866, __p3_866)); \ - __ret_866 = __builtin_shufflevector(__ret_866, __ret_866, 1, 0); \ - __ret_866; \ +#define vmlal_lane_u32(__p0_838, __p1_838, __p2_838, __p3_838) __extension__ ({ \ + uint64x2_t __ret_838; \ + uint64x2_t __s0_838 = __p0_838; \ + uint32x2_t __s1_838 = __p1_838; \ + uint32x2_t __s2_838 = __p2_838; \ + uint64x2_t __rev0_838; __rev0_838 = __builtin_shufflevector(__s0_838, __s0_838, 1, 0); \ + uint32x2_t __rev1_838; __rev1_838 = __builtin_shufflevector(__s1_838, __s1_838, 1, 0); \ + uint32x2_t __rev2_838; __rev2_838 = __builtin_shufflevector(__s2_838, __s2_838, 1, 0); \ + __ret_838 = __rev0_838 + __noswap_vmull_u32(__rev1_838, __noswap_splat_lane_u32(__rev2_838, __p3_838)); \ + __ret_838 = __builtin_shufflevector(__ret_838, __ret_838, 1, 0); \ + __ret_838; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_lane_u16(__p0_867, __p1_867, __p2_867, __p3_867) __extension__ ({ \ - uint32x4_t __ret_867; \ - uint32x4_t __s0_867 = __p0_867; \ - uint16x4_t __s1_867 = __p1_867; \ - uint16x4_t __s2_867 = __p2_867; \ - __ret_867 = __s0_867 + vmull_u16(__s1_867, splat_lane_u16(__s2_867, __p3_867)); \ - __ret_867; \ +#define vmlal_lane_u16(__p0_839, __p1_839, __p2_839, __p3_839) __extension__ ({ \ + uint32x4_t __ret_839; \ + uint32x4_t __s0_839 = __p0_839; \ + uint16x4_t __s1_839 = __p1_839; \ + uint16x4_t __s2_839 = __p2_839; \ + __ret_839 = __s0_839 + vmull_u16(__s1_839, splat_lane_u16(__s2_839, __p3_839)); \ + __ret_839; \ }) #else -#define vmlal_lane_u16(__p0_868, __p1_868, __p2_868, __p3_868) 
__extension__ ({ \ - uint32x4_t __ret_868; \ - uint32x4_t __s0_868 = __p0_868; \ - uint16x4_t __s1_868 = __p1_868; \ - uint16x4_t __s2_868 = __p2_868; \ - uint32x4_t __rev0_868; __rev0_868 = __builtin_shufflevector(__s0_868, __s0_868, 3, 2, 1, 0); \ - uint16x4_t __rev1_868; __rev1_868 = __builtin_shufflevector(__s1_868, __s1_868, 3, 2, 1, 0); \ - uint16x4_t __rev2_868; __rev2_868 = __builtin_shufflevector(__s2_868, __s2_868, 3, 2, 1, 0); \ - __ret_868 = __rev0_868 + __noswap_vmull_u16(__rev1_868, __noswap_splat_lane_u16(__rev2_868, __p3_868)); \ - __ret_868 = __builtin_shufflevector(__ret_868, __ret_868, 3, 2, 1, 0); \ - __ret_868; \ +#define vmlal_lane_u16(__p0_840, __p1_840, __p2_840, __p3_840) __extension__ ({ \ + uint32x4_t __ret_840; \ + uint32x4_t __s0_840 = __p0_840; \ + uint16x4_t __s1_840 = __p1_840; \ + uint16x4_t __s2_840 = __p2_840; \ + uint32x4_t __rev0_840; __rev0_840 = __builtin_shufflevector(__s0_840, __s0_840, 3, 2, 1, 0); \ + uint16x4_t __rev1_840; __rev1_840 = __builtin_shufflevector(__s1_840, __s1_840, 3, 2, 1, 0); \ + uint16x4_t __rev2_840; __rev2_840 = __builtin_shufflevector(__s2_840, __s2_840, 3, 2, 1, 0); \ + __ret_840 = __rev0_840 + __noswap_vmull_u16(__rev1_840, __noswap_splat_lane_u16(__rev2_840, __p3_840)); \ + __ret_840 = __builtin_shufflevector(__ret_840, __ret_840, 3, 2, 1, 0); \ + __ret_840; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_lane_s32(__p0_869, __p1_869, __p2_869, __p3_869) __extension__ ({ \ - int64x2_t __ret_869; \ - int64x2_t __s0_869 = __p0_869; \ - int32x2_t __s1_869 = __p1_869; \ - int32x2_t __s2_869 = __p2_869; \ - __ret_869 = __s0_869 + vmull_s32(__s1_869, splat_lane_s32(__s2_869, __p3_869)); \ - __ret_869; \ +#define vmlal_lane_s32(__p0_841, __p1_841, __p2_841, __p3_841) __extension__ ({ \ + int64x2_t __ret_841; \ + int64x2_t __s0_841 = __p0_841; \ + int32x2_t __s1_841 = __p1_841; \ + int32x2_t __s2_841 = __p2_841; \ + __ret_841 = __s0_841 + vmull_s32(__s1_841, splat_lane_s32(__s2_841, __p3_841)); \ + 
__ret_841; \ }) #else -#define vmlal_lane_s32(__p0_870, __p1_870, __p2_870, __p3_870) __extension__ ({ \ - int64x2_t __ret_870; \ - int64x2_t __s0_870 = __p0_870; \ - int32x2_t __s1_870 = __p1_870; \ - int32x2_t __s2_870 = __p2_870; \ - int64x2_t __rev0_870; __rev0_870 = __builtin_shufflevector(__s0_870, __s0_870, 1, 0); \ - int32x2_t __rev1_870; __rev1_870 = __builtin_shufflevector(__s1_870, __s1_870, 1, 0); \ - int32x2_t __rev2_870; __rev2_870 = __builtin_shufflevector(__s2_870, __s2_870, 1, 0); \ - __ret_870 = __rev0_870 + __noswap_vmull_s32(__rev1_870, __noswap_splat_lane_s32(__rev2_870, __p3_870)); \ - __ret_870 = __builtin_shufflevector(__ret_870, __ret_870, 1, 0); \ - __ret_870; \ +#define vmlal_lane_s32(__p0_842, __p1_842, __p2_842, __p3_842) __extension__ ({ \ + int64x2_t __ret_842; \ + int64x2_t __s0_842 = __p0_842; \ + int32x2_t __s1_842 = __p1_842; \ + int32x2_t __s2_842 = __p2_842; \ + int64x2_t __rev0_842; __rev0_842 = __builtin_shufflevector(__s0_842, __s0_842, 1, 0); \ + int32x2_t __rev1_842; __rev1_842 = __builtin_shufflevector(__s1_842, __s1_842, 1, 0); \ + int32x2_t __rev2_842; __rev2_842 = __builtin_shufflevector(__s2_842, __s2_842, 1, 0); \ + __ret_842 = __rev0_842 + __noswap_vmull_s32(__rev1_842, __noswap_splat_lane_s32(__rev2_842, __p3_842)); \ + __ret_842 = __builtin_shufflevector(__ret_842, __ret_842, 1, 0); \ + __ret_842; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_lane_s16(__p0_871, __p1_871, __p2_871, __p3_871) __extension__ ({ \ - int32x4_t __ret_871; \ - int32x4_t __s0_871 = __p0_871; \ - int16x4_t __s1_871 = __p1_871; \ - int16x4_t __s2_871 = __p2_871; \ - __ret_871 = __s0_871 + vmull_s16(__s1_871, splat_lane_s16(__s2_871, __p3_871)); \ - __ret_871; \ +#define vmlal_lane_s16(__p0_843, __p1_843, __p2_843, __p3_843) __extension__ ({ \ + int32x4_t __ret_843; \ + int32x4_t __s0_843 = __p0_843; \ + int16x4_t __s1_843 = __p1_843; \ + int16x4_t __s2_843 = __p2_843; \ + __ret_843 = __s0_843 + vmull_s16(__s1_843, 
splat_lane_s16(__s2_843, __p3_843)); \ + __ret_843; \ }) #else -#define vmlal_lane_s16(__p0_872, __p1_872, __p2_872, __p3_872) __extension__ ({ \ - int32x4_t __ret_872; \ - int32x4_t __s0_872 = __p0_872; \ - int16x4_t __s1_872 = __p1_872; \ - int16x4_t __s2_872 = __p2_872; \ - int32x4_t __rev0_872; __rev0_872 = __builtin_shufflevector(__s0_872, __s0_872, 3, 2, 1, 0); \ - int16x4_t __rev1_872; __rev1_872 = __builtin_shufflevector(__s1_872, __s1_872, 3, 2, 1, 0); \ - int16x4_t __rev2_872; __rev2_872 = __builtin_shufflevector(__s2_872, __s2_872, 3, 2, 1, 0); \ - __ret_872 = __rev0_872 + __noswap_vmull_s16(__rev1_872, __noswap_splat_lane_s16(__rev2_872, __p3_872)); \ - __ret_872 = __builtin_shufflevector(__ret_872, __ret_872, 3, 2, 1, 0); \ - __ret_872; \ +#define vmlal_lane_s16(__p0_844, __p1_844, __p2_844, __p3_844) __extension__ ({ \ + int32x4_t __ret_844; \ + int32x4_t __s0_844 = __p0_844; \ + int16x4_t __s1_844 = __p1_844; \ + int16x4_t __s2_844 = __p2_844; \ + int32x4_t __rev0_844; __rev0_844 = __builtin_shufflevector(__s0_844, __s0_844, 3, 2, 1, 0); \ + int16x4_t __rev1_844; __rev1_844 = __builtin_shufflevector(__s1_844, __s1_844, 3, 2, 1, 0); \ + int16x4_t __rev2_844; __rev2_844 = __builtin_shufflevector(__s2_844, __s2_844, 3, 2, 1, 0); \ + __ret_844 = __rev0_844 + __noswap_vmull_s16(__rev1_844, __noswap_splat_lane_s16(__rev2_844, __p3_844)); \ + __ret_844 = __builtin_shufflevector(__ret_844, __ret_844, 3, 2, 1, 0); \ + __ret_844; \ }) #endif @@ -67881,98 +69784,98 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vmlsl_s16(int32x4_t __p0 #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_lane_u32(__p0_873, __p1_873, __p2_873, __p3_873) __extension__ ({ \ - uint64x2_t __ret_873; \ - uint64x2_t __s0_873 = __p0_873; \ - uint32x2_t __s1_873 = __p1_873; \ - uint32x2_t __s2_873 = __p2_873; \ - __ret_873 = __s0_873 - vmull_u32(__s1_873, splat_lane_u32(__s2_873, __p3_873)); \ - __ret_873; \ +#define vmlsl_lane_u32(__p0_845, __p1_845, __p2_845, __p3_845) 
__extension__ ({ \ + uint64x2_t __ret_845; \ + uint64x2_t __s0_845 = __p0_845; \ + uint32x2_t __s1_845 = __p1_845; \ + uint32x2_t __s2_845 = __p2_845; \ + __ret_845 = __s0_845 - vmull_u32(__s1_845, splat_lane_u32(__s2_845, __p3_845)); \ + __ret_845; \ }) #else -#define vmlsl_lane_u32(__p0_874, __p1_874, __p2_874, __p3_874) __extension__ ({ \ - uint64x2_t __ret_874; \ - uint64x2_t __s0_874 = __p0_874; \ - uint32x2_t __s1_874 = __p1_874; \ - uint32x2_t __s2_874 = __p2_874; \ - uint64x2_t __rev0_874; __rev0_874 = __builtin_shufflevector(__s0_874, __s0_874, 1, 0); \ - uint32x2_t __rev1_874; __rev1_874 = __builtin_shufflevector(__s1_874, __s1_874, 1, 0); \ - uint32x2_t __rev2_874; __rev2_874 = __builtin_shufflevector(__s2_874, __s2_874, 1, 0); \ - __ret_874 = __rev0_874 - __noswap_vmull_u32(__rev1_874, __noswap_splat_lane_u32(__rev2_874, __p3_874)); \ - __ret_874 = __builtin_shufflevector(__ret_874, __ret_874, 1, 0); \ - __ret_874; \ +#define vmlsl_lane_u32(__p0_846, __p1_846, __p2_846, __p3_846) __extension__ ({ \ + uint64x2_t __ret_846; \ + uint64x2_t __s0_846 = __p0_846; \ + uint32x2_t __s1_846 = __p1_846; \ + uint32x2_t __s2_846 = __p2_846; \ + uint64x2_t __rev0_846; __rev0_846 = __builtin_shufflevector(__s0_846, __s0_846, 1, 0); \ + uint32x2_t __rev1_846; __rev1_846 = __builtin_shufflevector(__s1_846, __s1_846, 1, 0); \ + uint32x2_t __rev2_846; __rev2_846 = __builtin_shufflevector(__s2_846, __s2_846, 1, 0); \ + __ret_846 = __rev0_846 - __noswap_vmull_u32(__rev1_846, __noswap_splat_lane_u32(__rev2_846, __p3_846)); \ + __ret_846 = __builtin_shufflevector(__ret_846, __ret_846, 1, 0); \ + __ret_846; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_lane_u16(__p0_875, __p1_875, __p2_875, __p3_875) __extension__ ({ \ - uint32x4_t __ret_875; \ - uint32x4_t __s0_875 = __p0_875; \ - uint16x4_t __s1_875 = __p1_875; \ - uint16x4_t __s2_875 = __p2_875; \ - __ret_875 = __s0_875 - vmull_u16(__s1_875, splat_lane_u16(__s2_875, __p3_875)); \ - __ret_875; \ +#define 
vmlsl_lane_u16(__p0_847, __p1_847, __p2_847, __p3_847) __extension__ ({ \ + uint32x4_t __ret_847; \ + uint32x4_t __s0_847 = __p0_847; \ + uint16x4_t __s1_847 = __p1_847; \ + uint16x4_t __s2_847 = __p2_847; \ + __ret_847 = __s0_847 - vmull_u16(__s1_847, splat_lane_u16(__s2_847, __p3_847)); \ + __ret_847; \ }) #else -#define vmlsl_lane_u16(__p0_876, __p1_876, __p2_876, __p3_876) __extension__ ({ \ - uint32x4_t __ret_876; \ - uint32x4_t __s0_876 = __p0_876; \ - uint16x4_t __s1_876 = __p1_876; \ - uint16x4_t __s2_876 = __p2_876; \ - uint32x4_t __rev0_876; __rev0_876 = __builtin_shufflevector(__s0_876, __s0_876, 3, 2, 1, 0); \ - uint16x4_t __rev1_876; __rev1_876 = __builtin_shufflevector(__s1_876, __s1_876, 3, 2, 1, 0); \ - uint16x4_t __rev2_876; __rev2_876 = __builtin_shufflevector(__s2_876, __s2_876, 3, 2, 1, 0); \ - __ret_876 = __rev0_876 - __noswap_vmull_u16(__rev1_876, __noswap_splat_lane_u16(__rev2_876, __p3_876)); \ - __ret_876 = __builtin_shufflevector(__ret_876, __ret_876, 3, 2, 1, 0); \ - __ret_876; \ +#define vmlsl_lane_u16(__p0_848, __p1_848, __p2_848, __p3_848) __extension__ ({ \ + uint32x4_t __ret_848; \ + uint32x4_t __s0_848 = __p0_848; \ + uint16x4_t __s1_848 = __p1_848; \ + uint16x4_t __s2_848 = __p2_848; \ + uint32x4_t __rev0_848; __rev0_848 = __builtin_shufflevector(__s0_848, __s0_848, 3, 2, 1, 0); \ + uint16x4_t __rev1_848; __rev1_848 = __builtin_shufflevector(__s1_848, __s1_848, 3, 2, 1, 0); \ + uint16x4_t __rev2_848; __rev2_848 = __builtin_shufflevector(__s2_848, __s2_848, 3, 2, 1, 0); \ + __ret_848 = __rev0_848 - __noswap_vmull_u16(__rev1_848, __noswap_splat_lane_u16(__rev2_848, __p3_848)); \ + __ret_848 = __builtin_shufflevector(__ret_848, __ret_848, 3, 2, 1, 0); \ + __ret_848; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_lane_s32(__p0_877, __p1_877, __p2_877, __p3_877) __extension__ ({ \ - int64x2_t __ret_877; \ - int64x2_t __s0_877 = __p0_877; \ - int32x2_t __s1_877 = __p1_877; \ - int32x2_t __s2_877 = __p2_877; \ - __ret_877 = __s0_877 
- vmull_s32(__s1_877, splat_lane_s32(__s2_877, __p3_877)); \ - __ret_877; \ +#define vmlsl_lane_s32(__p0_849, __p1_849, __p2_849, __p3_849) __extension__ ({ \ + int64x2_t __ret_849; \ + int64x2_t __s0_849 = __p0_849; \ + int32x2_t __s1_849 = __p1_849; \ + int32x2_t __s2_849 = __p2_849; \ + __ret_849 = __s0_849 - vmull_s32(__s1_849, splat_lane_s32(__s2_849, __p3_849)); \ + __ret_849; \ }) #else -#define vmlsl_lane_s32(__p0_878, __p1_878, __p2_878, __p3_878) __extension__ ({ \ - int64x2_t __ret_878; \ - int64x2_t __s0_878 = __p0_878; \ - int32x2_t __s1_878 = __p1_878; \ - int32x2_t __s2_878 = __p2_878; \ - int64x2_t __rev0_878; __rev0_878 = __builtin_shufflevector(__s0_878, __s0_878, 1, 0); \ - int32x2_t __rev1_878; __rev1_878 = __builtin_shufflevector(__s1_878, __s1_878, 1, 0); \ - int32x2_t __rev2_878; __rev2_878 = __builtin_shufflevector(__s2_878, __s2_878, 1, 0); \ - __ret_878 = __rev0_878 - __noswap_vmull_s32(__rev1_878, __noswap_splat_lane_s32(__rev2_878, __p3_878)); \ - __ret_878 = __builtin_shufflevector(__ret_878, __ret_878, 1, 0); \ - __ret_878; \ +#define vmlsl_lane_s32(__p0_850, __p1_850, __p2_850, __p3_850) __extension__ ({ \ + int64x2_t __ret_850; \ + int64x2_t __s0_850 = __p0_850; \ + int32x2_t __s1_850 = __p1_850; \ + int32x2_t __s2_850 = __p2_850; \ + int64x2_t __rev0_850; __rev0_850 = __builtin_shufflevector(__s0_850, __s0_850, 1, 0); \ + int32x2_t __rev1_850; __rev1_850 = __builtin_shufflevector(__s1_850, __s1_850, 1, 0); \ + int32x2_t __rev2_850; __rev2_850 = __builtin_shufflevector(__s2_850, __s2_850, 1, 0); \ + __ret_850 = __rev0_850 - __noswap_vmull_s32(__rev1_850, __noswap_splat_lane_s32(__rev2_850, __p3_850)); \ + __ret_850 = __builtin_shufflevector(__ret_850, __ret_850, 1, 0); \ + __ret_850; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_lane_s16(__p0_879, __p1_879, __p2_879, __p3_879) __extension__ ({ \ - int32x4_t __ret_879; \ - int32x4_t __s0_879 = __p0_879; \ - int16x4_t __s1_879 = __p1_879; \ - int16x4_t __s2_879 = __p2_879; \ - 
__ret_879 = __s0_879 - vmull_s16(__s1_879, splat_lane_s16(__s2_879, __p3_879)); \ - __ret_879; \ +#define vmlsl_lane_s16(__p0_851, __p1_851, __p2_851, __p3_851) __extension__ ({ \ + int32x4_t __ret_851; \ + int32x4_t __s0_851 = __p0_851; \ + int16x4_t __s1_851 = __p1_851; \ + int16x4_t __s2_851 = __p2_851; \ + __ret_851 = __s0_851 - vmull_s16(__s1_851, splat_lane_s16(__s2_851, __p3_851)); \ + __ret_851; \ }) #else -#define vmlsl_lane_s16(__p0_880, __p1_880, __p2_880, __p3_880) __extension__ ({ \ - int32x4_t __ret_880; \ - int32x4_t __s0_880 = __p0_880; \ - int16x4_t __s1_880 = __p1_880; \ - int16x4_t __s2_880 = __p2_880; \ - int32x4_t __rev0_880; __rev0_880 = __builtin_shufflevector(__s0_880, __s0_880, 3, 2, 1, 0); \ - int16x4_t __rev1_880; __rev1_880 = __builtin_shufflevector(__s1_880, __s1_880, 3, 2, 1, 0); \ - int16x4_t __rev2_880; __rev2_880 = __builtin_shufflevector(__s2_880, __s2_880, 3, 2, 1, 0); \ - __ret_880 = __rev0_880 - __noswap_vmull_s16(__rev1_880, __noswap_splat_lane_s16(__rev2_880, __p3_880)); \ - __ret_880 = __builtin_shufflevector(__ret_880, __ret_880, 3, 2, 1, 0); \ - __ret_880; \ +#define vmlsl_lane_s16(__p0_852, __p1_852, __p2_852, __p3_852) __extension__ ({ \ + int32x4_t __ret_852; \ + int32x4_t __s0_852 = __p0_852; \ + int16x4_t __s1_852 = __p1_852; \ + int16x4_t __s2_852 = __p2_852; \ + int32x4_t __rev0_852; __rev0_852 = __builtin_shufflevector(__s0_852, __s0_852, 3, 2, 1, 0); \ + int16x4_t __rev1_852; __rev1_852 = __builtin_shufflevector(__s1_852, __s1_852, 3, 2, 1, 0); \ + int16x4_t __rev2_852; __rev2_852 = __builtin_shufflevector(__s2_852, __s2_852, 3, 2, 1, 0); \ + __ret_852 = __rev0_852 - __noswap_vmull_s16(__rev1_852, __noswap_splat_lane_s16(__rev2_852, __p3_852)); \ + __ret_852 = __builtin_shufflevector(__ret_852, __ret_852, 3, 2, 1, 0); \ + __ret_852; \ }) #endif @@ -68065,54 +69968,42 @@ __ai __attribute__((target("neon"))) int32x4_t __noswap_vmlsl_n_s16(int32x4_t __ #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_f16(__p0_881, 
__p1_881, __p2_881) __extension__ ({ \ - float16x4_t __ret_881; \ - float16_t __s0_881 = __p0_881; \ - float16x4_t __s1_881 = __p1_881; \ -float16_t __reint_881 = __s0_881; \ -float16x4_t __reint1_881 = __s1_881; \ -int16x4_t __reint2_881 = vset_lane_s16(*(int16_t *) &__reint_881, *(int16x4_t *) &__reint1_881, __p2_881); \ - __ret_881 = *(float16x4_t *) &__reint2_881; \ - __ret_881; \ +#define vset_lane_f16(__p0_853, __p1_853, __p2_853) __extension__ ({ \ + float16x4_t __ret_853; \ + float16_t __s0_853 = __p0_853; \ + float16x4_t __s1_853 = __p1_853; \ + __ret_853 = __builtin_bit_cast(float16x4_t, vset_lane_s16(__builtin_bit_cast(int16_t, __s0_853), __builtin_bit_cast(int16x4_t, __s1_853), __p2_853)); \ + __ret_853; \ }) #else -#define vset_lane_f16(__p0_882, __p1_882, __p2_882) __extension__ ({ \ - float16x4_t __ret_882; \ - float16_t __s0_882 = __p0_882; \ - float16x4_t __s1_882 = __p1_882; \ - float16x4_t __rev1_882; __rev1_882 = __builtin_shufflevector(__s1_882, __s1_882, 3, 2, 1, 0); \ -float16_t __reint_882 = __s0_882; \ -float16x4_t __reint1_882 = __rev1_882; \ -int16x4_t __reint2_882 = __noswap_vset_lane_s16(*(int16_t *) &__reint_882, *(int16x4_t *) &__reint1_882, __p2_882); \ - __ret_882 = *(float16x4_t *) &__reint2_882; \ - __ret_882 = __builtin_shufflevector(__ret_882, __ret_882, 3, 2, 1, 0); \ - __ret_882; \ +#define vset_lane_f16(__p0_854, __p1_854, __p2_854) __extension__ ({ \ + float16x4_t __ret_854; \ + float16_t __s0_854 = __p0_854; \ + float16x4_t __s1_854 = __p1_854; \ + float16x4_t __rev1_854; __rev1_854 = __builtin_shufflevector(__s1_854, __s1_854, 3, 2, 1, 0); \ + __ret_854 = __builtin_bit_cast(float16x4_t, __noswap_vset_lane_s16(__builtin_bit_cast(int16_t, __s0_854), __builtin_bit_cast(int16x4_t, __rev1_854), __p2_854)); \ + __ret_854 = __builtin_shufflevector(__ret_854, __ret_854, 3, 2, 1, 0); \ + __ret_854; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_f16(__p0_883, __p1_883, __p2_883) __extension__ ({ \ - float16x8_t __ret_883; 
\ - float16_t __s0_883 = __p0_883; \ - float16x8_t __s1_883 = __p1_883; \ -float16_t __reint_883 = __s0_883; \ -float16x8_t __reint1_883 = __s1_883; \ -int16x8_t __reint2_883 = vsetq_lane_s16(*(int16_t *) &__reint_883, *(int16x8_t *) &__reint1_883, __p2_883); \ - __ret_883 = *(float16x8_t *) &__reint2_883; \ - __ret_883; \ +#define vsetq_lane_f16(__p0_855, __p1_855, __p2_855) __extension__ ({ \ + float16x8_t __ret_855; \ + float16_t __s0_855 = __p0_855; \ + float16x8_t __s1_855 = __p1_855; \ + __ret_855 = __builtin_bit_cast(float16x8_t, vsetq_lane_s16(__builtin_bit_cast(int16_t, __s0_855), __builtin_bit_cast(int16x8_t, __s1_855), __p2_855)); \ + __ret_855; \ }) #else -#define vsetq_lane_f16(__p0_884, __p1_884, __p2_884) __extension__ ({ \ - float16x8_t __ret_884; \ - float16_t __s0_884 = __p0_884; \ - float16x8_t __s1_884 = __p1_884; \ - float16x8_t __rev1_884; __rev1_884 = __builtin_shufflevector(__s1_884, __s1_884, 7, 6, 5, 4, 3, 2, 1, 0); \ -float16_t __reint_884 = __s0_884; \ -float16x8_t __reint1_884 = __rev1_884; \ -int16x8_t __reint2_884 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_884, *(int16x8_t *) &__reint1_884, __p2_884); \ - __ret_884 = *(float16x8_t *) &__reint2_884; \ - __ret_884 = __builtin_shufflevector(__ret_884, __ret_884, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_884; \ +#define vsetq_lane_f16(__p0_856, __p1_856, __p2_856) __extension__ ({ \ + float16x8_t __ret_856; \ + float16_t __s0_856 = __p0_856; \ + float16x8_t __s1_856 = __p1_856; \ + float16x8_t __rev1_856; __rev1_856 = __builtin_shufflevector(__s1_856, __s1_856, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_856 = __builtin_bit_cast(float16x8_t, __noswap_vsetq_lane_s16(__builtin_bit_cast(int16_t, __s0_856), __builtin_bit_cast(int16x8_t, __rev1_856), __p2_856)); \ + __ret_856 = __builtin_shufflevector(__ret_856, __ret_856, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_856; \ }) #endif @@ -68134,424 +70025,424 @@ __ai __attribute__((target("aes,neon"))) poly128_t vmull_high_p64(poly64x2_t __p #endif #ifdef 
__LITTLE_ENDIAN__ -#define vfmlalq_lane_high_f16(__p0_885, __p1_885, __p2_885, __p3_885) __extension__ ({ \ - float32x4_t __ret_885; \ - float32x4_t __s0_885 = __p0_885; \ - float16x8_t __s1_885 = __p1_885; \ - float16x4_t __s2_885 = __p2_885; \ - __ret_885 = vfmlalq_high_f16(__s0_885, __s1_885, (float16x8_t) {vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885)}); \ - __ret_885; \ +#define vfmlalq_lane_high_f16(__p0_857, __p1_857, __p2_857, __p3_857) __extension__ ({ \ + float32x4_t __ret_857; \ + float32x4_t __s0_857 = __p0_857; \ + float16x8_t __s1_857 = __p1_857; \ + float16x4_t __s2_857 = __p2_857; \ + __ret_857 = vfmlalq_high_f16(__s0_857, __s1_857, (float16x8_t) {vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857), vget_lane_f16(__s2_857, __p3_857)}); \ + __ret_857; \ }) #else -#define vfmlalq_lane_high_f16(__p0_886, __p1_886, __p2_886, __p3_886) __extension__ ({ \ - float32x4_t __ret_886; \ - float32x4_t __s0_886 = __p0_886; \ - float16x8_t __s1_886 = __p1_886; \ - float16x4_t __s2_886 = __p2_886; \ - float32x4_t __rev0_886; __rev0_886 = __builtin_shufflevector(__s0_886, __s0_886, 3, 2, 1, 0); \ - float16x8_t __rev1_886; __rev1_886 = __builtin_shufflevector(__s1_886, __s1_886, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_886; __rev2_886 = __builtin_shufflevector(__s2_886, __s2_886, 3, 2, 1, 0); \ - __ret_886 = __noswap_vfmlalq_high_f16(__rev0_886, __rev1_886, (float16x8_t) {__noswap_vget_lane_f16(__rev2_886, __p3_886), __noswap_vget_lane_f16(__rev2_886, __p3_886), __noswap_vget_lane_f16(__rev2_886, __p3_886), __noswap_vget_lane_f16(__rev2_886, 
__p3_886), __noswap_vget_lane_f16(__rev2_886, __p3_886), __noswap_vget_lane_f16(__rev2_886, __p3_886), __noswap_vget_lane_f16(__rev2_886, __p3_886), __noswap_vget_lane_f16(__rev2_886, __p3_886)}); \ - __ret_886 = __builtin_shufflevector(__ret_886, __ret_886, 3, 2, 1, 0); \ - __ret_886; \ +#define vfmlalq_lane_high_f16(__p0_858, __p1_858, __p2_858, __p3_858) __extension__ ({ \ + float32x4_t __ret_858; \ + float32x4_t __s0_858 = __p0_858; \ + float16x8_t __s1_858 = __p1_858; \ + float16x4_t __s2_858 = __p2_858; \ + float32x4_t __rev0_858; __rev0_858 = __builtin_shufflevector(__s0_858, __s0_858, 3, 2, 1, 0); \ + float16x8_t __rev1_858; __rev1_858 = __builtin_shufflevector(__s1_858, __s1_858, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_858; __rev2_858 = __builtin_shufflevector(__s2_858, __s2_858, 3, 2, 1, 0); \ + __ret_858 = __noswap_vfmlalq_high_f16(__rev0_858, __rev1_858, (float16x8_t) {__noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858), __noswap_vget_lane_f16(__rev2_858, __p3_858)}); \ + __ret_858 = __builtin_shufflevector(__ret_858, __ret_858, 3, 2, 1, 0); \ + __ret_858; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_lane_high_f16(__p0_887, __p1_887, __p2_887, __p3_887) __extension__ ({ \ - float32x2_t __ret_887; \ - float32x2_t __s0_887 = __p0_887; \ - float16x4_t __s1_887 = __p1_887; \ - float16x4_t __s2_887 = __p2_887; \ - __ret_887 = vfmlal_high_f16(__s0_887, __s1_887, (float16x4_t) {vget_lane_f16(__s2_887, __p3_887), vget_lane_f16(__s2_887, __p3_887), vget_lane_f16(__s2_887, __p3_887), vget_lane_f16(__s2_887, __p3_887)}); \ - __ret_887; \ +#define vfmlal_lane_high_f16(__p0_859, __p1_859, __p2_859, __p3_859) __extension__ ({ \ + float32x2_t __ret_859; \ + float32x2_t __s0_859 = 
__p0_859; \ + float16x4_t __s1_859 = __p1_859; \ + float16x4_t __s2_859 = __p2_859; \ + __ret_859 = vfmlal_high_f16(__s0_859, __s1_859, (float16x4_t) {vget_lane_f16(__s2_859, __p3_859), vget_lane_f16(__s2_859, __p3_859), vget_lane_f16(__s2_859, __p3_859), vget_lane_f16(__s2_859, __p3_859)}); \ + __ret_859; \ }) #else -#define vfmlal_lane_high_f16(__p0_888, __p1_888, __p2_888, __p3_888) __extension__ ({ \ - float32x2_t __ret_888; \ - float32x2_t __s0_888 = __p0_888; \ - float16x4_t __s1_888 = __p1_888; \ - float16x4_t __s2_888 = __p2_888; \ - float32x2_t __rev0_888; __rev0_888 = __builtin_shufflevector(__s0_888, __s0_888, 1, 0); \ - float16x4_t __rev1_888; __rev1_888 = __builtin_shufflevector(__s1_888, __s1_888, 3, 2, 1, 0); \ - float16x4_t __rev2_888; __rev2_888 = __builtin_shufflevector(__s2_888, __s2_888, 3, 2, 1, 0); \ - __ret_888 = __noswap_vfmlal_high_f16(__rev0_888, __rev1_888, (float16x4_t) {__noswap_vget_lane_f16(__rev2_888, __p3_888), __noswap_vget_lane_f16(__rev2_888, __p3_888), __noswap_vget_lane_f16(__rev2_888, __p3_888), __noswap_vget_lane_f16(__rev2_888, __p3_888)}); \ - __ret_888 = __builtin_shufflevector(__ret_888, __ret_888, 1, 0); \ - __ret_888; \ +#define vfmlal_lane_high_f16(__p0_860, __p1_860, __p2_860, __p3_860) __extension__ ({ \ + float32x2_t __ret_860; \ + float32x2_t __s0_860 = __p0_860; \ + float16x4_t __s1_860 = __p1_860; \ + float16x4_t __s2_860 = __p2_860; \ + float32x2_t __rev0_860; __rev0_860 = __builtin_shufflevector(__s0_860, __s0_860, 1, 0); \ + float16x4_t __rev1_860; __rev1_860 = __builtin_shufflevector(__s1_860, __s1_860, 3, 2, 1, 0); \ + float16x4_t __rev2_860; __rev2_860 = __builtin_shufflevector(__s2_860, __s2_860, 3, 2, 1, 0); \ + __ret_860 = __noswap_vfmlal_high_f16(__rev0_860, __rev1_860, (float16x4_t) {__noswap_vget_lane_f16(__rev2_860, __p3_860), __noswap_vget_lane_f16(__rev2_860, __p3_860), __noswap_vget_lane_f16(__rev2_860, __p3_860), __noswap_vget_lane_f16(__rev2_860, __p3_860)}); \ + __ret_860 = 
__builtin_shufflevector(__ret_860, __ret_860, 1, 0); \ + __ret_860; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_lane_low_f16(__p0_889, __p1_889, __p2_889, __p3_889) __extension__ ({ \ - float32x4_t __ret_889; \ - float32x4_t __s0_889 = __p0_889; \ - float16x8_t __s1_889 = __p1_889; \ - float16x4_t __s2_889 = __p2_889; \ - __ret_889 = vfmlalq_low_f16(__s0_889, __s1_889, (float16x8_t) {vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889)}); \ - __ret_889; \ +#define vfmlalq_lane_low_f16(__p0_861, __p1_861, __p2_861, __p3_861) __extension__ ({ \ + float32x4_t __ret_861; \ + float32x4_t __s0_861 = __p0_861; \ + float16x8_t __s1_861 = __p1_861; \ + float16x4_t __s2_861 = __p2_861; \ + __ret_861 = vfmlalq_low_f16(__s0_861, __s1_861, (float16x8_t) {vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861), vget_lane_f16(__s2_861, __p3_861)}); \ + __ret_861; \ }) #else -#define vfmlalq_lane_low_f16(__p0_890, __p1_890, __p2_890, __p3_890) __extension__ ({ \ - float32x4_t __ret_890; \ - float32x4_t __s0_890 = __p0_890; \ - float16x8_t __s1_890 = __p1_890; \ - float16x4_t __s2_890 = __p2_890; \ - float32x4_t __rev0_890; __rev0_890 = __builtin_shufflevector(__s0_890, __s0_890, 3, 2, 1, 0); \ - float16x8_t __rev1_890; __rev1_890 = __builtin_shufflevector(__s1_890, __s1_890, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_890; __rev2_890 = __builtin_shufflevector(__s2_890, __s2_890, 3, 2, 1, 0); \ - __ret_890 = __noswap_vfmlalq_low_f16(__rev0_890, __rev1_890, (float16x8_t) {__noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), 
__noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890)}); \ - __ret_890 = __builtin_shufflevector(__ret_890, __ret_890, 3, 2, 1, 0); \ - __ret_890; \ +#define vfmlalq_lane_low_f16(__p0_862, __p1_862, __p2_862, __p3_862) __extension__ ({ \ + float32x4_t __ret_862; \ + float32x4_t __s0_862 = __p0_862; \ + float16x8_t __s1_862 = __p1_862; \ + float16x4_t __s2_862 = __p2_862; \ + float32x4_t __rev0_862; __rev0_862 = __builtin_shufflevector(__s0_862, __s0_862, 3, 2, 1, 0); \ + float16x8_t __rev1_862; __rev1_862 = __builtin_shufflevector(__s1_862, __s1_862, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_862; __rev2_862 = __builtin_shufflevector(__s2_862, __s2_862, 3, 2, 1, 0); \ + __ret_862 = __noswap_vfmlalq_low_f16(__rev0_862, __rev1_862, (float16x8_t) {__noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862), __noswap_vget_lane_f16(__rev2_862, __p3_862)}); \ + __ret_862 = __builtin_shufflevector(__ret_862, __ret_862, 3, 2, 1, 0); \ + __ret_862; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_lane_low_f16(__p0_891, __p1_891, __p2_891, __p3_891) __extension__ ({ \ - float32x2_t __ret_891; \ - float32x2_t __s0_891 = __p0_891; \ - float16x4_t __s1_891 = __p1_891; \ - float16x4_t __s2_891 = __p2_891; \ - __ret_891 = vfmlal_low_f16(__s0_891, __s1_891, (float16x4_t) {vget_lane_f16(__s2_891, __p3_891), vget_lane_f16(__s2_891, __p3_891), vget_lane_f16(__s2_891, __p3_891), vget_lane_f16(__s2_891, __p3_891)}); \ - __ret_891; \ +#define vfmlal_lane_low_f16(__p0_863, __p1_863, __p2_863, __p3_863) 
__extension__ ({ \ + float32x2_t __ret_863; \ + float32x2_t __s0_863 = __p0_863; \ + float16x4_t __s1_863 = __p1_863; \ + float16x4_t __s2_863 = __p2_863; \ + __ret_863 = vfmlal_low_f16(__s0_863, __s1_863, (float16x4_t) {vget_lane_f16(__s2_863, __p3_863), vget_lane_f16(__s2_863, __p3_863), vget_lane_f16(__s2_863, __p3_863), vget_lane_f16(__s2_863, __p3_863)}); \ + __ret_863; \ }) #else -#define vfmlal_lane_low_f16(__p0_892, __p1_892, __p2_892, __p3_892) __extension__ ({ \ - float32x2_t __ret_892; \ - float32x2_t __s0_892 = __p0_892; \ - float16x4_t __s1_892 = __p1_892; \ - float16x4_t __s2_892 = __p2_892; \ - float32x2_t __rev0_892; __rev0_892 = __builtin_shufflevector(__s0_892, __s0_892, 1, 0); \ - float16x4_t __rev1_892; __rev1_892 = __builtin_shufflevector(__s1_892, __s1_892, 3, 2, 1, 0); \ - float16x4_t __rev2_892; __rev2_892 = __builtin_shufflevector(__s2_892, __s2_892, 3, 2, 1, 0); \ - __ret_892 = __noswap_vfmlal_low_f16(__rev0_892, __rev1_892, (float16x4_t) {__noswap_vget_lane_f16(__rev2_892, __p3_892), __noswap_vget_lane_f16(__rev2_892, __p3_892), __noswap_vget_lane_f16(__rev2_892, __p3_892), __noswap_vget_lane_f16(__rev2_892, __p3_892)}); \ - __ret_892 = __builtin_shufflevector(__ret_892, __ret_892, 1, 0); \ - __ret_892; \ +#define vfmlal_lane_low_f16(__p0_864, __p1_864, __p2_864, __p3_864) __extension__ ({ \ + float32x2_t __ret_864; \ + float32x2_t __s0_864 = __p0_864; \ + float16x4_t __s1_864 = __p1_864; \ + float16x4_t __s2_864 = __p2_864; \ + float32x2_t __rev0_864; __rev0_864 = __builtin_shufflevector(__s0_864, __s0_864, 1, 0); \ + float16x4_t __rev1_864; __rev1_864 = __builtin_shufflevector(__s1_864, __s1_864, 3, 2, 1, 0); \ + float16x4_t __rev2_864; __rev2_864 = __builtin_shufflevector(__s2_864, __s2_864, 3, 2, 1, 0); \ + __ret_864 = __noswap_vfmlal_low_f16(__rev0_864, __rev1_864, (float16x4_t) {__noswap_vget_lane_f16(__rev2_864, __p3_864), __noswap_vget_lane_f16(__rev2_864, __p3_864), __noswap_vget_lane_f16(__rev2_864, __p3_864), 
__noswap_vget_lane_f16(__rev2_864, __p3_864)}); \ + __ret_864 = __builtin_shufflevector(__ret_864, __ret_864, 1, 0); \ + __ret_864; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_laneq_high_f16(__p0_893, __p1_893, __p2_893, __p3_893) __extension__ ({ \ - float32x4_t __ret_893; \ - float32x4_t __s0_893 = __p0_893; \ - float16x8_t __s1_893 = __p1_893; \ - float16x8_t __s2_893 = __p2_893; \ - __ret_893 = vfmlalq_high_f16(__s0_893, __s1_893, (float16x8_t) {vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893)}); \ - __ret_893; \ +#define vfmlalq_laneq_high_f16(__p0_865, __p1_865, __p2_865, __p3_865) __extension__ ({ \ + float32x4_t __ret_865; \ + float32x4_t __s0_865 = __p0_865; \ + float16x8_t __s1_865 = __p1_865; \ + float16x8_t __s2_865 = __p2_865; \ + __ret_865 = vfmlalq_high_f16(__s0_865, __s1_865, (float16x8_t) {vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865), vgetq_lane_f16(__s2_865, __p3_865)}); \ + __ret_865; \ }) #else -#define vfmlalq_laneq_high_f16(__p0_894, __p1_894, __p2_894, __p3_894) __extension__ ({ \ - float32x4_t __ret_894; \ - float32x4_t __s0_894 = __p0_894; \ - float16x8_t __s1_894 = __p1_894; \ - float16x8_t __s2_894 = __p2_894; \ - float32x4_t __rev0_894; __rev0_894 = __builtin_shufflevector(__s0_894, __s0_894, 3, 2, 1, 0); \ - float16x8_t __rev1_894; __rev1_894 = __builtin_shufflevector(__s1_894, __s1_894, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_894; __rev2_894 = __builtin_shufflevector(__s2_894, __s2_894, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_894 = __noswap_vfmlalq_high_f16(__rev0_894, __rev1_894, 
(float16x8_t) {__noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894)}); \ - __ret_894 = __builtin_shufflevector(__ret_894, __ret_894, 3, 2, 1, 0); \ - __ret_894; \ +#define vfmlalq_laneq_high_f16(__p0_866, __p1_866, __p2_866, __p3_866) __extension__ ({ \ + float32x4_t __ret_866; \ + float32x4_t __s0_866 = __p0_866; \ + float16x8_t __s1_866 = __p1_866; \ + float16x8_t __s2_866 = __p2_866; \ + float32x4_t __rev0_866; __rev0_866 = __builtin_shufflevector(__s0_866, __s0_866, 3, 2, 1, 0); \ + float16x8_t __rev1_866; __rev1_866 = __builtin_shufflevector(__s1_866, __s1_866, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_866; __rev2_866 = __builtin_shufflevector(__s2_866, __s2_866, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_866 = __noswap_vfmlalq_high_f16(__rev0_866, __rev1_866, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866), __noswap_vgetq_lane_f16(__rev2_866, __p3_866)}); \ + __ret_866 = __builtin_shufflevector(__ret_866, __ret_866, 3, 2, 1, 0); \ + __ret_866; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_laneq_high_f16(__p0_895, __p1_895, __p2_895, __p3_895) __extension__ ({ \ - float32x2_t __ret_895; \ - float32x2_t __s0_895 = __p0_895; \ - float16x4_t __s1_895 = __p1_895; \ - float16x8_t __s2_895 = __p2_895; \ - __ret_895 = vfmlal_high_f16(__s0_895, __s1_895, (float16x4_t) {vgetq_lane_f16(__s2_895, __p3_895), vgetq_lane_f16(__s2_895, __p3_895), 
vgetq_lane_f16(__s2_895, __p3_895), vgetq_lane_f16(__s2_895, __p3_895)}); \ - __ret_895; \ +#define vfmlal_laneq_high_f16(__p0_867, __p1_867, __p2_867, __p3_867) __extension__ ({ \ + float32x2_t __ret_867; \ + float32x2_t __s0_867 = __p0_867; \ + float16x4_t __s1_867 = __p1_867; \ + float16x8_t __s2_867 = __p2_867; \ + __ret_867 = vfmlal_high_f16(__s0_867, __s1_867, (float16x4_t) {vgetq_lane_f16(__s2_867, __p3_867), vgetq_lane_f16(__s2_867, __p3_867), vgetq_lane_f16(__s2_867, __p3_867), vgetq_lane_f16(__s2_867, __p3_867)}); \ + __ret_867; \ }) #else -#define vfmlal_laneq_high_f16(__p0_896, __p1_896, __p2_896, __p3_896) __extension__ ({ \ - float32x2_t __ret_896; \ - float32x2_t __s0_896 = __p0_896; \ - float16x4_t __s1_896 = __p1_896; \ - float16x8_t __s2_896 = __p2_896; \ - float32x2_t __rev0_896; __rev0_896 = __builtin_shufflevector(__s0_896, __s0_896, 1, 0); \ - float16x4_t __rev1_896; __rev1_896 = __builtin_shufflevector(__s1_896, __s1_896, 3, 2, 1, 0); \ - float16x8_t __rev2_896; __rev2_896 = __builtin_shufflevector(__s2_896, __s2_896, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_896 = __noswap_vfmlal_high_f16(__rev0_896, __rev1_896, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_896, __p3_896), __noswap_vgetq_lane_f16(__rev2_896, __p3_896), __noswap_vgetq_lane_f16(__rev2_896, __p3_896), __noswap_vgetq_lane_f16(__rev2_896, __p3_896)}); \ - __ret_896 = __builtin_shufflevector(__ret_896, __ret_896, 1, 0); \ - __ret_896; \ +#define vfmlal_laneq_high_f16(__p0_868, __p1_868, __p2_868, __p3_868) __extension__ ({ \ + float32x2_t __ret_868; \ + float32x2_t __s0_868 = __p0_868; \ + float16x4_t __s1_868 = __p1_868; \ + float16x8_t __s2_868 = __p2_868; \ + float32x2_t __rev0_868; __rev0_868 = __builtin_shufflevector(__s0_868, __s0_868, 1, 0); \ + float16x4_t __rev1_868; __rev1_868 = __builtin_shufflevector(__s1_868, __s1_868, 3, 2, 1, 0); \ + float16x8_t __rev2_868; __rev2_868 = __builtin_shufflevector(__s2_868, __s2_868, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_868 = 
__noswap_vfmlal_high_f16(__rev0_868, __rev1_868, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_868, __p3_868), __noswap_vgetq_lane_f16(__rev2_868, __p3_868), __noswap_vgetq_lane_f16(__rev2_868, __p3_868), __noswap_vgetq_lane_f16(__rev2_868, __p3_868)}); \ + __ret_868 = __builtin_shufflevector(__ret_868, __ret_868, 1, 0); \ + __ret_868; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_laneq_low_f16(__p0_897, __p1_897, __p2_897, __p3_897) __extension__ ({ \ - float32x4_t __ret_897; \ - float32x4_t __s0_897 = __p0_897; \ - float16x8_t __s1_897 = __p1_897; \ - float16x8_t __s2_897 = __p2_897; \ - __ret_897 = vfmlalq_low_f16(__s0_897, __s1_897, (float16x8_t) {vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897)}); \ - __ret_897; \ +#define vfmlalq_laneq_low_f16(__p0_869, __p1_869, __p2_869, __p3_869) __extension__ ({ \ + float32x4_t __ret_869; \ + float32x4_t __s0_869 = __p0_869; \ + float16x8_t __s1_869 = __p1_869; \ + float16x8_t __s2_869 = __p2_869; \ + __ret_869 = vfmlalq_low_f16(__s0_869, __s1_869, (float16x8_t) {vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869), vgetq_lane_f16(__s2_869, __p3_869)}); \ + __ret_869; \ }) #else -#define vfmlalq_laneq_low_f16(__p0_898, __p1_898, __p2_898, __p3_898) __extension__ ({ \ - float32x4_t __ret_898; \ - float32x4_t __s0_898 = __p0_898; \ - float16x8_t __s1_898 = __p1_898; \ - float16x8_t __s2_898 = __p2_898; \ - float32x4_t __rev0_898; __rev0_898 = __builtin_shufflevector(__s0_898, __s0_898, 3, 2, 1, 0); \ - float16x8_t __rev1_898; __rev1_898 = __builtin_shufflevector(__s1_898, __s1_898, 7, 6, 
5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_898; __rev2_898 = __builtin_shufflevector(__s2_898, __s2_898, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_898 = __noswap_vfmlalq_low_f16(__rev0_898, __rev1_898, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898)}); \ - __ret_898 = __builtin_shufflevector(__ret_898, __ret_898, 3, 2, 1, 0); \ - __ret_898; \ +#define vfmlalq_laneq_low_f16(__p0_870, __p1_870, __p2_870, __p3_870) __extension__ ({ \ + float32x4_t __ret_870; \ + float32x4_t __s0_870 = __p0_870; \ + float16x8_t __s1_870 = __p1_870; \ + float16x8_t __s2_870 = __p2_870; \ + float32x4_t __rev0_870; __rev0_870 = __builtin_shufflevector(__s0_870, __s0_870, 3, 2, 1, 0); \ + float16x8_t __rev1_870; __rev1_870 = __builtin_shufflevector(__s1_870, __s1_870, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_870; __rev2_870 = __builtin_shufflevector(__s2_870, __s2_870, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_870 = __noswap_vfmlalq_low_f16(__rev0_870, __rev1_870, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870), __noswap_vgetq_lane_f16(__rev2_870, __p3_870)}); \ + __ret_870 = __builtin_shufflevector(__ret_870, __ret_870, 3, 2, 1, 0); \ + __ret_870; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_laneq_low_f16(__p0_899, __p1_899, __p2_899, __p3_899) __extension__ ({ \ - float32x2_t __ret_899; \ - float32x2_t __s0_899 = __p0_899; \ - float16x4_t __s1_899 = __p1_899; \ - 
float16x8_t __s2_899 = __p2_899; \ - __ret_899 = vfmlal_low_f16(__s0_899, __s1_899, (float16x4_t) {vgetq_lane_f16(__s2_899, __p3_899), vgetq_lane_f16(__s2_899, __p3_899), vgetq_lane_f16(__s2_899, __p3_899), vgetq_lane_f16(__s2_899, __p3_899)}); \ - __ret_899; \ +#define vfmlal_laneq_low_f16(__p0_871, __p1_871, __p2_871, __p3_871) __extension__ ({ \ + float32x2_t __ret_871; \ + float32x2_t __s0_871 = __p0_871; \ + float16x4_t __s1_871 = __p1_871; \ + float16x8_t __s2_871 = __p2_871; \ + __ret_871 = vfmlal_low_f16(__s0_871, __s1_871, (float16x4_t) {vgetq_lane_f16(__s2_871, __p3_871), vgetq_lane_f16(__s2_871, __p3_871), vgetq_lane_f16(__s2_871, __p3_871), vgetq_lane_f16(__s2_871, __p3_871)}); \ + __ret_871; \ }) #else -#define vfmlal_laneq_low_f16(__p0_900, __p1_900, __p2_900, __p3_900) __extension__ ({ \ - float32x2_t __ret_900; \ - float32x2_t __s0_900 = __p0_900; \ - float16x4_t __s1_900 = __p1_900; \ - float16x8_t __s2_900 = __p2_900; \ - float32x2_t __rev0_900; __rev0_900 = __builtin_shufflevector(__s0_900, __s0_900, 1, 0); \ - float16x4_t __rev1_900; __rev1_900 = __builtin_shufflevector(__s1_900, __s1_900, 3, 2, 1, 0); \ - float16x8_t __rev2_900; __rev2_900 = __builtin_shufflevector(__s2_900, __s2_900, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_900 = __noswap_vfmlal_low_f16(__rev0_900, __rev1_900, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_900, __p3_900), __noswap_vgetq_lane_f16(__rev2_900, __p3_900), __noswap_vgetq_lane_f16(__rev2_900, __p3_900), __noswap_vgetq_lane_f16(__rev2_900, __p3_900)}); \ - __ret_900 = __builtin_shufflevector(__ret_900, __ret_900, 1, 0); \ - __ret_900; \ +#define vfmlal_laneq_low_f16(__p0_872, __p1_872, __p2_872, __p3_872) __extension__ ({ \ + float32x2_t __ret_872; \ + float32x2_t __s0_872 = __p0_872; \ + float16x4_t __s1_872 = __p1_872; \ + float16x8_t __s2_872 = __p2_872; \ + float32x2_t __rev0_872; __rev0_872 = __builtin_shufflevector(__s0_872, __s0_872, 1, 0); \ + float16x4_t __rev1_872; __rev1_872 = __builtin_shufflevector(__s1_872, 
__s1_872, 3, 2, 1, 0); \ + float16x8_t __rev2_872; __rev2_872 = __builtin_shufflevector(__s2_872, __s2_872, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_872 = __noswap_vfmlal_low_f16(__rev0_872, __rev1_872, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_872, __p3_872), __noswap_vgetq_lane_f16(__rev2_872, __p3_872), __noswap_vgetq_lane_f16(__rev2_872, __p3_872), __noswap_vgetq_lane_f16(__rev2_872, __p3_872)}); \ + __ret_872 = __builtin_shufflevector(__ret_872, __ret_872, 1, 0); \ + __ret_872; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_lane_high_f16(__p0_901, __p1_901, __p2_901, __p3_901) __extension__ ({ \ - float32x4_t __ret_901; \ - float32x4_t __s0_901 = __p0_901; \ - float16x8_t __s1_901 = __p1_901; \ - float16x4_t __s2_901 = __p2_901; \ - __ret_901 = vfmlslq_high_f16(__s0_901, __s1_901, (float16x8_t) {vget_lane_f16(__s2_901, __p3_901), vget_lane_f16(__s2_901, __p3_901), vget_lane_f16(__s2_901, __p3_901), vget_lane_f16(__s2_901, __p3_901), vget_lane_f16(__s2_901, __p3_901), vget_lane_f16(__s2_901, __p3_901), vget_lane_f16(__s2_901, __p3_901), vget_lane_f16(__s2_901, __p3_901)}); \ - __ret_901; \ +#define vfmlslq_lane_high_f16(__p0_873, __p1_873, __p2_873, __p3_873) __extension__ ({ \ + float32x4_t __ret_873; \ + float32x4_t __s0_873 = __p0_873; \ + float16x8_t __s1_873 = __p1_873; \ + float16x4_t __s2_873 = __p2_873; \ + __ret_873 = vfmlslq_high_f16(__s0_873, __s1_873, (float16x8_t) {vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873)}); \ + __ret_873; \ }) #else -#define vfmlslq_lane_high_f16(__p0_902, __p1_902, __p2_902, __p3_902) __extension__ ({ \ - float32x4_t __ret_902; \ - float32x4_t __s0_902 = __p0_902; \ - float16x8_t __s1_902 = __p1_902; \ - float16x4_t __s2_902 = __p2_902; \ - float32x4_t __rev0_902; __rev0_902 = 
__builtin_shufflevector(__s0_902, __s0_902, 3, 2, 1, 0); \ - float16x8_t __rev1_902; __rev1_902 = __builtin_shufflevector(__s1_902, __s1_902, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_902; __rev2_902 = __builtin_shufflevector(__s2_902, __s2_902, 3, 2, 1, 0); \ - __ret_902 = __noswap_vfmlslq_high_f16(__rev0_902, __rev1_902, (float16x8_t) {__noswap_vget_lane_f16(__rev2_902, __p3_902), __noswap_vget_lane_f16(__rev2_902, __p3_902), __noswap_vget_lane_f16(__rev2_902, __p3_902), __noswap_vget_lane_f16(__rev2_902, __p3_902), __noswap_vget_lane_f16(__rev2_902, __p3_902), __noswap_vget_lane_f16(__rev2_902, __p3_902), __noswap_vget_lane_f16(__rev2_902, __p3_902), __noswap_vget_lane_f16(__rev2_902, __p3_902)}); \ - __ret_902 = __builtin_shufflevector(__ret_902, __ret_902, 3, 2, 1, 0); \ - __ret_902; \ +#define vfmlslq_lane_high_f16(__p0_874, __p1_874, __p2_874, __p3_874) __extension__ ({ \ + float32x4_t __ret_874; \ + float32x4_t __s0_874 = __p0_874; \ + float16x8_t __s1_874 = __p1_874; \ + float16x4_t __s2_874 = __p2_874; \ + float32x4_t __rev0_874; __rev0_874 = __builtin_shufflevector(__s0_874, __s0_874, 3, 2, 1, 0); \ + float16x8_t __rev1_874; __rev1_874 = __builtin_shufflevector(__s1_874, __s1_874, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_874; __rev2_874 = __builtin_shufflevector(__s2_874, __s2_874, 3, 2, 1, 0); \ + __ret_874 = __noswap_vfmlslq_high_f16(__rev0_874, __rev1_874, (float16x8_t) {__noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874)}); \ + __ret_874 = __builtin_shufflevector(__ret_874, __ret_874, 3, 2, 1, 0); \ + __ret_874; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_lane_high_f16(__p0_903, __p1_903, __p2_903, __p3_903) 
__extension__ ({ \ - float32x2_t __ret_903; \ - float32x2_t __s0_903 = __p0_903; \ - float16x4_t __s1_903 = __p1_903; \ - float16x4_t __s2_903 = __p2_903; \ - __ret_903 = vfmlsl_high_f16(__s0_903, __s1_903, (float16x4_t) {vget_lane_f16(__s2_903, __p3_903), vget_lane_f16(__s2_903, __p3_903), vget_lane_f16(__s2_903, __p3_903), vget_lane_f16(__s2_903, __p3_903)}); \ - __ret_903; \ +#define vfmlsl_lane_high_f16(__p0_875, __p1_875, __p2_875, __p3_875) __extension__ ({ \ + float32x2_t __ret_875; \ + float32x2_t __s0_875 = __p0_875; \ + float16x4_t __s1_875 = __p1_875; \ + float16x4_t __s2_875 = __p2_875; \ + __ret_875 = vfmlsl_high_f16(__s0_875, __s1_875, (float16x4_t) {vget_lane_f16(__s2_875, __p3_875), vget_lane_f16(__s2_875, __p3_875), vget_lane_f16(__s2_875, __p3_875), vget_lane_f16(__s2_875, __p3_875)}); \ + __ret_875; \ }) #else -#define vfmlsl_lane_high_f16(__p0_904, __p1_904, __p2_904, __p3_904) __extension__ ({ \ - float32x2_t __ret_904; \ - float32x2_t __s0_904 = __p0_904; \ - float16x4_t __s1_904 = __p1_904; \ - float16x4_t __s2_904 = __p2_904; \ - float32x2_t __rev0_904; __rev0_904 = __builtin_shufflevector(__s0_904, __s0_904, 1, 0); \ - float16x4_t __rev1_904; __rev1_904 = __builtin_shufflevector(__s1_904, __s1_904, 3, 2, 1, 0); \ - float16x4_t __rev2_904; __rev2_904 = __builtin_shufflevector(__s2_904, __s2_904, 3, 2, 1, 0); \ - __ret_904 = __noswap_vfmlsl_high_f16(__rev0_904, __rev1_904, (float16x4_t) {__noswap_vget_lane_f16(__rev2_904, __p3_904), __noswap_vget_lane_f16(__rev2_904, __p3_904), __noswap_vget_lane_f16(__rev2_904, __p3_904), __noswap_vget_lane_f16(__rev2_904, __p3_904)}); \ - __ret_904 = __builtin_shufflevector(__ret_904, __ret_904, 1, 0); \ - __ret_904; \ +#define vfmlsl_lane_high_f16(__p0_876, __p1_876, __p2_876, __p3_876) __extension__ ({ \ + float32x2_t __ret_876; \ + float32x2_t __s0_876 = __p0_876; \ + float16x4_t __s1_876 = __p1_876; \ + float16x4_t __s2_876 = __p2_876; \ + float32x2_t __rev0_876; __rev0_876 = 
__builtin_shufflevector(__s0_876, __s0_876, 1, 0); \ + float16x4_t __rev1_876; __rev1_876 = __builtin_shufflevector(__s1_876, __s1_876, 3, 2, 1, 0); \ + float16x4_t __rev2_876; __rev2_876 = __builtin_shufflevector(__s2_876, __s2_876, 3, 2, 1, 0); \ + __ret_876 = __noswap_vfmlsl_high_f16(__rev0_876, __rev1_876, (float16x4_t) {__noswap_vget_lane_f16(__rev2_876, __p3_876), __noswap_vget_lane_f16(__rev2_876, __p3_876), __noswap_vget_lane_f16(__rev2_876, __p3_876), __noswap_vget_lane_f16(__rev2_876, __p3_876)}); \ + __ret_876 = __builtin_shufflevector(__ret_876, __ret_876, 1, 0); \ + __ret_876; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_lane_low_f16(__p0_905, __p1_905, __p2_905, __p3_905) __extension__ ({ \ - float32x4_t __ret_905; \ - float32x4_t __s0_905 = __p0_905; \ - float16x8_t __s1_905 = __p1_905; \ - float16x4_t __s2_905 = __p2_905; \ - __ret_905 = vfmlslq_low_f16(__s0_905, __s1_905, (float16x8_t) {vget_lane_f16(__s2_905, __p3_905), vget_lane_f16(__s2_905, __p3_905), vget_lane_f16(__s2_905, __p3_905), vget_lane_f16(__s2_905, __p3_905), vget_lane_f16(__s2_905, __p3_905), vget_lane_f16(__s2_905, __p3_905), vget_lane_f16(__s2_905, __p3_905), vget_lane_f16(__s2_905, __p3_905)}); \ - __ret_905; \ +#define vfmlslq_lane_low_f16(__p0_877, __p1_877, __p2_877, __p3_877) __extension__ ({ \ + float32x4_t __ret_877; \ + float32x4_t __s0_877 = __p0_877; \ + float16x8_t __s1_877 = __p1_877; \ + float16x4_t __s2_877 = __p2_877; \ + __ret_877 = vfmlslq_low_f16(__s0_877, __s1_877, (float16x8_t) {vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877), vget_lane_f16(__s2_877, __p3_877)}); \ + __ret_877; \ }) #else -#define vfmlslq_lane_low_f16(__p0_906, __p1_906, __p2_906, __p3_906) __extension__ ({ \ - float32x4_t __ret_906; \ - float32x4_t __s0_906 = __p0_906; \ - float16x8_t 
__s1_906 = __p1_906; \ - float16x4_t __s2_906 = __p2_906; \ - float32x4_t __rev0_906; __rev0_906 = __builtin_shufflevector(__s0_906, __s0_906, 3, 2, 1, 0); \ - float16x8_t __rev1_906; __rev1_906 = __builtin_shufflevector(__s1_906, __s1_906, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_906; __rev2_906 = __builtin_shufflevector(__s2_906, __s2_906, 3, 2, 1, 0); \ - __ret_906 = __noswap_vfmlslq_low_f16(__rev0_906, __rev1_906, (float16x8_t) {__noswap_vget_lane_f16(__rev2_906, __p3_906), __noswap_vget_lane_f16(__rev2_906, __p3_906), __noswap_vget_lane_f16(__rev2_906, __p3_906), __noswap_vget_lane_f16(__rev2_906, __p3_906), __noswap_vget_lane_f16(__rev2_906, __p3_906), __noswap_vget_lane_f16(__rev2_906, __p3_906), __noswap_vget_lane_f16(__rev2_906, __p3_906), __noswap_vget_lane_f16(__rev2_906, __p3_906)}); \ - __ret_906 = __builtin_shufflevector(__ret_906, __ret_906, 3, 2, 1, 0); \ - __ret_906; \ +#define vfmlslq_lane_low_f16(__p0_878, __p1_878, __p2_878, __p3_878) __extension__ ({ \ + float32x4_t __ret_878; \ + float32x4_t __s0_878 = __p0_878; \ + float16x8_t __s1_878 = __p1_878; \ + float16x4_t __s2_878 = __p2_878; \ + float32x4_t __rev0_878; __rev0_878 = __builtin_shufflevector(__s0_878, __s0_878, 3, 2, 1, 0); \ + float16x8_t __rev1_878; __rev1_878 = __builtin_shufflevector(__s1_878, __s1_878, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_878; __rev2_878 = __builtin_shufflevector(__s2_878, __s2_878, 3, 2, 1, 0); \ + __ret_878 = __noswap_vfmlslq_low_f16(__rev0_878, __rev1_878, (float16x8_t) {__noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878), __noswap_vget_lane_f16(__rev2_878, __p3_878)}); \ + __ret_878 = __builtin_shufflevector(__ret_878, __ret_878, 3, 2, 1, 0); \ + __ret_878; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ -#define vfmlsl_lane_low_f16(__p0_907, __p1_907, __p2_907, __p3_907) __extension__ ({ \ - float32x2_t __ret_907; \ - float32x2_t __s0_907 = __p0_907; \ - float16x4_t __s1_907 = __p1_907; \ - float16x4_t __s2_907 = __p2_907; \ - __ret_907 = vfmlsl_low_f16(__s0_907, __s1_907, (float16x4_t) {vget_lane_f16(__s2_907, __p3_907), vget_lane_f16(__s2_907, __p3_907), vget_lane_f16(__s2_907, __p3_907), vget_lane_f16(__s2_907, __p3_907)}); \ - __ret_907; \ +#define vfmlsl_lane_low_f16(__p0_879, __p1_879, __p2_879, __p3_879) __extension__ ({ \ + float32x2_t __ret_879; \ + float32x2_t __s0_879 = __p0_879; \ + float16x4_t __s1_879 = __p1_879; \ + float16x4_t __s2_879 = __p2_879; \ + __ret_879 = vfmlsl_low_f16(__s0_879, __s1_879, (float16x4_t) {vget_lane_f16(__s2_879, __p3_879), vget_lane_f16(__s2_879, __p3_879), vget_lane_f16(__s2_879, __p3_879), vget_lane_f16(__s2_879, __p3_879)}); \ + __ret_879; \ }) #else -#define vfmlsl_lane_low_f16(__p0_908, __p1_908, __p2_908, __p3_908) __extension__ ({ \ - float32x2_t __ret_908; \ - float32x2_t __s0_908 = __p0_908; \ - float16x4_t __s1_908 = __p1_908; \ - float16x4_t __s2_908 = __p2_908; \ - float32x2_t __rev0_908; __rev0_908 = __builtin_shufflevector(__s0_908, __s0_908, 1, 0); \ - float16x4_t __rev1_908; __rev1_908 = __builtin_shufflevector(__s1_908, __s1_908, 3, 2, 1, 0); \ - float16x4_t __rev2_908; __rev2_908 = __builtin_shufflevector(__s2_908, __s2_908, 3, 2, 1, 0); \ - __ret_908 = __noswap_vfmlsl_low_f16(__rev0_908, __rev1_908, (float16x4_t) {__noswap_vget_lane_f16(__rev2_908, __p3_908), __noswap_vget_lane_f16(__rev2_908, __p3_908), __noswap_vget_lane_f16(__rev2_908, __p3_908), __noswap_vget_lane_f16(__rev2_908, __p3_908)}); \ - __ret_908 = __builtin_shufflevector(__ret_908, __ret_908, 1, 0); \ - __ret_908; \ +#define vfmlsl_lane_low_f16(__p0_880, __p1_880, __p2_880, __p3_880) __extension__ ({ \ + float32x2_t __ret_880; \ + float32x2_t __s0_880 = __p0_880; \ + float16x4_t __s1_880 = __p1_880; \ + float16x4_t __s2_880 
= __p2_880; \ + float32x2_t __rev0_880; __rev0_880 = __builtin_shufflevector(__s0_880, __s0_880, 1, 0); \ + float16x4_t __rev1_880; __rev1_880 = __builtin_shufflevector(__s1_880, __s1_880, 3, 2, 1, 0); \ + float16x4_t __rev2_880; __rev2_880 = __builtin_shufflevector(__s2_880, __s2_880, 3, 2, 1, 0); \ + __ret_880 = __noswap_vfmlsl_low_f16(__rev0_880, __rev1_880, (float16x4_t) {__noswap_vget_lane_f16(__rev2_880, __p3_880), __noswap_vget_lane_f16(__rev2_880, __p3_880), __noswap_vget_lane_f16(__rev2_880, __p3_880), __noswap_vget_lane_f16(__rev2_880, __p3_880)}); \ + __ret_880 = __builtin_shufflevector(__ret_880, __ret_880, 1, 0); \ + __ret_880; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_laneq_high_f16(__p0_909, __p1_909, __p2_909, __p3_909) __extension__ ({ \ - float32x4_t __ret_909; \ - float32x4_t __s0_909 = __p0_909; \ - float16x8_t __s1_909 = __p1_909; \ - float16x8_t __s2_909 = __p2_909; \ - __ret_909 = vfmlslq_high_f16(__s0_909, __s1_909, (float16x8_t) {vgetq_lane_f16(__s2_909, __p3_909), vgetq_lane_f16(__s2_909, __p3_909), vgetq_lane_f16(__s2_909, __p3_909), vgetq_lane_f16(__s2_909, __p3_909), vgetq_lane_f16(__s2_909, __p3_909), vgetq_lane_f16(__s2_909, __p3_909), vgetq_lane_f16(__s2_909, __p3_909), vgetq_lane_f16(__s2_909, __p3_909)}); \ - __ret_909; \ +#define vfmlslq_laneq_high_f16(__p0_881, __p1_881, __p2_881, __p3_881) __extension__ ({ \ + float32x4_t __ret_881; \ + float32x4_t __s0_881 = __p0_881; \ + float16x8_t __s1_881 = __p1_881; \ + float16x8_t __s2_881 = __p2_881; \ + __ret_881 = vfmlslq_high_f16(__s0_881, __s1_881, (float16x8_t) {vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881)}); \ + __ret_881; \ }) #else -#define vfmlslq_laneq_high_f16(__p0_910, __p1_910, __p2_910, __p3_910) __extension__ ({ \ - 
float32x4_t __ret_910; \ - float32x4_t __s0_910 = __p0_910; \ - float16x8_t __s1_910 = __p1_910; \ - float16x8_t __s2_910 = __p2_910; \ - float32x4_t __rev0_910; __rev0_910 = __builtin_shufflevector(__s0_910, __s0_910, 3, 2, 1, 0); \ - float16x8_t __rev1_910; __rev1_910 = __builtin_shufflevector(__s1_910, __s1_910, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_910; __rev2_910 = __builtin_shufflevector(__s2_910, __s2_910, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_910 = __noswap_vfmlslq_high_f16(__rev0_910, __rev1_910, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_910, __p3_910), __noswap_vgetq_lane_f16(__rev2_910, __p3_910), __noswap_vgetq_lane_f16(__rev2_910, __p3_910), __noswap_vgetq_lane_f16(__rev2_910, __p3_910), __noswap_vgetq_lane_f16(__rev2_910, __p3_910), __noswap_vgetq_lane_f16(__rev2_910, __p3_910), __noswap_vgetq_lane_f16(__rev2_910, __p3_910), __noswap_vgetq_lane_f16(__rev2_910, __p3_910)}); \ - __ret_910 = __builtin_shufflevector(__ret_910, __ret_910, 3, 2, 1, 0); \ - __ret_910; \ +#define vfmlslq_laneq_high_f16(__p0_882, __p1_882, __p2_882, __p3_882) __extension__ ({ \ + float32x4_t __ret_882; \ + float32x4_t __s0_882 = __p0_882; \ + float16x8_t __s1_882 = __p1_882; \ + float16x8_t __s2_882 = __p2_882; \ + float32x4_t __rev0_882; __rev0_882 = __builtin_shufflevector(__s0_882, __s0_882, 3, 2, 1, 0); \ + float16x8_t __rev1_882; __rev1_882 = __builtin_shufflevector(__s1_882, __s1_882, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_882; __rev2_882 = __builtin_shufflevector(__s2_882, __s2_882, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_882 = __noswap_vfmlslq_high_f16(__rev0_882, __rev1_882, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, 
__p3_882)}); \ + __ret_882 = __builtin_shufflevector(__ret_882, __ret_882, 3, 2, 1, 0); \ + __ret_882; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_laneq_high_f16(__p0_911, __p1_911, __p2_911, __p3_911) __extension__ ({ \ - float32x2_t __ret_911; \ - float32x2_t __s0_911 = __p0_911; \ - float16x4_t __s1_911 = __p1_911; \ - float16x8_t __s2_911 = __p2_911; \ - __ret_911 = vfmlsl_high_f16(__s0_911, __s1_911, (float16x4_t) {vgetq_lane_f16(__s2_911, __p3_911), vgetq_lane_f16(__s2_911, __p3_911), vgetq_lane_f16(__s2_911, __p3_911), vgetq_lane_f16(__s2_911, __p3_911)}); \ - __ret_911; \ +#define vfmlsl_laneq_high_f16(__p0_883, __p1_883, __p2_883, __p3_883) __extension__ ({ \ + float32x2_t __ret_883; \ + float32x2_t __s0_883 = __p0_883; \ + float16x4_t __s1_883 = __p1_883; \ + float16x8_t __s2_883 = __p2_883; \ + __ret_883 = vfmlsl_high_f16(__s0_883, __s1_883, (float16x4_t) {vgetq_lane_f16(__s2_883, __p3_883), vgetq_lane_f16(__s2_883, __p3_883), vgetq_lane_f16(__s2_883, __p3_883), vgetq_lane_f16(__s2_883, __p3_883)}); \ + __ret_883; \ }) #else -#define vfmlsl_laneq_high_f16(__p0_912, __p1_912, __p2_912, __p3_912) __extension__ ({ \ - float32x2_t __ret_912; \ - float32x2_t __s0_912 = __p0_912; \ - float16x4_t __s1_912 = __p1_912; \ - float16x8_t __s2_912 = __p2_912; \ - float32x2_t __rev0_912; __rev0_912 = __builtin_shufflevector(__s0_912, __s0_912, 1, 0); \ - float16x4_t __rev1_912; __rev1_912 = __builtin_shufflevector(__s1_912, __s1_912, 3, 2, 1, 0); \ - float16x8_t __rev2_912; __rev2_912 = __builtin_shufflevector(__s2_912, __s2_912, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_912 = __noswap_vfmlsl_high_f16(__rev0_912, __rev1_912, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_912, __p3_912), __noswap_vgetq_lane_f16(__rev2_912, __p3_912), __noswap_vgetq_lane_f16(__rev2_912, __p3_912), __noswap_vgetq_lane_f16(__rev2_912, __p3_912)}); \ - __ret_912 = __builtin_shufflevector(__ret_912, __ret_912, 1, 0); \ - __ret_912; \ +#define vfmlsl_laneq_high_f16(__p0_884, __p1_884, 
__p2_884, __p3_884) __extension__ ({ \ + float32x2_t __ret_884; \ + float32x2_t __s0_884 = __p0_884; \ + float16x4_t __s1_884 = __p1_884; \ + float16x8_t __s2_884 = __p2_884; \ + float32x2_t __rev0_884; __rev0_884 = __builtin_shufflevector(__s0_884, __s0_884, 1, 0); \ + float16x4_t __rev1_884; __rev1_884 = __builtin_shufflevector(__s1_884, __s1_884, 3, 2, 1, 0); \ + float16x8_t __rev2_884; __rev2_884 = __builtin_shufflevector(__s2_884, __s2_884, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_884 = __noswap_vfmlsl_high_f16(__rev0_884, __rev1_884, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_884, __p3_884), __noswap_vgetq_lane_f16(__rev2_884, __p3_884), __noswap_vgetq_lane_f16(__rev2_884, __p3_884), __noswap_vgetq_lane_f16(__rev2_884, __p3_884)}); \ + __ret_884 = __builtin_shufflevector(__ret_884, __ret_884, 1, 0); \ + __ret_884; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_laneq_low_f16(__p0_913, __p1_913, __p2_913, __p3_913) __extension__ ({ \ - float32x4_t __ret_913; \ - float32x4_t __s0_913 = __p0_913; \ - float16x8_t __s1_913 = __p1_913; \ - float16x8_t __s2_913 = __p2_913; \ - __ret_913 = vfmlslq_low_f16(__s0_913, __s1_913, (float16x8_t) {vgetq_lane_f16(__s2_913, __p3_913), vgetq_lane_f16(__s2_913, __p3_913), vgetq_lane_f16(__s2_913, __p3_913), vgetq_lane_f16(__s2_913, __p3_913), vgetq_lane_f16(__s2_913, __p3_913), vgetq_lane_f16(__s2_913, __p3_913), vgetq_lane_f16(__s2_913, __p3_913), vgetq_lane_f16(__s2_913, __p3_913)}); \ - __ret_913; \ +#define vfmlslq_laneq_low_f16(__p0_885, __p1_885, __p2_885, __p3_885) __extension__ ({ \ + float32x4_t __ret_885; \ + float32x4_t __s0_885 = __p0_885; \ + float16x8_t __s1_885 = __p1_885; \ + float16x8_t __s2_885 = __p2_885; \ + __ret_885 = vfmlslq_low_f16(__s0_885, __s1_885, (float16x8_t) {vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, __p3_885), vgetq_lane_f16(__s2_885, 
__p3_885), vgetq_lane_f16(__s2_885, __p3_885)}); \ + __ret_885; \ }) #else -#define vfmlslq_laneq_low_f16(__p0_914, __p1_914, __p2_914, __p3_914) __extension__ ({ \ - float32x4_t __ret_914; \ - float32x4_t __s0_914 = __p0_914; \ - float16x8_t __s1_914 = __p1_914; \ - float16x8_t __s2_914 = __p2_914; \ - float32x4_t __rev0_914; __rev0_914 = __builtin_shufflevector(__s0_914, __s0_914, 3, 2, 1, 0); \ - float16x8_t __rev1_914; __rev1_914 = __builtin_shufflevector(__s1_914, __s1_914, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_914; __rev2_914 = __builtin_shufflevector(__s2_914, __s2_914, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_914 = __noswap_vfmlslq_low_f16(__rev0_914, __rev1_914, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_914, __p3_914), __noswap_vgetq_lane_f16(__rev2_914, __p3_914), __noswap_vgetq_lane_f16(__rev2_914, __p3_914), __noswap_vgetq_lane_f16(__rev2_914, __p3_914), __noswap_vgetq_lane_f16(__rev2_914, __p3_914), __noswap_vgetq_lane_f16(__rev2_914, __p3_914), __noswap_vgetq_lane_f16(__rev2_914, __p3_914), __noswap_vgetq_lane_f16(__rev2_914, __p3_914)}); \ - __ret_914 = __builtin_shufflevector(__ret_914, __ret_914, 3, 2, 1, 0); \ - __ret_914; \ +#define vfmlslq_laneq_low_f16(__p0_886, __p1_886, __p2_886, __p3_886) __extension__ ({ \ + float32x4_t __ret_886; \ + float32x4_t __s0_886 = __p0_886; \ + float16x8_t __s1_886 = __p1_886; \ + float16x8_t __s2_886 = __p2_886; \ + float32x4_t __rev0_886; __rev0_886 = __builtin_shufflevector(__s0_886, __s0_886, 3, 2, 1, 0); \ + float16x8_t __rev1_886; __rev1_886 = __builtin_shufflevector(__s1_886, __s1_886, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_886; __rev2_886 = __builtin_shufflevector(__s2_886, __s2_886, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_886 = __noswap_vfmlslq_low_f16(__rev0_886, __rev1_886, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), 
__noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886), __noswap_vgetq_lane_f16(__rev2_886, __p3_886)}); \ + __ret_886 = __builtin_shufflevector(__ret_886, __ret_886, 3, 2, 1, 0); \ + __ret_886; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_laneq_low_f16(__p0_915, __p1_915, __p2_915, __p3_915) __extension__ ({ \ - float32x2_t __ret_915; \ - float32x2_t __s0_915 = __p0_915; \ - float16x4_t __s1_915 = __p1_915; \ - float16x8_t __s2_915 = __p2_915; \ - __ret_915 = vfmlsl_low_f16(__s0_915, __s1_915, (float16x4_t) {vgetq_lane_f16(__s2_915, __p3_915), vgetq_lane_f16(__s2_915, __p3_915), vgetq_lane_f16(__s2_915, __p3_915), vgetq_lane_f16(__s2_915, __p3_915)}); \ - __ret_915; \ +#define vfmlsl_laneq_low_f16(__p0_887, __p1_887, __p2_887, __p3_887) __extension__ ({ \ + float32x2_t __ret_887; \ + float32x2_t __s0_887 = __p0_887; \ + float16x4_t __s1_887 = __p1_887; \ + float16x8_t __s2_887 = __p2_887; \ + __ret_887 = vfmlsl_low_f16(__s0_887, __s1_887, (float16x4_t) {vgetq_lane_f16(__s2_887, __p3_887), vgetq_lane_f16(__s2_887, __p3_887), vgetq_lane_f16(__s2_887, __p3_887), vgetq_lane_f16(__s2_887, __p3_887)}); \ + __ret_887; \ }) #else -#define vfmlsl_laneq_low_f16(__p0_916, __p1_916, __p2_916, __p3_916) __extension__ ({ \ - float32x2_t __ret_916; \ - float32x2_t __s0_916 = __p0_916; \ - float16x4_t __s1_916 = __p1_916; \ - float16x8_t __s2_916 = __p2_916; \ - float32x2_t __rev0_916; __rev0_916 = __builtin_shufflevector(__s0_916, __s0_916, 1, 0); \ - float16x4_t __rev1_916; __rev1_916 = __builtin_shufflevector(__s1_916, __s1_916, 3, 2, 1, 0); \ - float16x8_t __rev2_916; __rev2_916 = __builtin_shufflevector(__s2_916, __s2_916, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_916 = __noswap_vfmlsl_low_f16(__rev0_916, __rev1_916, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_916, __p3_916), __noswap_vgetq_lane_f16(__rev2_916, __p3_916), __noswap_vgetq_lane_f16(__rev2_916, __p3_916), 
__noswap_vgetq_lane_f16(__rev2_916, __p3_916)}); \ - __ret_916 = __builtin_shufflevector(__ret_916, __ret_916, 1, 0); \ - __ret_916; \ +#define vfmlsl_laneq_low_f16(__p0_888, __p1_888, __p2_888, __p3_888) __extension__ ({ \ + float32x2_t __ret_888; \ + float32x2_t __s0_888 = __p0_888; \ + float16x4_t __s1_888 = __p1_888; \ + float16x8_t __s2_888 = __p2_888; \ + float32x2_t __rev0_888; __rev0_888 = __builtin_shufflevector(__s0_888, __s0_888, 1, 0); \ + float16x4_t __rev1_888; __rev1_888 = __builtin_shufflevector(__s1_888, __s1_888, 3, 2, 1, 0); \ + float16x8_t __rev2_888; __rev2_888 = __builtin_shufflevector(__s2_888, __s2_888, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_888 = __noswap_vfmlsl_low_f16(__rev0_888, __rev1_888, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_888, __p3_888), __noswap_vgetq_lane_f16(__rev2_888, __p3_888), __noswap_vgetq_lane_f16(__rev2_888, __p3_888), __noswap_vgetq_lane_f16(__rev2_888, __p3_888)}); \ + __ret_888 = __builtin_shufflevector(__ret_888, __ret_888, 1, 0); \ + __ret_888; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulh_lane_f16(__p0_917, __p1_917, __p2_917) __extension__ ({ \ - float16_t __ret_917; \ - float16_t __s0_917 = __p0_917; \ - float16x4_t __s1_917 = __p1_917; \ - __ret_917 = __s0_917 * vget_lane_f16(__s1_917, __p2_917); \ - __ret_917; \ +#define vmulh_lane_f16(__p0_889, __p1_889, __p2_889) __extension__ ({ \ + float16_t __ret_889; \ + float16_t __s0_889 = __p0_889; \ + float16x4_t __s1_889 = __p1_889; \ + __ret_889 = __s0_889 * vget_lane_f16(__s1_889, __p2_889); \ + __ret_889; \ }) #else -#define vmulh_lane_f16(__p0_918, __p1_918, __p2_918) __extension__ ({ \ - float16_t __ret_918; \ - float16_t __s0_918 = __p0_918; \ - float16x4_t __s1_918 = __p1_918; \ - float16x4_t __rev1_918; __rev1_918 = __builtin_shufflevector(__s1_918, __s1_918, 3, 2, 1, 0); \ - __ret_918 = __s0_918 * __noswap_vget_lane_f16(__rev1_918, __p2_918); \ - __ret_918; \ +#define vmulh_lane_f16(__p0_890, __p1_890, __p2_890) __extension__ ({ \ + float16_t 
__ret_890; \ + float16_t __s0_890 = __p0_890; \ + float16x4_t __s1_890 = __p1_890; \ + float16x4_t __rev1_890; __rev1_890 = __builtin_shufflevector(__s1_890, __s1_890, 3, 2, 1, 0); \ + __ret_890 = __s0_890 * __noswap_vget_lane_f16(__rev1_890, __p2_890); \ + __ret_890; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulh_laneq_f16(__p0_919, __p1_919, __p2_919) __extension__ ({ \ - float16_t __ret_919; \ - float16_t __s0_919 = __p0_919; \ - float16x8_t __s1_919 = __p1_919; \ - __ret_919 = __s0_919 * vgetq_lane_f16(__s1_919, __p2_919); \ - __ret_919; \ +#define vmulh_laneq_f16(__p0_891, __p1_891, __p2_891) __extension__ ({ \ + float16_t __ret_891; \ + float16_t __s0_891 = __p0_891; \ + float16x8_t __s1_891 = __p1_891; \ + __ret_891 = __s0_891 * vgetq_lane_f16(__s1_891, __p2_891); \ + __ret_891; \ }) #else -#define vmulh_laneq_f16(__p0_920, __p1_920, __p2_920) __extension__ ({ \ - float16_t __ret_920; \ - float16_t __s0_920 = __p0_920; \ - float16x8_t __s1_920 = __p1_920; \ - float16x8_t __rev1_920; __rev1_920 = __builtin_shufflevector(__s1_920, __s1_920, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_920 = __s0_920 * __noswap_vgetq_lane_f16(__rev1_920, __p2_920); \ - __ret_920; \ +#define vmulh_laneq_f16(__p0_892, __p1_892, __p2_892) __extension__ ({ \ + float16_t __ret_892; \ + float16_t __s0_892 = __p0_892; \ + float16x8_t __s1_892 = __p1_892; \ + float16x8_t __rev1_892; __rev1_892 = __builtin_shufflevector(__s1_892, __s1_892, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_892 = __s0_892 * __noswap_vgetq_lane_f16(__rev1_892, __p2_892); \ + __ret_892; \ }) #endif @@ -68862,136 +70753,136 @@ __ai __attribute__((target("neon"))) int32x4_t vaddw_high_s16(int32x4_t __p0, in #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_p64(__p0_921, __p1_921, __p2_921, __p3_921) __extension__ ({ \ - poly64x2_t __ret_921; \ - poly64x2_t __s0_921 = __p0_921; \ - poly64x1_t __s2_921 = __p2_921; \ - __ret_921 = vsetq_lane_p64(vget_lane_p64(__s2_921, __p3_921), __s0_921, __p1_921); \ - __ret_921; \ +#define 
vcopyq_lane_p64(__p0_893, __p1_893, __p2_893, __p3_893) __extension__ ({ \ + poly64x2_t __ret_893; \ + poly64x2_t __s0_893 = __p0_893; \ + poly64x1_t __s2_893 = __p2_893; \ + __ret_893 = vsetq_lane_p64(vget_lane_p64(__s2_893, __p3_893), __s0_893, __p1_893); \ + __ret_893; \ }) #else -#define vcopyq_lane_p64(__p0_922, __p1_922, __p2_922, __p3_922) __extension__ ({ \ - poly64x2_t __ret_922; \ - poly64x2_t __s0_922 = __p0_922; \ - poly64x1_t __s2_922 = __p2_922; \ - poly64x2_t __rev0_922; __rev0_922 = __builtin_shufflevector(__s0_922, __s0_922, 1, 0); \ - __ret_922 = __noswap_vsetq_lane_p64(vget_lane_p64(__s2_922, __p3_922), __rev0_922, __p1_922); \ - __ret_922 = __builtin_shufflevector(__ret_922, __ret_922, 1, 0); \ - __ret_922; \ +#define vcopyq_lane_p64(__p0_894, __p1_894, __p2_894, __p3_894) __extension__ ({ \ + poly64x2_t __ret_894; \ + poly64x2_t __s0_894 = __p0_894; \ + poly64x1_t __s2_894 = __p2_894; \ + poly64x2_t __rev0_894; __rev0_894 = __builtin_shufflevector(__s0_894, __s0_894, 1, 0); \ + __ret_894 = __noswap_vsetq_lane_p64(vget_lane_p64(__s2_894, __p3_894), __rev0_894, __p1_894); \ + __ret_894 = __builtin_shufflevector(__ret_894, __ret_894, 1, 0); \ + __ret_894; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_f64(__p0_923, __p1_923, __p2_923, __p3_923) __extension__ ({ \ - float64x2_t __ret_923; \ - float64x2_t __s0_923 = __p0_923; \ - float64x1_t __s2_923 = __p2_923; \ - __ret_923 = vsetq_lane_f64(vget_lane_f64(__s2_923, __p3_923), __s0_923, __p1_923); \ - __ret_923; \ +#define vcopyq_lane_f64(__p0_895, __p1_895, __p2_895, __p3_895) __extension__ ({ \ + float64x2_t __ret_895; \ + float64x2_t __s0_895 = __p0_895; \ + float64x1_t __s2_895 = __p2_895; \ + __ret_895 = vsetq_lane_f64(vget_lane_f64(__s2_895, __p3_895), __s0_895, __p1_895); \ + __ret_895; \ }) #else -#define vcopyq_lane_f64(__p0_924, __p1_924, __p2_924, __p3_924) __extension__ ({ \ - float64x2_t __ret_924; \ - float64x2_t __s0_924 = __p0_924; \ - float64x1_t __s2_924 = __p2_924; \ - 
float64x2_t __rev0_924; __rev0_924 = __builtin_shufflevector(__s0_924, __s0_924, 1, 0); \ - __ret_924 = __noswap_vsetq_lane_f64(vget_lane_f64(__s2_924, __p3_924), __rev0_924, __p1_924); \ - __ret_924 = __builtin_shufflevector(__ret_924, __ret_924, 1, 0); \ - __ret_924; \ +#define vcopyq_lane_f64(__p0_896, __p1_896, __p2_896, __p3_896) __extension__ ({ \ + float64x2_t __ret_896; \ + float64x2_t __s0_896 = __p0_896; \ + float64x1_t __s2_896 = __p2_896; \ + float64x2_t __rev0_896; __rev0_896 = __builtin_shufflevector(__s0_896, __s0_896, 1, 0); \ + __ret_896 = __noswap_vsetq_lane_f64(vget_lane_f64(__s2_896, __p3_896), __rev0_896, __p1_896); \ + __ret_896 = __builtin_shufflevector(__ret_896, __ret_896, 1, 0); \ + __ret_896; \ }) #endif -#define vcopy_lane_p64(__p0_925, __p1_925, __p2_925, __p3_925) __extension__ ({ \ - poly64x1_t __ret_925; \ - poly64x1_t __s0_925 = __p0_925; \ - poly64x1_t __s2_925 = __p2_925; \ - __ret_925 = vset_lane_p64(vget_lane_p64(__s2_925, __p3_925), __s0_925, __p1_925); \ - __ret_925; \ +#define vcopy_lane_p64(__p0_897, __p1_897, __p2_897, __p3_897) __extension__ ({ \ + poly64x1_t __ret_897; \ + poly64x1_t __s0_897 = __p0_897; \ + poly64x1_t __s2_897 = __p2_897; \ + __ret_897 = vset_lane_p64(vget_lane_p64(__s2_897, __p3_897), __s0_897, __p1_897); \ + __ret_897; \ }) -#define vcopy_lane_f64(__p0_926, __p1_926, __p2_926, __p3_926) __extension__ ({ \ - float64x1_t __ret_926; \ - float64x1_t __s0_926 = __p0_926; \ - float64x1_t __s2_926 = __p2_926; \ - __ret_926 = vset_lane_f64(vget_lane_f64(__s2_926, __p3_926), __s0_926, __p1_926); \ - __ret_926; \ +#define vcopy_lane_f64(__p0_898, __p1_898, __p2_898, __p3_898) __extension__ ({ \ + float64x1_t __ret_898; \ + float64x1_t __s0_898 = __p0_898; \ + float64x1_t __s2_898 = __p2_898; \ + __ret_898 = vset_lane_f64(vget_lane_f64(__s2_898, __p3_898), __s0_898, __p1_898); \ + __ret_898; \ }) #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_p64(__p0_927, __p1_927, __p2_927, __p3_927) __extension__ ({ \ - 
poly64x2_t __ret_927; \ - poly64x2_t __s0_927 = __p0_927; \ - poly64x2_t __s2_927 = __p2_927; \ - __ret_927 = vsetq_lane_p64(vgetq_lane_p64(__s2_927, __p3_927), __s0_927, __p1_927); \ - __ret_927; \ +#define vcopyq_laneq_p64(__p0_899, __p1_899, __p2_899, __p3_899) __extension__ ({ \ + poly64x2_t __ret_899; \ + poly64x2_t __s0_899 = __p0_899; \ + poly64x2_t __s2_899 = __p2_899; \ + __ret_899 = vsetq_lane_p64(vgetq_lane_p64(__s2_899, __p3_899), __s0_899, __p1_899); \ + __ret_899; \ }) #else -#define vcopyq_laneq_p64(__p0_928, __p1_928, __p2_928, __p3_928) __extension__ ({ \ - poly64x2_t __ret_928; \ - poly64x2_t __s0_928 = __p0_928; \ - poly64x2_t __s2_928 = __p2_928; \ - poly64x2_t __rev0_928; __rev0_928 = __builtin_shufflevector(__s0_928, __s0_928, 1, 0); \ - poly64x2_t __rev2_928; __rev2_928 = __builtin_shufflevector(__s2_928, __s2_928, 1, 0); \ - __ret_928 = __noswap_vsetq_lane_p64(__noswap_vgetq_lane_p64(__rev2_928, __p3_928), __rev0_928, __p1_928); \ - __ret_928 = __builtin_shufflevector(__ret_928, __ret_928, 1, 0); \ - __ret_928; \ +#define vcopyq_laneq_p64(__p0_900, __p1_900, __p2_900, __p3_900) __extension__ ({ \ + poly64x2_t __ret_900; \ + poly64x2_t __s0_900 = __p0_900; \ + poly64x2_t __s2_900 = __p2_900; \ + poly64x2_t __rev0_900; __rev0_900 = __builtin_shufflevector(__s0_900, __s0_900, 1, 0); \ + poly64x2_t __rev2_900; __rev2_900 = __builtin_shufflevector(__s2_900, __s2_900, 1, 0); \ + __ret_900 = __noswap_vsetq_lane_p64(__noswap_vgetq_lane_p64(__rev2_900, __p3_900), __rev0_900, __p1_900); \ + __ret_900 = __builtin_shufflevector(__ret_900, __ret_900, 1, 0); \ + __ret_900; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_f64(__p0_929, __p1_929, __p2_929, __p3_929) __extension__ ({ \ - float64x2_t __ret_929; \ - float64x2_t __s0_929 = __p0_929; \ - float64x2_t __s2_929 = __p2_929; \ - __ret_929 = vsetq_lane_f64(vgetq_lane_f64(__s2_929, __p3_929), __s0_929, __p1_929); \ - __ret_929; \ +#define vcopyq_laneq_f64(__p0_901, __p1_901, __p2_901, 
__p3_901) __extension__ ({ \ + float64x2_t __ret_901; \ + float64x2_t __s0_901 = __p0_901; \ + float64x2_t __s2_901 = __p2_901; \ + __ret_901 = vsetq_lane_f64(vgetq_lane_f64(__s2_901, __p3_901), __s0_901, __p1_901); \ + __ret_901; \ }) #else -#define vcopyq_laneq_f64(__p0_930, __p1_930, __p2_930, __p3_930) __extension__ ({ \ - float64x2_t __ret_930; \ - float64x2_t __s0_930 = __p0_930; \ - float64x2_t __s2_930 = __p2_930; \ - float64x2_t __rev0_930; __rev0_930 = __builtin_shufflevector(__s0_930, __s0_930, 1, 0); \ - float64x2_t __rev2_930; __rev2_930 = __builtin_shufflevector(__s2_930, __s2_930, 1, 0); \ - __ret_930 = __noswap_vsetq_lane_f64(__noswap_vgetq_lane_f64(__rev2_930, __p3_930), __rev0_930, __p1_930); \ - __ret_930 = __builtin_shufflevector(__ret_930, __ret_930, 1, 0); \ - __ret_930; \ +#define vcopyq_laneq_f64(__p0_902, __p1_902, __p2_902, __p3_902) __extension__ ({ \ + float64x2_t __ret_902; \ + float64x2_t __s0_902 = __p0_902; \ + float64x2_t __s2_902 = __p2_902; \ + float64x2_t __rev0_902; __rev0_902 = __builtin_shufflevector(__s0_902, __s0_902, 1, 0); \ + float64x2_t __rev2_902; __rev2_902 = __builtin_shufflevector(__s2_902, __s2_902, 1, 0); \ + __ret_902 = __noswap_vsetq_lane_f64(__noswap_vgetq_lane_f64(__rev2_902, __p3_902), __rev0_902, __p1_902); \ + __ret_902 = __builtin_shufflevector(__ret_902, __ret_902, 1, 0); \ + __ret_902; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_p64(__p0_931, __p1_931, __p2_931, __p3_931) __extension__ ({ \ - poly64x1_t __ret_931; \ - poly64x1_t __s0_931 = __p0_931; \ - poly64x2_t __s2_931 = __p2_931; \ - __ret_931 = vset_lane_p64(vgetq_lane_p64(__s2_931, __p3_931), __s0_931, __p1_931); \ - __ret_931; \ +#define vcopy_laneq_p64(__p0_903, __p1_903, __p2_903, __p3_903) __extension__ ({ \ + poly64x1_t __ret_903; \ + poly64x1_t __s0_903 = __p0_903; \ + poly64x2_t __s2_903 = __p2_903; \ + __ret_903 = vset_lane_p64(vgetq_lane_p64(__s2_903, __p3_903), __s0_903, __p1_903); \ + __ret_903; \ }) #else -#define 
vcopy_laneq_p64(__p0_932, __p1_932, __p2_932, __p3_932) __extension__ ({ \ - poly64x1_t __ret_932; \ - poly64x1_t __s0_932 = __p0_932; \ - poly64x2_t __s2_932 = __p2_932; \ - poly64x2_t __rev2_932; __rev2_932 = __builtin_shufflevector(__s2_932, __s2_932, 1, 0); \ - __ret_932 = vset_lane_p64(__noswap_vgetq_lane_p64(__rev2_932, __p3_932), __s0_932, __p1_932); \ - __ret_932; \ +#define vcopy_laneq_p64(__p0_904, __p1_904, __p2_904, __p3_904) __extension__ ({ \ + poly64x1_t __ret_904; \ + poly64x1_t __s0_904 = __p0_904; \ + poly64x2_t __s2_904 = __p2_904; \ + poly64x2_t __rev2_904; __rev2_904 = __builtin_shufflevector(__s2_904, __s2_904, 1, 0); \ + __ret_904 = vset_lane_p64(__noswap_vgetq_lane_p64(__rev2_904, __p3_904), __s0_904, __p1_904); \ + __ret_904; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_f64(__p0_933, __p1_933, __p2_933, __p3_933) __extension__ ({ \ - float64x1_t __ret_933; \ - float64x1_t __s0_933 = __p0_933; \ - float64x2_t __s2_933 = __p2_933; \ - __ret_933 = vset_lane_f64(vgetq_lane_f64(__s2_933, __p3_933), __s0_933, __p1_933); \ - __ret_933; \ +#define vcopy_laneq_f64(__p0_905, __p1_905, __p2_905, __p3_905) __extension__ ({ \ + float64x1_t __ret_905; \ + float64x1_t __s0_905 = __p0_905; \ + float64x2_t __s2_905 = __p2_905; \ + __ret_905 = vset_lane_f64(vgetq_lane_f64(__s2_905, __p3_905), __s0_905, __p1_905); \ + __ret_905; \ }) #else -#define vcopy_laneq_f64(__p0_934, __p1_934, __p2_934, __p3_934) __extension__ ({ \ - float64x1_t __ret_934; \ - float64x1_t __s0_934 = __p0_934; \ - float64x2_t __s2_934 = __p2_934; \ - float64x2_t __rev2_934; __rev2_934 = __builtin_shufflevector(__s2_934, __s2_934, 1, 0); \ - __ret_934 = vset_lane_f64(__noswap_vgetq_lane_f64(__rev2_934, __p3_934), __s0_934, __p1_934); \ - __ret_934; \ +#define vcopy_laneq_f64(__p0_906, __p1_906, __p2_906, __p3_906) __extension__ ({ \ + float64x1_t __ret_906; \ + float64x1_t __s0_906 = __p0_906; \ + float64x2_t __s2_906 = __p2_906; \ + float64x2_t __rev2_906; __rev2_906 = 
__builtin_shufflevector(__s2_906, __s2_906, 1, 0); \ + __ret_906 = vset_lane_f64(__noswap_vgetq_lane_f64(__rev2_906, __p3_906), __s0_906, __p1_906); \ + __ret_906; \ }) #endif @@ -69347,38 +71238,38 @@ __ai __attribute__((target("neon"))) int32x4_t vmlsl_high_n_s16(int32x4_t __p0, } #endif -#define vmulx_lane_f64(__p0_935, __p1_935, __p2_935) __extension__ ({ \ - float64x1_t __ret_935; \ - float64x1_t __s0_935 = __p0_935; \ - float64x1_t __s1_935 = __p1_935; \ - float64_t __x_935 = vget_lane_f64(__s0_935, 0); \ - float64_t __y_935 = vget_lane_f64(__s1_935, __p2_935); \ - float64_t __z_935 = vmulxd_f64(__x_935, __y_935); \ - __ret_935 = vset_lane_f64(__z_935, __s0_935, __p2_935); \ - __ret_935; \ +#define vmulx_lane_f64(__p0_907, __p1_907, __p2_907) __extension__ ({ \ + float64x1_t __ret_907; \ + float64x1_t __s0_907 = __p0_907; \ + float64x1_t __s1_907 = __p1_907; \ + float64_t __x_907 = vget_lane_f64(__s0_907, 0); \ + float64_t __y_907 = vget_lane_f64(__s1_907, __p2_907); \ + float64_t __z_907 = vmulxd_f64(__x_907, __y_907); \ + __ret_907 = vset_lane_f64(__z_907, __s0_907, __p2_907); \ + __ret_907; \ }) #ifdef __LITTLE_ENDIAN__ -#define vmulx_laneq_f64(__p0_936, __p1_936, __p2_936) __extension__ ({ \ - float64x1_t __ret_936; \ - float64x1_t __s0_936 = __p0_936; \ - float64x2_t __s1_936 = __p1_936; \ - float64_t __x_936 = vget_lane_f64(__s0_936, 0); \ - float64_t __y_936 = vgetq_lane_f64(__s1_936, __p2_936); \ - float64_t __z_936 = vmulxd_f64(__x_936, __y_936); \ - __ret_936 = vset_lane_f64(__z_936, __s0_936, 0); \ - __ret_936; \ +#define vmulx_laneq_f64(__p0_908, __p1_908, __p2_908) __extension__ ({ \ + float64x1_t __ret_908; \ + float64x1_t __s0_908 = __p0_908; \ + float64x2_t __s1_908 = __p1_908; \ + float64_t __x_908 = vget_lane_f64(__s0_908, 0); \ + float64_t __y_908 = vgetq_lane_f64(__s1_908, __p2_908); \ + float64_t __z_908 = vmulxd_f64(__x_908, __y_908); \ + __ret_908 = vset_lane_f64(__z_908, __s0_908, 0); \ + __ret_908; \ }) #else -#define 
vmulx_laneq_f64(__p0_937, __p1_937, __p2_937) __extension__ ({ \ - float64x1_t __ret_937; \ - float64x1_t __s0_937 = __p0_937; \ - float64x2_t __s1_937 = __p1_937; \ - float64x2_t __rev1_937; __rev1_937 = __builtin_shufflevector(__s1_937, __s1_937, 1, 0); \ - float64_t __x_937 = vget_lane_f64(__s0_937, 0); \ - float64_t __y_937 = __noswap_vgetq_lane_f64(__rev1_937, __p2_937); \ - float64_t __z_937 = vmulxd_f64(__x_937, __y_937); \ - __ret_937 = vset_lane_f64(__z_937, __s0_937, 0); \ - __ret_937; \ +#define vmulx_laneq_f64(__p0_909, __p1_909, __p2_909) __extension__ ({ \ + float64x1_t __ret_909; \ + float64x1_t __s0_909 = __p0_909; \ + float64x2_t __s1_909 = __p1_909; \ + float64x2_t __rev1_909; __rev1_909 = __builtin_shufflevector(__s1_909, __s1_909, 1, 0); \ + float64_t __x_909 = vget_lane_f64(__s0_909, 0); \ + float64_t __y_909 = __noswap_vgetq_lane_f64(__rev1_909, __p2_909); \ + float64_t __z_909 = vmulxd_f64(__x_909, __y_909); \ + __ret_909 = vset_lane_f64(__z_909, __s0_909, 0); \ + __ret_909; \ }) #endif diff --git a/lib/include/arm_sme.h b/lib/include/arm_sme.h index cbfea38fe457..19f0191ac56b 100644 --- a/lib/include/arm_sme.h +++ b/lib/include/arm_sme.h @@ -35,12 +35,6 @@ __ai bool __arm_has_sme(void) __arm_streaming_compatible { return x0 & (1ULL << 63); } -__ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible { - uint64_t x0, x1; - __builtin_arm_get_sme_state(&x0, &x1); - return x0 & 1; -} - void *__arm_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible; void *__arm_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible; void *__arm_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible; @@ -48,6 +42,8 @@ void *__arm_sc_memchr(void *s, int c, size_t n) __arm_streaming_compatible; __ai __attribute__((target("sme"))) void svundef_za(void) __arm_streaming_compatible __arm_out("za") { } +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme___arm_in_streaming_mode))) +bool 
__arm_in_streaming_mode(void); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m))) void svaddha_za32_u32_m(uint64_t, svbool_t, svbool_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m))) @@ -604,6 +600,94 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_ void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m))) void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x2))) +void svadd_za16_f16_vg1x2(uint32_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x4))) +void svadd_za16_f16_vg1x4(uint32_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x2))) +void svsub_za16_f16_vg1x2(uint32_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x4))) +void svsub_za16_f16_vg1x4(uint32_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x2))) +void svadd_za16_vg1x2(uint32_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x4))) +void svadd_za16_vg1x4(uint32_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x2))) +void svsub_za16_vg1x2(uint32_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x4))) +void svsub_za16_vg1x4(uint32_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x2))) +void svadd_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x4))) +void svadd_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x2))) +void svmla_single_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x4))) +void svmla_single_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x2))) +void svmla_lane_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x4))) +void svmla_lane_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x2))) +void svmla_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x4))) +void svmla_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x2))) +void svmls_single_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x4))) +void svmls_single_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x2))) +void svmls_lane_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x4))) +void svmls_lane_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x2))) +void svmls_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x4))) +void svmls_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); 
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_bf16_m))) +void svmopa_za16_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za16_bf16_m))) +void svmops_za16_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x2))) +void svsub_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x4))) +void svsub_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x2))) +void svadd_za16_vg1x2(uint32_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x4))) +void svadd_za16_vg1x4(uint32_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x2))) +void svmla_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x4))) +void svmla_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x2))) +void svmla_lane_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x4))) +void svmla_lane_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x2))) +void svmla_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x4))) +void svmla_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x2))) +void svmls_za16_vg1x2(uint32_t, svbfloat16x2_t, 
svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x4))) +void svmls_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x2))) +void svmls_lane_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x4))) +void svmls_lane_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x2))) +void svmls_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x4))) +void svmls_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_bf16_m))) +void svmopa_za16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za16_bf16_m))) +void svmops_za16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x2))) +void svsub_za16_vg1x2(uint32_t, svbfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x4))) +void svsub_za16_vg1x4(uint32_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_f16_vg1x2))) void svmla_single_za16_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_f16_vg1x4))) @@ -660,22 +744,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_f16_m)) void svmopa_za16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za16_f16_m))) void svmops_za16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, 
svfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x2))) -void svadd_za16_f16_vg1x2(uint32_t, svfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x4))) -void svadd_za16_f16_vg1x4(uint32_t, svfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x2))) -void svsub_za16_f16_vg1x2(uint32_t, svfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x4))) -void svsub_za16_f16_vg1x4(uint32_t, svfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x2))) -void svadd_za16_vg1x2(uint32_t, svfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x4))) -void svadd_za16_vg1x4(uint32_t, svfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x2))) -void svsub_za16_vg1x2(uint32_t, svfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x4))) -void svsub_za16_vg1x4(uint32_t, svfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m))) void svmopa_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m))) @@ -684,6 +752,138 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m)) void svmopa_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m))) void svmops_za64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za16_mf8_vg1x2_fpm))) +void svdot_single_za16_mf8_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za16_mf8_vg1x4_fpm))) +void 
svdot_single_za16_mf8_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za16_mf8_vg1x2_fpm))) +void svdot_lane_za16_mf8_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za16_mf8_vg1x4_fpm))) +void svdot_lane_za16_mf8_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za16_mf8_vg1x2_fpm))) +void svdot_za16_mf8_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za16_mf8_vg1x4_fpm))) +void svdot_za16_mf8_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x1_fpm))) +void svmla_single_za16_mf8_vg2x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x2_fpm))) +void svmla_single_za16_mf8_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x4_fpm))) +void svmla_single_za16_mf8_vg2x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8_vg2x1_fpm))) +void svmla_lane_za16_mf8_vg2x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8_vg2x2_fpm))) +void svmla_lane_za16_mf8_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8_vg2x4_fpm))) +void svmla_lane_za16_mf8_vg2x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x2_fpm))) +void 
svmla_za16_mf8_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x4_fpm))) +void svmla_za16_mf8_vg2x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_mf8_m_fpm))) +void svmopa_za16_mf8_m_fpm(uint64_t, svbool_t, svbool_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za16_mf8_vg1x2_fpm))) +void svvdot_lane_za16_mf8_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za16_mf8_vg1x2_fpm))) +void svdot_za16_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za16_mf8_vg1x4_fpm))) +void svdot_za16_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za16_mf8_vg1x2_fpm))) +void svdot_lane_za16_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za16_mf8_vg1x4_fpm))) +void svdot_lane_za16_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za16_mf8_vg1x2_fpm))) +void svdot_za16_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za16_mf8_vg1x4_fpm))) +void svdot_za16_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x1_fpm))) +void svmla_za16_vg2x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x2_fpm))) +void svmla_za16_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_mf8_vg2x4_fpm))) +void svmla_za16_vg2x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8_vg2x1_fpm))) +void svmla_lane_za16_vg2x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8_vg2x2_fpm))) +void svmla_lane_za16_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_mf8_vg2x4_fpm))) +void svmla_lane_za16_vg2x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x2_fpm))) +void svmla_za16_vg2x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_mf8_vg2x4_fpm))) +void svmla_za16_vg2x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_mf8_m_fpm))) +void svmopa_za16_m_fpm(uint64_t, svbool_t, svbool_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za16_mf8_vg1x2_fpm))) +void svvdot_lane_za16_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_mf8_vg1x2_fpm))) +void svdot_single_za32_mf8_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_mf8_vg1x4_fpm))) +void svdot_single_za32_mf8_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_mf8_vg1x2_fpm))) +void svdot_lane_za32_mf8_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_mf8_vg1x4_fpm))) +void svdot_lane_za32_mf8_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_mf8_vg1x2_fpm))) +void svdot_za32_mf8_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_mf8_vg1x4_fpm))) +void svdot_za32_mf8_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x1_fpm))) +void svmla_single_za32_mf8_vg4x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x2_fpm))) +void svmla_single_za32_mf8_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x4_fpm))) +void svmla_single_za32_mf8_vg4x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8_vg4x1_fpm))) +void svmla_lane_za32_mf8_vg4x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8_vg4x2_fpm))) +void svmla_lane_za32_mf8_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8_vg4x4_fpm))) +void svmla_lane_za32_mf8_vg4x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x2_fpm))) +void svmla_za32_mf8_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x4_fpm))) +void svmla_za32_mf8_vg4x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_mf8_m_fpm))) +void svmopa_za32_mf8_m_fpm(uint64_t, svbool_t, svbool_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdotb_lane_za32_mf8_vg1x4_fpm))) +void svvdotb_lane_za32_mf8_vg1x4_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdott_lane_za32_mf8_vg1x4_fpm))) +void svvdott_lane_za32_mf8_vg1x4_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_mf8_vg1x2_fpm))) +void svdot_za32_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_single_za32_mf8_vg1x4_fpm))) +void svdot_za32_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_mf8_vg1x2_fpm))) +void svdot_lane_za32_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_lane_za32_mf8_vg1x4_fpm))) +void svdot_lane_za32_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_mf8_vg1x2_fpm))) +void svdot_za32_vg1x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svdot_za32_mf8_vg1x4_fpm))) +void svdot_za32_vg1x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x1_fpm))) +void svmla_za32_vg4x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x2_fpm))) +void svmla_za32_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, fpm_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za32_mf8_vg4x4_fpm))) +void svmla_za32_vg4x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8_vg4x1_fpm))) +void svmla_lane_za32_vg4x1_fpm(uint32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8_vg4x2_fpm))) +void svmla_lane_za32_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za32_mf8_vg4x4_fpm))) +void svmla_lane_za32_vg4x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x2_fpm))) +void svmla_za32_vg4x2_fpm(uint32_t, svmfloat8x2_t, svmfloat8x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za32_mf8_vg4x4_fpm))) +void svmla_za32_vg4x4_fpm(uint32_t, svmfloat8x4_t, svmfloat8x4_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za32_mf8_m_fpm))) +void svmopa_za32_m_fpm(uint64_t, svbool_t, svbool_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdotb_lane_za32_mf8_vg1x4_fpm))) +void svvdotb_lane_za32_vg1x4_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdott_lane_za32_mf8_vg1x4_fpm))) +void svvdott_lane_za32_vg1x4_fpm(uint32_t, svmfloat8x2_t, svmfloat8_t, uint64_t, fpm_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_u64_m))) void svaddha_za64_u64_m(uint64_t, svbool_t, svbool_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za64_s64_m))) @@ -732,6 +932,106 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmopa_za64_u16_m void svusmopa_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); 
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svusmops_za64_u16_m))) void svusmops_za64_m(uint64_t, svbool_t, svbool_t, svuint16_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_zt_u8_x4))) +svuint8x4_t svluti4_zt_u8_x4(uint64_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svluti4_zt_s8_x4))) +svint8x4_t svluti4_zt_s8_x4(uint64_t, svuint8x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_u8))) +void svwrite_lane_zt_u8(uint64_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_u32))) +void svwrite_lane_zt_u32(uint64_t, svuint32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_u64))) +void svwrite_lane_zt_u64(uint64_t, svuint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_u16))) +void svwrite_lane_zt_u16(uint64_t, svuint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_bf16))) +void svwrite_lane_zt_bf16(uint64_t, svbfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_s8))) +void svwrite_lane_zt_s8(uint64_t, svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_f64))) +void svwrite_lane_zt_f64(uint64_t, svfloat64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_f32))) +void svwrite_lane_zt_f32(uint64_t, svfloat32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_f16))) +void svwrite_lane_zt_f16(uint64_t, svfloat16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_s32))) +void svwrite_lane_zt_s32(uint64_t, svint32_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_s64))) +void svwrite_lane_zt_s64(uint64_t, svint64_t, uint64_t); 
+__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_s16))) +void svwrite_lane_zt_s16(uint64_t, svint16_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_u8))) +void svwrite_zt_u8(uint64_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_u32))) +void svwrite_zt_u32(uint64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_u64))) +void svwrite_zt_u64(uint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_u16))) +void svwrite_zt_u16(uint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_bf16))) +void svwrite_zt_bf16(uint64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_s8))) +void svwrite_zt_s8(uint64_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_f64))) +void svwrite_zt_f64(uint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_f32))) +void svwrite_zt_f32(uint64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_f16))) +void svwrite_zt_f16(uint64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_s32))) +void svwrite_zt_s32(uint64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_s64))) +void svwrite_zt_s64(uint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_s16))) +void svwrite_zt_s16(uint64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_u8))) +void svwrite_lane_zt(uint64_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_u32))) +void svwrite_lane_zt(uint64_t, svuint32_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_u64))) +void 
svwrite_lane_zt(uint64_t, svuint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_u16))) +void svwrite_lane_zt(uint64_t, svuint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_bf16))) +void svwrite_lane_zt(uint64_t, svbfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_s8))) +void svwrite_lane_zt(uint64_t, svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_f64))) +void svwrite_lane_zt(uint64_t, svfloat64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_f32))) +void svwrite_lane_zt(uint64_t, svfloat32_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_f16))) +void svwrite_lane_zt(uint64_t, svfloat16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_s32))) +void svwrite_lane_zt(uint64_t, svint32_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_s64))) +void svwrite_lane_zt(uint64_t, svint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_lane_zt_s16))) +void svwrite_lane_zt(uint64_t, svint16_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_u8))) +void svwrite_zt(uint64_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_u32))) +void svwrite_zt(uint64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_u64))) +void svwrite_zt(uint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_u16))) +void svwrite_zt(uint64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_bf16))) +void svwrite_zt(uint64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_s8))) +void 
svwrite_zt(uint64_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_f64))) +void svwrite_zt(uint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_f32))) +void svwrite_zt(uint64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_f16))) +void svwrite_zt(uint64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_s32))) +void svwrite_zt(uint64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_s64))) +void svwrite_zt(uint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_zt_s16))) +void svwrite_zt(uint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_u32_vg1x2))) void svadd_write_single_za32_u32_vg1x2(uint32_t, svuint32x2_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_write_single_za32_s32_vg1x2))) @@ -2138,78 +2438,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x void svwrite_za8_vg1x4(uint32_t, svuint8x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x4))) void svwrite_za8_vg1x4(uint32_t, svint8x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x2))) -void svadd_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x4))) -void svadd_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x2))) -void svmla_single_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x4))) -void svmla_single_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x2))) -void svmla_lane_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x4))) -void svmla_lane_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x2))) -void svmla_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x4))) -void svmla_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x2))) -void svmls_single_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x4))) -void svmls_single_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x2))) -void svmls_lane_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x4))) -void svmls_lane_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x2))) -void svmls_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x4))) -void svmls_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_bf16_m))) -void svmopa_za16_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za16_bf16_m))) -void svmops_za16_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); -__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x2))) -void svsub_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x4))) -void svsub_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x2))) -void svadd_za16_vg1x2(uint32_t, svbfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x4))) -void svadd_za16_vg1x4(uint32_t, svbfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x2))) -void svmla_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x4))) -void svmla_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x2))) -void svmla_lane_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x4))) -void svmla_lane_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x2))) -void svmla_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x4))) -void svmla_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x2))) -void svmls_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x4))) -void svmls_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x2))) -void svmls_lane_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, 
uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x4))) -void svmls_lane_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x2))) -void svmls_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x4))) -void svmls_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_bf16_m))) -void svmopa_za16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za16_bf16_m))) -void svmops_za16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x2))) -void svsub_za16_vg1x2(uint32_t, svbfloat16x2_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x4))) -void svsub_za16_vg1x4(uint32_t, svbfloat16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x2))) void svadd_za64_f64_vg1x2(uint32_t, svfloat64x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x4))) diff --git a/lib/include/arm_sve.h b/lib/include/arm_sve.h index 87691e03cecb..35e4644b60b7 100644 --- a/lib/include/arm_sve.h +++ b/lib/include/arm_sve.h @@ -38,6 +38,8 @@ typedef __SVFloat16_t svfloat16_t; typedef __SVBfloat16_t svbfloat16_t; #include #include +typedef __SVMfloat8_t svmfloat8_t; + typedef __SVFloat32_t svfloat32_t; typedef __SVFloat64_t svfloat64_t; typedef __clang_svint8x2_t svint8x2_t; @@ -80,6 +82,9 @@ typedef __clang_svboolx4_t svboolx4_t; typedef __clang_svbfloat16x2_t svbfloat16x2_t; typedef __clang_svbfloat16x3_t svbfloat16x3_t; typedef __clang_svbfloat16x4_t svbfloat16x4_t; +typedef __clang_svmfloat8x2_t svmfloat8x2_t; +typedef __clang_svmfloat8x3_t 
svmfloat8x3_t; +typedef __clang_svmfloat8x4_t svmfloat8x4_t; typedef __SVCount_t svcount_t; enum svpattern @@ -128,6 +133,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s8)) svint8_t svreinterpret_s8_s8(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u8))) svint8_t svreinterpret_s8_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_mf8))) +svint8_t svreinterpret_s8_mf8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s16))) svint8_t svreinterpret_s8_s16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u16))) @@ -152,6 +159,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s8)) svuint8_t svreinterpret_u8_s8(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u8))) svuint8_t svreinterpret_u8_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_mf8))) +svuint8_t svreinterpret_u8_mf8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s16))) svuint8_t svreinterpret_u8_s16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u16))) @@ -172,10 +181,38 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f32) svuint8_t svreinterpret_u8_f32(svfloat32_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f64))) svuint8_t svreinterpret_u8_f64(svfloat64_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s8))) +svmfloat8_t svreinterpret_mf8_s8(svint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u8))) +svmfloat8_t svreinterpret_mf8_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_mf8))) +svmfloat8_t 
svreinterpret_mf8_mf8(svmfloat8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s16))) +svmfloat8_t svreinterpret_mf8_s16(svint16_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u16))) +svmfloat8_t svreinterpret_mf8_u16(svuint16_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s32))) +svmfloat8_t svreinterpret_mf8_s32(svint32_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u32))) +svmfloat8_t svreinterpret_mf8_u32(svuint32_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s64))) +svmfloat8_t svreinterpret_mf8_s64(svint64_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u64))) +svmfloat8_t svreinterpret_mf8_u64(svuint64_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f16))) +svmfloat8_t svreinterpret_mf8_f16(svfloat16_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_bf16))) +svmfloat8_t svreinterpret_mf8_bf16(svbfloat16_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f32))) +svmfloat8_t svreinterpret_mf8_f32(svfloat32_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f64))) +svmfloat8_t svreinterpret_mf8_f64(svfloat64_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s8))) svint16_t svreinterpret_s16_s8(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u8))) svint16_t svreinterpret_s16_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_mf8))) +svint16_t svreinterpret_s16_mf8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s16))) svint16_t svreinterpret_s16_s16(svint16_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u16))) @@ -200,6 +237,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s8) svuint16_t svreinterpret_u16_s8(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u8))) svuint16_t svreinterpret_u16_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_mf8))) +svuint16_t svreinterpret_u16_mf8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s16))) svuint16_t svreinterpret_u16_s16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u16))) @@ -224,6 +263,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s8) svint32_t svreinterpret_s32_s8(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u8))) svint32_t svreinterpret_s32_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_mf8))) +svint32_t svreinterpret_s32_mf8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s16))) svint32_t svreinterpret_s32_s16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u16))) @@ -248,6 +289,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s8) svuint32_t svreinterpret_u32_s8(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u8))) svuint32_t svreinterpret_u32_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_mf8))) +svuint32_t svreinterpret_u32_mf8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s16))) svuint32_t svreinterpret_u32_s16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u16))) @@ -272,6 +315,8 @@ __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s8) svint64_t svreinterpret_s64_s8(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u8))) svint64_t svreinterpret_s64_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_mf8))) +svint64_t svreinterpret_s64_mf8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s16))) svint64_t svreinterpret_s64_s16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u16))) @@ -296,6 +341,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s8) svuint64_t svreinterpret_u64_s8(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u8))) svuint64_t svreinterpret_u64_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_mf8))) +svuint64_t svreinterpret_u64_mf8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s16))) svuint64_t svreinterpret_u64_s16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u16))) @@ -320,6 +367,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s8) svfloat16_t svreinterpret_f16_s8(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u8))) svfloat16_t svreinterpret_f16_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_mf8))) +svfloat16_t svreinterpret_f16_mf8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s16))) svfloat16_t svreinterpret_f16_s16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u16))) @@ -344,6 +393,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s8 svbfloat16_t 
svreinterpret_bf16_s8(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u8))) svbfloat16_t svreinterpret_bf16_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_mf8))) +svbfloat16_t svreinterpret_bf16_mf8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s16))) svbfloat16_t svreinterpret_bf16_s16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u16))) @@ -368,6 +419,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s8) svfloat32_t svreinterpret_f32_s8(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u8))) svfloat32_t svreinterpret_f32_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_mf8))) +svfloat32_t svreinterpret_f32_mf8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s16))) svfloat32_t svreinterpret_f32_s16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u16))) @@ -392,6 +445,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s8) svfloat64_t svreinterpret_f64_s8(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u8))) svfloat64_t svreinterpret_f64_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_mf8))) +svfloat64_t svreinterpret_f64_mf8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s16))) svfloat64_t svreinterpret_f64_s16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u16))) @@ -416,6 +471,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s8)) svint8_t svreinterpret_s8(svint8_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u8))) svint8_t svreinterpret_s8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_mf8))) +svint8_t svreinterpret_s8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s16))) svint8_t svreinterpret_s8(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u16))) @@ -440,6 +497,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s8)) svuint8_t svreinterpret_u8(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u8))) svuint8_t svreinterpret_u8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_mf8))) +svuint8_t svreinterpret_u8(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s16))) svuint8_t svreinterpret_u8(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u16))) @@ -460,10 +519,38 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f32) svuint8_t svreinterpret_u8(svfloat32_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f64))) svuint8_t svreinterpret_u8(svfloat64_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s8))) +svmfloat8_t svreinterpret_mf8(svint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u8))) +svmfloat8_t svreinterpret_mf8(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_mf8))) +svmfloat8_t svreinterpret_mf8(svmfloat8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s16))) +svmfloat8_t svreinterpret_mf8(svint16_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u16))) +svmfloat8_t svreinterpret_mf8(svuint16_t op); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s32))) +svmfloat8_t svreinterpret_mf8(svint32_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u32))) +svmfloat8_t svreinterpret_mf8(svuint32_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s64))) +svmfloat8_t svreinterpret_mf8(svint64_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u64))) +svmfloat8_t svreinterpret_mf8(svuint64_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f16))) +svmfloat8_t svreinterpret_mf8(svfloat16_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_bf16))) +svmfloat8_t svreinterpret_mf8(svbfloat16_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f32))) +svmfloat8_t svreinterpret_mf8(svfloat32_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f64))) +svmfloat8_t svreinterpret_mf8(svfloat64_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s8))) svint16_t svreinterpret_s16(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u8))) svint16_t svreinterpret_s16(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_mf8))) +svint16_t svreinterpret_s16(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s16))) svint16_t svreinterpret_s16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u16))) @@ -488,6 +575,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s8) svuint16_t svreinterpret_u16(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u8))) svuint16_t svreinterpret_u16(svuint8_t op); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_mf8))) +svuint16_t svreinterpret_u16(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s16))) svuint16_t svreinterpret_u16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u16))) @@ -512,6 +601,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s8) svint32_t svreinterpret_s32(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u8))) svint32_t svreinterpret_s32(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_mf8))) +svint32_t svreinterpret_s32(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s16))) svint32_t svreinterpret_s32(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u16))) @@ -536,6 +627,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s8) svuint32_t svreinterpret_u32(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u8))) svuint32_t svreinterpret_u32(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_mf8))) +svuint32_t svreinterpret_u32(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s16))) svuint32_t svreinterpret_u32(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u16))) @@ -560,6 +653,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s8) svint64_t svreinterpret_s64(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u8))) svint64_t svreinterpret_s64(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_mf8))) +svint64_t svreinterpret_s64(svmfloat8_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s16))) svint64_t svreinterpret_s64(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u16))) @@ -584,6 +679,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s8) svuint64_t svreinterpret_u64(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u8))) svuint64_t svreinterpret_u64(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_mf8))) +svuint64_t svreinterpret_u64(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s16))) svuint64_t svreinterpret_u64(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u16))) @@ -608,6 +705,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s8) svfloat16_t svreinterpret_f16(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u8))) svfloat16_t svreinterpret_f16(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_mf8))) +svfloat16_t svreinterpret_f16(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s16))) svfloat16_t svreinterpret_f16(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u16))) @@ -632,6 +731,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s8 svbfloat16_t svreinterpret_bf16(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u8))) svbfloat16_t svreinterpret_bf16(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_mf8))) +svbfloat16_t svreinterpret_bf16(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s16))) svbfloat16_t svreinterpret_bf16(svint16_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u16))) @@ -656,6 +757,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s8) svfloat32_t svreinterpret_f32(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u8))) svfloat32_t svreinterpret_f32(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_mf8))) +svfloat32_t svreinterpret_f32(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s16))) svfloat32_t svreinterpret_f32(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u16))) @@ -680,6 +783,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s8) svfloat64_t svreinterpret_f64(svint8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u8))) svfloat64_t svreinterpret_f64(svuint8_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_mf8))) +svfloat64_t svreinterpret_f64(svmfloat8_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s16))) svfloat64_t svreinterpret_f64(svint16_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u16))) @@ -704,6 +809,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s8_x svint8x2_t svreinterpret_s8_s8_x2(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u8_x2))) svint8x2_t svreinterpret_s8_u8_x2(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_mf8_x2))) +svint8x2_t svreinterpret_s8_mf8_x2(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s16_x2))) svint8x2_t svreinterpret_s8_s16_x2(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u16_x2))) @@ -728,6 +835,8 @@ __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s8_x svuint8x2_t svreinterpret_u8_s8_x2(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u8_x2))) svuint8x2_t svreinterpret_u8_u8_x2(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_mf8_x2))) +svuint8x2_t svreinterpret_u8_mf8_x2(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s16_x2))) svuint8x2_t svreinterpret_u8_s16_x2(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u16_x2))) @@ -748,10 +857,38 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f32_ svuint8x2_t svreinterpret_u8_f32_x2(svfloat32x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f64_x2))) svuint8x2_t svreinterpret_u8_f64_x2(svfloat64x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s8_x2))) +svmfloat8x2_t svreinterpret_mf8_s8_x2(svint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u8_x2))) +svmfloat8x2_t svreinterpret_mf8_u8_x2(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_mf8_x2))) +svmfloat8x2_t svreinterpret_mf8_mf8_x2(svmfloat8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s16_x2))) +svmfloat8x2_t svreinterpret_mf8_s16_x2(svint16x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u16_x2))) +svmfloat8x2_t svreinterpret_mf8_u16_x2(svuint16x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s32_x2))) +svmfloat8x2_t svreinterpret_mf8_s32_x2(svint32x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u32_x2))) +svmfloat8x2_t svreinterpret_mf8_u32_x2(svuint32x2_t op); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s64_x2))) +svmfloat8x2_t svreinterpret_mf8_s64_x2(svint64x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u64_x2))) +svmfloat8x2_t svreinterpret_mf8_u64_x2(svuint64x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f16_x2))) +svmfloat8x2_t svreinterpret_mf8_f16_x2(svfloat16x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_bf16_x2))) +svmfloat8x2_t svreinterpret_mf8_bf16_x2(svbfloat16x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f32_x2))) +svmfloat8x2_t svreinterpret_mf8_f32_x2(svfloat32x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f64_x2))) +svmfloat8x2_t svreinterpret_mf8_f64_x2(svfloat64x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s8_x2))) svint16x2_t svreinterpret_s16_s8_x2(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u8_x2))) svint16x2_t svreinterpret_s16_u8_x2(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_mf8_x2))) +svint16x2_t svreinterpret_s16_mf8_x2(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s16_x2))) svint16x2_t svreinterpret_s16_s16_x2(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u16_x2))) @@ -776,6 +913,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s8_ svuint16x2_t svreinterpret_u16_s8_x2(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u8_x2))) svuint16x2_t svreinterpret_u16_u8_x2(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_mf8_x2))) +svuint16x2_t svreinterpret_u16_mf8_x2(svmfloat8x2_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s16_x2))) svuint16x2_t svreinterpret_u16_s16_x2(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u16_x2))) @@ -800,6 +939,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s8_ svint32x2_t svreinterpret_s32_s8_x2(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u8_x2))) svint32x2_t svreinterpret_s32_u8_x2(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_mf8_x2))) +svint32x2_t svreinterpret_s32_mf8_x2(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s16_x2))) svint32x2_t svreinterpret_s32_s16_x2(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u16_x2))) @@ -824,6 +965,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s8_ svuint32x2_t svreinterpret_u32_s8_x2(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u8_x2))) svuint32x2_t svreinterpret_u32_u8_x2(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_mf8_x2))) +svuint32x2_t svreinterpret_u32_mf8_x2(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s16_x2))) svuint32x2_t svreinterpret_u32_s16_x2(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u16_x2))) @@ -848,6 +991,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s8_ svint64x2_t svreinterpret_s64_s8_x2(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u8_x2))) svint64x2_t svreinterpret_s64_u8_x2(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_mf8_x2))) +svint64x2_t svreinterpret_s64_mf8_x2(svmfloat8x2_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s16_x2))) svint64x2_t svreinterpret_s64_s16_x2(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u16_x2))) @@ -872,6 +1017,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s8_ svuint64x2_t svreinterpret_u64_s8_x2(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u8_x2))) svuint64x2_t svreinterpret_u64_u8_x2(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_mf8_x2))) +svuint64x2_t svreinterpret_u64_mf8_x2(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s16_x2))) svuint64x2_t svreinterpret_u64_s16_x2(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u16_x2))) @@ -896,6 +1043,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s8_ svfloat16x2_t svreinterpret_f16_s8_x2(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u8_x2))) svfloat16x2_t svreinterpret_f16_u8_x2(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_mf8_x2))) +svfloat16x2_t svreinterpret_f16_mf8_x2(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s16_x2))) svfloat16x2_t svreinterpret_f16_s16_x2(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u16_x2))) @@ -920,6 +1069,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s8 svbfloat16x2_t svreinterpret_bf16_s8_x2(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u8_x2))) svbfloat16x2_t svreinterpret_bf16_u8_x2(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_mf8_x2))) +svbfloat16x2_t 
svreinterpret_bf16_mf8_x2(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s16_x2))) svbfloat16x2_t svreinterpret_bf16_s16_x2(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u16_x2))) @@ -944,6 +1095,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s8_ svfloat32x2_t svreinterpret_f32_s8_x2(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u8_x2))) svfloat32x2_t svreinterpret_f32_u8_x2(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_mf8_x2))) +svfloat32x2_t svreinterpret_f32_mf8_x2(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s16_x2))) svfloat32x2_t svreinterpret_f32_s16_x2(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u16_x2))) @@ -968,6 +1121,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s8_ svfloat64x2_t svreinterpret_f64_s8_x2(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u8_x2))) svfloat64x2_t svreinterpret_f64_u8_x2(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_mf8_x2))) +svfloat64x2_t svreinterpret_f64_mf8_x2(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s16_x2))) svfloat64x2_t svreinterpret_f64_s16_x2(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u16_x2))) @@ -992,6 +1147,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s8_x svint8x2_t svreinterpret_s8(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u8_x2))) svint8x2_t svreinterpret_s8(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_mf8_x2))) 
+svint8x2_t svreinterpret_s8(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s16_x2))) svint8x2_t svreinterpret_s8(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u16_x2))) @@ -1016,6 +1173,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s8_x svuint8x2_t svreinterpret_u8(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u8_x2))) svuint8x2_t svreinterpret_u8(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_mf8_x2))) +svuint8x2_t svreinterpret_u8(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s16_x2))) svuint8x2_t svreinterpret_u8(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u16_x2))) @@ -1036,10 +1195,38 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f32_ svuint8x2_t svreinterpret_u8(svfloat32x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f64_x2))) svuint8x2_t svreinterpret_u8(svfloat64x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s8_x2))) +svmfloat8x2_t svreinterpret_mf8(svint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u8_x2))) +svmfloat8x2_t svreinterpret_mf8(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_mf8_x2))) +svmfloat8x2_t svreinterpret_mf8(svmfloat8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s16_x2))) +svmfloat8x2_t svreinterpret_mf8(svint16x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u16_x2))) +svmfloat8x2_t svreinterpret_mf8(svuint16x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s32_x2))) +svmfloat8x2_t 
svreinterpret_mf8(svint32x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u32_x2))) +svmfloat8x2_t svreinterpret_mf8(svuint32x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s64_x2))) +svmfloat8x2_t svreinterpret_mf8(svint64x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u64_x2))) +svmfloat8x2_t svreinterpret_mf8(svuint64x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f16_x2))) +svmfloat8x2_t svreinterpret_mf8(svfloat16x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_bf16_x2))) +svmfloat8x2_t svreinterpret_mf8(svbfloat16x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f32_x2))) +svmfloat8x2_t svreinterpret_mf8(svfloat32x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f64_x2))) +svmfloat8x2_t svreinterpret_mf8(svfloat64x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s8_x2))) svint16x2_t svreinterpret_s16(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u8_x2))) svint16x2_t svreinterpret_s16(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_mf8_x2))) +svint16x2_t svreinterpret_s16(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s16_x2))) svint16x2_t svreinterpret_s16(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u16_x2))) @@ -1064,6 +1251,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s8_ svuint16x2_t svreinterpret_u16(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u8_x2))) svuint16x2_t svreinterpret_u16(svuint8x2_t op); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_mf8_x2))) +svuint16x2_t svreinterpret_u16(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s16_x2))) svuint16x2_t svreinterpret_u16(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u16_x2))) @@ -1088,6 +1277,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s8_ svint32x2_t svreinterpret_s32(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u8_x2))) svint32x2_t svreinterpret_s32(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_mf8_x2))) +svint32x2_t svreinterpret_s32(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s16_x2))) svint32x2_t svreinterpret_s32(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u16_x2))) @@ -1112,6 +1303,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s8_ svuint32x2_t svreinterpret_u32(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u8_x2))) svuint32x2_t svreinterpret_u32(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_mf8_x2))) +svuint32x2_t svreinterpret_u32(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s16_x2))) svuint32x2_t svreinterpret_u32(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u16_x2))) @@ -1136,6 +1329,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s8_ svint64x2_t svreinterpret_s64(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u8_x2))) svint64x2_t svreinterpret_s64(svuint8x2_t op); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_mf8_x2))) +svint64x2_t svreinterpret_s64(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s16_x2))) svint64x2_t svreinterpret_s64(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u16_x2))) @@ -1160,6 +1355,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s8_ svuint64x2_t svreinterpret_u64(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u8_x2))) svuint64x2_t svreinterpret_u64(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_mf8_x2))) +svuint64x2_t svreinterpret_u64(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s16_x2))) svuint64x2_t svreinterpret_u64(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u16_x2))) @@ -1184,6 +1381,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s8_ svfloat16x2_t svreinterpret_f16(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u8_x2))) svfloat16x2_t svreinterpret_f16(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_mf8_x2))) +svfloat16x2_t svreinterpret_f16(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s16_x2))) svfloat16x2_t svreinterpret_f16(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u16_x2))) @@ -1208,6 +1407,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s8 svbfloat16x2_t svreinterpret_bf16(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u8_x2))) svbfloat16x2_t svreinterpret_bf16(svuint8x2_t op); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_mf8_x2))) +svbfloat16x2_t svreinterpret_bf16(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s16_x2))) svbfloat16x2_t svreinterpret_bf16(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u16_x2))) @@ -1232,6 +1433,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s8_ svfloat32x2_t svreinterpret_f32(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u8_x2))) svfloat32x2_t svreinterpret_f32(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_mf8_x2))) +svfloat32x2_t svreinterpret_f32(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s16_x2))) svfloat32x2_t svreinterpret_f32(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u16_x2))) @@ -1256,6 +1459,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s8_ svfloat64x2_t svreinterpret_f64(svint8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u8_x2))) svfloat64x2_t svreinterpret_f64(svuint8x2_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_mf8_x2))) +svfloat64x2_t svreinterpret_f64(svmfloat8x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s16_x2))) svfloat64x2_t svreinterpret_f64(svint16x2_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u16_x2))) @@ -1280,6 +1485,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s8_x svint8x3_t svreinterpret_s8_s8_x3(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u8_x3))) svint8x3_t svreinterpret_s8_u8_x3(svuint8x3_t op); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_mf8_x3))) +svint8x3_t svreinterpret_s8_mf8_x3(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s16_x3))) svint8x3_t svreinterpret_s8_s16_x3(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u16_x3))) @@ -1304,6 +1511,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s8_x svuint8x3_t svreinterpret_u8_s8_x3(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u8_x3))) svuint8x3_t svreinterpret_u8_u8_x3(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_mf8_x3))) +svuint8x3_t svreinterpret_u8_mf8_x3(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s16_x3))) svuint8x3_t svreinterpret_u8_s16_x3(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u16_x3))) @@ -1324,10 +1533,38 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f32_ svuint8x3_t svreinterpret_u8_f32_x3(svfloat32x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f64_x3))) svuint8x3_t svreinterpret_u8_f64_x3(svfloat64x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s8_x3))) +svmfloat8x3_t svreinterpret_mf8_s8_x3(svint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u8_x3))) +svmfloat8x3_t svreinterpret_mf8_u8_x3(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_mf8_x3))) +svmfloat8x3_t svreinterpret_mf8_mf8_x3(svmfloat8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s16_x3))) +svmfloat8x3_t svreinterpret_mf8_s16_x3(svint16x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u16_x3))) +svmfloat8x3_t 
svreinterpret_mf8_u16_x3(svuint16x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s32_x3))) +svmfloat8x3_t svreinterpret_mf8_s32_x3(svint32x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u32_x3))) +svmfloat8x3_t svreinterpret_mf8_u32_x3(svuint32x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s64_x3))) +svmfloat8x3_t svreinterpret_mf8_s64_x3(svint64x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u64_x3))) +svmfloat8x3_t svreinterpret_mf8_u64_x3(svuint64x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f16_x3))) +svmfloat8x3_t svreinterpret_mf8_f16_x3(svfloat16x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_bf16_x3))) +svmfloat8x3_t svreinterpret_mf8_bf16_x3(svbfloat16x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f32_x3))) +svmfloat8x3_t svreinterpret_mf8_f32_x3(svfloat32x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f64_x3))) +svmfloat8x3_t svreinterpret_mf8_f64_x3(svfloat64x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s8_x3))) svint16x3_t svreinterpret_s16_s8_x3(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u8_x3))) svint16x3_t svreinterpret_s16_u8_x3(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_mf8_x3))) +svint16x3_t svreinterpret_s16_mf8_x3(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s16_x3))) svint16x3_t svreinterpret_s16_s16_x3(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u16_x3))) @@ -1352,6 +1589,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s8_ svuint16x3_t 
svreinterpret_u16_s8_x3(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u8_x3))) svuint16x3_t svreinterpret_u16_u8_x3(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_mf8_x3))) +svuint16x3_t svreinterpret_u16_mf8_x3(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s16_x3))) svuint16x3_t svreinterpret_u16_s16_x3(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u16_x3))) @@ -1376,6 +1615,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s8_ svint32x3_t svreinterpret_s32_s8_x3(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u8_x3))) svint32x3_t svreinterpret_s32_u8_x3(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_mf8_x3))) +svint32x3_t svreinterpret_s32_mf8_x3(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s16_x3))) svint32x3_t svreinterpret_s32_s16_x3(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u16_x3))) @@ -1400,6 +1641,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s8_ svuint32x3_t svreinterpret_u32_s8_x3(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u8_x3))) svuint32x3_t svreinterpret_u32_u8_x3(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_mf8_x3))) +svuint32x3_t svreinterpret_u32_mf8_x3(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s16_x3))) svuint32x3_t svreinterpret_u32_s16_x3(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u16_x3))) @@ -1424,6 +1667,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s8_ 
svint64x3_t svreinterpret_s64_s8_x3(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u8_x3))) svint64x3_t svreinterpret_s64_u8_x3(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_mf8_x3))) +svint64x3_t svreinterpret_s64_mf8_x3(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s16_x3))) svint64x3_t svreinterpret_s64_s16_x3(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u16_x3))) @@ -1448,6 +1693,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s8_ svuint64x3_t svreinterpret_u64_s8_x3(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u8_x3))) svuint64x3_t svreinterpret_u64_u8_x3(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_mf8_x3))) +svuint64x3_t svreinterpret_u64_mf8_x3(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s16_x3))) svuint64x3_t svreinterpret_u64_s16_x3(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u16_x3))) @@ -1472,6 +1719,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s8_ svfloat16x3_t svreinterpret_f16_s8_x3(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u8_x3))) svfloat16x3_t svreinterpret_f16_u8_x3(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_mf8_x3))) +svfloat16x3_t svreinterpret_f16_mf8_x3(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s16_x3))) svfloat16x3_t svreinterpret_f16_s16_x3(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u16_x3))) @@ -1496,6 +1745,8 @@ __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s8 svbfloat16x3_t svreinterpret_bf16_s8_x3(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u8_x3))) svbfloat16x3_t svreinterpret_bf16_u8_x3(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_mf8_x3))) +svbfloat16x3_t svreinterpret_bf16_mf8_x3(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s16_x3))) svbfloat16x3_t svreinterpret_bf16_s16_x3(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u16_x3))) @@ -1520,6 +1771,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s8_ svfloat32x3_t svreinterpret_f32_s8_x3(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u8_x3))) svfloat32x3_t svreinterpret_f32_u8_x3(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_mf8_x3))) +svfloat32x3_t svreinterpret_f32_mf8_x3(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s16_x3))) svfloat32x3_t svreinterpret_f32_s16_x3(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u16_x3))) @@ -1544,6 +1797,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s8_ svfloat64x3_t svreinterpret_f64_s8_x3(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u8_x3))) svfloat64x3_t svreinterpret_f64_u8_x3(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_mf8_x3))) +svfloat64x3_t svreinterpret_f64_mf8_x3(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s16_x3))) svfloat64x3_t svreinterpret_f64_s16_x3(svint16x3_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u16_x3))) @@ -1568,6 +1823,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s8_x svint8x3_t svreinterpret_s8(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u8_x3))) svint8x3_t svreinterpret_s8(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_mf8_x3))) +svint8x3_t svreinterpret_s8(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s16_x3))) svint8x3_t svreinterpret_s8(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u16_x3))) @@ -1592,6 +1849,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s8_x svuint8x3_t svreinterpret_u8(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u8_x3))) svuint8x3_t svreinterpret_u8(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_mf8_x3))) +svuint8x3_t svreinterpret_u8(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s16_x3))) svuint8x3_t svreinterpret_u8(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u16_x3))) @@ -1612,10 +1871,38 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f32_ svuint8x3_t svreinterpret_u8(svfloat32x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f64_x3))) svuint8x3_t svreinterpret_u8(svfloat64x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s8_x3))) +svmfloat8x3_t svreinterpret_mf8(svint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u8_x3))) +svmfloat8x3_t svreinterpret_mf8(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_mf8_x3))) 
+svmfloat8x3_t svreinterpret_mf8(svmfloat8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s16_x3))) +svmfloat8x3_t svreinterpret_mf8(svint16x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u16_x3))) +svmfloat8x3_t svreinterpret_mf8(svuint16x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s32_x3))) +svmfloat8x3_t svreinterpret_mf8(svint32x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u32_x3))) +svmfloat8x3_t svreinterpret_mf8(svuint32x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s64_x3))) +svmfloat8x3_t svreinterpret_mf8(svint64x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u64_x3))) +svmfloat8x3_t svreinterpret_mf8(svuint64x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f16_x3))) +svmfloat8x3_t svreinterpret_mf8(svfloat16x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_bf16_x3))) +svmfloat8x3_t svreinterpret_mf8(svbfloat16x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f32_x3))) +svmfloat8x3_t svreinterpret_mf8(svfloat32x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f64_x3))) +svmfloat8x3_t svreinterpret_mf8(svfloat64x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s8_x3))) svint16x3_t svreinterpret_s16(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u8_x3))) svint16x3_t svreinterpret_s16(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_mf8_x3))) +svint16x3_t svreinterpret_s16(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s16_x3))) svint16x3_t svreinterpret_s16(svint16x3_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u16_x3))) @@ -1640,6 +1927,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s8_ svuint16x3_t svreinterpret_u16(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u8_x3))) svuint16x3_t svreinterpret_u16(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_mf8_x3))) +svuint16x3_t svreinterpret_u16(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s16_x3))) svuint16x3_t svreinterpret_u16(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u16_x3))) @@ -1664,6 +1953,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s8_ svint32x3_t svreinterpret_s32(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u8_x3))) svint32x3_t svreinterpret_s32(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_mf8_x3))) +svint32x3_t svreinterpret_s32(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s16_x3))) svint32x3_t svreinterpret_s32(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u16_x3))) @@ -1688,6 +1979,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s8_ svuint32x3_t svreinterpret_u32(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u8_x3))) svuint32x3_t svreinterpret_u32(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_mf8_x3))) +svuint32x3_t svreinterpret_u32(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s16_x3))) svuint32x3_t svreinterpret_u32(svint16x3_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u16_x3))) @@ -1712,6 +2005,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s8_ svint64x3_t svreinterpret_s64(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u8_x3))) svint64x3_t svreinterpret_s64(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_mf8_x3))) +svint64x3_t svreinterpret_s64(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s16_x3))) svint64x3_t svreinterpret_s64(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u16_x3))) @@ -1736,6 +2031,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s8_ svuint64x3_t svreinterpret_u64(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u8_x3))) svuint64x3_t svreinterpret_u64(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_mf8_x3))) +svuint64x3_t svreinterpret_u64(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s16_x3))) svuint64x3_t svreinterpret_u64(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u16_x3))) @@ -1760,6 +2057,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s8_ svfloat16x3_t svreinterpret_f16(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u8_x3))) svfloat16x3_t svreinterpret_f16(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_mf8_x3))) +svfloat16x3_t svreinterpret_f16(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s16_x3))) svfloat16x3_t svreinterpret_f16(svint16x3_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u16_x3))) @@ -1784,6 +2083,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s8 svbfloat16x3_t svreinterpret_bf16(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u8_x3))) svbfloat16x3_t svreinterpret_bf16(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_mf8_x3))) +svbfloat16x3_t svreinterpret_bf16(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s16_x3))) svbfloat16x3_t svreinterpret_bf16(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u16_x3))) @@ -1808,6 +2109,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s8_ svfloat32x3_t svreinterpret_f32(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u8_x3))) svfloat32x3_t svreinterpret_f32(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_mf8_x3))) +svfloat32x3_t svreinterpret_f32(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s16_x3))) svfloat32x3_t svreinterpret_f32(svint16x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u16_x3))) @@ -1832,6 +2135,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s8_ svfloat64x3_t svreinterpret_f64(svint8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u8_x3))) svfloat64x3_t svreinterpret_f64(svuint8x3_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_mf8_x3))) +svfloat64x3_t svreinterpret_f64(svmfloat8x3_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s16_x3))) svfloat64x3_t svreinterpret_f64(svint16x3_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u16_x3))) @@ -1856,6 +2161,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s8_x svint8x4_t svreinterpret_s8_s8_x4(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u8_x4))) svint8x4_t svreinterpret_s8_u8_x4(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_mf8_x4))) +svint8x4_t svreinterpret_s8_mf8_x4(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s16_x4))) svint8x4_t svreinterpret_s8_s16_x4(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u16_x4))) @@ -1880,6 +2187,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s8_x svuint8x4_t svreinterpret_u8_s8_x4(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u8_x4))) svuint8x4_t svreinterpret_u8_u8_x4(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_mf8_x4))) +svuint8x4_t svreinterpret_u8_mf8_x4(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s16_x4))) svuint8x4_t svreinterpret_u8_s16_x4(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u16_x4))) @@ -1900,10 +2209,38 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f32_ svuint8x4_t svreinterpret_u8_f32_x4(svfloat32x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f64_x4))) svuint8x4_t svreinterpret_u8_f64_x4(svfloat64x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s8_x4))) +svmfloat8x4_t svreinterpret_mf8_s8_x4(svint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u8_x4))) +svmfloat8x4_t svreinterpret_mf8_u8_x4(svuint8x4_t op); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_mf8_x4))) +svmfloat8x4_t svreinterpret_mf8_mf8_x4(svmfloat8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s16_x4))) +svmfloat8x4_t svreinterpret_mf8_s16_x4(svint16x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u16_x4))) +svmfloat8x4_t svreinterpret_mf8_u16_x4(svuint16x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s32_x4))) +svmfloat8x4_t svreinterpret_mf8_s32_x4(svint32x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u32_x4))) +svmfloat8x4_t svreinterpret_mf8_u32_x4(svuint32x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s64_x4))) +svmfloat8x4_t svreinterpret_mf8_s64_x4(svint64x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u64_x4))) +svmfloat8x4_t svreinterpret_mf8_u64_x4(svuint64x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f16_x4))) +svmfloat8x4_t svreinterpret_mf8_f16_x4(svfloat16x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_bf16_x4))) +svmfloat8x4_t svreinterpret_mf8_bf16_x4(svbfloat16x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f32_x4))) +svmfloat8x4_t svreinterpret_mf8_f32_x4(svfloat32x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f64_x4))) +svmfloat8x4_t svreinterpret_mf8_f64_x4(svfloat64x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s8_x4))) svint16x4_t svreinterpret_s16_s8_x4(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u8_x4))) svint16x4_t svreinterpret_s16_u8_x4(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_mf8_x4))) +svint16x4_t 
svreinterpret_s16_mf8_x4(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s16_x4))) svint16x4_t svreinterpret_s16_s16_x4(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u16_x4))) @@ -1928,6 +2265,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s8_ svuint16x4_t svreinterpret_u16_s8_x4(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u8_x4))) svuint16x4_t svreinterpret_u16_u8_x4(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_mf8_x4))) +svuint16x4_t svreinterpret_u16_mf8_x4(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s16_x4))) svuint16x4_t svreinterpret_u16_s16_x4(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u16_x4))) @@ -1952,6 +2291,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s8_ svint32x4_t svreinterpret_s32_s8_x4(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u8_x4))) svint32x4_t svreinterpret_s32_u8_x4(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_mf8_x4))) +svint32x4_t svreinterpret_s32_mf8_x4(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s16_x4))) svint32x4_t svreinterpret_s32_s16_x4(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u16_x4))) @@ -1976,6 +2317,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s8_ svuint32x4_t svreinterpret_u32_s8_x4(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u8_x4))) svuint32x4_t svreinterpret_u32_u8_x4(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_mf8_x4))) 
+svuint32x4_t svreinterpret_u32_mf8_x4(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s16_x4))) svuint32x4_t svreinterpret_u32_s16_x4(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u16_x4))) @@ -2000,6 +2343,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s8_ svint64x4_t svreinterpret_s64_s8_x4(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u8_x4))) svint64x4_t svreinterpret_s64_u8_x4(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_mf8_x4))) +svint64x4_t svreinterpret_s64_mf8_x4(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s16_x4))) svint64x4_t svreinterpret_s64_s16_x4(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u16_x4))) @@ -2024,6 +2369,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s8_ svuint64x4_t svreinterpret_u64_s8_x4(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u8_x4))) svuint64x4_t svreinterpret_u64_u8_x4(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_mf8_x4))) +svuint64x4_t svreinterpret_u64_mf8_x4(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s16_x4))) svuint64x4_t svreinterpret_u64_s16_x4(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u16_x4))) @@ -2048,6 +2395,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s8_ svfloat16x4_t svreinterpret_f16_s8_x4(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u8_x4))) svfloat16x4_t svreinterpret_f16_u8_x4(svuint8x4_t op); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_mf8_x4))) +svfloat16x4_t svreinterpret_f16_mf8_x4(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s16_x4))) svfloat16x4_t svreinterpret_f16_s16_x4(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u16_x4))) @@ -2072,6 +2421,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s8 svbfloat16x4_t svreinterpret_bf16_s8_x4(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u8_x4))) svbfloat16x4_t svreinterpret_bf16_u8_x4(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_mf8_x4))) +svbfloat16x4_t svreinterpret_bf16_mf8_x4(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s16_x4))) svbfloat16x4_t svreinterpret_bf16_s16_x4(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u16_x4))) @@ -2096,6 +2447,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s8_ svfloat32x4_t svreinterpret_f32_s8_x4(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u8_x4))) svfloat32x4_t svreinterpret_f32_u8_x4(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_mf8_x4))) +svfloat32x4_t svreinterpret_f32_mf8_x4(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s16_x4))) svfloat32x4_t svreinterpret_f32_s16_x4(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u16_x4))) @@ -2120,6 +2473,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s8_ svfloat64x4_t svreinterpret_f64_s8_x4(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u8_x4))) svfloat64x4_t 
svreinterpret_f64_u8_x4(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_mf8_x4))) +svfloat64x4_t svreinterpret_f64_mf8_x4(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s16_x4))) svfloat64x4_t svreinterpret_f64_s16_x4(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u16_x4))) @@ -2144,6 +2499,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s8_x svint8x4_t svreinterpret_s8(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u8_x4))) svint8x4_t svreinterpret_s8(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_mf8_x4))) +svint8x4_t svreinterpret_s8(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_s16_x4))) svint8x4_t svreinterpret_s8(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s8_u16_x4))) @@ -2168,6 +2525,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s8_x svuint8x4_t svreinterpret_u8(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u8_x4))) svuint8x4_t svreinterpret_u8(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_mf8_x4))) +svuint8x4_t svreinterpret_u8(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_s16_x4))) svuint8x4_t svreinterpret_u8(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_u16_x4))) @@ -2188,10 +2547,38 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f32_ svuint8x4_t svreinterpret_u8(svfloat32x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u8_f64_x4))) svuint8x4_t svreinterpret_u8(svfloat64x4_t op); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s8_x4))) +svmfloat8x4_t svreinterpret_mf8(svint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u8_x4))) +svmfloat8x4_t svreinterpret_mf8(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_mf8_x4))) +svmfloat8x4_t svreinterpret_mf8(svmfloat8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s16_x4))) +svmfloat8x4_t svreinterpret_mf8(svint16x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u16_x4))) +svmfloat8x4_t svreinterpret_mf8(svuint16x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s32_x4))) +svmfloat8x4_t svreinterpret_mf8(svint32x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u32_x4))) +svmfloat8x4_t svreinterpret_mf8(svuint32x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_s64_x4))) +svmfloat8x4_t svreinterpret_mf8(svint64x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_u64_x4))) +svmfloat8x4_t svreinterpret_mf8(svuint64x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f16_x4))) +svmfloat8x4_t svreinterpret_mf8(svfloat16x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_bf16_x4))) +svmfloat8x4_t svreinterpret_mf8(svbfloat16x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f32_x4))) +svmfloat8x4_t svreinterpret_mf8(svfloat32x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_mf8_f64_x4))) +svmfloat8x4_t svreinterpret_mf8(svfloat64x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s8_x4))) svint16x4_t svreinterpret_s16(svint8x4_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u8_x4))) svint16x4_t svreinterpret_s16(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_mf8_x4))) +svint16x4_t svreinterpret_s16(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_s16_x4))) svint16x4_t svreinterpret_s16(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s16_u16_x4))) @@ -2216,6 +2603,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s8_ svuint16x4_t svreinterpret_u16(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u8_x4))) svuint16x4_t svreinterpret_u16(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_mf8_x4))) +svuint16x4_t svreinterpret_u16(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_s16_x4))) svuint16x4_t svreinterpret_u16(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u16_u16_x4))) @@ -2240,6 +2629,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s8_ svint32x4_t svreinterpret_s32(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u8_x4))) svint32x4_t svreinterpret_s32(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_mf8_x4))) +svint32x4_t svreinterpret_s32(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_s16_x4))) svint32x4_t svreinterpret_s32(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s32_u16_x4))) @@ -2264,6 +2655,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s8_ svuint32x4_t svreinterpret_u32(svint8x4_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u8_x4))) svuint32x4_t svreinterpret_u32(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_mf8_x4))) +svuint32x4_t svreinterpret_u32(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_s16_x4))) svuint32x4_t svreinterpret_u32(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u32_u16_x4))) @@ -2288,6 +2681,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s8_ svint64x4_t svreinterpret_s64(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u8_x4))) svint64x4_t svreinterpret_s64(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_mf8_x4))) +svint64x4_t svreinterpret_s64(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_s16_x4))) svint64x4_t svreinterpret_s64(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_s64_u16_x4))) @@ -2312,6 +2707,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s8_ svuint64x4_t svreinterpret_u64(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u8_x4))) svuint64x4_t svreinterpret_u64(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_mf8_x4))) +svuint64x4_t svreinterpret_u64(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_s16_x4))) svuint64x4_t svreinterpret_u64(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_u64_u16_x4))) @@ -2336,6 +2733,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s8_ svfloat16x4_t svreinterpret_f16(svint8x4_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u8_x4))) svfloat16x4_t svreinterpret_f16(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_mf8_x4))) +svfloat16x4_t svreinterpret_f16(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_s16_x4))) svfloat16x4_t svreinterpret_f16(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f16_u16_x4))) @@ -2360,6 +2759,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s8 svbfloat16x4_t svreinterpret_bf16(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u8_x4))) svbfloat16x4_t svreinterpret_bf16(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_mf8_x4))) +svbfloat16x4_t svreinterpret_bf16(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_s16_x4))) svbfloat16x4_t svreinterpret_bf16(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_bf16_u16_x4))) @@ -2384,6 +2785,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s8_ svfloat32x4_t svreinterpret_f32(svint8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u8_x4))) svfloat32x4_t svreinterpret_f32(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_mf8_x4))) +svfloat32x4_t svreinterpret_f32(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_s16_x4))) svfloat32x4_t svreinterpret_f32(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f32_u16_x4))) @@ -2408,6 +2811,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s8_ svfloat64x4_t svreinterpret_f64(svint8x4_t op); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u8_x4))) svfloat64x4_t svreinterpret_f64(svuint8x4_t op); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_mf8_x4))) +svfloat64x4_t svreinterpret_f64(svmfloat8x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_s16_x4))) svfloat64x4_t svreinterpret_f64(svint16x4_t op); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_reinterpret_f64_u16_x4))) @@ -3956,6 +4361,150 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s64_x4))) svint64x4_t svzipq(svint64x4_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq_s16_x4))) svint16x4_t svzipq(svint16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x2))) +svfloat64x2_t svamax_f64_x2(svfloat64x2_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x2))) +svfloat32x2_t svamax_f32_x2(svfloat32x2_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x2))) +svfloat16x2_t svamax_f16_x2(svfloat16x2_t, svfloat16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x4))) +svfloat64x4_t svamax_f64_x4(svfloat64x4_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x4))) +svfloat32x4_t svamax_f32_x4(svfloat32x4_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x4))) +svfloat16x4_t svamax_f16_x4(svfloat16x4_t, svfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x2))) +svfloat64x2_t svamin_f64_x2(svfloat64x2_t, svfloat64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x2))) +svfloat32x2_t svamin_f32_x2(svfloat32x2_t, svfloat32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x2))) +svfloat16x2_t svamin_f16_x2(svfloat16x2_t, svfloat16x2_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x4))) +svfloat64x4_t svamin_f64_x4(svfloat64x4_t, svfloat64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x4))) +svfloat32x4_t svamin_f32_x4(svfloat32x4_t, svfloat32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x4))) +svfloat16x4_t svamin_f16_x4(svfloat16x4_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x2))) +svfloat64x2_t svamax(svfloat64x2_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x2))) +svfloat32x2_t svamax(svfloat32x2_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x2))) +svfloat16x2_t svamax(svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x4))) +svfloat64x4_t svamax(svfloat64x4_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x4))) +svfloat32x4_t svamax(svfloat32x4_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x4))) +svfloat16x4_t svamax(svfloat16x4_t, svfloat16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x2))) +svfloat64x2_t svamin(svfloat64x2_t, svfloat64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x2))) +svfloat32x2_t svamin(svfloat32x2_t, svfloat32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x2))) +svfloat16x2_t svamin(svfloat16x2_t, svfloat16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x4))) +svfloat64x4_t svamin(svfloat64x4_t, svfloat64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x4))) +svfloat32x4_t svamin(svfloat32x4_t, svfloat32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x4))) +svfloat16x4_t svamin(svfloat16x4_t, svfloat16x4_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_bf16_mf8_x2_fpm))) +svbfloat16x2_t svcvt1_bf16_mf8_x2_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_f16_mf8_x2_fpm))) +svfloat16x2_t svcvt1_f16_mf8_x2_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_bf16_mf8_x2_fpm))) +svbfloat16x2_t svcvt2_bf16_mf8_x2_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_f16_mf8_x2_fpm))) +svfloat16x2_t svcvt2_f16_mf8_x2_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_mf8_bf16_x2_fpm))) +svmfloat8_t svcvt_mf8_bf16_x2_fpm(svbfloat16x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_mf8_f16_x2_fpm))) +svmfloat8_t svcvt_mf8_f16_x2_fpm(svfloat16x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_mf8_f32_x4_fpm))) +svmfloat8_t svcvt_mf8_f32_x4_fpm(svfloat32x4_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtl1_bf16_mf8_x2_fpm))) +svbfloat16x2_t svcvtl1_bf16_mf8_x2_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtl1_f16_mf8_x2_fpm))) +svfloat16x2_t svcvtl1_f16_mf8_x2_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtl2_bf16_mf8_x2_fpm))) +svbfloat16x2_t svcvtl2_bf16_mf8_x2_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtl2_f16_mf8_x2_fpm))) +svfloat16x2_t svcvtl2_f16_mf8_x2_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_f32_x4_fpm))) +svmfloat8_t svcvtn_mf8_f32_x4_fpm(svfloat32x4_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_f64_x2))) +svfloat64x2_t svscale_single_f64_x2(svfloat64x2_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_f32_x2))) +svfloat32x2_t 
svscale_single_f32_x2(svfloat32x2_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_f16_x2))) +svfloat16x2_t svscale_single_f16_x2(svfloat16x2_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_f64_x4))) +svfloat64x4_t svscale_single_f64_x4(svfloat64x4_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_f32_x4))) +svfloat32x4_t svscale_single_f32_x4(svfloat32x4_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_f16_x4))) +svfloat16x4_t svscale_single_f16_x4(svfloat16x4_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_x2))) +svfloat64x2_t svscale_f64_x2(svfloat64x2_t, svint64x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_x2))) +svfloat32x2_t svscale_f32_x2(svfloat32x2_t, svint32x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_x2))) +svfloat16x2_t svscale_f16_x2(svfloat16x2_t, svint16x2_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_x4))) +svfloat64x4_t svscale_f64_x4(svfloat64x4_t, svint64x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_x4))) +svfloat32x4_t svscale_f32_x4(svfloat32x4_t, svint32x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_x4))) +svfloat16x4_t svscale_f16_x4(svfloat16x4_t, svint16x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_bf16_mf8_x2_fpm))) +svbfloat16x2_t svcvt1_bf16_x2_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_f16_mf8_x2_fpm))) +svfloat16x2_t svcvt1_f16_x2_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_bf16_mf8_x2_fpm))) +svbfloat16x2_t svcvt2_bf16_x2_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_f16_mf8_x2_fpm))) 
+svfloat16x2_t svcvt2_f16_x2_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_mf8_bf16_x2_fpm))) +svmfloat8_t svcvt_mf8_fpm(svbfloat16x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_mf8_f16_x2_fpm))) +svmfloat8_t svcvt_mf8_fpm(svfloat16x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt_mf8_f32_x4_fpm))) +svmfloat8_t svcvt_mf8_fpm(svfloat32x4_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtl1_bf16_mf8_x2_fpm))) +svbfloat16x2_t svcvtl1_bf16_x2_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtl1_f16_mf8_x2_fpm))) +svfloat16x2_t svcvtl1_f16_x2_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtl2_bf16_mf8_x2_fpm))) +svbfloat16x2_t svcvtl2_bf16_x2_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtl2_f16_mf8_x2_fpm))) +svfloat16x2_t svcvtl2_f16_x2_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_f32_x4_fpm))) +svmfloat8_t svcvtn_mf8_fpm(svfloat32x4_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_f64_x2))) +svfloat64x2_t svscale(svfloat64x2_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_f32_x2))) +svfloat32x2_t svscale(svfloat32x2_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_f16_x2))) +svfloat16x2_t svscale(svfloat16x2_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_f64_x4))) +svfloat64x4_t svscale(svfloat64x4_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_f32_x4))) +svfloat32x4_t svscale(svfloat32x4_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_single_f16_x4))) +svfloat16x4_t svscale(svfloat16x4_t, 
svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_x2))) +svfloat64x2_t svscale(svfloat64x2_t, svint64x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_x2))) +svfloat32x2_t svscale(svfloat32x2_t, svint32x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_x2))) +svfloat16x2_t svscale(svfloat16x2_t, svint16x2_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f64_x4))) +svfloat64x4_t svscale(svfloat64x4_t, svint64x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f32_x4))) +svfloat32x4_t svscale(svfloat32x4_t, svint32x4_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svscale_f16_x4))) +svfloat16x4_t svscale(svfloat16x4_t, svint16x4_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_bf16_x2))) svbfloat16x2_t svclamp_single_bf16_x2(svbfloat16x2_t, svbfloat16_t, svbfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svclamp_single_bf16_x4))) @@ -4028,6 +4577,12 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x2))) svbfloat16x2_t svminnm(svbfloat16x2_t, svbfloat16x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svminnm_bf16_x4))) svbfloat16x4_t svminnm(svbfloat16x4_t, svbfloat16x4_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f64))) +float64_t svadda_f64(svbool_t, float64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f32))) +float32_t svadda_f32(svbool_t, float32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f16))) +float16_t svadda_f16(svbool_t, float16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u32base_u32offset))) svuint32_t svadrb_u32base_u32offset(svuint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u64base_u64offset))) @@ -5280,6 
+5835,12 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtssel_f16))) svfloat16_t svtssel_f16(svfloat16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwrffr))) void svwrffr(svbool_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f64))) +float64_t svadda(svbool_t, float64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f32))) +float32_t svadda(svbool_t, float32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f16))) +float16_t svadda(svbool_t, float16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u32base_u32offset))) svuint32_t svadrb_offset(svuint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadrb_u64base_u64offset))) @@ -7948,6 +8509,406 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u6 void svstnt1w_scatter_offset(svbool_t, int32_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svstnt1w_scatter_u64offset_u64))) void svstnt1w_scatter_offset(svbool_t, uint32_t *, svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_bf16))) +svbfloat16_t svtbl2_bf16(svbfloat16x2_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_bf16))) +svbfloat16_t svtbx_bf16(svbfloat16_t, svbfloat16_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_bf16))) +svbool_t svwhilerw_bf16(bfloat16_t const *, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_bf16))) +svbool_t svwhilewr_bf16(bfloat16_t const *, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_bf16))) +svbfloat16_t svtbl2(svbfloat16x2_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_bf16))) +svbfloat16_t svtbx(svbfloat16_t, 
svbfloat16_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_bf16))) +svbool_t svwhilerw(bfloat16_t const *, bfloat16_t const *); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_bf16))) +svbool_t svwhilewr(bfloat16_t const *, bfloat16_t const *); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_m))) +svfloat64_t svamax_n_f64_m(svbool_t, svfloat64_t, float64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_m))) +svfloat32_t svamax_n_f32_m(svbool_t, svfloat32_t, float32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_m))) +svfloat16_t svamax_n_f16_m(svbool_t, svfloat16_t, float16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_x))) +svfloat64_t svamax_n_f64_x(svbool_t, svfloat64_t, float64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_x))) +svfloat32_t svamax_n_f32_x(svbool_t, svfloat32_t, float32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_x))) +svfloat16_t svamax_n_f16_x(svbool_t, svfloat16_t, float16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_z))) +svfloat64_t svamax_n_f64_z(svbool_t, svfloat64_t, float64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_z))) +svfloat32_t svamax_n_f32_z(svbool_t, svfloat32_t, float32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_z))) +svfloat16_t svamax_n_f16_z(svbool_t, svfloat16_t, float16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_m))) +svfloat64_t svamax_f64_m(svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_m))) +svfloat32_t svamax_f32_m(svbool_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_m))) +svfloat16_t svamax_f16_m(svbool_t, svfloat16_t, 
svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x))) +svfloat64_t svamax_f64_x(svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x))) +svfloat32_t svamax_f32_x(svbool_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x))) +svfloat16_t svamax_f16_x(svbool_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_z))) +svfloat64_t svamax_f64_z(svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_z))) +svfloat32_t svamax_f32_z(svbool_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_z))) +svfloat16_t svamax_f16_z(svbool_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_m))) +svfloat64_t svamin_n_f64_m(svbool_t, svfloat64_t, float64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_m))) +svfloat32_t svamin_n_f32_m(svbool_t, svfloat32_t, float32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_m))) +svfloat16_t svamin_n_f16_m(svbool_t, svfloat16_t, float16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_x))) +svfloat64_t svamin_n_f64_x(svbool_t, svfloat64_t, float64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_x))) +svfloat32_t svamin_n_f32_x(svbool_t, svfloat32_t, float32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_x))) +svfloat16_t svamin_n_f16_x(svbool_t, svfloat16_t, float16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_z))) +svfloat64_t svamin_n_f64_z(svbool_t, svfloat64_t, float64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_z))) +svfloat32_t svamin_n_f32_z(svbool_t, svfloat32_t, float32_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_z))) +svfloat16_t svamin_n_f16_z(svbool_t, svfloat16_t, float16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_m))) +svfloat64_t svamin_f64_m(svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_m))) +svfloat32_t svamin_f32_m(svbool_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_m))) +svfloat16_t svamin_f16_m(svbool_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x))) +svfloat64_t svamin_f64_x(svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x))) +svfloat32_t svamin_f32_x(svbool_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x))) +svfloat16_t svamin_f16_x(svbool_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_z))) +svfloat64_t svamin_f64_z(svbool_t, svfloat64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_z))) +svfloat32_t svamin_f32_z(svbool_t, svfloat32_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_z))) +svfloat16_t svamin_f16_z(svbool_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_m))) +svfloat64_t svamax_m(svbool_t, svfloat64_t, float64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_m))) +svfloat32_t svamax_m(svbool_t, svfloat32_t, float32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_m))) +svfloat16_t svamax_m(svbool_t, svfloat16_t, float16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_x))) +svfloat64_t svamax_x(svbool_t, svfloat64_t, float64_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_x))) +svfloat32_t svamax_x(svbool_t, svfloat32_t, float32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_x))) +svfloat16_t svamax_x(svbool_t, svfloat16_t, float16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f64_z))) +svfloat64_t svamax_z(svbool_t, svfloat64_t, float64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f32_z))) +svfloat32_t svamax_z(svbool_t, svfloat32_t, float32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_n_f16_z))) +svfloat16_t svamax_z(svbool_t, svfloat16_t, float16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_m))) +svfloat64_t svamax_m(svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_m))) +svfloat32_t svamax_m(svbool_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_m))) +svfloat16_t svamax_m(svbool_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_x))) +svfloat64_t svamax_x(svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_x))) +svfloat32_t svamax_x(svbool_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_x))) +svfloat16_t svamax_x(svbool_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f64_z))) +svfloat64_t svamax_z(svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f32_z))) +svfloat32_t svamax_z(svbool_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamax_f16_z))) +svfloat16_t svamax_z(svbool_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_m))) 
+svfloat64_t svamin_m(svbool_t, svfloat64_t, float64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_m))) +svfloat32_t svamin_m(svbool_t, svfloat32_t, float32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_m))) +svfloat16_t svamin_m(svbool_t, svfloat16_t, float16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_x))) +svfloat64_t svamin_x(svbool_t, svfloat64_t, float64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_x))) +svfloat32_t svamin_x(svbool_t, svfloat32_t, float32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_x))) +svfloat16_t svamin_x(svbool_t, svfloat16_t, float16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f64_z))) +svfloat64_t svamin_z(svbool_t, svfloat64_t, float64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f32_z))) +svfloat32_t svamin_z(svbool_t, svfloat32_t, float32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_n_f16_z))) +svfloat16_t svamin_z(svbool_t, svfloat16_t, float16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_m))) +svfloat64_t svamin_m(svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_m))) +svfloat32_t svamin_m(svbool_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_m))) +svfloat16_t svamin_m(svbool_t, svfloat16_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_x))) +svfloat64_t svamin_x(svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_x))) +svfloat32_t svamin_x(svbool_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_x))) +svfloat16_t svamin_x(svbool_t, svfloat16_t, svfloat16_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f64_z))) +svfloat64_t svamin_z(svbool_t, svfloat64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f32_z))) +svfloat32_t svamin_z(svbool_t, svfloat32_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svamin_f16_z))) +svfloat16_t svamin_z(svbool_t, svfloat16_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f16_mf8_fpm))) +svfloat16_t svdot_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f16_mf8_fpm))) +svfloat16_t svdot_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f16_mf8_fpm))) +svfloat16_t svdot_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f16_mf8_fpm))) +svfloat16_t svdot_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f16_mf8_fpm))) +svfloat16_t svdot_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f16_mf8_fpm))) +svfloat16_t svdot_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_mf8_fpm))) +svfloat32_t svdot_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f32_mf8_fpm))) +svfloat32_t svdot_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_mf8_fpm))) +svfloat32_t svdot_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_f32_mf8_fpm))) +svfloat32_t 
svdot_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_n_f32_mf8_fpm))) +svfloat32_t svdot_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdot_lane_f32_mf8_fpm))) +svfloat32_t svdot_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f16_mf8_fpm))) +svfloat16_t svmlalb_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f16_mf8_fpm))) +svfloat16_t svmlalb_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f16_mf8_fpm))) +svfloat16_t svmlalb_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_f32_mf8_fpm))) +svfloat32_t svmlallbb_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_n_f32_mf8_fpm))) +svfloat32_t svmlallbb_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_lane_f32_mf8_fpm))) +svfloat32_t svmlallbb_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_f32_mf8_fpm))) +svfloat32_t svmlallbt_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_n_f32_mf8_fpm))) +svfloat32_t svmlallbt_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_lane_f32_mf8_fpm))) +svfloat32_t svmlallbt_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_f32_mf8_fpm))) +svfloat32_t svmlalltb_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_n_f32_mf8_fpm))) +svfloat32_t svmlalltb_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_lane_f32_mf8_fpm))) +svfloat32_t svmlalltb_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_f32_mf8_fpm))) +svfloat32_t svmlalltt_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_n_f32_mf8_fpm))) +svfloat32_t svmlalltt_n_f32_mf8_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_lane_f32_mf8_fpm))) +svfloat32_t svmlalltt_lane_f32_mf8_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f16_mf8_fpm))) +svfloat16_t svmlalt_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f16_mf8_fpm))) +svfloat16_t svmlalt_n_f16_mf8_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f16_mf8_fpm))) +svfloat16_t svmlalt_lane_f16_mf8_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_f16_mf8_fpm))) +svfloat16_t svmlalb_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_n_f16_mf8_fpm))) +svfloat16_t svmlalb_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalb_lane_f16_mf8_fpm))) +svfloat16_t 
svmlalb_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_f32_mf8_fpm))) +svfloat32_t svmlallbb_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_n_f32_mf8_fpm))) +svfloat32_t svmlallbb_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbb_lane_f32_mf8_fpm))) +svfloat32_t svmlallbb_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_f32_mf8_fpm))) +svfloat32_t svmlallbt_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_n_f32_mf8_fpm))) +svfloat32_t svmlallbt_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlallbt_lane_f32_mf8_fpm))) +svfloat32_t svmlallbt_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_f32_mf8_fpm))) +svfloat32_t svmlalltb_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_n_f32_mf8_fpm))) +svfloat32_t svmlalltb_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltb_lane_f32_mf8_fpm))) +svfloat32_t svmlalltb_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_f32_mf8_fpm))) +svfloat32_t svmlalltt_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_n_f32_mf8_fpm))) +svfloat32_t svmlalltt_fpm(svfloat32_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalltt_lane_f32_mf8_fpm))) 
+svfloat32_t svmlalltt_lane_fpm(svfloat32_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_f16_mf8_fpm))) +svfloat16_t svmlalt_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_n_f16_mf8_fpm))) +svfloat16_t svmlalt_fpm(svfloat16_t, svmfloat8_t, mfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svmlalt_lane_f16_mf8_fpm))) +svfloat16_t svmlalt_lane_fpm(svfloat16_t, svmfloat8_t, svmfloat8_t, uint64_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_bf16_mf8_fpm))) +svbfloat16_t svcvt1_bf16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_f16_mf8_fpm))) +svfloat16_t svcvt1_f16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_bf16_mf8_fpm))) +svbfloat16_t svcvt2_bf16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_f16_mf8_fpm))) +svfloat16_t svcvt2_f16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_bf16_mf8_fpm))) +svbfloat16_t svcvtlt1_bf16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_f16_mf8_fpm))) +svfloat16_t svcvtlt1_f16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_bf16_mf8_fpm))) +svbfloat16_t svcvtlt2_bf16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_f16_mf8_fpm))) +svfloat16_t svcvtlt2_f16_mf8_fpm(svmfloat8_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_bf16_x2_fpm))) +svmfloat8_t svcvtn_mf8_bf16_x2_fpm(svbfloat16x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_f16_x2_fpm))) +svmfloat8_t svcvtn_mf8_f16_x2_fpm(svfloat16x2_t, fpm_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnb_mf8_f32_x2_fpm))) +svmfloat8_t svcvtnb_mf8_f32_x2_fpm(svfloat32x2_t, fpm_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_mf8_f32_x2_fpm))) +svmfloat8_t svcvtnt_mf8_f32_x2_fpm(svmfloat8_t, svfloat32x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_bf16_mf8_fpm))) +svbfloat16_t svcvt1_bf16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt1_f16_mf8_fpm))) +svfloat16_t svcvt1_f16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_bf16_mf8_fpm))) +svbfloat16_t svcvt2_bf16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvt2_f16_mf8_fpm))) +svfloat16_t svcvt2_f16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_bf16_mf8_fpm))) +svbfloat16_t svcvtlt1_bf16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt1_f16_mf8_fpm))) +svfloat16_t svcvtlt1_f16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_bf16_mf8_fpm))) +svbfloat16_t svcvtlt2_bf16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtlt2_f16_mf8_fpm))) +svfloat16_t svcvtlt2_f16_fpm(svmfloat8_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_bf16_x2_fpm))) +svmfloat8_t svcvtn_mf8_fpm(svbfloat16x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtn_mf8_f16_x2_fpm))) +svmfloat8_t svcvtn_mf8_fpm(svfloat16x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnb_mf8_f32_x2_fpm))) +svmfloat8_t svcvtnb_mf8_fpm(svfloat32x2_t, fpm_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svcvtnt_mf8_f32_x2_fpm))) +svmfloat8_t svcvtnt_mf8_fpm(svmfloat8_t, svfloat32x2_t, fpm_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16))) +svbfloat16_t svluti2_lane_bf16(svbfloat16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16))) +svbfloat16_t svluti4_lane_bf16(svbfloat16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2))) +svbfloat16_t svluti4_lane_bf16_x2(svbfloat16x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_bf16))) +svbfloat16_t svluti2_lane(svbfloat16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16))) +svbfloat16_t svluti4_lane(svbfloat16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_bf16_x2))) +svbfloat16_t svluti4_lane(svbfloat16x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u8))) +svuint8_t svluti2_lane_u8(svuint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s8))) +svint8_t svluti2_lane_s8(svint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u16))) +svuint16_t svluti2_lane_u16(svuint16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_f16))) +svfloat16_t svluti2_lane_f16(svfloat16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s16))) +svint16_t svluti2_lane_s16(svint16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u8))) +svuint8_t svluti4_lane_u8(svuint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s8))) +svint8_t svluti4_lane_s8(svint8_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16))) +svuint16_t 
svluti4_lane_u16(svuint16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16))) +svfloat16_t svluti4_lane_f16(svfloat16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16))) +svint16_t svluti4_lane_s16(svint16_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16_x2))) +svuint16_t svluti4_lane_u16_x2(svuint16x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16_x2))) +svfloat16_t svluti4_lane_f16_x2(svfloat16x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16_x2))) +svint16_t svluti4_lane_s16_x2(svint16x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u8))) +svuint8_t svluti2_lane(svuint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s8))) +svint8_t svluti2_lane(svint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_u16))) +svuint16_t svluti2_lane(svuint16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_f16))) +svfloat16_t svluti2_lane(svfloat16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti2_lane_s16))) +svint16_t svluti2_lane(svint16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u8))) +svuint8_t svluti4_lane(svuint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s8))) +svint8_t svluti4_lane(svint8_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16))) +svuint16_t svluti4_lane(svuint16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16))) 
+svfloat16_t svluti4_lane(svfloat16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16))) +svint16_t svluti4_lane(svint16_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_u16_x2))) +svuint16_t svluti4_lane(svuint16x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_f16_x2))) +svfloat16_t svluti4_lane(svfloat16x2_t, svuint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svluti4_lane_s16_x2))) +svint16_t svluti4_lane(svint16x2_t, svuint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8))) +svuint8_t svaesd_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8))) +svuint8_t svaese_u8(svuint8_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8))) +svuint8_t svaesimc_u8(svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8))) +svuint8_t svaesmc_u8(svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64))) +svuint64_t svpmullb_pair_n_u64(svuint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64))) +svuint64_t svpmullb_pair_u64(svuint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64))) +svuint64_t svpmullt_pair_n_u64(svuint64_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64))) +svuint64_t svpmullt_pair_u64(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8))) +svuint8_t svaesd(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8))) +svuint8_t svaese(svuint8_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8))) +svuint8_t svaesimc(svuint8_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8))) +svuint8_t svaesmc(svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64))) +svuint64_t svpmullb_pair(svuint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64))) +svuint64_t svpmullb_pair(svuint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64))) +svuint64_t svpmullt_pair(svuint64_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64))) +svuint64_t svpmullt_pair(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_m))) svbfloat16_t svadd_n_bf16_m(svbool_t, svbfloat16_t, bfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_n_bf16_x))) @@ -8180,54 +9141,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_x))) svbfloat16_t svsub_x(svbool_t, svbfloat16_t, svbfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svsub_bf16_z))) svbfloat16_t svsub_z(svbool_t, svbfloat16_t, svbfloat16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_bf16))) -svbfloat16_t svtbl2_bf16(svbfloat16x2_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_bf16))) -svbfloat16_t svtbx_bf16(svbfloat16_t, svbfloat16_t, svuint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_bf16))) -svbool_t svwhilerw_bf16(bfloat16_t const *, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_bf16))) -svbool_t svwhilewr_bf16(bfloat16_t const *, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbl2_bf16))) -svbfloat16_t svtbl2(svbfloat16x2_t, svuint16_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbx_bf16))) -svbfloat16_t svtbx(svbfloat16_t, svbfloat16_t, svuint16_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilerw_bf16))) -svbool_t svwhilerw(bfloat16_t const *, bfloat16_t const *); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svwhilewr_bf16))) -svbool_t svwhilewr(bfloat16_t const *, bfloat16_t const *); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8))) -svuint8_t svaesd_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8))) -svuint8_t svaese_u8(svuint8_t, svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8))) -svuint8_t svaesimc_u8(svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8))) -svuint8_t svaesmc_u8(svuint8_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64))) -svuint64_t svpmullb_pair_n_u64(svuint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64))) -svuint64_t svpmullb_pair_u64(svuint64_t, svuint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64))) -svuint64_t svpmullt_pair_n_u64(svuint64_t, uint64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64))) -svuint64_t svpmullt_pair_u64(svuint64_t, svuint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesd_u8))) -svuint8_t svaesd(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaese_u8))) -svuint8_t svaese(svuint8_t, svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesimc_u8))) -svuint8_t svaesimc(svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaesmc_u8))) -svuint8_t svaesmc(svuint8_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_n_u64))) -svuint64_t svpmullb_pair(svuint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullb_pair_u64))) -svuint64_t svpmullb_pair(svuint64_t, svuint64_t); 
-__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_n_u64))) -svuint64_t svpmullt_pair(svuint64_t, uint64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmullt_pair_u64))) -svuint64_t svpmullt_pair(svuint64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u8))) svuint8_t svbdep_n_u8(svuint8_t, uint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svbdep_n_u32))) @@ -8416,6 +9329,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s32))) svint32_t svextq_s32(svint32_t, svint32_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s64))) svint64_t svextq_s64(svint64_t, svint64_t, int32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_mf8))) +svmfloat8_t svextq_mf8(svmfloat8_t, svmfloat8_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s16))) svint16_t svextq_s16(svint16_t, svint16_t, int32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32))) @@ -8831,17 +9746,17 @@ svuint32_t svpmov_u32_z(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_s32_z))) svint32_t svpmov_s32_z(svbool_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64))) -void svst1dq_u64(svbool_t, uint64_t const *, svuint64_t); +void svst1dq_u64(svbool_t, uint64_t *, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64))) -void svst1dq_f64(svbool_t, float64_t const *, svfloat64_t); +void svst1dq_f64(svbool_t, float64_t *, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64))) -void svst1dq_s64(svbool_t, int64_t const *, svint64_t); +void svst1dq_s64(svbool_t, int64_t *, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64))) -void svst1dq_vnum_u64(svbool_t, uint64_t const *, int64_t, svuint64_t); +void 
svst1dq_vnum_u64(svbool_t, uint64_t *, int64_t, svuint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64))) -void svst1dq_vnum_f64(svbool_t, float64_t const *, int64_t, svfloat64_t); +void svst1dq_vnum_f64(svbool_t, float64_t *, int64_t, svfloat64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64))) -void svst1dq_vnum_s64(svbool_t, int64_t const *, int64_t, svint64_t); +void svst1dq_vnum_s64(svbool_t, int64_t *, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8))) void svst1q_scatter_u64base_u8(svbool_t, svuint64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32))) @@ -8910,6 +9825,26 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64ba void svst1q_scatter_u64base_offset_s64(svbool_t, svuint64_t, int64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16))) void svst1q_scatter_u64base_offset_s16(svbool_t, svuint64_t, int64_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u32))) +void svst1q_scatter_s64index_u32(svbool_t, uint32_t *, svint64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u64))) +void svst1q_scatter_s64index_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u16))) +void svst1q_scatter_s64index_u16(svbool_t, uint16_t *, svint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_bf16))) +void svst1q_scatter_s64index_bf16(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f64))) +void svst1q_scatter_s64index_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f32))) +void svst1q_scatter_s64index_f32(svbool_t, float32_t *, svint64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f16))) +void svst1q_scatter_s64index_f16(svbool_t, float16_t *, svint64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s32))) +void svst1q_scatter_s64index_s32(svbool_t, int32_t *, svint64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s64))) +void svst1q_scatter_s64index_s64(svbool_t, int64_t *, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s16))) +void svst1q_scatter_s64index_s16(svbool_t, int16_t *, svint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32))) void svst1q_scatter_u64index_u32(svbool_t, uint32_t *, svuint64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64))) @@ -8930,6 +9865,30 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64in void svst1q_scatter_u64index_s64(svbool_t, int64_t *, svuint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16))) void svst1q_scatter_u64index_s16(svbool_t, int16_t *, svuint64_t, svint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u8))) +void svst1q_scatter_s64offset_u8(svbool_t, uint8_t *, svint64_t, svuint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u32))) +void svst1q_scatter_s64offset_u32(svbool_t, uint32_t *, svint64_t, svuint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u64))) +void svst1q_scatter_s64offset_u64(svbool_t, uint64_t *, svint64_t, svuint64_t); +__ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u16))) +void svst1q_scatter_s64offset_u16(svbool_t, uint16_t *, svint64_t, svuint16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_bf16))) +void svst1q_scatter_s64offset_bf16(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s8))) +void svst1q_scatter_s64offset_s8(svbool_t, int8_t *, svint64_t, svint8_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f64))) +void svst1q_scatter_s64offset_f64(svbool_t, float64_t *, svint64_t, svfloat64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f32))) +void svst1q_scatter_s64offset_f32(svbool_t, float32_t *, svint64_t, svfloat32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f16))) +void svst1q_scatter_s64offset_f16(svbool_t, float16_t *, svint64_t, svfloat16_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s32))) +void svst1q_scatter_s64offset_s32(svbool_t, int32_t *, svint64_t, svint32_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s64))) +void svst1q_scatter_s64offset_s64(svbool_t, int64_t *, svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s16))) +void svst1q_scatter_s64offset_s16(svbool_t, int16_t *, svint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8))) void svst1q_scatter_u64offset_u8(svbool_t, uint8_t *, svuint64_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32))) @@ -8955,17 +9914,17 @@ void svst1q_scatter_u64offset_s64(svbool_t, int64_t *, svuint64_t, svint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16))) void svst1q_scatter_u64offset_s16(svbool_t, int16_t *, svuint64_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32))) -void svst1wq_u32(svbool_t, uint32_t const *, svuint32_t); +void svst1wq_u32(svbool_t, uint32_t *, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32))) -void svst1wq_f32(svbool_t, float32_t const *, svfloat32_t); +void svst1wq_f32(svbool_t, float32_t *, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32))) -void svst1wq_s32(svbool_t, int32_t const *, svint32_t); +void svst1wq_s32(svbool_t, int32_t *, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32))) -void svst1wq_vnum_u32(svbool_t, uint32_t const *, int64_t, svuint32_t); +void svst1wq_vnum_u32(svbool_t, uint32_t *, int64_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32))) -void svst1wq_vnum_f32(svbool_t, float32_t const *, int64_t, svfloat32_t); +void svst1wq_vnum_f32(svbool_t, float32_t *, int64_t, svfloat32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32))) -void svst1wq_vnum_s32(svbool_t, int32_t const *, int64_t, svint32_t); +void svst1wq_vnum_s32(svbool_t, int32_t *, int64_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u8))) void svst2q_u8(svbool_t, uint8_t const *, svuint8x2_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u32))) @@ -9132,6 +10091,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s32))) svint32_t svtblq_s32(svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s64))) svint64_t svtblq_s64(svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_mf8))) +svmfloat8_t svtblq_mf8(svmfloat8_t, svuint8_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s16))) svint16_t svtblq_s16(svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u8))) @@ -9156,6 +10117,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s32))) svint32_t svtbxq_s32(svint32_t, svint32_t, svuint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s64))) svint64_t svtbxq_s64(svint64_t, svint64_t, svuint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_mf8))) +svmfloat8_t svtbxq_mf8(svmfloat8_t, svmfloat8_t, svuint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s16))) svint16_t svtbxq_s16(svint16_t, svint16_t, svuint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u8))) @@ -9180,6 +10143,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s32))) svint32_t svuzpq1_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s64))) svint64_t svuzpq1_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_mf8))) +svmfloat8_t svuzpq1_mf8(svmfloat8_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s16))) svint16_t svuzpq1_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u8))) @@ -9204,6 +10169,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s32))) svint32_t svuzpq2_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s64))) svint64_t svuzpq2_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_mf8))) +svmfloat8_t svuzpq2_mf8(svmfloat8_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s16))) svint16_t svuzpq2_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u8))) @@ -9228,6 
+10195,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s32))) svint32_t svzipq1_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s64))) svint64_t svzipq1_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_mf8))) +svmfloat8_t svzipq1_mf8(svmfloat8_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s16))) svint16_t svzipq1_s16(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u8))) @@ -9252,6 +10221,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s32))) svint32_t svzipq2_s32(svint32_t, svint32_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s64))) svint64_t svzipq2_s64(svint64_t, svint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_mf8))) +svmfloat8_t svzipq2_mf8(svmfloat8_t, svmfloat8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16))) svint16_t svzipq2_s16(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddqv_u8))) @@ -9330,6 +10301,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s32))) svint32_t svextq(svint32_t, svint32_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s64))) svint64_t svextq(svint64_t, svint64_t, int32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_mf8))) +svmfloat8_t svextq(svmfloat8_t, svmfloat8_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svextq_s16))) svint16_t svextq(svint16_t, svint16_t, int32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svld1q_gather_u64base_index_u32))) @@ -9729,17 +10702,17 @@ svuint32_t svpmov_lane_m(svuint32_t, svbool_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpmov_lane_s32_m))) svint32_t svpmov_lane_m(svint32_t, svbool_t, 
uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_u64))) -void svst1dq(svbool_t, uint64_t const *, svuint64_t); +void svst1dq(svbool_t, uint64_t *, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_f64))) -void svst1dq(svbool_t, float64_t const *, svfloat64_t); +void svst1dq(svbool_t, float64_t *, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_s64))) -void svst1dq(svbool_t, int64_t const *, svint64_t); +void svst1dq(svbool_t, int64_t *, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_u64))) -void svst1dq_vnum(svbool_t, uint64_t const *, int64_t, svuint64_t); +void svst1dq_vnum(svbool_t, uint64_t *, int64_t, svuint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_f64))) -void svst1dq_vnum(svbool_t, float64_t const *, int64_t, svfloat64_t); +void svst1dq_vnum(svbool_t, float64_t *, int64_t, svfloat64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1dq_vnum_s64))) -void svst1dq_vnum(svbool_t, int64_t const *, int64_t, svint64_t); +void svst1dq_vnum(svbool_t, int64_t *, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u8))) void svst1q_scatter(svbool_t, svuint64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_u32))) @@ -9808,6 +10781,26 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64b void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64base_offset_s16))) void svst1q_scatter_offset(svbool_t, svuint64_t, int64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u32))) +void svst1q_scatter_index(svbool_t, uint32_t *, svint64_t, svuint32_t); +__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u64))) +void svst1q_scatter_index(svbool_t, uint64_t *, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_u16))) +void svst1q_scatter_index(svbool_t, uint16_t *, svint64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_bf16))) +void svst1q_scatter_index(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f64))) +void svst1q_scatter_index(svbool_t, float64_t *, svint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f32))) +void svst1q_scatter_index(svbool_t, float32_t *, svint64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_f16))) +void svst1q_scatter_index(svbool_t, float16_t *, svint64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s32))) +void svst1q_scatter_index(svbool_t, int32_t *, svint64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s64))) +void svst1q_scatter_index(svbool_t, int64_t *, svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64index_s16))) +void svst1q_scatter_index(svbool_t, int16_t *, svint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u32))) void svst1q_scatter_index(svbool_t, uint32_t *, svuint64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_u64))) @@ -9828,6 +10821,30 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64i void svst1q_scatter_index(svbool_t, int64_t *, svuint64_t, svint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64index_s16))) void svst1q_scatter_index(svbool_t, int16_t *, svuint64_t, svint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u8))) +void svst1q_scatter_offset(svbool_t, uint8_t *, svint64_t, svuint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u32))) +void svst1q_scatter_offset(svbool_t, uint32_t *, svint64_t, svuint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u64))) +void svst1q_scatter_offset(svbool_t, uint64_t *, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_u16))) +void svst1q_scatter_offset(svbool_t, uint16_t *, svint64_t, svuint16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_bf16))) +void svst1q_scatter_offset(svbool_t, bfloat16_t *, svint64_t, svbfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s8))) +void svst1q_scatter_offset(svbool_t, int8_t *, svint64_t, svint8_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f64))) +void svst1q_scatter_offset(svbool_t, float64_t *, svint64_t, svfloat64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f32))) +void svst1q_scatter_offset(svbool_t, float32_t *, svint64_t, svfloat32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_f16))) +void svst1q_scatter_offset(svbool_t, float16_t *, svint64_t, svfloat16_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s32))) +void svst1q_scatter_offset(svbool_t, int32_t *, svint64_t, svint32_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s64))) +void svst1q_scatter_offset(svbool_t, int64_t *, svint64_t, 
svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_s64offset_s16))) +void svst1q_scatter_offset(svbool_t, int16_t *, svint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u8))) void svst1q_scatter_offset(svbool_t, uint8_t *, svuint64_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_u32))) @@ -9853,17 +10870,17 @@ void svst1q_scatter_offset(svbool_t, int64_t *, svuint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1q_scatter_u64offset_s16))) void svst1q_scatter_offset(svbool_t, int16_t *, svuint64_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_u32))) -void svst1wq(svbool_t, uint32_t const *, svuint32_t); +void svst1wq(svbool_t, uint32_t *, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_f32))) -void svst1wq(svbool_t, float32_t const *, svfloat32_t); +void svst1wq(svbool_t, float32_t *, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_s32))) -void svst1wq(svbool_t, int32_t const *, svint32_t); +void svst1wq(svbool_t, int32_t *, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_u32))) -void svst1wq_vnum(svbool_t, uint32_t const *, int64_t, svuint32_t); +void svst1wq_vnum(svbool_t, uint32_t *, int64_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_f32))) -void svst1wq_vnum(svbool_t, float32_t const *, int64_t, svfloat32_t); +void svst1wq_vnum(svbool_t, float32_t *, int64_t, svfloat32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst1wq_vnum_s32))) -void svst1wq_vnum(svbool_t, int32_t const *, int64_t, svint32_t); +void svst1wq_vnum(svbool_t, int32_t *, int64_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u8))) void svst2q(svbool_t, uint8_t const *, 
svuint8x2_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svst2q_u32))) @@ -10030,6 +11047,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s32))) svint32_t svtblq(svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s64))) svint64_t svtblq(svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_mf8))) +svmfloat8_t svtblq(svmfloat8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtblq_s16))) svint16_t svtblq(svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_u8))) @@ -10054,6 +11073,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s32))) svint32_t svtbxq(svint32_t, svint32_t, svuint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s64))) svint64_t svtbxq(svint64_t, svint64_t, svuint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_mf8))) +svmfloat8_t svtbxq(svmfloat8_t, svmfloat8_t, svuint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svtbxq_s16))) svint16_t svtbxq(svint16_t, svint16_t, svuint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_u8))) @@ -10078,6 +11099,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s32))) svint32_t svuzpq1(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s64))) svint64_t svuzpq1(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_mf8))) +svmfloat8_t svuzpq1(svmfloat8_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq1_s16))) svint16_t svuzpq1(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_u8))) @@ -10102,6 +11125,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s32))) svint32_t svuzpq2(svint32_t, svint32_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s64))) svint64_t svuzpq2(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_mf8))) +svmfloat8_t svuzpq2(svmfloat8_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svuzpq2_s16))) svint16_t svuzpq2(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_u8))) @@ -10126,6 +11151,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s32))) svint32_t svzipq1(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s64))) svint64_t svzipq1(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_mf8))) +svmfloat8_t svzipq1(svmfloat8_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq1_s16))) svint16_t svzipq1(svint16_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_u8))) @@ -10150,6 +11177,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s32))) svint32_t svzipq2(svint32_t, svint32_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s64))) svint64_t svzipq2(svint64_t, svint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_mf8))) +svmfloat8_t svzipq2(svmfloat8_t, svmfloat8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svzipq2_s16))) svint16_t svzipq2(svint16_t, svint16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_bf16))) @@ -11522,6 +12551,8 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u8))) svuint8_t svdup_laneq_u8(svuint8_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s8))) svint8_t svdup_laneq_s8(svint8_t, uint64_t); +__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_mf8))) +svmfloat8_t svdup_laneq_mf8(svmfloat8_t, uint64_t); __ai 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u64))) svuint64_t svdup_laneq_u64(svuint64_t, uint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f64))) @@ -11544,6 +12575,8 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u8))) svuint8_t svdup_laneq(svuint8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_s8))) svint8_t svdup_laneq(svint8_t, uint64_t); +__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_mf8))) +svmfloat8_t svdup_laneq(svmfloat8_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_u64))) svuint64_t svdup_laneq(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svdup_laneq_f64))) @@ -18424,12 +19457,6 @@ __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_z))) svint64_t svadd_s64_z(svbool_t, svint64_t, svint64_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_z))) svint16_t svadd_s16_z(svbool_t, svint16_t, svint16_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f64))) -float64_t svadda_f64(svbool_t, float64_t, svfloat64_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f32))) -float32_t svadda_f32(svbool_t, float32_t, svfloat32_t); -__ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f16))) -float16_t svadda_f16(svbool_t, float16_t, svfloat16_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s8))) int64_t svaddv_s8(svbool_t, svint8_t); __ai __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s32))) @@ -24810,12 +25837,6 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s64_z))) svint64_t svadd_z(svbool_t, svint64_t, svint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadd_s16_z))) svint16_t svadd_z(svbool_t, svint16_t, svint16_t); -__aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f64))) -float64_t svadda(svbool_t, float64_t, svfloat64_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f32))) -float32_t svadda(svbool_t, float32_t, svfloat32_t); -__aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svadda_f16))) -float16_t svadda(svbool_t, float16_t, svfloat16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s8))) int64_t svaddv(svbool_t, svint8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svaddv_s32))) diff --git a/lib/include/arm_vector_types.h b/lib/include/arm_vector_types.h index 8e79d39a6041..e73e9c94fbd0 100644 --- a/lib/include/arm_vector_types.h +++ b/lib/include/arm_vector_types.h @@ -17,9 +17,62 @@ typedef float float32_t; typedef __fp16 float16_t; #if defined(__aarch64__) || defined(__arm64ec__) +typedef __mfp8 mfloat8_t; typedef double float64_t; #endif + +typedef uint64_t fpm_t; + +enum __ARM_FPM_FORMAT { __ARM_FPM_E5M2, __ARM_FPM_E4M3 }; + +enum __ARM_FPM_OVERFLOW { __ARM_FPM_INFNAN, __ARM_FPM_SATURATE }; + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_fpm_init(void) { + return 0; +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_src1_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { + return (__fpm & ~7ull) | (fpm_t)__format; +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_src2_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { + return (__fpm & ~0x38ull) | ((fpm_t)__format << 3u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_dst_format(fpm_t __fpm, enum __ARM_FPM_FORMAT __format) { + return (__fpm & ~0x1c0ull) | ((fpm_t)__format << 6u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_overflow_mul(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) { + return (__fpm & 
~0x4000ull) | ((fpm_t)__behaviour << 14u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_overflow_cvt(fpm_t __fpm, enum __ARM_FPM_OVERFLOW __behaviour) { + return (__fpm & ~0x8000ull) | ((fpm_t)__behaviour << 15u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_lscale(fpm_t __fpm, uint64_t __scale) { + return (__fpm & ~0x7f0000ull) | (__scale << 16u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_nscale(fpm_t __fpm, int64_t __scale) { + return (__fpm & ~0xff000000ull) | (((fpm_t)__scale & 0xffu) << 24u); +} + +static __inline__ fpm_t __attribute__((__always_inline__, __nodebug__)) +__arm_set_fpm_lscale2(fpm_t __fpm, uint64_t __scale) { + return (uint32_t)__fpm | (__scale << 32u); +} + typedef __attribute__((neon_vector_type(8))) int8_t int8x8_t; typedef __attribute__((neon_vector_type(16))) int8_t int8x16_t; typedef __attribute__((neon_vector_type(4))) int16_t int16x4_t; @@ -36,6 +89,10 @@ typedef __attribute__((neon_vector_type(2))) uint32_t uint32x2_t; typedef __attribute__((neon_vector_type(4))) uint32_t uint32x4_t; typedef __attribute__((neon_vector_type(1))) uint64_t uint64x1_t; typedef __attribute__((neon_vector_type(2))) uint64_t uint64x2_t; +#if defined(__aarch64__) || defined(__arm64ec__) +typedef __attribute__((neon_vector_type(8))) mfloat8_t mfloat8x8_t; +typedef __attribute__((neon_vector_type(16))) mfloat8_t mfloat8x16_t; +#endif typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t; typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t; typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t; @@ -109,6 +166,16 @@ typedef struct uint64x2x2_t { uint64x2_t val[2]; } uint64x2x2_t; +#if defined(__aarch64__) || defined(__arm64ec__) +typedef struct mfloat8x8x2_t { + mfloat8x8_t val[2]; +} mfloat8x8x2_t; + +typedef struct mfloat8x16x2_t { + mfloat8x16_t val[2]; +} mfloat8x16x2_t; + 
+#endif typedef struct float16x4x2_t { float16x4_t val[2]; } float16x4x2_t; @@ -199,6 +266,16 @@ typedef struct uint64x2x3_t { uint64x2_t val[3]; } uint64x2x3_t; +#if defined(__aarch64__) || defined(__arm64ec__) +typedef struct mfloat8x8x3_t { + mfloat8x8_t val[3]; +} mfloat8x8x3_t; + +typedef struct mfloat8x16x3_t { + mfloat8x16_t val[3]; +} mfloat8x16x3_t; + +#endif typedef struct float16x4x3_t { float16x4_t val[3]; } float16x4x3_t; @@ -289,6 +366,16 @@ typedef struct uint64x2x4_t { uint64x2_t val[4]; } uint64x2x4_t; +#if defined(__aarch64__) || defined(__arm64ec__) +typedef struct mfloat8x8x4_t { + mfloat8x8_t val[4]; +} mfloat8x8x4_t; + +typedef struct mfloat8x16x4_t { + mfloat8x16_t val[4]; +} mfloat8x16x4_t; + +#endif typedef struct float16x4x4_t { float16x4_t val[4]; } float16x4x4_t; diff --git a/lib/include/avx10_2_512bf16intrin.h b/lib/include/avx10_2_512bf16intrin.h new file mode 100644 index 000000000000..ce43ecbcfe04 --- /dev/null +++ b/lib/include/avx10_2_512bf16intrin.h @@ -0,0 +1,561 @@ +/*===----------- avx10_2_512bf16intrin.h - AVX10-BF16 intrinsics ---------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." +#endif + +#ifdef __SSE2__ + +#ifndef __AVX10_2_512BF16INTRIN_H +#define __AVX10_2_512BF16INTRIN_H + +/* Define the default attributes for the functions in this file. */ +typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1))); + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \ + __min_vector_width__(512))) + +static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) { + return __builtin_bit_cast(__m512bh, _mm512_setzero_ps()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_undefined_pbh(void) { + return (__m512bh)__builtin_ia32_undef512(); +} + +static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set1_pbh(__bf16 bf) { + return (__m512bh)(__v32bf){bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, + bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, + bf, bf, bf, bf, bf, bf, bf, bf, bf, bf}; +} + +static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set_pbh( + __bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, __bf16 bf6, + __bf16 bf7, __bf16 bf8, __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12, + __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16, __bf16 bf17, + __bf16 bf18, __bf16 bf19, __bf16 bf20, __bf16 bf21, __bf16 bf22, + __bf16 bf23, __bf16 bf24, __bf16 bf25, __bf16 bf26, __bf16 bf27, + __bf16 bf28, __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) { + return (__m512bh)(__v32bf){bf32, bf31, bf30, bf29, bf28, bf27, bf26, bf25, + bf24, bf23, bf22, bf21, bf20, bf19, bf18, bf17, + bf16, bf15, bf14, bf13, bf12, bf11, bf10, bf9, + bf8, bf7, bf6, bf5, bf4, bf3, bf2, bf1}; +} + +#define _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10, \ + bf11, bf12, bf13, bf14, bf15, bf16, bf17, bf18, bf19, \ + bf20, bf21, bf22, bf23, bf24, bf25, bf26, bf27, bf28, \ + bf29, bf30, bf31, bf32) \ + _mm512_set_pbh((bf32), (bf31), (bf30), (bf29), (bf28), (bf27), (bf26), \ + (bf25), (bf24), (bf23), (bf22), (bf21), (bf20), (bf19), \ + (bf18), (bf17), (bf16), (bf15), (bf14), (bf13), (bf12), \ + (bf11), (bf10), (bf9), (bf8), (bf7), (bf6), (bf5), (bf4), \ + (bf3), (bf2), (bf1)) + +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_castbf16_ps(__m512bh __a) { + return (__m512)__a; +} + +static 
__inline__ __m512d __DEFAULT_FN_ATTRS512 +_mm512_castbf16_pd(__m512bh __a) { + return (__m512d)__a; +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_castbf16_si512(__m512bh __a) { + return (__m512i)__a; +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_castps_pbh(__m512 __a) { + return (__m512bh)__a; +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_castpd_pbh(__m512d __a) { + return (__m512bh)__a; +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_castsi512_pbh(__m512i __a) { + return (__m512bh)__a; +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS512 +_mm512_castbf16512_pbh128(__m512bh __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS512 +_mm512_castbf16512_pbh256(__m512bh __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_castbf16128_pbh512(__m128bh __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_castbf16256_pbh512(__m256bh __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_zextbf16128_pbh512(__m128bh __a) { + return __builtin_shufflevector( + __a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_zextbf16256_pbh512(__m256bh __a) { + return __builtin_shufflevector(__a, (__v16bf)_mm256_setzero_pbh(), 0, 1, 2, 3, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 
26, 27, 28, + 29, 30, 31); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_abs_pbh(__m512bh __A) { + return (__m512bh)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF), + (__m512i)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_load_pbh(void const *__p) { + return *(const __m512bh *)__p; +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_loadu_pbh(void const *__p) { + struct __loadu_pbh { + __m512bh_u __v; + } __attribute__((__packed__, __may_alias__)); + return ((const struct __loadu_pbh *)__p)->__v; +} + +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_pbh(void *__P, + __m512bh __A) { + *(__m512bh *)__P = __A; +} + +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_pbh(void *__P, + __m512bh __A) { + struct __storeu_pbh { + __m512bh_u __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_pbh *)__P)->__v = __A; +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) { + return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, (__v32bf)__W, + (__v32bf)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) { + return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, + (__v32hi)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_permutexvar_pbh(__m512i __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_add_pbh(__m512bh __A, + __m512bh __B) { + return (__m512bh)((__v32bf)__A + (__v32bf)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_add_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_add_pbh(__A, __B), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 
+_mm512_maskz_add_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_add_pbh(__A, __B), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sub_pbh(__m512bh __A, + __m512bh __B) { + return (__m512bh)((__v32bf)__A - (__v32bf)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_sub_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_sub_pbh(__A, __B), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_sub_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_sub_pbh(__A, __B), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mul_pbh(__m512bh __A, + __m512bh __B) { + return (__m512bh)((__v32bf)__A * (__v32bf)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_mul_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_mul_pbh(__A, __B), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_mul_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_mul_pbh(__A, __B), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_div_pbh(__m512bh __A, + __m512bh __B) { + return (__m512bh)((__v32bf)__A / (__v32bf)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_div_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_div_pbh(__A, __B), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 
+_mm512_maskz_div_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_div_pbh(__A, __B), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_max_pbh(__m512bh __A, + __m512bh __B) { + return (__m512bh)__builtin_ia32_vmaxbf16512((__v32bf)__A, (__v32bf)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_max_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_min_pbh(__m512bh __A, + __m512bh __B) { + return (__m512bh)__builtin_ia32_vminbf16512((__v32bf)__A, (__v32bf)__B); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_min_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B), + (__v32bf)_mm512_setzero_pbh()); +} + +#define _mm512_cmp_pbh_mask(__A, __B, __P) \ + ((__mmask32)__builtin_ia32_vcmpbf16512_mask((__v32bf)(__m512bh)(__A), \ + (__v32bf)(__m512bh)(__B), \ + (int)(__P), (__mmask32) - 1)) + +#define _mm512_mask_cmp_pbh_mask(__U, __A, __B, __P) \ + ((__mmask32)__builtin_ia32_vcmpbf16512_mask((__v32bf)(__m512bh)(__A), \ + (__v32bf)(__m512bh)(__B), \ + (int)(__P), (__mmask32)(__U))) + +#define 
_mm512_mask_fpclass_pbh_mask(__U, __A, imm) \ + ((__mmask32)__builtin_ia32_vfpclassbf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32)(__U))) + +#define _mm512_fpclass_pbh_mask(__A, imm) \ + ((__mmask32)__builtin_ia32_vfpclassbf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32) - 1)) + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_scalef_pbh(__m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_vscalefbf16512_mask( + (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_undefined_pbh(), + (__mmask32)-1); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pbh( + __m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_vscalefbf16512_mask( + (__v32bf)__A, (__v32bf)__B, (__v32bf)__W, (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { + return (__m512bh)__builtin_ia32_vscalefbf16512_mask( + (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_setzero_pbh(), + (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_rcp_pbh(__m512bh __A) { + return (__m512bh)__builtin_ia32_vrcpbf16512_mask( + (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_rcp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_vrcpbf16512_mask((__v32bf)__A, (__v32bf)__W, + (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_vrcpbf16512_mask( + (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_getexp_pbh(__m512bh __A) { + return (__m512bh)__builtin_ia32_vgetexpbf16512_mask( + (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 
+_mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_vgetexpbf16512_mask( + (__v32bf)__A, (__v32bf)__W, (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_vgetexpbf16512_mask( + (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_rsqrt_pbh(__m512bh __A) { + return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask( + (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask((__v32bf)__A, (__v32bf)__W, + (__mmask32)__U); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask( + (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U); +} + +#define _mm512_reduce_pbh(__A, imm) \ + ((__m512bh)__builtin_ia32_vreducebf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_undefined_pbh(), \ + (__mmask32) - 1)) + +#define _mm512_mask_reduce_pbh(__W, __U, __A, imm) \ + ((__m512bh)__builtin_ia32_vreducebf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \ + (__mmask32)(__U))) + +#define _mm512_maskz_reduce_pbh(__U, __A, imm) \ + ((__m512bh)__builtin_ia32_vreducebf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \ + (__mmask32)(__U))) + +#define _mm512_roundscale_pbh(__A, imm) \ + ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \ + (__mmask32) - 1)) + +#define _mm512_mask_roundscale_pbh(__W, __U, __A, imm) \ + ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), 
(__v32bf)(__m512bh)(__W), \ + (__mmask32)(__U))) + +#define _mm512_maskz_roundscale_pbh(__U, __A, imm) \ + ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \ + (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \ + (__mmask32)(__U))) + +#define _mm512_getmant_pbh(__A, __B, __C) \ + ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v32bf)_mm512_undefined_pbh(), (__mmask32) - 1)) + +#define _mm512_mask_getmant_pbh(__W, __U, __A, __B, __C) \ + ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v32bf)(__m512bh)(__W), (__mmask32)(__U))) + +#define _mm512_maskz_getmant_pbh(__U, __A, __B, __C) \ + ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \ + (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U))) + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) { + return (__m512bh)__builtin_ia32_vsqrtbf16512((__v32bf)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_sqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, (__v32bf)_mm512_sqrt_pbh(__A), (__v32bf)__W); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) { + return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, + (__v32bf)_mm512_sqrt_pbh(__A), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B, + (__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_fmadd_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), 
(__v32bf)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pbh( + __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh( + __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B, + -(__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_fmsub_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pbh( + __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh( + __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B, + (__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pbh( + __m512bh __A, __mmask32 
__U, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pbh( + __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh( + __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)_mm512_setzero_pbh()); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B, + -(__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pbh( + __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)__A); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pbh( + __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)__C); +} + +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pbh( + __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { + return (__m512bh)__builtin_ia32_selectpbf_512( + (__mmask32)__U, + _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), + (__v32bf)_mm512_setzero_pbh()); +} + +#undef __DEFAULT_FN_ATTRS512 + +#endif +#endif diff --git a/lib/include/avx10_2_512convertintrin.h 
b/lib/include/avx10_2_512convertintrin.h new file mode 100644 index 000000000000..0b5fca5cda52 --- /dev/null +++ b/lib/include/avx10_2_512convertintrin.h @@ -0,0 +1,320 @@ +/*===--------- avx10_2_512convertintrin.h - AVX10_2_512CONVERT -------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." +#endif // __IMMINTRIN_H + +#ifdef __SSE2__ + +#ifndef __AVX10_2_512CONVERTINTRIN_H +#define __AVX10_2_512CONVERTINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \ + __min_vector_width__(512))) + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtx2ps_ph(__m512 __A, + __m512 __B) { + return (__m512h)__builtin_ia32_vcvt2ps2phx512_mask( + (__v16sf)__A, (__v16sf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)(-1), + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtx2ps_ph(__m512h __W, __mmask32 __U, __m512 __A, __m512 __B) { + return (__m512h)__builtin_ia32_vcvt2ps2phx512_mask( + (__v16sf)__A, (__v16sf)__B, (__v32hf)__W, (__mmask32)__U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtx2ps_ph(__mmask32 __U, __m512 __A, __m512 __B) { + return (__m512h)__builtin_ia32_vcvt2ps2phx512_mask( + (__v16sf)__A, (__v16sf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvtx_round2ps_ph(A, B, R) \ + ((__m512h)__builtin_ia32_vcvt2ps2phx512_mask( \ + (__v16sf)(A), (__v16sf)(B), (__v32hf)_mm512_undefined_ph(), \ + (__mmask32)(-1), (const int)(R))) + +#define 
_mm512_mask_cvtx_round2ps_ph(W, U, A, B, R) \ + ((__m512h)__builtin_ia32_vcvt2ps2phx512_mask((__v16sf)(A), (__v16sf)(B), \ + (__v32hf)(W), (__mmask32)(U), \ + (const int)(R))) + +#define _mm512_maskz_cvtx_round2ps_ph(U, A, B, R) \ + ((__m512h)__builtin_ia32_vcvt2ps2phx512_mask( \ + (__v16sf)(A), (__v16sf)(B), (__v32hf)_mm512_setzero_ph(), \ + (__mmask32)(U), (const int)(R))) + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvtbiasph_bf8(__m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), + (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_bf8( + __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtbiasph_bf8(__mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), + (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvtbiassph_bf8(__m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), + (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_bf8( + __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtbiassph_bf8(__mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), + (__mmask32)__U); +} + +static __inline__ __m256i 
__DEFAULT_FN_ATTRS512 +_mm512_cvtbiasph_hf8(__m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), + (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_hf8( + __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtbiasph_hf8(__mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), + (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_cvtbiassph_hf8(__m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(), + (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_hf8( + __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtbiassph_hf8(__mmask32 __U, __m512i __A, __m512h __B) { + return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask( + (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(), + (__mmask32)__U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvt2ph_bf8(__m512h __A, + __m512h __B) { + return (__m512i)__builtin_ia32_vcvt2ph2bf8_512((__v32hf)(__A), + (__v32hf)(__B)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvt2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_cvt2ph_bf8(__A, __B), (__v64qi)__W); +} + +static 
__inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvt2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_cvt2ph_bf8(__A, __B), + (__v64qi)(__m512i)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvts2ph_bf8(__m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_vcvt2ph2bf8s_512((__v32hf)(__A), + (__v32hf)(__B)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvts2ph_bf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), (__v64qi)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvts2ph_bf8(__mmask64 __U, __m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_cvts2ph_bf8(__A, __B), + (__v64qi)(__m512i)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvt2ph_hf8(__m512h __A, + __m512h __B) { + return (__m512i)__builtin_ia32_vcvt2ph2hf8_512((__v32hf)(__A), + (__v32hf)(__B)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvt2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_cvt2ph_hf8(__A, __B), (__v64qi)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvt2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_cvt2ph_hf8(__A, __B), + (__v64qi)(__m512i)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_cvts2ph_hf8(__m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_vcvt2ph2hf8s_512((__v32hf)(__A), + (__v32hf)(__B)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvts2ph_hf8(__m512i __W, __mmask64 __U, __m512h __A, __m512h __B) { + 
return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), (__v64qi)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvts2ph_hf8(__mmask64 __U, __m512h __A, __m512h __B) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_cvts2ph_hf8(__A, __B), + (__v64qi)(__m512i)_mm512_setzero_si512()); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvthf8(__m256i __A) { + return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask( + (__v32qi)__A, (__v32hf)(__m512h)_mm512_undefined_ph(), (__mmask32)-1); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvthf8(__m512h __W, __mmask32 __U, __m256i __A) { + return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask( + (__v32qi)__A, (__v32hf)(__m512h)__W, (__mmask32)__U); +} + +static __inline__ __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvthf8(__mmask32 __U, __m256i __A) { + return (__m512h)__builtin_ia32_vcvthf8_2ph512_mask( + (__v32qi)__A, (__v32hf)(__m512h)_mm512_setzero_ph(), (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtph_bf8(__m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2bf8_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtph_bf8(__m256i __W, __mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2bf8_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtph_bf8(__mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2bf8_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_bf8(__m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); +} + +static __inline__ __m256i 
__DEFAULT_FN_ATTRS512 +_mm512_mask_cvtsph_bf8(__m256i __W, __mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtsph_bf8(__mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2bf8s_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtph_hf8(__m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2hf8_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtph_hf8(__m256i __W, __mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2hf8_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtph_hf8(__mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2hf8_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsph_hf8(__m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtsph_hf8(__m256i __W, __mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtsph_hf8(__mmask32 __U, __m512h __A) { + return (__m256i)__builtin_ia32_vcvtph2hf8s_512_mask( + (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U); +} + +static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtbf8_ph(__m256i __A) { + return _mm512_castsi512_ph(_mm512_slli_epi16(_mm512_cvtepi8_epi16(__A), 8)); +} + +static __inline 
__m512h __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtbf8_ph(__m512h __S, __mmask32 __U, __m256i __A) { + return _mm512_castsi512_ph( + _mm512_mask_slli_epi16((__m512i)__S, __U, _mm512_cvtepi8_epi16(__A), 8)); +} + +static __inline __m512h __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtbf8_ph(__mmask32 __U, __m256i __A) { + return _mm512_castsi512_ph( + _mm512_slli_epi16(_mm512_maskz_cvtepi8_epi16(__U, __A), 8)); +} + +#undef __DEFAULT_FN_ATTRS512 + +#endif // __AVX10_2_512CONVERTINTRIN_H +#endif // __SSE2__ diff --git a/lib/include/avx10_2_512minmaxintrin.h b/lib/include/avx10_2_512minmaxintrin.h new file mode 100644 index 000000000000..fbc7fbadbc6b --- /dev/null +++ b/lib/include/avx10_2_512minmaxintrin.h @@ -0,0 +1,127 @@ +/*===---- avx10_2_512minmaxintrin.h - AVX10_2_512MINMAX intrinsics ---------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." 
+#endif // __IMMINTRIN_H + +#ifndef __AVX10_2_512MINMAXINTRIN_H +#define __AVX10_2_512MINMAXINTRIN_H + +#define _mm512_minmax_pbh(A, B, C) \ + ((__m512bh)__builtin_ia32_vminmaxbf16512((__v32bf)(__m512bh)(A), \ + (__v32bf)(__m512bh)(A), (int)(C))) + +#define _mm512_mask_minmax_pbh(W, U, A, B, C) \ + ((__m512bh)__builtin_ia32_selectpbf_512( \ + (__mmask32)(U), \ + (__v32bf)_mm512_minmax_pbh((__v32bf)(__m512bh)(A), \ + (__v32bf)(__m512bh)(B), (int)(C)), \ + (__v32bf)(__m512bh)(W))) + +#define _mm512_maskz_minmax_pbh(U, A, B, C) \ + ((__m512bh)__builtin_ia32_selectpbf_512( \ + (__mmask32)(U), \ + (__v32bf)_mm512_minmax_pbh((__v32bf)(__m512bh)(A), \ + (__v32bf)(__m512bh)(B), (int)(C)), \ + (__v32bf) __builtin_bit_cast(__m512bh, _mm512_setzero_ps()))) + +#define _mm512_minmax_pd(A, B, C) \ + ((__m512d)__builtin_ia32_vminmaxpd512_round_mask( \ + (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C), \ + (__v8df)_mm512_undefined_pd(), (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_minmax_pd(W, U, A, B, C) \ + ((__m512d)__builtin_ia32_vminmaxpd512_round_mask( \ + (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C), \ + (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_minmax_pd(U, A, B, C) \ + ((__m512d)__builtin_ia32_vminmaxpd512_round_mask( \ + (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C), \ + (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_minmax_round_pd(A, B, C, R) \ + ((__m512d)__builtin_ia32_vminmaxpd512_round_mask( \ + (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C), \ + (__v8df)_mm512_undefined_pd(), (__mmask8)-1, (int)(R))) + +#define _mm512_mask_minmax_round_pd(W, U, A, B, C, R) \ + ((__m512d)__builtin_ia32_vminmaxpd512_round_mask( \ + (__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C), \ + (__v8df)(__m512d)(W), (__mmask8)(U), (int)(R))) + +#define _mm512_maskz_minmax_round_pd(U, A, B, C, R) \ + ((__m512d)__builtin_ia32_vminmaxpd512_round_mask( \ + 
(__v8df)(__m512d)(A), (__v8df)(__m512d)(B), (int)(C), \ + (__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(R))) + +#define _mm512_minmax_ph(A, B, C) \ + ((__m512h)__builtin_ia32_vminmaxph512_round_mask( \ + (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C), \ + (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_minmax_ph(W, U, A, B, C) \ + ((__m512h)__builtin_ia32_vminmaxph512_round_mask( \ + (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C), \ + (__v32hf)(__m512h)(W), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_minmax_ph(U, A, B, C) \ + ((__m512h)__builtin_ia32_vminmaxph512_round_mask( \ + (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C), \ + (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_minmax_round_ph(A, B, C, R) \ + ((__m512h)__builtin_ia32_vminmaxph512_round_mask( \ + (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C), \ + (__v32hf)_mm512_undefined_ph(), (__mmask32)-1, (int)(R))) + +#define _mm512_mask_minmax_round_ph(W, U, A, B, C, R) \ + ((__m512h)__builtin_ia32_vminmaxph512_round_mask( \ + (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C), \ + (__v32hf)(__m512h)(W), (__mmask32)(U), (int)(R))) + +#define _mm512_maskz_minmax_round_ph(U, A, B, C, R) \ + ((__m512h)__builtin_ia32_vminmaxph512_round_mask( \ + (__v32hf)(__m512h)(A), (__v32hf)(__m512h)(B), (int)(C), \ + (__v32hf)_mm512_setzero_ph(), (__mmask32)(U), (int)(R))) + +#define _mm512_minmax_ps(A, B, C) \ + ((__m512)__builtin_ia32_vminmaxps512_round_mask( \ + (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), \ + (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_minmax_ps(W, U, A, B, C) \ + ((__m512)__builtin_ia32_vminmaxps512_round_mask( \ + (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), (__v16sf)(W), \ + (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_minmax_ps(U, A, B, C) \ + 
((__m512)__builtin_ia32_vminmaxps512_round_mask( \ + (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), \ + (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_minmax_round_ps(A, B, C, R) \ + ((__m512)__builtin_ia32_vminmaxps512_round_mask( \ + (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), \ + (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (int)(R))) + +#define _mm512_mask_minmax_round_ps(W, U, A, B, C, R) \ + ((__m512)__builtin_ia32_vminmaxps512_round_mask( \ + (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), (__v16sf)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm512_maskz_minmax_round_ps(U, A, B, C, R) \ + ((__m512)__builtin_ia32_vminmaxps512_round_mask( \ + (__v16sf)(__m512)(A), (__v16sf)(__m512)(B), (int)(C), \ + (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(R))) +#endif // __AVX10_2_512MINMAXINTRIN_H diff --git a/lib/include/avx10_2_512niintrin.h b/lib/include/avx10_2_512niintrin.h new file mode 100644 index 000000000000..7e614f7740bf --- /dev/null +++ b/lib/include/avx10_2_512niintrin.h @@ -0,0 +1,314 @@ +/*===---- avx10_2_512niintrin.h - AVX10.2-512 new instruction intrinsics ---=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." 
+#endif + +#ifdef __SSE2__ + +#ifndef __AVX10_2_512NIINTRIN_H +#define __AVX10_2_512NIINTRIN_H + +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \ + __min_vector_width__(512))) + +/* VNNI FP16 */ +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_dpph_ps(__m512 __W, + __m512h __A, + __m512h __B) { + return (__m512)__builtin_ia32_vdpphps512((__v16sf)__W, (__v32hf)__A, + (__v32hf)__B); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_mask_dpph_ps(__m512 __W, + __mmask16 __U, + __m512h __A, + __m512h __B) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_dpph_ps(__W, __A, __B), (__v16sf)__W); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS _mm512_maskz_dpph_ps(__mmask16 __U, + __m512 __W, + __m512h __A, + __m512h __B) { + return (__m512)__builtin_ia32_selectps_512( + (__mmask16)__U, (__v16sf)_mm512_dpph_ps(__W, __A, __B), + (__v16sf)_mm512_setzero_ps()); +} + +/* VMPSADBW */ +#define _mm512_mpsadbw_epu8(A, B, imm) \ + ((__m512i)__builtin_ia32_mpsadbw512((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), (int)(imm))) + +#define _mm512_mask_mpsadbw_epu8(W, U, A, B, imm) \ + ((__m512i)__builtin_ia32_selectw_512( \ + (__mmask32)(U), (__v32hi)_mm512_mpsadbw_epu8((A), (B), (imm)), \ + (__v32hi)(__m512i)(W))) + +#define _mm512_maskz_mpsadbw_epu8(U, A, B, imm) \ + ((__m512i)__builtin_ia32_selectw_512( \ + (__mmask32)(U), (__v32hi)_mm512_mpsadbw_epu8((A), (B), (imm)), \ + (__v32hi)_mm512_setzero_si512())) + +/* VNNI INT8 */ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssd_epi32(__m512i __W, + __m512i __A, + __m512i __B) { + return (__m512i)__builtin_ia32_vpdpbssd512((__v16si)__W, (__v16si)__A, + (__v16si)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbssd_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbssd_epi32(__W, __A, __B), (__v16si)__W); +} + +static 
__inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssd_epi32( + __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbssd_epi32(__W, __A, __B), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbssds_epi32(__m512i __W, + __m512i __A, + __m512i __B) { + return (__m512i)__builtin_ia32_vpdpbssds512((__v16si)__W, (__v16si)__A, + (__v16si)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbssds_epi32( + __m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbssds_epi32(__W, __A, __B), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbssds_epi32( + __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbssds_epi32(__W, __A, __B), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsud_epi32(__m512i __W, + __m512i __A, + __m512i __B) { + return (__m512i)__builtin_ia32_vpdpbsud512((__v16si)__W, (__v16si)__A, + (__v16si)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbsud_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbsud_epi32(__W, __A, __B), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsud_epi32( + __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbsud_epi32(__W, __A, __B), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbsuds_epi32(__m512i __W, + __m512i __A, + __m512i __B) { + return (__m512i)__builtin_ia32_vpdpbsuds512((__v16si)__W, (__v16si)__A, + (__v16si)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbsuds_epi32( + 
__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbsuds_epi32(__W, __A, __B), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbsuds_epi32( + __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbsuds_epi32(__W, __A, __B), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuud_epi32(__m512i __W, + __m512i __A, + __m512i __B) { + return (__m512i)__builtin_ia32_vpdpbuud512((__v16si)__W, (__v16si)__A, + (__v16si)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpbuud_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbuud_epi32(__W, __A, __B), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuud_epi32( + __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbuud_epi32(__W, __A, __B), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpbuuds_epi32(__m512i __W, + __m512i __A, + __m512i __B) { + return (__m512i)__builtin_ia32_vpdpbuuds512((__v16si)__W, (__v16si)__A, + (__v16si)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpbuuds_epi32( + __m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbuuds_epi32(__W, __A, __B), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpbuuds_epi32( + __mmask16 __U, __m512i __W, __m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_selectd_512( + __U, (__v16si)_mm512_dpbuuds_epi32(__W, __A, __B), + (__v16si)_mm512_setzero_si512()); +} + +/* VNNI INT16 */ +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsud_epi32(__m512i __A, + 
__m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_vpdpwsud512((__v16si)__A, (__v16si)__B, + (__v16si)__C); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwsud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwsud_epi32(__A, __B, __C), + (__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsud_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwsud_epi32(__A, __B, __C), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwsuds_epi32(__m512i __A, + __m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_vpdpwsuds512((__v16si)__A, (__v16si)__B, + (__v16si)__C); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwsuds_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwsuds_epi32(__A, __B, __C), + (__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwsuds_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwsuds_epi32(__A, __B, __C), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusd_epi32(__m512i __A, + __m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_vpdpwusd512((__v16si)__A, (__v16si)__B, + (__v16si)__C); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwusd_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwusd_epi32(__A, __B, __C), + (__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusd_epi32( + __m512i __A, __mmask16 __U, __m512i __B, 
__m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwusd_epi32(__A, __B, __C), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwusds_epi32(__m512i __A, + __m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_vpdpwusds512((__v16si)__A, (__v16si)__B, + (__v16si)__C); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwusds_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwusds_epi32(__A, __B, __C), + (__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwusds_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwusds_epi32(__A, __B, __C), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuud_epi32(__m512i __A, + __m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_vpdpwuud512((__v16si)__A, (__v16si)__B, + (__v16si)__C); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_dpwuud_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwuud_epi32(__A, __B, __C), + (__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuud_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwuud_epi32(__A, __B, __C), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_dpwuuds_epi32(__m512i __A, + __m512i __B, + __m512i __C) { + return (__m512i)__builtin_ia32_vpdpwuuds512((__v16si)__A, (__v16si)__B, + (__v16si)__C); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_dpwuuds_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i 
__C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwuuds_epi32(__A, __B, __C), + (__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_dpwuuds_epi32( + __m512i __A, __mmask16 __U, __m512i __B, __m512i __C) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_dpwuuds_epi32(__A, __B, __C), + (__v16si)_mm512_setzero_si512()); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* __SSE2__ */ +#endif /* __AVX10_2_512NIINTRIN_H */ diff --git a/lib/include/avx10_2_512satcvtdsintrin.h b/lib/include/avx10_2_512satcvtdsintrin.h new file mode 100644 index 000000000000..5970ab033144 --- /dev/null +++ b/lib/include/avx10_2_512satcvtdsintrin.h @@ -0,0 +1,303 @@ +/*===----- avx10_2_512satcvtdsintrin.h - AVX10_2_512SATCVTDS intrinsics ----=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." +#endif + +#ifndef __AVX10_2_512SATCVTDSINTRIN_H +#define __AVX10_2_512SATCVTDSINTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \ + __min_vector_width__(512))) + +// 512 bit : Double -> Int +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi32(__m512d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( + (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_mask_cvttspd_epi32(__m256i __W, __mmask8 __U, __m512d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( + (__v8df)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttspd_epi32(__mmask8 __U, __m512d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( + (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundpd_epi32(__A, __R) \ + ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \ + (__v8df)(__m512d)(__A), (__v8si)_mm256_undefined_si256(), \ + (__mmask8) - 1, (const int)(__R))) + +#define _mm512_mask_cvtts_roundpd_epi32(__W, __U, __A, __R) \ + ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \ + (__v8df)(__m512d)(__A), (__v8si)(__m256i)(__W), (__mmask8)(__U), \ + (const int)(__R))) + +#define _mm512_maskz_cvtts_roundpd_epi32(__U, __A, __R) \ + ((__m256i)__builtin_ia32_vcvttpd2dqs512_round_mask( \ + (__v8df)(__m512d)(__A), (__v8si)_mm256_setzero_si256(), (__mmask8)(__U), \ + (const int)(__R))) + +// 512 bit : Double -> uInt +static __inline__ __m256i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu32(__m512d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( + (__v8df)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_mask_cvttspd_epu32(__m256i __W, __mmask8 __U, __m512d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( + (__v8df)__A, 
(__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttspd_epu32(__mmask8 __U, __m512d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( + (__v8df)__A, (__v8si)_mm256_setzero_si256(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundpd_epu32(__A, __R) \ + ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \ + (__v8df)(__m512d)(__A), (__v8si)_mm256_undefined_si256(), \ + (__mmask8) - 1, (const int)(__R))) + +#define _mm512_mask_cvtts_roundpd_epu32(__W, __U, __A, __R) \ + ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \ + (__v8df)(__m512d)(__A), (__v8si)(__m256i)(__W), (__mmask8)(__U), \ + (const int)(__R))) + +#define _mm512_maskz_cvtts_roundpd_epu32(__U, __A, __R) \ + ((__m256i)__builtin_ia32_vcvttpd2udqs512_round_mask( \ + (__v8df)(__m512d)(__A), (__v8si)_mm256_setzero_si256(), (__mmask8)(__U), \ + (const int)(__R))) + +// 512 bit : Double -> Long + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epi64(__m512d __A) { + return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( + (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_cvttspd_epi64(__m512i __W, __mmask8 __U, __m512d __A) { + return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( + (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttspd_epi64(__mmask8 __U, __m512d __A) { + return ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( + (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundpd_epi64(__A, __R) \ + ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \ + (__v8df)(__m512d)(__A), (__v8di)_mm512_undefined_epi32(), \ + (__mmask8) - 1, (const int)(__R))) + +#define _mm512_mask_cvtts_roundpd_epi64(__W, __U, __A, __R) \ + 
((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \ + (__v8df)(__m512d)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \ + (const int)(__R))) + +#define _mm512_maskz_cvtts_roundpd_epi64(__U, __A, __R) \ + ((__m512i)__builtin_ia32_vcvttpd2qqs512_round_mask( \ + (__v8df)(__m512d)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \ + (const int)(__R))) + +// 512 bit : Double -> ULong + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttspd_epu64(__m512d __A) { + return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( + (__v8df)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_cvttspd_epu64(__m512i __W, __mmask8 __U, __m512d __A) { + return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( + (__v8df)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttspd_epu64(__mmask8 __U, __m512d __A) { + return ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( + (__v8df)__A, (__v8di)_mm512_setzero_si512(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundpd_epu64(__A, __R) \ + ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \ + (__v8df)(__m512d)(__A), (__v8di)_mm512_undefined_epi32(), \ + (__mmask8) - 1, (const int)(__R))) + +#define _mm512_mask_cvtts_roundpd_epu64(__W, __U, __A, __R) \ + ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \ + (__v8df)(__m512d)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \ + (const int)(__R))) + +#define _mm512_maskz_cvtts_roundpd_epu64(__U, __A, __R) \ + ((__m512i)__builtin_ia32_vcvttpd2uqqs512_round_mask( \ + (__v8df)(__m512d)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \ + (const int)(__R))) + +// 512 bit: Float -> int +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi32(__m512 __A) { + return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( + (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1, 
+ _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_cvttsps_epi32(__m512i __W, __mmask16 __U, __m512 __A) { + return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( + (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttsps_epi32(__mmask16 __U, __m512 __A) { + return ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( + (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundps_epi32(__A, __R) \ + ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \ + (__v16sf)(__m512)(__A), (__v16si)_mm512_undefined_epi32(), \ + (__mmask16) - 1, (const int)(__R))) + +#define _mm512_mask_cvtts_roundps_epi32(__W, __U, __A, __R) \ + ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \ + (__v16sf)(__m512)(__A), (__v16si)(__m512i)(__W), (__mmask16)(__U), \ + (const int)(__R))) + +#define _mm512_maskz_cvtts_roundps_epi32(__U, __A, __R) \ + ((__m512i)__builtin_ia32_vcvttps2dqs512_round_mask( \ + (__v16sf)(__m512)(__A), (__v16si)_mm512_setzero_si512(), \ + (__mmask16)(__U), (const int)(__R))) + +// 512 bit: Float -> uint +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu32(__m512 __A) { + return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( + (__v16sf)(__A), (__v16si)_mm512_undefined_epi32(), (__mmask16)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_cvttsps_epu32(__m512i __W, __mmask16 __U, __m512 __A) { + return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( + (__v16sf)(__A), (__v16si)(__W), __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttsps_epu32(__mmask16 __U, __m512 __A) { + return ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( + (__v16sf)(__A), (__v16si)_mm512_setzero_si512(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundps_epu32(__A, __R) 
\ + ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \ + (__v16sf)(__m512)(__A), (__v16si)_mm512_undefined_epi32(), \ + (__mmask16) - 1, (const int)(__R))) + +#define _mm512_mask_cvtts_roundps_epu32(__W, __U, __A, __R) \ + ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \ + (__v16sf)(__m512)(__A), (__v16si)(__m512i)(__W), (__mmask16)(__U), \ + (const int)(__R))) + +#define _mm512_maskz_cvtts_roundps_epu32(__U, __A, __R) \ + ((__m512i)__builtin_ia32_vcvttps2udqs512_round_mask( \ + (__v16sf)(__m512)(__A), (__v16si)_mm512_setzero_si512(), \ + (__mmask16)(__U), (const int)(__R))) + +// 512 bit : float -> long +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epi64(__m256 __A) { + return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( + (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_cvttsps_epi64(__m512i __W, __mmask8 __U, __m256 __A) { + return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( + (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttsps_epi64(__mmask8 __U, __m256 __A) { + return ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( + (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundps_epi64(__A, __R) \ + ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \ + (__v8sf)(__m256)(__A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \ + (const int)(__R))) + +#define _mm512_mask_cvtts_roundps_epi64(__W, __U, __A, __R) \ + ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \ + (__v8sf)(__m256)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \ + (const int)(__R))) + +#define _mm512_maskz_cvtts_roundps_epi64(__U, __A, __R) \ + ((__m512i)__builtin_ia32_vcvttps2qqs512_round_mask( \ + (__v8sf)(__m256)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \ + (const int)(__R))) + +// 512 bit : 
float -> ulong +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_cvttsps_epu64(__m256 __A) { + return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( + (__v8sf)__A, (__v8di)_mm512_undefined_epi32(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_cvttsps_epu64(__m512i __W, __mmask8 __U, __m256 __A) { + return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( + (__v8sf)__A, (__v8di)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttsps_epu64(__mmask8 __U, __m256 __A) { + return ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( + (__v8sf)__A, (__v8di)_mm512_setzero_si512(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm512_cvtts_roundps_epu64(__A, __R) \ + ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \ + (__v8sf)(__m256)(__A), (__v8di)_mm512_undefined_epi32(), (__mmask8) - 1, \ + (const int)(__R))) + +#define _mm512_mask_cvtts_roundps_epu64(__W, __U, __A, __R) \ + ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \ + (__v8sf)(__m256)(__A), (__v8di)(__m512i)(__W), (__mmask8)(__U), \ + (const int)(__R))) + +#define _mm512_maskz_cvtts_roundps_epu64(__U, __A, __R) \ + ((__m512i)__builtin_ia32_vcvttps2uqqs512_round_mask( \ + (__v8sf)(__m256)(__A), (__v8di)_mm512_setzero_si512(), (__mmask8)(__U), \ + (const int)(__R))) + +#undef __DEFAULT_FN_ATTRS +#endif // __AVX10_2_512SATCVTDSINTRIN_H diff --git a/lib/include/avx10_2_512satcvtintrin.h b/lib/include/avx10_2_512satcvtintrin.h new file mode 100644 index 000000000000..7f41deb5212c --- /dev/null +++ b/lib/include/avx10_2_512satcvtintrin.h @@ -0,0 +1,301 @@ +/*===------ avx10_2_512satcvtintrin.h - AVX10_2_512SATCVT intrinsics -------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." +#endif // __IMMINTRIN_H + +#ifndef __AVX10_2_512SATCVTINTRIN_H +#define __AVX10_2_512SATCVTINTRIN_H + +#define _mm512_ipcvtbf16_epi8(A) \ + ((__m512i)__builtin_ia32_vcvtbf162ibs512((__v32bf)(__m512bh)(A))) + +#define _mm512_mask_ipcvtbf16_epi8(W, U, A) \ + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_ipcvtbf16_epi8(A), \ + (__v32hi)(__m512i)(W))) + +#define _mm512_maskz_ipcvtbf16_epi8(U, A) \ + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_ipcvtbf16_epi8(A), \ + (__v32hi)_mm512_setzero_si512())) + +#define _mm512_ipcvtbf16_epu8(A) \ + ((__m512i)__builtin_ia32_vcvtbf162iubs512((__v32bf)(__m512bh)(A))) + +#define _mm512_mask_ipcvtbf16_epu8(W, U, A) \ + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_ipcvtbf16_epu8(A), \ + (__v32hi)(__m512i)(W))) + +#define _mm512_maskz_ipcvtbf16_epu8(U, A) \ + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_ipcvtbf16_epu8(A), \ + (__v32hi)_mm512_setzero_si512())) + +#define _mm512_ipcvttbf16_epi8(A) \ + ((__m512i)__builtin_ia32_vcvttbf162ibs512((__v32bf)(__m512bh)(A))) + +#define _mm512_mask_ipcvttbf16_epi8(W, U, A) \ + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_ipcvttbf16_epi8(A), \ + (__v32hi)(__m512i)(W))) + +#define _mm512_maskz_ipcvttbf16_epi8(U, A) \ + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_ipcvttbf16_epi8(A), \ + (__v32hi)_mm512_setzero_si512())) + +#define _mm512_ipcvttbf16_epu8(A) \ + ((__m512i)__builtin_ia32_vcvttbf162iubs512((__v32bf)(__m512bh)(A))) + +#define _mm512_mask_ipcvttbf16_epu8(W, U, A) \ + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_ipcvttbf16_epu8(A), \ + (__v32hi)(__m512i)(W))) + +#define 
_mm512_maskz_ipcvttbf16_epu8(U, A) \ + ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ + (__v32hi)_mm512_ipcvttbf16_epu8(A), \ + (__v32hi)_mm512_setzero_si512())) + +#define _mm512_ipcvtph_epi8(A) \ + ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_ipcvtph_epi8(W, U, A) \ + ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \ + (__v32hu)(W), (__mmask32)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_ipcvtph_epi8(U, A) \ + ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_ipcvt_roundph_epi8(A, R) \ + ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \ + (__v32hu)_mm512_setzero_si512(), \ + (__mmask32)-1, (const int)R)) + +#define _mm512_mask_ipcvt_roundph_epi8(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R)) + +#define _mm512_maskz_ipcvt_roundph_epi8(U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A), \ + (__v32hu)_mm512_setzero_si512(), \ + (__mmask32)(U), (const int)R)) + +#define _mm512_ipcvtph_epu8(A) \ + ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_ipcvtph_epu8(W, U, A) \ + ((__m512i)__builtin_ia32_vcvtph2iubs512_mask((__v32hf)(__m512h)(A), \ + (__v32hu)(W), (__mmask32)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_ipcvtph_epu8(U, A) \ + ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_ipcvt_roundph_epu8(A, R) \ + ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ + (__v32hf)(__m512h)(A), 
(__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + (const int)R)) + +#define _mm512_mask_ipcvt_roundph_epu8(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R)) + +#define _mm512_maskz_ipcvt_roundph_epu8(U, A, R) \ + ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ + (const int)R)) + +#define _mm512_ipcvtps_epi8(A) \ + ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \ + (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_ipcvtps_epi8(W, U, A) \ + ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \ + (__v16su)(W), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_ipcvtps_epi8(U, A) \ + ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \ + (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_ipcvt_roundps_epi8(A, R) \ + ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \ + (__v16su)_mm512_setzero_si512(), \ + (__mmask16)-1, (const int)R)) + +#define _mm512_mask_ipcvt_roundps_epi8(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \ + (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R)) + +#define _mm512_maskz_ipcvt_roundps_epi8(U, A, R) \ + ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A), \ + (__v16su)_mm512_setzero_si512(), \ + (__mmask16)(U), (const int)R)) + +#define _mm512_ipcvtps_epu8(A) \ + ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ + (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_ipcvtps_epu8(W, U, A) \ + ((__m512i)__builtin_ia32_vcvtps2iubs512_mask((__v16sf)(__m512)(A), \ + (__v16su)(W), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_ipcvtps_epu8(U, A) \ + 
((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ + (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_ipcvt_roundps_epu8(A, R) \ + ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ + (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + (const int)R)) + +#define _mm512_mask_ipcvt_roundps_epu8(W, U, A, R) \ + ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ + (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R)) + +#define _mm512_maskz_ipcvt_roundps_epu8(U, A, R) \ + ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \ + (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ + (const int)R)) + +#define _mm512_ipcvttph_epi8(A) \ + ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_ipcvttph_epi8(W, U, A) \ + ((__m512i)__builtin_ia32_vcvttph2ibs512_mask((__v32hf)(__m512h)(A), \ + (__v32hu)(W), (__mmask32)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_ipcvttph_epi8(U, A) \ + ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_ipcvtt_roundph_epi8(A, S) \ + ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + S)) + +#define _mm512_mask_ipcvtt_roundph_epi8(W, U, A, S) \ + ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S)) + +#define _mm512_maskz_ipcvtt_roundph_epi8(U, A, S) \ + ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ + S)) + +#define _mm512_ipcvttph_epu8(A) \ + ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + 
_MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_ipcvttph_epu8(W, U, A) \ + ((__m512i)__builtin_ia32_vcvttph2iubs512_mask((__v32hf)(__m512h)(A), \ + (__v32hu)(W), (__mmask32)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_ipcvttph_epu8(U, A) \ + ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_ipcvtt_roundph_epu8(A, S) \ + ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \ + S)) + +#define _mm512_mask_ipcvtt_roundph_epu8(W, U, A, S) \ + ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S)) + +#define _mm512_maskz_ipcvtt_roundph_epu8(U, A, S) \ + ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \ + (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \ + S)) + +#define _mm512_ipcvttps_epi8(A) \ + ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_ipcvttps_epi8(W, U, A) \ + ((__m512i)__builtin_ia32_vcvttps2ibs512_mask((__v16sf)(__m512h)(A), \ + (__v16su)(W), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_ipcvttps_epi8(U, A) \ + ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_ipcvtt_roundps_epi8(A, S) \ + ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + S)) + +#define _mm512_mask_ipcvtt_roundps_epi8(W, U, A, S) \ + ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ + (__v16sf)(__m512h)(A), (__v16su)(W), (__mmask16)(U), S)) + +#define _mm512_maskz_ipcvtt_roundps_epi8(U, A, S) \ + ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \ + (__v16sf)(__m512h)(A), 
(__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ + S)) + +#define _mm512_ipcvttps_epu8(A) \ + ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_mask_ipcvttps_epu8(W, U, A) \ + ((__m512i)__builtin_ia32_vcvttps2iubs512_mask((__v16sf)(__m512h)(A), \ + (__v16su)(W), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_maskz_ipcvttps_epu8(U, A) \ + ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm512_ipcvtt_roundps_epu8(A, S) \ + ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \ + S)) + +#define _mm512_mask_ipcvtt_roundps_epu8(W, U, A, S) \ + ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ + (__v16sf)(__m512h)(A), (__v16su)(W), (__mmask16)(U), S)) + +#define _mm512_maskz_ipcvtt_roundps_epu8(U, A, S) \ + ((__m512i)__builtin_ia32_vcvttps2iubs512_mask( \ + (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \ + S)) + +#endif // __AVX10_2_512SATCVTINTRIN_H diff --git a/lib/include/avx10_2bf16intrin.h b/lib/include/avx10_2bf16intrin.h new file mode 100644 index 000000000000..199cc13ff7a1 --- /dev/null +++ b/lib/include/avx10_2bf16intrin.h @@ -0,0 +1,1085 @@ +/*===-------------- avx10_2bf16intrin.h - AVX10-BF16 intrinsics ------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." 
+#endif + +#ifdef __SSE2__ + +#ifndef __AVX10_2BF16INTRIN_H +#define __AVX10_2BF16INTRIN_H + +typedef __bf16 __m128bh_u __attribute__((__vector_size__(16), __aligned__(1))); +typedef __bf16 __m256bh_u __attribute__((__vector_size__(32), __aligned__(1))); + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(128))) + +static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_setzero_pbh(void) { + return __builtin_bit_cast(__m256bh, _mm256_setzero_ps()); +} + +static __inline __m128bh __DEFAULT_FN_ATTRS128 _mm_setzero_pbh(void) { + return __builtin_bit_cast(__m128bh, _mm_setzero_ps()); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castbf16_ps(__m128bh __a) { + return (__m128)__a; +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_castbf16_ps(__m256bh __a) { + return (__m256)__a; +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS256 +_mm256_castbf16_pd(__m256bh __a) { + return (__m256d)__a; +} + +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_castbf16_pd(__m128bh __a) { + return (__m128d)__a; +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_castbf16_si128(__m128bh __a) { + return (__m128i)__a; +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_castbf16_si256(__m256bh __a) { + return (__m256i)__a; +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_castps_pbh(__m128 __a) { + return (__m128bh)__a; +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_castps_pbh(__m256 __a) { + return (__m256bh)__a; +} + +static __inline__ __bf16 __DEFAULT_FN_ATTRS128 _mm_cvtsbh_bf16(__m128bh __a) { + return __a[0]; +} + +static __inline__ __bf16 __DEFAULT_FN_ATTRS256 +_mm256_cvtsbh_bf16(__m256bh __a) { + return __a[0]; +} + +static __inline__ 
__m128bh __DEFAULT_FN_ATTRS128 _mm_castpd_pbh(__m128d __a) { + return (__m128bh)__a; +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_castpd_pbh(__m256d __a) { + return (__m256bh)__a; +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_castsi128_pbh(__m128i __a) { + return (__m128bh)__a; +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_castsi256_pbh(__m256i __a) { + return (__m256bh)__a; +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS256 +_mm256_castbf16256_pbh128(__m256bh __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_castbf16128_pbh256(__m128bh __a) { + return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, + -1, -1, -1, -1, -1); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_zextbf16128_pbh256(__m128bh __a) { + return __builtin_shufflevector(__a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_undefined_pbh(void) { + return (__m256bh)__builtin_ia32_undef256(); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_load_sbh(void const *__dp) { + __m128bh src = (__v8bf)_mm_setzero_pbh(); + return (__m128bh)__builtin_ia32_loadsbf16128_mask((const __v8bf *)__dp, src, + 1); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_load_sbh(__m128bh __W, __mmask8 __U, const void *__A) { + __m128bh src = (__v8bf)__builtin_shufflevector( + (__v8bf)__W, (__v8bf)_mm_setzero_pbh(), 0, 8, 8, 8, 8, 8, 8, 8); + + return (__m128bh)__builtin_ia32_loadsbf16128_mask((const __v8bf *)__A, src, + __U & 1); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_load_sbh(__mmask8 __U, const void *__A) { + return (__m128bh)__builtin_ia32_loadsbf16128_mask( + (const __v8bf *)__A, (__v8bf)_mm_setzero_pbh(), __U & 1); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_load_pbh(void const 
*__p) { + return *(const __m256bh *)__p; +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_load_pbh(void const *__p) { + return *(const __m128bh *)__p; +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_loadu_pbh(void const *__p) { + struct __loadu_pbh { + __m256bh_u __v; + } __attribute__((__packed__, __may_alias__)); + return ((const struct __loadu_pbh *)__p)->__v; +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_loadu_pbh(void const *__p) { + struct __loadu_pbh { + __m128bh_u __v; + } __attribute__((__packed__, __may_alias__)); + return ((const struct __loadu_pbh *)__p)->__v; +} + +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_sbh(void *__dp, + __m128bh __a) { + struct __mm_store_sbh_struct { + __bf16 __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_store_sbh_struct *)__dp)->__u = __a[0]; +} + +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sbh(void *__W, + __mmask8 __U, + __m128bh __A) { + __builtin_ia32_storesbf16128_mask((__v8bf *)__W, __A, __U & 1); +} + +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_store_pbh(void *__P, + __m256bh __A) { + *(__m256bh *)__P = __A; +} + +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_store_pbh(void *__P, + __m128bh __A) { + *(__m128bh *)__P = __A; +} + +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_storeu_pbh(void *__P, + __m256bh __A) { + struct __storeu_pbh { + __m256bh_u __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_pbh *)__P)->__v = __A; +} + +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_storeu_pbh(void *__P, + __m128bh __A) { + struct __storeu_pbh { + __m128bh_u __v; + } __attribute__((__packed__, __may_alias__)); + ((struct __storeu_pbh *)__P)->__v = __A; +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_move_sbh(__m128bh __a, + __m128bh __b) { + __a[0] = __b[0]; + return __a; +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_move_sbh(__m128bh __W, __mmask8 __U, __m128bh __A, 
__m128bh __B) { + return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B), __W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_move_sbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return __builtin_ia32_selectsbf_128(__U, _mm_move_sbh(__A, __B), + _mm_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_undefined_pbh(void) { + return (__m128bh)__builtin_ia32_undef128(); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_set_sbh(__bf16 bf) { + return (__v8bf)__builtin_shufflevector( + (__v8bf){bf, bf, bf, bf, bf, bf, bf, bf}, (__v8bf)_mm_setzero_pbh(), 0, 8, + 8, 8, 8, 8, 8, 8); +} + +static __inline __m128bh __DEFAULT_FN_ATTRS128 _mm_set1_pbh(__bf16 bf) { + return (__m128bh)(__v8bf){bf, bf, bf, bf, bf, bf, bf, bf}; +} + +static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_set1_pbh(__bf16 bf) { + return (__m256bh)(__v16bf){bf, bf, bf, bf, bf, bf, bf, bf, + bf, bf, bf, bf, bf, bf, bf, bf}; +} + +static __inline __m128bh __DEFAULT_FN_ATTRS128 +_mm_set_pbh(__bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, + __bf16 bf6, __bf16 bf7, __bf16 bf8) { + return (__m128bh)(__v8bf){bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8}; +} + +static __inline __m256bh __DEFAULT_FN_ATTRS256 _mm256_set_pbh( + __bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, __bf16 bf6, + __bf16 bf7, __bf16 bf8, __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12, + __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16) { + return (__m256bh)(__v16bf){bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, + bf9, bf10, bf11, bf12, bf13, bf14, bf15, bf16}; +} + +#define _mm_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8) \ + _mm_set_pbh((bf8), (bf7), (bf6), (bf5), (bf4), (bf3), (bf2), (bf1)) + +#define _mm256_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10, \ + bf11, bf12, bf13, bf14, bf15, bf16) \ + _mm256_set_pbh((bf16), (bf15), (bf14), (bf13), (bf12), (bf11), (bf10), \ + (bf9), (bf8), (bf7), (bf6), (bf5), (bf4), (bf3), (bf2), \ + (bf1)) + 
+static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_abs_pbh(__m256bh __A) { + return (__m256bh)_mm256_and_epi32(_mm256_set1_epi32(0x7FFF7FFF), + (__m256i)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_abs_pbh(__m128bh __A) { + return (__m128bh)_mm_and_epi32(_mm_set1_epi32(0x7FFF7FFF), (__m128i)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_blend_pbh(__mmask8 __U, __m128bh __A, __m128bh __W) { + return (__m128bh)__builtin_ia32_selectpbf_128((__mmask8)__U, (__v8bf)__W, + (__v8bf)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_blend_pbh(__mmask16 __U, __m256bh __A, __m256bh __W) { + return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, (__v16bf)__W, + (__v16bf)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_permutex2var_pbh(__m128bh __A, __m128i __I, __m128bh __B) { + return (__m128bh)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, + (__v8hi)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_permutex2var_pbh(__m256bh __A, __m256i __I, __m256bh __B) { + return (__m256bh)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, + (__v16hi)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_permutexvar_pbh(__m128i __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_permvarhi128((__v8hi)__B, (__v8hi)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_permutexvar_pbh(__m256i __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_permvarhi256((__v16hi)__B, (__v16hi)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_add_pbh(__m256bh __A, + __m256bh __B) { + return (__m256bh)((__v16bf)__A + (__v16bf)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_add_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_add_pbh(__A, __B), (__v16bf)__W); +} + +static __inline__ __m256bh 
__DEFAULT_FN_ATTRS256 +_mm256_maskz_add_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_add_pbh(__A, __B), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_add_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)((__v8bf)__A + (__v8bf)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_add_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_add_pbh(__A, __B), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_add_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_add_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_sub_pbh(__m256bh __A, + __m256bh __B) { + return (__m256bh)((__v16bf)__A - (__v16bf)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_sub_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_sub_pbh(__A, __B), (__v16bf)__W); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_sub_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_sub_pbh(__A, __B), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_sub_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)((__v8bf)__A - (__v8bf)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_sub_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_sub_pbh(__A, __B), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_sub_pbh(__mmask8 __U, 
__m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_sub_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mul_pbh(__m256bh __A, + __m256bh __B) { + return (__m256bh)((__v16bf)__A * (__v16bf)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_mul_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_mul_pbh(__A, __B), (__v16bf)__W); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_mul_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_mul_pbh(__A, __B), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_mul_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)((__v8bf)__A * (__v8bf)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_mul_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_mul_pbh(__A, __B), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_mul_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_mul_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_div_pbh(__m256bh __A, + __m256bh __B) { + return (__m256bh)((__v16bf)__A / (__v16bf)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_div_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_div_pbh(__A, __B), (__v16bf)__W); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_div_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return 
(__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_div_pbh(__A, __B), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_div_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)((__v8bf)__A / (__v8bf)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_div_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_div_pbh(__A, __B), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_div_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_div_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_max_pbh(__m256bh __A, + __m256bh __B) { + return (__m256bh)__builtin_ia32_vmaxbf16256((__v16bf)__A, (__v16bf)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_max_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_max_pbh(__A, __B), (__v16bf)__W); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_max_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_max_pbh(__A, __B), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_max_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)__builtin_ia32_vmaxbf16128((__v8bf)__A, (__v8bf)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_max_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_max_pbh(__A, __B), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_max_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return 
(__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_max_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_min_pbh(__m256bh __A, + __m256bh __B) { + return (__m256bh)__builtin_ia32_vminbf16256((__v16bf)__A, (__v16bf)__B); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_min_pbh(__m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_min_pbh(__A, __B), (__v16bf)__W); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_min_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_min_pbh(__A, __B), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_min_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)__builtin_ia32_vminbf16128((__v8bf)__A, (__v8bf)__B); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_min_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_min_pbh(__A, __B), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_min_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_min_pbh(__A, __B), (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomisbf16eq((__v8bf)A, (__v8bf)B); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomisbf16lt((__v8bf)A, (__v8bf)B); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomisbf16le((__v8bf)A, (__v8bf)B); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sbh(__m128bh A, + 
__m128bh B) { + return __builtin_ia32_vcomisbf16gt((__v8bf)A, (__v8bf)B); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomisbf16ge((__v8bf)A, (__v8bf)B); +} + +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sbh(__m128bh A, + __m128bh B) { + return __builtin_ia32_vcomisbf16neq((__v8bf)A, (__v8bf)B); +} + +#define _mm256_cmp_pbh_mask(__A, __B, __P) \ + ((__mmask16)__builtin_ia32_vcmpbf16256_mask((__v16bf)(__m256bh)(__A), \ + (__v16bf)(__m256bh)(__B), \ + (int)(__P), (__mmask16) - 1)) + +#define _mm256_mask_cmp_pbh_mask(__U, __A, __B, __P) \ + ((__mmask16)__builtin_ia32_vcmpbf16256_mask((__v16bf)(__m256bh)(__A), \ + (__v16bf)(__m256bh)(__B), \ + (int)(__P), (__mmask16)(__U))) + +#define _mm_cmp_pbh_mask(__A, __B, __P) \ + ((__mmask8)__builtin_ia32_vcmpbf16128_mask((__v8bf)(__m128bh)(__A), \ + (__v8bf)(__m128bh)(__B), \ + (int)(__P), (__mmask8) - 1)) + +#define _mm_mask_cmp_pbh_mask(__U, __A, __B, __P) \ + ((__mmask8)__builtin_ia32_vcmpbf16128_mask((__v8bf)(__m128bh)(__A), \ + (__v8bf)(__m128bh)(__B), \ + (int)(__P), (__mmask8)(__U))) + +#define _mm256_mask_fpclass_pbh_mask(__U, __A, imm) \ + ((__mmask16)__builtin_ia32_vfpclassbf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__mmask16)(__U))) + +#define _mm256_fpclass_pbh_mask(__A, imm) \ + ((__mmask16)__builtin_ia32_vfpclassbf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__mmask16) - 1)) + +#define _mm_mask_fpclass_pbh_mask(__U, __A, imm) \ + ((__mmask8)__builtin_ia32_vfpclassbf16128_mask((__v8bf)(__m128bh)(__A), \ + (int)(imm), (__mmask8)(__U))) + +#define _mm_fpclass_pbh_mask(__A, imm) \ + ((__mmask8)__builtin_ia32_vfpclassbf16128_mask((__v8bf)(__m128bh)(__A), \ + (int)(imm), (__mmask8) - 1)) + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_scalef_pbh(__m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_vscalefbf16256_mask( + (__v16bf)__A, (__v16bf)__B, (__v16bf)_mm256_undefined_pbh(), + 
(__mmask16)-1); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pbh( + __m256bh __W, __mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_vscalefbf16256_mask( + (__v16bf)__A, (__v16bf)__B, (__v16bf)__W, (__mmask16)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_scalef_pbh(__mmask16 __U, __m256bh __A, __m256bh __B) { + return (__m256bh)__builtin_ia32_vscalefbf16256_mask( + (__v16bf)__A, (__v16bf)__B, (__v16bf)_mm256_setzero_pbh(), + (__mmask16)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_scalef_pbh(__m128bh __A, + __m128bh __B) { + return (__m128bh)__builtin_ia32_vscalefbf16128_mask( + (__v8bf)__A, (__v8bf)__B, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_scalef_pbh(__m128bh __W, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_vscalefbf16128_mask( + (__v8bf)__A, (__v8bf)__B, (__v8bf)__W, (__mmask8)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_scalef_pbh(__mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128bh)__builtin_ia32_vscalefbf16128_mask( + (__v8bf)__A, (__v8bf)__B, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_rcp_pbh(__m256bh __A) { + return (__m256bh)__builtin_ia32_vrcpbf16256_mask( + (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_rcp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_vrcpbf16256_mask((__v16bf)__A, (__v16bf)__W, + (__mmask16)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_rcp_pbh(__mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_vrcpbf16256_mask( + (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_rcp_pbh(__m128bh __A) { + return 
(__m128bh)__builtin_ia32_vrcpbf16128_mask( + (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_rcp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_vrcpbf16128_mask((__v8bf)__A, (__v8bf)__W, + (__mmask8)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_rcp_pbh(__mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_vrcpbf16128_mask( + (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_getexp_pbh(__m256bh __A) { + return (__m256bh)__builtin_ia32_vgetexpbf16256_mask( + (__v16bf)__A, (__v16bf)_mm256_undefined_pbh(), (__mmask16)-1); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_getexp_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_vgetexpbf16256_mask( + (__v16bf)__A, (__v16bf)__W, (__mmask16)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_getexp_pbh(__mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_vgetexpbf16256_mask( + (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_getexp_pbh(__m128bh __A) { + return (__m128bh)__builtin_ia32_vgetexpbf16128_mask( + (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_getexp_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_vgetexpbf16128_mask((__v8bf)__A, (__v8bf)__W, + (__mmask8)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_getexp_pbh(__mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_vgetexpbf16128_mask( + (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_rsqrt_pbh(__m256bh __A) { + return (__m256bh)__builtin_ia32_vrsqrtbf16256_mask( + (__v16bf)__A, 
(__v16bf)_mm256_undefined_pbh(), (__mmask16)-1); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_rsqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_vrsqrtbf16256_mask((__v16bf)__A, (__v16bf)__W, + (__mmask16)__U); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_rsqrt_pbh(__mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_vrsqrtbf16256_mask( + (__v16bf)__A, (__v16bf)_mm256_setzero_pbh(), (__mmask16)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_rsqrt_pbh(__m128bh __A) { + return (__m128bh)__builtin_ia32_vrsqrtbf16128_mask( + (__v8bf)__A, (__v8bf)_mm_undefined_pbh(), (__mmask8)-1); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_rsqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_vrsqrtbf16128_mask((__v8bf)__A, (__v8bf)__W, + (__mmask8)__U); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_rsqrt_pbh(__mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_vrsqrtbf16128_mask( + (__v8bf)__A, (__v8bf)_mm_setzero_pbh(), (__mmask8)__U); +} + +#define _mm256_reduce_pbh(__A, imm) \ + ((__m256bh)__builtin_ia32_vreducebf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_undefined_pbh(), \ + (__mmask16) - 1)) + +#define _mm256_mask_reduce_pbh(__W, __U, __A, imm) \ + ((__m256bh)__builtin_ia32_vreducebf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)(__m256bh)(__W), \ + (__mmask16)(__U))) + +#define _mm256_maskz_reduce_pbh(__U, __A, imm) \ + ((__m256bh)__builtin_ia32_vreducebf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \ + (__mmask16)(__U))) + +#define _mm_reduce_pbh(__A, imm) \ + ((__m128bh)__builtin_ia32_vreducebf16128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_undefined_pbh(), \ + (__mmask8) - 1)) + +#define _mm_mask_reduce_pbh(__W, __U, __A, imm) \ + ((__m128bh)__builtin_ia32_vreducebf16128_mask( \ + 
(__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)(__m128bh)(__W), \ + (__mmask8)(__U))) + +#define _mm_maskz_reduce_pbh(__U, __A, imm) \ + ((__m128bh)__builtin_ia32_vreducebf16128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \ + (__mmask8)(__U))) + +#define _mm256_roundscale_pbh(__A, imm) \ + ((__m256bh)__builtin_ia32_vrndscalebf16_256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \ + (__mmask16) - 1)) + +#define _mm256_mask_roundscale_pbh(__W, __U, __A, imm) \ + ((__m256bh)__builtin_ia32_vrndscalebf16_256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)(__m256bh)(__W), \ + (__mmask16)(__U))) + +#define _mm256_maskz_roundscale_pbh(__U, __A, imm) \ + ((__m256bh)__builtin_ia32_vrndscalebf16_256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(imm), (__v16bf)_mm256_setzero_pbh(), \ + (__mmask16)(__U))) + +#define _mm_roundscale_pbh(__A, imm) \ + ((__m128bh)__builtin_ia32_vrndscalebf16_128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \ + (__mmask8) - 1)) + +#define _mm_mask_roundscale_pbh(__W, __U, __A, imm) \ + ((__m128bh)__builtin_ia32_vrndscalebf16_128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)(__m128bh)(__W), \ + (__mmask8)(__U))) + +#define _mm_maskz_roundscale_pbh(__U, __A, imm) \ + ((__m128bh)__builtin_ia32_vrndscalebf16_128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(imm), (__v8bf)_mm_setzero_pbh(), \ + (__mmask8)(__U))) + +#define _mm256_getmant_pbh(__A, __B, __C) \ + ((__m256bh)__builtin_ia32_vgetmantbf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v16bf)_mm256_undefined_pbh(), (__mmask16) - 1)) + +#define _mm256_mask_getmant_pbh(__W, __U, __A, __B, __C) \ + ((__m256bh)__builtin_ia32_vgetmantbf16256_mask( \ + (__v16bf)(__m256bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v16bf)(__m256bh)(__W), (__mmask16)(__U))) + +#define _mm256_maskz_getmant_pbh(__U, __A, __B, __C) \ + ((__m256bh)__builtin_ia32_vgetmantbf16256_mask( \ + 
(__v16bf)(__m256bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v16bf)_mm256_setzero_pbh(), (__mmask16)(__U))) + +#define _mm_getmant_pbh(__A, __B, __C) \ + ((__m128bh)__builtin_ia32_vgetmantbf16128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v8bf)_mm_undefined_pbh(), (__mmask8) - 1)) + +#define _mm_mask_getmant_pbh(__W, __U, __A, __B, __C) \ + ((__m128bh)__builtin_ia32_vgetmantbf16128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v8bf)(__m128bh)(__W), (__mmask8)(__U))) + +#define _mm_maskz_getmant_pbh(__U, __A, __B, __C) \ + ((__m128bh)__builtin_ia32_vgetmantbf16128_mask( \ + (__v8bf)(__m128bh)(__A), (int)(((__C) << 2) | (__B)), \ + (__v8bf)_mm_setzero_pbh(), (__mmask8)(__U))) + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_sqrt_pbh(__m256bh __A) { + return (__m256bh)__builtin_ia32_vsqrtbf16256((__v16bf)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_sqrt_pbh(__m256bh __W, __mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, (__v16bf)_mm256_sqrt_pbh(__A), (__v16bf)__W); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_sqrt_pbh(__mmask16 __U, __m256bh __A) { + return (__m256bh)__builtin_ia32_selectpbf_256((__mmask16)__U, + (__v16bf)_mm256_sqrt_pbh(__A), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_sqrt_pbh(__m128bh __A) { + return (__m128bh)__builtin_ia32_vsqrtbf16((__v8bf)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_sqrt_pbh(__m128bh __W, __mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_sqrt_pbh(__A), (__v8bf)__W); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_sqrt_pbh(__mmask8 __U, __m128bh __A) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, (__v8bf)_mm_sqrt_pbh(__A), (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m256bh 
__DEFAULT_FN_ATTRS256 +_mm256_fmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B, + (__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_fmadd_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fmadd_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pbh( + __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fmadd_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pbh( + __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fmadd_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_fmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, (__v16bf)__B, + -(__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_fmsub_pbh(__m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fmsub_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_pbh( + __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fmsub_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), (__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pbh( + __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { + return 
(__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fmsub_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_fnmadd_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B, + (__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pbh( + __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fnmadd_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_pbh( + __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fnmadd_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pbh( + __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fnmadd_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_fnmsub_pbh(__m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_vfmaddnepbh256((__v16bf)__A, -(__v16bf)__B, + -(__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pbh( + __m256bh __A, __mmask16 __U, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fnmsub_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)__A); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_pbh( + __m256bh __A, __m256bh __B, __m256bh __C, __mmask16 __U) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fnmsub_pbh((__v16bf)__A, 
(__v16bf)__B, (__v16bf)__C), + (__v16bf)__C); +} + +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pbh( + __mmask16 __U, __m256bh __A, __m256bh __B, __m256bh __C) { + return (__m256bh)__builtin_ia32_selectpbf_256( + (__mmask16)__U, + _mm256_fnmsub_pbh((__v16bf)__A, (__v16bf)__B, (__v16bf)__C), + (__v16bf)_mm256_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmadd_pbh(__m128bh __A, + __m128bh __B, + __m128bh __C) { + return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B, + (__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_fmadd_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fmadd_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask3_fmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fmadd_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_fmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fmadd_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fmsub_pbh(__m128bh __A, + __m128bh __B, + __m128bh __C) { + return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, (__v8bf)__B, + -(__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_fmsub_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fmsub_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask3_fmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { + 
return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fmsub_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_fmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fmsub_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmadd_pbh(__m128bh __A, + __m128bh __B, + __m128bh __C) { + return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B, + (__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_fnmadd_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fnmadd_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask3_fnmadd_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fnmadd_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_fnmadd_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fnmadd_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)_mm_setzero_pbh()); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 _mm_fnmsub_pbh(__m128bh __A, + __m128bh __B, + __m128bh __C) { + return (__m128bh)__builtin_ia32_vfmaddnepbh128((__v8bf)__A, -(__v8bf)__B, + -(__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_fnmsub_pbh(__m128bh __A, __mmask8 __U, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fnmsub_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__A); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 
+_mm_mask3_fnmsub_pbh(__m128bh __A, __m128bh __B, __m128bh __C, __mmask8 __U) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fnmsub_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)__C); +} + +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_fnmsub_pbh(__mmask8 __U, __m128bh __A, __m128bh __B, __m128bh __C) { + return (__m128bh)__builtin_ia32_selectpbf_128( + (__mmask8)__U, _mm_fnmsub_pbh((__v8bf)__A, (__v8bf)__B, (__v8bf)__C), + (__v8bf)_mm_setzero_pbh()); +} + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif +#endif diff --git a/lib/include/avx10_2convertintrin.h b/lib/include/avx10_2convertintrin.h new file mode 100644 index 000000000000..c67a5b890f19 --- /dev/null +++ b/lib/include/avx10_2convertintrin.h @@ -0,0 +1,590 @@ +/*===--------------- avx10_2convertintrin.h - AVX10_2CONVERT ---------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." +#endif // __IMMINTRIN_H + +#ifdef __SSE2__ + +#ifndef __AVX10_2CONVERTINTRIN_H +#define __AVX10_2CONVERTINTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(256))) + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A, + __m128 __B) { + return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( + (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)(-1)); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_cvtx2ps_ph(__m128h __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( + (__v4sf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtx2ps_ph(__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( + (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A, + __m256 __B) { + return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( + (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)(-1), + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) { + return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( + (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (__mmask16)__U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) { + return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( + (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm256_cvtx_round2ps_ph(A, B, R) \ + ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ + (__v8sf)(A), (__v8sf)(B), (__v16hf)_mm256_undefined_ph(), \ + (__mmask16)(-1), (const int)(R))) + +#define 
_mm256_mask_cvtx_round2ps_ph(W, U, A, B, R) \ + ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ + (__v8sf)(A), (__v8sf)(B), (__v16hf)(W), (__mmask16)(U), (const int)(R))) + +#define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R) \ + ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ + (__v8sf)(A), (__v8sf)(B), (__v16hf)(_mm256_setzero_ph()), \ + (__mmask16)(U), (const int)(R))) + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_bf8(__m128i __A, + __m128h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( + (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtbiasph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( + (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtbiasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( + (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtbiasph_bf8(__m256i __A, __m256h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( + (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), + (__mmask16)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_bf8( + __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( + (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( + (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), + (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 
+_mm_cvtbiassph_bf8(__m128i __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( + (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtbiassph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( + (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtbiassph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( + (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtbiassph_bf8(__m256i __A, __m256h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( + (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), + (__mmask16)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_bf8( + __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( + (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtbiassph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( + (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), + (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_hf8(__m128i __A, + __m128h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( + (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtbiasph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( + (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, 
(__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtbiasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( + (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtbiasph_hf8(__m256i __A, __m256h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( + (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), + (__mmask16)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_hf8( + __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( + (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( + (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), + (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_cvtbiassph_hf8(__m128i __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( + (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtbiassph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( + (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtbiassph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( + (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), + (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvtbiassph_hf8(__m256i __A, __m256h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( 
+ (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), + (__mmask16)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_hf8( + __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( + (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtbiassph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { + return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( + (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), + (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_bf8(__m128h __A, + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2bf8_128((__v8hf)(__A), (__v8hf)(__B)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvt2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B), (__v16qi)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvt2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B), + (__v16qi)(__m128i)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_bf8(__m256h __A, + __m256h __B) { + return (__m256i)__builtin_ia32_vcvt2ph2bf8_256((__v16hf)(__A), + (__v16hf)(__B)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvt2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), + 
(__v32qi)(__m256i)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_bf8(__m128h __A, + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A), (__v8hf)(__B)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvts2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), (__v16qi)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvts2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), + (__v16qi)(__m128i)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvts2ph_bf8(__m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_vcvt2ph2bf8s_256((__v16hf)(__A), + (__v16hf)(__B)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvts2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), (__v32qi)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), + (__v32qi)(__m256i)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_hf8(__m128h __A, + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2hf8_128((__v8hf)(__A), (__v8hf)(__B)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvt2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B), (__v16qi)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvt2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { + return 
(__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B), + (__v16qi)(__m128i)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_hf8(__m256h __A, + __m256h __B) { + return (__m256i)__builtin_ia32_vcvt2ph2hf8_256((__v16hf)(__A), + (__v16hf)(__B)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvt2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), + (__v32qi)(__m256i)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_hf8(__m128h __A, + __m128h __B) { + return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A), (__v8hf)(__B)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvts2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), (__v16qi)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvts2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), + (__v16qi)(__m128i)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvts2ph_hf8(__m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_vcvt2ph2hf8s_256((__v16hf)(__A), + (__v16hf)(__B)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvts2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), (__v32qi)__W); +} + +static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 +_mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), + (__v32qi)(__m256i)_mm256_setzero_si256()); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvthf8(__m128i __A) { + return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( + (__v16qi)__A, (__v8hf)(__m128h)_mm_undefined_ph(), (__mmask8)-1); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvthf8(__m128h __W, + __mmask8 __U, + __m128i __A) { + return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( + (__v16qi)__A, (__v8hf)(__m128h)__W, (__mmask8)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvthf8(__mmask8 __U, + __m128i __A) { + return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( + (__v16qi)__A, (__v8hf)(__m128h)_mm_setzero_ph(), (__mmask8)__U); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvthf8(__m128i __A) { + return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( + (__v16qi)__A, (__v16hf)(__m256h)_mm256_undefined_ph(), (__mmask16)-1); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_cvthf8(__m256h __W, __mmask16 __U, __m128i __A) { + return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( + (__v16qi)__A, (__v16hf)(__m256h)__W, (__mmask16)__U); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvthf8(__mmask16 __U, __m128i __A) { + return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( + (__v16qi)__A, (__v16hf)(__m256h)_mm256_setzero_ph(), (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_bf8(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( + (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( + (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); +} + 
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtph_bf8(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( + (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_bf8(__m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( + (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( + (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtph_bf8(__mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( + (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_bf8(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( + (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtsph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( + (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtsph_bf8(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( + (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_bf8(__m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( + (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtsph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( + (__v16hf)__A, (__v16qi)(__m128i)__W, 
(__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtsph_bf8(__mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( + (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_hf8(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( + (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( + (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtph_hf8(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( + (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_hf8(__m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( + (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( + (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtph_hf8(__mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( + (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_hf8(__m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( + (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvtsph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( + (__v8hf)__A, 
(__v16qi)(__m128i)__W, (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtsph_hf8(__mmask8 __U, __m128h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( + (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_hf8(__m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( + (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtsph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( + (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtsph_hf8(__mmask16 __U, __m256h __A) { + return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( + (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtbf8_ph(__m128i __A) { + return _mm_castsi128_ph(_mm_slli_epi16(_mm_cvtepi8_epi16(__A), 8)); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_mask_cvtbf8_ph(__m128h __S, __mmask8 __U, __m128i __A) { + return _mm_castsi128_ph( + _mm_mask_slli_epi16((__m128i)__S, __U, _mm_cvtepi8_epi16(__A), 8)); +} + +static __inline__ __m128h __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtbf8_ph(__mmask8 __U, __m128i __A) { + return _mm_castsi128_ph(_mm_slli_epi16(_mm_maskz_cvtepi8_epi16(__U, __A), 8)); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtbf8_ph(__m128i __A) { + return _mm256_castsi256_ph(_mm256_slli_epi16(_mm256_cvtepi8_epi16(__A), 8)); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) { + return _mm256_castsi256_ph( + _mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8)); +} + +static __inline__ __m256h __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtbf8_ph(__mmask16 __U, __m128i __A) 
{ + return _mm256_castsi256_ph( + _mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8)); +} + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif // __AVX10_2CONVERTINTRIN_H +#endif // __SSE2__ diff --git a/lib/include/avx10_2copyintrin.h b/lib/include/avx10_2copyintrin.h new file mode 100644 index 000000000000..76b8f8ced540 --- /dev/null +++ b/lib/include/avx10_2copyintrin.h @@ -0,0 +1,66 @@ +/*===---- avx10_2copyintrin.h - AVX10.2 Copy intrinsics -------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead." +#endif // __IMMINTRIN_H + +#ifndef __AVX10_2COPYINTRIN_H +#define __AVX10_2COPYINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(128))) + +/// Constructs a 128-bit integer vector, setting the lower 32 bits to the +/// lower 32 bits of the parameter \a __A; the upper bits are zeroed. +/// +/// \code{.operation} +/// result[31:0] := __A[31:0] +/// result[MAX:32] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> VMOVD </c> instruction. +/// +/// \param __A +/// A 128-bit integer vector. +/// \returns A 128-bit integer vector. The lower 32 bits are copied from the +/// parameter \a __A; the upper bits are zeroed.
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi32(__m128i __A) { + return (__m128i)__builtin_shufflevector( + (__v4si)__A, (__v4si)_mm_setzero_si128(), 0, 4, 4, 4); +} + +/// Constructs a 128-bit integer vector, setting the lower 16 bits to the +/// lower 16 bits of the parameter \a __A; the upper bits are zeroed. +/// +/// \code{.operation} +/// result[15:0] := __A[15:0] +/// result[MAX:16] := 0 +/// \endcode +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the <c> VMOVW </c> instruction. +/// +/// \param __A +/// A 128-bit integer vector. +/// \returns A 128-bit integer vector. The lower 16 bits are copied from the +/// parameter \a __A; the upper bits are zeroed. +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) { + return (__m128i)__builtin_shufflevector( + (__v8hi)__A, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8); +} + +#undef __DEFAULT_FN_ATTRS128 + +#endif // __AVX10_2COPYINTRIN_H diff --git a/lib/include/avx10_2minmaxintrin.h b/lib/include/avx10_2minmaxintrin.h new file mode 100644 index 000000000000..8164d49d89f1 --- /dev/null +++ b/lib/include/avx10_2minmaxintrin.h @@ -0,0 +1,277 @@ +/*===-------- avx10_2minmaxintrin.h - AVX10_2MINMAX intrinsics -------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use <avx10_2minmaxintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H + +#ifndef __AVX10_2MINMAXINTRIN_H +#define __AVX10_2MINMAXINTRIN_H + +#define _mm_minmax_pbh(A, B, C) \ + ((__m128bh)__builtin_ia32_vminmaxbf16128((__m128bh)(__v8bf)(A), \ + (__m128bh)(__v8bf)(B), (int)(C))) + +#define _mm_mask_minmax_pbh(W, U, A, B, C) \ + ((__m128bh)__builtin_ia32_selectpbf_128( \ + (__mmask8)(U), \ + (__v8bf)_mm_minmax_pbh((__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), \ + (int)(C)), \ + (__v8bf)(W))) + +#define _mm_maskz_minmax_pbh(U, A, B, C) \ + ((__m128bh)__builtin_ia32_selectpbf_128( \ + (__mmask8)(U), \ + (__v8bf)_mm_minmax_pbh((__m128bh)(__v8bf)(A), (__m128bh)(__v8bf)(B), \ + (int)(C)), \ + (__v8bf) __builtin_bit_cast(__m128bh, _mm_setzero_ps()))) + +#define _mm256_minmax_pbh(A, B, C) \ + ((__m256bh)__builtin_ia32_vminmaxbf16256((__m256bh)(__v16bf)(A), \ + (__m256bh)(__v16bf)(B), (int)(C))) + +#define _mm256_mask_minmax_pbh(W, U, A, B, C) \ + ((__m256bh)__builtin_ia32_selectpbf_256( \ + (__mmask16)(U), \ + (__v16bf)_mm256_minmax_pbh((__m256bh)(__v16bf)(A), \ + (__m256bh)(__v16bf)(B), (int)(C)), \ + (__v16bf)(W))) + +#define _mm256_maskz_minmax_pbh(U, A, B, C) \ + ((__m256bh)__builtin_ia32_selectpbf_256( \ + (__mmask16)(U), \ + (__v16bf)_mm256_minmax_pbh((__m256bh)(__v16bf)(A), \ + (__m256bh)(__v16bf)(B), (int)(C)), \ + (__v16bf) __builtin_bit_cast(__m256bh, _mm256_setzero_ps()))) + +#define _mm_minmax_pd(A, B, C) \ + ((__m128d)__builtin_ia32_vminmaxpd128_mask( \ + (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \ + (__v2df)_mm_setzero_pd(), (__mmask8)-1)) + +#define _mm_mask_minmax_pd(W, U, A, B, C) \ + ((__m128d)__builtin_ia32_vminmaxpd128_mask( \ + (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \ + (__v2df)(__m128d)(W), (__mmask8)(U))) + +#define _mm_maskz_minmax_pd(U, A, B, C) \ + ((__m128d)__builtin_ia32_vminmaxpd128_mask( \ + (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \ + (__v2df)_mm_setzero_pd(), (__mmask8)(U))) + +#define _mm256_minmax_pd(A, B, C) \ + 
((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ + (__v4df)_mm256_setzero_pd(), (__mmask8)-1, _MM_FROUND_NO_EXC)) + +#define _mm256_mask_minmax_pd(W, U, A, B, C) \ + ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ + (__v4df)(__m256d)(W), (__mmask8)(U), _MM_FROUND_NO_EXC)) + +#define _mm256_maskz_minmax_pd(U, A, B, C) \ + ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ + (__v4df)_mm256_setzero_pd(), (__mmask8)(U), _MM_FROUND_NO_EXC)) + +#define _mm256_minmax_round_pd(A, B, C, R) \ + ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ + (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R))) + +#define _mm256_mask_minmax_round_pd(W, U, A, B, C, R) \ + ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ + (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_minmax_round_pd(U, A, B, C, R) \ + ((__m256d)__builtin_ia32_vminmaxpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ + (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R))) + +#define _mm_minmax_ph(A, B, C) \ + ((__m128h)__builtin_ia32_vminmaxph128_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \ + (__v8hf)_mm_setzero_ph(), (__mmask8)-1)) + +#define _mm_mask_minmax_ph(W, U, A, B, C) \ + ((__m128h)__builtin_ia32_vminmaxph128_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \ + (__v8hf)(__m128h)(W), (__mmask8)(U))) + +#define _mm_maskz_minmax_ph(U, A, B, C) \ + ((__m128h)__builtin_ia32_vminmaxph128_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \ + (__v8hf)_mm_setzero_ph(), (__mmask8)(U))) + +#define _mm256_minmax_ph(A, B, C) \ + ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ + (__v16hf)(__m256h)(A), 
(__v16hf)(__m256h)(B), (int)(C), \ + (__v16hf)_mm256_setzero_ph(), (__mmask16)-1, _MM_FROUND_NO_EXC)) + +#define _mm256_mask_minmax_ph(W, U, A, B, C) \ + ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \ + (__v16hf)(__m256h)(W), (__mmask16)(U), _MM_FROUND_NO_EXC)) + +#define _mm256_maskz_minmax_ph(U, A, B, C) \ + ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \ + (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), _MM_FROUND_NO_EXC)) + +#define _mm256_minmax_round_ph(A, B, C, R) \ + ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \ + (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R))) + +#define _mm256_mask_minmax_round_ph(W, U, A, B, C, R) \ + ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (C), \ + (__v16hf)(__m256h)(W), (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_minmax_round_ph(U, A, B, C, R) \ + ((__m256h)__builtin_ia32_vminmaxph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(C), \ + (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) + +#define _mm_minmax_ps(A, B, C) \ + ((__m128)__builtin_ia32_vminmaxps128_mask( \ + (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \ + (__v4sf)_mm_setzero_ps(), (__mmask8)-1)) + +#define _mm_mask_minmax_ps(W, U, A, B, C) \ + ((__m128)__builtin_ia32_vminmaxps128_mask( \ + (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(__m128)(W), \ + (__mmask8)(U))) + +#define _mm_maskz_minmax_ps(U, A, B, C) \ + ((__m128)__builtin_ia32_vminmaxps128_mask( \ + (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \ + (__v4sf)_mm_setzero_ps(), (__mmask8)(U))) + +#define _mm256_minmax_ps(A, B, C) \ + ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ + (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, 
_MM_FROUND_NO_EXC)) + +#define _mm256_mask_minmax_ps(W, U, A, B, C) \ + ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \ + (__mmask8)(U), _MM_FROUND_NO_EXC)) + +#define _mm256_maskz_minmax_ps(U, A, B, C) \ + ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ + (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), _MM_FROUND_NO_EXC)) + +#define _mm256_minmax_round_ps(A, B, C, R) \ + ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ + (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, (int)(R))) + +#define _mm256_mask_minmax_round_ps(W, U, A, B, C, R) \ + ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_minmax_round_ps(U, A, B, C, R) \ + ((__m256)__builtin_ia32_vminmaxps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ + (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R))) + +#define _mm_minmax_sd(A, B, C) \ + ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \ + (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \ + (__v2df)_mm_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_minmax_sd(W, U, A, B, C) \ + ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \ + (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \ + (__v2df)(__m128d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_minmax_sd(U, A, B, C) \ + ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \ + (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \ + (__v2df)_mm_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_minmax_round_sd(A, B, C, R) \ + ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \ + (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \ + (__v2df)_mm_undefined_pd(), (__mmask8)-1, (int)(R))) + +#define 
_mm_mask_minmax_round_sd(W, U, A, B, C, R) \ + ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \ + (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \ + (__v2df)(__m128d)(W), (__mmask8)(U), (int)(R))) + +#define _mm_maskz_minmax_round_sd(U, A, B, C, R) \ + ((__m128d)__builtin_ia32_vminmaxsd_round_mask( \ + (__v2df)(__m128d)(A), (__v2df)(__m128d)(B), (int)(C), \ + (__v2df)_mm_setzero_pd(), (__mmask8)(U), (int)(R))) + +#define _mm_minmax_sh(A, B, C) \ + ((__m128h)__builtin_ia32_vminmaxsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \ + (__v8hf)_mm_undefined_ph(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_minmax_sh(W, U, A, B, C) \ + ((__m128h)__builtin_ia32_vminmaxsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \ + (__v8hf)(__m128h)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_minmax_sh(U, A, B, C) \ + ((__m128h)__builtin_ia32_vminmaxsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \ + (__v8hf)_mm_setzero_ph(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_minmax_round_sh(A, B, C, R) \ + ((__m128h)__builtin_ia32_vminmaxsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \ + (__v8hf)_mm_undefined_ph(), (__mmask8)-1, (int)(R))) + +#define _mm_mask_minmax_round_sh(W, U, A, B, C, R) \ + ((__m128h)__builtin_ia32_vminmaxsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \ + (__v8hf)(__m128h)(W), (__mmask8)(U), (int)(R))) + +#define _mm_maskz_minmax_round_sh(U, A, B, C, R) \ + ((__m128h)__builtin_ia32_vminmaxsh_round_mask( \ + (__v8hf)(__m128h)(A), (__v8hf)(__m128h)(B), (int)(C), \ + (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) + +#define _mm_minmax_ss(A, B, C) \ + ((__m128)__builtin_ia32_vminmaxss_round_mask( \ + (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \ + (__v4sf)_mm_undefined_ps(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) + +#define _mm_mask_minmax_ss(W, U, A, B, C) \ + 
((__m128)__builtin_ia32_vminmaxss_round_mask( \ + (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(W), \ + (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_maskz_minmax_ss(U, A, B, C) \ + ((__m128)__builtin_ia32_vminmaxss_round_mask( \ + (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \ + (__v4sf)_mm_setzero_ps(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm_minmax_round_ss(A, B, C, R) \ + ((__m128)__builtin_ia32_vminmaxss_round_mask( \ + (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \ + (__v4sf)_mm_undefined_ps(), (__mmask8)-1, (int)(R))) + +#define _mm_mask_minmax_round_ss(W, U, A, B, C, R) \ + ((__m128)__builtin_ia32_vminmaxss_round_mask( \ + (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), (__v4sf)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm_maskz_minmax_round_ss(U, A, B, C, R) \ + ((__m128)__builtin_ia32_vminmaxss_round_mask( \ + (__v4sf)(__m128)(A), (__v4sf)(__m128)(B), (int)(C), \ + (__v4sf)_mm_setzero_ps(), (__mmask8)(U), (int)(R))) +#endif // __AVX10_2MINMAXINTRIN_H diff --git a/lib/include/avx10_2niintrin.h b/lib/include/avx10_2niintrin.h new file mode 100644 index 000000000000..c91a7b57c752 --- /dev/null +++ b/lib/include/avx10_2niintrin.h @@ -0,0 +1,2075 @@ +/*===---- avx10_2niintrin.h - AVX10.2 new instruction intrinsics -----------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use <avx10_2niintrin.h> directly; include <immintrin.h> instead."
+#endif + +#ifdef __SSE2__ + +#ifndef __AVX10_2NIINTRIN_H +#define __AVX10_2NIINTRIN_H + +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(256))) + +/* VNNI FP16 */ +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_dpph_ps(__m128 __W, + __m128h __A, + __m128h __B) { + return (__m128)__builtin_ia32_vdpphps128((__v4sf)__W, (__v8hf)__A, + (__v8hf)__B); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_dpph_ps(__m128 __W, + __mmask8 __U, + __m128h __A, + __m128h __B) { + return (__m128)__builtin_ia32_selectps_128( + (__mmask8)__U, (__v4sf)_mm_dpph_ps(__W, __A, __B), (__v4sf)__W); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_dpph_ps(__mmask8 __U, + __m128 __W, + __m128h __A, + __m128h __B) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_dpph_ps(__W, __A, __B), + (__v4sf)_mm_setzero_ps()); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_dpph_ps(__m256 __W, + __m256h __A, + __m256h __B) { + return (__m256)__builtin_ia32_vdpphps256((__v8sf)__W, (__v16hf)__A, + (__v16hf)__B); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_mask_dpph_ps(__m256 __W, __mmask8 __U, __m256h __A, __m256h __B) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_dpph_ps(__W, __A, __B), (__v8sf)__W); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_maskz_dpph_ps(__mmask8 __U, __m256 __W, __m256h __A, __m256h __B) { + return (__m256)__builtin_ia32_selectps_256( + (__mmask8)__U, (__v8sf)_mm256_dpph_ps(__W, __A, __B), + (__v8sf)_mm256_setzero_ps()); +} + +/* VMPSADBW */ +#define _mm_mask_mpsadbw_epu8(W, U, A, B, imm) \ + ((__m128i)__builtin_ia32_selectw_128( \ + (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)), \ + (__v8hi)(__m128i)(W))) + +#define 
_mm_maskz_mpsadbw_epu8(U, A, B, imm) \ + ((__m128i)__builtin_ia32_selectw_128( \ + (__mmask8)(U), (__v8hi)_mm_mpsadbw_epu8((A), (B), (imm)), \ + (__v8hi)_mm_setzero_si128())) + +#define _mm256_mask_mpsadbw_epu8(W, U, A, B, imm) \ + ((__m256i)__builtin_ia32_selectw_256( \ + (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)), \ + (__v16hi)(__m256i)(W))) + +#define _mm256_maskz_mpsadbw_epu8(U, A, B, imm) \ + ((__m256i)__builtin_ia32_selectw_256( \ + (__mmask16)(U), (__v16hi)_mm256_mpsadbw_epu8((A), (B), (imm)), \ + (__v16hi)_mm256_setzero_si256())) + +/* VNNI INT8 */ +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpbssd_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128( + __U, (__v4si)_mm_dpbssd_epi32(__W, __A, __B), (__v4si)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpbssd_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128( + __U, (__v4si)_mm_dpbssd_epi32(__W, __A, __B), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpbssd_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256( + __U, (__v8si)_mm256_dpbssd_epi32(__W, __A, __B), (__v8si)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_dpbssd_epi32(__mmask8 __U, __m256i __W, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256( + __U, (__v8si)_mm256_dpbssd_epi32(__W, __A, __B), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpbssds_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128( + __U, (__v4si)_mm_dpbssds_epi32(__W, __A, __B), (__v4si)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpbssds_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) { + return 
(__m128i)__builtin_ia32_selectd_128( + __U, (__v4si)_mm_dpbssds_epi32(__W, __A, __B), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpbssds_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256( + __U, (__v8si)_mm256_dpbssds_epi32(__W, __A, __B), (__v8si)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbssds_epi32( + __mmask8 __U, __m256i __W, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256( + __U, (__v8si)_mm256_dpbssds_epi32(__W, __A, __B), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpbsud_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128( + __U, (__v4si)_mm_dpbsud_epi32(__W, __A, __B), (__v4si)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpbsud_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128( + __U, (__v4si)_mm_dpbsud_epi32(__W, __A, __B), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpbsud_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256( + __U, (__v8si)_mm256_dpbsud_epi32(__W, __A, __B), (__v8si)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_dpbsud_epi32(__mmask8 __U, __m256i __W, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256( + __U, (__v8si)_mm256_dpbsud_epi32(__W, __A, __B), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpbsuds_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128( + __U, (__v4si)_mm_dpbsuds_epi32(__W, __A, __B), (__v4si)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpbsuds_epi32(__mmask8 __U, __m128i __W, 
__m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128( + __U, (__v4si)_mm_dpbsuds_epi32(__W, __A, __B), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpbsuds_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256( + __U, (__v8si)_mm256_dpbsuds_epi32(__W, __A, __B), (__v8si)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbsuds_epi32( + __mmask8 __U, __m256i __W, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256( + __U, (__v8si)_mm256_dpbsuds_epi32(__W, __A, __B), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpbuud_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128( + __U, (__v4si)_mm_dpbuud_epi32(__W, __A, __B), (__v4si)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpbuud_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128( + __U, (__v4si)_mm_dpbuud_epi32(__W, __A, __B), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpbuud_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256( + __U, (__v8si)_mm256_dpbuud_epi32(__W, __A, __B), (__v8si)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_dpbuud_epi32(__mmask8 __U, __m256i __W, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256( + __U, (__v8si)_mm256_dpbuud_epi32(__W, __A, __B), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpbuuds_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128( + __U, (__v4si)_mm_dpbuuds_epi32(__W, __A, __B), (__v4si)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 
+_mm_maskz_dpbuuds_epi32(__mmask8 __U, __m128i __W, __m128i __A, __m128i __B) { + return (__m128i)__builtin_ia32_selectd_128( + __U, (__v4si)_mm_dpbuuds_epi32(__W, __A, __B), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpbuuds_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256( + __U, (__v8si)_mm256_dpbuuds_epi32(__W, __A, __B), (__v8si)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbuuds_epi32( + __mmask8 __U, __m256i __W, __m256i __A, __m256i __B) { + return (__m256i)__builtin_ia32_selectd_256( + __U, (__v8si)_mm256_dpbuuds_epi32(__W, __A, __B), + (__v8si)_mm256_setzero_si256()); +} + +/* VNNI INT16 */ +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpwsud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_dpwsud_epi32(__A, __B, __C), (__v4si)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpwsud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_dpwsud_epi32(__A, __B, __C), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpwsud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_dpwsud_epi32(__A, __B, __C), (__v8si)__A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_dpwsud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_dpwsud_epi32(__A, __B, __C), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpwsuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, 
(__v4si)_mm_dpwsuds_epi32(__A, __B, __C), (__v4si)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpwsuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_dpwsuds_epi32(__A, __B, __C), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpwsuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_dpwsuds_epi32(__A, __B, __C), (__v8si)__A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwsuds_epi32( + __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_dpwsuds_epi32(__A, __B, __C), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpwusd_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_dpwusd_epi32(__A, __B, __C), (__v4si)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpwusd_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_dpwusd_epi32(__A, __B, __C), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpwusd_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_dpwusd_epi32(__A, __B, __C), (__v8si)__A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_dpwusd_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_dpwusd_epi32(__A, __B, __C), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 
+_mm_mask_dpwusds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_dpwusds_epi32(__A, __B, __C), (__v4si)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpwusds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_dpwusds_epi32(__A, __B, __C), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpwusds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_dpwusds_epi32(__A, __B, __C), (__v8si)__A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwusds_epi32( + __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_dpwusds_epi32(__A, __B, __C), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpwuud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_dpwuud_epi32(__A, __B, __C), (__v4si)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpwuud_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_dpwuud_epi32(__A, __B, __C), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpwuud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_dpwuud_epi32(__A, __B, __C), (__v8si)__A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_dpwuud_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, 
(__v8si)_mm256_dpwuud_epi32(__A, __B, __C), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_dpwuuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_dpwuuds_epi32(__A, __B, __C), (__v4si)__A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_dpwuuds_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_dpwuuds_epi32(__A, __B, __C), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_dpwuuds_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_dpwuuds_epi32(__A, __B, __C), (__v8si)__A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwuuds_epi32( + __m256i __A, __mmask8 __U, __m256i __B, __m256i __C) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_dpwuuds_epi32(__A, __B, __C), + (__v8si)_mm256_setzero_si256()); +} + +/* YMM Rounding */ +#define _mm256_add_round_pd(A, B, R) \ + ((__m256d)__builtin_ia32_vaddpd256_round((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), (int)(R))) + +#define _mm256_mask_add_round_pd(W, U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \ + (__v4df)(__m256d)(W))) + +#define _mm256_maskz_add_round_pd(U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_add_round_pd((A), (B), (R)), \ + (__v4df)_mm256_setzero_pd())) + +#define _mm256_add_round_ph(A, B, R) \ + ((__m256h)__builtin_ia32_vaddph256_round((__v16hf)(__m256h)(A), \ + (__v16hf)(__m256h)(B), (int)(R))) + +#define _mm256_mask_add_round_ph(W, U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \ + 
(__v16hf)(__m256h)(W))) + +#define _mm256_maskz_add_round_ph(U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_add_round_ph((A), (B), (R)), \ + (__v16hf)_mm256_setzero_ph())) + +#define _mm256_add_round_ps(A, B, R) \ + ((__m256)__builtin_ia32_vaddps256_round((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), (int)(R))) + +#define _mm256_mask_add_round_ps(W, U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \ + (__v8sf)(__m256)(W))) + +#define _mm256_maskz_add_round_ps(U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_add_round_ps((A), (B), (R)), \ + (__v8sf)_mm256_setzero_ps())) + +#define _mm256_cmp_round_pd_mask(A, B, P, R) \ + ((__mmask8)__builtin_ia32_vcmppd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(P), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cmp_round_pd_mask(U, A, B, P, R) \ + ((__mmask8)__builtin_ia32_vcmppd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(P), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cmp_round_ph_mask(A, B, P, R) \ + ((__mmask16)__builtin_ia32_vcmpph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(P), (__mmask16)-1, \ + (int)(R))) + +#define _mm256_mask_cmp_round_ph_mask(U, A, B, P, R) \ + ((__mmask16)__builtin_ia32_vcmpph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (int)(P), (__mmask16)(U), \ + (int)(R))) + +#define _mm256_cmp_round_ps_mask(A, B, P, R) \ + ((__mmask8)__builtin_ia32_vcmpps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(P), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cmp_round_ps_mask(U, A, B, P, R) \ + ((__mmask8)__builtin_ia32_vcmpps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(P), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundepi32_ph(A, R) \ + ((__m128h)__builtin_ia32_vcvtdq2ph256_round_mask( \ + (__v8si)(A), 
(__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) + +#define _mm256_mask_cvt_roundepi32_ph(W, U, A, R) \ + ((__m128h)__builtin_ia32_vcvtdq2ph256_round_mask((__v8si)(A), (__v8hf)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundepi32_ph(U, A, R) \ + ((__m128h)__builtin_ia32_vcvtdq2ph256_round_mask( \ + (__v8si)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvt_roundepi32_ps(A, R) \ + ((__m256)__builtin_ia32_vcvtdq2ps256_round_mask((__v8si)(__m256i)(A), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_cvt_roundepi32_ps(W, U, A, R) \ + ((__m256)__builtin_ia32_vcvtdq2ps256_round_mask( \ + (__v8si)(__m256i)(A), (__v8sf)(__m256)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundepi32_ps(U, A, R) \ + ((__m256)__builtin_ia32_vcvtdq2ps256_round_mask((__v8si)(__m256i)(A), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_cvt_roundpd_epi32(A, R) \ + ((__m128i)__builtin_ia32_vcvtpd2dq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvt_roundpd_epi32(W, U, A, R) \ + ((__m128i)__builtin_ia32_vcvtpd2dq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4si)(__m128i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundpd_epi32(U, A, R) \ + ((__m128i)__builtin_ia32_vcvtpd2dq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundpd_ph(A, R) \ + ((__m128h)__builtin_ia32_vcvtpd2ph256_round_mask( \ + (__v4df)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) + +#define _mm256_mask_cvt_roundpd_ph(W, U, A, R) \ + ((__m128h)__builtin_ia32_vcvtpd2ph256_round_mask((__v4df)(A), (__v8hf)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundpd_ph(U, A, R) \ + ((__m128h)__builtin_ia32_vcvtpd2ph256_round_mask( \ + (__v4df)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) + +#define 
_mm256_cvt_roundpd_ps(A, R) \ + ((__m128)__builtin_ia32_vcvtpd2ps256_round_mask( \ + (__v4df)(__m256d)(A), (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R))) + +#define _mm256_mask_cvt_roundpd_ps(W, U, A, R) \ + ((__m128)__builtin_ia32_vcvtpd2ps256_round_mask( \ + (__v4df)(__m256d)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundpd_ps(U, A, R) \ + ((__m128)__builtin_ia32_vcvtpd2ps256_round_mask((__v4df)(__m256d)(A), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_cvt_roundpd_epi64(A, R) \ + ((__m256i)__builtin_ia32_vcvtpd2qq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvt_roundpd_epi64(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtpd2qq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundpd_epi64(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtpd2qq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundpd_epu32(A, R) \ + ((__m128i)__builtin_ia32_vcvtpd2udq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvt_roundpd_epu32(W, U, A, R) \ + ((__m128i)__builtin_ia32_vcvtpd2udq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4su)(__m128i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundpd_epu32(U, A, R) \ + ((__m128i)__builtin_ia32_vcvtpd2udq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundpd_epu64(A, R) \ + ((__m256i)__builtin_ia32_vcvtpd2uqq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvt_roundpd_epu64(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtpd2uqq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R))) + 
+#define _mm256_maskz_cvt_roundpd_epu64(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtpd2uqq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundph_epi32(A, R) \ + ((__m256i)__builtin_ia32_vcvtph2dq256_round_mask( \ + (__v8hf)(A), (__v8si)_mm256_undefined_si256(), (__mmask8)(-1), \ + (int)(R))) + +#define _mm256_mask_cvt_roundph_epi32(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2dq256_round_mask((__v8hf)(A), (__v8si)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundph_epi32(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2dq256_round_mask( \ + (__v8hf)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvt_roundph_pd(A, R) \ + ((__m256d)__builtin_ia32_vcvtph2pd256_round_mask( \ + (__v8hf)(A), (__v4df)_mm256_undefined_pd(), (__mmask8)(-1), (int)(R))) + +#define _mm256_mask_cvt_roundph_pd(W, U, A, R) \ + ((__m256d)__builtin_ia32_vcvtph2pd256_round_mask((__v8hf)(A), (__v4df)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundph_pd(U, A, R) \ + ((__m256d)__builtin_ia32_vcvtph2pd256_round_mask( \ + (__v8hf)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvtx_roundph_ps(A, R) \ + ((__m256)__builtin_ia32_vcvtph2psx256_round_mask( \ + (__v8hf)(A), (__v8sf)_mm256_undefined_ps(), (__mmask8)(-1), (int)(R))) + +#define _mm256_mask_cvtx_roundph_ps(W, U, A, R) \ + ((__m256)__builtin_ia32_vcvtph2psx256_round_mask((__v8hf)(A), (__v8sf)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtx_roundph_ps(U, A, R) \ + ((__m256)__builtin_ia32_vcvtph2psx256_round_mask( \ + (__v8hf)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvt_roundph_epi64(A, R) \ + ((__m256i)__builtin_ia32_vcvtph2qq256_round_mask( \ + (__v8hf)(A), (__v4di)_mm256_undefined_si256(), (__mmask8)(-1), \ + (int)(R))) + +#define _mm256_mask_cvt_roundph_epi64(W, U, A, R) \ + 
((__m256i)__builtin_ia32_vcvtph2qq256_round_mask((__v8hf)(A), (__v4di)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundph_epi64(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2qq256_round_mask( \ + (__v8hf)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvt_roundph_epu32(A, R) \ + ((__m256i)__builtin_ia32_vcvtph2udq256_round_mask( \ + (__v8hf)(A), (__v8su)_mm256_undefined_si256(), (__mmask8)(-1), \ + (int)(R))) + +#define _mm256_mask_cvt_roundph_epu32(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2udq256_round_mask((__v8hf)(A), (__v8su)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundph_epu32(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2udq256_round_mask( \ + (__v8hf)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvt_roundph_epu64(A, R) \ + ((__m256i)__builtin_ia32_vcvtph2uqq256_round_mask( \ + (__v8hf)(A), (__v4du)_mm256_undefined_si256(), (__mmask8)(-1), \ + (int)(R))) + +#define _mm256_mask_cvt_roundph_epu64(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2uqq256_round_mask((__v8hf)(A), (__v4du)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundph_epu64(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2uqq256_round_mask( \ + (__v8hf)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvt_roundph_epu16(A, R) \ + ((__m256i)__builtin_ia32_vcvtph2uw256_round_mask( \ + (__v16hf)(A), (__v16hu)_mm256_undefined_si256(), (__mmask16)(-1), \ + (int)(R))) + +#define _mm256_mask_cvt_roundph_epu16(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2uw256_round_mask((__v16hf)(A), (__v16hu)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundph_epu16(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2uw256_round_mask( \ + (__v16hf)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ + (int)(R))) + +#define _mm256_cvt_roundph_epi16(A, R) \ + ((__m256i)__builtin_ia32_vcvtph2w256_round_mask( \ + (__v16hf)(A), 
(__v16hi)_mm256_undefined_si256(), (__mmask16)(-1), \ + (int)(R))) + +#define _mm256_mask_cvt_roundph_epi16(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2w256_round_mask((__v16hf)(A), (__v16hi)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundph_epi16(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2w256_round_mask( \ + (__v16hf)(A), (__v16hi)_mm256_setzero_si256(), (__mmask16)(U), \ + (int)(R))) + +#define _mm256_cvt_roundps_epi32(A, R) \ + ((__m256i)__builtin_ia32_vcvtps2dq256_round_mask( \ + (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvt_roundps_epi32(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtps2dq256_round_mask( \ + (__v8sf)(__m256)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundps_epi32(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtps2dq256_round_mask( \ + (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundps_pd(A, R) \ + ((__m256d)__builtin_ia32_vcvtps2pd256_round_mask( \ + (__v4sf)(__m128)(A), (__v4df)_mm256_undefined_pd(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvt_roundps_pd(W, U, A, R) \ + ((__m256d)__builtin_ia32_vcvtps2pd256_round_mask( \ + (__v4sf)(__m128)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundps_pd(U, A, R) \ + ((__m256d)__builtin_ia32_vcvtps2pd256_round_mask( \ + (__v4sf)(__m128)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundps_ph(A, I) \ + ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ + (__v8hi)_mm_undefined_si128(), \ + (__mmask8)-1)) + +/* FIXME: We may use these way in future. 
+#define _mm256_cvt_roundps_ph(A, I) \ + ((__m128i)__builtin_ia32_vcvtps2ph256_round_mask( \ + (__v8sf)(__m256)(A), (int)(I), (__v8hi)_mm_undefined_si128(), \ + (__mmask8)-1)) +#define _mm256_mask_cvt_roundps_ph(U, W, A, I) \ + ((__m128i)__builtin_ia32_vcvtps2ph256_round_mask( \ + (__v8sf)(__m256)(A), (int)(I), (__v8hi)(__m128i)(U), (__mmask8)(W))) +#define _mm256_maskz_cvt_roundps_ph(W, A, I) \ + ((__m128i)__builtin_ia32_vcvtps2ph256_round_mask( \ + (__v8sf)(__m256)(A), (int)(I), (__v8hi)_mm_setzero_si128(), \ + (__mmask8)(W))) */ + +#define _mm256_cvtx_roundps_ph(A, R) \ + ((__m128h)__builtin_ia32_vcvtps2phx256_round_mask( \ + (__v8sf)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) + +#define _mm256_mask_cvtx_roundps_ph(W, U, A, R) \ + ((__m128h)__builtin_ia32_vcvtps2phx256_round_mask((__v8sf)(A), (__v8hf)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtx_roundps_ph(U, A, R) \ + ((__m128h)__builtin_ia32_vcvtps2phx256_round_mask( \ + (__v8sf)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvt_roundps_epi64(A, R) \ + ((__m256i)__builtin_ia32_vcvtps2qq256_round_mask( \ + (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvt_roundps_epi64(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtps2qq256_round_mask( \ + (__v4sf)(__m128)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundps_epi64(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtps2qq256_round_mask( \ + (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundps_epu32(A, R) \ + ((__m256i)__builtin_ia32_vcvtps2udq256_round_mask( \ + (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvt_roundps_epu32(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtps2udq256_round_mask( \ + (__v8sf)(__m256)(A), (__v8su)(__m256i)(W), (__mmask8)(U), (int)(R))) + +#define 
_mm256_maskz_cvt_roundps_epu32(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtps2udq256_round_mask( \ + (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundps_epu64(A, R) \ + ((__m256i)__builtin_ia32_vcvtps2uqq256_round_mask( \ + (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvt_roundps_epu64(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtps2uqq256_round_mask( \ + (__v4sf)(__m128)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundps_epu64(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtps2uqq256_round_mask( \ + (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundepi64_pd(A, R) \ + ((__m256d)__builtin_ia32_vcvtqq2pd256_round_mask( \ + (__v4di)(__m256i)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvt_roundepi64_pd(W, U, A, R) \ + ((__m256d)__builtin_ia32_vcvtqq2pd256_round_mask( \ + (__v4di)(__m256i)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundepi64_pd(U, A, R) \ + ((__m256d)__builtin_ia32_vcvtqq2pd256_round_mask( \ + (__v4di)(__m256i)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundepi64_ph(A, R) \ + ((__m128h)__builtin_ia32_vcvtqq2ph256_round_mask( \ + (__v4di)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) + +#define _mm256_mask_cvt_roundepi64_ph(W, U, A, R) \ + ((__m128h)__builtin_ia32_vcvtqq2ph256_round_mask((__v4di)(A), (__v8hf)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundepi64_ph(U, A, R) \ + ((__m128h)__builtin_ia32_vcvtqq2ph256_round_mask( \ + (__v4di)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvt_roundepi64_ps(A, R) \ + ((__m128)__builtin_ia32_vcvtqq2ps256_round_mask( \ + (__v4di)(__m256i)(A), (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R))) + +#define 
_mm256_mask_cvt_roundepi64_ps(W, U, A, R) \ + ((__m128)__builtin_ia32_vcvtqq2ps256_round_mask( \ + (__v4di)(__m256i)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundepi64_ps(U, A, R) \ + ((__m128)__builtin_ia32_vcvtqq2ps256_round_mask((__v4di)(__m256i)(A), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_cvtt_roundpd_epi32(A, R) \ + ((__m128i)__builtin_ia32_vcvttpd2dq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvtt_roundpd_epi32(W, U, A, R) \ + ((__m128i)__builtin_ia32_vcvttpd2dq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4si)(__m128i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundpd_epi32(U, A, R) \ + ((__m128i)__builtin_ia32_vcvttpd2dq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4si)_mm_setzero_si128(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvtt_roundpd_epi64(A, R) \ + ((__m256i)__builtin_ia32_vcvttpd2qq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvtt_roundpd_epi64(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttpd2qq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundpd_epi64(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttpd2qq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvtt_roundpd_epu32(A, R) \ + ((__m128i)__builtin_ia32_vcvttpd2udq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvtt_roundpd_epu32(W, U, A, R) \ + ((__m128i)__builtin_ia32_vcvttpd2udq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4su)(__m128i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundpd_epu32(U, A, R) \ + ((__m128i)__builtin_ia32_vcvttpd2udq256_round_mask( \ + (__v4df)(__m256d)(A), 
(__v4su)_mm_setzero_si128(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvtt_roundpd_epu64(A, R) \ + ((__m256i)__builtin_ia32_vcvttpd2uqq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvtt_roundpd_epu64(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttpd2uqq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundpd_epu64(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttpd2uqq256_round_mask( \ + (__v4df)(__m256d)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvtt_roundph_epi32(A, R) \ + ((__m256i)__builtin_ia32_vcvttph2dq256_round_mask( \ + (__v8hf)(A), (__v8si)_mm256_undefined_si256(), (__mmask8)(-1), \ + (int)(R))) + +#define _mm256_mask_cvtt_roundph_epi32(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2dq256_round_mask((__v8hf)(A), (__v8si)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundph_epi32(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2dq256_round_mask( \ + (__v8hf)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvtt_roundph_epi64(A, R) \ + ((__m256i)__builtin_ia32_vcvttph2qq256_round_mask( \ + (__v8hf)(A), (__v4di)_mm256_undefined_si256(), (__mmask8)(-1), \ + (int)(R))) + +#define _mm256_mask_cvtt_roundph_epi64(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2qq256_round_mask((__v8hf)(A), (__v4di)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundph_epi64(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2qq256_round_mask( \ + (__v8hf)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvtt_roundph_epu32(A, R) \ + ((__m256i)__builtin_ia32_vcvttph2udq256_round_mask( \ + (__v8hf)(A), (__v8su)_mm256_undefined_si256(), (__mmask8)(-1), \ + (int)(R))) + +#define _mm256_mask_cvtt_roundph_epu32(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2udq256_round_mask((__v8hf)(A), (__v8su)(W), \ + 
(__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundph_epu32(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2udq256_round_mask( \ + (__v8hf)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvtt_roundph_epu64(A, R) \ + ((__m256i)__builtin_ia32_vcvttph2uqq256_round_mask( \ + (__v8hf)(A), (__v4du)_mm256_undefined_si256(), (__mmask8)(-1), \ + (int)(R))) + +#define _mm256_mask_cvtt_roundph_epu64(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2uqq256_round_mask((__v8hf)(A), (__v4du)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundph_epu64(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2uqq256_round_mask( \ + (__v8hf)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvtt_roundph_epu16(A, R) \ + ((__m256i)__builtin_ia32_vcvttph2uw256_round_mask( \ + (__v16hf)(A), (__v16hu)_mm256_undefined_si256(), (__mmask16)(-1), \ + (int)(R))) + +#define _mm256_mask_cvtt_roundph_epu16(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2uw256_round_mask( \ + (__v16hf)(A), (__v16hu)(W), (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundph_epu16(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2uw256_round_mask( \ + (__v16hf)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ + (int)(R))) + +#define _mm256_cvtt_roundph_epi16(A, R) \ + ((__m256i)__builtin_ia32_vcvttph2w256_round_mask( \ + (__v16hf)(A), (__v16hi)_mm256_undefined_si256(), (__mmask16)(-1), \ + (int)(R))) + +#define _mm256_mask_cvtt_roundph_epi16(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2w256_round_mask((__v16hf)(A), (__v16hi)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundph_epi16(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2w256_round_mask( \ + (__v16hf)(A), (__v16hi)_mm256_setzero_si256(), (__mmask16)(U), \ + (int)(R))) + +#define _mm256_cvtt_roundps_epi32(A, R) \ + ((__m256i)__builtin_ia32_vcvttps2dq256_round_mask( \ + (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)-1, \ + (int)(R))) + 
+#define _mm256_mask_cvtt_roundps_epi32(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttps2dq256_round_mask( \ + (__v8sf)(__m256)(A), (__v8si)(__m256i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundps_epi32(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttps2dq256_round_mask( \ + (__v8sf)(__m256)(A), (__v8si)_mm256_setzero_si256(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvtt_roundps_epi64(A, R) \ + ((__m256i)__builtin_ia32_vcvttps2qq256_round_mask( \ + (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvtt_roundps_epi64(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttps2qq256_round_mask( \ + (__v4sf)(__m128)(A), (__v4di)(__m256i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundps_epi64(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttps2qq256_round_mask( \ + (__v4sf)(__m128)(A), (__v4di)_mm256_setzero_si256(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvtt_roundps_epu32(A, R) \ + ((__m256i)__builtin_ia32_vcvttps2udq256_round_mask( \ + (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvtt_roundps_epu32(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttps2udq256_round_mask( \ + (__v8sf)(__m256)(A), (__v8su)(__m256i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundps_epu32(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttps2udq256_round_mask( \ + (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvtt_roundps_epu64(A, R) \ + ((__m256i)__builtin_ia32_vcvttps2uqq256_round_mask( \ + (__v4sf)(__m128)(A), (__v4du)_mm256_setzero_si256(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvtt_roundps_epu64(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttps2uqq256_round_mask( \ + (__v4sf)(__m128)(A), (__v4du)(__m256i)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvtt_roundps_epu64(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttps2uqq256_round_mask( \ + (__v4sf)(__m128)(A), 
(__v4du)_mm256_setzero_si256(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundepu32_ph(A, R) \ + ((__m128h)__builtin_ia32_vcvtudq2ph256_round_mask( \ + (__v8su)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) + +#define _mm256_mask_cvt_roundepu32_ph(W, U, A, R) \ + ((__m128h)__builtin_ia32_vcvtudq2ph256_round_mask((__v8su)(A), (__v8hf)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundepu32_ph(U, A, R) \ + ((__m128h)__builtin_ia32_vcvtudq2ph256_round_mask( \ + (__v8su)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvt_roundepu32_ps(A, R) \ + ((__m256)__builtin_ia32_vcvtudq2ps256_round_mask( \ + (__v8su)(__m256i)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvt_roundepu32_ps(W, U, A, R) \ + ((__m256)__builtin_ia32_vcvtudq2ps256_round_mask( \ + (__v8su)(__m256i)(A), (__v8sf)(__m256)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundepu32_ps(U, A, R) \ + ((__m256)__builtin_ia32_vcvtudq2ps256_round_mask( \ + (__v8su)(__m256i)(A), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundepu64_pd(A, R) \ + ((__m256d)__builtin_ia32_vcvtuqq2pd256_round_mask( \ + (__v4du)(__m256i)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_cvt_roundepu64_pd(W, U, A, R) \ + ((__m256d)__builtin_ia32_vcvtuqq2pd256_round_mask( \ + (__v4du)(__m256i)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundepu64_pd(U, A, R) \ + ((__m256d)__builtin_ia32_vcvtuqq2pd256_round_mask( \ + (__v4du)(__m256i)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_cvt_roundepu64_ph(A, R) \ + ((__m128h)__builtin_ia32_vcvtuqq2ph256_round_mask( \ + (__v4du)(A), (__v8hf)_mm_undefined_ph(), (__mmask8)(-1), (int)(R))) + +#define _mm256_mask_cvt_roundepu64_ph(W, U, A, R) \ + ((__m128h)__builtin_ia32_vcvtuqq2ph256_round_mask((__v4du)(A), (__v8hf)(W), \ + (__mmask8)(U), 
(int)(R))) + +#define _mm256_maskz_cvt_roundepu64_ph(U, A, R) \ + ((__m128h)__builtin_ia32_vcvtuqq2ph256_round_mask( \ + (__v4du)(A), (__v8hf)_mm_setzero_ph(), (__mmask8)(U), (int)(R))) + +#define _mm256_cvt_roundepu64_ps(A, R) \ + ((__m128)__builtin_ia32_vcvtuqq2ps256_round_mask( \ + (__v4du)(__m256i)(A), (__v4sf)_mm_setzero_ps(), (__mmask8)-1, (int)(R))) + +#define _mm256_mask_cvt_roundepu64_ps(W, U, A, R) \ + ((__m128)__builtin_ia32_vcvtuqq2ps256_round_mask( \ + (__v4du)(__m256i)(A), (__v4sf)(__m128)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundepu64_ps(U, A, R) \ + ((__m128)__builtin_ia32_vcvtuqq2ps256_round_mask((__v4du)(__m256i)(A), \ + (__v4sf)_mm_setzero_ps(), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_cvt_roundepu16_ph(A, R) \ + ((__m256h)__builtin_ia32_vcvtuw2ph256_round_mask( \ + (__v16hu)(A), (__v16hf)_mm256_undefined_ph(), (__mmask16)(-1), \ + (int)(R))) + +#define _mm256_mask_cvt_roundepu16_ph(W, U, A, R) \ + ((__m256h)__builtin_ia32_vcvtuw2ph256_round_mask((__v16hu)(A), (__v16hf)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundepu16_ph(U, A, R) \ + ((__m256h)__builtin_ia32_vcvtuw2ph256_round_mask( \ + (__v16hu)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) + +#define _mm256_cvt_roundepi16_ph(A, R) \ + ((__m256h)__builtin_ia32_vcvtw2ph256_round_mask( \ + (__v16hi)(A), (__v16hf)_mm256_undefined_ph(), (__mmask16)(-1), \ + (int)(R))) + +#define _mm256_mask_cvt_roundepi16_ph(W, U, A, R) \ + ((__m256h)__builtin_ia32_vcvtw2ph256_round_mask((__v16hi)(A), (__v16hf)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_cvt_roundepi16_ph(U, A, R) \ + ((__m256h)__builtin_ia32_vcvtw2ph256_round_mask( \ + (__v16hi)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) + +#define _mm256_div_round_pd(A, B, R) \ + ((__m256d)__builtin_ia32_vdivpd256_round((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), (int)(R))) + +#define _mm256_mask_div_round_pd(W, U, A, B, R) \ + 
((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_div_round_pd((A), (B), (R)), \ + (__v4df)(__m256d)(W))) + +#define _mm256_maskz_div_round_pd(U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_div_round_pd((A), (B), (R)), \ + (__v4df)_mm256_setzero_pd())) + +#define _mm256_div_round_ph(A, B, R) \ + ((__m256h)__builtin_ia32_vdivph256_round((__v16hf)(__m256h)(A), \ + (__v16hf)(__m256h)(B), (int)(R))) + +#define _mm256_mask_div_round_ph(W, U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_div_round_ph((A), (B), (R)), \ + (__v16hf)(__m256h)(W))) + +#define _mm256_maskz_div_round_ph(U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_div_round_ph((A), (B), (R)), \ + (__v16hf)_mm256_setzero_ph())) + +#define _mm256_div_round_ps(A, B, R) \ + ((__m256)__builtin_ia32_vdivps256_round((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), (int)(R))) + +#define _mm256_mask_div_round_ps(W, U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_div_round_ps((A), (B), (R)), \ + (__v8sf)(__m256)(W))) + +#define _mm256_maskz_div_round_ps(U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_div_round_ps((A), (B), (R)), \ + (__v8sf)_mm256_setzero_ps())) + +#define _mm256_fcmadd_round_pch(A, B, C, R) \ + ((__m256h)__builtin_ia32_vfcmaddcph256_round_mask3( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_fcmadd_round_pch(A, U, B, C, R) \ + ((__m256h)__builtin_ia32_vfcmaddcph256_round_mask( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask3_fcmadd_round_pch(A, B, C, U, R) \ + ((__m256h)__builtin_ia32_vfcmaddcph256_round_mask3( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ + (__mmask8)(U), (int)(R))) + +#define 
_mm256_maskz_fcmadd_round_pch(U, A, B, C, R) \ + ((__m256h)__builtin_ia32_vfcmaddcph256_round_maskz( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_cmul_round_pch(A, B, R) \ + ((__m256h)__builtin_ia32_vfcmulcph256_round_mask( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), \ + (__v8sf)(__m256h)_mm256_undefined_ph(), (__mmask8)-1, (int)(R))) + +#define _mm256_mask_cmul_round_pch(W, U, A, B, R) \ + ((__m256h)__builtin_ia32_vfcmulcph256_round_mask( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_cmul_round_pch(U, A, B, R) \ + ((__m256h)__builtin_ia32_vfcmulcph256_round_mask( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), \ + (__v8sf)(__m256h)_mm256_setzero_ph(), (__mmask8)(U), (int)(R))) + +#define _mm256_fixupimm_round_pd(A, B, C, imm, R) \ + ((__m256d)__builtin_ia32_vfixupimmpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \ + (int)(imm), (__mmask8)-1, (int)(R))) + +#define _mm256_mask_fixupimm_round_pd(A, U, B, C, imm, R) \ + ((__m256d)__builtin_ia32_vfixupimmpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \ + (int)(imm), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_fixupimm_round_pd(U, A, B, C, imm, R) \ + ((__m256d)__builtin_ia32_vfixupimmpd256_round_maskz( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4di)(__m256i)(C), \ + (int)(imm), (__mmask8)(U), (int)(R))) + +#define _mm256_fixupimm_round_ps(A, B, C, imm, R) \ + ((__m256)__builtin_ia32_vfixupimmps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \ + (int)(imm), (__mmask8)-1, (int)(R))) + +#define _mm256_mask_fixupimm_round_ps(A, U, B, C, imm, R) \ + ((__m256)__builtin_ia32_vfixupimmps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \ + (int)(imm), (__mmask8)(U), (int)(R))) + +#define 
_mm256_maskz_fixupimm_round_ps(U, A, B, C, imm, R) \ + ((__m256)__builtin_ia32_vfixupimmps256_round_maskz( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8si)(__m256i)(C), \ + (int)(imm), (__mmask8)(U), (int)(R))) + +#define _mm256_fmadd_round_pd(A, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_fmadd_round_pd(A, U, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask3_fmadd_round_pd(A, B, C, U, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_mask3( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_fmadd_round_pd(U, A, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_fmsub_round_pd(A, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_fmsub_round_pd(A, U, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_fmsub_round_pd(U, A, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_fnmadd_round_pd(A, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ + -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask3_fnmadd_round_pd(A, B, C, U, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_mask3( \ + -(__v4df)(__m256d)(A), 
(__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_fnmadd_round_pd(U, A, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz( \ + -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_fnmsub_round_pd(A, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ + -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_maskz_fnmsub_round_pd(U, A, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_maskz( \ + -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_fmadd_round_ph(A, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)-1, (int)(R))) + +#define _mm256_mask_fmadd_round_ph(A, U, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_mask3_fmadd_round_ph(A, B, C, U, R) \ + ((__m256h)__builtin_ia32_vfmaddph256_round_mask3( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_fmadd_round_ph(U, A, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddph256_round_maskz( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_fmsub_round_ph(A, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ + (__mmask16)-1, (int)(R))) + +#define _mm256_mask_fmsub_round_ph(A, U, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_fmsub_round_ph(U, A, B, C, R) \ 
+ ((__m256h)__builtin_ia32_vfmaddph256_round_maskz( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_fnmadd_round_ph(A, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ + (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)-1, (int)(R))) + +#define _mm256_mask3_fnmadd_round_ph(A, B, C, U, R) \ + ((__m256h)__builtin_ia32_vfmaddph256_round_mask3( \ + -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_fnmadd_round_ph(U, A, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddph256_round_maskz( \ + -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_fnmsub_round_ph(A, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ + (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ + (__mmask16)-1, (int)(R))) + +#define _mm256_maskz_fnmsub_round_ph(U, A, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddph256_round_maskz( \ + -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_fmadd_round_ps(A, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_fmadd_round_ps(A, U, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask3_fmadd_round_ps(A, B, C, U, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_mask3( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_fmadd_round_ps(U, A, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_maskz( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)(U), 
(int)(R))) + +#define _mm256_fmsub_round_ps(A, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_fmsub_round_ps(A, U, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_fmsub_round_ps(U, A, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_maskz( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_fnmadd_round_ps(A, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ + (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask3_fnmadd_round_ps(A, B, C, U, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_mask3( \ + -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_fnmadd_round_ps(U, A, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_maskz( \ + -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_fnmsub_round_ps(A, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ + (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_maskz_fnmsub_round_ps(U, A, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_maskz( \ + -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_fmadd_round_pch(A, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddcph256_round_mask3( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_fmadd_round_pch(A, U, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddcph256_round_mask( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), 
(__v8sf)(__m256h)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask3_fmadd_round_pch(A, B, C, U, R) \ + ((__m256h)__builtin_ia32_vfmaddcph256_round_mask3( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_fmadd_round_pch(U, A, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddcph256_round_maskz( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_fmaddsub_round_pd(A, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_fmaddsub_round_pd(A, U, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask3_fmaddsub_round_pd(A, B, C, U, R) \ + ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask3( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_fmaddsub_round_pd(U, A, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddsubpd256_round_maskz( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_fmsubadd_round_pd(A, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_fmsubadd_round_pd(A, U, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddsubpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_fmsubadd_round_pd(U, A, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddsubpd256_round_maskz( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_fmaddsub_round_ph(A, B, C, R) \ + 
((__m256h)__builtin_ia32_vfmaddsubph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)-1, (int)(R))) + +#define _mm256_mask_fmaddsub_round_ph(A, U, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_mask3_fmaddsub_round_ph(A, B, C, U, R) \ + ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask3( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_fmaddsub_round_ph(U, A, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddsubph256_round_maskz( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_fmsubadd_round_ph(A, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ + (__mmask16)-1, (int)(R))) + +#define _mm256_mask_fmsubadd_round_ph(A, U, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddsubph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_fmsubadd_round_ph(U, A, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddsubph256_round_maskz( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_fmaddsub_round_ps(A, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddsubps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_fmaddsub_round_ps(A, U, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddsubps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask3_fmaddsub_round_ps(A, B, C, U, R) \ + ((__m256)__builtin_ia32_vfmaddsubps256_round_mask3( \ + (__v8sf)(__m256)(A), 
(__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_fmaddsub_round_ps(U, A, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddsubps256_round_maskz( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_fmsubadd_round_ps(A, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddsubps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_fmsubadd_round_ps(A, U, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddsubps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_fmsubadd_round_ps(U, A, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddsubps256_round_maskz( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) +#define _mm256_mask3_fmsub_round_pd(A, B, C, U, R) \ + ((__m256d)__builtin_ia32_vfmsubpd256_round_mask3( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask3_fmsubadd_round_pd(A, B, C, U, R) \ + ((__m256d)__builtin_ia32_vfmsubaddpd256_round_mask3( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask_fnmadd_round_pd(A, U, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ + (__v4df)(__m256d)(A), -(__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask_fnmsub_round_pd(A, U, B, C, R) \ + ((__m256d)__builtin_ia32_vfmaddpd256_round_mask( \ + (__v4df)(__m256d)(A), -(__v4df)(__m256d)(B), -(__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask3_fnmsub_round_pd(A, B, C, U, R) \ + ((__m256d)__builtin_ia32_vfmsubpd256_round_mask3( \ + -(__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask3_fmsub_round_ph(A, B, C, U, 
R) \ + ((__m256h)__builtin_ia32_vfmsubph256_round_mask3( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_mask3_fmsubadd_round_ph(A, B, C, U, R) \ + ((__m256h)__builtin_ia32_vfmsubaddph256_round_mask3( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_mask_fnmadd_round_ph(A, U, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ + (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_mask_fnmsub_round_ph(A, U, B, C, R) \ + ((__m256h)__builtin_ia32_vfmaddph256_round_mask( \ + (__v16hf)(__m256h)(A), -(__v16hf)(__m256h)(B), -(__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_mask3_fnmsub_round_ph(A, B, C, U, R) \ + ((__m256h)__builtin_ia32_vfmsubph256_round_mask3( \ + -(__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(C), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_mask3_fmsub_round_ps(A, B, C, U, R) \ + ((__m256)__builtin_ia32_vfmsubps256_round_mask3( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask3_fmsubadd_round_ps(A, B, C, U, R) \ + ((__m256)__builtin_ia32_vfmsubaddps256_round_mask3( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask_fnmadd_round_ps(A, U, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ + (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), (__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask_fnmsub_round_ps(A, U, B, C, R) \ + ((__m256)__builtin_ia32_vfmaddps256_round_mask( \ + (__v8sf)(__m256)(A), -(__v8sf)(__m256)(B), -(__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask3_fnmsub_round_ps(A, B, C, U, R) \ + ((__m256)__builtin_ia32_vfmsubps256_round_mask3( \ + -(__v8sf)(__m256)(A), (__v8sf)(__m256)(B), 
(__v8sf)(__m256)(C), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mul_round_pch(A, B, R) \ + ((__m256h)__builtin_ia32_vfmulcph256_round_mask( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), \ + (__v8sf)(__m256h)_mm256_undefined_ph(), (__mmask8)-1, (int)(R))) + +#define _mm256_mask_mul_round_pch(W, U, A, B, R) \ + ((__m256h)__builtin_ia32_vfmulcph256_round_mask( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), (__v8sf)(__m256h)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_mul_round_pch(U, A, B, R) \ + ((__m256h)__builtin_ia32_vfmulcph256_round_mask( \ + (__v8sf)(__m256h)(A), (__v8sf)(__m256h)(B), \ + (__v8sf)(__m256h)_mm256_setzero_ph(), (__mmask8)(U), (int)(R))) + +#define _mm256_getexp_round_pd(A, R) \ + ((__m256d)__builtin_ia32_vgetexppd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)_mm256_undefined_pd(), (__mmask8)-1, \ + (int)(R))) + +#define _mm256_mask_getexp_round_pd(W, U, A, R) \ + ((__m256d)__builtin_ia32_vgetexppd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_getexp_round_pd(U, A, R) \ + ((__m256d)__builtin_ia32_vgetexppd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)_mm256_setzero_pd(), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_getexp_round_ph(A, R) \ + ((__m256h)__builtin_ia32_vgetexpph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, \ + (int)(R))) + +#define _mm256_mask_getexp_round_ph(W, U, A, R) \ + ((__m256h)__builtin_ia32_vgetexpph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(W), (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_getexp_round_ph(U, A, R) \ + ((__m256h)__builtin_ia32_vgetexpph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), \ + (int)(R))) + +#define _mm256_getexp_round_ps(A, R) \ + ((__m256)__builtin_ia32_vgetexpps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, \ + (int)(R))) + +#define 
_mm256_mask_getexp_round_ps(W, U, A, R) \ + ((__m256)__builtin_ia32_vgetexpps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_getexp_round_ps(U, A, R) \ + ((__m256)__builtin_ia32_vgetexpps256_round_mask((__v8sf)(__m256)(A), \ + (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_getmant_round_pd(A, B, C, R) \ + ((__m256d)__builtin_ia32_vgetmantpd256_round_mask( \ + (__v4df)(__m256d)(A), (int)(((C) << 2) | (B)), \ + (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R))) + +#define _mm256_mask_getmant_round_pd(W, U, A, B, C, R) \ + ((__m256d)__builtin_ia32_vgetmantpd256_round_mask( \ + (__v4df)(__m256d)(A), (int)(((C) << 2) | (B)), (__v4df)(__m256d)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_getmant_round_pd(U, A, B, C, R) \ + ((__m256d)__builtin_ia32_vgetmantpd256_round_mask( \ + (__v4df)(__m256d)(A), (int)(((C) << 2) | (B)), \ + (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R))) + +#define _mm256_getmant_round_ph(A, B, C, R) \ + ((__m256h)__builtin_ia32_vgetmantph256_round_mask( \ + (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ + (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R))) + +#define _mm256_mask_getmant_round_ph(W, U, A, B, C, R) \ + ((__m256h)__builtin_ia32_vgetmantph256_round_mask( \ + (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), (__v16hf)(__m256h)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_getmant_round_ph(U, A, B, C, R) \ + ((__m256h)__builtin_ia32_vgetmantph256_round_mask( \ + (__v16hf)(__m256h)(A), (int)(((C) << 2) | (B)), \ + (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) + +#define _mm256_getmant_round_ps(A, B, C, R) \ + ((__m256)__builtin_ia32_vgetmantps256_round_mask( \ + (__v8sf)(__m256)(A), (int)(((C) << 2) | (B)), \ + (__v8sf)_mm256_undefined_ps(), (__mmask8)-1, (int)(R))) + +#define _mm256_mask_getmant_round_ps(W, U, A, B, C, R) \ + ((__m256)__builtin_ia32_vgetmantps256_round_mask( \ + 
(__v8sf)(__m256)(A), (int)(((C) << 2) | (B)), (__v8sf)(__m256)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_getmant_round_ps(U, A, B, C, R) \ + ((__m256)__builtin_ia32_vgetmantps256_round_mask( \ + (__v8sf)(__m256)(A), (int)(((C) << 2) | (B)), \ + (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R))) + +#define _mm256_max_round_pd(A, B, R) \ + ((__m256d)__builtin_ia32_vmaxpd256_round((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), (int)(R))) + +#define _mm256_mask_max_round_pd(W, U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_max_round_pd((A), (B), (R)), \ + (__v4df)(__m256d)(W))) + +#define _mm256_maskz_max_round_pd(U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_max_round_pd((A), (B), (R)), \ + (__v4df)_mm256_setzero_pd())) + +#define _mm256_max_round_ph(A, B, R) \ + ((__m256h)__builtin_ia32_vmaxph256_round((__v16hf)(__m256h)(A), \ + (__v16hf)(__m256h)(B), (int)(R))) + +#define _mm256_mask_max_round_ph(W, U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_max_round_ph((A), (B), (R)), \ + (__v16hf)(__m256h)(W))) + +#define _mm256_maskz_max_round_ph(U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_max_round_ph((A), (B), (R)), \ + (__v16hf)_mm256_setzero_ph())) + +#define _mm256_max_round_ps(A, B, R) \ + ((__m256)__builtin_ia32_vmaxps256_round((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), (int)(R))) + +#define _mm256_mask_max_round_ps(W, U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_max_round_ps((A), (B), (R)), \ + (__v8sf)(__m256)(W))) + +#define _mm256_maskz_max_round_ps(U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_max_round_ps((A), (B), (R)), \ + (__v8sf)_mm256_setzero_ps())) + +#define _mm256_min_round_pd(A, B, R) \ + ((__m256d)__builtin_ia32_vminpd256_round((__v4df)(__m256d)(A), \ + 
(__v4df)(__m256d)(B), (int)(R))) + +#define _mm256_mask_min_round_pd(W, U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_min_round_pd((A), (B), (R)), \ + (__v4df)(__m256d)(W))) + +#define _mm256_maskz_min_round_pd(U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_min_round_pd((A), (B), (R)), \ + (__v4df)_mm256_setzero_pd())) + +#define _mm256_min_round_ph(A, B, R) \ + ((__m256h)__builtin_ia32_vminph256_round((__v16hf)(__m256h)(A), \ + (__v16hf)(__m256h)(B), (int)(R))) + +#define _mm256_mask_min_round_ph(W, U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_min_round_ph((A), (B), (R)), \ + (__v16hf)(__m256h)(W))) + +#define _mm256_maskz_min_round_ph(U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_min_round_ph((A), (B), (R)), \ + (__v16hf)_mm256_setzero_ph())) + +#define _mm256_min_round_ps(A, B, R) \ + ((__m256)__builtin_ia32_vminps256_round((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), (int)(R))) + +#define _mm256_mask_min_round_ps(W, U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_min_round_ps((A), (B), (R)), \ + (__v8sf)(__m256)(W))) + +#define _mm256_maskz_min_round_ps(U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_min_round_ps((A), (B), (R)), \ + (__v8sf)_mm256_setzero_ps())) + +#define _mm256_mul_round_pd(A, B, R) \ + ((__m256d)__builtin_ia32_vmulpd256_round((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), (int)(R))) + +#define _mm256_mask_mul_round_pd(W, U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_mul_round_pd((A), (B), (R)), \ + (__v4df)(__m256d)(W))) + +#define _mm256_maskz_mul_round_pd(U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_mul_round_pd((A), (B), (R)), \ + (__v4df)_mm256_setzero_pd())) + +#define _mm256_mul_round_ph(A, B, R) 
\ + ((__m256h)__builtin_ia32_vmulph256_round((__v16hf)(__m256h)(A), \ + (__v16hf)(__m256h)(B), (int)(R))) + +#define _mm256_mask_mul_round_ph(W, U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_mul_round_ph((A), (B), (R)), \ + (__v16hf)(__m256h)(W))) + +#define _mm256_maskz_mul_round_ph(U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_mul_round_ph((A), (B), (R)), \ + (__v16hf)_mm256_setzero_ph())) + +#define _mm256_mul_round_ps(A, B, R) \ + ((__m256)__builtin_ia32_vmulps256_round((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), (int)(R))) + +#define _mm256_mask_mul_round_ps(W, U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_mul_round_ps((A), (B), (R)), \ + (__v8sf)(__m256)(W))) + +#define _mm256_maskz_mul_round_ps(U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_mul_round_ps((A), (B), (R)), \ + (__v8sf)_mm256_setzero_ps())) + +#define _mm256_range_round_pd(A, B, C, R) \ + ((__m256d)__builtin_ia32_vrangepd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ + (__v4df)_mm256_setzero_pd(), (__mmask8)-1, (int)(R))) + +#define _mm256_mask_range_round_pd(W, U, A, B, C, R) \ + ((__m256d)__builtin_ia32_vrangepd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ + (__v4df)(__m256d)(W), (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_range_round_pd(U, A, B, C, R) \ + ((__m256d)__builtin_ia32_vrangepd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (int)(C), \ + (__v4df)_mm256_setzero_pd(), (__mmask8)(U), (int)(R))) + +#define _mm256_range_round_ps(A, B, C, R) \ + ((__m256)__builtin_ia32_vrangeps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ + (__v8sf)_mm256_setzero_ps(), (__mmask8)-1, (int)(R))) + +#define _mm256_mask_range_round_ps(W, U, A, B, C, R) \ + ((__m256)__builtin_ia32_vrangeps256_round_mask( \ + (__v8sf)(__m256)(A), 
(__v8sf)(__m256)(B), (int)(C), (__v8sf)(__m256)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_range_round_ps(U, A, B, C, R) \ + ((__m256)__builtin_ia32_vrangeps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (int)(C), \ + (__v8sf)_mm256_setzero_ps(), (__mmask8)(U), (int)(R))) + +#define _mm256_reduce_round_pd(A, B, R) \ + ((__m256d)__builtin_ia32_vreducepd256_round_mask( \ + (__v4df)(__m256d)(A), (int)(B), (__v4df)_mm256_setzero_pd(), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_reduce_round_pd(W, U, A, B, R) \ + ((__m256d)__builtin_ia32_vreducepd256_round_mask( \ + (__v4df)(__m256d)(A), (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_maskz_reduce_round_pd(U, A, B, R) \ + ((__m256d)__builtin_ia32_vreducepd256_round_mask( \ + (__v4df)(__m256d)(A), (int)(B), (__v4df)_mm256_setzero_pd(), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_mask_reduce_round_ph(W, U, A, imm, R) \ + ((__m256h)__builtin_ia32_vreduceph256_round_mask( \ + (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)(__m256h)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_reduce_round_ph(U, A, imm, R) \ + ((__m256h)__builtin_ia32_vreduceph256_round_mask( \ + (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_reduce_round_ph(A, imm, R) \ + ((__m256h)__builtin_ia32_vreduceph256_round_mask( \ + (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_undefined_ph(), \ + (__mmask16)-1, (int)(R))) + +#define _mm256_reduce_round_ps(A, B, R) \ + ((__m256)__builtin_ia32_vreduceps256_round_mask( \ + (__v8sf)(__m256)(A), (int)(B), (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_reduce_round_ps(W, U, A, B, R) \ + ((__m256)__builtin_ia32_vreduceps256_round_mask( \ + (__v8sf)(__m256)(A), (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U), \ + (int)(R))) + +#define _mm256_maskz_reduce_round_ps(U, A, B, R) \ + ((__m256)__builtin_ia32_vreduceps256_round_mask( \ + 
(__v8sf)(__m256)(A), (int)(B), (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_roundscale_round_pd(A, imm, R) \ + ((__m256d)__builtin_ia32_vrndscalepd256_round_mask( \ + (__v4df)(__m256d)(A), (int)(imm), (__v4df)_mm256_undefined_pd(), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_roundscale_round_pd(A, B, C, imm, R) \ + ((__m256d)__builtin_ia32_vrndscalepd256_round_mask( \ + (__v4df)(__m256d)(C), (int)(imm), (__v4df)(__m256d)(A), (__mmask8)(B), \ + (int)(R))) + +#define _mm256_maskz_roundscale_round_pd(A, B, imm, R) \ + ((__m256d)__builtin_ia32_vrndscalepd256_round_mask( \ + (__v4df)(__m256d)(B), (int)(imm), (__v4df)_mm256_setzero_pd(), \ + (__mmask8)(A), (int)(R))) + +#define _mm256_roundscale_round_ph(A, imm, R) \ + ((__m256h)__builtin_ia32_vrndscaleph256_round_mask( \ + (__v16hf)(__m256h)(A), (int)(imm), (__v16hf)_mm256_undefined_ph(), \ + (__mmask16)-1, (int)(R))) + +#define _mm256_mask_roundscale_round_ph(A, B, C, imm, R) \ + ((__m256h)__builtin_ia32_vrndscaleph256_round_mask( \ + (__v16hf)(__m256h)(C), (int)(imm), (__v16hf)(__m256h)(A), \ + (__mmask16)(B), (int)(R))) + +#define _mm256_maskz_roundscale_round_ph(A, B, imm, R) \ + ((__m256h)__builtin_ia32_vrndscaleph256_round_mask( \ + (__v16hf)(__m256h)(B), (int)(imm), (__v16hf)_mm256_setzero_ph(), \ + (__mmask16)(A), (int)(R))) + +#define _mm256_roundscale_round_ps(A, imm, R) \ + ((__m256)__builtin_ia32_vrndscaleps256_round_mask( \ + (__v8sf)(__m256)(A), (int)(imm), (__v8sf)_mm256_undefined_ps(), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_roundscale_round_ps(A, B, C, imm, R) \ + ((__m256)__builtin_ia32_vrndscaleps256_round_mask( \ + (__v8sf)(__m256)(C), (int)(imm), (__v8sf)(__m256)(A), (__mmask8)(B), \ + (int)(R))) + +#define _mm256_maskz_roundscale_round_ps(A, B, imm, R) \ + ((__m256)__builtin_ia32_vrndscaleps256_round_mask( \ + (__v8sf)(__m256)(B), (int)(imm), (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(A), (int)(R))) + +#define _mm256_scalef_round_pd(A, B, R) \ + 
((__m256d)__builtin_ia32_vscalefpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), \ + (__v4df)_mm256_undefined_pd(), (__mmask8)-1, (int)(R))) + +#define _mm256_mask_scalef_round_pd(W, U, A, B, R) \ + ((__m256d)__builtin_ia32_vscalefpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)(__m256d)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_scalef_round_pd(U, A, B, R) \ + ((__m256d)__builtin_ia32_vscalefpd256_round_mask( \ + (__v4df)(__m256d)(A), (__v4df)(__m256d)(B), (__v4df)_mm256_setzero_pd(), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_scalef_round_ph(A, B, R) \ + ((__m256h)__builtin_ia32_vscalefph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), \ + (__v16hf)_mm256_undefined_ph(), (__mmask16)-1, (int)(R))) + +#define _mm256_mask_scalef_round_ph(W, U, A, B, R) \ + ((__m256h)__builtin_ia32_vscalefph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), (__v16hf)(__m256h)(W), \ + (__mmask16)(U), (int)(R))) + +#define _mm256_maskz_scalef_round_ph(U, A, B, R) \ + ((__m256h)__builtin_ia32_vscalefph256_round_mask( \ + (__v16hf)(__m256h)(A), (__v16hf)(__m256h)(B), \ + (__v16hf)_mm256_setzero_ph(), (__mmask16)(U), (int)(R))) + +#define _mm256_scalef_round_ps(A, B, R) \ + ((__m256)__builtin_ia32_vscalefps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)_mm256_undefined_ps(), \ + (__mmask8)-1, (int)(R))) + +#define _mm256_mask_scalef_round_ps(W, U, A, B, R) \ + ((__m256)__builtin_ia32_vscalefps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)(__m256)(W), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_maskz_scalef_round_ps(U, A, B, R) \ + ((__m256)__builtin_ia32_vscalefps256_round_mask( \ + (__v8sf)(__m256)(A), (__v8sf)(__m256)(B), (__v8sf)_mm256_setzero_ps(), \ + (__mmask8)(U), (int)(R))) + +#define _mm256_sqrt_round_pd(A, R) \ + ((__m256d)__builtin_ia32_vsqrtpd256_round((__v4df)(__m256d)(A), (int)(R))) + +#define _mm256_mask_sqrt_round_pd(W, U, A, R) 
\ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_sqrt_round_pd((A), (R)), \ + (__v4df)(__m256d)(W))) + +#define _mm256_maskz_sqrt_round_pd(U, A, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_sqrt_round_pd((A), (R)), \ + (__v4df)_mm256_setzero_pd())) + +#define _mm256_sqrt_round_ph(A, R) \ + ((__m256h)__builtin_ia32_vsqrtph256_round((__v16hf)(__m256h)(A), (int)(R))) + +#define _mm256_mask_sqrt_round_ph(W, U, A, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_sqrt_round_ph((A), (R)), \ + (__v16hf)(__m256h)(W))) + +#define _mm256_maskz_sqrt_round_ph(U, A, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_sqrt_round_ph((A), (R)), \ + (__v16hf)_mm256_setzero_ph())) + +#define _mm256_sqrt_round_ps(A, R) \ + ((__m256)__builtin_ia32_vsqrtps256_round((__v8sf)(__m256)(A), (int)(R))) + +#define _mm256_mask_sqrt_round_ps(W, U, A, R) \ + ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm256_sqrt_round_ps((A), (R)), \ + (__v8sf)(__m256)(W))) + +#define _mm256_maskz_sqrt_round_ps(U, A, R) \ + ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ + (__v8sf)_mm256_sqrt_round_ps((A), (R)), \ + (__v8sf)_mm256_setzero_ps())) + +#define _mm256_sub_round_pd(A, B, R) \ + ((__m256d)__builtin_ia32_vsubpd256_round((__v4df)(__m256d)(A), \ + (__v4df)(__m256d)(B), (int)(R))) + +#define _mm256_mask_sub_round_pd(W, U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_sub_round_pd((A), (B), (R)), \ + (__v4df)(__m256d)(W))) + +#define _mm256_maskz_sub_round_pd(U, A, B, R) \ + ((__m256d)__builtin_ia32_selectpd_256( \ + (__mmask8)(U), (__v4df)_mm256_sub_round_pd((A), (B), (R)), \ + (__v4df)_mm256_setzero_pd())) + +#define _mm256_sub_round_ph(A, B, R) \ + ((__m256h)__builtin_ia32_vsubph256_round((__v16hf)(__m256h)(A), \ + (__v16hf)(__m256h)(B), (int)(R))) + +#define _mm256_mask_sub_round_ph(W, U, A, B, R) \ + 
((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_sub_round_ph((A), (B), (R)), \ + (__v16hf)(__m256h)(W))) + +#define _mm256_maskz_sub_round_ph(U, A, B, R) \ + ((__m256h)__builtin_ia32_selectph_256( \ + (__mmask16)(U), (__v16hf)_mm256_sub_round_ph((A), (B), (R)), \ + (__v16hf)_mm256_setzero_ph())) + +#define _mm256_sub_round_ps(A, B, R) \ + ((__m256)__builtin_ia32_vsubps256_round((__v8sf)(__m256)(A), \ + (__v8sf)(__m256)(B), (int)(R))) + +#define _mm256_mask_sub_round_ps(W, U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_sub_round_ps((A), (B), (R)), \ + (__v8sf)(__m256)(W))) + +#define _mm256_maskz_sub_round_ps(U, A, B, R) \ + ((__m256)__builtin_ia32_selectps_256( \ + (__mmask8)(U), (__v8sf)_mm256_sub_round_ps((A), (B), (R)), \ + (__v8sf)_mm256_setzero_ps())) + +#undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128 + +#endif /* __AVX10_2NIINTRIN_H */ +#endif /* __SSE2__ */ diff --git a/lib/include/avx10_2satcvtdsintrin.h b/lib/include/avx10_2satcvtdsintrin.h new file mode 100644 index 000000000000..59028436311e --- /dev/null +++ b/lib/include/avx10_2satcvtdsintrin.h @@ -0,0 +1,496 @@ +/*===----------- avx10_2satcvtdsintrin.h - AVX512SATCVTDS intrinsics --------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." +#endif // __IMMINTRIN_H + +#ifndef __AVX10_2SATCVTDSINTRIN_H +#define __AVX10_2SATCVTDSINTRIN_H + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(256))) + +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ + __min_vector_width__(128))) + +#define _mm_cvtts_roundsd_i32(__A, __R) \ + ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128)(__A), (const int)(__R))) + +#define _mm_cvtts_roundsd_si32(__A, __R) \ + ((int)__builtin_ia32_vcvttsd2sis32((__v2df)(__m128d)(__A), (const int)(__R))) + +#define _mm_cvtts_roundsd_u32(__A, __R) \ + ((unsigned int)__builtin_ia32_vcvttsd2usis32((__v2df)(__m128d)(__A), \ + (const int)(__R))) + +#define _mm_cvtts_roundss_i32(__A, __R) \ + ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(__A), (const int)(__R))) + +#define _mm_cvtts_roundss_si32(__A, __R) \ + ((int)__builtin_ia32_vcvttss2sis32((__v4sf)(__m128)(__A), (const int)(__R))) + +#define _mm_cvtts_roundss_u32(__A, __R) \ + ((unsigned int)__builtin_ia32_vcvttss2usis32((__v4sf)(__m128)(__A), \ + (const int)(__R))) + +#ifdef __x86_64__ +#define _mm_cvtts_roundss_u64(__A, __R) \ + ((unsigned long long)__builtin_ia32_vcvttss2usis64((__v4sf)(__m128)(__A), \ + (const int)(__R))) + +#define _mm_cvtts_roundsd_u64(__A, __R) \ + ((unsigned long long)__builtin_ia32_vcvttsd2usis64((__v2df)(__m128d)(__A), \ + (const int)(__R))) + +#define _mm_cvtts_roundss_i64(__A, __R) \ + ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(__A), \ + (const int)(__R))) + +#define _mm_cvtts_roundss_si64(__A, __R) \ + ((long long)__builtin_ia32_vcvttss2sis64((__v4sf)(__m128)(__A), \ + (const int)(__R))) + +#define _mm_cvtts_roundsd_si64(__A, __R) \ + ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(__A), \ + (const int)(__R))) + +#define _mm_cvtts_roundsd_i64(__A, __R) \ + ((long long)__builtin_ia32_vcvttsd2sis64((__v2df)(__m128d)(__A), \ + (const int)(__R))) +#endif /* __x86_64__ */ + +// 128 Bit : Double -> int +static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 _mm_cvttspd_epi32(__m128d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( + (__v2df)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1))); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttspd_epi32(__m128i __W, __mmask8 __U, __m128d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask((__v2df)__A, (__v4si)__W, + __U)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttspd_epi32(__mmask16 __U, __m128d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2dqs128_mask( + (__v2df)__A, (__v4si)(__m128i)_mm_setzero_si128(), __U)); +} + +// 256 Bit : Double -> int +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvttspd_epi32(__m256d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( + (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttspd_epi32(__m128i __W, __mmask8 __U, __m256d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( + (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttspd_epi32(__mmask8 __U, __m256d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( + (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm256_cvtts_roundpd_epi32(__A, __R) \ + ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \ + (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_undefined_si128(), \ + (__mmask8) - 1, (int)(__R))) + +#define _mm256_mask_cvtts_roundpd_epi32(__W, __U, __A, __R) \ + ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \ + (__v4df)(__m256d)__A, (__v4si)(__m128i)__W, (__mmask8)__U, (int)(__R))) + +#define _mm256_maskz_cvtts_roundpd_epi32(__U, __A, __R) \ + ((__m128i)__builtin_ia32_vcvttpd2dqs256_round_mask( \ + (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_setzero_si128(), \ + (__mmask8)__U, (int)(__R))) + +// 
128 Bit : Double -> uint +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu32(__m128d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( + (__v2df)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1))); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttspd_epu32(__m128i __W, __mmask8 __U, __m128d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( + (__v2df)__A, (__v4si)(__m128i)__W, (__mmask8)__U)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttspd_epu32(__mmask8 __U, __m128d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2udqs128_mask( + (__v2df)__A, (__v4si)(__m128i)_mm_setzero_si128(), __U)); +} + +// 256 Bit : Double -> uint +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_cvttspd_epu32(__m256d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( + (__v4df)__A, (__v4si)_mm_undefined_si128(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttspd_epu32(__m128i __W, __mmask8 __U, __m256d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( + (__v4df)__A, (__v4si)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttspd_epu32(__mmask8 __U, __m256d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( + (__v4df)__A, (__v4si)_mm_setzero_si128(), __U, _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm256_cvtts_roundpd_epu32(__A, __R) \ + ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \ + (__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_undefined_si128(), \ + (__mmask8) - 1, (int)(__R))) + +#define _mm256_mask_cvtts_roundpd_epu32(__W, __U, __A, __R) \ + ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \ + (__v4df)(__m256d)__A, (__v4si)(__m128i)__W, (__mmask8)__U, (int)(__R))) + +#define _mm256_maskz_cvtts_roundpd_epu32(__U, __A, __R) \ + ((__m128i)__builtin_ia32_vcvttpd2udqs256_round_mask( \ + 
(__v4df)(__m256d)__A, (__v4si)(__m128i)_mm_setzero_si128(), \ + (__mmask8)__U, (int)(__R))) + +// 128 Bit : Double -> long +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epi64(__m128d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( + (__v2df)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttspd_epi64(__m128i __W, __mmask8 __U, __m128d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask((__v2df)__A, (__v2di)__W, + (__mmask8)__U)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttspd_epi64(__mmask8 __U, __m128d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2qqs128_mask( + (__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U)); +} + +// 256 Bit : Double -> long +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvttspd_epi64(__m256d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( + (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttspd_epi64(__m256i __W, __mmask8 __U, __m256d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( + (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttspd_epi64(__mmask8 __U, __m256d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( + (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm256_cvtts_roundpd_epi64(__A, __R) \ + ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \ + (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \ + (int)__R)) + +#define _mm256_mask_cvtts_roundpd_epi64(__W, __U, __A, __R) \ + ((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask((__v4df)__A, (__v4di)__W, \ + (__mmask8)__U, (int)__R)) + +#define _mm256_maskz_cvtts_roundpd_epi64(__U, __A, __R) \ + 
((__m256i)__builtin_ia32_vcvttpd2qqs256_round_mask( \ + (__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, (int)__R)) + +// 128 Bit : Double -> ulong +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttspd_epu64(__m128d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( + (__v2df)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttspd_epu64(__m128i __W, __mmask8 __U, __m128d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask((__v2df)__A, (__v2di)__W, + (__mmask8)__U)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttspd_epu64(__mmask8 __U, __m128d __A) { + return ((__m128i)__builtin_ia32_vcvttpd2uqqs128_mask( + (__v2df)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U)); +} + +// 256 Bit : Double -> ulong + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvttspd_epu64(__m256d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( + (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttspd_epu64(__m256i __W, __mmask8 __U, __m256d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( + (__v4df)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttspd_epu64(__mmask8 __U, __m256d __A) { + return ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( + (__v4df)__A, (__v4di)_mm256_setzero_si256(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm256_cvtts_roundpd_epu64(__A, __R) \ + ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \ + (__v4df)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \ + (int)__R)) + +#define _mm256_mask_cvtts_roundpd_epu64(__W, __U, __A, __R) \ + ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \ + (__v4df)__A, (__v4di)__W, (__mmask8)__U, (int)__R)) + +#define _mm256_maskz_cvtts_roundpd_epu64(__U, 
__A, __R) \ + ((__m256i)__builtin_ia32_vcvttpd2uqqs256_round_mask( \ + (__v4df)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, (int)__R)) + +// 128 Bit : float -> int +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi32(__m128 __A) { + return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( + (__v4sf)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1))); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttsps_epi32(__m128i __W, __mmask8 __U, __m128 __A) { + return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask((__v4sf)__A, (__v4si)__W, + (__mmask8)__U)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttsps_epi32(__mmask8 __U, __m128 __A) { + return ((__m128i)__builtin_ia32_vcvttps2dqs128_mask( + (__v4sf)__A, (__v4si)(__m128i)_mm_setzero_si128(), (__mmask8)__U)); +} + +// 256 Bit : float -> int +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvttsps_epi32(__m256 __A) { + return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( + (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttsps_epi32(__m256i __W, __mmask8 __U, __m256 __A) { + return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( + (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttsps_epi32(__mmask8 __U, __m256 __A) { + return ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( + (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm256_cvtts_roundps_epi32(__A, __R) \ + ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \ + (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_undefined_si256(), \ + (__mmask8) - 1, (int)(__R))) + +#define _mm256_mask_cvtts_roundps_epi32(__W, __U, __A, __R) \ + ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \ + (__v8sf)(__m256)__A, (__v8si)(__m256i)__W, (__mmask8)__U, (int)(__R))) 
+ +#define _mm256_maskz_cvtts_roundps_epi32(__U, __A, __R) \ + ((__m256i)__builtin_ia32_vcvttps2dqs256_round_mask( \ + (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_setzero_si256(), \ + (__mmask8)__U, (int)(__R))) + +// 128 Bit : float -> uint +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu32(__m128 __A) { + return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( + (__v4sf)__A, (__v4si)(__m128i)_mm_undefined_si128(), (__mmask8)(-1))); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttsps_epu32(__m128i __W, __mmask8 __U, __m128 __A) { + return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask((__v4sf)__A, (__v4si)__W, + (__mmask8)__U)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttsps_epu32(__mmask8 __U, __m128 __A) { + return ((__m128i)__builtin_ia32_vcvttps2udqs128_mask( + (__v4sf)__A, (__v4si)_mm_setzero_si128(), (__mmask8)__U)); +} + +// 256 Bit : float -> uint + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvttsps_epu32(__m256 __A) { + return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( + (__v8sf)__A, (__v8si)_mm256_undefined_si256(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttsps_epu32(__m256i __W, __mmask8 __U, __m256 __A) { + return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( + (__v8sf)__A, (__v8si)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttsps_epu32(__mmask8 __U, __m256 __A) { + return ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( + (__v8sf)__A, (__v8si)_mm256_setzero_si256(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm256_cvtts_roundps_epu32(__A, __R) \ + ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \ + (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_undefined_si256(), \ + (__mmask8) - 1, (int)(__R))) + +#define _mm256_mask_cvtts_roundps_epu32(__W, __U, __A, __R) \ + ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( 
\ + (__v8sf)(__m256)__A, (__v8si)(__m256i)__W, (__mmask8)__U, (int)(__R))) + +#define _mm256_maskz_cvtts_roundps_epu32(__U, __A, __R) \ + ((__m256i)__builtin_ia32_vcvttps2udqs256_round_mask( \ + (__v8sf)(__m256)__A, (__v8si)(__m256i)_mm256_setzero_si256(), \ + (__mmask8)__U, (int)(__R))) + +// 128 bit : float -> long +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epi64(__m128 __A) { + return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( + (__v4sf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttsps_epi64(__m128i __W, __mmask8 __U, __m128 __A) { + return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( + (__v4sf)__A, (__v2di)(__m128i)__W, (__mmask8)__U)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttsps_epi64(__mmask8 __U, __m128 __A) { + return ((__m128i)__builtin_ia32_vcvttps2qqs128_mask( + (__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U)); +} +// 256 bit : float -> long + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvttsps_epi64(__m128 __A) { + return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( + (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttsps_epi64(__m256i __W, __mmask8 __U, __m128 __A) { + return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( + (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttsps_epi64(__mmask8 __U, __m128 __A) { + return ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( + (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm256_cvtts_roundps_epi64(__A, __R) \ + ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \ + (__v4sf)(__m128)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \ + (int)__R)) + +#define _mm256_mask_cvtts_roundps_epi64(__W, __U, __A, __R) \ + 
((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \ + (__v4sf)(__m128)__A, (__v4di)__W, (__mmask8)__U, (int)__R)) + +#define _mm256_maskz_cvtts_roundps_epi64(__U, __A, __R) \ + ((__m256i)__builtin_ia32_vcvttps2qqs256_round_mask( \ + (__v4sf)(__m128)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, \ + (int)__R)) + +// 128 bit : float -> ulong +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttsps_epu64(__m128 __A) { + return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( + (__v4sf)__A, (__v2di)_mm_undefined_si128(), (__mmask8)-1)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_cvttsps_epu64(__m128i __W, __mmask8 __U, __m128 __A) { + return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( + (__v4sf)__A, (__v2di)(__m128i)__W, (__mmask8)__U)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_cvttsps_epu64(__mmask8 __U, __m128 __A) { + return ((__m128i)__builtin_ia32_vcvttps2uqqs128_mask( + (__v4sf)__A, (__v2di)_mm_setzero_si128(), (__mmask8)__U)); +} +// 256 bit : float -> ulong + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_cvttsps_epu64(__m128 __A) { + return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( + (__v4sf)__A, (__v4di)_mm256_undefined_si256(), (__mmask8)-1, + _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_cvttsps_epu64(__m256i __W, __mmask8 __U, __m128 __A) { + return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( + (__v4sf)__A, (__v4di)__W, __U, _MM_FROUND_CUR_DIRECTION)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvttsps_epu64(__mmask8 __U, __m128 __A) { + return ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( + (__v4sf)__A, (__v4di)_mm256_setzero_si256(), __U, + _MM_FROUND_CUR_DIRECTION)); +} + +#define _mm256_cvtts_roundps_epu64(__A, __R) \ + ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \ + (__v4sf)(__m128)__A, (__v4di)_mm256_undefined_si256(), (__mmask8) - 1, \ + (int)__R)) + +#define 
_mm256_mask_cvtts_roundps_epu64(__W, __U, __A, __R) \ + ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \ + (__v4sf)(__m128)__A, (__v4di)__W, (__mmask8)__U, (int)__R)) + +#define _mm256_maskz_cvtts_roundps_epu64(__U, __A, __R) \ + ((__m256i)__builtin_ia32_vcvttps2uqqs256_round_mask( \ + (__v4sf)(__m128)__A, (__v4di)_mm256_setzero_si256(), (__mmask8)__U, \ + (int)__R)) + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 +#endif // __AVX10_2SATCVTDSINTRIN_H diff --git a/lib/include/avx10_2satcvtintrin.h b/lib/include/avx10_2satcvtintrin.h new file mode 100644 index 000000000000..d16c60e6382d --- /dev/null +++ b/lib/include/avx10_2satcvtintrin.h @@ -0,0 +1,444 @@ +/*===----------- avx10_2satcvtintrin.h - AVX10_2SATCVT intrinsics ----------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." 
+#endif // __IMMINTRIN_H + +#ifndef __AVX10_2SATCVTINTRIN_H +#define __AVX10_2SATCVTINTRIN_H + +#define _mm_ipcvtbf16_epi8(A) \ + ((__m128i)__builtin_ia32_vcvtbf162ibs128((__v8bf)(__m128bh)(A))) + +#define _mm_mask_ipcvtbf16_epi8(W, U, A) \ + ((__m128i)__builtin_ia32_selectw_128( \ + (__mmask8)(U), (__v8hi)_mm_ipcvtbf16_epi8(A), (__v8hi)(__m128i)(W))) + +#define _mm_maskz_ipcvtbf16_epi8(U, A) \ + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_ipcvtbf16_epi8(A), \ + (__v8hi)_mm_setzero_si128())) + +#define _mm256_ipcvtbf16_epi8(A) \ + ((__m256i)__builtin_ia32_vcvtbf162ibs256((__v16bf)(__m256bh)(A))) + +#define _mm256_mask_ipcvtbf16_epi8(W, U, A) \ + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_ipcvtbf16_epi8(A), \ + (__v16hi)(__m256i)(W))) + +#define _mm256_maskz_ipcvtbf16_epi8(U, A) \ + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_ipcvtbf16_epi8(A), \ + (__v16hi)_mm256_setzero_si256())) + +#define _mm_ipcvtbf16_epu8(A) \ + ((__m128i)__builtin_ia32_vcvtbf162iubs128((__v8bf)(__m128bh)(A))) + +#define _mm_mask_ipcvtbf16_epu8(W, U, A) \ + ((__m128i)__builtin_ia32_selectw_128( \ + (__mmask8)(U), (__v8hi)_mm_ipcvtbf16_epu8(A), (__v8hi)(__m128i)(W))) + +#define _mm_maskz_ipcvtbf16_epu8(U, A) \ + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_ipcvtbf16_epu8(A), \ + (__v8hi)_mm_setzero_si128())) + +#define _mm256_ipcvtbf16_epu8(A) \ + ((__m256i)__builtin_ia32_vcvtbf162iubs256((__v16bf)(__m256bh)(A))) + +#define _mm256_mask_ipcvtbf16_epu8(W, U, A) \ + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_ipcvtbf16_epu8(A), \ + (__v16hi)(__m256i)(W))) + +#define _mm256_maskz_ipcvtbf16_epu8(U, A) \ + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_ipcvtbf16_epu8(A), \ + (__v16hi)_mm256_setzero_si256())) + +#define _mm_ipcvtph_epi8(A) \ + ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \ + (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), 
(__mmask8)-1)) + +#define _mm_mask_ipcvtph_epi8(W, U, A) \ + ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A), \ + (__v8hu)(W), (__mmask8)(U))) + +#define _mm_maskz_ipcvtph_epi8(U, A) \ + ((__m128i)__builtin_ia32_vcvtph2ibs128_mask( \ + (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U))) + +#define _mm256_ipcvtph_epi8(A) \ + ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_mask_ipcvtph_epi8(W, U, A) \ + ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \ + (__v16hu)(W), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_maskz_ipcvtph_epi8(U, A) \ + ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \ + (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_ipcvt_roundph_epi8(A, R) \ + ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \ + (__v16hu)_mm256_setzero_si256(), \ + (__mmask16)-1, (const int)R)) + +#define _mm256_mask_ipcvt_roundph_epi8(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2ibs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R)) + +#define _mm256_maskz_ipcvt_roundph_epi8(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A), \ + (__v16hu)_mm256_setzero_si256(), \ + (__mmask16)(U), (const int)R)) + +#define _mm_ipcvtph_epu8(A) \ + ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \ + (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1)) + +#define _mm_mask_ipcvtph_epu8(W, U, A) \ + ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A), \ + (__v8hu)(W), (__mmask8)(U))) + +#define _mm_maskz_ipcvtph_epu8(U, A) \ + ((__m128i)__builtin_ia32_vcvtph2iubs128_mask( \ + (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U))) + +#define _mm256_ipcvtph_epu8(A) \ + ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ + 
(__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_mask_ipcvtph_epu8(W, U, A) \ + ((__m256i)__builtin_ia32_vcvtph2iubs256_mask((__v16hf)(__m256h)(A), \ + (__v16hu)(W), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_maskz_ipcvtph_epu8(U, A) \ + ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \ + (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_ipcvt_roundph_epu8(A, R) \ + ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ + (const int)R)) + +#define _mm256_mask_ipcvt_roundph_epu8(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R)) + +#define _mm256_maskz_ipcvt_roundph_epu8(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtph2iubs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ + (const int)R)) + +#define _mm_ipcvtps_epi8(A) \ + ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \ + (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1)) + +#define _mm_mask_ipcvtps_epi8(W, U, A) \ + ((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A), \ + (__v4su)(W), (__mmask8)(U))) + +#define _mm_maskz_ipcvtps_epi8(U, A) \ + ((__m128i)__builtin_ia32_vcvtps2ibs128_mask( \ + (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U))) + +#define _mm256_ipcvtps_epi8(A) \ + ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \ + (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_mask_ipcvtps_epi8(W, U, A) \ + ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \ + (__v8su)(W), (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_maskz_ipcvtps_epi8(U, A) \ + ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \ + (__v8sf)(__m256)(A), 
(__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_ipcvt_roundps_epi8(A, R) \ + ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \ + (__v8su)_mm256_setzero_si256(), \ + (__mmask8)-1, (const int)R)) + +#define _mm256_mask_ipcvt_roundps_epi8(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtps2ibs256_mask( \ + (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R)) + +#define _mm256_maskz_ipcvt_roundps_epi8(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A), \ + (__v8su)_mm256_setzero_si256(), \ + (__mmask8)(U), (const int)R)) + +#define _mm_ipcvtps_epu8(A) \ + ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \ + (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1)) + +#define _mm_mask_ipcvtps_epu8(W, U, A) \ + ((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A), \ + (__v4su)(W), (__mmask8)(U))) + +#define _mm_maskz_ipcvtps_epu8(U, A) \ + ((__m128i)__builtin_ia32_vcvtps2iubs128_mask( \ + (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U))) + +#define _mm256_ipcvtps_epu8(A) \ + ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \ + (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_mask_ipcvtps_epu8(W, U, A) \ + ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \ + (__v8su)(W), (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_maskz_ipcvtps_epu8(U, A) \ + ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \ + (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_ipcvt_roundps_epu8(A, R) \ + ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \ + (__v8su)_mm256_setzero_si256(), \ + (__mmask8)-1, (const int)R)) + +#define _mm256_mask_ipcvt_roundps_epu8(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvtps2iubs256_mask( \ + (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R)) + 
+#define _mm256_maskz_ipcvt_roundps_epu8(U, A, R) \ + ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A), \ + (__v8su)_mm256_setzero_si256(), \ + (__mmask8)(U), (const int)R)) + +#define _mm_ipcvttbf16_epi8(A) \ + ((__m128i)__builtin_ia32_vcvttbf162ibs128((__v8bf)(__m128bh)(A))) + +#define _mm_mask_ipcvttbf16_epi8(W, U, A) \ + ((__m128i)__builtin_ia32_selectw_128( \ + (__mmask8)(U), (__v8hi)_mm_ipcvttbf16_epi8(A), (__v8hi)(__m128i)(W))) + +#define _mm_maskz_ipcvttbf16_epi8(U, A) \ + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_ipcvttbf16_epi8(A), \ + (__v8hi)_mm_setzero_si128())) + +#define _mm256_ipcvttbf16_epi8(A) \ + ((__m256i)__builtin_ia32_vcvttbf162ibs256((__v16bf)(__m256bh)(A))) + +#define _mm256_mask_ipcvttbf16_epi8(W, U, A) \ + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_ipcvttbf16_epi8(A), \ + (__v16hi)(__m256i)(W))) + +#define _mm256_maskz_ipcvttbf16_epi8(U, A) \ + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_ipcvttbf16_epi8(A), \ + (__v16hi)_mm256_setzero_si256())) + +#define _mm_ipcvttbf16_epu8(A) \ + ((__m128i)__builtin_ia32_vcvttbf162iubs128((__v8bf)(__m128bh)(A))) + +#define _mm_mask_ipcvttbf16_epu8(W, U, A) \ + ((__m128i)__builtin_ia32_selectw_128( \ + (__mmask8)(U), (__v8hi)_mm_ipcvttbf16_epu8(A), (__v8hi)(__m128i)(W))) + +#define _mm_maskz_ipcvttbf16_epu8(U, A) \ + ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ + (__v8hi)_mm_ipcvttbf16_epu8(A), \ + (__v8hi)_mm_setzero_si128())) + +#define _mm256_ipcvttbf16_epu8(A) \ + ((__m256i)__builtin_ia32_vcvttbf162iubs256((__v16bf)(__m256bh)(A))) + +#define _mm256_mask_ipcvttbf16_epu8(W, U, A) \ + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_ipcvttbf16_epu8(A), \ + (__v16hi)(__m256i)(W))) + +#define _mm256_maskz_ipcvttbf16_epu8(U, A) \ + ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ + (__v16hi)_mm256_ipcvttbf16_epu8(A), \ + (__v16hi)_mm256_setzero_si256())) + +#define 
_mm_ipcvttph_epi8(A) \ + ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \ + (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1)) + +#define _mm_mask_ipcvttph_epi8(W, U, A) \ + ((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A), \ + (__v8hu)(W), (__mmask8)(U))) + +#define _mm_maskz_ipcvttph_epi8(U, A) \ + ((__m128i)__builtin_ia32_vcvttph2ibs128_mask( \ + (__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U))) + +#define _mm256_ipcvttph_epi8(A) \ + ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_mask_ipcvttph_epi8(W, U, A) \ + ((__m256i)__builtin_ia32_vcvttph2ibs256_mask((__v16hf)(__m256h)(A), \ + (__v16hu)(W), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_maskz_ipcvttph_epi8(U, A) \ + ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \ + (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_ipcvtt_roundph_epi8(A, R) \ + ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ + (const int)R)) + +#define _mm256_mask_ipcvtt_roundph_epi8(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R)) + +#define _mm256_maskz_ipcvtt_roundph_epi8(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2ibs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ + (const int)R)) + +#define _mm_ipcvttph_epu8(A) \ + ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \ + (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1)) + +#define _mm_mask_ipcvttph_epu8(W, U, A) \ + ((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A), \ + (__v8hu)(W), (__mmask8)(U))) + +#define _mm_maskz_ipcvttph_epu8(U, A) \ + ((__m128i)__builtin_ia32_vcvttph2iubs128_mask( \ + 
(__v8hf)(__m128h)(A), (__v8hu)(_mm_setzero_si128()), (__mmask8)(U))) + +#define _mm256_ipcvttph_epu8(A) \ + ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_mask_ipcvttph_epu8(W, U, A) \ + ((__m256i)__builtin_ia32_vcvttph2iubs256_mask((__v16hf)(__m256h)(A), \ + (__v16hu)(W), (__mmask16)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_maskz_ipcvttph_epu8(U, A) \ + ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)(_mm256_setzero_si256()), \ + (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_ipcvtt_roundph_epu8(A, R) \ + ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1, \ + (const int)R)) + +#define _mm256_mask_ipcvtt_roundph_epu8(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)(W), (__mmask16)(U), (const int)R)) + +#define _mm256_maskz_ipcvtt_roundph_epu8(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttph2iubs256_mask( \ + (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)(U), \ + (const int)R)) + +#define _mm_ipcvttps_epi8(A) \ + ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \ + (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1)) + +#define _mm_mask_ipcvttps_epi8(W, U, A) \ + ((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A), \ + (__v4su)(W), (__mmask8)(U))) + +#define _mm_maskz_ipcvttps_epi8(U, A) \ + ((__m128i)__builtin_ia32_vcvttps2ibs128_mask( \ + (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U))) + +#define _mm256_ipcvttps_epi8(A) \ + ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \ + (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_mask_ipcvttps_epi8(W, U, A) \ + ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \ + (__v8su)(W), 
(__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_maskz_ipcvttps_epi8(U, A) \ + ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \ + (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_ipcvtt_roundps_epi8(A, R) \ + ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \ + (__v8su)_mm256_setzero_si256(), \ + (__mmask8)-1, (const int)R)) + +#define _mm256_mask_ipcvtt_roundps_epi8(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttps2ibs256_mask( \ + (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R)) + +#define _mm256_maskz_ipcvtt_roundps_epi8(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A), \ + (__v8su)_mm256_setzero_si256(), \ + (__mmask8)(U), (const int)R)) + +#define _mm_ipcvttps_epu8(A) \ + ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \ + (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1)) + +#define _mm_mask_ipcvttps_epu8(W, U, A) \ + ((__m128i)__builtin_ia32_vcvttps2iubs128_mask((__v4sf)(__m128)(A), \ + (__v4su)(W), (__mmask8)(U))) + +#define _mm_maskz_ipcvttps_epu8(U, A) \ + ((__m128i)__builtin_ia32_vcvttps2iubs128_mask( \ + (__v4sf)(__m128)(A), (__v4su)(_mm_setzero_si128()), (__mmask8)(U))) + +#define _mm256_ipcvttps_epu8(A) \ + ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ + (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_mask_ipcvttps_epu8(W, U, A) \ + ((__m256i)__builtin_ia32_vcvttps2iubs256_mask((__v8sf)(__m256)(A), \ + (__v8su)(W), (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_maskz_ipcvttps_epu8(U, A) \ + ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ + (__v8sf)(__m256)(A), (__v8su)(_mm256_setzero_si256()), (__mmask8)(U), \ + _MM_FROUND_CUR_DIRECTION)) + +#define _mm256_ipcvtt_roundps_epu8(A, R) \ + ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ + (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1, \ + 
(const int)R)) + +#define _mm256_mask_ipcvtt_roundps_epu8(W, U, A, R) \ + ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ + (__v8sf)(__m256)(A), (__v8su)(W), (__mmask8)(U), (const int)R)) + +#define _mm256_maskz_ipcvtt_roundps_epu8(U, A, R) \ + ((__m256i)__builtin_ia32_vcvttps2iubs256_mask( \ + (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)(U), \ + (const int)R)) +#endif // __AVX10_2SATCVTINTRIN_H diff --git a/lib/include/avx2intrin.h b/lib/include/avx2intrin.h index 096cae01b57d..dc9fc0731432 100644 --- a/lib/include/avx2intrin.h +++ b/lib/include/avx2intrin.h @@ -15,12 +15,21 @@ #define __AVX2INTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__EVEX512__) && !defined(__AVX10_1_512__) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx2,no-evex512"), __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx2,no-evex512"), __min_vector_width__(128))) +#else +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx2"), \ + __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx2"), \ + __min_vector_width__(128))) +#endif /* SSE4 Multiple Packed Sums of Absolute Difference. 
*/ /// Computes sixteen sum of absolute difference (SAD) operations on sets of diff --git a/lib/include/avx512bitalgintrin.h b/lib/include/avx512bitalgintrin.h index bad265ceb7db..3c446b34e788 100644 --- a/lib/include/avx512bitalgintrin.h +++ b/lib/include/avx512bitalgintrin.h @@ -23,7 +23,7 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi16(__m512i __A) { - return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A); + return (__m512i)__builtin_elementwise_popcount((__v32hu)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -45,7 +45,7 @@ _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi8(__m512i __A) { - return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A); + return (__m512i)__builtin_elementwise_popcount((__v64qu)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS diff --git a/lib/include/avx512fintrin.h b/lib/include/avx512fintrin.h index 4f172c74b31c..45e7eeb5327d 100644 --- a/lib/include/avx512fintrin.h +++ b/lib/include/avx512fintrin.h @@ -175,12 +175,21 @@ typedef enum __attribute__((__always_inline__, __nodebug__, \ __target__("avx512f,no-evex512"))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + /* Create vectors with repeated elements */ -static __inline __m512i __DEFAULT_FN_ATTRS512 -_mm512_setzero_si512(void) -{ - return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; +static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_setzero_si512(void) { + return __extension__(__m512i)(__v8di){0, 0, 0, 0, 0, 0, 0, 0}; } #define 
_mm512_setzero_epi32 _mm512_setzero_si512 @@ -256,20 +265,16 @@ _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) (__v8di) _mm512_setzero_si512()); } - -static __inline __m512 __DEFAULT_FN_ATTRS512 -_mm512_setzero_ps(void) -{ - return __extension__ (__m512){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; +static __inline __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR _mm512_setzero_ps(void) { + return __extension__(__m512){0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; } #define _mm512_setzero _mm512_setzero_ps -static __inline __m512d __DEFAULT_FN_ATTRS512 -_mm512_setzero_pd(void) -{ - return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; +static __inline __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR +_mm512_setzero_pd(void) { + return __extension__(__m512d){0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; } static __inline __m512 __DEFAULT_FN_ATTRS512 @@ -9775,5 +9780,8 @@ _mm512_cvtsi512_si32(__m512i __A) { #undef __DEFAULT_FN_ATTRS512 #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS512_CONSTEXPR +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif /* __AVX512FINTRIN_H */ diff --git a/lib/include/avx512vlbitalgintrin.h b/lib/include/avx512vlbitalgintrin.h index 377e3a5ea571..1b01fe0b9d81 100644 --- a/lib/include/avx512vlbitalgintrin.h +++ b/lib/include/avx512vlbitalgintrin.h @@ -27,7 +27,7 @@ static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi16(__m256i __A) { - return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A); + return (__m256i)__builtin_elementwise_popcount((__v16hu)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -49,7 +49,7 @@ _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi16(__m128i __A) { - return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A); + return 
(__m128i)__builtin_elementwise_popcount((__v8hu)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -71,7 +71,7 @@ _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi8(__m256i __A) { - return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A); + return (__m256i)__builtin_elementwise_popcount((__v32qu)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -93,7 +93,7 @@ _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi8(__m128i __A) { - return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A); + return (__m128i)__builtin_elementwise_popcount((__v16qu)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 diff --git a/lib/include/avx512vpopcntdqintrin.h b/lib/include/avx512vpopcntdqintrin.h index e73e7e4f7131..e24c2c5e1bcd 100644 --- a/lib/include/avx512vpopcntdqintrin.h +++ b/lib/include/avx512vpopcntdqintrin.h @@ -21,8 +21,15 @@ __target__("avx512vpopcntdq,evex512"), \ __min_vector_width__(512))) -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) { - return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A); +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_popcnt_epi64(__m512i __A) { + return (__m512i)__builtin_elementwise_popcount((__v8du)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS @@ -36,8 +43,9 @@ _mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) { - return (__m512i)__builtin_ia32_vpopcntd_512((__v16si)__A); +static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_popcnt_epi32(__m512i __A) { + return 
(__m512i)__builtin_elementwise_popcount((__v16su)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS diff --git a/lib/include/avx512vpopcntdqvlintrin.h b/lib/include/avx512vpopcntdqvlintrin.h index b2df2e84d3ed..b6c819b0cb85 100644 --- a/lib/include/avx512vpopcntdqvlintrin.h +++ b/lib/include/avx512vpopcntdqvlintrin.h @@ -25,9 +25,17 @@ __target__("avx512vpopcntdq,avx512vl,no-evex512"), \ __min_vector_width__(256))) -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr +#else +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 +#endif + +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_popcnt_epi64(__m128i __A) { - return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A); + return (__m128i)__builtin_elementwise_popcount((__v2du)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -41,9 +49,9 @@ _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_popcnt_epi32(__m128i __A) { - return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A); + return (__m128i)__builtin_elementwise_popcount((__v4su)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -57,9 +65,9 @@ _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_popcnt_epi64(__m256i __A) { - return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A); + return (__m256i)__builtin_elementwise_popcount((__v4du)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 @@ -73,9 
+81,9 @@ _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_popcnt_epi32(__m256i __A) { - return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A); + return (__m256i)__builtin_elementwise_popcount((__v8su)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/lib/include/avxintrin.h b/lib/include/avxintrin.h index 4983f3311370..8e497a982349 100644 --- a/lib/include/avxintrin.h +++ b/lib/include/avxintrin.h @@ -50,12 +50,29 @@ typedef __bf16 __m256bh __attribute__((__vector_size__(32), __aligned__(32))); #endif /* Define the default attributes for the functions in this file. */ +#if defined(__EVEX512__) && !defined(__AVX10_1_512__) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \ __min_vector_width__(256))) #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, __target__("avx,no-evex512"), \ __min_vector_width__(128))) +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx"), \ + __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx"), \ + __min_vector_width__(128))) +#endif + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS128 +#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS +#endif /* Arithmetic */ /// Adds two 256-bit vectors of [4 x double]. @@ -3689,7 +3706,7 @@ _mm256_undefined_si256(void) /// A double-precision floating-point value used to initialize bits [63:0] /// of the result. 
/// \returns An initialized 256-bit floating-point vector of [4 x double]. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_pd(double __a, double __b, double __c, double __d) { return __extension__ (__m256d){ __d, __c, __b, __a }; @@ -3728,7 +3745,7 @@ _mm256_set_pd(double __a, double __b, double __c, double __d) /// A single-precision floating-point value used to initialize bits [31:0] /// of the result. /// \returns An initialized 256-bit floating-point vector of [8 x float]. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set_ps(float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) { @@ -3955,7 +3972,7 @@ _mm256_set_epi64x(long long __a, long long __b, long long __c, long long __d) /// A double-precision floating-point value used to initialize bits [255:192] /// of the result. /// \returns An initialized 256-bit floating-point vector of [4 x double]. -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_pd(double __a, double __b, double __c, double __d) { return _mm256_set_pd(__d, __c, __b, __a); @@ -3995,7 +4012,7 @@ _mm256_setr_pd(double __a, double __b, double __c, double __d) /// A single-precision floating-point value used to initialize bits [255:224] /// of the result. /// \returns An initialized 256-bit floating-point vector of [8 x float]. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setr_ps(float __a, float __b, float __c, float __d, float __e, float __f, float __g, float __h) { @@ -4212,7 +4229,7 @@ _mm256_setr_epi64x(long long __a, long long __b, long long __c, long long __d) /// A double-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 256-bit floating-point vector of [4 x double]. 
-static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_pd(double __w) { return _mm256_set_pd(__w, __w, __w, __w); @@ -4231,7 +4248,7 @@ _mm256_set1_pd(double __w) /// A single-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 256-bit floating-point vector of [8 x float]. -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_set1_ps(float __w) { return _mm256_set_ps(__w, __w, __w, __w, __w, __w, __w, __w); @@ -4322,10 +4339,8 @@ _mm256_set1_epi64x(long long __q) /// This intrinsic corresponds to the VXORPS instruction. /// /// \returns A 256-bit vector of [4 x double] with all elements set to zero. -static __inline __m256d __DEFAULT_FN_ATTRS -_mm256_setzero_pd(void) -{ - return __extension__ (__m256d){ 0.0, 0.0, 0.0, 0.0 }; +static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_pd(void) { + return __extension__(__m256d){0.0, 0.0, 0.0, 0.0}; } /// Constructs a 256-bit floating-point vector of [8 x float] with all @@ -4336,9 +4351,7 @@ _mm256_setzero_pd(void) /// This intrinsic corresponds to the VXORPS instruction. /// /// \returns A 256-bit vector of [8 x float] with all elements set to zero. -static __inline __m256 __DEFAULT_FN_ATTRS -_mm256_setzero_ps(void) -{ +static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_setzero_ps(void) { return __extension__ (__m256){ 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f }; } @@ -4349,9 +4362,8 @@ _mm256_setzero_ps(void) /// This intrinsic corresponds to the VXORPS instruction. /// /// \returns A 256-bit integer vector initialized to zero. 
-static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_setzero_si256(void) -{ +static __inline __m256i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_setzero_si256(void) { return __extension__ (__m256i)(__v4di){ 0, 0, 0, 0 }; } @@ -5121,6 +5133,8 @@ _mm256_storeu2_m128i(__m128i_u *__addr_hi, __m128i_u *__addr_lo, __m256i __a) } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS128_CONSTEXPR #endif /* __AVXINTRIN_H */ diff --git a/lib/include/avxvnniint16intrin.h b/lib/include/avxvnniint16intrin.h index e4d342a8b45b..805d249911c1 100644 --- a/lib/include/avxvnniint16intrin.h +++ b/lib/include/avxvnniint16intrin.h @@ -15,14 +15,6 @@ #ifndef __AVXVNNIINT16INTRIN_H #define __AVXVNNIINT16INTRIN_H -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint16"), \ - __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint16"), \ - __min_vector_width__(256))) - /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate /// signed 16-bit results. 
Sum these 2 results with the corresponding @@ -53,12 +45,9 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsud_epi32(__m128i __W, - __m128i __A, - __m128i __B) { - return (__m128i)__builtin_ia32_vpdpwsud128((__v4si)__W, (__v4si)__A, - (__v4si)__B); -} +#define _mm_dpwsud_epi32(__W, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpwsud128((__v4si)(__W), (__v4si)(__A), \ + (__v4si)(__B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate @@ -90,11 +79,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsud_epi32(__m128i __W, /// ENDFOR /// dst[MAX:256] := 0 /// \endcode -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpwsud_epi32(__m256i __W, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpwsud256((__v8si)__W, (__v8si)__A, - (__v8si)__B); -} +#define _mm256_dpwsud_epi32(__W, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpwsud256((__v8si)(__W), (__v8si)(__A), \ + (__v8si)(__B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate @@ -127,12 +114,9 @@ _mm256_dpwsud_epi32(__m256i __W, __m256i __A, __m256i __B) { /// ENDFOR /// dst[MAX:128] := 0 /// \endcode -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsuds_epi32(__m128i __W, - __m128i __A, - __m128i __B) { - return (__m128i)__builtin_ia32_vpdpwsuds128((__v4si)__W, (__v4si)__A, - (__v4si)__B); -} +#define _mm_dpwsuds_epi32(__W, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpwsuds128((__v4si)(__W), (__v4si)(__A), \ + (__v4si)(__B))) /// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate @@ -165,11 +149,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwsuds_epi32(__m128i __W, /// ENDFOR /// dst[MAX:256] := 0 
/// \endcode -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpwsuds_epi32(__m256i __W, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpwsuds256((__v8si)__W, (__v8si)__A, - (__v8si)__B); -} +#define _mm256_dpwsuds_epi32(__W, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpwsuds256((__v8si)(__W), (__v8si)(__A), \ + (__v8si)(__B))) /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding signed 16-bit integers in \a __B, producing 2 intermediate @@ -201,12 +183,9 @@ _mm256_dpwsuds_epi32(__m256i __W, __m256i __A, __m256i __B) { /// ENDFOR /// dst[MAX:128] := 0 /// \endcode -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusd_epi32(__m128i __W, - __m128i __A, - __m128i __B) { - return (__m128i)__builtin_ia32_vpdpwusd128((__v4si)__W, (__v4si)__A, - (__v4si)__B); -} +#define _mm_dpwusd_epi32(__W, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpwusd128((__v4si)(__W), (__v4si)(__A), \ + (__v4si)(__B))) /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding signed 16-bit integers in \a __B, producing 2 intermediate @@ -238,11 +217,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusd_epi32(__m128i __W, /// ENDFOR /// dst[MAX:256] := 0 /// \endcode -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpwusd_epi32(__m256i __W, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpwusd256((__v8si)__W, (__v8si)__A, - (__v8si)__B); -} +#define _mm256_dpwusd_epi32(__W, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpwusd256((__v8si)(__W), (__v8si)(__A), \ + (__v8si)(__B))) /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding signed 16-bit integers in \a __B, producing 2 intermediate @@ -275,12 +252,9 @@ _mm256_dpwusd_epi32(__m256i __W, __m256i __A, __m256i __B) { /// ENDFOR /// dst[MAX:128] := 0 /// \endcode -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusds_epi32(__m128i __W, - __m128i __A, 
- __m128i __B) { - return (__m128i)__builtin_ia32_vpdpwusds128((__v4si)__W, (__v4si)__A, - (__v4si)__B); -} +#define _mm_dpwusds_epi32(__W, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpwusds128((__v4si)(__W), (__v4si)(__A), \ + (__v4si)(__B))) /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding signed 16-bit integers in \a __B, producing 2 intermediate @@ -313,11 +287,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwusds_epi32(__m128i __W, /// ENDFOR /// dst[MAX:256] := 0 /// \endcode -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpwusds_epi32(__m256i __W, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpwusds256((__v8si)__W, (__v8si)__A, - (__v8si)__B); -} +#define _mm256_dpwusds_epi32(__W, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpwusds256((__v8si)(__W), (__v8si)(__A), \ + (__v8si)(__B))) /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate @@ -349,12 +321,9 @@ _mm256_dpwusds_epi32(__m256i __W, __m256i __A, __m256i __B) { /// ENDFOR /// dst[MAX:128] := 0 /// \endcode -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuud_epi32(__m128i __W, - __m128i __A, - __m128i __B) { - return (__m128i)__builtin_ia32_vpdpwuud128((__v4si)__W, (__v4si)__A, - (__v4si)__B); -} +#define _mm_dpwuud_epi32(__W, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpwuud128((__v4si)(__W), (__v4si)(__A), \ + (__v4si)(__B))) /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate @@ -386,11 +355,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuud_epi32(__m128i __W, /// ENDFOR /// dst[MAX:256] := 0 /// \endcode -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpwuud_epi32(__m256i __W, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpwuud256((__v8si)__W, 
(__v8si)__A, - (__v8si)__B); -} +#define _mm256_dpwuud_epi32(__W, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpwuud256((__v8si)(__W), (__v8si)(__A), \ + (__v8si)(__B))) /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate @@ -423,12 +390,9 @@ _mm256_dpwuud_epi32(__m256i __W, __m256i __A, __m256i __B) { /// ENDFOR /// dst[MAX:128] := 0 /// \endcode -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuuds_epi32(__m128i __W, - __m128i __A, - __m128i __B) { - return (__m128i)__builtin_ia32_vpdpwuuds128((__v4si)__W, (__v4si)__A, - (__v4si)__B); -} +#define _mm_dpwuuds_epi32(__W, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpwuuds128((__v4si)(__W), (__v4si)(__A), \ + (__v4si)(__B))) /// Multiply groups of 2 adjacent pairs of unsigned 16-bit integers in \a __A with /// corresponding unsigned 16-bit integers in \a __B, producing 2 intermediate @@ -461,13 +425,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwuuds_epi32(__m128i __W, /// ENDFOR /// dst[MAX:256] := 0 /// \endcode -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpwuuds_epi32(__m256i __W, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpwuuds256((__v8si)__W, (__v8si)__A, - (__v8si)__B); -} - -#undef __DEFAULT_FN_ATTRS128 -#undef __DEFAULT_FN_ATTRS256 +#define _mm256_dpwuuds_epi32(__W, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpwuuds256((__v8si)(__W), (__v8si)(__A), \ + (__v8si)(__B))) #endif // __AVXVNNIINT16INTRIN_H diff --git a/lib/include/avxvnniint8intrin.h b/lib/include/avxvnniint8intrin.h index b0b6cb853f71..c211620c68f0 100644 --- a/lib/include/avxvnniint8intrin.h +++ b/lib/include/avxvnniint8intrin.h @@ -14,14 +14,6 @@ #ifndef __AVXVNNIINT8INTRIN_H #define __AVXVNNIINT8INTRIN_H -/* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS256 \ - __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint8"), \ - __min_vector_width__(256))) -#define __DEFAULT_FN_ATTRS128 \ - __attribute__((__always_inline__, __nodebug__, __target__("avxvnniint8"), \ - __min_vector_width__(128))) - /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. Sum these 4 results with the corresponding @@ -52,12 +44,9 @@ /// ENDFOR /// dst[MAX:128] := 0 /// \endcode -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbssd_epi32(__m128i __W, - __m128i __A, - __m128i __B) { - return (__m128i)__builtin_ia32_vpdpbssd128((__v4si)__W, (__v4si)__A, - (__v4si)__B); -} +#define _mm_dpbssd_epi32(__W, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpbssd128((__v4si)(__W), (__v4si)(__A), \ + (__v4si)(__B))) /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate @@ -89,11 +78,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbssd_epi32(__m128i __W, /// ENDFOR /// dst[MAX:256] := 0 /// \endcode -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpbssd_epi32(__m256i __W, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpbssd256((__v8si)__W, (__v8si)__A, - (__v8si)__B); -} +#define _mm256_dpbssd_epi32(__W, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpbssd256((__v8si)(__W), (__v8si)(__A), \ + (__v8si)(__B))) /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate @@ -126,12 +113,9 @@ _mm256_dpbssd_epi32(__m256i __W, __m256i __A, __m256i __B) { /// ENDFOR /// dst[MAX:128] := 0 /// \endcode -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbssds_epi32(__m128i __W, - __m128i __A, - __m128i __B) { - return 
(__m128i)__builtin_ia32_vpdpbssds128((__v4si)__W, (__v4si)__A, - (__v4si)__B); -} +#define _mm_dpbssds_epi32(__W, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpbssds128((__v4si)(__W), (__v4si)(__A), \ + (__v4si)(__B))) /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding signed 8-bit integers in \a __B, producing 4 intermediate @@ -164,11 +148,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbssds_epi32(__m128i __W, /// ENDFOR /// dst[MAX:256] := 0 /// \endcode -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpbssds256((__v8si)__W, (__v8si)__A, - (__v8si)__B); -} +#define _mm256_dpbssds_epi32(__W, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpbssds256((__v8si)(__W), (__v8si)(__A), \ + (__v8si)(__B))) /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate @@ -200,12 +182,9 @@ _mm256_dpbssds_epi32(__m256i __W, __m256i __A, __m256i __B) { /// ENDFOR /// dst[MAX:128] := 0 /// \endcode -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbsud_epi32(__m128i __W, - __m128i __A, - __m128i __B) { - return (__m128i)__builtin_ia32_vpdpbsud128((__v4si)__W, (__v4si)__A, - (__v4si)__B); -} +#define _mm_dpbsud_epi32(__W, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpbsud128((__v4si)(__W), (__v4si)(__A), \ + (__v4si)(__B))) /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate @@ -237,11 +216,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbsud_epi32(__m128i __W, /// ENDFOR /// dst[MAX:256] := 0 /// \endcode -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpbsud256((__v8si)__W, (__v8si)__A, - (__v8si)__B); -} +#define 
_mm256_dpbsud_epi32(__W, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpbsud256((__v8si)(__W), (__v8si)(__A), \ + (__v8si)(__B))) /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate @@ -274,12 +251,9 @@ _mm256_dpbsud_epi32(__m256i __W, __m256i __A, __m256i __B) { /// ENDFOR /// dst[MAX:128] := 0 /// \endcode -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbsuds_epi32(__m128i __W, - __m128i __A, - __m128i __B) { - return (__m128i)__builtin_ia32_vpdpbsuds128((__v4si)__W, (__v4si)__A, - (__v4si)__B); -} +#define _mm_dpbsuds_epi32(__W, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpbsuds128((__v4si)(__W), (__v4si)(__A), \ + (__v4si)(__B))) /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate @@ -312,11 +286,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbsuds_epi32(__m128i __W, /// ENDFOR /// dst[MAX:256] := 0 /// \endcode -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpbsuds256((__v8si)__W, (__v8si)__A, - (__v8si)__B); -} +#define _mm256_dpbsuds_epi32(__W, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpbsuds256((__v8si)(__W), (__v8si)(__A), \ + (__v8si)(__B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate @@ -348,12 +320,9 @@ _mm256_dpbsuds_epi32(__m256i __W, __m256i __A, __m256i __B) { /// ENDFOR /// dst[MAX:128] := 0 /// \endcode -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbuud_epi32(__m128i __W, - __m128i __A, - __m128i __B) { - return (__m128i)__builtin_ia32_vpdpbuud128((__v4si)__W, (__v4si)__A, - (__v4si)__B); -} +#define _mm_dpbuud_epi32(__W, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpbuud128((__v4si)(__W), (__v4si)(__A), \ + 
(__v4si)(__B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate @@ -385,11 +354,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbuud_epi32(__m128i __W, /// ENDFOR /// dst[MAX:256] := 0 /// \endcode -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpbuud256((__v8si)__W, (__v8si)__A, - (__v8si)__B); -} +#define _mm256_dpbuud_epi32(__W, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpbuud256((__v8si)(__W), (__v8si)(__A), \ + (__v8si)(__B))) /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate @@ -422,14 +389,10 @@ _mm256_dpbuud_epi32(__m256i __W, __m256i __A, __m256i __B) { /// ENDFOR /// dst[MAX:128] := 0 /// \endcode -static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbuuds_epi32(__m128i __W, - __m128i __A, - __m128i __B) { - return (__m128i)__builtin_ia32_vpdpbuuds128((__v4si)__W, (__v4si)__A, - (__v4si)__B); -} +#define _mm_dpbuuds_epi32(__W, __A, __B) \ + ((__m128i)__builtin_ia32_vpdpbuuds128((__v4si)(__W), (__v4si)(__A), \ + (__v4si)(__B))) -/// Multiply groups of 4 adjacent pairs of signed 8-bit integers in \a __A with /// corresponding unsigned 8-bit integers in \a __B, producing 4 intermediate /// signed 16-bit results. 
Sum these 4 results with the corresponding /// 32-bit integer in \a __W with signed saturation, and store the packed @@ -460,12 +423,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbuuds_epi32(__m128i __W, /// ENDFOR /// dst[MAX:256] := 0 /// \endcode -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpbuuds_epi32(__m256i __W, __m256i __A, __m256i __B) { - return (__m256i)__builtin_ia32_vpdpbuuds256((__v8si)__W, (__v8si)__A, - (__v8si)__B); -} -#undef __DEFAULT_FN_ATTRS128 -#undef __DEFAULT_FN_ATTRS256 +#define _mm256_dpbuuds_epi32(__W, __A, __B) \ + ((__m256i)__builtin_ia32_vpdpbuuds256((__v8si)(__W), (__v8si)(__A), \ + (__v8si)(__B))) #endif // __AVXVNNIINT8INTRIN_H diff --git a/lib/include/bmi2intrin.h b/lib/include/bmi2intrin.h index f0a3343bef91..bdb61b13fb83 100644 --- a/lib/include/bmi2intrin.h +++ b/lib/include/bmi2intrin.h @@ -15,7 +15,13 @@ #define __BMI2INTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi2"))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("bmi2"))) constexpr +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("bmi2"))) +#endif /// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits /// starting at bit number \a __Y. @@ -38,8 +44,7 @@ /// The lower 8 bits specify the bit number of the lowest bit to zero. /// \returns The partially zeroed 32-bit value. static __inline__ unsigned int __DEFAULT_FN_ATTRS -_bzhi_u32(unsigned int __X, unsigned int __Y) -{ +_bzhi_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_bzhi_si(__X, __Y); } @@ -68,8 +73,7 @@ _bzhi_u32(unsigned int __X, unsigned int __Y) /// The 32-bit mask specifying where to deposit source bits. /// \returns The 32-bit result. 
static __inline__ unsigned int __DEFAULT_FN_ATTRS -_pdep_u32(unsigned int __X, unsigned int __Y) -{ +_pdep_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_pdep_si(__X, __Y); } @@ -98,8 +102,7 @@ _pdep_u32(unsigned int __X, unsigned int __Y) /// The 32-bit mask specifying which source bits to extract. /// \returns The 32-bit result. static __inline__ unsigned int __DEFAULT_FN_ATTRS -_pext_u32(unsigned int __X, unsigned int __Y) -{ +_pext_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_pext_si(__X, __Y); } @@ -124,8 +127,7 @@ _pext_u32(unsigned int __X, unsigned int __Y) /// A pointer to memory for storing the upper half of the product. /// \returns The lower half of the product. static __inline__ unsigned int __DEFAULT_FN_ATTRS -_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) -{ +_mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) { unsigned long long __res = (unsigned long long) __X * __Y; *__P = (unsigned int)(__res >> 32); return (unsigned int)__res; @@ -154,8 +156,7 @@ _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) /// The lower 8 bits specify the bit number of the lowest bit to zero. /// \returns The partially zeroed 64-bit value. static __inline__ unsigned long long __DEFAULT_FN_ATTRS -_bzhi_u64(unsigned long long __X, unsigned long long __Y) -{ +_bzhi_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_bzhi_di(__X, __Y); } @@ -184,8 +185,7 @@ _bzhi_u64(unsigned long long __X, unsigned long long __Y) /// The 64-bit mask specifying where to deposit source bits. /// \returns The 64-bit result. static __inline__ unsigned long long __DEFAULT_FN_ATTRS -_pdep_u64(unsigned long long __X, unsigned long long __Y) -{ +_pdep_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_pdep_di(__X, __Y); } @@ -214,8 +214,7 @@ _pdep_u64(unsigned long long __X, unsigned long long __Y) /// The 64-bit mask specifying which source bits to extract. 
/// \returns The 64-bit result. static __inline__ unsigned long long __DEFAULT_FN_ATTRS -_pext_u64(unsigned long long __X, unsigned long long __Y) -{ +_pext_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_pext_di(__X, __Y); } @@ -241,8 +240,7 @@ _pext_u64(unsigned long long __X, unsigned long long __Y) /// \returns The lower half of the product. static __inline__ unsigned long long __DEFAULT_FN_ATTRS _mulx_u64 (unsigned long long __X, unsigned long long __Y, - unsigned long long *__P) -{ + unsigned long long *__P) { unsigned __int128 __res = (unsigned __int128) __X * __Y; *__P = (unsigned long long) (__res >> 64); return (unsigned long long) __res; diff --git a/lib/include/bmiintrin.h b/lib/include/bmiintrin.h index 78bffe68e221..59c5ece3977f 100644 --- a/lib/include/bmiintrin.h +++ b/lib/include/bmiintrin.h @@ -17,7 +17,12 @@ /* Allow using the tzcnt intrinsics even for non-BMI targets. Since the TZCNT instruction behaves as BSF on non-BMI targets, there is code that expects to use it as a potentially faster version of BSF. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __RELAXED_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__)) constexpr +#else #define __RELAXED_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#endif /// Counts the number of trailing zero bits in the operand. /// @@ -31,8 +36,7 @@ /// bits in the operand. /// \see _tzcnt_u16 static __inline__ unsigned short __RELAXED_FN_ATTRS -__tzcnt_u16(unsigned short __X) -{ +__tzcnt_u16(unsigned short __X) { return __builtin_ia32_tzcnt_u16(__X); } @@ -65,8 +69,7 @@ __tzcnt_u16(unsigned short __X) /// bits in the operand. /// \see { _mm_tzcnt_32 _tzcnt_u32 } static __inline__ unsigned int __RELAXED_FN_ATTRS -__tzcnt_u32(unsigned int __X) -{ +__tzcnt_u32(unsigned int __X) { return __builtin_ia32_tzcnt_u32(__X); } @@ -82,8 +85,7 @@ __tzcnt_u32(unsigned int __X) /// the operand. 
/// \see { __tzcnt_u32 _tzcnt_u32 } static __inline__ int __RELAXED_FN_ATTRS -_mm_tzcnt_32(unsigned int __X) -{ +_mm_tzcnt_32(unsigned int __X) { return (int)__builtin_ia32_tzcnt_u32(__X); } @@ -118,8 +120,7 @@ _mm_tzcnt_32(unsigned int __X) /// bits in the operand. /// \see { _mm_tzcnt_64 _tzcnt_u64 } static __inline__ unsigned long long __RELAXED_FN_ATTRS -__tzcnt_u64(unsigned long long __X) -{ +__tzcnt_u64(unsigned long long __X) { return __builtin_ia32_tzcnt_u64(__X); } @@ -135,8 +136,7 @@ __tzcnt_u64(unsigned long long __X) /// the operand. /// \see { __tzcnt_u64 _tzcnt_u64 } static __inline__ long long __RELAXED_FN_ATTRS -_mm_tzcnt_64(unsigned long long __X) -{ +_mm_tzcnt_64(unsigned long long __X) { return (long long)__builtin_ia32_tzcnt_u64(__X); } @@ -164,7 +164,13 @@ _mm_tzcnt_64(unsigned long long __X) #if !defined(__SCE__) || __has_feature(modules) || defined(__BMI__) /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) constexpr +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("bmi"))) +#endif /// Performs a bitwise AND of the second operand with the one's /// complement of the first operand. @@ -181,8 +187,7 @@ _mm_tzcnt_64(unsigned long long __X) /// operand with the one's complement of the first operand. /// \see _andn_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS -__andn_u32(unsigned int __X, unsigned int __Y) -{ +__andn_u32(unsigned int __X, unsigned int __Y) { return ~__X & __Y; } @@ -224,8 +229,7 @@ __andn_u32(unsigned int __X, unsigned int __Y) /// extracted bits. 
/// \see _bextr_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS -__bextr_u32(unsigned int __X, unsigned int __Y) -{ +__bextr_u32(unsigned int __X, unsigned int __Y) { return __builtin_ia32_bextr_u32(__X, __Y); } @@ -249,9 +253,8 @@ __bextr_u32(unsigned int __X, unsigned int __Y) /// extracted bits. /// \see __bextr_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS -_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) -{ - return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); +_bextr_u32(unsigned int __X, unsigned int __Y, unsigned int __Z) { + return __builtin_ia32_bextr_u32(__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); } /* Intel-specified, single-leading-underscore version of BEXTR2 */ @@ -289,8 +292,7 @@ _bextr2_u32(unsigned int __X, unsigned int __Y) { /// the source operand. /// \see _blsi_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS -__blsi_u32(unsigned int __X) -{ +__blsi_u32(unsigned int __X) { return __X & -__X; } @@ -325,8 +327,7 @@ __blsi_u32(unsigned int __X) /// \returns An unsigned integer containing the newly created mask. /// \see _blsmsk_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS -__blsmsk_u32(unsigned int __X) -{ +__blsmsk_u32(unsigned int __X) { return __X ^ (__X - 1); } @@ -361,8 +362,7 @@ __blsmsk_u32(unsigned int __X) /// operand. /// \see _blsr_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS -__blsr_u32(unsigned int __X) -{ +__blsr_u32(unsigned int __X) { return __X & (__X - 1); } @@ -401,8 +401,7 @@ __blsr_u32(unsigned int __X) /// operand with the one's complement of the first operand. /// \see _andn_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__andn_u64 (unsigned long long __X, unsigned long long __Y) -{ +__andn_u64 (unsigned long long __X, unsigned long long __Y) { return ~__X & __Y; } @@ -445,8 +444,7 @@ __andn_u64 (unsigned long long __X, unsigned long long __Y) /// extracted bits. 
/// \see _bextr_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__bextr_u64(unsigned long long __X, unsigned long long __Y) -{ +__bextr_u64(unsigned long long __X, unsigned long long __Y) { return __builtin_ia32_bextr_u64(__X, __Y); } @@ -470,9 +468,8 @@ __bextr_u64(unsigned long long __X, unsigned long long __Y) /// extracted bits. /// \see __bextr_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS -_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) -{ - return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); +_bextr_u64(unsigned long long __X, unsigned int __Y, unsigned int __Z) { + return __builtin_ia32_bextr_u64(__X, ((__Y & 0xff) | ((__Z & 0xff) << 8))); } /* Intel-specified, single-leading-underscore version of BEXTR2 */ @@ -510,8 +507,7 @@ _bextr2_u64(unsigned long long __X, unsigned long long __Y) { /// bits from the source operand. /// \see _blsi_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__blsi_u64(unsigned long long __X) -{ +__blsi_u64(unsigned long long __X) { return __X & -__X; } @@ -546,8 +542,7 @@ __blsi_u64(unsigned long long __X) /// \returns An unsigned 64-bit integer containing the newly created mask. /// \see _blsmsk_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__blsmsk_u64(unsigned long long __X) -{ +__blsmsk_u64(unsigned long long __X) { return __X ^ (__X - 1); } @@ -582,8 +577,7 @@ __blsmsk_u64(unsigned long long __X) /// source operand. 
/// \see _blsr_u64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__blsr_u64(unsigned long long __X) -{ +__blsr_u64(unsigned long long __X) { return __X & (__X - 1); } diff --git a/lib/include/cmpccxaddintrin.h b/lib/include/cmpccxaddintrin.h index 6957498996c8..0076c402f5ff 100644 --- a/lib/include/cmpccxaddintrin.h +++ b/lib/include/cmpccxaddintrin.h @@ -63,7 +63,7 @@ typedef enum { (int)(__D)))) #define _cmpccxadd_epi64(__A, __B, __C, __D) \ - ((long long)(__builtin_ia32_cmpccxadd64((void *)(__A), (long long)(__B), \ + ((long long)(__builtin_ia32_cmpccxadd64((__A), (long long)(__B), \ (long long)(__C), (int)(__D)))) #endif // __x86_64__ diff --git a/lib/include/cpuid.h b/lib/include/cpuid.h index 82d995f1b966..2601aa5724f0 100644 --- a/lib/include/cpuid.h +++ b/lib/include/cpuid.h @@ -187,17 +187,18 @@ #define bit_ENQCMD 0x20000000 /* Features in %edx for leaf 7 sub-leaf 0 */ -#define bit_AVX5124VNNIW 0x00000004 -#define bit_AVX5124FMAPS 0x00000008 -#define bit_UINTR 0x00000020 -#define bit_SERIALIZE 0x00004000 -#define bit_TSXLDTRK 0x00010000 -#define bit_PCONFIG 0x00040000 -#define bit_IBT 0x00100000 -#define bit_AMXBF16 0x00400000 -#define bit_AVX512FP16 0x00800000 -#define bit_AMXTILE 0x01000000 -#define bit_AMXINT8 0x02000000 +#define bit_AVX5124VNNIW 0x00000004 +#define bit_AVX5124FMAPS 0x00000008 +#define bit_UINTR 0x00000020 +#define bit_AVX512VP2INTERSECT 0x00000100 +#define bit_SERIALIZE 0x00004000 +#define bit_TSXLDTRK 0x00010000 +#define bit_PCONFIG 0x00040000 +#define bit_IBT 0x00100000 +#define bit_AMXBF16 0x00400000 +#define bit_AVX512FP16 0x00800000 +#define bit_AMXTILE 0x01000000 +#define bit_AMXINT8 0x02000000 /* Features in %eax for leaf 7 sub-leaf 1 */ #define bit_SHA512 0x00000001 diff --git a/lib/include/emmintrin.h b/lib/include/emmintrin.h index 4dff6421350c..78e8a422db4c 100644 --- a/lib/include/emmintrin.h +++ b/lib/include/emmintrin.h @@ -49,12 +49,27 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), 
__aligned__(16))); #endif /* Define the default attributes for the functions in this file. */ +#if defined(__EVEX512__) && !defined(__AVX10_1_512__) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("sse2,no-evex512"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS_MMX \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("mmx,sse2,no-evex512"), __min_vector_width__(64))) +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \ + __min_vector_width__(128))) +#endif + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + +#define __trunc64(x) \ + (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) +#define __anyext128(x) \ + (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ + 1, -1, -1) /// Adds lower double-precision values in both operands and returns the /// sum in the lower 64 bits of the result. The upper 64 bits of the result @@ -71,8 +86,8 @@ typedef __bf16 __m128bh __attribute__((__vector_size__(16), __aligned__(16))); /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// sum of the lower 64 bits of both operands. The upper 64 bits are copied /// from the upper 64 bits of the first source operand. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_sd(__m128d __a, + __m128d __b) { __a[0] += __b[0]; return __a; } @@ -89,8 +104,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_sd(__m128d __a, /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] containing the sums of both /// operands. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_add_pd(__m128d __a, + __m128d __b) { return (__m128d)((__v2df)__a + (__v2df)__b); } @@ -111,8 +126,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_add_pd(__m128d __a, /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// difference of the lower 64 bits of both operands. The upper 64 bits are /// copied from the upper 64 bits of the first source operand. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_sd(__m128d __a, + __m128d __b) { __a[0] -= __b[0]; return __a; } @@ -129,8 +144,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_sd(__m128d __a, /// A 128-bit vector of [2 x double] containing the subtrahend. /// \returns A 128-bit vector of [2 x double] containing the differences between /// both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sub_pd(__m128d __a, + __m128d __b) { return (__m128d)((__v2df)__a - (__v2df)__b); } @@ -150,8 +165,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_sub_pd(__m128d __a, /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// product of the lower 64 bits of both operands. The upper 64 bits are /// copied from the upper 64 bits of the first source operand. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_sd(__m128d __a, + __m128d __b) { __a[0] *= __b[0]; return __a; } @@ -168,8 +183,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_sd(__m128d __a, /// A 128-bit vector of [2 x double] containing one of the operands. 
/// \returns A 128-bit vector of [2 x double] containing the products of both /// operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_mul_pd(__m128d __a, + __m128d __b) { return (__m128d)((__v2df)__a * (__v2df)__b); } @@ -190,8 +205,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_mul_pd(__m128d __a, /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// quotient of the lower 64 bits of both operands. The upper 64 bits are /// copied from the upper 64 bits of the first source operand. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_sd(__m128d __a, + __m128d __b) { __a[0] /= __b[0]; return __a; } @@ -209,8 +224,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_sd(__m128d __a, /// A 128-bit vector of [2 x double] containing the divisor. /// \returns A 128-bit vector of [2 x double] containing the quotients of both /// operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_div_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_div_pd(__m128d __a, + __m128d __b) { return (__m128d)((__v2df)__a / (__v2df)__b); } @@ -358,8 +373,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_max_pd(__m128d __a, /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the /// values between both operands. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_and_pd(__m128d __a, + __m128d __b) { return (__m128d)((__v2du)__a & (__v2du)__b); } @@ -378,8 +393,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_and_pd(__m128d __a, /// \returns A 128-bit vector of [2 x double] containing the bitwise AND of the /// values in the second operand and the one's complement of the first /// operand. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_andnot_pd(__m128d __a, __m128d __b) { return (__m128d)(~(__v2du)__a & (__v2du)__b); } @@ -395,8 +410,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_andnot_pd(__m128d __a, /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] containing the bitwise OR of the /// values between both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_or_pd(__m128d __a, + __m128d __b) { return (__m128d)((__v2du)__a | (__v2du)__b); } @@ -412,8 +427,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_or_pd(__m128d __a, /// A 128-bit vector of [2 x double] containing one of the source operands. /// \returns A 128-bit vector of [2 x double] containing the bitwise XOR of the /// values between both operands. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_xor_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_xor_pd(__m128d __a, + __m128d __b) { return (__m128d)((__v2du)__a ^ (__v2du)__b); } @@ -1291,7 +1306,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtpd_ps(__m128d __a) { /// floating-point elements are converted to double-precision values. The /// upper two elements are unused. 
/// \returns A 128-bit vector of [2 x double] containing the converted values. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtps_pd(__m128 __a) { return (__m128d) __builtin_convertvector( __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 1), __v2df); } @@ -1312,7 +1328,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtps_pd(__m128 __a) { /// /// The upper two elements are unused. /// \returns A 128-bit vector of [2 x double] containing the converted values. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtepi32_pd(__m128i __a) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi32_pd(__m128i __a) { return (__m128d) __builtin_convertvector( __builtin_shufflevector((__v4si)__a, (__v4si)__a, 0, 1), __v2df); } @@ -1398,8 +1415,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtsd_ss(__m128 __a, /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the /// converted value from the second parameter. The upper 64 bits are copied /// from the upper 64 bits of the first parameter. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, - int __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtsi32_sd(__m128d __a, int __b) { __a[0] = __b; return __a; } @@ -1423,8 +1440,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi32_sd(__m128d __a, /// \returns A 128-bit vector of [2 x double]. The lower 64 bits contain the /// converted value from the second parameter. The upper 64 bits are copied /// from the upper 64 bits of the first parameter. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtss_sd(__m128d __a, - __m128 __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtss_sd(__m128d __a, __m128 __b) { __a[0] = __b[0]; return __a; } @@ -1486,8 +1503,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_cvttsd_si32(__m128d __a) { /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 64-bit vector of [2 x i32] containing the converted values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a) { - return (__m64)__builtin_ia32_cvtpd2pi((__v2df)__a); +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvtpd_pi32(__m128d __a) { + return __trunc64(__builtin_ia32_cvtpd2dq((__v2df)__a)); } /// Converts the two double-precision floating-point elements of a @@ -1505,8 +1522,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvtpd_pi32(__m128d __a) { /// \param __a /// A 128-bit vector of [2 x double]. /// \returns A 64-bit vector of [2 x i32] containing the converted values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a) { - return (__m64)__builtin_ia32_cvttpd2pi((__v2df)__a); +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_cvttpd_pi32(__m128d __a) { + return __trunc64(__builtin_ia32_cvttpd2dq((__v2df)__a)); } /// Converts the two signed 32-bit integer elements of a 64-bit vector of @@ -1520,8 +1537,9 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_cvttpd_pi32(__m128d __a) { /// \param __a /// A 64-bit vector of [2 x i32]. /// \returns A 128-bit vector of [2 x double] containing the converted values. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_pd(__m64 __a) { - return __builtin_ia32_cvtpi2pd((__v2si)__a); +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtpi32_pd(__m64 __a) { + return (__m128d) __builtin_convertvector((__v2si)__a, __v2df); } /// Returns the low-order element of a 128-bit vector of [2 x double] as @@ -1535,7 +1553,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS_MMX _mm_cvtpi32_pd(__m64 __a) { /// A 128-bit vector of [2 x double]. The lower 64 bits are returned. /// \returns A double-precision floating-point value copied from the lower 64 /// bits of \a __a. -static __inline__ double __DEFAULT_FN_ATTRS _mm_cvtsd_f64(__m128d __a) { +static __inline__ double __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtsd_f64(__m128d __a) { return __a[0]; } @@ -1770,7 +1789,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_undefined_pd(void) { /// \returns An initialized 128-bit floating-point vector of [2 x double]. The /// lower 64 bits contain the value of the parameter. The upper 64 bits are /// set to zero. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_sd(double __w) { return __extension__(__m128d){__w, 0.0}; } @@ -1786,7 +1805,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_sd(double __w) { /// A double-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_pd(double __w) { return __extension__(__m128d){__w, __w}; } @@ -1802,7 +1821,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set1_pd(double __w) { /// A double-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd1(double __w) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_pd1(double __w) { return _mm_set1_pd(__w); } @@ -1820,8 +1839,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd1(double __w) { /// A double-precision floating-point value used to initialize the lower 64 /// bits of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, - double __x) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_pd(double __w, + double __x) { return __extension__(__m128d){__x, __w}; } @@ -1840,8 +1859,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_set_pd(double __w, /// A double-precision floating-point value used to initialize the upper 64 /// bits of the result. /// \returns An initialized 128-bit floating-point vector of [2 x double]. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, - double __x) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setr_pd(double __w, + double __x) { return __extension__(__m128d){__w, __x}; } @@ -1854,7 +1873,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setr_pd(double __w, /// /// \returns An initialized 128-bit floating-point vector of [2 x double] with /// all elements set to zero. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_pd(void) { return __extension__(__m128d){0.0, 0.0}; } @@ -1873,8 +1892,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_setzero_pd(void) { /// A 128-bit vector of [2 x double]. The lower 64 bits are written to the /// lower 64 bits of the result. /// \returns A 128-bit vector of [2 x double] containing the moved values. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_move_sd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_move_sd(__m128d __a, __m128d __b) { __a[0] = __b[0]; return __a; } @@ -2091,8 +2110,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi16(__m128i __a, /// A 128-bit vector of [4 x i32]. /// \returns A 128-bit vector of [4 x i32] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_add_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4su)__a + (__v4su)__b); } @@ -2108,9 +2127,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi32(__m128i __a, /// \param __b /// A 64-bit integer. /// \returns A 64-bit integer containing the sum of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a, - __m64 __b) { - return (__m64)__builtin_ia32_paddq((__v1di)__a, (__v1di)__b); +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_add_si64(__m64 __a, __m64 __b) { + return (__m64)(((unsigned long long)__a) + ((unsigned long long)__b)); } /// Adds the corresponding elements of two 128-bit vectors of [2 x i64], @@ -2129,8 +2147,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_add_si64(__m64 __a, /// A 128-bit vector of [2 x i64]. /// \returns A 128-bit vector of [2 x i64] containing the sums of both /// parameters. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_add_epi64(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_add_epi64(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a + (__v2du)__b); } @@ -2431,9 +2449,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mullo_epi16(__m128i __a, /// \param __b /// A 64-bit integer containing one of the source operands. /// \returns A 64-bit integer vector containing the product of both operands. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_mul_su32(__m64 __a, - __m64 __b) { - return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mul_su32(__m64 __a, __m64 __b) { + return __trunc64(__builtin_ia32_pmuludq128((__v4si)__anyext128(__a), + (__v4si)__anyext128(__b))); } /// Multiplies 32-bit unsigned integer values contained in the lower @@ -2521,8 +2539,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi16(__m128i __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sub_epi32(__m128i __a, __m128i __b) { return (__m128i)((__v4su)__a - (__v4su)__b); } @@ -2539,9 +2557,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi32(__m128i __a, /// A 64-bit integer vector containing the subtrahend. /// \returns A 64-bit integer vector containing the difference of the values in /// the operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, - __m64 __b) { - return (__m64)__builtin_ia32_psubq((__v1di)__a, (__v1di)__b); +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sub_si64(__m64 __a, __m64 __b) { + return (__m64)((unsigned long long)__a - (unsigned long long)__b); } /// Subtracts the corresponding elements of two [2 x i64] vectors. @@ -2556,8 +2573,8 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX _mm_sub_si64(__m64 __a, /// A 128-bit integer vector containing the subtrahends. /// \returns A 128-bit integer vector containing the differences of the values /// in the operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_sub_epi64(__m128i __a, - __m128i __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sub_epi64(__m128i __a, __m128i __b) { return (__m128i)((__v2du)__a - (__v2du)__b); } @@ -3255,8 +3272,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmplt_epi32(__m128i __a, /// \returns A 128-bit vector of [2 x double] whose lower 64 bits contain the /// converted value of the second operand. The upper 64 bits are copied from /// the upper 64 bits of the first operand. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_cvtsi64_sd(__m128d __a, - long long __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtsi64_sd(__m128d __a, long long __b) { __a[0] = __b; return __a; } @@ -3310,7 +3327,8 @@ static __inline__ long long __DEFAULT_FN_ATTRS _mm_cvttsd_si64(__m128d __a) { /// \param __a /// A 128-bit integer vector. /// \returns A 128-bit vector of [4 x float] containing the converted values. -static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_cvtepi32_ps(__m128i __a) { +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtepi32_ps(__m128i __a) { return (__m128) __builtin_convertvector((__v4si)__a, __v4sf); } @@ -3494,8 +3512,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_undefined_si128(void) { /// destination vector of [2 x i64]. /// \returns An initialized 128-bit vector of [2 x i64] containing the values /// provided in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, - long long __q0) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set_epi64x(long long __q1, long long __q0) { return __extension__(__m128i)(__v2di){__q0, __q1}; } @@ -3515,9 +3533,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64x(long long __q1, /// destination vector of [2 x i64]. /// \returns An initialized 128-bit vector of [2 x i64] containing the values /// provided in the operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, - __m64 __q0) { - return _mm_set_epi64x((long long)__q1, (long long)__q0); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set_epi64(__m64 __q1, __m64 __q0) { + return _mm_set_epi64x((long long)__q1[0], (long long)__q0[0]); } /// Initializes the 32-bit values in a 128-bit vector of [4 x i32] with @@ -3542,8 +3560,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi64(__m64 __q1, /// vector. /// \returns An initialized 128-bit vector of [4 x i32] containing the values /// provided in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, - int __i1, int __i0) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_epi32(int __i3, + int __i2, + int __i1, + int __i0) { return __extension__(__m128i)(__v4si){__i0, __i1, __i2, __i3}; } @@ -3581,7 +3601,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set_epi32(int __i3, int __i2, /// vector. /// \returns An initialized 128-bit vector of [8 x i16] containing the values /// provided in the operands. -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, short __w2, short __w1, short __w0) { return __extension__(__m128i)(__v8hi){__w0, __w1, __w2, __w3, @@ -3630,7 +3650,7 @@ _mm_set_epi16(short __w7, short __w6, short __w5, short __w4, short __w3, /// Initializes bits [7:0] of the destination vector. /// \returns An initialized 128-bit vector of [16 x i8] containing the values /// provided in the operands. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, char __b10, char __b9, char __b8, char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) { @@ -3652,7 +3672,8 @@ _mm_set_epi8(char __b15, char __b14, char __b13, char __b12, char __b11, /// vector. /// \returns An initialized 128-bit integer vector of [2 x i64] with both /// elements containing the value provided in the operand. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set1_epi64x(long long __q) { return _mm_set_epi64x(__q, __q); } @@ -3669,7 +3690,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64x(long long __q) { /// vector. /// \returns An initialized 128-bit vector of [2 x i64] with all elements /// containing the value provided in the operand. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set1_epi64(__m64 __q) { return _mm_set_epi64(__q, __q); } @@ -3686,7 +3708,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi64(__m64 __q) { /// vector. /// \returns An initialized 128-bit vector of [4 x i32] with all elements /// containing the value provided in the operand. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi32(int __i) { return _mm_set_epi32(__i, __i, __i, __i); } @@ -3703,7 +3725,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi32(int __i) { /// vector. /// \returns An initialized 128-bit vector of [8 x i16] with all elements /// containing the value provided in the operand. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set1_epi16(short __w) { return _mm_set_epi16(__w, __w, __w, __w, __w, __w, __w, __w); } @@ -3720,7 +3743,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi16(short __w) { /// vector. /// \returns An initialized 128-bit vector of [16 x i8] with all elements /// containing the value provided in the operand. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_set1_epi8(char __b) { return _mm_set_epi8(__b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b); } @@ -3739,8 +3762,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b) { /// A 64-bit integral value used to initialize the upper 64 bits of the /// result. /// \returns An initialized 128-bit integer vector. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, - __m64 __q1) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_setr_epi64(__m64 __q0, __m64 __q1) { return _mm_set_epi64(__q1, __q0); } @@ -3761,9 +3784,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi64(__m64 __q0, /// \param __i3 /// A 32-bit integral value used to initialize bits [127:96] of the result. /// \returns An initialized 128-bit integer vector. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, - int __i2, - int __i3) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_setr_epi32(int __i0, int __i1, int __i2, int __i3) { return _mm_set_epi32(__i3, __i2, __i1, __i0); } @@ -3792,7 +3814,7 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setr_epi32(int __i0, int __i1, /// \param __w7 /// A 16-bit integral value used to initialize bits [127:112] of the result. /// \returns An initialized 128-bit integer vector. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, short __w5, short __w6, short __w7) { return _mm_set_epi16(__w7, __w6, __w5, __w4, __w3, __w2, __w1, __w0); @@ -3839,7 +3861,7 @@ _mm_setr_epi16(short __w0, short __w1, short __w2, short __w3, short __w4, /// \param __b15 /// An 8-bit integral value used to initialize bits [127:120] of the result. /// \returns An initialized 128-bit integer vector. -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, char __b6, char __b7, char __b8, char __b9, char __b10, char __b11, char __b12, char __b13, char __b14, char __b15) { @@ -3855,7 +3877,7 @@ _mm_setr_epi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, /// /// \returns An initialized 128-bit integer vector with all elements set to /// zero. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_setzero_si128(void) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR _mm_setzero_si128(void) { return __extension__(__m128i)(__v2di){0LL, 0LL}; } @@ -4588,7 +4610,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_unpacklo_epi64(__m128i __a, /// A 128-bit integer vector operand. The lower 64 bits are moved to the /// destination. /// \returns A 64-bit integer containing the lower 64 bits of the parameter. -static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a) { +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_movepi64_pi64(__m128i __a) { return (__m64)__a[0]; } @@ -4603,8 +4626,9 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_movepi64_pi64(__m128i __a) { /// A 64-bit value. /// \returns A 128-bit integer vector. The lower 64 bits contain the value from /// the operand. The upper 64 bits are assigned zeros. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a) { - return __extension__(__m128i)(__v2di){(long long)__a, 0}; +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_movpi64_epi64(__m64 __a) { + return __builtin_shufflevector((__v1di)__a, _mm_setzero_si64(), 0, 1); } /// Moves the lower 64 bits of a 128-bit integer vector to a 128-bit @@ -4619,7 +4643,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_movpi64_epi64(__m64 __a) { /// destination. /// \returns A 128-bit integer vector. The lower 64 bits contain the value from /// the operand. The upper 64 bits are assigned zeros. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_move_epi64(__m128i __a) { return __builtin_shufflevector((__v2di)__a, _mm_setzero_si128(), 0, 2); } @@ -4638,8 +4663,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_move_epi64(__m128i __a) { /// A 128-bit vector of [2 x double]. \n /// Bits [127:64] are written to bits [127:64] of the destination. /// \returns A 128-bit vector of [2 x double] containing the interleaved values. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_unpackhi_pd(__m128d __a, __m128d __b) { return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 1, 2 + 1); } @@ -4658,8 +4683,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpackhi_pd(__m128d __a, /// A 128-bit vector of [2 x double]. \n /// Bits [63:0] are written to bits [127:64] of the destination. /// \returns A 128-bit vector of [2 x double] containing the interleaved values. 
-static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_unpacklo_pd(__m128d __a, - __m128d __b) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_unpacklo_pd(__m128d __a, __m128d __b) { return __builtin_shufflevector((__v2df)__a, (__v2df)__b, 0, 2 + 0); } @@ -4722,7 +4747,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) { /// A 128-bit floating-point vector of [2 x double]. /// \returns A 128-bit floating-point vector of [4 x float] containing the same /// bitwise pattern as the parameter. -static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a) { +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_castpd_ps(__m128d __a) { return (__m128)__a; } @@ -4737,7 +4763,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castpd_ps(__m128d __a) { /// A 128-bit floating-point vector of [2 x double]. /// \returns A 128-bit integer vector containing the same bitwise pattern as the /// parameter. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_castpd_si128(__m128d __a) { return (__m128i)__a; } @@ -4752,7 +4779,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castpd_si128(__m128d __a) { /// A 128-bit floating-point vector of [4 x float]. /// \returns A 128-bit floating-point vector of [2 x double] containing the same /// bitwise pattern as the parameter. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_castps_pd(__m128 __a) { return (__m128d)__a; } @@ -4767,7 +4795,8 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castps_pd(__m128 __a) { /// A 128-bit floating-point vector of [4 x float]. /// \returns A 128-bit integer vector containing the same bitwise pattern as the /// parameter. 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_castps_si128(__m128 __a) { return (__m128i)__a; } @@ -4782,7 +4811,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_castps_si128(__m128 __a) { /// A 128-bit integer vector. /// \returns A 128-bit floating-point vector of [4 x float] containing the same /// bitwise pattern as the parameter. -static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a) { +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_castsi128_ps(__m128i __a) { return (__m128)__a; } @@ -4797,7 +4827,8 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_castsi128_ps(__m128i __a) { /// A 128-bit integer vector. /// \returns A 128-bit floating-point vector of [2 x double] containing the same /// bitwise pattern as the parameter. -static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_castsi128_pd(__m128i __a) { +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_castsi128_pd(__m128i __a) { return (__m128d)__a; } @@ -4889,8 +4920,11 @@ void _mm_pause(void); #if defined(__cplusplus) } // extern "C" #endif + +#undef __anyext128 +#undef __trunc64 #undef __DEFAULT_FN_ATTRS -#undef __DEFAULT_FN_ATTRS_MMX +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) diff --git a/lib/include/gfniintrin.h b/lib/include/gfniintrin.h index 73b04a824aba..9a5743d4b673 100644 --- a/lib/include/gfniintrin.h +++ b/lib/include/gfniintrin.h @@ -14,6 +14,7 @@ #ifndef __GFNIINTRIN_H #define __GFNIINTRIN_H +#if defined(__EVEX512__) && !defined(__AVX10_1_512__) /* Default attributes for simple form (no masking). */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ @@ -25,26 +26,47 @@ __target__("avx,gfni,no-evex512"), \ __min_vector_width__(256))) -/* Default attributes for ZMM unmasked forms. */ -#define __DEFAULT_FN_ATTRS_Z \ +/* Default attributes for VLX masked forms. 
*/ +#define __DEFAULT_FN_ATTRS_VL128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512f,evex512,gfni"), \ - __min_vector_width__(512))) -/* Default attributes for ZMM masked forms. */ -#define __DEFAULT_FN_ATTRS_Z_MASK \ + __target__("avx512bw,avx512vl,gfni,no-evex512"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_VL256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512bw,evex512,gfni"), \ - __min_vector_width__(512))) + __target__("avx512bw,avx512vl,gfni,no-evex512"), \ + __min_vector_width__(256))) +#else +/* Default attributes for simple form (no masking). */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("gfni"), \ + __min_vector_width__(128))) + +/* Default attributes for YMM unmasked form. */ +#define __DEFAULT_FN_ATTRS_Y \ + __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), \ + __min_vector_width__(256))) /* Default attributes for VLX masked forms. */ #define __DEFAULT_FN_ATTRS_VL128 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512bw,avx512vl,gfni,no-evex512"), \ + __target__("avx512bw,avx512vl,gfni"), \ __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS_VL256 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512bw,avx512vl,gfni,no-evex512"), \ + __target__("avx512bw,avx512vl,gfni"), \ __min_vector_width__(256))) +#endif + +/* Default attributes for ZMM unmasked forms. */ +#define __DEFAULT_FN_ATTRS_Z \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512f,evex512,gfni"), \ + __min_vector_width__(512))) +/* Default attributes for ZMM masked forms. 
*/ +#define __DEFAULT_FN_ATTRS_Z_MASK \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bw,evex512,gfni"), \ + __min_vector_width__(512))) #define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \ ((__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ diff --git a/lib/include/hexagon_types.h b/lib/include/hexagon_types.h index 029727cc4817..8e73fad4bcd4 100644 --- a/lib/include/hexagon_types.h +++ b/lib/include/hexagon_types.h @@ -1,7 +1,11 @@ -/******************************************************************************/ -/* (c) 2020 Qualcomm Innovation Center, Inc. All rights reserved. */ -/* */ -/******************************************************************************/ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + #ifndef HEXAGON_TYPES_H #define HEXAGON_TYPES_H diff --git a/lib/include/hvx_hexagon_protos.h b/lib/include/hvx_hexagon_protos.h index 7e3679a38b2c..fd120a589f64 100644 --- a/lib/include/hvx_hexagon_protos.h +++ b/lib/include/hvx_hexagon_protos.h @@ -5178,6 +5178,433 @@ #define Q6_Vuh_vmpy_VuhVuh_rs16(Vu,Vv) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpyuhvs)(Vu,Vv) #endif /* __HEXAGON_ARCH___ >= 69 */ +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Vdd32.sf=vadd(Vu32.bf,Vv32.bf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vadd_VbfVbf(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vadd_VbfVbf(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_sf_bf)(Vu, Vv) +#endif /* 
__HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Vd32.h=Vu32.hf + C Intrinsic Prototype: HVX_Vector Q6_Vh_equals_Vhf(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vh_equals_Vhf(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_h_hf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Vd32.hf=Vu32.h + C Intrinsic Prototype: HVX_Vector Q6_Vhf_equals_Vh(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vhf_equals_Vh(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_hf_h)(Vu) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Vd32.sf=Vu32.w + C Intrinsic Prototype: HVX_Vector Q6_Vsf_equals_Vw(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vsf_equals_Vw(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_sf_w)(Vu) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Vd32.w=Vu32.sf + C Intrinsic Prototype: HVX_Vector Q6_Vw_equals_Vsf(HVX_Vector Vu) + Instruction Type: CVI_VS + Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Vw_equals_Vsf(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vconv_w_sf)(Vu) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* 
========================================================================== + Assembly Syntax: Vd32.bf=vcvt(Vu32.sf,Vv32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vbf_vcvt_VsfVsf(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vbf_vcvt_VsfVsf(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_bf_sf)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Qd4=vcmp.gt(Vu32.bf,Vv32.bf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gt_VbfVbf(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VA Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gt_VbfVbf(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt) \ + ((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtbf)(Vu, Vv)), -1) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Qx4&=vcmp.gt(Vu32.bf,Vv32.bf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtand_QVbfVbf(HVX_VectorPred + Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution + Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtand_QVbfVbf(Qx, Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt) \ + ((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtbf_and)( \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \ + Vv)), \ + -1) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Qx4|=vcmp.gt(Vu32.bf,Vv32.bf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtor_QVbfVbf(HVX_VectorPred + Qx, 
HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution + Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtor_QVbfVbf(Qx, Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt) \ + ((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtbf_or)( \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \ + Vv)), \ + -1) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Qx4^=vcmp.gt(Vu32.bf,Vv32.bf) + C Intrinsic Prototype: HVX_VectorPred Q6_Q_vcmp_gtxacc_QVbfVbf(HVX_VectorPred + Qx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VA Execution + Slots: SLOT0123 + ========================================================================== */ + +#define Q6_Q_vcmp_gtxacc_QVbfVbf(Qx, Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandqrt) \ + ((__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vgtbf_xor)( \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vandvrt)((Qx), -1), Vu, \ + Vv)), \ + -1) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Vd32.bf=vmax(Vu32.bf,Vv32.bf) + C Intrinsic Prototype: HVX_Vector Q6_Vbf_vmax_VbfVbf(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vbf_vmax_VbfVbf(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmax_bf)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Vd32.bf=vmin(Vu32.bf,Vv32.bf) + C Intrinsic Prototype: HVX_Vector Q6_Vbf_vmin_VbfVbf(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VX_LATE Execution Slots: SLOT23 + 
========================================================================== */ + +#define Q6_Vbf_vmin_VbfVbf(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmin_bf)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Vdd32.sf=vmpy(Vu32.bf,Vv32.bf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpy_VbfVbf(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vmpy_VbfVbf(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_bf)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Vxx32.sf+=vmpy(Vu32.bf,Vv32.bf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vmpyacc_WsfVbfVbf(HVX_VectorPair + Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution + Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vmpyacc_WsfVbfVbf(Vxx, Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_sf_bf_acc)(Vxx, Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 73 +/* ========================================================================== + Assembly Syntax: Vdd32.sf=vsub(Vu32.bf,Vv32.bf) + C Intrinsic Prototype: HVX_VectorPair Q6_Wsf_vsub_VbfVbf(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Wsf_vsub_VbfVbf(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_sf_bf)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 73 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vd32=vgetqfext(Vu32.x,Rt32) + C Intrinsic 
Prototype: HVX_Vector Q6_V_vgetqfext_VR(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_V_vgetqfext_VR(Vu, Rt) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_get_qfext)(Vu, Rt) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vx32|=vgetqfext(Vu32.x,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_V_vgetqfextor_VVR(HVX_Vector Vx, + HVX_Vector Vu, Word32 Rt) Instruction Type: CVI_VX Execution Slots: + SLOT23 + ========================================================================== */ + +#define Q6_V_vgetqfextor_VVR(Vx, Vu, Rt) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_get_qfext_oracc)(Vx, Vu, Rt) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vd32.x=vsetqfext(Vu32,Rt32) + C Intrinsic Prototype: HVX_Vector Q6_V_vsetqfext_VR(HVX_Vector Vu, Word32 Rt) + Instruction Type: CVI_VX + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_V_vsetqfext_VR(Vu, Rt) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_set_qfext)(Vu, Rt) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vd32.f8=vabs(Vu32.f8) + C Intrinsic Prototype: HVX_Vector Q6_V_vabs_V(HVX_Vector Vu) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_V_vabs_V(Vu) __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vabs_f8)(Vu) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: 
Vdd32.hf=vadd(Vu32.f8,Vv32.f8) + C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vadd_VV(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Whf_vadd_VV(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vadd_hf_f8)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vd32.b=vcvt2(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vb_vcvt2_VhfVhf(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vb_vcvt2_VhfVhf(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_b_hf)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vdd32.hf=vcvt2(Vu32.b) + C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt2_Vb(HVX_Vector Vu) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Whf_vcvt2_Vb(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_hf_b)(Vu) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vdd32.hf=vcvt2(Vu32.ub) + C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt2_Vub(HVX_Vector Vu) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Whf_vcvt2_Vub(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_hf_ub)(Vu) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly 
Syntax: Vd32.ub=vcvt2(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vub_vcvt2_VhfVhf(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vub_vcvt2_VhfVhf(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt2_ub_hf)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vd32.f8=vcvt(Vu32.hf,Vv32.hf) + C Intrinsic Prototype: HVX_Vector Q6_V_vcvt_VhfVhf(HVX_Vector Vu, HVX_Vector + Vv) Instruction Type: CVI_VX Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_V_vcvt_VhfVhf(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_f8_hf)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vdd32.hf=vcvt(Vu32.f8) + C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vcvt_V(HVX_Vector Vu) + Instruction Type: CVI_VX_DV + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Whf_vcvt_V(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vcvt_hf_f8)(Vu) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vd32.f8=vfmax(Vu32.f8,Vv32.f8) + C Intrinsic Prototype: HVX_Vector Q6_V_vfmax_VV(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_V_vfmax_VV(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmax_f8)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* 
========================================================================== + Assembly Syntax: Vd32.f8=vfmin(Vu32.f8,Vv32.f8) + C Intrinsic Prototype: HVX_Vector Q6_V_vfmin_VV(HVX_Vector Vu, HVX_Vector Vv) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_V_vfmin_VV(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfmin_f8)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vd32.f8=vfneg(Vu32.f8) + C Intrinsic Prototype: HVX_Vector Q6_V_vfneg_V(HVX_Vector Vu) + Instruction Type: CVI_VX_LATE + Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_V_vfneg_V(Vu) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vfneg_f8)(Vu) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vd32=vmerge(Vu32.x,Vv32.w) + C Intrinsic Prototype: HVX_Vector Q6_V_vmerge_VVw(HVX_Vector Vu, HVX_Vector + Vv) Instruction Type: CVI_VS Execution Slots: SLOT0123 + ========================================================================== */ + +#define Q6_V_vmerge_VVw(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmerge_qf)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vdd32.hf=vmpy(Vu32.f8,Vv32.f8) + C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vmpy_VV(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Whf_vmpy_VV(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_f8)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 
+/* ========================================================================== + Assembly Syntax: Vxx32.hf+=vmpy(Vu32.f8,Vv32.f8) + C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vmpyacc_WhfVV(HVX_VectorPair + Vxx, HVX_Vector Vu, HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution + Slots: SLOT23 + ========================================================================== */ + +#define Q6_Whf_vmpyacc_WhfVV(Vxx, Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_hf_f8_acc)(Vxx, Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vmpy(Vu32.hf,Rt32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_VhfRhf(HVX_Vector Vu, Word32 + Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vqf16_vmpy_VhfRhf(Vu, Rt) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_rt_hf)(Vu, Rt) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vd32.qf16=vmpy(Vu32.qf16,Rt32.hf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf16_vmpy_Vqf16Rhf(HVX_Vector Vu, + Word32 Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vqf16_vmpy_Vqf16Rhf(Vu, Rt) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_rt_qf16)(Vu, Rt) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vd32.qf32=vmpy(Vu32.sf,Rt32.sf) + C Intrinsic Prototype: HVX_Vector Q6_Vqf32_vmpy_VsfRsf(HVX_Vector Vu, Word32 + Rt) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Vqf32_vmpy_VsfRsf(Vu, Rt) \ + 
__BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vmpy_rt_sf)(Vu, Rt) +#endif /* __HEXAGON_ARCH___ >= 79 */ + +#if __HVX_ARCH__ >= 79 +/* ========================================================================== + Assembly Syntax: Vdd32.hf=vsub(Vu32.f8,Vv32.f8) + C Intrinsic Prototype: HVX_VectorPair Q6_Whf_vsub_VV(HVX_Vector Vu, + HVX_Vector Vv) Instruction Type: CVI_VX_DV Execution Slots: SLOT23 + ========================================================================== */ + +#define Q6_Whf_vsub_VV(Vu, Vv) \ + __BUILTIN_VECTOR_WRAP(__builtin_HEXAGON_V6_vsub_hf_f8)(Vu, Vv) +#endif /* __HEXAGON_ARCH___ >= 79 */ + #endif /* __HVX__ */ #endif diff --git a/lib/include/immintrin.h b/lib/include/immintrin.h index cd6cf09b90ca..19c5987257a2 100644 --- a/lib/include/immintrin.h +++ b/lib/include/immintrin.h @@ -605,6 +605,20 @@ _storebe_i64(void * __P, long long __D) { #include #endif +#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVRS__) +#include +#endif + +#if !defined(__SCE__) || __has_feature(modules) || \ + (defined(__AVX10_2__) && defined(__MOVRS__)) +#include +#endif + +#if !defined(__SCE__) || __has_feature(modules) || \ + (defined(__AVX10_2_512__) && defined(__MOVRS__)) +#include +#endif + #if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__) #include #endif @@ -620,9 +634,6 @@ _storebe_i64(void * __P, long long __D) { #if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__) #include #endif -#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__) -#include -#endif #if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) || \ defined(__WIDEKL__) @@ -634,10 +645,59 @@ _storebe_i64(void * __P, long long __D) { #include #endif +#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__) +#include +#endif + #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__) #include #endif +#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP8__) +#include 
+#endif + +#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TRANSPOSE__) +#include +#endif + +#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_MOVRS__) +#include +#endif + +#if !defined(__SCE__) || __has_feature(modules) || \ + (defined(__AMX_MOVRS__) && defined(__AMX_TRANSPOSE__)) +#include +#endif + +#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_AVX512__) +#include +#endif + +#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TF32__) +#include +#endif + +#if !defined(__SCE__) || __has_feature(modules) || \ + (defined(__AMX_TF32__) && defined(__AMX_TRANSPOSE__)) +#include +#endif + +#if !defined(__SCE__) || __has_feature(modules) || \ + (defined(__AMX_BF16__) && defined(__AMX_TRANSPOSE__)) +#include +#endif + +#if !defined(__SCE__) || __has_feature(modules) || \ + (defined(__AMX_FP16__) && defined(__AMX_TRANSPOSE__)) +#include +#endif + +#if !defined(__SCE__) || __has_feature(modules) || \ + (defined(__AMX_COMPLEX__) && defined(__AMX_TRANSPOSE__)) +#include +#endif + #if !defined(__SCE__) || __has_feature(modules) || \ defined(__AVX512VP2INTERSECT__) #include @@ -648,6 +708,30 @@ _storebe_i64(void * __P, long long __D) { #include #endif +#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__) +#include +#include +#include +#include +#include +#include +#include +#endif + +#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__) +#include +#include +#include +#include +#include +#include +#endif + +#if !defined(__SCE__) || __has_feature(modules) || \ + (defined(__AVX10_2_512__) && defined(__SM4__)) +#include +#endif + #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__) #include #endif diff --git a/lib/include/intrin.h b/lib/include/intrin.h index 6308c865ca91..376046aeeaf5 100644 --- a/lib/include/intrin.h +++ b/lib/include/intrin.h @@ -94,8 +94,8 @@ void __outwordstring(unsigned short, unsigned short *, unsigned long); unsigned long 
__readcr0(void); unsigned long __readcr2(void); unsigned __LPTRINT_TYPE__ __readcr3(void); -unsigned long __readcr4(void); -unsigned long __readcr8(void); +unsigned __LPTRINT_TYPE__ __readcr4(void); +unsigned __int64 __readcr8(void); unsigned int __readdr(unsigned int); #ifdef __i386__ unsigned char __readfsbyte(unsigned long); @@ -124,8 +124,8 @@ void __vmx_vmptrst(unsigned __int64 *); void __wbinvd(void); void __writecr0(unsigned int); void __writecr3(unsigned __INTPTR_TYPE__); -void __writecr4(unsigned int); -void __writecr8(unsigned int); +void __writecr4(unsigned __INTPTR_TYPE__); +void __writecr8(unsigned __int64); void __writedr(unsigned int, unsigned int); void __writefsbyte(unsigned long, unsigned char); void __writefsdword(unsigned long, unsigned long); @@ -330,33 +330,33 @@ static __inline__ void __DEFAULT_FN_ATTRS __halt(void) { __asm__ volatile("hlt"); } -static inline unsigned char __inbyte(unsigned short port) { +static __inline__ unsigned char __inbyte(unsigned short port) { unsigned char ret; __asm__ __volatile__("inb %w1, %b0" : "=a"(ret) : "Nd"(port)); return ret; } -static inline unsigned short __inword(unsigned short port) { +static __inline__ unsigned short __inword(unsigned short port) { unsigned short ret; __asm__ __volatile__("inw %w1, %w0" : "=a"(ret) : "Nd"(port)); return ret; } -static inline unsigned long __indword(unsigned short port) { +static __inline__ unsigned long __indword(unsigned short port) { unsigned long ret; __asm__ __volatile__("inl %w1, %k0" : "=a"(ret) : "Nd"(port)); return ret; } -static inline void __outbyte(unsigned short port, unsigned char data) { +static __inline__ void __outbyte(unsigned short port, unsigned char data) { __asm__ __volatile__("outb %b0, %w1" : : "a"(data), "Nd"(port)); } -static inline void __outword(unsigned short port, unsigned short data) { +static __inline__ void __outword(unsigned short port, unsigned short data) { __asm__ __volatile__("outw %w0, %w1" : : "a"(data), "Nd"(port)); } -static 
inline void __outdword(unsigned short port, unsigned long data) { +static __inline__ void __outdword(unsigned short port, unsigned long data) { __asm__ __volatile__("outl %k0, %w1" : : "a"(data), "Nd"(port)); } #endif @@ -396,6 +396,16 @@ unsigned short __readx18word(unsigned long offset); unsigned long __readx18dword(unsigned long offset); unsigned __int64 __readx18qword(unsigned long offset); +void __addx18byte(unsigned long offset, unsigned char data); +void __addx18word(unsigned long offset, unsigned short data); +void __addx18dword(unsigned long offset, unsigned long data); +void __addx18qword(unsigned long offset, unsigned __int64 data); + +void __incx18byte(unsigned long offset); +void __incx18word(unsigned long offset); +void __incx18dword(unsigned long offset); +void __incx18qword(unsigned long offset); + double _CopyDoubleFromInt64(__int64); float _CopyFloatFromInt32(__int32); __int32 _CopyInt32FromFloat(float); diff --git a/lib/include/intrin0.h b/lib/include/intrin0.h index 866c8896617d..2bca9fc877e9 100644 --- a/lib/include/intrin0.h +++ b/lib/include/intrin0.h @@ -44,7 +44,7 @@ unsigned char _InterlockedCompareExchange128_rel(__int64 volatile *_Destination, __int64 *_ComparandResult); #endif -#ifdef __x86_64__ && !defined(__arm64ec__) +#if defined(__x86_64__) && !defined(__arm64ec__) unsigned __int64 _umul128(unsigned __int64, unsigned __int64, unsigned __int64 *); unsigned __int64 __shiftleft128(unsigned __int64 _LowPart, @@ -207,6 +207,9 @@ long _InterlockedExchange_rel(long volatile *_Target, long _Value); __int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value); __int64 _InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value); __int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value); +void *_InterlockedExchangePointer_acq(void *volatile *_Target, void *_Value); +void *_InterlockedExchangePointer_nf(void *volatile *_Target, void *_Value); +void *_InterlockedExchangePointer_rel(void *volatile 
*_Target, void *_Value); /*----------------------------------------------------------------------------*\ |* Interlocked Compare Exchange @@ -237,6 +240,12 @@ __int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination, __int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand); +void *_InterlockedCompareExchangePointer_acq(void *volatile *_Destination, + void *_Exchange, void *_Comparand); +void *_InterlockedCompareExchangePointer_nf(void *volatile *_Destination, + void *_Exchange, void *_Comparand); +void *_InterlockedCompareExchangePointer_rel(void *volatile *_Destination, + void *_Exchange, void *_Comparand); #endif #ifdef __cplusplus diff --git a/lib/include/larchintrin.h b/lib/include/larchintrin.h index f4218295919a..a1247d12e21f 100644 --- a/lib/include/larchintrin.h +++ b/lib/include/larchintrin.h @@ -228,17 +228,31 @@ extern __inline void ((void)__builtin_loongarch_ldpte_d((long int)(_1), (_2))) #endif -#define __frecipe_s(/*float*/ _1) \ - (float)__builtin_loongarch_frecipe_s((float)_1) +#ifdef __loongarch_frecipe +extern __inline float + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_s(float _1) { + return __builtin_loongarch_frecipe_s(_1); +} -#define __frecipe_d(/*double*/ _1) \ - (double)__builtin_loongarch_frecipe_d((double)_1) +extern __inline double + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __frecipe_d(double _1) { + return __builtin_loongarch_frecipe_d(_1); +} -#define __frsqrte_s(/*float*/ _1) \ - (float)__builtin_loongarch_frsqrte_s((float)_1) +extern __inline float + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_s(float _1) { + return __builtin_loongarch_frsqrte_s(_1); +} -#define __frsqrte_d(/*double*/ _1) \ - (double)__builtin_loongarch_frsqrte_d((double)_1) +extern __inline double + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __frsqrte_d(double _1) { + return 
__builtin_loongarch_frsqrte_d(_1); +} +#endif #ifdef __cplusplus } diff --git a/lib/include/lasxintrin.h b/lib/include/lasxintrin.h index dafc2a2f3e6a..85020d82829e 100644 --- a/lib/include/lasxintrin.h +++ b/lib/include/lasxintrin.h @@ -1726,18 +1726,6 @@ extern __inline return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); } -extern __inline - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 - __lasx_xvfrecipe_s(__m256 _1) { - return (__m256)__builtin_lasx_xvfrecipe_s((v8f32)_1); -} - -extern __inline - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d - __lasx_xvfrecipe_d(__m256d _1) { - return (__m256d)__builtin_lasx_xvfrecipe_d((v4f64)_1); -} - extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvfrint_s(__m256 _1) { @@ -1762,18 +1750,6 @@ extern __inline return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); } -extern __inline - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 - __lasx_xvfrsqrte_s(__m256 _1) { - return (__m256)__builtin_lasx_xvfrsqrte_s((v8f32)_1); -} - -extern __inline - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d - __lasx_xvfrsqrte_d(__m256d _1) { - return (__m256d)__builtin_lasx_xvfrsqrte_d((v4f64)_1); -} - extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 __lasx_xvflogb_s(__m256 _1) { @@ -2585,7 +2561,7 @@ extern __inline extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i __lasx_xvorn_v(__m256i _1, __m256i _2) { - return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); + return (__m256i)__builtin_lasx_xvorn_v((v32u8)_1, (v32u8)_2); } #define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) @@ -3866,6 +3842,32 @@ extern __inline return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); } +#if defined(__loongarch_frecipe) +extern __inline + __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256 + __lasx_xvfrecipe_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrecipe_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrecipe_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrecipe_d((v4f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 + __lasx_xvfrsqrte_s(__m256 _1) { + return (__m256)__builtin_lasx_xvfrsqrte_s((v8f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d + __lasx_xvfrsqrte_d(__m256d _1) { + return (__m256d)__builtin_lasx_xvfrsqrte_d((v4f64)_1); +} +#endif + #define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2))) diff --git a/lib/include/limits.h b/lib/include/limits.h index 56dffe568486..d08227fe4d3d 100644 --- a/lib/include/limits.h +++ b/lib/include/limits.h @@ -111,11 +111,14 @@ #define ULLONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL) #endif -/* LONG_LONG_MIN/LONG_LONG_MAX/ULONG_LONG_MAX are a GNU extension. It's too bad - that we don't have something like #pragma poison that could be used to - deprecate a macro - the code should just use LLONG_MAX and friends. +/* LONG_LONG_MIN/LONG_LONG_MAX/ULONG_LONG_MAX are a GNU extension. Android's + bionic also defines them. It's too bad that we don't have something like + #pragma poison that could be used to deprecate a macro - the code should just + use LLONG_MAX and friends. */ -#if defined(__GNU_LIBRARY__) ? defined(__USE_GNU) : !defined(__STRICT_ANSI__) +#if (defined(__GNU_LIBRARY__) ? 
defined(__USE_GNU) \ + : !defined(__STRICT_ANSI__)) || \ + defined(__BIONIC__) #undef LONG_LONG_MIN #undef LONG_LONG_MAX diff --git a/lib/include/llvm_libc_wrappers/ctype.h b/lib/include/llvm_libc_wrappers/ctype.h index 49c2af93471b..960cf43302c4 100644 --- a/lib/include/llvm_libc_wrappers/ctype.h +++ b/lib/include/llvm_libc_wrappers/ctype.h @@ -51,6 +51,19 @@ #pragma push_macro("toascii") #pragma push_macro("tolower") #pragma push_macro("toupper") +#pragma push_macro("isalnum_l") +#pragma push_macro("isalpha_l") +#pragma push_macro("isascii_l") +#pragma push_macro("isblank_l") +#pragma push_macro("iscntrl_l") +#pragma push_macro("isdigit_l") +#pragma push_macro("isgraph_l") +#pragma push_macro("islower_l") +#pragma push_macro("isprint_l") +#pragma push_macro("ispunct_l") +#pragma push_macro("isspace_l") +#pragma push_macro("isupper_l") +#pragma push_macro("isxdigit_l") #undef isalnum #undef isalpha @@ -68,6 +81,18 @@ #undef toascii #undef tolower #undef toupper +#undef isalnum_l +#undef isalpha_l +#undef iscntrl_l +#undef isdigit_l +#undef islower_l +#undef isgraph_l +#undef isprint_l +#undef ispunct_l +#undef isspace_l +#undef isupper_l +#undef isblank_l +#undef isxdigit_l #pragma omp begin declare target @@ -93,6 +118,19 @@ #pragma pop_macro("toascii") #pragma pop_macro("tolower") #pragma pop_macro("toupper") +#pragma pop_macro("isalnum_l") +#pragma pop_macro("isalpha_l") +#pragma pop_macro("isascii_l") +#pragma pop_macro("isblank_l") +#pragma pop_macro("iscntrl_l") +#pragma pop_macro("isdigit_l") +#pragma pop_macro("isgraph_l") +#pragma pop_macro("islower_l") +#pragma pop_macro("isprint_l") +#pragma pop_macro("ispunct_l") +#pragma pop_macro("isspace_l") +#pragma pop_macro("isupper_l") +#pragma pop_macro("isxdigit_l") #endif #undef __LIBC_ATTRS diff --git a/lib/include/llvm_libc_wrappers/stdlib.h b/lib/include/llvm_libc_wrappers/stdlib.h index 7fce5a1a31d5..69afdf4a6897 100644 --- a/lib/include/llvm_libc_wrappers/stdlib.h +++ 
b/lib/include/llvm_libc_wrappers/stdlib.h @@ -34,8 +34,16 @@ _Static_assert(__builtin_offsetof(div_t, quot) == 0, "ABI mismatch!"); _Static_assert(__builtin_offsetof(ldiv_t, quot) == 0, "ABI mismatch!"); _Static_assert(__builtin_offsetof(lldiv_t, quot) == 0, "ABI mismatch!"); +#if defined(__GLIBC__) && __cplusplus >= 201703L +#define at_quick_exit atexit +#endif + #include +#if defined(__GLIBC__) && __cplusplus >= 201703L +#undef at_quick_exit +#endif + #pragma omp end declare target #undef __LIBC_ATTRS diff --git a/lib/include/lsxintrin.h b/lib/include/lsxintrin.h index f347955ce6fb..a9b19223fc4b 100644 --- a/lib/include/lsxintrin.h +++ b/lib/include/lsxintrin.h @@ -1776,18 +1776,6 @@ extern __inline return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); } -extern __inline - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 - __lsx_vfrecipe_s(__m128 _1) { - return (__m128)__builtin_lsx_vfrecipe_s((v4f32)_1); -} - -extern __inline - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d - __lsx_vfrecipe_d(__m128d _1) { - return (__m128d)__builtin_lsx_vfrecipe_d((v2f64)_1); -} - extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vfrint_s(__m128 _1) { @@ -1812,18 +1800,6 @@ extern __inline return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); } -extern __inline - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 - __lsx_vfrsqrte_s(__m128 _1) { - return (__m128)__builtin_lsx_vfrsqrte_s((v4f32)_1); -} - -extern __inline - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d - __lsx_vfrsqrte_d(__m128d _1) { - return (__m128d)__builtin_lsx_vfrsqrte_d((v2f64)_1); -} - extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 __lsx_vflogb_s(__m128 _1) { @@ -3425,7 +3401,7 @@ extern __inline extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i __lsx_vorn_v(__m128i _1, __m128i 
_2) { - return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); + return (__m128i)__builtin_lsx_vorn_v((v16u8)_1, (v16u8)_2); } #define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) @@ -3738,6 +3714,32 @@ extern __inline return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); } +#if defined(__loongarch_frecipe) +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrecipe_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrecipe_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrecipe_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrecipe_d((v2f64)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 + __lsx_vfrsqrte_s(__m128 _1) { + return (__m128)__builtin_lsx_vfrsqrte_s((v4f32)_1); +} + +extern __inline + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d + __lsx_vfrsqrte_d(__m128d _1) { + return (__m128d)__builtin_lsx_vfrsqrte_d((v2f64)_1); +} +#endif + #define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) #define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) diff --git a/lib/include/lzcntintrin.h b/lib/include/lzcntintrin.h index f4ddce9d0e68..27509021ec25 100644 --- a/lib/include/lzcntintrin.h +++ b/lib/include/lzcntintrin.h @@ -15,7 +15,13 @@ #define __LZCNTINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) constexpr +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("lzcnt"))) +#endif #ifndef _MSC_VER /// Counts the number of leading zero bits in the operand. 
@@ -43,8 +49,7 @@ /// bits in the operand. /// \see _lzcnt_u32 static __inline__ unsigned int __DEFAULT_FN_ATTRS -__lzcnt32(unsigned int __X) -{ +__lzcnt32(unsigned int __X) { return __builtin_ia32_lzcnt_u32(__X); } @@ -60,8 +65,7 @@ __lzcnt32(unsigned int __X) /// bits in the operand. /// \see __lzcnt32 static __inline__ unsigned int __DEFAULT_FN_ATTRS -_lzcnt_u32(unsigned int __X) -{ +_lzcnt_u32(unsigned int __X) { return __builtin_ia32_lzcnt_u32(__X); } @@ -93,8 +97,7 @@ _lzcnt_u32(unsigned int __X) /// bits in the operand. /// \see __lzcnt64 static __inline__ unsigned long long __DEFAULT_FN_ATTRS -_lzcnt_u64(unsigned long long __X) -{ +_lzcnt_u64(unsigned long long __X) { return __builtin_ia32_lzcnt_u64(__X); } #endif diff --git a/lib/include/mmintrin.h b/lib/include/mmintrin.h index 4e154e2d8593..dc0fa5c523ee 100644 --- a/lib/include/mmintrin.h +++ b/lib/include/mmintrin.h @@ -21,10 +21,45 @@ typedef int __v2si __attribute__((__vector_size__(8))); typedef short __v4hi __attribute__((__vector_size__(8))); typedef char __v8qi __attribute__((__vector_size__(8))); +/* Unsigned types */ +typedef unsigned long long __v1du __attribute__ ((__vector_size__ (8))); +typedef unsigned int __v2su __attribute__ ((__vector_size__ (8))); +typedef unsigned short __v4hu __attribute__((__vector_size__(8))); +typedef unsigned char __v8qu __attribute__((__vector_size__(8))); + +/* We need an explicitly signed variant for char. Note that this shouldn't + * appear in the interface though. */ +typedef signed char __v8qs __attribute__((__vector_size__(8))); + +/* SSE/SSE2 types */ +typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16))); +typedef long long __v2di __attribute__ ((__vector_size__ (16))); +typedef int __v4si __attribute__((__vector_size__(16))); +typedef short __v8hi __attribute__((__vector_size__(16))); +typedef char __v16qi __attribute__((__vector_size__(16))); + /* Define the default attributes for the functions in this file. 
*/ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("mmx,no-evex512"), \ - __min_vector_width__(64))) +#if defined(__EVEX512__) && !defined(__AVX10_1_512__) +#define __DEFAULT_FN_ATTRS_SSE2 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("sse2,no-evex512"), __min_vector_width__(128))) +#else +#define __DEFAULT_FN_ATTRS_SSE2 \ + __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \ + __min_vector_width__(128))) +#endif + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr +#else +#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 +#endif + +#define __trunc64(x) \ + (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) +#define __anyext128(x) \ + (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ + 1, -1, -1) /// Clears the MMX state by setting the state of the x87 stack registers /// to empty. @@ -50,10 +85,10 @@ _mm_empty(void) { /// A 32-bit integer value. /// \returns A 64-bit integer vector. The lower 32 bits contain the value of the /// parameter. The upper 32 bits are set to 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi32_si64(int __i) { - return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); + return __extension__ (__m64)(__v2si){__i, 0}; } /// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit @@ -67,10 +102,10 @@ _mm_cvtsi32_si64(int __i) /// A 64-bit integer vector. /// \returns A 32-bit signed integer value containing the lower 32 bits of the /// parameter. -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_si32(__m64 __m) { - return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); + return ((__v2si)__m)[0]; } /// Casts a 64-bit signed integer value into a 64-bit integer vector. 
@@ -83,7 +118,7 @@ _mm_cvtsi64_si32(__m64 __m) /// A 64-bit signed integer. /// \returns A 64-bit integer vector containing the same bitwise pattern as the /// parameter. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtsi64_m64(long long __i) { return (__m64)__i; @@ -99,7 +134,7 @@ _mm_cvtsi64_m64(long long __i) /// A 64-bit integer vector. /// \returns A 64-bit signed integer containing the same bitwise pattern as the /// parameter. -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS_SSE2 _mm_cvtm64_si64(__m64 __m) { return (long long)__m; @@ -124,10 +159,11 @@ _mm_cvtm64_si64(__m64 __m) /// written to the upper 32 bits of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the converted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); + return __trunc64(__builtin_ia32_packsswb128( + (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){})); } /// Converts, with saturation, 32-bit signed integers from both 64-bit integer @@ -149,10 +185,11 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2) /// written to the upper 32 bits of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the converted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pi32(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); + return __trunc64(__builtin_ia32_packssdw128( + (__v4si)__builtin_shufflevector(__m1, __m2, 0, 1), (__v4si){})); } /// Converts, with saturation, 16-bit signed integers from both 64-bit integer @@ -174,10 +211,11 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2) /// written to the upper 32 bits of the result. 
/// \returns A 64-bit integer vector of [8 x i8] containing the converted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_packs_pu16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); + return __trunc64(__builtin_ia32_packuswb128( + (__v8hi)__builtin_shufflevector(__m1, __m2, 0, 1), (__v8hi){})); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] @@ -201,10 +239,11 @@ _mm_packs_pu16(__m64 __m1, __m64 __m2) /// Bits [63:56] are written to bits [63:56] of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); + return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, + 4, 12, 5, 13, 6, 14, 7, 15); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of @@ -224,10 +263,11 @@ _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) /// Bits [63:48] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); + return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, + 2, 6, 3, 7); } /// Unpacks the upper 32 bits from two 64-bit integer vectors of @@ -245,10 +285,10 @@ _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) /// the upper 32 bits of the result. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved /// values. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); + return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 3); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] @@ -272,10 +312,11 @@ _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) /// Bits [31:24] are written to bits [63:56] of the result. /// \returns A 64-bit integer vector of [8 x i8] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); + return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, + 0, 8, 1, 9, 2, 10, 3, 11); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of @@ -295,10 +336,11 @@ _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) /// Bits [31:16] are written to bits [63:48] of the result. /// \returns A 64-bit integer vector of [4 x i16] containing the interleaved /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); + return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, + 0, 4, 1, 5); } /// Unpacks the lower 32 bits from two 64-bit integer vectors of @@ -316,10 +358,10 @@ _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) /// the upper 32 bits of the result. /// \returns A 64-bit integer vector of [2 x i32] containing the interleaved /// values. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); + return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2); } /// Adds each 8-bit integer element of the first 64-bit integer vector @@ -337,10 +379,10 @@ _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the sums of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi8(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); + return (__m64)(((__v8qu)__m1) + ((__v8qu)__m2)); } /// Adds each 16-bit integer element of the first 64-bit integer vector @@ -358,10 +400,10 @@ _mm_add_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the sums of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); + return (__m64)(((__v4hu)__m1) + ((__v4hu)__m2)); } /// Adds each 32-bit integer element of the first 64-bit integer vector @@ -379,10 +421,10 @@ _mm_add_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of both /// parameters. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_add_pi32(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); + return (__m64)(((__v2su)__m1) + ((__v2su)__m2)); } /// Adds, with saturation, each 8-bit signed integer element of the first @@ -403,10 +445,10 @@ _mm_add_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums /// of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi8(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); + return (__m64)__builtin_elementwise_add_sat((__v8qs)__m1, (__v8qs)__m2); } /// Adds, with saturation, each 16-bit signed integer element of the first @@ -427,10 +469,10 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums /// of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); + return (__m64)__builtin_elementwise_add_sat((__v4hi)__m1, (__v4hi)__m2); } /// Adds, with saturation, each 8-bit unsigned integer element of the first @@ -450,10 +492,10 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// unsigned sums of both parameters. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu8(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); + return (__m64)__builtin_elementwise_add_sat((__v8qu)__m1, (__v8qu)__m2); } /// Adds, with saturation, each 16-bit unsigned integer element of the first @@ -473,10 +515,10 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// unsigned sums of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_adds_pu16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); + return (__m64)__builtin_elementwise_add_sat((__v4hu)__m1, (__v4hu)__m2); } /// Subtracts each 8-bit integer element of the second 64-bit integer @@ -494,10 +536,10 @@ _mm_adds_pu16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the differences of /// both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi8(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); + return (__m64)(((__v8qu)__m1) - ((__v8qu)__m2)); } /// Subtracts each 16-bit integer element of the second 64-bit integer @@ -515,10 +557,10 @@ _mm_sub_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the differences of /// both parameters. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); + return (__m64)(((__v4hu)__m1) - ((__v4hu)__m2)); } /// Subtracts each 32-bit integer element of the second 64-bit integer @@ -536,10 +578,10 @@ _mm_sub_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32] containing the subtrahends. /// \returns A 64-bit integer vector of [2 x i32] containing the differences of /// both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sub_pi32(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); + return (__m64)(((__v2su)__m1) - ((__v2su)__m2)); } /// Subtracts, with saturation, each 8-bit signed integer element of the second @@ -560,10 +602,10 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// differences of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi8(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); + return (__m64)__builtin_elementwise_sub_sat((__v8qs)__m1, (__v8qs)__m2); } /// Subtracts, with saturation, each 16-bit signed integer element of the @@ -584,10 +626,10 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// differences of both parameters. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); + return (__m64)__builtin_elementwise_sub_sat((__v4hi)__m1, (__v4hi)__m2); } /// Subtracts each 8-bit unsigned integer element of the second 64-bit @@ -608,10 +650,10 @@ _mm_subs_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8] containing the subtrahends. /// \returns A 64-bit integer vector of [8 x i8] containing the saturated /// differences of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu8(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); + return (__m64)__builtin_elementwise_sub_sat((__v8qu)__m1, (__v8qu)__m2); } /// Subtracts each 16-bit unsigned integer element of the second 64-bit @@ -632,10 +674,10 @@ _mm_subs_pu8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16] containing the subtrahends. /// \returns A 64-bit integer vector of [4 x i16] containing the saturated /// differences of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_subs_pu16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); + return (__m64)__builtin_elementwise_sub_sat((__v4hu)__m1, (__v4hu)__m2); } /// Multiplies each 16-bit signed integer element of the first 64-bit @@ -659,10 +701,11 @@ _mm_subs_pu16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [2 x i32] containing the sums of /// products of both parameters. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_madd_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); + return __trunc64(__builtin_ia32_pmaddwd128((__v8hi)__anyext128(__m1), + (__v8hi)__anyext128(__m2))); } /// Multiplies each 16-bit signed integer element of the first 64-bit @@ -680,10 +723,11 @@ _mm_madd_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits /// of the products of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mulhi_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); + return __trunc64(__builtin_ia32_pmulhw128((__v8hi)__anyext128(__m1), + (__v8hi)__anyext128(__m2))); } /// Multiplies each 16-bit signed integer element of the first 64-bit @@ -701,10 +745,10 @@ _mm_mulhi_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits /// of the products of both parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mullo_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); + return (__m64)(((__v4hu)__m1) * ((__v4hu)__m2)); } /// Left-shifts each 16-bit signed integer element of the first @@ -724,10 +768,11 @@ _mm_mullo_pi16(__m64 __m1, __m64 __m2) /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted /// values. If \a __count is greater or equal to 16, the result is set to all /// 0. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_pi16(__m64 __m, __m64 __count) { - return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); + return __trunc64(__builtin_ia32_psllw128((__v8hi)__anyext128(__m), + (__v8hi)__anyext128(__count))); } /// Left-shifts each 16-bit signed integer element of a 64-bit integer @@ -746,10 +791,11 @@ _mm_sll_pi16(__m64 __m, __m64 __count) /// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted /// values. If \a __count is greater or equal to 16, the result is set to all /// 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi16(__m64 __m, int __count) { - return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); + return __trunc64(__builtin_ia32_psllwi128((__v8hi)__anyext128(__m), + __count)); } /// Left-shifts each 32-bit signed integer element of the first @@ -769,10 +815,11 @@ _mm_slli_pi16(__m64 __m, int __count) /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted /// values. If \a __count is greater or equal to 32, the result is set to all /// 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_pi32(__m64 __m, __m64 __count) { - return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); + return __trunc64(__builtin_ia32_pslld128((__v4si)__anyext128(__m), + (__v4si)__anyext128(__count))); } /// Left-shifts each 32-bit signed integer element of a 64-bit integer @@ -791,10 +838,11 @@ _mm_sll_pi32(__m64 __m, __m64 __count) /// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted /// values. If \a __count is greater or equal to 32, the result is set to all /// 0. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_pi32(__m64 __m, int __count) { - return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); + return __trunc64(__builtin_ia32_pslldi128((__v4si)__anyext128(__m), + __count)); } /// Left-shifts the first 64-bit integer parameter by the number of bits @@ -811,10 +859,11 @@ _mm_slli_pi32(__m64 __m, int __count) /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector containing the left-shifted value. If /// \a __count is greater or equal to 64, the result is set to 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sll_si64(__m64 __m, __m64 __count) { - return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); + return __trunc64(__builtin_ia32_psllq128((__v2di)__anyext128(__m), + (__v2di)__anyext128(__count))); } /// Left-shifts the first parameter, which is a 64-bit integer, by the @@ -831,10 +880,11 @@ _mm_sll_si64(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector containing the left-shifted value. If /// \a __count is greater or equal to 64, the result is set to 0. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_slli_si64(__m64 __m, int __count) { - return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); + return __trunc64(__builtin_ia32_psllqi128((__v2di)__anyext128(__m), + __count)); } /// Right-shifts each 16-bit integer element of the first parameter, @@ -855,10 +905,11 @@ _mm_slli_si64(__m64 __m, int __count) /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sra_pi16(__m64 __m, __m64 __count) { - return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); + return __trunc64(__builtin_ia32_psraw128((__v8hi)__anyext128(__m), + (__v8hi)__anyext128(__count))); } /// Right-shifts each 16-bit integer element of a 64-bit integer vector @@ -878,10 +929,11 @@ _mm_sra_pi16(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi16(__m64 __m, int __count) { - return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); + return __trunc64(__builtin_ia32_psrawi128((__v8hi)__anyext128(__m), + __count)); } /// Right-shifts each 32-bit integer element of the first parameter, @@ -902,10 +954,11 @@ _mm_srai_pi16(__m64 __m, int __count) /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sra_pi32(__m64 __m, __m64 __count) { - return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); + return __trunc64(__builtin_ia32_psrad128((__v4si)__anyext128(__m), + (__v4si)__anyext128(__count))); } /// Right-shifts each 32-bit integer element of a 64-bit integer vector @@ -925,10 +978,11 @@ _mm_sra_pi32(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srai_pi32(__m64 __m, int __count) { - return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); + return __trunc64(__builtin_ia32_psradi128((__v4si)__anyext128(__m), + __count)); } /// Right-shifts each 16-bit integer element of the first parameter, @@ -948,10 +1002,11 @@ _mm_srai_pi32(__m64 __m, int __count) /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_pi16(__m64 __m, __m64 __count) { - return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); + return __trunc64(__builtin_ia32_psrlw128((__v8hi)__anyext128(__m), + (__v8hi)__anyext128(__count))); } /// Right-shifts each 16-bit integer element of a 64-bit integer vector @@ -970,10 +1025,11 @@ _mm_srl_pi16(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi16(__m64 __m, int __count) { - return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); + return __trunc64(__builtin_ia32_psrlwi128((__v8hi)__anyext128(__m), + __count)); } /// Right-shifts each 32-bit integer element of the first parameter, @@ -993,10 +1049,11 @@ _mm_srli_pi16(__m64 __m, int __count) /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_pi32(__m64 __m, __m64 __count) { - return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); + return __trunc64(__builtin_ia32_psrld128((__v4si)__anyext128(__m), + (__v4si)__anyext128(__count))); } /// Right-shifts each 32-bit integer element of a 64-bit integer vector @@ -1015,10 +1072,11 @@ _mm_srl_pi32(__m64 __m, __m64 __count) /// A 32-bit integer value. /// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_pi32(__m64 __m, int __count) { - return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); + return __trunc64(__builtin_ia32_psrldi128((__v4si)__anyext128(__m), + __count)); } /// Right-shifts the first 64-bit integer parameter by the number of bits @@ -1035,10 +1093,11 @@ _mm_srli_pi32(__m64 __m, int __count) /// \param __count /// A 64-bit integer vector interpreted as a single 64-bit integer. /// \returns A 64-bit integer vector containing the right-shifted value. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srl_si64(__m64 __m, __m64 __count) { - return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); + return __trunc64(__builtin_ia32_psrlq128((__v2di)__anyext128(__m), + (__v2di)__anyext128(__count))); } /// Right-shifts the first parameter, which is a 64-bit integer, by the @@ -1056,10 +1115,11 @@ _mm_srl_si64(__m64 __m, __m64 __count) /// \param __count /// A 32-bit integer value. /// \returns A 64-bit integer vector containing the right-shifted value. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_srli_si64(__m64 __m, int __count) { - return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); + return __trunc64(__builtin_ia32_psrlqi128((__v2di)__anyext128(__m), + __count)); } /// Performs a bitwise AND of two 64-bit integer vectors. @@ -1074,10 +1134,10 @@ _mm_srli_si64(__m64 __m, int __count) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise AND of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_and_si64(__m64 __m1, __m64 __m2) { - return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); + return (__m64)(((__v1du)__m1) & ((__v1du)__m2)); } /// Performs a bitwise NOT of the first 64-bit integer vector, and then @@ -1095,10 +1155,10 @@ _mm_and_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise AND of the second /// parameter and the one's complement of the first parameter. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_andnot_si64(__m64 __m1, __m64 __m2) { - return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); + return (__m64)(~((__v1du)__m1) & ((__v1du)__m2)); } /// Performs a bitwise OR of two 64-bit integer vectors. @@ -1113,10 +1173,10 @@ _mm_andnot_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector. /// \returns A 64-bit integer vector containing the bitwise OR of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_or_si64(__m64 __m1, __m64 __m2) { - return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); + return (__m64)(((__v1du)__m1) | ((__v1du)__m2)); } /// Performs a bitwise exclusive OR of two 64-bit integer vectors. @@ -1131,10 +1191,10 @@ _mm_or_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector. 
/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both /// parameters. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_xor_si64(__m64 __m1, __m64 __m2) { - return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); + return (__m64)(((__v1du)__m1) ^ ((__v1du)__m2)); } /// Compares the 8-bit integer elements of two 64-bit integer vectors of @@ -1153,10 +1213,10 @@ _mm_xor_si64(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); + return (__m64)(((__v8qi)__m1) == ((__v8qi)__m2)); } /// Compares the 16-bit integer elements of two 64-bit integer vectors of @@ -1175,10 +1235,10 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); + return (__m64)(((__v4hi)__m1) == ((__v4hi)__m2)); } /// Compares the 32-bit integer elements of two 64-bit integer vectors of @@ -1197,10 +1257,10 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison /// results. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); + return (__m64)(((__v2si)__m1) == ((__v2si)__m2)); } /// Compares the 8-bit integer elements of two 64-bit integer vectors of @@ -1219,10 +1279,12 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [8 x i8]. /// \returns A 64-bit integer vector of [8 x i8] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); + /* This function always performs a signed comparison, but __v8qi is a char + which may be signed or unsigned, so use __v8qs. */ + return (__m64)((__v8qs)__m1 > (__v8qs)__m2); } /// Compares the 16-bit integer elements of two 64-bit integer vectors of @@ -1241,10 +1303,10 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [4 x i16]. /// \returns A 64-bit integer vector of [4 x i16] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); + return (__m64)((__v4hi)__m1 > (__v4hi)__m2); } /// Compares the 32-bit integer elements of two 64-bit integer vectors of @@ -1263,10 +1325,10 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) /// A 64-bit integer vector of [2 x i32]. /// \returns A 64-bit integer vector of [2 x i32] containing the comparison /// results. -static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); + return (__m64)((__v2si)__m1 > (__v2si)__m2); } /// Constructs a 64-bit integer vector initialized to zero. 
@@ -1276,10 +1338,9 @@ _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) /// This intrinsic corresponds to the PXOR instruction. /// /// \returns An initialized 64-bit integer vector with all elements set to zero. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_setzero_si64(void) -{ - return __extension__ (__m64){ 0LL }; +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_setzero_si64(void) { + return __extension__(__m64){0LL}; } /// Constructs a 64-bit integer vector initialized with the specified @@ -1297,10 +1358,9 @@ _mm_setzero_si64(void) /// A 32-bit integer value used to initialize the lower 32 bits of the /// result. /// \returns An initialized 64-bit integer vector. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_set_pi32(int __i1, int __i0) -{ - return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_set_pi32(int __i1, int __i0) { + return __extension__(__m64)(__v2si){__i0, __i1}; } /// Constructs a 64-bit integer vector initialized with the specified @@ -1320,10 +1380,9 @@ _mm_set_pi32(int __i1, int __i0) /// \param __s0 /// A 16-bit integer value used to initialize bits [15:0] of the result. /// \returns An initialized 64-bit integer vector. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) -{ - return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) { + return __extension__(__m64)(__v4hi){__s0, __s1, __s2, __s3}; } /// Constructs a 64-bit integer vector initialized with the specified @@ -1351,12 +1410,11 @@ _mm_set_pi16(short __s3, short __s2, short __s1, short __s0) /// \param __b0 /// An 8-bit integer value used to initialize bits [7:0] of the result. /// \returns An initialized 64-bit integer vector. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, - char __b1, char __b0) -{ - return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, - __b4, __b5, __b6, __b7); + char __b1, char __b0) { + return __extension__(__m64)(__v8qi){__b0, __b1, __b2, __b3, + __b4, __b5, __b6, __b7}; } /// Constructs a 64-bit integer vector of [2 x i32], with each of the @@ -1372,10 +1430,9 @@ _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, /// A 32-bit integer value used to initialize each vector element of the /// result. /// \returns An initialized 64-bit integer vector of [2 x i32]. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_set1_pi32(int __i) -{ - return _mm_set_pi32(__i, __i); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_set1_pi32(int __i) { + return _mm_set_pi32(__i, __i); } /// Constructs a 64-bit integer vector of [4 x i16], with each of the @@ -1391,10 +1448,9 @@ _mm_set1_pi32(int __i) /// A 16-bit integer value used to initialize each vector element of the /// result. /// \returns An initialized 64-bit integer vector of [4 x i16]. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_set1_pi16(short __w) -{ - return _mm_set_pi16(__w, __w, __w, __w); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_set1_pi16(short __w) { + return _mm_set_pi16(__w, __w, __w, __w); } /// Constructs a 64-bit integer vector of [8 x i8], with each of the @@ -1409,10 +1465,9 @@ _mm_set1_pi16(short __w) /// An 8-bit integer value used to initialize each vector element of the /// result. /// \returns An initialized 64-bit integer vector of [8 x i8]. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_set1_pi8(char __b) -{ - return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_set1_pi8(char __b) { + return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); } /// Constructs a 64-bit integer vector, initialized in reverse order with @@ -1430,10 +1485,9 @@ _mm_set1_pi8(char __b) /// A 32-bit integer value used to initialize the upper 32 bits of the /// result. /// \returns An initialized 64-bit integer vector. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_setr_pi32(int __i0, int __i1) -{ - return _mm_set_pi32(__i1, __i0); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_setr_pi32(int __i0, int __i1) { + return _mm_set_pi32(__i1, __i0); } /// Constructs a 64-bit integer vector, initialized in reverse order with @@ -1453,10 +1507,9 @@ _mm_setr_pi32(int __i0, int __i1) /// \param __w3 /// A 16-bit integer value used to initialize bits [63:48] of the result. /// \returns An initialized 64-bit integer vector. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) -{ - return _mm_set_pi16(__w3, __w2, __w1, __w0); +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) { + return _mm_set_pi16(__w3, __w2, __w1, __w0); } /// Constructs a 64-bit integer vector, initialized in reverse order with @@ -1484,14 +1537,15 @@ _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) /// \param __b7 /// An 8-bit integer value used to initialize bits [63:56] of the result. /// \returns An initialized 64-bit integer vector. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, - char __b6, char __b7) -{ - return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); + char __b6, char __b7) { + return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); } -#undef __DEFAULT_FN_ATTRS +#undef __anyext128 +#undef __trunc64 +#undef __DEFAULT_FN_ATTRS_SSE2 /* Aliases for compatibility. */ #define _m_empty _mm_empty diff --git a/lib/include/module.modulemap b/lib/include/module.modulemap index 9ffc249c8d1a..dcaf09e8f2c5 100644 --- a/lib/include/module.modulemap +++ b/lib/include/module.modulemap @@ -66,6 +66,8 @@ module _Builtin_intrinsics [system] [extern_c] { textual header "__wmmintrin_aes.h" textual header "__wmmintrin_pclmul.h" + textual header "mm3dnow.h" + explicit module mm_malloc { requires !freestanding header "mm_malloc.h" @@ -122,10 +124,6 @@ module _Builtin_intrinsics [system] [extern_c] { header "popcntintrin.h" } - explicit module mm3dnow { - header "mm3dnow.h" - } - explicit module aes_pclmul { header "wmmintrin.h" export aes diff --git a/lib/include/movrs_avx10_2_512intrin.h b/lib/include/movrs_avx10_2_512intrin.h new file mode 100644 index 000000000000..5cd907a59734 --- /dev/null +++ b/lib/include/movrs_avx10_2_512intrin.h @@ -0,0 +1,98 @@ +/*===----- movrs_avx10_2_512intrin.h - AVX10.2-512-MOVRS intrinsics --------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." 
+#endif + +#ifndef __MOVRS_AVX10_2_512INTRIN_H +#define __MOVRS_AVX10_2_512INTRIN_H +#ifdef __x86_64__ + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("movrs, avx10.2-512"), __min_vector_width__(512))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_loadrs_epi8(void const *__A) { + return (__m512i)__builtin_ia32_vmovrsb512((const __v64qi *)(__A)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_loadrs_epi8(__m512i __W, __mmask64 __U, void const *__A) { + return (__m512i)__builtin_ia32_selectb_512( + (__mmask64)__U, (__v64qi)_mm512_loadrs_epi8(__A), (__v64qi)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_loadrs_epi8(__mmask64 __U, void const *__A) { + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, + (__v64qi)_mm512_loadrs_epi8(__A), + (__v64qi)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_loadrs_epi32(void const *__A) { + return (__m512i)__builtin_ia32_vmovrsd512((const __v16si *)(__A)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_loadrs_epi32(__m512i __W, __mmask16 __U, void const *__A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_loadrs_epi32(__A), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_loadrs_epi32(__mmask16 __U, void const *__A) { + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, + (__v16si)_mm512_loadrs_epi32(__A), + (__v16si)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_loadrs_epi64(void const *__A) { + return (__m512i)__builtin_ia32_vmovrsq512((const __v8di *)(__A)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_loadrs_epi64(__m512i __W, __mmask8 __U, void const *__A) { + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__U, 
(__v8di)_mm512_loadrs_epi64(__A), (__v8di)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_loadrs_epi64(__mmask8 __U, void const *__A) { + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, + (__v8di)_mm512_loadrs_epi64(__A), + (__v8di)_mm512_setzero_si512()); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_loadrs_epi16(void const *__A) { + return (__m512i)__builtin_ia32_vmovrsw512((const __v32hi *)(__A)); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_mask_loadrs_epi16(__m512i __W, __mmask32 __U, void const *__A) { + return (__m512i)__builtin_ia32_selectw_512( + (__mmask32)__U, (__v32hi)_mm512_loadrs_epi16(__A), (__v32hi)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_maskz_loadrs_epi16(__mmask32 __U, void const *__A) { + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_loadrs_epi16(__A), + (__v32hi)_mm512_setzero_si512()); +} + +#undef __DEFAULT_FN_ATTRS512 + +#endif /* __x86_64__ */ +#endif /* __MOVRS_AVX10_2_512INTRIN_H */ diff --git a/lib/include/movrs_avx10_2intrin.h b/lib/include/movrs_avx10_2intrin.h new file mode 100644 index 000000000000..27b625b6b431 --- /dev/null +++ b/lib/include/movrs_avx10_2intrin.h @@ -0,0 +1,174 @@ +/*===--------- movrs_avx10_2intrin.h - AVX10.2-MOVRS intrinsics ------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." +#endif + +#ifndef __MOVRS_AVX10_2INTRIN_H +#define __MOVRS_AVX10_2INTRIN_H +#ifdef __x86_64__ + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("movrs,avx10.2-256"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("movrs,avx10.2-256"), __min_vector_width__(256))) + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_loadrs_epi8(void const *__A) { + return (__m128i)__builtin_ia32_vmovrsb128((const __v16qi *)(__A)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_loadrs_epi8(__m128i __W, __mmask16 __U, void const *__A) { + return (__m128i)__builtin_ia32_selectb_128( + (__mmask16)__U, (__v16qi)_mm_loadrs_epi8(__A), (__v16qi)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_loadrs_epi8(__mmask16 __U, void const *__A) { + return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, + (__v16qi)_mm_loadrs_epi8(__A), + (__v16qi)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_loadrs_epi8(void const *__A) { + return (__m256i)__builtin_ia32_vmovrsb256((const __v32qi *)(__A)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_loadrs_epi8(__m256i __W, __mmask32 __U, void const *__A) { + return (__m256i)__builtin_ia32_selectb_256( + (__mmask32)__U, (__v32qi)_mm256_loadrs_epi8(__A), (__v32qi)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_loadrs_epi8(__mmask32 __U, void const *__A) { + return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, + (__v32qi)_mm256_loadrs_epi8(__A), + (__v32qi)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_loadrs_epi32(void const *__A) { + return (__m128i)__builtin_ia32_vmovrsd128((const __v4si *)(__A)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_loadrs_epi32(__m128i __W, __mmask8 __U, void const *__A) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_loadrs_epi32(__A), (__v4si)__W); +} + +static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 +_mm_maskz_loadrs_epi32(__mmask8 __U, void const *__A) { + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_loadrs_epi32(__A), + (__v4si)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_loadrs_epi32(void const *__A) { + return (__m256i)__builtin_ia32_vmovrsd256((const __v8si *)(__A)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_loadrs_epi32(__m256i __W, __mmask8 __U, void const *__A) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_loadrs_epi32(__A), (__v8si)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_loadrs_epi32(__mmask8 __U, void const *__A) { + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_loadrs_epi32(__A), + (__v8si)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_loadrs_epi64(void const *__A) { + return (__m128i)__builtin_ia32_vmovrsq128((const __v2di *)(__A)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_loadrs_epi64(__m128i __W, __mmask8 __U, void const *__A) { + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_loadrs_epi64(__A), (__v2di)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_loadrs_epi64(__mmask8 __U, void const *__A) { + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_loadrs_epi64(__A), + (__v2di)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_loadrs_epi64(void const *__A) { + return (__m256i)__builtin_ia32_vmovrsq256((const __v4di *)(__A)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_loadrs_epi64(__m256i __W, __mmask8 __U, void const *__A) { + return (__m256i)__builtin_ia32_selectq_256( + (__mmask8)__U, (__v4di)_mm256_loadrs_epi64(__A), (__v4di)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_loadrs_epi64(__mmask8 __U, void const *__A) { + return 
(__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_loadrs_epi64(__A), + (__v4di)_mm256_setzero_si256()); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_loadrs_epi16(void const *__A) { + return (__m128i)__builtin_ia32_vmovrsw128((const __v8hi *)(__A)); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_mask_loadrs_epi16(__m128i __W, __mmask8 __U, void const *__A) { + return (__m128i)__builtin_ia32_selectw_128( + (__mmask8)__U, (__v8hi)_mm_loadrs_epi16(__A), (__v8hi)__W); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_maskz_loadrs_epi16(__mmask8 __U, void const *__A) { + return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, + (__v8hi)_mm_loadrs_epi16(__A), + (__v8hi)_mm_setzero_si128()); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_loadrs_epi16(void const *__A) { + return (__m256i)__builtin_ia32_vmovrsw256((const __v16hi *)(__A)); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_mask_loadrs_epi16(__m256i __W, __mmask16 __U, void const *__A) { + return (__m256i)__builtin_ia32_selectw_256( + (__mmask16)__U, (__v16hi)_mm256_loadrs_epi16(__A), (__v16hi)__W); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_maskz_loadrs_epi16(__mmask16 __U, void const *__A) { + return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, + (__v16hi)_mm256_loadrs_epi16(__A), + (__v16hi)_mm256_setzero_si256()); +} + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif /* __x86_64__ */ +#endif /* __MOVRS_AVX10_2INTRIN_H */ diff --git a/lib/include/movrsintrin.h b/lib/include/movrsintrin.h new file mode 100644 index 000000000000..250f4004cd2b --- /dev/null +++ b/lib/include/movrsintrin.h @@ -0,0 +1,59 @@ +/*===---------------- movrsintrin.h - MOVRS intrinsics ----------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===----------------------------------------------------------------------===*/ + +#ifndef __IMMINTRIN_H +#error "Never use <movrsintrin.h> directly; include <immintrin.h> instead." +#endif // __IMMINTRIN_H + +#ifndef __MOVRSINTRIN_H +#define __MOVRSINTRIN_H + +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("movrs"))) + +#ifdef __x86_64__ +static __inline__ char __DEFAULT_FN_ATTRS _movrs_i8(const void *__A) { + return (char)__builtin_ia32_movrsqi((const void *)__A); +} + +static __inline__ short __DEFAULT_FN_ATTRS _movrs_i16(const void *__A) { + return (short)__builtin_ia32_movrshi((const void *)__A); +} + +static __inline__ int __DEFAULT_FN_ATTRS _movrs_i32(const void *__A) { + return (int)__builtin_ia32_movrssi((const void *)__A); +} + +static __inline__ long long __DEFAULT_FN_ATTRS _movrs_i64(const void *__A) { + return (long long)__builtin_ia32_movrsdi((const void *)__A); +} +#endif // __x86_64__ + +// Loads a memory sequence containing the specified memory address into +/// the L3 data cache. Data will be shared (read/written) to by requesting +/// core and other cores. +/// +/// Note that the effect of this intrinsic is dependent on the processor +/// implementation. +/// +/// \headerfile <immintrin.h> +/// +/// This intrinsic corresponds to the \c PREFETCHRS instruction. +/// +/// \param __P +/// A pointer specifying the memory address to be prefetched.
+static __inline__ void __DEFAULT_FN_ATTRS +_m_prefetchrs(volatile const void *__P) { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcast-qual" + __builtin_ia32_prefetchrs((const void *)__P); +#pragma clang diagnostic pop +} + +#undef __DEFAULT_FN_ATTRS +#endif // __MOVRSINTRIN_H \ No newline at end of file diff --git a/lib/include/openmp_wrappers/__clang_openmp_device_functions.h b/lib/include/openmp_wrappers/__clang_openmp_device_functions.h index d5b6846b0348..3e354c63efc6 100644 --- a/lib/include/openmp_wrappers/__clang_openmp_device_functions.h +++ b/lib/include/openmp_wrappers/__clang_openmp_device_functions.h @@ -10,17 +10,15 @@ #ifndef __CLANG_OPENMP_DEVICE_FUNCTIONS_H__ #define __CLANG_OPENMP_DEVICE_FUNCTIONS_H__ -#ifndef _OPENMP -#error "This file is for OpenMP compilation only." -#endif - #ifdef __cplusplus extern "C" { #endif +#ifdef __NVPTX__ #pragma omp begin declare variant match( \ device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) +#pragma push_macro("__CUDA__") #define __CUDA__ #define __OPENMP_NVPTX__ @@ -31,9 +29,10 @@ extern "C" { #include <__clang_cuda_device_functions.h> #undef __OPENMP_NVPTX__ -#undef __CUDA__ +#pragma pop_macro("__CUDA__") #pragma omp end declare variant +#endif #ifdef __AMDGCN__ #pragma omp begin declare variant match(device = {arch(amdgcn)}) diff --git a/lib/include/openmp_wrappers/complex_cmath.h b/lib/include/openmp_wrappers/complex_cmath.h index e3d9aebbbc24..cee36bde3f52 100644 --- a/lib/include/openmp_wrappers/complex_cmath.h +++ b/lib/include/openmp_wrappers/complex_cmath.h @@ -64,8 +64,13 @@ template __DEVICE__ _Tp norm(const std::complex<_Tp> &__c) { } // conj - -template std::complex<_Tp> conj(const std::complex<_Tp> &__c) { +#ifdef _GLIBCXX20_CONSTEXPR +#define CXX20_CONSTEXPR_DEVICE __DEVICE__ +#else +#define CXX20_CONSTEXPR_DEVICE +#endif +template +CXX20_CONSTEXPR_DEVICE std::complex<_Tp> conj(const std::complex<_Tp> &__c) { return std::complex<_Tp>(__c.real(), 
-__c.imag()); } diff --git a/lib/include/pmmintrin.h b/lib/include/pmmintrin.h index 91cee1edda30..cd605df7fb52 100644 --- a/lib/include/pmmintrin.h +++ b/lib/include/pmmintrin.h @@ -17,9 +17,21 @@ #include <emmintrin.h> /* Define the default attributes for the functions in this file. */ +#if defined(__EVEX512__) && !defined(__AVX10_1_512__) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("sse3,no-evex512"), __min_vector_width__(128))) +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("sse3"), \ + __min_vector_width__(128))) +#endif + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif /// Loads data from an unaligned memory location to elements in a 128-bit /// vector. @@ -122,7 +134,7 @@ _mm_hsub_ps(__m128 __a, __m128 __b) /// destination. /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated /// values. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movehdup_ps(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 1, 1, 3, 3); @@ -143,7 +155,7 @@ _mm_movehdup_ps(__m128 __a) /// destination. /// \returns A 128-bit vector of [4 x float] containing the moved and duplicated /// values. -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_moveldup_ps(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 2, 2); @@ -244,7 +256,7 @@ _mm_hsub_pd(__m128d __a, __m128d __b) /// [127:64] and [63:0] of the destination. /// \returns A 128-bit vector of [2 x double] containing the moved and /// duplicated values.
-static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movedup_pd(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); @@ -297,5 +309,6 @@ _mm_mwait(unsigned __extensions, unsigned __hints) } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif /* __PMMINTRIN_H */ diff --git a/lib/include/popcntintrin.h b/lib/include/popcntintrin.h index 0aa94aecda5b..b276b4da4dc2 100644 --- a/lib/include/popcntintrin.h +++ b/lib/include/popcntintrin.h @@ -11,12 +11,13 @@ #define __POPCNTINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt"))) - #if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("popcnt"))) constexpr #else -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("popcnt"))) #endif /// Counts the number of bits in the source operand having a value of 1. @@ -29,7 +30,7 @@ /// An unsigned 32-bit integer operand. /// \returns A 32-bit integer containing the number of bits with value 1 in the /// source operand. -static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ int __DEFAULT_FN_ATTRS _mm_popcnt_u32(unsigned int __A) { return __builtin_popcount(__A); @@ -46,7 +47,7 @@ _mm_popcnt_u32(unsigned int __A) /// An unsigned 64-bit integer operand. /// \returns A 64-bit integer containing the number of bits with value 1 in the /// source operand. 
-static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ long long __DEFAULT_FN_ATTRS _mm_popcnt_u64(unsigned long long __A) { return __builtin_popcountll(__A); @@ -54,6 +55,5 @@ _mm_popcnt_u64(unsigned long long __A) #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS -#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif /* __POPCNTINTRIN_H */ diff --git a/lib/include/ptrauth.h b/lib/include/ptrauth.h index 154b599862a8..d489a67c533d 100644 --- a/lib/include/ptrauth.h +++ b/lib/include/ptrauth.h @@ -42,6 +42,9 @@ typedef enum { The extra data is always 0. */ ptrauth_key_cxx_vtable_pointer = ptrauth_key_process_independent_data, + /* The key used to sign pointers in ELF .init_array/.fini_array. */ + ptrauth_key_init_fini_pointer = ptrauth_key_process_independent_code, + /* Other pointers signed under the ABI use private ABI rules. */ } ptrauth_key; @@ -253,6 +256,9 @@ typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t; [[clang::ptrauth_vtable_pointer(key, address_discrimination, \ extra_discrimination)]] +/* The value is ptrauth_string_discriminator("init_fini") */ +#define __ptrauth_init_fini_discriminator 0xd9d4 + #else #define ptrauth_strip(__value, __key) \ diff --git a/lib/include/riscv_corev_alu.h b/lib/include/riscv_corev_alu.h new file mode 100644 index 000000000000..d2832ddf72ef --- /dev/null +++ b/lib/include/riscv_corev_alu.h @@ -0,0 +1,128 @@ +/*===---- riscv_corev_alu.h - CORE-V ALU intrinsics ------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RISCV_COREV_ALU_H +#define __RISCV_COREV_ALU_H + +#include + +#if defined(__cplusplus) +extern "C" { +#endif + +#if defined(__riscv_xcvalu) + +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) + +static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_abs(long a) { + return __builtin_abs(a); +} + +static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_slet(long a, long b) { + return __builtin_riscv_cv_alu_slet(a, b); +} + +static __inline__ long __DEFAULT_FN_ATTRS +__riscv_cv_alu_sletu(unsigned long a, unsigned long b) { + return __builtin_riscv_cv_alu_sletu(a, b); +} + +static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_min(long a, long b) { + return __builtin_elementwise_min(a, b); +} + +static __inline__ unsigned long __DEFAULT_FN_ATTRS +__riscv_cv_alu_minu(unsigned long a, unsigned long b) { + return __builtin_elementwise_min(a, b); +} + +static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_max(long a, long b) { + return __builtin_elementwise_max(a, b); +} + +static __inline__ unsigned long __DEFAULT_FN_ATTRS +__riscv_cv_alu_maxu(unsigned long a, unsigned long b) { + return __builtin_elementwise_max(a, b); +} + +static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_exths(int16_t a) { + return __builtin_riscv_cv_alu_exths(a); +} + +static __inline__ unsigned long __DEFAULT_FN_ATTRS +__riscv_cv_alu_exthz(uint16_t a) { + return __builtin_riscv_cv_alu_exthz(a); +} + +static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_extbs(int8_t a) { + return __builtin_riscv_cv_alu_extbs(a); +} + +static __inline__ unsigned long __DEFAULT_FN_ATTRS +__riscv_cv_alu_extbz(uint8_t a) { + return __builtin_riscv_cv_alu_extbz(a); +} + +static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_clip(long a, + unsigned long b) { + return __builtin_riscv_cv_alu_clip(a, b); +} + +static 
__inline__ unsigned long __DEFAULT_FN_ATTRS +__riscv_cv_alu_clipu(unsigned long a, unsigned long b) { + return __builtin_riscv_cv_alu_clipu(a, b); +} + +static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_addN(long a, long b, + uint8_t shft) { + return __builtin_riscv_cv_alu_addN(a, b, shft); +} + +static __inline__ unsigned long __DEFAULT_FN_ATTRS +__riscv_cv_alu_adduN(unsigned long a, unsigned long b, uint8_t shft) { + return __builtin_riscv_cv_alu_adduN(a, b, shft); +} + +static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_addRN(long a, long b, + uint8_t shft) { + return __builtin_riscv_cv_alu_addRN(a, b, shft); +} + +static __inline__ unsigned long __DEFAULT_FN_ATTRS +__riscv_cv_alu_adduRN(unsigned long a, unsigned long b, uint8_t shft) { + return __builtin_riscv_cv_alu_adduRN(a, b, shft); +} + +static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_subN(long a, long b, + uint8_t shft) { + return __builtin_riscv_cv_alu_subN(a, b, shft); +} + +static __inline__ unsigned long __DEFAULT_FN_ATTRS +__riscv_cv_alu_subuN(unsigned long a, unsigned long b, uint8_t shft) { + return __builtin_riscv_cv_alu_subuN(a, b, shft); +} + +static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_subRN(long a, long b, + uint8_t shft) { + return __builtin_riscv_cv_alu_subRN(a, b, shft); +} + +static __inline__ unsigned long __DEFAULT_FN_ATTRS +__riscv_cv_alu_subuRN(unsigned long a, unsigned long b, uint8_t shft) { + return __builtin_riscv_cv_alu_subuRN(a, b, shft); +} + +#endif // defined(__riscv_xcvalu) + +#if defined(__cplusplus) +} +#endif + +#endif // define __RISCV_COREV_ALU_H diff --git a/lib/include/riscv_vector.h b/lib/include/riscv_vector.h index c99ceb802174..0560e82a85fa 100644 --- a/lib/include/riscv_vector.h +++ b/lib/include/riscv_vector.h @@ -419,7 +419,6 @@ typedef __rvv_bfloat16m2x4_t vbfloat16m2x4_t; typedef __rvv_bfloat16m4_t vbfloat16m4_t; typedef __rvv_bfloat16m4x2_t vbfloat16m4x2_t; typedef __rvv_bfloat16m8_t vbfloat16m8_t; -#define 
__riscv_v_intrinsic_overloading 1 #ifdef __cplusplus } diff --git a/lib/include/sm4evexintrin.h b/lib/include/sm4evexintrin.h new file mode 100644 index 000000000000..f6ae0037baea --- /dev/null +++ b/lib/include/sm4evexintrin.h @@ -0,0 +1,32 @@ +/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif // __IMMINTRIN_H + +#ifndef __SM4EVEXINTRIN_H +#define __SM4EVEXINTRIN_H + +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("sm4,avx10.2-512"), __min_vector_width__(512))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_sm4key4_epi32(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_vsm4key4512((__v16su)__A, (__v16su)__B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS512 +_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) { + return (__m512i)__builtin_ia32_vsm4rnds4512((__v16su)__A, (__v16su)__B); +} + +#undef __DEFAULT_FN_ATTRS512 + +#endif // __SM4EVEXINTRIN_H diff --git a/lib/include/smmintrin.h b/lib/include/smmintrin.h index b3fec474e35a..bc6fe4c801d7 100644 --- a/lib/include/smmintrin.h +++ b/lib/include/smmintrin.h @@ -17,9 +17,15 @@ #include /* Define the default attributes for the functions in this file. */ +#if defined(__EVEX512__) && !defined(__AVX10_1_512__) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ __target__("sse4.1,no-evex512"), __min_vector_width__(128))) +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), \ + __min_vector_width__(128))) +#endif /* SSE4 Rounding macros. 
*/ #define _MM_FROUND_TO_NEAREST_INT 0x00 diff --git a/lib/include/stdalign.h b/lib/include/stdalign.h index 56cdfa52d4ba..158508e65d2b 100644 --- a/lib/include/stdalign.h +++ b/lib/include/stdalign.h @@ -10,10 +10,6 @@ #ifndef __STDALIGN_H #define __STDALIGN_H -#if defined(__MVS__) && __has_include_next() -#include_next -#else - #if defined(__cplusplus) || \ (defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L) #ifndef __cplusplus @@ -25,5 +21,4 @@ #define __alignof_is_defined 1 #endif /* __STDC_VERSION__ */ -#endif /* __MVS__ */ #endif /* __STDALIGN_H */ diff --git a/lib/include/tbmintrin.h b/lib/include/tbmintrin.h index f4e848a1c001..cf92d5a7b3b0 100644 --- a/lib/include/tbmintrin.h +++ b/lib/include/tbmintrin.h @@ -15,63 +15,60 @@ #define __TBMINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("tbm"))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("tbm"))) constexpr +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("tbm"))) +#endif #define __bextri_u32(a, b) \ ((unsigned int)__builtin_ia32_bextri_u32((unsigned int)(a), \ (unsigned int)(b))) static __inline__ unsigned int __DEFAULT_FN_ATTRS -__blcfill_u32(unsigned int __a) -{ +__blcfill_u32(unsigned int __a) { return __a & (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS -__blci_u32(unsigned int __a) -{ +__blci_u32(unsigned int __a) { return __a | ~(__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS -__blcic_u32(unsigned int __a) -{ +__blcic_u32(unsigned int __a) { return ~__a & (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS -__blcmsk_u32(unsigned int __a) -{ +__blcmsk_u32(unsigned int __a) { return __a ^ (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS -__blcs_u32(unsigned int __a) -{ 
+__blcs_u32(unsigned int __a) { return __a | (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS -__blsfill_u32(unsigned int __a) -{ +__blsfill_u32(unsigned int __a) { return __a | (__a - 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS -__blsic_u32(unsigned int __a) -{ +__blsic_u32(unsigned int __a) { return ~__a | (__a - 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS -__t1mskc_u32(unsigned int __a) -{ +__t1mskc_u32(unsigned int __a) { return ~__a | (__a + 1); } static __inline__ unsigned int __DEFAULT_FN_ATTRS -__tzmsk_u32(unsigned int __a) -{ +__tzmsk_u32(unsigned int __a) { return ~__a & (__a - 1); } @@ -81,56 +78,47 @@ __tzmsk_u32(unsigned int __a) (unsigned long long)(b))) static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__blcfill_u64(unsigned long long __a) -{ +__blcfill_u64(unsigned long long __a) { return __a & (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__blci_u64(unsigned long long __a) -{ +__blci_u64(unsigned long long __a) { return __a | ~(__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__blcic_u64(unsigned long long __a) -{ +__blcic_u64(unsigned long long __a) { return ~__a & (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__blcmsk_u64(unsigned long long __a) -{ +__blcmsk_u64(unsigned long long __a) { return __a ^ (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__blcs_u64(unsigned long long __a) -{ +__blcs_u64(unsigned long long __a) { return __a | (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__blsfill_u64(unsigned long long __a) -{ +__blsfill_u64(unsigned long long __a) { return __a | (__a - 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__blsic_u64(unsigned long long __a) -{ +__blsic_u64(unsigned long long __a) { return ~__a | (__a - 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__t1mskc_u64(unsigned long long __a) -{ +__t1mskc_u64(unsigned long long __a) { return ~__a 
| (__a + 1); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS -__tzmsk_u64(unsigned long long __a) -{ +__tzmsk_u64(unsigned long long __a) { return ~__a & (__a - 1); } #endif diff --git a/lib/include/tmmintrin.h b/lib/include/tmmintrin.h index bf8327b692d1..371cc82e3dc9 100644 --- a/lib/include/tmmintrin.h +++ b/lib/include/tmmintrin.h @@ -17,13 +17,21 @@ #include /* Define the default attributes for the functions in this file. */ +#if defined(__EVEX512__) && !defined(__AVX10_1_512__) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, \ - __target__("ssse3,no-evex512"), __min_vector_width__(64))) -#define __DEFAULT_FN_ATTRS_MMX \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("mmx,ssse3,no-evex512"), \ - __min_vector_width__(64))) + __target__("ssse3,no-evex512"), __min_vector_width__(128))) +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), \ + __min_vector_width__(128))) +#endif + +#define __trunc64(x) \ + (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) +#define __anyext128(x) \ + (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ + 1, -1, -1) /// Computes the absolute value of each of the packed 8-bit signed /// integers in the source operand and stores the 8-bit unsigned integer @@ -37,10 +45,10 @@ /// A 64-bit vector of [8 x i8]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_abs_pi8(__m64 __a) { - return (__m64)__builtin_ia32_pabsb((__v8qi)__a); + return (__m64)__builtin_elementwise_abs((__v8qs)__a); } /// Computes the absolute value of each of the packed 8-bit signed @@ -73,10 +81,10 @@ _mm_abs_epi8(__m128i __a) /// A 64-bit vector of [4 x i16]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_abs_pi16(__m64 __a) { - return (__m64)__builtin_ia32_pabsw((__v4hi)__a); + return (__m64)__builtin_elementwise_abs((__v4hi)__a); } /// Computes the absolute value of each of the packed 16-bit signed @@ -109,10 +117,10 @@ _mm_abs_epi16(__m128i __a) /// A 64-bit vector of [2 x i32]. /// \returns A 64-bit integer vector containing the absolute values of the /// elements in the operand. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_abs_pi32(__m64 __a) { - return (__m64)__builtin_ia32_pabsd((__v2si)__a); + return (__m64)__builtin_elementwise_abs((__v2si)__a); } /// Computes the absolute value of each of the packed 32-bit signed @@ -177,7 +185,10 @@ _mm_abs_epi32(__m128i __a) /// \returns A 64-bit integer vector containing the concatenated right-shifted /// value. #define _mm_alignr_pi8(a, b, n) \ - ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n))) + ((__m64)__builtin_shufflevector( \ + __builtin_ia32_psrldqi128_byteshift( \ + __builtin_shufflevector((__v1di)(a), (__v1di)(b), 1, 0), \ + (n)), __extension__ (__v2di){}, 0)) /// Horizontally adds the adjacent pairs of values contained in 2 packed /// 128-bit vectors of [8 x i16]. @@ -242,10 +253,11 @@ _mm_hadd_epi32(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadd_pi16(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); + return __trunc64(__builtin_ia32_phaddw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally adds the adjacent pairs of values contained in 2 packed @@ -265,10 +277,11 @@ _mm_hadd_pi16(__m64 __a, __m64 __b) /// destination. 
/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both /// operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadd_pi32(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); + return __trunc64(__builtin_ia32_phaddd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally adds, with saturation, the adjacent pairs of values contained @@ -317,10 +330,11 @@ _mm_hadds_epi16(__m128i __a, __m128i __b) /// destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// sums of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hadds_pi16(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); + return __trunc64(__builtin_ia32_phaddsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -386,10 +400,11 @@ _mm_hsub_epi32(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal differences /// of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsub_pi16(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); + return __trunc64(__builtin_ia32_phsubw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Horizontally subtracts the adjacent pairs of values contained in 2 @@ -409,10 +424,11 @@ _mm_hsub_pi16(__m64 __a, __m64 __b) /// the destination. /// \returns A 64-bit vector of [2 x i32] containing the horizontal differences /// of both operands. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsub_pi32(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); + return __trunc64(__builtin_ia32_phsubd128( + (__v4si)__builtin_shufflevector(__a, __b, 0, 1), (__v4si){})); } /// Horizontally subtracts, with saturation, the adjacent pairs of values @@ -461,10 +477,11 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b) /// the destination. /// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated /// differences of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_hsubs_pi16(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); + return __trunc64(__builtin_ia32_phsubsw128( + (__v8hi)__builtin_shufflevector(__a, __b, 0, 1), (__v8hi){})); } /// Multiplies corresponding pairs of packed 8-bit unsigned integer @@ -525,10 +542,11 @@ _mm_maddubs_epi16(__m128i __a, __m128i __b) /// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n /// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n /// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_maddubs_pi16(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); + return __trunc64(__builtin_ia32_pmaddubsw128((__v16qi)__anyext128(__a), + (__v16qi)__anyext128(__b))); } /// Multiplies packed 16-bit signed integer values, truncates the 32-bit @@ -565,10 +583,11 @@ _mm_mulhrs_epi16(__m128i __a, __m128i __b) /// A 64-bit vector of [4 x i16] containing one of the source operands. /// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled /// products of both operands. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_mulhrs_pi16(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); + return __trunc64(__builtin_ia32_pmulhrsw128((__v8hi)__anyext128(__a), + (__v8hi)__anyext128(__b))); } /// Copies the 8-bit integers from a 128-bit integer vector to the @@ -614,12 +633,15 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b) /// 1: Clear the corresponding byte in the destination. \n /// 0: Copy the selected source byte to the corresponding byte in the /// destination. \n -/// Bits [3:0] select the source byte to be copied. +/// Bits [2:0] select the source byte to be copied. /// \returns A 64-bit integer vector containing the copied or cleared values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_shuffle_pi8(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); + return __trunc64(__builtin_ia32_pshufb128( + (__v16qi)__builtin_shufflevector( + (__v2si)(__a), __extension__ (__v2si){}, 0, 1, 0, 1), + (__v16qi)__anyext128(__b))); } /// For each 8-bit integer in the first source operand, perform one of @@ -720,10 +742,11 @@ _mm_sign_epi32(__m128i __a, __m128i __b) /// A 64-bit integer vector containing control bytes corresponding to /// positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi8(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); + return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a), + (__v16qi)__anyext128(__b))); } /// For each 16-bit integer in the first source operand, perform one of @@ -746,10 +769,11 @@ _mm_sign_pi8(__m64 __a, __m64 __b) /// A 64-bit integer vector containing control words corresponding to /// positions in the destination. 
/// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi16(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); + return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a), + (__v8hi)__anyext128(__b))); } /// For each 32-bit integer in the first source operand, perform one of @@ -772,13 +796,15 @@ _mm_sign_pi16(__m64 __a, __m64 __b) /// A 64-bit integer vector containing two control doublewords corresponding /// to positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS _mm_sign_pi32(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); + return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a), + (__v4si)__anyext128(__b))); } +#undef __anyext128 +#undef __trunc64 #undef __DEFAULT_FN_ATTRS -#undef __DEFAULT_FN_ATTRS_MMX #endif /* __TMMINTRIN_H */ diff --git a/lib/include/vecintrin.h b/lib/include/vecintrin.h index 1f51e32c0d13..a14c39f9f731 100644 --- a/lib/include/vecintrin.h +++ b/lib/include/vecintrin.h @@ -468,6 +468,27 @@ vec_perm(__vector __bool long long __a, __vector __bool long long __b, (__vector unsigned char)__a, (__vector unsigned char)__b, __c); } +static inline __ATTRS_o_ai __vector signed __int128 +vec_perm(__vector signed __int128 __a, __vector signed __int128 __b, + __vector unsigned char __c) { + return (__vector signed __int128)__builtin_s390_vperm( + (__vector unsigned char)__a, (__vector unsigned char)__b, __c); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_perm(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + __vector unsigned char __c) { + return (__vector unsigned __int128)__builtin_s390_vperm( + (__vector unsigned char)__a, (__vector unsigned char)__b, __c); +} + 
+static inline __ATTRS_o_ai __vector __bool __int128 +vec_perm(__vector __bool __int128 __a, __vector __bool __int128 __b, + __vector unsigned char __c) { + return (__vector __bool __int128)__builtin_s390_vperm( + (__vector unsigned char)__a, (__vector unsigned char)__b, __c); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_perm(__vector float __a, __vector float __b, @@ -514,9 +535,19 @@ vec_permi(__vector double __a, __vector double __b, int __c) (__vector unsigned long long)(Y), \ (((Z) & 2) << 1) | ((Z) & 1))) +/*-- vec_bperm --------------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_ai __vector unsigned long long +vec_bperm(__vector unsigned __int128 __a, __vector unsigned char __b) { + return __builtin_s390_vbperm((__vector unsigned char)__a, __b); +} +#endif + /*-- vec_bperm_u128 ---------------------------------------------------------*/ #if __ARCH__ >= 12 +// This prototype is deprecated. static inline __ATTRS_ai __vector unsigned long long vec_bperm_u128(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vbperm(__a, __b); @@ -558,6 +589,18 @@ vec_revb(__vector unsigned long long __vec) { return __builtin_s390_vlbrg(__vec); } +static inline __ATTRS_o_ai __vector signed __int128 +vec_revb(__vector signed __int128 __vec) { + return (__vector signed __int128) + __builtin_s390_vlbrq((unsigned __int128)__vec); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_revb(__vector unsigned __int128 __vec) { + return (__vector unsigned __int128) + __builtin_s390_vlbrq((unsigned __int128)__vec); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_revb(__vector float __vec) { @@ -820,6 +863,46 @@ vec_sel(__vector unsigned long long __a, __vector unsigned long long __b, (~(__vector unsigned long long)__c & __a)); } +static inline __ATTRS_o_ai __vector signed __int128 +vec_sel(__vector signed __int128 __a, __vector signed __int128 __b, + 
__vector unsigned __int128 __c) { + return (((__vector signed __int128)__c & __b) | + (~(__vector signed __int128)__c & __a)); +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_sel(__vector signed __int128 __a, __vector signed __int128 __b, + __vector __bool __int128 __c) { + return (((__vector signed __int128)__c & __b) | + (~(__vector signed __int128)__c & __a)); +} + +static inline __ATTRS_o_ai __vector __bool __int128 +vec_sel(__vector __bool __int128 __a, __vector __bool __int128 __b, + __vector unsigned __int128 __c) { + return (((__vector __bool __int128)__c & __b) | + (~(__vector __bool __int128)__c & __a)); +} + +static inline __ATTRS_o_ai __vector __bool __int128 +vec_sel(__vector __bool __int128 __a, __vector __bool __int128 __b, + __vector __bool __int128 __c) { + return (__c & __b) | (~__c & __a); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_sel(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + __vector unsigned __int128 __c) { + return (__c & __b) | (~__c & __a); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_sel(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + __vector __bool __int128 __c) { + return (((__vector unsigned __int128)__c & __b) | + (~(__vector unsigned __int128)__c & __a)); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_sel(__vector float __a, __vector float __b, __vector unsigned int __c) { @@ -1078,6 +1161,22 @@ vec_xl(long __offset, const unsigned long long *__ptr) { return V; } +static inline __ATTRS_o_ai __vector signed __int128 +vec_xl(long __offset, const signed __int128 *__ptr) { + __vector signed __int128 V; + __builtin_memcpy(&V, ((const char *)__ptr + __offset), + sizeof(__vector signed __int128)); + return V; +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_xl(long __offset, const unsigned __int128 *__ptr) { + __vector unsigned __int128 V; + __builtin_memcpy(&V, ((const char *)__ptr + __offset), + 
sizeof(__vector unsigned __int128)); + return V; +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_xl(long __offset, const float *__ptr) { @@ -1294,6 +1393,22 @@ vec_xst(__vector unsigned long long __vec, long __offset, sizeof(__vector unsigned long long)); } +static inline __ATTRS_o_ai void +vec_xst(__vector signed __int128 __vec, long __offset, + signed __int128 *__ptr) { + __vector signed __int128 V = __vec; + __builtin_memcpy(((char *)__ptr + __offset), &V, + sizeof(__vector signed __int128)); +} + +static inline __ATTRS_o_ai void +vec_xst(__vector unsigned __int128 __vec, long __offset, + unsigned __int128 *__ptr) { + __vector unsigned __int128 V = __vec; + __builtin_memcpy(((char *)__ptr + __offset), &V, + sizeof(__vector unsigned __int128)); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai void vec_xst(__vector float __vec, long __offset, float *__ptr) { @@ -1465,6 +1580,14 @@ extern __ATTRS_o __vector unsigned long long vec_load_bndry(const unsigned long long *__ptr, unsigned short __len) __constant_pow2_range(__len, 64, 4096); +extern __ATTRS_o __vector signed __int128 +vec_load_bndry(const signed __int128 *__ptr, unsigned short __len) + __constant_pow2_range(__len, 64, 4096); + +extern __ATTRS_o __vector unsigned __int128 +vec_load_bndry(const unsigned __int128 *__ptr, unsigned short __len) + __constant_pow2_range(__len, 64, 4096); + #if __ARCH__ >= 12 extern __ATTRS_o __vector float vec_load_bndry(const float *__ptr, unsigned short __len) @@ -1496,43 +1619,51 @@ vec_load_len(const unsigned char *__ptr, unsigned int __len) { return (__vector unsigned char)__builtin_s390_vll(__len, __ptr); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_load_len(const signed short *__ptr, unsigned int __len) { return (__vector signed short)__builtin_s390_vll(__len, __ptr); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector unsigned short vec_load_len(const unsigned short *__ptr, unsigned int __len) { return (__vector unsigned short)__builtin_s390_vll(__len, __ptr); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_load_len(const signed int *__ptr, unsigned int __len) { return (__vector signed int)__builtin_s390_vll(__len, __ptr); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_load_len(const unsigned int *__ptr, unsigned int __len) { return (__vector unsigned int)__builtin_s390_vll(__len, __ptr); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_load_len(const signed long long *__ptr, unsigned int __len) { return (__vector signed long long)__builtin_s390_vll(__len, __ptr); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_load_len(const unsigned long long *__ptr, unsigned int __len) { return (__vector unsigned long long)__builtin_s390_vll(__len, __ptr); } #if __ARCH__ >= 12 +// This prototype is deprecated. static inline __ATTRS_o_ai __vector float vec_load_len(const float *__ptr, unsigned int __len) { return (__vector float)__builtin_s390_vll(__len, __ptr); } #endif +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector double vec_load_len(const double *__ptr, unsigned int __len) { return (__vector double)__builtin_s390_vll(__len, __ptr); @@ -1541,7 +1672,12 @@ vec_load_len(const double *__ptr, unsigned int __len) { /*-- vec_load_len_r ---------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_ai __vector unsigned char +static inline __ATTRS_o_ai __vector signed char +vec_load_len_r(const signed char *__ptr, unsigned int __len) { + return (__vector signed char)__builtin_s390_vlrlr(__len, __ptr); +} + +static inline __ATTRS_o_ai __vector unsigned char vec_load_len_r(const unsigned char *__ptr, unsigned int __len) { return (__vector unsigned char)__builtin_s390_vlrlr(__len, __ptr); } @@ -1561,36 +1697,42 @@ vec_store_len(__vector unsigned char __vec, unsigned char *__ptr, __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_store_len(__vector signed short __vec, signed short *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_store_len(__vector unsigned short __vec, unsigned short *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_store_len(__vector signed int __vec, signed int *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_store_len(__vector unsigned int __vec, unsigned int *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai void vec_store_len(__vector signed long long __vec, signed long long *__ptr, unsigned int __len) { __builtin_s390_vstl((__vector signed char)__vec, __len, __ptr); } +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_store_len(__vector unsigned long long __vec, unsigned long long *__ptr, unsigned int __len) { @@ -1598,6 +1740,7 @@ vec_store_len(__vector unsigned long long __vec, unsigned long long *__ptr, } #if __ARCH__ >= 12 +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_store_len(__vector float __vec, float *__ptr, unsigned int __len) { @@ -1605,6 +1748,7 @@ vec_store_len(__vector float __vec, float *__ptr, } #endif +// This prototype is deprecated. static inline __ATTRS_o_ai void vec_store_len(__vector double __vec, double *__ptr, unsigned int __len) { @@ -1614,7 +1758,13 @@ vec_store_len(__vector double __vec, double *__ptr, /*-- vec_store_len_r --------------------------------------------------------*/ #if __ARCH__ >= 12 -static inline __ATTRS_ai void +static inline __ATTRS_o_ai void +vec_store_len_r(__vector signed char __vec, signed char *__ptr, + unsigned int __len) { + __builtin_s390_vstrlr(__vec, __len, __ptr); +} + +static inline __ATTRS_o_ai void vec_store_len_r(__vector unsigned char __vec, unsigned char *__ptr, unsigned int __len) { __builtin_s390_vstrlr((__vector signed char)__vec, __len, __ptr); @@ -1711,6 +1861,35 @@ vec_genmasks_64(unsigned char __first, unsigned char __last) return (__vector unsigned long long)__value; } +/*-- vec_gen_element_masks_* ------------------------------------------------*/ + +#if __ARCH__ >= 15 +static inline __ATTRS_ai __vector unsigned char +vec_gen_element_masks_8(__vector unsigned short __mask) { + return __builtin_s390_vgemb(__mask); +} + +static inline __ATTRS_ai __vector unsigned short +vec_gen_element_masks_16(__vector unsigned char __mask) { + return __builtin_s390_vgemh(__mask); +} + +static inline __ATTRS_ai __vector unsigned int 
+vec_gen_element_masks_32(__vector unsigned char __mask) { + return __builtin_s390_vgemf(__mask); +} + +static inline __ATTRS_ai __vector unsigned long long +vec_gen_element_masks_64(__vector unsigned char __mask) { + return __builtin_s390_vgemg(__mask); +} + +static inline __ATTRS_ai __vector unsigned __int128 +vec_gen_element_masks_128(__vector unsigned char __mask) { + return (__vector unsigned __int128)__builtin_s390_vgemq(__mask); +} +#endif + /*-- vec_splat --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char @@ -1894,6 +2073,16 @@ vec_splats(unsigned long long __scalar) { return (__vector unsigned long long)__scalar; } +static inline __ATTRS_o_ai __vector signed __int128 +vec_splats(signed __int128 __scalar) { + return (__vector signed __int128)__scalar; +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_splats(unsigned __int128 __scalar) { + return (__vector unsigned __int128)__scalar; +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_splats(float __scalar) { @@ -2166,6 +2355,27 @@ vec_pack(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector unsigned int)(__ac[1], __ac[3], __bc[1], __bc[3]); } +static inline __ATTRS_o_ai __vector signed long long +vec_pack(__vector signed __int128 __a, __vector signed __int128 __b) { + __vector signed long long __ac = (__vector signed long long)__a; + __vector signed long long __bc = (__vector signed long long)__b; + return (__vector signed long long)(__ac[1], __bc[1]); +} + +static inline __ATTRS_o_ai __vector __bool long long +vec_pack(__vector __bool __int128 __a, __vector __bool __int128 __b) { + __vector __bool long long __ac = (__vector __bool long long)__a; + __vector __bool long long __bc = (__vector __bool long long)__b; + return (__vector __bool long long)(__ac[1], __bc[1]); +} + +static inline __ATTRS_o_ai __vector unsigned long long +vec_pack(__vector unsigned __int128 __a, __vector 
unsigned __int128 __b) { + __vector unsigned long long __ac = (__vector unsigned long long)__a; + __vector unsigned long long __bc = (__vector unsigned long long)__b; + return (__vector unsigned long long)(__ac[1], __bc[1]); +} + /*-- vec_packs --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char @@ -2344,6 +2554,24 @@ vec_unpackh(__vector unsigned int __a) { return __builtin_s390_vuplhf(__a); } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector signed __int128 +vec_unpackh(__vector signed long long __a) { + return (__vector signed __int128)__builtin_s390_vuphg(__a); +} + +static inline __ATTRS_o_ai __vector __bool __int128 +vec_unpackh(__vector __bool long long __a) { + return ((__vector __bool __int128) + __builtin_s390_vuphg((__vector signed long long)__a)); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_unpackh(__vector unsigned long long __a) { + return (__vector unsigned __int128)__builtin_s390_vuplhg(__a); +} +#endif + /*-- vec_unpackl ------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed short @@ -2394,6 +2622,24 @@ vec_unpackl(__vector unsigned int __a) { return __builtin_s390_vupllf(__a); } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector signed __int128 +vec_unpackl(__vector signed long long __a) { + return (__vector signed __int128)__builtin_s390_vuplg(__a); +} + +static inline __ATTRS_o_ai __vector __bool __int128 +vec_unpackl(__vector __bool long long __a) { + return ((__vector __bool __int128) + __builtin_s390_vuplg((__vector signed long long)__a)); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_unpackl(__vector unsigned long long __a) { + return (__vector unsigned __int128)__builtin_s390_vupllg(__a); +} +#endif + /*-- vec_cmpeq --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector __bool char @@ -2456,6 +2702,21 @@ vec_cmpeq(__vector unsigned 
long long __a, __vector unsigned long long __b) { return (__vector __bool long long)(__a == __b); } +static inline __ATTRS_o_ai __vector __bool __int128 +vec_cmpeq(__vector __bool __int128 __a, __vector __bool __int128 __b) { + return (__vector __bool __int128)(__a == __b); +} + +static inline __ATTRS_o_ai __vector __bool __int128 +vec_cmpeq(__vector signed __int128 __a, __vector signed __int128 __b) { + return (__vector __bool __int128)(__a == __b); +} + +static inline __ATTRS_o_ai __vector __bool __int128 +vec_cmpeq(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return (__vector __bool __int128)(__a == __b); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector __bool int vec_cmpeq(__vector float __a, __vector float __b) { @@ -2510,6 +2771,16 @@ vec_cmpge(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector __bool long long)(__a >= __b); } +static inline __ATTRS_o_ai __vector __bool __int128 +vec_cmpge(__vector signed __int128 __a, __vector signed __int128 __b) { + return (__vector __bool __int128)(__a >= __b); +} + +static inline __ATTRS_o_ai __vector __bool __int128 +vec_cmpge(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return (__vector __bool __int128)(__a >= __b); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector __bool int vec_cmpge(__vector float __a, __vector float __b) { @@ -2564,6 +2835,16 @@ vec_cmpgt(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector __bool long long)(__a > __b); } +static inline __ATTRS_o_ai __vector __bool __int128 +vec_cmpgt(__vector signed __int128 __a, __vector signed __int128 __b) { + return (__vector __bool __int128)(__a > __b); +} + +static inline __ATTRS_o_ai __vector __bool __int128 +vec_cmpgt(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return (__vector __bool __int128)(__a > __b); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector __bool int vec_cmpgt(__vector float 
__a, __vector float __b) { @@ -2618,6 +2899,16 @@ vec_cmple(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector __bool long long)(__a <= __b); } +static inline __ATTRS_o_ai __vector __bool __int128 +vec_cmple(__vector signed __int128 __a, __vector signed __int128 __b) { + return (__vector __bool __int128)(__a <= __b); +} + +static inline __ATTRS_o_ai __vector __bool __int128 +vec_cmple(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return (__vector __bool __int128)(__a <= __b); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector __bool int vec_cmple(__vector float __a, __vector float __b) { @@ -2672,6 +2963,16 @@ vec_cmplt(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector __bool long long)(__a < __b); } +static inline __ATTRS_o_ai __vector __bool __int128 +vec_cmplt(__vector signed __int128 __a, __vector signed __int128 __b) { + return (__vector __bool __int128)(__a < __b); +} + +static inline __ATTRS_o_ai __vector __bool __int128 +vec_cmplt(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return (__vector __bool __int128)(__a < __b); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector __bool int vec_cmplt(__vector float __a, __vector float __b) { @@ -2914,6 +3215,29 @@ vec_all_eq(__vector __bool long long __a, __vector __bool long long __b) { return __cc == 0; } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai int +vec_all_eq(__vector signed __int128 __a, __vector signed __int128 __b) { + int __cc; + __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + int __cc; + __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_eq(__vector __bool __int128 __a, __vector __bool __int128 __b) { + int __cc; + 
__builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); + return __cc == 0; +} +#endif + #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_eq(__vector float __a, __vector float __b) { @@ -3161,6 +3485,29 @@ vec_all_ne(__vector __bool long long __a, __vector __bool long long __b) { return __cc == 3; } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai int +vec_all_ne(__vector signed __int128 __a, __vector signed __int128 __b) { + int __cc; + __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + int __cc; + __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ne(__vector __bool __int128 __a, __vector __bool __int128 __b) { + int __cc; + __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); + return __cc == 3; +} +#endif + #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_ne(__vector float __a, __vector float __b) { @@ -3399,6 +3746,22 @@ vec_all_ge(__vector __bool long long __a, __vector __bool long long __b) { return __cc == 3; } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai int +vec_all_ge(__vector signed __int128 __a, __vector signed __int128 __b) { + int __cc; + __builtin_s390_vchqs((signed __int128)__b, (signed __int128)__a, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_ge(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + int __cc; + __builtin_s390_vchlqs((unsigned __int128)__b, (unsigned __int128)__a, &__cc); + return __cc == 3; +} +#endif + #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_ge(__vector float __a, __vector float __b) { @@ -3637,6 +4000,22 @@ vec_all_gt(__vector __bool long long __a, __vector __bool long long __b) { return __cc == 0; } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai int 
+vec_all_gt(__vector signed __int128 __a, __vector signed __int128 __b) { + int __cc; + __builtin_s390_vchqs((signed __int128)__a, (signed __int128)__b, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_gt(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + int __cc; + __builtin_s390_vchlqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); + return __cc == 0; +} +#endif + #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_gt(__vector float __a, __vector float __b) { @@ -3875,6 +4254,22 @@ vec_all_le(__vector __bool long long __a, __vector __bool long long __b) { return __cc == 3; } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai int +vec_all_le(__vector signed __int128 __a, __vector signed __int128 __b) { + int __cc; + __builtin_s390_vchqs((signed __int128)__a, (signed __int128)__b, &__cc); + return __cc == 3; +} + +static inline __ATTRS_o_ai int +vec_all_le(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + int __cc; + __builtin_s390_vchlqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); + return __cc == 3; +} +#endif + #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_le(__vector float __a, __vector float __b) { @@ -4113,6 +4508,22 @@ vec_all_lt(__vector __bool long long __a, __vector __bool long long __b) { return __cc == 0; } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai int +vec_all_lt(__vector signed __int128 __a, __vector signed __int128 __b) { + int __cc; + __builtin_s390_vchqs((signed __int128)__b, (signed __int128)__a, &__cc); + return __cc == 0; +} + +static inline __ATTRS_o_ai int +vec_all_lt(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + int __cc; + __builtin_s390_vchlqs((unsigned __int128)__b, (unsigned __int128)__a, &__cc); + return __cc == 0; +} +#endif + #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_all_lt(__vector float __a, __vector float __b) { @@ -4467,6 +4878,29 @@ vec_any_eq(__vector __bool long long __a, __vector __bool long 
long __b) { return __cc <= 1; } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai int +vec_any_eq(__vector signed __int128 __a, __vector signed __int128 __b) { + int __cc; + __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + int __cc; + __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_eq(__vector __bool __int128 __a, __vector __bool __int128 __b) { + int __cc; + __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); + return __cc <= 1; +} +#endif + #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_eq(__vector float __a, __vector float __b) { @@ -4713,28 +5147,51 @@ vec_any_ne(__vector __bool long long __a, __vector __bool long long __b) { return __cc != 0; } -#if __ARCH__ >= 12 +#if __ARCH__ >= 15 static inline __ATTRS_o_ai int -vec_any_ne(__vector float __a, __vector float __b) { +vec_any_ne(__vector signed __int128 __a, __vector signed __int128 __b) { int __cc; - __builtin_s390_vfcesbs(__a, __b, &__cc); + __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); return __cc != 0; } -#endif static inline __ATTRS_o_ai int -vec_any_ne(__vector double __a, __vector double __b) { +vec_any_ne(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { int __cc; - __builtin_s390_vfcedbs(__a, __b, &__cc); + __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); return __cc != 0; } -/*-- vec_any_ge -------------------------------------------------------------*/ - static inline __ATTRS_o_ai int -vec_any_ge(__vector signed char __a, __vector signed char __b) { +vec_any_ne(__vector __bool __int128 __a, __vector __bool __int128 __b) { int __cc; - __builtin_s390_vchbs(__b, __a, &__cc); + __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned 
__int128)__b, &__cc); + return __cc != 0; +} +#endif + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_ne(__vector float __a, __vector float __b) { + int __cc; + __builtin_s390_vfcesbs(__a, __b, &__cc); + return __cc != 0; +} +#endif + +static inline __ATTRS_o_ai int +vec_any_ne(__vector double __a, __vector double __b) { + int __cc; + __builtin_s390_vfcedbs(__a, __b, &__cc); + return __cc != 0; +} + +/*-- vec_any_ge -------------------------------------------------------------*/ + +static inline __ATTRS_o_ai int +vec_any_ge(__vector signed char __a, __vector signed char __b) { + int __cc; + __builtin_s390_vchbs(__b, __a, &__cc); return __cc != 0; } @@ -4951,6 +5408,22 @@ vec_any_ge(__vector __bool long long __a, __vector __bool long long __b) { return __cc != 0; } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai int +vec_any_ge(__vector signed __int128 __a, __vector signed __int128 __b) { + int __cc; + __builtin_s390_vchqs((signed __int128)__b, (signed __int128)__a, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_ge(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + int __cc; + __builtin_s390_vchlqs((unsigned __int128)__b, (unsigned __int128)__a, &__cc); + return __cc != 0; +} +#endif + #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_ge(__vector float __a, __vector float __b) { @@ -5189,6 +5662,22 @@ vec_any_gt(__vector __bool long long __a, __vector __bool long long __b) { return __cc <= 1; } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai int +vec_any_gt(__vector signed __int128 __a, __vector signed __int128 __b) { + int __cc; + __builtin_s390_vchqs((signed __int128)__a, (signed __int128)__b, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_gt(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + int __cc; + __builtin_s390_vchlqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); + return __cc <= 1; +} +#endif + #if __ARCH__ >= 12 static inline 
__ATTRS_o_ai int vec_any_gt(__vector float __a, __vector float __b) { @@ -5427,6 +5916,22 @@ vec_any_le(__vector __bool long long __a, __vector __bool long long __b) { return __cc != 0; } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai int +vec_any_le(__vector signed __int128 __a, __vector signed __int128 __b) { + int __cc; + __builtin_s390_vchqs((signed __int128)__a, (signed __int128)__b, &__cc); + return __cc != 0; +} + +static inline __ATTRS_o_ai int +vec_any_le(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + int __cc; + __builtin_s390_vchlqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc); + return __cc != 0; +} +#endif + #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_le(__vector float __a, __vector float __b) { @@ -5665,6 +6170,22 @@ vec_any_lt(__vector __bool long long __a, __vector __bool long long __b) { return __cc <= 1; } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai int +vec_any_lt(__vector signed __int128 __a, __vector signed __int128 __b) { + int __cc; + __builtin_s390_vchqs((signed __int128)__b, (signed __int128)__a, &__cc); + return __cc <= 1; +} + +static inline __ATTRS_o_ai int +vec_any_lt(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + int __cc; + __builtin_s390_vchlqs((unsigned __int128)__b, (unsigned __int128)__a, &__cc); + return __cc <= 1; +} +#endif + #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_any_lt(__vector float __a, __vector float __b) { @@ -5753,40 +6274,419 @@ vec_any_nlt(__vector double __a, __vector double __b) { return __cc != 0; } -/*-- vec_any_nan ------------------------------------------------------------*/ +/*-- vec_any_nan ------------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_nan(__vector float __a) { + int __cc; + __builtin_s390_vftcisb(__a, 15, &__cc); + return __cc != 3; +} +#endif + +static inline __ATTRS_o_ai int +vec_any_nan(__vector double __a) { + int __cc; + 
__builtin_s390_vftcidb(__a, 15, &__cc); + return __cc != 3; +} + +/*-- vec_any_numeric --------------------------------------------------------*/ + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai int +vec_any_numeric(__vector float __a) { + int __cc; + __builtin_s390_vftcisb(__a, 15, &__cc); + return __cc != 0; +} +#endif + +static inline __ATTRS_o_ai int +vec_any_numeric(__vector double __a) { + int __cc; + __builtin_s390_vftcidb(__a, 15, &__cc); + return __cc != 0; +} + +/*-- vec_blend --------------------------------------------------------------*/ + +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector signed char +vec_blend(__vector signed char __a, __vector signed char __b, + __vector signed char __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed char)0)); +} + +static inline __ATTRS_o_ai __vector __bool char +vec_blend(__vector __bool char __a, __vector __bool char __b, + __vector signed char __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed char)0)); +} + +static inline __ATTRS_o_ai __vector unsigned char +vec_blend(__vector unsigned char __a, __vector unsigned char __b, + __vector signed char __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed char)0)); +} + +static inline __ATTRS_o_ai __vector signed short +vec_blend(__vector signed short __a, __vector signed short __b, + __vector signed short __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed short)0)); +} + +static inline __ATTRS_o_ai __vector __bool short +vec_blend(__vector __bool short __a, __vector __bool short __b, + __vector signed short __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed short)0)); +} + +static inline __ATTRS_o_ai __vector unsigned short +vec_blend(__vector unsigned short __a, __vector unsigned short __b, + __vector signed short __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed short)0)); +} + +static inline __ATTRS_o_ai __vector signed int +vec_blend(__vector signed int __a, __vector 
signed int __b, + __vector signed int __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed int)0)); +} + +static inline __ATTRS_o_ai __vector __bool int +vec_blend(__vector __bool int __a, __vector __bool int __b, + __vector signed int __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed int)0)); +} + +static inline __ATTRS_o_ai __vector unsigned int +vec_blend(__vector unsigned int __a, __vector unsigned int __b, + __vector signed int __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed int)0)); +} + +static inline __ATTRS_o_ai __vector signed long long +vec_blend(__vector signed long long __a, __vector signed long long __b, + __vector signed long long __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed long long)0)); +} + +static inline __ATTRS_o_ai __vector __bool long long +vec_blend(__vector __bool long long __a, __vector __bool long long __b, + __vector signed long long __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed long long)0)); +} + +static inline __ATTRS_o_ai __vector unsigned long long +vec_blend(__vector unsigned long long __a, __vector unsigned long long __b, + __vector signed long long __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed long long)0)); +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_blend(__vector signed __int128 __a, __vector signed __int128 __b, + __vector signed __int128 __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed __int128)0)); +} + +static inline __ATTRS_o_ai __vector __bool __int128 +vec_blend(__vector __bool __int128 __a, __vector __bool __int128 __b, + __vector signed __int128 __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed __int128)0)); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_blend(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + __vector signed __int128 __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed __int128)0)); +} + +static 
inline __ATTRS_o_ai __vector float +vec_blend(__vector float __a, __vector float __b, + __vector signed int __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed int)0)); +} + +static inline __ATTRS_o_ai __vector double +vec_blend(__vector double __a, __vector double __b, + __vector signed long long __c) { + return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed long long)0)); +} +#endif + +/*-- vec_and ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai __vector __bool char +vec_and(__vector __bool char __a, __vector __bool char __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector signed char +vec_and(__vector signed char __a, __vector signed char __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector unsigned char +vec_and(__vector unsigned char __a, __vector unsigned char __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector __bool short +vec_and(__vector __bool short __a, __vector __bool short __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector signed short +vec_and(__vector signed short __a, __vector signed short __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector unsigned short +vec_and(__vector unsigned short __a, __vector unsigned short __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector __bool int +vec_and(__vector __bool int __a, __vector __bool int __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector signed int +vec_and(__vector signed int __a, __vector signed int __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector unsigned int +vec_and(__vector unsigned int __a, __vector unsigned int __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector __bool long long +vec_and(__vector __bool long long __a, __vector __bool long long __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector signed long long +vec_and(__vector signed long long __a, __vector 
signed long long __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector unsigned long long +vec_and(__vector unsigned long long __a, __vector unsigned long long __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector __bool __int128 +vec_and(__vector __bool __int128 __a, __vector __bool __int128 __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_and(__vector signed __int128 __a, __vector signed __int128 __b) { + return __a & __b; +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_and(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return __a & __b; +} + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai __vector float +vec_and(__vector float __a, __vector float __b) { + return (__vector float)((__vector unsigned int)__a & + (__vector unsigned int)__b); +} +#endif + +static inline __ATTRS_o_ai __vector double +vec_and(__vector double __a, __vector double __b) { + return (__vector double)((__vector unsigned long long)__a & + (__vector unsigned long long)__b); +} + +/*-- vec_or ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai __vector __bool char +vec_or(__vector __bool char __a, __vector __bool char __b) { + return __a | __b; +} + +static inline __ATTRS_o_ai __vector signed char +vec_or(__vector signed char __a, __vector signed char __b) { + return __a | __b; +} + +static inline __ATTRS_o_ai __vector unsigned char +vec_or(__vector unsigned char __a, __vector unsigned char __b) { + return __a | __b; +} + +static inline __ATTRS_o_ai __vector __bool short +vec_or(__vector __bool short __a, __vector __bool short __b) { + return __a | __b; +} + +static inline __ATTRS_o_ai __vector signed short +vec_or(__vector signed short __a, __vector signed short __b) { + return __a | __b; +} + +static inline __ATTRS_o_ai __vector unsigned short +vec_or(__vector unsigned short __a, __vector unsigned short __b) { + return __a | __b; +} + 
+static inline __ATTRS_o_ai __vector __bool int +vec_or(__vector __bool int __a, __vector __bool int __b) { + return __a | __b; +} + +static inline __ATTRS_o_ai __vector signed int +vec_or(__vector signed int __a, __vector signed int __b) { + return __a | __b; +} + +static inline __ATTRS_o_ai __vector unsigned int +vec_or(__vector unsigned int __a, __vector unsigned int __b) { + return __a | __b; +} + +static inline __ATTRS_o_ai __vector __bool long long +vec_or(__vector __bool long long __a, __vector __bool long long __b) { + return __a | __b; +} + +static inline __ATTRS_o_ai __vector signed long long +vec_or(__vector signed long long __a, __vector signed long long __b) { + return __a | __b; +} + +static inline __ATTRS_o_ai __vector unsigned long long +vec_or(__vector unsigned long long __a, __vector unsigned long long __b) { + return __a | __b; +} + +static inline __ATTRS_o_ai __vector __bool __int128 +vec_or(__vector __bool __int128 __a, __vector __bool __int128 __b) { + return __a | __b; +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_or(__vector signed __int128 __a, __vector signed __int128 __b) { + return __a | __b; +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_or(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return __a | __b; +} + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai __vector float +vec_or(__vector float __a, __vector float __b) { + return (__vector float)((__vector unsigned int)__a | + (__vector unsigned int)__b); +} +#endif + +static inline __ATTRS_o_ai __vector double +vec_or(__vector double __a, __vector double __b) { + return (__vector double)((__vector unsigned long long)__a | + (__vector unsigned long long)__b); +} + +/*-- vec_xor ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai __vector __bool char +vec_xor(__vector __bool char __a, __vector __bool char __b) { + return __a ^ __b; +} + +static inline __ATTRS_o_ai __vector signed char 
+vec_xor(__vector signed char __a, __vector signed char __b) { + return __a ^ __b; +} + +static inline __ATTRS_o_ai __vector unsigned char +vec_xor(__vector unsigned char __a, __vector unsigned char __b) { + return __a ^ __b; +} + +static inline __ATTRS_o_ai __vector __bool short +vec_xor(__vector __bool short __a, __vector __bool short __b) { + return __a ^ __b; +} + +static inline __ATTRS_o_ai __vector signed short +vec_xor(__vector signed short __a, __vector signed short __b) { + return __a ^ __b; +} + +static inline __ATTRS_o_ai __vector unsigned short +vec_xor(__vector unsigned short __a, __vector unsigned short __b) { + return __a ^ __b; +} + +static inline __ATTRS_o_ai __vector __bool int +vec_xor(__vector __bool int __a, __vector __bool int __b) { + return __a ^ __b; +} + +static inline __ATTRS_o_ai __vector signed int +vec_xor(__vector signed int __a, __vector signed int __b) { + return __a ^ __b; +} + +static inline __ATTRS_o_ai __vector unsigned int +vec_xor(__vector unsigned int __a, __vector unsigned int __b) { + return __a ^ __b; +} + +static inline __ATTRS_o_ai __vector __bool long long +vec_xor(__vector __bool long long __a, __vector __bool long long __b) { + return __a ^ __b; +} + +static inline __ATTRS_o_ai __vector signed long long +vec_xor(__vector signed long long __a, __vector signed long long __b) { + return __a ^ __b; +} + +static inline __ATTRS_o_ai __vector unsigned long long +vec_xor(__vector unsigned long long __a, __vector unsigned long long __b) { + return __a ^ __b; +} -#if __ARCH__ >= 12 -static inline __ATTRS_o_ai int -vec_any_nan(__vector float __a) { - int __cc; - __builtin_s390_vftcisb(__a, 15, &__cc); - return __cc != 3; +static inline __ATTRS_o_ai __vector __bool __int128 +vec_xor(__vector __bool __int128 __a, __vector __bool __int128 __b) { + return __a ^ __b; } -#endif -static inline __ATTRS_o_ai int -vec_any_nan(__vector double __a) { - int __cc; - __builtin_s390_vftcidb(__a, 15, &__cc); - return __cc != 3; +static inline 
__ATTRS_o_ai __vector signed __int128 +vec_xor(__vector signed __int128 __a, __vector signed __int128 __b) { + return __a ^ __b; } -/*-- vec_any_numeric --------------------------------------------------------*/ +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_xor(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return __a ^ __b; +} #if __ARCH__ >= 12 -static inline __ATTRS_o_ai int -vec_any_numeric(__vector float __a) { - int __cc; - __builtin_s390_vftcisb(__a, 15, &__cc); - return __cc != 0; +static inline __ATTRS_o_ai __vector float +vec_xor(__vector float __a, __vector float __b) { + return (__vector float)((__vector unsigned int)__a ^ + (__vector unsigned int)__b); } #endif -static inline __ATTRS_o_ai int -vec_any_numeric(__vector double __a) { - int __cc; - __builtin_s390_vftcidb(__a, 15, &__cc); - return __cc != 0; +static inline __ATTRS_o_ai __vector double +vec_xor(__vector double __a, __vector double __b) { + return (__vector double)((__vector unsigned long long)__a ^ + (__vector unsigned long long)__b); } /*-- vec_andc ---------------------------------------------------------------*/ @@ -5947,6 +6847,21 @@ vec_andc(__vector unsigned long long __a, __vector __bool long long __b) { return __a & ~__b; } +static inline __ATTRS_o_ai __vector __bool __int128 +vec_andc(__vector __bool __int128 __a, __vector __bool __int128 __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_andc(__vector signed __int128 __a, __vector signed __int128 __b) { + return __a & ~__b; +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_andc(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return __a & ~__b; +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_andc(__vector float __a, __vector float __b) { @@ -6133,6 +7048,21 @@ vec_nor(__vector unsigned long long __a, __vector __bool long long __b) { return ~(__a | __b); } +static inline __ATTRS_o_ai __vector __bool 
__int128 +vec_nor(__vector __bool __int128 __a, __vector __bool __int128 __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_nor(__vector signed __int128 __a, __vector signed __int128 __b) { + return ~(__a | __b); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_nor(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return ~(__a | __b); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_nor(__vector float __a, __vector float __b) { @@ -6224,6 +7154,21 @@ vec_orc(__vector unsigned long long __a, __vector unsigned long long __b) { return __a | ~__b; } +static inline __ATTRS_o_ai __vector __bool __int128 +vec_orc(__vector __bool __int128 __a, __vector __bool __int128 __b) { + return __a | ~__b; +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_orc(__vector signed __int128 __a, __vector signed __int128 __b) { + return __a | ~__b; +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_orc(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return __a | ~__b; +} + static inline __ATTRS_o_ai __vector float vec_orc(__vector float __a, __vector float __b) { return (__vector float)((__vector unsigned int)__a | @@ -6300,6 +7245,21 @@ vec_nand(__vector unsigned long long __a, __vector unsigned long long __b) { return ~(__a & __b); } +static inline __ATTRS_o_ai __vector __bool __int128 +vec_nand(__vector __bool __int128 __a, __vector __bool __int128 __b) { + return ~(__a & __b); +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_nand(__vector signed __int128 __a, __vector signed __int128 __b) { + return ~(__a & __b); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_nand(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return ~(__a & __b); +} + static inline __ATTRS_o_ai __vector float vec_nand(__vector float __a, __vector float __b) { return (__vector float)~((__vector unsigned int)__a & @@ -6376,6 
+7336,21 @@ vec_eqv(__vector unsigned long long __a, __vector unsigned long long __b) { return ~(__a ^ __b); } +static inline __ATTRS_o_ai __vector __bool __int128 +vec_eqv(__vector __bool __int128 __a, __vector __bool __int128 __b) { + return ~(__a ^ __b); +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_eqv(__vector signed __int128 __a, __vector signed __int128 __b) { + return ~(__a ^ __b); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_eqv(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return ~(__a ^ __b); +} + static inline __ATTRS_o_ai __vector float vec_eqv(__vector float __a, __vector float __b) { return (__vector float)~((__vector unsigned int)__a ^ @@ -6389,6 +7364,91 @@ vec_eqv(__vector double __a, __vector double __b) { } #endif +/*-- vec_evaluate -----------------------------------------------------------*/ + +#if __ARCH__ >= 15 +extern __ATTRS_o __vector signed char +vec_evaluate(__vector signed char __a, __vector signed char __b, + __vector signed char __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector unsigned char +vec_evaluate(__vector unsigned char __a, __vector unsigned char __b, + __vector unsigned char __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector __bool char +vec_evaluate(__vector __bool char __a, __vector __bool char __b, + __vector __bool char __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector signed short +vec_evaluate(__vector signed short __a, __vector signed short __b, + __vector signed short __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector unsigned short +vec_evaluate(__vector unsigned short __a, __vector unsigned short __b, + __vector unsigned short __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector __bool short +vec_evaluate(__vector __bool short __a, __vector __bool short __b, + __vector __bool short __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector 
signed int +vec_evaluate(__vector signed int __a, __vector signed int __b, + __vector signed int __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector unsigned int +vec_evaluate(__vector unsigned int __a, __vector unsigned int __b, + __vector unsigned int __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector __bool int +vec_evaluate(__vector __bool int __a, __vector __bool int __b, + __vector __bool int __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector signed long long +vec_evaluate(__vector signed long long __a, __vector signed long long __b, + __vector signed long long __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector unsigned long long +vec_evaluate(__vector unsigned long long __a, __vector unsigned long long __b, + __vector unsigned long long __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector __bool long long +vec_evaluate(__vector __bool long long __a, __vector __bool long long __b, + __vector __bool long long __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector signed __int128 +vec_evaluate(__vector signed __int128 __a, __vector signed __int128 __b, + __vector signed __int128 __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector unsigned __int128 +vec_evaluate(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + __vector unsigned __int128 __c, unsigned char __d) + __constant(__d); + +extern __ATTRS_o __vector __bool __int128 +vec_evaluate(__vector __bool __int128 __a, __vector __bool __int128 __b, + __vector __bool __int128 __c, unsigned char __d) + __constant(__d); + +#define vec_evaluate(A, B, C, D) \ + ((__typeof__((vec_evaluate)((A), (B), (C), (D)))) \ + __builtin_s390_veval((__vector unsigned char)(A), \ + (__vector unsigned char)(B), \ + (__vector unsigned char)(C), (D))) +#endif + /*-- vec_cntlz --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned 
char @@ -6431,6 +7491,20 @@ vec_cntlz(__vector unsigned long long __a) { return __builtin_s390_vclzg(__a); } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_cntlz(__vector signed __int128 __a) { + return (__vector unsigned __int128) + __builtin_s390_vclzq((unsigned __int128)__a); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_cntlz(__vector unsigned __int128 __a) { + return (__vector unsigned __int128) + __builtin_s390_vclzq((unsigned __int128)__a); +} +#endif + /*-- vec_cnttz --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char @@ -6473,46 +7547,60 @@ vec_cnttz(__vector unsigned long long __a) { return __builtin_s390_vctzg(__a); } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_cnttz(__vector signed __int128 __a) { + return (__vector unsigned __int128) + __builtin_s390_vctzq((unsigned __int128)__a); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_cnttz(__vector unsigned __int128 __a) { + return (__vector unsigned __int128) + __builtin_s390_vctzq((unsigned __int128)__a); +} +#endif + /*-- vec_popcnt -------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector unsigned char vec_popcnt(__vector signed char __a) { - return __builtin_s390_vpopctb((__vector unsigned char)__a); + return __builtin_elementwise_popcount((__vector unsigned char)__a); } static inline __ATTRS_o_ai __vector unsigned char vec_popcnt(__vector unsigned char __a) { - return __builtin_s390_vpopctb(__a); + return __builtin_elementwise_popcount(__a); } static inline __ATTRS_o_ai __vector unsigned short vec_popcnt(__vector signed short __a) { - return __builtin_s390_vpopcth((__vector unsigned short)__a); + return __builtin_elementwise_popcount((__vector unsigned short)__a); } static inline __ATTRS_o_ai __vector unsigned short vec_popcnt(__vector unsigned short __a) { - return __builtin_s390_vpopcth(__a); + 
return __builtin_elementwise_popcount(__a); } static inline __ATTRS_o_ai __vector unsigned int vec_popcnt(__vector signed int __a) { - return __builtin_s390_vpopctf((__vector unsigned int)__a); + return __builtin_elementwise_popcount((__vector unsigned int)__a); } static inline __ATTRS_o_ai __vector unsigned int vec_popcnt(__vector unsigned int __a) { - return __builtin_s390_vpopctf(__a); + return __builtin_elementwise_popcount(__a); } static inline __ATTRS_o_ai __vector unsigned long long vec_popcnt(__vector signed long long __a) { - return __builtin_s390_vpopctg((__vector unsigned long long)__a); + return __builtin_elementwise_popcount((__vector unsigned long long)__a); } static inline __ATTRS_o_ai __vector unsigned long long vec_popcnt(__vector unsigned long long __a) { - return __builtin_s390_vpopctg(__a); + return __builtin_elementwise_popcount(__a); } /*-- vec_rl -----------------------------------------------------------------*/ @@ -6904,8 +7992,21 @@ vec_sll(__vector unsigned long long __a, __vector unsigned int __b) { (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector signed __int128 +vec_sll(__vector signed __int128 __a, __vector unsigned char __b) { + return (__vector signed __int128)__builtin_s390_vsl( + (__vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_sll(__vector unsigned __int128 __a, __vector unsigned char __b) { + return (__vector unsigned __int128)__builtin_s390_vsl( + (__vector unsigned char)__a, __b); +} + /*-- vec_slb ----------------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_slb(__vector signed char __a, __vector signed char __b) { return (__vector signed char)__builtin_s390_vslb( @@ -6918,6 +8019,7 @@ vec_slb(__vector signed char __a, __vector unsigned char __b) { (__vector unsigned char)__a, __b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector unsigned char vec_slb(__vector unsigned char __a, __vector signed char __b) { return __builtin_s390_vslb(__a, (__vector unsigned char)__b); @@ -6928,110 +8030,187 @@ vec_slb(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vslb(__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_slb(__vector signed short __a, __vector signed short __b) { return (__vector signed short)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_slb(__vector signed short __a, __vector unsigned short __b) { return (__vector signed short)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector signed short +vec_slb(__vector signed short __a, __vector unsigned char __b) { + return (__vector signed short)__builtin_s390_vslb( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_slb(__vector unsigned short __a, __vector signed short __b) { return (__vector unsigned short)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_slb(__vector unsigned short __a, __vector unsigned short __b) { return (__vector unsigned short)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector unsigned short +vec_slb(__vector unsigned short __a, __vector unsigned char __b) { + return (__vector unsigned short)__builtin_s390_vslb( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector signed int vec_slb(__vector signed int __a, __vector signed int __b) { return (__vector signed int)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_slb(__vector signed int __a, __vector unsigned int __b) { return (__vector signed int)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector signed int +vec_slb(__vector signed int __a, __vector unsigned char __b) { + return (__vector signed int)__builtin_s390_vslb( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_slb(__vector unsigned int __a, __vector signed int __b) { return (__vector unsigned int)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_slb(__vector unsigned int __a, __vector unsigned int __b) { return (__vector unsigned int)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector unsigned int +vec_slb(__vector unsigned int __a, __vector unsigned char __b) { + return (__vector unsigned int)__builtin_s390_vslb( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_slb(__vector signed long long __a, __vector signed long long __b) { return (__vector signed long long)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector signed long long vec_slb(__vector signed long long __a, __vector unsigned long long __b) { return (__vector signed long long)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector signed long long +vec_slb(__vector signed long long __a, __vector unsigned char __b) { + return (__vector signed long long)__builtin_s390_vslb( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_slb(__vector unsigned long long __a, __vector signed long long __b) { return (__vector unsigned long long)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_slb(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector unsigned long long)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector unsigned long long +vec_slb(__vector unsigned long long __a, __vector unsigned char __b) { + return (__vector unsigned long long)__builtin_s390_vslb( + (__vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_slb(__vector signed __int128 __a, __vector unsigned char __b) { + return (__vector signed __int128)__builtin_s390_vslb( + (__vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_slb(__vector unsigned __int128 __a, __vector unsigned char __b) { + return (__vector unsigned __int128)__builtin_s390_vslb( + (__vector unsigned char)__a, __b); +} + #if __ARCH__ >= 12 +// This prototype is deprecated. static inline __ATTRS_o_ai __vector float vec_slb(__vector float __a, __vector signed int __b) { return (__vector float)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector float vec_slb(__vector float __a, __vector unsigned int __b) { return (__vector float)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } + +static inline __ATTRS_o_ai __vector float +vec_slb(__vector float __a, __vector unsigned char __b) { + return (__vector float)__builtin_s390_vslb( + (__vector unsigned char)__a, __b); +} #endif +// This prototype is deprecated. static inline __ATTRS_o_ai __vector double vec_slb(__vector double __a, __vector signed long long __b) { return (__vector double)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector double vec_slb(__vector double __a, __vector unsigned long long __b) { return (__vector double)__builtin_s390_vslb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector double +vec_slb(__vector double __a, __vector unsigned char __b) { + return (__vector double)__builtin_s390_vslb( + (__vector unsigned char)__a, __b); +} + /*-- vec_sld ----------------------------------------------------------------*/ extern __ATTRS_o __vector signed char vec_sld(__vector signed char __a, __vector signed char __b, int __c) __constant_range(__c, 0, 15); +// This prototype is deprecated. extern __ATTRS_o __vector __bool char vec_sld(__vector __bool char __a, __vector __bool char __b, int __c) __constant_range(__c, 0, 15); @@ -7044,6 +8223,7 @@ extern __ATTRS_o __vector signed short vec_sld(__vector signed short __a, __vector signed short __b, int __c) __constant_range(__c, 0, 15); +// This prototype is deprecated. extern __ATTRS_o __vector __bool short vec_sld(__vector __bool short __a, __vector __bool short __b, int __c) __constant_range(__c, 0, 15); @@ -7056,6 +8236,7 @@ extern __ATTRS_o __vector signed int vec_sld(__vector signed int __a, __vector signed int __b, int __c) __constant_range(__c, 0, 15); +// This prototype is deprecated. 
extern __ATTRS_o __vector __bool int vec_sld(__vector __bool int __a, __vector __bool int __b, int __c) __constant_range(__c, 0, 15); @@ -7068,6 +8249,7 @@ extern __ATTRS_o __vector signed long long vec_sld(__vector signed long long __a, __vector signed long long __b, int __c) __constant_range(__c, 0, 15); +// This prototype is deprecated. extern __ATTRS_o __vector __bool long long vec_sld(__vector __bool long long __a, __vector __bool long long __b, int __c) __constant_range(__c, 0, 15); @@ -7077,6 +8259,15 @@ vec_sld(__vector unsigned long long __a, __vector unsigned long long __b, int __c) __constant_range(__c, 0, 15); +extern __ATTRS_o __vector signed __int128 +vec_sld(__vector signed __int128 __a, __vector signed __int128 __b, int __c) + __constant_range(__c, 0, 15); + +extern __ATTRS_o __vector unsigned __int128 +vec_sld(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + int __c) + __constant_range(__c, 0, 15); + #if __ARCH__ >= 12 extern __ATTRS_o __vector float vec_sld(__vector float __a, __vector float __b, int __c) @@ -7126,6 +8317,15 @@ vec_sldw(__vector unsigned long long __a, __vector unsigned long long __b, int __c) __constant_range(__c, 0, 3); +extern __ATTRS_o __vector signed __int128 +vec_sldw(__vector signed __int128 __a, __vector signed __int128 __b, int __c) + __constant_range(__c, 0, 3); + +extern __ATTRS_o __vector unsigned __int128 +vec_sldw(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + int __c) + __constant_range(__c, 0, 3); + // This prototype is deprecated. 
extern __ATTRS_o __vector double vec_sldw(__vector double __a, __vector double __b, int __c) @@ -7172,6 +8372,15 @@ vec_sldb(__vector unsigned long long __a, __vector unsigned long long __b, int __c) __constant_range(__c, 0, 7); +extern __ATTRS_o __vector signed __int128 +vec_sldb(__vector signed __int128 __a, __vector signed __int128 __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o __vector unsigned __int128 +vec_sldb(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + int __c) + __constant_range(__c, 0, 7); + extern __ATTRS_o __vector float vec_sldb(__vector float __a, __vector float __b, int __c) __constant_range(__c, 0, 7); @@ -7429,8 +8638,21 @@ vec_sral(__vector unsigned long long __a, __vector unsigned int __b) { (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector signed __int128 +vec_sral(__vector signed __int128 __a, __vector unsigned char __b) { + return (__vector signed __int128)__builtin_s390_vsra( + (__vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_sral(__vector unsigned __int128 __a, __vector unsigned char __b) { + return (__vector unsigned __int128)__builtin_s390_vsra( + (__vector unsigned char)__a, __b); +} + /*-- vec_srab ---------------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_srab(__vector signed char __a, __vector signed char __b) { return (__vector signed char)__builtin_s390_vsrab( @@ -7443,6 +8665,7 @@ vec_srab(__vector signed char __a, __vector unsigned char __b) { (__vector unsigned char)__a, __b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector unsigned char vec_srab(__vector unsigned char __a, __vector signed char __b) { return __builtin_s390_vsrab(__a, (__vector unsigned char)__b); @@ -7453,104 +8676,180 @@ vec_srab(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vsrab(__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_srab(__vector signed short __a, __vector signed short __b) { return (__vector signed short)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_srab(__vector signed short __a, __vector unsigned short __b) { return (__vector signed short)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector signed short +vec_srab(__vector signed short __a, __vector unsigned char __b) { + return (__vector signed short)__builtin_s390_vsrab( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_srab(__vector unsigned short __a, __vector signed short __b) { return (__vector unsigned short)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_srab(__vector unsigned short __a, __vector unsigned short __b) { return (__vector unsigned short)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector unsigned short +vec_srab(__vector unsigned short __a, __vector unsigned char __b) { + return (__vector unsigned short)__builtin_s390_vsrab( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector signed int vec_srab(__vector signed int __a, __vector signed int __b) { return (__vector signed int)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_srab(__vector signed int __a, __vector unsigned int __b) { return (__vector signed int)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector signed int +vec_srab(__vector signed int __a, __vector unsigned char __b) { + return (__vector signed int)__builtin_s390_vsrab( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_srab(__vector unsigned int __a, __vector signed int __b) { return (__vector unsigned int)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_srab(__vector unsigned int __a, __vector unsigned int __b) { return (__vector unsigned int)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector unsigned int +vec_srab(__vector unsigned int __a, __vector unsigned char __b) { + return (__vector unsigned int)__builtin_s390_vsrab( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_srab(__vector signed long long __a, __vector signed long long __b) { return (__vector signed long long)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector signed long long vec_srab(__vector signed long long __a, __vector unsigned long long __b) { return (__vector signed long long)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector signed long long +vec_srab(__vector signed long long __a, __vector unsigned char __b) { + return (__vector signed long long)__builtin_s390_vsrab( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_srab(__vector unsigned long long __a, __vector signed long long __b) { return (__vector unsigned long long)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } -static inline __ATTRS_o_ai __vector unsigned long long -vec_srab(__vector unsigned long long __a, __vector unsigned long long __b) { - return (__vector unsigned long long)__builtin_s390_vsrab( - (__vector unsigned char)__a, (__vector unsigned char)__b); +// This prototype is deprecated. 
+static inline __ATTRS_o_ai __vector unsigned long long +vec_srab(__vector unsigned long long __a, __vector unsigned long long __b) { + return (__vector unsigned long long)__builtin_s390_vsrab( + (__vector unsigned char)__a, (__vector unsigned char)__b); +} + +static inline __ATTRS_o_ai __vector unsigned long long +vec_srab(__vector unsigned long long __a, __vector unsigned char __b) { + return (__vector unsigned long long)__builtin_s390_vsrab( + (__vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_srab(__vector signed __int128 __a, __vector unsigned char __b) { + return (__vector signed __int128)__builtin_s390_vsrab( + (__vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_srab(__vector unsigned __int128 __a, __vector unsigned char __b) { + return (__vector unsigned __int128)__builtin_s390_vsrab( + (__vector unsigned char)__a, __b); } #if __ARCH__ >= 12 +// This prototype is deprecated. static inline __ATTRS_o_ai __vector float vec_srab(__vector float __a, __vector signed int __b) { return (__vector float)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector float vec_srab(__vector float __a, __vector unsigned int __b) { return (__vector float)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } + +static inline __ATTRS_o_ai __vector float +vec_srab(__vector float __a, __vector unsigned char __b) { + return (__vector float)__builtin_s390_vsrab( + (__vector unsigned char)__a, __b); +} #endif +// This prototype is deprecated. static inline __ATTRS_o_ai __vector double vec_srab(__vector double __a, __vector signed long long __b) { return (__vector double)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector double vec_srab(__vector double __a, __vector unsigned long long __b) { return (__vector double)__builtin_s390_vsrab( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector double +vec_srab(__vector double __a, __vector unsigned char __b) { + return (__vector double)__builtin_s390_vsrab( + (__vector unsigned char)__a, __b); +} + /*-- vec_srl ----------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char @@ -7794,8 +9093,21 @@ vec_srl(__vector unsigned long long __a, __vector unsigned int __b) { (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector signed __int128 +vec_srl(__vector signed __int128 __a, __vector unsigned char __b) { + return (__vector signed __int128)__builtin_s390_vsrl( + (__vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_srl(__vector unsigned __int128 __a, __vector unsigned char __b) { + return (__vector unsigned __int128)__builtin_s390_vsrl( + (__vector unsigned char)__a, __b); +} + /*-- vec_srb ----------------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed char vec_srb(__vector signed char __a, __vector signed char __b) { return (__vector signed char)__builtin_s390_vsrlb( @@ -7808,6 +9120,7 @@ vec_srb(__vector signed char __a, __vector unsigned char __b) { (__vector unsigned char)__a, __b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_srb(__vector unsigned char __a, __vector signed char __b) { return __builtin_s390_vsrlb(__a, (__vector unsigned char)__b); @@ -7818,104 +9131,180 @@ vec_srb(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vsrlb(__a, __b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector signed short vec_srb(__vector signed short __a, __vector signed short __b) { return (__vector signed short)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed short vec_srb(__vector signed short __a, __vector unsigned short __b) { return (__vector signed short)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector signed short +vec_srb(__vector signed short __a, __vector unsigned char __b) { + return (__vector signed short)__builtin_s390_vsrlb( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_srb(__vector unsigned short __a, __vector signed short __b) { return (__vector unsigned short)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned short vec_srb(__vector unsigned short __a, __vector unsigned short __b) { return (__vector unsigned short)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector unsigned short +vec_srb(__vector unsigned short __a, __vector unsigned char __b) { + return (__vector unsigned short)__builtin_s390_vsrlb( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed int vec_srb(__vector signed int __a, __vector signed int __b) { return (__vector signed int)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector signed int vec_srb(__vector signed int __a, __vector unsigned int __b) { return (__vector signed int)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector signed int +vec_srb(__vector signed int __a, __vector unsigned char __b) { + return (__vector signed int)__builtin_s390_vsrlb( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_srb(__vector unsigned int __a, __vector signed int __b) { return (__vector unsigned int)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned int vec_srb(__vector unsigned int __a, __vector unsigned int __b) { return (__vector unsigned int)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector unsigned int +vec_srb(__vector unsigned int __a, __vector unsigned char __b) { + return (__vector unsigned int)__builtin_s390_vsrlb( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_srb(__vector signed long long __a, __vector signed long long __b) { return (__vector signed long long)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector signed long long vec_srb(__vector signed long long __a, __vector unsigned long long __b) { return (__vector signed long long)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector signed long long +vec_srb(__vector signed long long __a, __vector unsigned char __b) { + return (__vector signed long long)__builtin_s390_vsrlb( + (__vector unsigned char)__a, __b); +} + +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector unsigned long long vec_srb(__vector unsigned long long __a, __vector signed long long __b) { return (__vector unsigned long long)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned long long vec_srb(__vector unsigned long long __a, __vector unsigned long long __b) { return (__vector unsigned long long)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector unsigned long long +vec_srb(__vector unsigned long long __a, __vector unsigned char __b) { + return (__vector unsigned long long)__builtin_s390_vsrlb( + (__vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_srb(__vector signed __int128 __a, __vector unsigned char __b) { + return (__vector signed __int128)__builtin_s390_vsrlb( + (__vector unsigned char)__a, __b); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_srb(__vector unsigned __int128 __a, __vector unsigned char __b) { + return (__vector unsigned __int128)__builtin_s390_vsrlb( + (__vector unsigned char)__a, __b); +} + #if __ARCH__ >= 12 +// This prototype is deprecated. static inline __ATTRS_o_ai __vector float vec_srb(__vector float __a, __vector signed int __b) { return (__vector float)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector float vec_srb(__vector float __a, __vector unsigned int __b) { return (__vector float)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } + +static inline __ATTRS_o_ai __vector float +vec_srb(__vector float __a, __vector unsigned char __b) { + return (__vector float)__builtin_s390_vsrlb( + (__vector unsigned char)__a, __b); +} #endif +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector double vec_srb(__vector double __a, __vector signed long long __b) { return (__vector double)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +// This prototype is deprecated. static inline __ATTRS_o_ai __vector double vec_srb(__vector double __a, __vector unsigned long long __b) { return (__vector double)__builtin_s390_vsrlb( (__vector unsigned char)__a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai __vector double +vec_srb(__vector double __a, __vector unsigned char __b) { + return (__vector double)__builtin_s390_vsrlb( + (__vector unsigned char)__a, __b); +} + /*-- vec_srdb ---------------------------------------------------------------*/ #if __ARCH__ >= 13 @@ -7953,6 +9342,15 @@ vec_srdb(__vector unsigned long long __a, __vector unsigned long long __b, int __c) __constant_range(__c, 0, 7); +extern __ATTRS_o __vector signed __int128 +vec_srdb(__vector signed __int128 __a, __vector signed __int128 __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o __vector unsigned __int128 +vec_srdb(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + int __c) + __constant_range(__c, 0, 7); + extern __ATTRS_o __vector float vec_srdb(__vector float __a, __vector float __b, int __c) __constant_range(__c, 0, 7); @@ -7989,6 +9387,11 @@ vec_abs(__vector signed long long __a) { return vec_sel(__a, -__a, vec_cmplt(__a, (__vector signed long long)0)); } +static inline __ATTRS_o_ai __vector signed __int128 +vec_abs(__vector signed __int128 __a) { + return vec_sel(__a, -__a, vec_cmplt(__a, (__vector signed __int128)0)); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_abs(__vector float __a) { @@ -8169,6 +9572,16 @@ vec_max(__vector __bool long long __a, __vector unsigned long long __b) { return vec_sel(__b, __ac, vec_cmpgt(__ac, __b)); } +static inline __ATTRS_o_ai __vector signed __int128 +vec_max(__vector signed __int128 __a, __vector signed __int128 __b) 
{ + return vec_sel(__b, __a, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_max(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return vec_sel(__b, __a, vec_cmpgt(__a, __b)); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_max(__vector float __a, __vector float __b) { @@ -8339,6 +9752,16 @@ vec_min(__vector __bool long long __a, __vector unsigned long long __b) { return vec_sel(__ac, __b, vec_cmpgt(__ac, __b)); } +static inline __ATTRS_o_ai __vector signed __int128 +vec_min(__vector signed __int128 __a, __vector signed __int128 __b) { + return vec_sel(__a, __b, vec_cmpgt(__a, __b)); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_min(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return vec_sel(__a, __b, vec_cmpgt(__a, __b)); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai __vector float vec_min(__vector float __a, __vector float __b) { @@ -8357,9 +9780,11 @@ vec_min(__vector double __a, __vector double __b) { /*-- vec_add_u128 -----------------------------------------------------------*/ +// This prototype is deprecated. 
static inline __ATTRS_ai __vector unsigned char vec_add_u128(__vector unsigned char __a, __vector unsigned char __b) { - return (__vector unsigned char)((__int128)__a + (__int128)__b); + return (__vector unsigned char)(__vector unsigned __int128) + ((__int128)__a + (__int128)__b); } /*-- vec_addc ---------------------------------------------------------------*/ @@ -8384,30 +9809,59 @@ vec_addc(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vaccg(__a, __b); } +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_addc(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return (__vector unsigned __int128) + __builtin_s390_vaccq((unsigned __int128)__a, (unsigned __int128)__b); +} + /*-- vec_addc_u128 ----------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_ai __vector unsigned char vec_addc_u128(__vector unsigned char __a, __vector unsigned char __b) { - return (__vector unsigned char) + return (__vector unsigned char)(__vector unsigned __int128) __builtin_s390_vaccq((unsigned __int128)__a, (unsigned __int128)__b); } +/*-- vec_adde ---------------------------------------------------------------*/ + +static inline __ATTRS_ai __vector unsigned __int128 +vec_adde(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + __vector unsigned __int128 __c) { + return (__vector unsigned __int128) + __builtin_s390_vacq((unsigned __int128)__a, (unsigned __int128)__b, + (unsigned __int128)__c); +} + /*-- vec_adde_u128 ----------------------------------------------------------*/ +// This prototype is deprecated. 
static inline __ATTRS_ai __vector unsigned char vec_adde_u128(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { - return (__vector unsigned char) + return (__vector unsigned char)(__vector unsigned __int128) __builtin_s390_vacq((unsigned __int128)__a, (unsigned __int128)__b, (unsigned __int128)__c); } +/*-- vec_addec --------------------------------------------------------------*/ + +static inline __ATTRS_ai __vector unsigned __int128 +vec_addec(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + __vector unsigned __int128 __c) { + return (__vector unsigned __int128) + __builtin_s390_vacccq((unsigned __int128)__a, (unsigned __int128)__b, + (unsigned __int128)__c); +} + /*-- vec_addec_u128 ---------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_ai __vector unsigned char vec_addec_u128(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { - return (__vector unsigned char) + return (__vector unsigned char)(__vector unsigned __int128) __builtin_s390_vacccq((unsigned __int128)__a, (unsigned __int128)__b, (unsigned __int128)__c); } @@ -8434,6 +9888,14 @@ vec_avg(__vector signed long long __a, __vector signed long long __b) { return __builtin_s390_vavgg(__a, __b); } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector signed __int128 +vec_avg(__vector signed __int128 __a, __vector signed __int128 __b) { + return (__vector signed __int128) + __builtin_s390_vavgq((signed __int128)__a, (signed __int128)__b); +} +#endif + static inline __ATTRS_o_ai __vector unsigned char vec_avg(__vector unsigned char __a, __vector unsigned char __b) { return __builtin_s390_vavglb(__a, __b); @@ -8454,6 +9916,14 @@ vec_avg(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vavglg(__a, __b); } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_avg(__vector unsigned __int128 __a, 
__vector unsigned __int128 __b) { + return (__vector unsigned __int128) + __builtin_s390_vavglq((unsigned __int128)__a, (unsigned __int128)__b); +} +#endif + /*-- vec_checksum -----------------------------------------------------------*/ static inline __ATTRS_ai __vector unsigned int @@ -8478,12 +9948,19 @@ vec_gfmsum(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vgfmf(__a, __b); } +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_gfmsum(__vector unsigned long long __a, __vector unsigned long long __b) { + return (__vector unsigned __int128)__builtin_s390_vgfmg(__a, __b); +} + /*-- vec_gfmsum_128 ---------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_gfmsum_128(__vector unsigned long long __a, __vector unsigned long long __b) { - return (__vector unsigned char)__builtin_s390_vgfmg(__a, __b); + return (__vector unsigned char)(__vector unsigned __int128) + __builtin_s390_vgfmg(__a, __b); } /*-- vec_gfmsum_accum -------------------------------------------------------*/ @@ -8506,13 +9983,21 @@ vec_gfmsum_accum(__vector unsigned int __a, __vector unsigned int __b, return __builtin_s390_vgfmaf(__a, __b, __c); } +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_gfmsum_accum(__vector unsigned long long __a, __vector unsigned long long __b, + __vector unsigned __int128 __c) { + return (__vector unsigned __int128) + __builtin_s390_vgfmag(__a, __b, (unsigned __int128)__c); +} + /*-- vec_gfmsum_accum_128 ---------------------------------------------------*/ +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector unsigned char vec_gfmsum_accum_128(__vector unsigned long long __a, __vector unsigned long long __b, __vector unsigned char __c) { - return (__vector unsigned char) + return (__vector unsigned char)(__vector unsigned __int128) __builtin_s390_vgfmag(__a, __b, (unsigned __int128)__c); } @@ -8590,6 +10075,56 @@ vec_mladd(__vector unsigned int __a, __vector unsigned int __b, return __a * __b + __c; } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector signed long long +vec_mladd(__vector signed long long __a, __vector signed long long __b, + __vector signed long long __c) { + return __a * __b + __c; +} + +static inline __ATTRS_o_ai __vector signed long long +vec_mladd(__vector unsigned long long __a, __vector signed long long __b, + __vector signed long long __c) { + return (__vector signed long long)__a * __b + __c; +} + +static inline __ATTRS_o_ai __vector signed long long +vec_mladd(__vector signed long long __a, __vector unsigned long long __b, + __vector unsigned long long __c) { + return __a * (__vector signed long long)__b + (__vector signed long long)__c; +} + +static inline __ATTRS_o_ai __vector unsigned long long +vec_mladd(__vector unsigned long long __a, __vector unsigned long long __b, + __vector unsigned long long __c) { + return __a * __b + __c; +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_mladd(__vector signed __int128 __a, __vector signed __int128 __b, + __vector signed __int128 __c) { + return __a * __b + __c; +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_mladd(__vector unsigned __int128 __a, __vector signed __int128 __b, + __vector signed __int128 __c) { + return (__vector signed __int128)__a * __b + __c; +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_mladd(__vector signed __int128 __a, __vector unsigned __int128 __b, + __vector unsigned __int128 __c) { + return __a * (__vector signed __int128)__b + (__vector signed __int128)__c; +} + +static inline 
__ATTRS_o_ai __vector unsigned __int128 +vec_mladd(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + __vector unsigned __int128 __c) { + return __a * __b + __c; +} +#endif + /*-- vec_mhadd --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char @@ -8628,6 +10163,34 @@ vec_mhadd(__vector unsigned int __a, __vector unsigned int __b, return __builtin_s390_vmalhf(__a, __b, __c); } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector signed long long +vec_mhadd(__vector signed long long __a, __vector signed long long __b, + __vector signed long long __c) { + return __builtin_s390_vmahg(__a, __b, __c); +} + +static inline __ATTRS_o_ai __vector unsigned long long +vec_mhadd(__vector unsigned long long __a, __vector unsigned long long __b, + __vector unsigned long long __c) { + return __builtin_s390_vmalhg(__a, __b, __c); +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_mhadd(__vector signed __int128 __a, __vector signed __int128 __b, + __vector signed __int128 __c) { + return (__vector signed __int128) + __builtin_s390_vmahq((signed __int128)__a, (signed __int128)__b, (signed __int128)__c); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_mhadd(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + __vector unsigned __int128 __c) { + return (__vector unsigned __int128) + __builtin_s390_vmalhq((unsigned __int128)__a, (unsigned __int128)__b, (unsigned __int128)__c); +} +#endif + /*-- vec_meadd --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed short @@ -8666,6 +10229,22 @@ vec_meadd(__vector unsigned int __a, __vector unsigned int __b, return __builtin_s390_vmalef(__a, __b, __c); } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector signed __int128 +vec_meadd(__vector signed long long __a, __vector signed long long __b, + __vector signed __int128 __c) { + return (__vector signed __int128) + 
__builtin_s390_vmaeg(__a, __b, (signed __int128)__c); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_meadd(__vector unsigned long long __a, __vector unsigned long long __b, + __vector unsigned __int128 __c) { + return (__vector unsigned __int128) + __builtin_s390_vmaleg(__a, __b, (unsigned __int128)__c); +} +#endif + /*-- vec_moadd --------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed short @@ -8704,6 +10283,22 @@ vec_moadd(__vector unsigned int __a, __vector unsigned int __b, return __builtin_s390_vmalof(__a, __b, __c); } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector signed __int128 +vec_moadd(__vector signed long long __a, __vector signed long long __b, + __vector signed __int128 __c) { + return (__vector signed __int128) + __builtin_s390_vmaog(__a, __b, (signed __int128)__c); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_moadd(__vector unsigned long long __a, __vector unsigned long long __b, + __vector unsigned __int128 __c) { + return (__vector unsigned __int128) + __builtin_s390_vmalog(__a, __b, (unsigned __int128)__c); +} +#endif + /*-- vec_mulh ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed char @@ -8736,6 +10331,30 @@ vec_mulh(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vmlhf(__a, __b); } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector signed long long +vec_mulh(__vector signed long long __a, __vector signed long long __b) { + return __builtin_s390_vmhg(__a, __b); +} + +static inline __ATTRS_o_ai __vector unsigned long long +vec_mulh(__vector unsigned long long __a, __vector unsigned long long __b) { + return __builtin_s390_vmlhg(__a, __b); +} + +static inline __ATTRS_o_ai __vector signed __int128 +vec_mulh(__vector signed __int128 __a, __vector signed __int128 __b) { + return (__vector signed __int128) + __builtin_s390_vmhq((signed __int128)__a, (signed 
__int128)__b); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_mulh(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return (__vector unsigned __int128) + __builtin_s390_vmlhq((unsigned __int128)__a, (unsigned __int128)__b); +} +#endif + /*-- vec_mule ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed short @@ -8768,6 +10387,18 @@ vec_mule(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vmlef(__a, __b); } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector signed __int128 +vec_mule(__vector signed long long __a, __vector signed long long __b) { + return (__vector signed __int128)__builtin_s390_vmeg(__a, __b); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_mule(__vector unsigned long long __a, __vector unsigned long long __b) { + return (__vector unsigned __int128)__builtin_s390_vmleg(__a, __b); +} +#endif + /*-- vec_mulo ---------------------------------------------------------------*/ static inline __ATTRS_o_ai __vector signed short @@ -8800,9 +10431,35 @@ vec_mulo(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vmlof(__a, __b); } +#if __ARCH__ >= 15 +static inline __ATTRS_o_ai __vector signed __int128 +vec_mulo(__vector signed long long __a, __vector signed long long __b) { + return (__vector signed __int128)__builtin_s390_vmog(__a, __b); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_mulo(__vector unsigned long long __a, __vector unsigned long long __b) { + return (__vector unsigned __int128)__builtin_s390_vmlog(__a, __b); +} +#endif + +/*-- vec_msum ---------------------------------------------------------------*/ + +#if __ARCH__ >= 12 +extern __ATTRS_o __vector unsigned __int128 +vec_msum(__vector unsigned long long __a, __vector unsigned long long __b, + __vector unsigned __int128 __c, int __d) + __constant_range(__d, 0, 15); + +#define vec_msum(X, Y, Z, W) \ + 
((__typeof__((vec_msum)((X), (Y), (Z), (W)))) \ + __builtin_s390_vmslg((X), (Y), (unsigned __int128)(Z), (W))) +#endif + /*-- vec_msum_u128 ----------------------------------------------------------*/ #if __ARCH__ >= 12 +// This prototype is deprecated. extern __ATTRS_o __vector unsigned char vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b, __vector unsigned char __c, int __d) @@ -8810,14 +10467,17 @@ vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b, #define vec_msum_u128(X, Y, Z, W) \ ((__typeof__((vec_msum_u128)((X), (Y), (Z), (W)))) \ + (__vector unsigned __int128) \ __builtin_s390_vmslg((X), (Y), (unsigned __int128)(Z), (W))) #endif /*-- vec_sub_u128 -----------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_ai __vector unsigned char vec_sub_u128(__vector unsigned char __a, __vector unsigned char __b) { - return (__vector unsigned char)((__int128)__a - (__int128)__b); + return (__vector unsigned char)(__vector unsigned __int128) + ((__int128)__a - (__int128)__b); } /*-- vec_subc ---------------------------------------------------------------*/ @@ -8842,30 +10502,59 @@ vec_subc(__vector unsigned long long __a, __vector unsigned long long __b) { return __builtin_s390_vscbig(__a, __b); } +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_subc(__vector unsigned __int128 __a, __vector unsigned __int128 __b) { + return (__vector unsigned __int128) + __builtin_s390_vscbiq((unsigned __int128)__a, (unsigned __int128)__b); +} + /*-- vec_subc_u128 ----------------------------------------------------------*/ +// This prototype is deprecated. 
static inline __ATTRS_ai __vector unsigned char vec_subc_u128(__vector unsigned char __a, __vector unsigned char __b) { - return (__vector unsigned char) + return (__vector unsigned char)(__vector unsigned __int128) __builtin_s390_vscbiq((unsigned __int128)__a, (unsigned __int128)__b); } +/*-- vec_sube ---------------------------------------------------------------*/ + +static inline __ATTRS_ai __vector unsigned __int128 +vec_sube(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + __vector unsigned __int128 __c) { + return (__vector unsigned __int128) + __builtin_s390_vsbiq((unsigned __int128)__a, (unsigned __int128)__b, + (unsigned __int128)__c); +} + /*-- vec_sube_u128 ----------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_ai __vector unsigned char vec_sube_u128(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { - return (__vector unsigned char) + return (__vector unsigned char)(__vector unsigned __int128) __builtin_s390_vsbiq((unsigned __int128)__a, (unsigned __int128)__b, (unsigned __int128)__c); } +/*-- vec_subec --------------------------------------------------------------*/ + +static inline __ATTRS_ai __vector unsigned __int128 +vec_subec(__vector unsigned __int128 __a, __vector unsigned __int128 __b, + __vector unsigned __int128 __c) { + return (__vector unsigned __int128) + __builtin_s390_vsbcbiq((unsigned __int128)__a, (unsigned __int128)__b, + (unsigned __int128)__c); +} + /*-- vec_subec_u128 ---------------------------------------------------------*/ +// This prototype is deprecated. 
static inline __ATTRS_ai __vector unsigned char vec_subec_u128(__vector unsigned char __a, __vector unsigned char __b, __vector unsigned char __c) { - return (__vector unsigned char) + return (__vector unsigned char)(__vector unsigned __int128) __builtin_s390_vsbcbiq((unsigned __int128)__a, (unsigned __int128)__b, (unsigned __int128)__c); } @@ -8882,16 +10571,32 @@ vec_sum2(__vector unsigned int __a, __vector unsigned int __b) { return __builtin_s390_vsumgf(__a, __b); } +/*-- vec_sum ----------------------------------------------------------------*/ + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_sum(__vector unsigned int __a, __vector unsigned int __b) { + return (__vector unsigned __int128)__builtin_s390_vsumqf(__a, __b); +} + +static inline __ATTRS_o_ai __vector unsigned __int128 +vec_sum(__vector unsigned long long __a, __vector unsigned long long __b) { + return (__vector unsigned __int128)__builtin_s390_vsumqg(__a, __b); +} + /*-- vec_sum_u128 -----------------------------------------------------------*/ +// This prototype is deprecated. static inline __ATTRS_o_ai __vector unsigned char vec_sum_u128(__vector unsigned int __a, __vector unsigned int __b) { - return (__vector unsigned char)__builtin_s390_vsumqf(__a, __b); + return (__vector unsigned char)(__vector unsigned __int128) + __builtin_s390_vsumqf(__a, __b); } +// This prototype is deprecated. 
static inline __ATTRS_o_ai __vector unsigned char vec_sum_u128(__vector unsigned long long __a, __vector unsigned long long __b) { - return (__vector unsigned char)__builtin_s390_vsumqg(__a, __b); + return (__vector unsigned char)(__vector unsigned __int128) + __builtin_s390_vsumqg(__a, __b); } /*-- vec_sum4 ---------------------------------------------------------------*/ @@ -8956,6 +10661,19 @@ vec_test_mask(__vector unsigned long long __a, (__vector unsigned char)__b); } +static inline __ATTRS_o_ai int +vec_test_mask(__vector signed __int128 __a, __vector unsigned __int128 __b) { + return __builtin_s390_vtm((__vector unsigned char)__a, + (__vector unsigned char)__b); +} + +static inline __ATTRS_o_ai int +vec_test_mask(__vector unsigned __int128 __a, + __vector unsigned __int128 __b) { + return __builtin_s390_vtm((__vector unsigned char)__a, + (__vector unsigned char)__b); +} + #if __ARCH__ >= 12 static inline __ATTRS_o_ai int vec_test_mask(__vector float __a, __vector unsigned int __b) { diff --git a/lib/include/wasm_simd128.h b/lib/include/wasm_simd128.h index 2327bec52522..08e39bf1a79b 100644 --- a/lib/include/wasm_simd128.h +++ b/lib/include/wasm_simd128.h @@ -33,6 +33,7 @@ typedef unsigned long long __u64x2 __attribute__((__vector_size__(16), __aligned__(16))); typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16))); typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16))); +typedef __fp16 __f16x8 __attribute__((__vector_size__(16), __aligned__(16))); typedef signed char __i8x8 __attribute__((__vector_size__(8), __aligned__(8))); typedef unsigned char __u8x8 @@ -956,7 +957,7 @@ static __inline__ uint32_t __DEFAULT_FN_ATTRS wasm_i8x16_bitmask(v128_t __a) { } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_popcnt(v128_t __a) { - return (v128_t)__builtin_wasm_popcnt_i8x16((__i8x16)__a); + return (v128_t)__builtin_elementwise_popcount((__i8x16)__a); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t 
__a, @@ -981,12 +982,12 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add_sat(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_add_sat_s_i8x16((__i8x16)__a, (__i8x16)__b); + return (v128_t)__builtin_elementwise_add_sat((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_add_sat(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_add_sat_u_i8x16((__u8x16)__a, (__u8x16)__b); + return (v128_t)__builtin_elementwise_add_sat((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub(v128_t __a, @@ -996,32 +997,32 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub_sat(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_sub_sat_s_i8x16((__i8x16)__a, (__i8x16)__b); + return (v128_t)__builtin_elementwise_sub_sat((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_sub_sat(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_sub_sat_u_i8x16((__u8x16)__a, (__u8x16)__b); + return (v128_t)__builtin_elementwise_sub_sat((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_min(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_min_s_i8x16((__i8x16)__a, (__i8x16)__b); + return (v128_t)__builtin_elementwise_min((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_min(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_min_u_i8x16((__u8x16)__a, (__u8x16)__b); + return (v128_t)__builtin_elementwise_min((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_max(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_max_s_i8x16((__i8x16)__a, (__i8x16)__b); + return (v128_t)__builtin_elementwise_max((__i8x16)__a, (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_max(v128_t 
__a, v128_t __b) { - return (v128_t)__builtin_wasm_max_u_i8x16((__u8x16)__a, (__u8x16)__b); + return (v128_t)__builtin_elementwise_max((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_avgr(v128_t __a, @@ -1067,12 +1068,12 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add_sat(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_add_sat_s_i16x8((__i16x8)__a, (__i16x8)__b); + return (v128_t)__builtin_elementwise_add_sat((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_add_sat(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_add_sat_u_i16x8((__u16x8)__a, (__u16x8)__b); + return (v128_t)__builtin_elementwise_add_sat((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub(v128_t __a, @@ -1082,12 +1083,12 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub_sat(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_sub_sat_s_i16x8((__i16x8)__a, (__i16x8)__b); + return (v128_t)__builtin_elementwise_sub_sat((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_sub_sat(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_sub_sat_u_i16x8((__u16x8)__a, (__u16x8)__b); + return (v128_t)__builtin_elementwise_sub_sat((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_mul(v128_t __a, @@ -1097,22 +1098,22 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_mul(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_min(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_min_s_i16x8((__i16x8)__a, (__i16x8)__b); + return (v128_t)__builtin_elementwise_min((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_min(v128_t __a, v128_t __b) { - return 
(v128_t)__builtin_wasm_min_u_i16x8((__u16x8)__a, (__u16x8)__b); + return (v128_t)__builtin_elementwise_min((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_max(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_max_s_i16x8((__i16x8)__a, (__i16x8)__b); + return (v128_t)__builtin_elementwise_max((__i16x8)__a, (__i16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_max(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_max_u_i16x8((__u16x8)__a, (__u16x8)__b); + return (v128_t)__builtin_elementwise_max((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_avgr(v128_t __a, @@ -1168,22 +1169,22 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_mul(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_min(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_min_s_i32x4((__i32x4)__a, (__i32x4)__b); + return (v128_t)__builtin_elementwise_min((__i32x4)__a, (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_min(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_min_u_i32x4((__u32x4)__a, (__u32x4)__b); + return (v128_t)__builtin_elementwise_min((__u32x4)__a, (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_max(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_max_s_i32x4((__i32x4)__a, (__i32x4)__b); + return (v128_t)__builtin_elementwise_max((__i32x4)__a, (__i32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_max(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_max_u_i32x4((__u32x4)__a, (__u32x4)__b); + return (v128_t)__builtin_elementwise_max((__u32x4)__a, (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_dot_i16x8(v128_t __a, @@ -1878,6 +1879,151 @@ wasm_i32x4_relaxed_dot_i8x16_i7x16_add(v128_t __a, v128_t __b, v128_t __c) { (__i8x16)__a, (__i8x16)__b, (__i32x4)__c); } +// FP16 intrinsics +#define __FP16_FN_ATTRS \ + 
__attribute__((__always_inline__, __nodebug__, __target__("fp16"), \ + __min_vector_width__(128))) + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_splat(float __a) { + return (v128_t)__builtin_wasm_splat_f16x8(__a); +} + +#ifdef __wasm_fp16__ +// TODO Replace the following macros with regular C functions and use normal +// target-independent vector code like the other replace/extract instructions. + +#define wasm_f16x8_extract_lane(__a, __i) \ + (__builtin_wasm_extract_lane_f16x8((__f16x8)(__a), __i)) + +#define wasm_f16x8_replace_lane(__a, __i, __b) \ + ((v128_t)__builtin_wasm_replace_lane_f16x8((__f16x8)(__a), __i, __b)) + +#endif + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_abs(v128_t __a) { + return (v128_t)__builtin_wasm_abs_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_neg(v128_t __a) { + return (v128_t)(-(__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_sqrt(v128_t __a) { + return (v128_t)__builtin_wasm_sqrt_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ceil(v128_t __a) { + return (v128_t)__builtin_wasm_ceil_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_floor(v128_t __a) { + return (v128_t)__builtin_wasm_floor_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_trunc(v128_t __a) { + return (v128_t)__builtin_wasm_trunc_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_nearest(v128_t __a) { + return (v128_t)__builtin_wasm_nearest_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_eq(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a == (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ne(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a != (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_lt(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a < (__f16x8)__b); +} + +static 
__inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_gt(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a > (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_le(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a <= (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_ge(v128_t __a, v128_t __b) { + return (v128_t)((__f16x8)__a >= (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_add(v128_t __a, + v128_t __b) { + return (v128_t)((__f16x8)__a + (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_sub(v128_t __a, + v128_t __b) { + return (v128_t)((__f16x8)__a - (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_mul(v128_t __a, + v128_t __b) { + return (v128_t)((__f16x8)__a * (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_div(v128_t __a, + v128_t __b) { + return (v128_t)((__f16x8)__a / (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_min(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_min_f16x8((__f16x8)__a, (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_max(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_max_f16x8((__f16x8)__a, (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_pmin(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_pmin_f16x8((__f16x8)__a, (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_pmax(v128_t __a, + v128_t __b) { + return (v128_t)__builtin_wasm_pmax_f16x8((__f16x8)__a, (__f16x8)__b); +} + +static __inline__ v128_t __FP16_FN_ATTRS +wasm_i16x8_trunc_sat_f16x8(v128_t __a) { + return (v128_t)__builtin_wasm_trunc_saturate_s_i16x8_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS +wasm_u16x8_trunc_sat_f16x8(v128_t __a) { + return (v128_t)__builtin_wasm_trunc_saturate_u_i16x8_f16x8((__f16x8)__a); +} + +static __inline__ v128_t __FP16_FN_ATTRS 
wasm_f16x8_convert_i16x8(v128_t __a) { + return (v128_t) __builtin_convertvector((__i16x8)__a, __f16x8); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_convert_u16x8(v128_t __a) { + return (v128_t) __builtin_convertvector((__u16x8)__a, __f16x8); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_madd(v128_t __a, + v128_t __b, + v128_t __c) { + return (v128_t)__builtin_wasm_relaxed_madd_f16x8((__f16x8)__a, (__f16x8)__b, + (__f16x8)__c); +} + +static __inline__ v128_t __FP16_FN_ATTRS wasm_f16x8_relaxed_nmadd(v128_t __a, + v128_t __b, + v128_t __c) { + return (v128_t)__builtin_wasm_relaxed_nmadd_f16x8((__f16x8)__a, (__f16x8)__b, + (__f16x8)__c); +} + // Deprecated intrinsics static __inline__ v128_t __DEPRECATED_FN_ATTRS("wasm_i8x16_swizzle") diff --git a/lib/include/xmmintrin.h b/lib/include/xmmintrin.h index 6fb27297af92..20e66d190113 100644 --- a/lib/include/xmmintrin.h +++ b/lib/include/xmmintrin.h @@ -32,12 +32,41 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16))); #endif /* Define the default attributes for the functions in this file. 
*/ +#if defined(__EVEX512__) && !defined(__AVX10_1_512__) #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sse,no-evex512"), \ __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS_MMX \ +#define __DEFAULT_FN_ATTRS_SSE2 \ __attribute__((__always_inline__, __nodebug__, \ - __target__("mmx,sse,no-evex512"), __min_vector_width__(64))) + __target__("sse2,no-evex512"), __min_vector_width__(128))) +#else +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("sse"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_SSE2 \ + __attribute__((__always_inline__, __nodebug__, __target__("sse2"), \ + __min_vector_width__(128))) +#endif + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR __DEFAULT_FN_ATTRS_SSE2 +#endif + +#define __trunc64(x) \ + (__m64) __builtin_shufflevector((__v2di)(x), __extension__(__v2di){}, 0) +#define __zext128(x) \ + (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ + 1, 2, 3) +#define __anyext128(x) \ + (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ + 1, -1, -1) +#define __zeroupper64(x) \ + (__m128i) __builtin_shufflevector((__v4si)(x), __extension__(__v4si){}, 0, \ + 1, 4, 5) /// Adds the 32-bit float values in the low-order bits of the operands. /// @@ -54,9 +83,8 @@ typedef unsigned int __v4su __attribute__((__vector_size__(16))); /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the sum /// of the lower 32 bits of both operands. The upper 96 bits are copied from /// the upper 96 bits of the first source operand. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_add_ss(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_add_ss(__m128 __a, __m128 __b) { __a[0] += __b[0]; return __a; } @@ -74,9 +102,8 @@ _mm_add_ss(__m128 __a, __m128 __b) /// A 128-bit vector of [4 x float] containing one of the source operands. /// \returns A 128-bit vector of [4 x float] containing the sums of both /// operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_add_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_add_ps(__m128 __a, __m128 __b) { return (__m128)((__v4sf)__a + (__v4sf)__b); } @@ -96,9 +123,8 @@ _mm_add_ps(__m128 __a, __m128 __b) /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// difference of the lower 32 bits of both operands. The upper 96 bits are /// copied from the upper 96 bits of the first source operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_sub_ss(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sub_ss(__m128 __a, __m128 __b) { __a[0] -= __b[0]; return __a; } @@ -117,9 +143,8 @@ _mm_sub_ss(__m128 __a, __m128 __b) /// A 128-bit vector of [4 x float] containing the subtrahend. /// \returns A 128-bit vector of [4 x float] containing the differences between /// both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_sub_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sub_ps(__m128 __a, __m128 __b) { return (__m128)((__v4sf)__a - (__v4sf)__b); } @@ -139,9 +164,8 @@ _mm_sub_ps(__m128 __a, __m128 __b) /// \returns A 128-bit vector of [4 x float] containing the product of the lower /// 32 bits of both operands. The upper 96 bits are copied from the upper 96 /// bits of the first source operand. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mul_ss(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mul_ss(__m128 __a, __m128 __b) { __a[0] *= __b[0]; return __a; } @@ -159,9 +183,8 @@ _mm_mul_ss(__m128 __a, __m128 __b) /// A 128-bit vector of [4 x float] containing one of the source operands. /// \returns A 128-bit vector of [4 x float] containing the products of both /// operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_mul_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_mul_ps(__m128 __a, __m128 __b) { return (__m128)((__v4sf)__a * (__v4sf)__b); } @@ -181,9 +204,8 @@ _mm_mul_ps(__m128 __a, __m128 __b) /// \returns A 128-bit vector of [4 x float] containing the quotients of the /// lower 32 bits of both operands. The upper 96 bits are copied from the /// upper 96 bits of the first source operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_div_ss(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_div_ss(__m128 __a, __m128 __b) { __a[0] /= __b[0]; return __a; } @@ -200,9 +222,8 @@ _mm_div_ss(__m128 __a, __m128 __b) /// A 128-bit vector of [4 x float] containing the divisor. /// \returns A 128-bit vector of [4 x float] containing the quotients of both /// operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_div_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_div_ps(__m128 __a, __m128 __b) { return (__m128)((__v4sf)__a / (__v4sf)__b); } @@ -416,9 +437,8 @@ _mm_max_ps(__m128 __a, __m128 __b) /// A 128-bit vector containing one of the source operands. /// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the /// values between both operands. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_and_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_and_ps(__m128 __a, __m128 __b) { return (__m128)((__v4su)__a & (__v4su)__b); } @@ -438,9 +458,8 @@ _mm_and_ps(__m128 __a, __m128 __b) /// \returns A 128-bit vector of [4 x float] containing the bitwise AND of the /// one's complement of the first operand and the values in the second /// operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_andnot_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_andnot_ps(__m128 __a, __m128 __b) { return (__m128)(~(__v4su)__a & (__v4su)__b); } @@ -456,9 +475,8 @@ _mm_andnot_ps(__m128 __a, __m128 __b) /// A 128-bit vector of [4 x float] containing one of the source operands. /// \returns A 128-bit vector of [4 x float] containing the bitwise OR of the /// values between both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_or_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_or_ps(__m128 __a, __m128 __b) { return (__m128)((__v4su)__a | (__v4su)__b); } @@ -475,9 +493,8 @@ _mm_or_ps(__m128 __a, __m128 __b) /// A 128-bit vector of [4 x float] containing one of the source operands. /// \returns A 128-bit vector of [4 x float] containing the bitwise exclusive OR /// of the values between both operands. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_xor_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_xor_ps(__m128 __a, __m128 __b) { return (__m128)((__v4su)__a ^ (__v4su)__b); } @@ -1448,10 +1465,10 @@ _mm_cvtss_si64(__m128 __a) /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtps_pi32(__m128 __a) { - return (__m64)__builtin_ia32_cvtps2pi((__v4sf)__a); + return __trunc64(__builtin_ia32_cvtps2dq((__v4sf)__zeroupper64(__a))); } /// Converts two low-order float values in a 128-bit vector of @@ -1468,7 +1485,7 @@ _mm_cvtps_pi32(__m128 __a) /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvt_ps2pi(__m128 __a) { return _mm_cvtps_pi32(__a); @@ -1558,10 +1575,10 @@ _mm_cvttss_si64(__m128 __a) /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvttps_pi32(__m128 __a) { - return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a); + return __trunc64(__builtin_ia32_cvttps2dq((__v4sf)__zeroupper64(__a))); } /// Converts the lower (first) two elements of a 128-bit vector of [4 x float] @@ -1579,7 +1596,7 @@ _mm_cvttps_pi32(__m128 __a) /// \param __a /// A 128-bit vector of [4 x float]. /// \returns A 64-bit integer vector containing the converted values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtt_ps2pi(__m128 __a) { return _mm_cvttps_pi32(__a); @@ -1601,9 +1618,8 @@ _mm_cvtt_ps2pi(__m128 __a) /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// converted value of the second operand. The upper 96 bits are copied from /// the upper 96 bits of the first operand. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_cvtsi32_ss(__m128 __a, int __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvtsi32_ss(__m128 __a, + int __b) { __a[0] = __b; return __a; } @@ -1624,9 +1640,8 @@ _mm_cvtsi32_ss(__m128 __a, int __b) /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// converted value of the second operand. The upper 96 bits are copied from /// the upper 96 bits of the first operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_cvt_si2ss(__m128 __a, int __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_cvt_si2ss(__m128 __a, + int __b) { return _mm_cvtsi32_ss(__a, __b); } @@ -1648,9 +1663,8 @@ _mm_cvt_si2ss(__m128 __a, int __b) /// \returns A 128-bit vector of [4 x float] whose lower 32 bits contain the /// converted value of the second operand. The upper 96 bits are copied from /// the upper 96 bits of the first operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_cvtsi64_ss(__m128 __a, long long __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtsi64_ss(__m128 __a, long long __b) { __a[0] = __b; return __a; } @@ -1674,10 +1688,13 @@ _mm_cvtsi64_ss(__m128 __a, long long __b) /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted value of the second operand. The upper 64 bits are copied from /// the upper 64 bits of the first operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtpi32_ps(__m128 __a, __m64 __b) { - return __builtin_ia32_cvtpi2ps((__v4sf)__a, (__v2si)__b); + return (__m128)__builtin_shufflevector( + (__v4sf)__a, + __builtin_convertvector((__v4si)__zext128(__b), __v4sf), + 4, 5, 2, 3); } /// Converts two elements of a 64-bit vector of [2 x i32] into two @@ -1697,7 +1714,7 @@ _mm_cvtpi32_ps(__m128 __a, __m64 __b) /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// converted value from the second operand. 
The upper 64 bits are copied /// from the upper 64 bits of the first operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 _mm_cvt_pi2ps(__m128 __a, __m64 __b) { return _mm_cvtpi32_ps(__a, __b); @@ -1714,9 +1731,8 @@ _mm_cvt_pi2ps(__m128 __a, __m64 __b) /// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are /// used in the extraction. /// \returns A 32-bit float containing the extracted value. -static __inline__ float __DEFAULT_FN_ATTRS -_mm_cvtss_f32(__m128 __a) -{ +static __inline__ float __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_cvtss_f32(__m128 __a) { return __a[0]; } @@ -1907,9 +1923,8 @@ _mm_undefined_ps(void) /// \returns An initialized 128-bit floating-point vector of [4 x float]. The /// lower 32 bits contain the value provided in the source operand. The /// upper 96 bits are set to zero. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_set_ss(float __w) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set_ss(float __w) { return __extension__ (__m128){ __w, 0.0f, 0.0f, 0.0f }; } @@ -1925,9 +1940,8 @@ _mm_set_ss(float __w) /// A single-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_set1_ps(float __w) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set1_ps(float __w) { return __extension__ (__m128){ __w, __w, __w, __w }; } @@ -1944,9 +1958,8 @@ _mm_set1_ps(float __w) /// A single-precision floating-point value used to initialize each vector /// element of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_set_ps1(float __w) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set_ps1(float __w) { return _mm_set1_ps(__w); } @@ -1971,9 +1984,8 @@ _mm_set_ps1(float __w) /// A single-precision floating-point value used to initialize bits [31:0] /// of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_set_ps(float __z, float __y, float __x, float __w) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_set_ps(float __z, float __y, float __x, float __w) { return __extension__ (__m128){ __w, __x, __y, __z }; } @@ -1999,9 +2011,8 @@ _mm_set_ps(float __z, float __y, float __x, float __w) /// A single-precision floating-point value used to initialize bits [127:96] /// of the result. /// \returns An initialized 128-bit floating-point vector of [4 x float]. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_setr_ps(float __z, float __y, float __x, float __w) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_setr_ps(float __z, float __y, float __x, float __w) { return __extension__ (__m128){ __z, __y, __x, __w }; } @@ -2014,9 +2025,8 @@ _mm_setr_ps(float __z, float __y, float __x, float __w) /// /// \returns An initialized 128-bit floating-point vector of [4 x float] with /// all elements set to zero. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_setzero_ps(void) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_setzero_ps(void) { return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f }; } @@ -2231,10 +2241,10 @@ _mm_storer_ps(float *__p, __m128 __a) /// A pointer to an aligned memory location used to store the register value. /// \param __a /// A 64-bit integer containing the value to be stored. 
-static __inline__ void __DEFAULT_FN_ATTRS_MMX +static __inline__ void __DEFAULT_FN_ATTRS _mm_stream_pi(void *__p, __m64 __a) { - __builtin_ia32_movntq((__m64 *)__p, __a); + __builtin_nontemporal_store(__a, (__m64 *)__p); } /// Moves packed float values from a 128-bit vector of [4 x float] to a @@ -2296,7 +2306,7 @@ void _mm_sfence(void); /// 3: Bits [63:48] are copied to the destination. /// \returns A 16-bit integer containing the extracted 16 bits of packed data. #define _mm_extract_pi16(a, n) \ - ((int)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)) + ((int)(unsigned short)__builtin_ia32_vec_ext_v4hi((__v4hi)a, (int)n)) /// Copies data from the 64-bit vector of [4 x i16] to the destination, /// and inserts the lower 16-bits of an integer operand at the 16-bit offset @@ -2342,10 +2352,10 @@ void _mm_sfence(void); /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_max_pi16(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_pmaxsw((__v4hi)__a, (__v4hi)__b); + return (__m64)__builtin_elementwise_max((__v4hi)__a, (__v4hi)__b); } /// Compares each of the corresponding packed 8-bit unsigned integer @@ -2361,10 +2371,10 @@ _mm_max_pi16(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_max_pu8(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_pmaxub((__v8qi)__a, (__v8qi)__b); + return (__m64)__builtin_elementwise_max((__v8qu)__a, (__v8qu)__b); } /// Compares each of the corresponding packed 16-bit integer values of @@ -2380,10 +2390,10 @@ _mm_max_pu8(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_min_pi16(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_pminsw((__v4hi)__a, (__v4hi)__b); + return (__m64)__builtin_elementwise_min((__v4hi)__a, (__v4hi)__b); } /// Compares each of the corresponding packed 8-bit unsigned integer @@ -2399,10 +2409,10 @@ _mm_min_pi16(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the comparison results. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_min_pu8(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_pminub((__v8qi)__a, (__v8qi)__b); + return (__m64)__builtin_elementwise_min((__v8qu)__a, (__v8qu)__b); } /// Takes the most significant bit from each 8-bit element in a 64-bit @@ -2417,10 +2427,10 @@ _mm_min_pu8(__m64 __a, __m64 __b) /// A 64-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bit from each 8-bit element in \a __a, /// written to bits [7:0]. 
-static __inline__ int __DEFAULT_FN_ATTRS_MMX +static __inline__ int __DEFAULT_FN_ATTRS_SSE2 _mm_movemask_pi8(__m64 __a) { - return __builtin_ia32_pmovmskb((__v8qi)__a); + return __builtin_ia32_pmovmskb128((__v16qi)__zext128(__a)); } /// Multiplies packed 16-bit unsigned integer values and writes the @@ -2436,10 +2446,11 @@ _mm_movemask_pi8(__m64 __a) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the products of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_mulhi_pu16(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_pmulhuw((__v4hi)__a, (__v4hi)__b); + return __trunc64(__builtin_ia32_pmulhuw128((__v8hi)__anyext128(__a), + (__v8hi)__anyext128(__b))); } /// Shuffles the 4 16-bit integers from a 64-bit integer vector to the @@ -2476,8 +2487,10 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b) /// _MM_SHUFFLE(b6, b4, b2, b0) can create an 8-bit mask of the form /// [b6, b4, b2, b0]. /// \returns A 64-bit integer vector containing the shuffled values. -#define _mm_shuffle_pi16(a, n) \ - ((__m64)__builtin_ia32_pshufw((__v4hi)(__m64)(a), (n))) +#define _mm_shuffle_pi16(a, n) \ + ((__m64)__builtin_shufflevector((__v4hi)(__m64)(a), __extension__(__v4hi){}, \ + (n) & 0x3, ((n) >> 2) & 0x3, \ + ((n) >> 4) & 0x3, ((n) >> 6) & 0x3)) /// Conditionally copies the values from each 8-bit element in the first /// 64-bit integer vector operand to the specified memory location, as @@ -2502,10 +2515,25 @@ _mm_mulhi_pu16(__m64 __a, __m64 __b) /// A pointer to a 64-bit memory location that will receive the conditionally /// copied integer values. The address of the memory location does not have /// to be aligned. 
-static __inline__ void __DEFAULT_FN_ATTRS_MMX +static __inline__ void __DEFAULT_FN_ATTRS_SSE2 _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) { - __builtin_ia32_maskmovq((__v8qi)__d, (__v8qi)__n, __p); + // This is complex, because we need to support the case where __p is pointing + // within the last 15 to 8 bytes of a page. In that case, using a 128-bit + // write might cause a trap where a 64-bit maskmovq would not. (Memory + // locations not selected by the mask bits might still cause traps.) + __m128i __d128 = __anyext128(__d); + __m128i __n128 = __zext128(__n); + if (((__SIZE_TYPE__)__p & 0xfff) >= 4096-15 && + ((__SIZE_TYPE__)__p & 0xfff) <= 4096-8) { + // If there's a risk of spurious trap due to a 128-bit write, back up the + // pointer by 8 bytes and shift values in registers to match. + __p -= 8; + __d128 = __builtin_ia32_pslldqi128_byteshift((__v2di)__d128, 8); + __n128 = __builtin_ia32_pslldqi128_byteshift((__v2di)__n128, 8); + } + + __builtin_ia32_maskmovdqu((__v16qi)__d128, (__v16qi)__n128, __p); } /// Computes the rounded averages of the packed unsigned 8-bit integer @@ -2521,10 +2549,11 @@ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the averages of both operands. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_avg_pu8(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_pavgb((__v8qi)__a, (__v8qi)__b); + return __trunc64(__builtin_ia32_pavgb128((__v16qi)__anyext128(__a), + (__v16qi)__anyext128(__b))); } /// Computes the rounded averages of the packed unsigned 16-bit integer @@ -2540,10 +2569,11 @@ _mm_avg_pu8(__m64 __a, __m64 __b) /// \param __b /// A 64-bit integer vector containing one of the source operands. /// \returns A 64-bit integer vector containing the averages of both operands. 
-static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_avg_pu16(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_pavgw((__v4hi)__a, (__v4hi)__b); + return __trunc64(__builtin_ia32_pavgw128((__v8hi)__anyext128(__a), + (__v8hi)__anyext128(__b))); } /// Subtracts the corresponding 8-bit unsigned integer values of the two @@ -2562,10 +2592,11 @@ _mm_avg_pu16(__m64 __a, __m64 __b) /// \returns A 64-bit integer vector whose lower 16 bits contain the sums of the /// sets of absolute differences between both operands. The upper bits are /// cleared. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_sad_pu8(__m64 __a, __m64 __b) { - return (__m64)__builtin_ia32_psadbw((__v8qi)__a, (__v8qi)__b); + return __trunc64(__builtin_ia32_psadbw128((__v16qi)__zext128(__a), + (__v16qi)__zext128(__b))); } #if defined(__cplusplus) @@ -2741,9 +2772,8 @@ void _mm_setcsr(unsigned int __i); /// Bits [95:64] are written to bits [63:32] of the destination. \n /// Bits [127:96] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x float] containing the interleaved values. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_unpackhi_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_unpackhi_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 2, 6, 3, 7); } @@ -2763,9 +2793,8 @@ _mm_unpackhi_ps(__m128 __a, __m128 __b) /// Bits [31:0] are written to bits [63:32] of the destination. \n /// Bits [63:32] are written to bits [127:96] of the destination. /// \returns A 128-bit vector of [4 x float] containing the interleaved values. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_unpacklo_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_unpacklo_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 4, 1, 5); } @@ -2785,9 +2814,8 @@ _mm_unpacklo_ps(__m128 __a, __m128 __b) /// A 128-bit floating-point vector of [4 x float]. The lower 32 bits are /// written to the lower 32 bits of the result. /// \returns A 128-bit floating-point vector of [4 x float]. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_move_ss(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_move_ss(__m128 __a, __m128 __b) { __a[0] = __b[0]; return __a; } @@ -2807,9 +2835,8 @@ _mm_move_ss(__m128 __a, __m128 __b) /// A 128-bit floating-point vector of [4 x float]. The upper 64 bits are /// written to the lower 64 bits of the result. /// \returns A 128-bit floating-point vector of [4 x float]. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_movehl_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_movehl_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 6, 7, 2, 3); } @@ -2828,9 +2855,8 @@ _mm_movehl_ps(__m128 __a, __m128 __b) /// A 128-bit floating-point vector of [4 x float]. The lower 64 bits are /// written to the upper 64 bits of the result. /// \returns A 128-bit floating-point vector of [4 x float]. -static __inline__ __m128 __DEFAULT_FN_ATTRS -_mm_movelh_ps(__m128 __a, __m128 __b) -{ +static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_movelh_ps(__m128 __a, __m128 __b) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)__b, 0, 1, 4, 5); } @@ -2846,22 +2872,10 @@ _mm_movelh_ps(__m128 __a, __m128 __b) /// from the corresponding elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtpi16_ps(__m64 __a) { - __m64 __b, __c; - __m128 __r; - - __b = _mm_setzero_si64(); - __b = _mm_cmpgt_pi16(__b, __a); - __c = _mm_unpackhi_pi16(__a, __b); - __r = _mm_setzero_ps(); - __r = _mm_cvtpi32_ps(__r, __c); - __r = _mm_movelh_ps(__r, __r); - __c = _mm_unpacklo_pi16(__a, __b); - __r = _mm_cvtpi32_ps(__r, __c); - - return __r; + return __builtin_convertvector((__v4hi)__a, __v4sf); } /// Converts a 64-bit vector of 16-bit unsigned integer values into a @@ -2876,21 +2890,10 @@ _mm_cvtpi16_ps(__m64 __a) /// destination are copied from the corresponding elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtpu16_ps(__m64 __a) { - __m64 __b, __c; - __m128 __r; - - __b = _mm_setzero_si64(); - __c = _mm_unpackhi_pi16(__a, __b); - __r = _mm_setzero_ps(); - __r = _mm_cvtpi32_ps(__r, __c); - __r = _mm_movelh_ps(__r, __r); - __c = _mm_unpacklo_pi16(__a, __b); - __r = _mm_cvtpi32_ps(__r, __c); - - return __r; + return __builtin_convertvector((__v4hu)__a, __v4sf); } /// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8] @@ -2905,16 +2908,12 @@ _mm_cvtpu16_ps(__m64 __a) /// from the corresponding lower 4 elements in this operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the operand. 
-static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtpi8_ps(__m64 __a) { - __m64 __b; - - __b = _mm_setzero_si64(); - __b = _mm_cmpgt_pi8(__b, __a); - __b = _mm_unpacklo_pi8(__a, __b); - - return _mm_cvtpi16_ps(__b); + return __builtin_convertvector( + __builtin_shufflevector((__v8qs)__a, __extension__ (__v8qs){}, + 0, 1, 2, 3), __v4sf); } /// Converts the lower four unsigned 8-bit integer values from a 64-bit @@ -2930,15 +2929,12 @@ _mm_cvtpi8_ps(__m64 __a) /// operand. /// \returns A 128-bit vector of [4 x float] containing the copied and converted /// values from the source operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtpu8_ps(__m64 __a) { - __m64 __b; - - __b = _mm_setzero_si64(); - __b = _mm_unpacklo_pi8(__a, __b); - - return _mm_cvtpi16_ps(__b); + return __builtin_convertvector( + __builtin_shufflevector((__v8qu)__a, __extension__ (__v8qu){}, + 0, 1, 2, 3), __v4sf); } /// Converts the two 32-bit signed integer values from each 64-bit vector @@ -2957,16 +2953,12 @@ _mm_cvtpu8_ps(__m64 __a) /// \returns A 128-bit vector of [4 x float] whose lower 64 bits contain the /// copied and converted values from the first operand. The upper 64 bits /// contain the copied and converted values from the second operand. -static __inline__ __m128 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m128 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtpi32x2_ps(__m64 __a, __m64 __b) { - __m128 __c; - - __c = _mm_setzero_ps(); - __c = _mm_cvtpi32_ps(__c, __b); - __c = _mm_movelh_ps(__c, __c); - - return _mm_cvtpi32_ps(__c, __a); + return __builtin_convertvector( + __builtin_shufflevector((__v2si)__a, (__v2si)__b, + 0, 1, 2, 3), __v4sf); } /// Converts each single-precision floating-point element of a 128-bit @@ -2986,16 +2978,11 @@ _mm_cvtpi32x2_ps(__m64 __a, __m64 __b) /// A 128-bit floating-point vector of [4 x float]. 
/// \returns A 64-bit integer vector of [4 x i16] containing the converted /// values. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtps_pi16(__m128 __a) { - __m64 __b, __c; - - __b = _mm_cvtps_pi32(__a); - __a = _mm_movehl_ps(__a, __a); - __c = _mm_cvtps_pi32(__a); - - return _mm_packs_pi32(__b, __c); + return __trunc64(__builtin_ia32_packssdw128( + (__v4si)__builtin_ia32_cvtps2dq((__v4sf)__a), (__v4si)_mm_setzero_ps())); } /// Converts each single-precision floating-point element of a 128-bit @@ -3016,7 +3003,7 @@ _mm_cvtps_pi16(__m128 __a) /// 128-bit floating-point vector of [4 x float]. /// \returns A 64-bit integer vector of [8 x i8]. The lower 32 bits contain the /// converted values and the uppper 32 bits are set to zero. -static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX +static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2 _mm_cvtps_pi8(__m128 __a) { __m64 __b, __c; @@ -3196,8 +3183,14 @@ do { \ #define _m_psadbw _mm_sad_pu8 #define _m_ _mm_ +#undef __trunc64 +#undef __zext128 +#undef __anyext128 +#undef __zeroupper64 #undef __DEFAULT_FN_ATTRS -#undef __DEFAULT_FN_ATTRS_MMX +#undef __DEFAULT_FN_ATTRS_CONSTEXPR +#undef __DEFAULT_FN_ATTRS_SSE2 +#undef __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR /* Ugly hack for backwards-compatibility (compatible with gcc) */ #if defined(__SSE2__) && !__building_module(_Builtin_intrinsics) diff --git a/lib/libc/glibc/sysdeps/s390/s390-64/start-2.33.S b/lib/libc/glibc/sysdeps/s390/s390-64/start-2.33.S index 37503dd1dc87..02ed4aad5127 100644 --- a/lib/libc/glibc/sysdeps/s390/s390-64/start-2.33.S +++ b/lib/libc/glibc/sysdeps/s390/s390-64/start-2.33.S @@ -61,8 +61,7 @@ _start: cfi_startproc /* Mark r14 as undefined in order to stop unwinding here! */ - /* zig patch: r14 -> %r14. revert with llvm 20. */ - cfi_undefined (%r14) + cfi_undefined (r14) /* Load argc and argv from stack. 
*/ la %r4,8(%r15) # get argv lg %r3,0(%r15) # get argc @@ -86,8 +85,7 @@ _start: /* Ok, now branch to the libc main routine. */ #ifdef PIC - /* zig patch: GOTENT -> GOT. revert with llvm 20. */ - larl %r2,main@GOT # load pointer to main + larl %r2,main@GOTENT # load pointer to main lg %r2,0(%r2) brasl %r14,__libc_start_main@plt #else diff --git a/lib/libc/glibc/sysdeps/s390/s390-64/start.S b/lib/libc/glibc/sysdeps/s390/s390-64/start.S index 959d761164e9..ab40519307ac 100644 --- a/lib/libc/glibc/sysdeps/s390/s390-64/start.S +++ b/lib/libc/glibc/sysdeps/s390/s390-64/start.S @@ -60,8 +60,7 @@ _start: cfi_startproc /* Mark r14 as undefined in order to stop unwinding here! */ - /* zig patch: r14 -> %r14. revert with llvm 20. */ - cfi_undefined (%r14) + cfi_undefined (r14) /* Load argc and argv from stack. */ la %r4,8(%r15) # get argv lg %r3,0(%r15) # get argc @@ -88,8 +87,7 @@ _start: # ifdef SHARED /* Used for dynamic linked position independent executable. => Scrt1.o */ - /* zig patch: GOTENT -> GOT. revert with llvm 20. */ - larl %r2,main@GOT # load pointer to main + larl %r2,main@GOTENT # load pointer to main lg %r2,0(%r2) # else /* Used for dynamic linked position dependent executable. @@ -121,8 +119,7 @@ _start: use of GOT relocations before __libc_start_main is called. */ __wrap_main: cfi_startproc - /* zig patch: GOTENT -> GOT. revert with llvm 20. 
*/ - larl %r1,main@GOT # load pointer to main + larl %r1,main@GOTENT # load pointer to main lg %r1,0(%r1) br %r1 cfi_endproc diff --git a/lib/libcxx/include/__algorithm/adjacent_find.h b/lib/libcxx/include/__algorithm/adjacent_find.h index 6f15456e3a4d..2508250d8796 100644 --- a/lib/libcxx/include/__algorithm/adjacent_find.h +++ b/lib/libcxx/include/__algorithm/adjacent_find.h @@ -11,9 +11,9 @@ #define _LIBCPP___ALGORITHM_ADJACENT_FIND_H #include <__algorithm/comp.h> -#include <__algorithm/iterator_operations.h> #include <__config> -#include <__iterator/iterator_traits.h> +#include <__functional/identity.h> +#include <__type_traits/invoke.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -25,14 +25,15 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter -__adjacent_find(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) { +template +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter +__adjacent_find(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { if (__first == __last) return __first; + _Iter __i = __first; while (++__i != __last) { - if (__pred(*__first, *__i)) + if (std::__invoke(__pred, std::__invoke(__proj, *__first), std::__invoke(__proj, *__i))) return __first; __first = __i; } @@ -40,13 +41,14 @@ __adjacent_find(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) { } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator adjacent_find(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) { - return std::__adjacent_find(std::move(__first), std::move(__last), __pred); + __identity __proj; + return std::__adjacent_find(std::move(__first), std::move(__last), __pred, __proj); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator adjacent_find(_ForwardIterator __first, _ForwardIterator __last) { return std::adjacent_find(std::move(__first), std::move(__last), __equal_to()); } diff --git a/lib/libcxx/include/__algorithm/all_of.h b/lib/libcxx/include/__algorithm/all_of.h index ec84eea75929..6acc117fc47b 100644 --- a/lib/libcxx/include/__algorithm/all_of.h +++ b/lib/libcxx/include/__algorithm/all_of.h @@ -11,6 +11,8 @@ #define _LIBCPP___ALGORITHM_ALL_OF_H #include <__config> +#include <__functional/identity.h> +#include <__type_traits/invoke.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -18,15 +20,23 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool -all_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) { - for (; __first != __last; ++__first) - if (!__pred(*__first)) +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__all_of(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { + for (; __first != __last; ++__first) { + if (!std::__invoke(__pred, std::__invoke(__proj, *__first))) return false; + } return true; } +template +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +all_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) { + __identity __proj; + return std::__all_of(__first, __last, __pred, __proj); +} + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___ALGORITHM_ALL_OF_H diff --git a/lib/libcxx/include/__algorithm/any_of.h b/lib/libcxx/include/__algorithm/any_of.h index b5ff778c4171..4b6eb9451728 100644 --- a/lib/libcxx/include/__algorithm/any_of.h +++ b/lib/libcxx/include/__algorithm/any_of.h @@ -11,6 +11,8 @@ #define _LIBCPP___ALGORITHM_ANY_OF_H #include <__config> +#include <__functional/identity.h> +#include <__type_traits/invoke.h> #if 
!defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -18,15 +20,23 @@ _LIBCPP_BEGIN_NAMESPACE_STD -template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool -any_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) { - for (; __first != __last; ++__first) - if (__pred(*__first)) +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__any_of(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { + for (; __first != __last; ++__first) { + if (std::__invoke(__pred, std::__invoke(__proj, *__first))) return true; + } return false; } +template +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +any_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) { + __identity __proj; + return std::__any_of(__first, __last, __pred, __proj); +} + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___ALGORITHM_ANY_OF_H diff --git a/lib/libcxx/include/__algorithm/binary_search.h b/lib/libcxx/include/__algorithm/binary_search.h index 6065fc37274d..4940059f285c 100644 --- a/lib/libcxx/include/__algorithm/binary_search.h +++ b/lib/libcxx/include/__algorithm/binary_search.h @@ -13,7 +13,6 @@ #include <__algorithm/comp_ref_type.h> #include <__algorithm/lower_bound.h> #include <__config> -#include <__iterator/iterator_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -22,14 +21,14 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { __first = std::lower_bound<_ForwardIterator, _Tp, __comp_ref_type<_Compare> >(__first, __last, __value, __comp); return __first != __last && !__comp(__value, *__first); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { return std::binary_search(__first, __last, __value, __less<>()); } diff --git a/lib/libcxx/include/__algorithm/comp.h b/lib/libcxx/include/__algorithm/comp.h index a0fa88d6d2ac..ab3c59841882 100644 --- a/lib/libcxx/include/__algorithm/comp.h +++ b/lib/libcxx/include/__algorithm/comp.h @@ -11,6 +11,7 @@ #include <__config> #include <__type_traits/desugars_to.h> +#include <__type_traits/is_integral.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -44,6 +45,9 @@ struct __less { template inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true; +template +inline const bool __desugars_to_v<__totally_ordered_less_tag, __less<>, _Tp, _Tp> = is_integral<_Tp>::value; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___ALGORITHM_COMP_H diff --git a/lib/libcxx/include/__algorithm/comp_ref_type.h b/lib/libcxx/include/__algorithm/comp_ref_type.h index c367fbb91ac2..6a9d5cef2671 100644 --- a/lib/libcxx/include/__algorithm/comp_ref_type.h +++ b/lib/libcxx/include/__algorithm/comp_ref_type.h @@ -56,10 +56,10 @@ struct __debug_less { // Pass the comparator by lvalue reference. Or in the debug mode, using a debugging wrapper that stores a reference. 
#if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG template -using __comp_ref_type = __debug_less<_Comp>; +using __comp_ref_type _LIBCPP_NODEBUG = __debug_less<_Comp>; #else template -using __comp_ref_type = _Comp&; +using __comp_ref_type _LIBCPP_NODEBUG = _Comp&; #endif _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__algorithm/copy.h b/lib/libcxx/include/__algorithm/copy.h index 0890b895f540..962aa90059d5 100644 --- a/lib/libcxx/include/__algorithm/copy.h +++ b/lib/libcxx/include/__algorithm/copy.h @@ -11,11 +11,12 @@ #include <__algorithm/copy_move_common.h> #include <__algorithm/for_each_segment.h> -#include <__algorithm/iterator_operations.h> #include <__algorithm/min.h> #include <__config> +#include <__iterator/iterator_traits.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/common_type.h> +#include <__type_traits/enable_if.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -28,10 +29,9 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -template +template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter); -template struct __copy_impl { template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> @@ -47,7 +47,7 @@ struct __copy_impl { template struct _CopySegment { - using _Traits = __segmented_iterator_traits<_InIter>; + using _Traits _LIBCPP_NODEBUG = __segmented_iterator_traits<_InIter>; _OutIter& __result_; @@ -56,7 +56,7 @@ struct __copy_impl { _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void operator()(typename _Traits::__local_iterator __lfirst, typename _Traits::__local_iterator __llast) { - __result_ = std::__copy<_AlgPolicy>(__lfirst, __llast, std::move(__result_)).second; + __result_ = std::__copy(__lfirst, __llast, std::move(__result_)).second; } }; @@ -85,7 +85,7 @@ struct __copy_impl { while (true) { auto __local_last = _Traits::__end(__segment_iterator); auto __size = std::min<_DiffT>(__local_last 
- __local_first, __last - __first); - auto __iters = std::__copy<_AlgPolicy>(__first, __first + __size, __local_first); + auto __iters = std::__copy(__first, __first + __size, __local_first); __first = std::move(__iters.first); if (__first == __last) @@ -103,17 +103,16 @@ struct __copy_impl { } }; -template +template pair<_InIter, _OutIter> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __copy(_InIter __first, _Sent __last, _OutIter __result) { - return std::__copy_move_unwrap_iters<__copy_impl<_AlgPolicy> >( - std::move(__first), std::move(__last), std::move(__result)); + return std::__copy_move_unwrap_iters<__copy_impl>(std::move(__first), std::move(__last), std::move(__result)); } template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator copy(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { - return std::__copy<_ClassicAlgPolicy>(__first, __last, __result).second; + return std::__copy(__first, __last, __result).second; } _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__algorithm/copy_backward.h b/lib/libcxx/include/__algorithm/copy_backward.h index 73dc846a975a..48a768f577f5 100644 --- a/lib/libcxx/include/__algorithm/copy_backward.h +++ b/lib/libcxx/include/__algorithm/copy_backward.h @@ -13,8 +13,10 @@ #include <__algorithm/iterator_operations.h> #include <__algorithm/min.h> #include <__config> +#include <__iterator/iterator_traits.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/common_type.h> +#include <__type_traits/enable_if.h> #include <__type_traits/is_constructible.h> #include <__utility/move.h> #include <__utility/pair.h> diff --git a/lib/libcxx/include/__algorithm/copy_if.h b/lib/libcxx/include/__algorithm/copy_if.h index 228e4d22323e..ffea621fc061 100644 --- a/lib/libcxx/include/__algorithm/copy_if.h +++ b/lib/libcxx/include/__algorithm/copy_if.h @@ -10,25 +10,41 @@ #define _LIBCPP___ALGORITHM_COPY_IF_H #include <__config> +#include 
<__functional/identity.h> +#include <__type_traits/invoke.h> +#include <__utility/move.h> +#include <__utility/pair.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator -copy_if(_InputIterator __first, _InputIterator __last, _OutputIterator __result, _Predicate __pred) { +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> +__copy_if(_InIter __first, _Sent __last, _OutIter __result, _Pred& __pred, _Proj& __proj) { for (; __first != __last; ++__first) { - if (__pred(*__first)) { + if (std::__invoke(__pred, std::__invoke(__proj, *__first))) { *__result = *__first; ++__result; } } - return __result; + return std::make_pair(std::move(__first), std::move(__result)); +} + +template +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator +copy_if(_InputIterator __first, _InputIterator __last, _OutputIterator __result, _Predicate __pred) { + __identity __proj; + return std::__copy_if(__first, __last, __result, __pred, __proj).second; } _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_COPY_IF_H diff --git a/lib/libcxx/include/__algorithm/copy_move_common.h b/lib/libcxx/include/__algorithm/copy_move_common.h index 8a98451a8f96..7471012c01d9 100644 --- a/lib/libcxx/include/__algorithm/copy_move_common.h +++ b/lib/libcxx/include/__algorithm/copy_move_common.h @@ -9,10 +9,10 @@ #ifndef _LIBCPP___ALGORITHM_COPY_MOVE_COMMON_H #define _LIBCPP___ALGORITHM_COPY_MOVE_COMMON_H -#include <__algorithm/iterator_operations.h> #include <__algorithm/unwrap_iter.h> #include <__algorithm/unwrap_range.h> #include <__config> +#include <__cstddef/size_t.h> #include <__iterator/iterator_traits.h> #include <__memory/pointer_traits.h> #include <__string/constexpr_c_functions.h> @@ -24,7 +24,6 @@ #include 
<__type_traits/is_volatile.h> #include <__utility/move.h> #include <__utility/pair.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__algorithm/count.h b/lib/libcxx/include/__algorithm/count.h index 1cfe7f631ac1..cd9125779ec6 100644 --- a/lib/libcxx/include/__algorithm/count.h +++ b/lib/libcxx/include/__algorithm/count.h @@ -16,9 +16,10 @@ #include <__bit/popcount.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -43,7 +44,7 @@ __count(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) { // __bit_iterator implementation template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename __bit_iterator<_Cp, _IsConst>::difference_type -__count_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { +__count_bool(__bit_iterator<_Cp, _IsConst> __first, typename __size_difference_type_traits<_Cp>::size_type __n) { using _It = __bit_iterator<_Cp, _IsConst>; using __storage_type = typename _It::__storage_type; using difference_type = typename _It::difference_type; @@ -74,12 +75,14 @@ template > __count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value, _Proj&) { if (__value) - return std::__count_bool(__first, static_cast(__last - __first)); - return std::__count_bool(__first, static_cast(__last - __first)); + return std::__count_bool( + __first, static_cast::size_type>(__last - __first)); + return std::__count_bool( + __first, static_cast::size_type>(__last - __first)); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator> count(_InputIterator __first, _InputIterator __last, const _Tp& __value) { __identity __proj; return std::__count<_ClassicAlgPolicy>(__first, __last, __value, __proj); diff --git a/lib/libcxx/include/__algorithm/count_if.h b/lib/libcxx/include/__algorithm/count_if.h index 25782069d032..26f945e6bd98 100644 --- a/lib/libcxx/include/__algorithm/count_if.h +++ b/lib/libcxx/include/__algorithm/count_if.h @@ -10,8 +10,11 @@ #ifndef _LIBCPP___ALGORITHM_COUNT_IF_H #define _LIBCPP___ALGORITHM_COUNT_IF_H +#include <__algorithm/iterator_operations.h> #include <__config> +#include <__functional/identity.h> #include <__iterator/iterator_traits.h> +#include <__type_traits/invoke.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -19,15 +22,23 @@ _LIBCPP_BEGIN_NAMESPACE_STD +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __policy_iter_diff_t<_AlgPolicy, _Iter> +__count_if(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { + __policy_iter_diff_t<_AlgPolicy, _Iter> __counter(0); + for (; __first != __last; ++__first) { + if (std::__invoke(__pred, std::__invoke(__proj, *__first))) + ++__counter; + } + return __counter; +} + template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename iterator_traits<_InputIterator>::difference_type count_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) { - typename iterator_traits<_InputIterator>::difference_type __r(0); - for (; __first != __last; ++__first) - if (__pred(*__first)) - ++__r; - return __r; + __identity __proj; + return std::__count_if<_ClassicAlgPolicy>(__first, __last, __pred, __proj); } _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__algorithm/equal.h b/lib/libcxx/include/__algorithm/equal.h index bfc8f72f6eb1..a276bb9954c9 100644 --- a/lib/libcxx/include/__algorithm/equal.h 
+++ b/lib/libcxx/include/__algorithm/equal.h @@ -14,13 +14,12 @@ #include <__algorithm/unwrap_iter.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__string/constexpr_c_functions.h> #include <__type_traits/desugars_to.h> #include <__type_traits/enable_if.h> -#include <__type_traits/is_constant_evaluated.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_equality_comparable.h> #include <__type_traits/is_volatile.h> #include <__utility/move.h> @@ -35,7 +34,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl( _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate& __pred) { for (; __first1 != __last1; ++__first1, (void)++__first2) if (!__pred(*__first1, *__first2)) @@ -49,20 +48,20 @@ template && !is_volatile<_Tp>::value && !is_volatile<_Up>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_iter_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _BinaryPredicate&) { return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1)); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) { return std::__equal_iter_impl( std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred); } template 
-_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2) { return std::equal(__first1, __last1, __first2, __equal_to()); } @@ -70,7 +69,7 @@ equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first #if _LIBCPP_STD_VER >= 14 template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl( _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred& __comp, _Proj1& __proj1, _Proj2& __proj2) { while (__first1 != __last1 && __first2 != __last2) { if (!std::__invoke(__comp, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) @@ -90,13 +89,13 @@ template ::value && !is_volatile<_Tp>::value && !is_volatile<_Up>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&, _Proj2&) { return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1)); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, @@ -119,7 +118,7 @@ equal(_InputIterator1 __first1, } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 
__first2, _InputIterator2 __last2) { return std::equal(__first1, __last1, __first2, __last2, __equal_to()); } diff --git a/lib/libcxx/include/__algorithm/equal_range.h b/lib/libcxx/include/__algorithm/equal_range.h index 09bbf8f00602..ff6f4f2225c7 100644 --- a/lib/libcxx/include/__algorithm/equal_range.h +++ b/lib/libcxx/include/__algorithm/equal_range.h @@ -17,11 +17,7 @@ #include <__algorithm/upper_bound.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> -#include <__iterator/advance.h> -#include <__iterator/distance.h> -#include <__iterator/iterator_traits.h> -#include <__iterator/next.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_callable.h> #include <__type_traits/is_constructible.h> #include <__utility/move.h> @@ -60,9 +56,9 @@ __equal_range(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator> equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { - static_assert(__is_callable<_Compare, decltype(*__first), const _Tp&>::value, "The comparator has to be callable"); + static_assert(__is_callable<_Compare&, decltype(*__first), const _Tp&>::value, "The comparator has to be callable"); static_assert(is_copy_constructible<_ForwardIterator>::value, "Iterator has to be copy constructible"); return std::__equal_range<_ClassicAlgPolicy>( std::move(__first), @@ -73,7 +69,7 @@ equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator> equal_range(_ForwardIterator 
__first, _ForwardIterator __last, const _Tp& __value) { return std::equal_range(std::move(__first), std::move(__last), __value, __less<>()); } diff --git a/lib/libcxx/include/__algorithm/fill_n.h b/lib/libcxx/include/__algorithm/fill_n.h index f29633f88087..a7e01c45b922 100644 --- a/lib/libcxx/include/__algorithm/fill_n.h +++ b/lib/libcxx/include/__algorithm/fill_n.h @@ -12,7 +12,6 @@ #include <__algorithm/min.h> #include <__config> #include <__fwd/bit_reference.h> -#include <__iterator/iterator_traits.h> #include <__memory/pointer_traits.h> #include <__utility/convert_to_integral.h> @@ -33,7 +32,7 @@ __fill_n(_OutputIterator __first, _Size __n, const _Tp& __value); template _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void -__fill_n_bool(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) { +__fill_n_bool(__bit_iterator<_Cp, false> __first, typename __size_difference_type_traits<_Cp>::size_type __n) { using _It = __bit_iterator<_Cp, false>; using __storage_type = typename _It::__storage_type; diff --git a/lib/libcxx/include/__algorithm/find.h b/lib/libcxx/include/__algorithm/find.h index 7f58dbb13a57..24b8b2f96443 100644 --- a/lib/libcxx/include/__algorithm/find.h +++ b/lib/libcxx/include/__algorithm/find.h @@ -17,17 +17,18 @@ #include <__bit/invert_if.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__fwd/bit_reference.h> #include <__iterator/segmented_iterator.h> #include <__string/constexpr_c_functions.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_equality_comparable.h> #include <__type_traits/is_integral.h> -#include <__type_traits/is_same.h> #include <__type_traits/is_signed.h> #include <__utility/move.h> #include -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS # include #endif @@ -63,7 +64,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __find(_Tp* __first, _T return __last; } -#ifndef 
_LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS template _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cp, _IsConst> -__find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n) { +__find_bool(__bit_iterator<_Cp, _IsConst> __first, typename __size_difference_type_traits<_Cp>::size_type __n) { using _It = __bit_iterator<_Cp, _IsConst>; using __storage_type = typename _It::__storage_type; @@ -134,8 +135,10 @@ template __find(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value, _Proj&) { if (static_cast(__value)) - return std::__find_bool(__first, static_cast(__last - __first)); - return std::__find_bool(__first, static_cast(__last - __first)); + return std::__find_bool( + __first, static_cast::size_type>(__last - __first)); + return std::__find_bool( + __first, static_cast::size_type>(__last - __first)); } // segmented iterator implementation @@ -167,7 +170,7 @@ struct __find_segment { // public API template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator find(_InputIterator __first, _InputIterator __last, const _Tp& __value) { __identity __proj; return std::__rewrap_iter( diff --git a/lib/libcxx/include/__algorithm/find_end.h b/lib/libcxx/include/__algorithm/find_end.h index 7e08e7953534..86b4a3e2e368 100644 --- a/lib/libcxx/include/__algorithm/find_end.h +++ b/lib/libcxx/include/__algorithm/find_end.h @@ -12,14 +12,10 @@ #include <__algorithm/comp.h> #include <__algorithm/iterator_operations.h> -#include <__algorithm/search.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> -#include <__iterator/advance.h> #include <__iterator/iterator_traits.h> -#include <__iterator/next.h> -#include <__iterator/reverse_iterator.h> +#include <__type_traits/invoke.h> #include <__utility/pair.h> #if 
!defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -80,111 +76,8 @@ _LIBCPP_HIDE_FROM_ABI inline _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1> } } -template < class _IterOps, - class _Pred, - class _Iter1, - class _Sent1, - class _Iter2, - class _Sent2, - class _Proj1, - class _Proj2> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter1 __find_end( - _Iter1 __first1, - _Sent1 __sent1, - _Iter2 __first2, - _Sent2 __sent2, - _Pred& __pred, - _Proj1& __proj1, - _Proj2& __proj2, - bidirectional_iterator_tag, - bidirectional_iterator_tag) { - auto __last1 = _IterOps::next(__first1, __sent1); - auto __last2 = _IterOps::next(__first2, __sent2); - // modeled after search algorithm (in reverse) - if (__first2 == __last2) - return __last1; // Everything matches an empty sequence - _Iter1 __l1 = __last1; - _Iter2 __l2 = __last2; - --__l2; - while (true) { - // Find last element in sequence 1 that matchs *(__last2-1), with a mininum of loop checks - while (true) { - if (__first1 == __l1) // return __last1 if no element matches *__first2 - return __last1; - if (std::__invoke(__pred, std::__invoke(__proj1, *--__l1), std::__invoke(__proj2, *__l2))) - break; - } - // *__l1 matches *__l2, now match elements before here - _Iter1 __m1 = __l1; - _Iter2 __m2 = __l2; - while (true) { - if (__m2 == __first2) // If pattern exhausted, __m1 is the answer (works for 1 element pattern) - return __m1; - if (__m1 == __first1) // Otherwise if source exhaused, pattern not found - return __last1; - - // if there is a mismatch, restart with a new __l1 - if (!std::__invoke(__pred, std::__invoke(__proj1, *--__m1), std::__invoke(__proj2, *--__m2))) { - break; - } // else there is a match, check next elements - } - } -} - -template < class _AlgPolicy, - class _Pred, - class _Iter1, - class _Sent1, - class _Iter2, - class _Sent2, - class _Proj1, - class _Proj2> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter1 __find_end( - _Iter1 __first1, - _Sent1 __sent1, - _Iter2 __first2, - 
_Sent2 __sent2, - _Pred& __pred, - _Proj1& __proj1, - _Proj2& __proj2, - random_access_iterator_tag, - random_access_iterator_tag) { - typedef typename iterator_traits<_Iter1>::difference_type _D1; - auto __last1 = _IterOps<_AlgPolicy>::next(__first1, __sent1); - auto __last2 = _IterOps<_AlgPolicy>::next(__first2, __sent2); - // Take advantage of knowing source and pattern lengths. Stop short when source is smaller than pattern - auto __len2 = __last2 - __first2; - if (__len2 == 0) - return __last1; - auto __len1 = __last1 - __first1; - if (__len1 < __len2) - return __last1; - const _Iter1 __s = __first1 + _D1(__len2 - 1); // End of pattern match can't go before here - _Iter1 __l1 = __last1; - _Iter2 __l2 = __last2; - --__l2; - while (true) { - while (true) { - if (__s == __l1) - return __last1; - if (std::__invoke(__pred, std::__invoke(__proj1, *--__l1), std::__invoke(__proj2, *__l2))) - break; - } - _Iter1 __m1 = __l1; - _Iter2 __m2 = __l2; - while (true) { - if (__m2 == __first2) - return __m1; - // no need to check range on __m1 because __s guarantees we have enough source - if (!std::__invoke(__pred, std::__invoke(__proj1, *--__m1), std::__invoke(*--__m2))) { - break; - } - } - } -} - template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_end_classic( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_end_classic( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, @@ -205,7 +98,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Fo } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_end( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_end( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, @@ -215,7 +108,7 @@ _LIBCPP_NODISCARD inline 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Fo } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_end(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) { return std::find_end(__first1, __last1, __first2, __last2, __equal_to()); } diff --git a/lib/libcxx/include/__algorithm/find_first_of.h b/lib/libcxx/include/__algorithm/find_first_of.h index 6b99f562f880..45ec13315437 100644 --- a/lib/libcxx/include/__algorithm/find_first_of.h +++ b/lib/libcxx/include/__algorithm/find_first_of.h @@ -12,7 +12,6 @@ #include <__algorithm/comp.h> #include <__config> -#include <__iterator/iterator_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -35,7 +34,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_fir } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, @@ -45,7 +44,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Fo } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) { return std::__find_first_of_ce(__first1, __last1, __first2, __last2, __equal_to()); } diff --git a/lib/libcxx/include/__algorithm/find_if.h b/lib/libcxx/include/__algorithm/find_if.h index 22092d352b06..fd63bcc3a50d 100644 --- 
a/lib/libcxx/include/__algorithm/find_if.h +++ b/lib/libcxx/include/__algorithm/find_if.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator find_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) { for (; __first != __last; ++__first) if (__pred(*__first)) diff --git a/lib/libcxx/include/__algorithm/find_if_not.h b/lib/libcxx/include/__algorithm/find_if_not.h index cc2001967f0c..b4441b297c4b 100644 --- a/lib/libcxx/include/__algorithm/find_if_not.h +++ b/lib/libcxx/include/__algorithm/find_if_not.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator find_if_not(_InputIterator __first, _InputIterator __last, _Predicate __pred) { for (; __first != __last; ++__first) if (!__pred(*__first)) diff --git a/lib/libcxx/include/__algorithm/for_each.h b/lib/libcxx/include/__algorithm/for_each.h index 259e527f87f9..e08f583504c0 100644 --- a/lib/libcxx/include/__algorithm/for_each.h +++ b/lib/libcxx/include/__algorithm/for_each.h @@ -14,7 +14,6 @@ #include <__config> #include <__iterator/segmented_iterator.h> #include <__ranges/movable_box.h> -#include <__type_traits/enable_if.h> #include <__utility/in_place.h> #include <__utility/move.h> diff --git a/lib/libcxx/include/__algorithm/includes.h b/lib/libcxx/include/__algorithm/includes.h index 62af03c37426..bc6c6579693b 100644 --- a/lib/libcxx/include/__algorithm/includes.h +++ b/lib/libcxx/include/__algorithm/includes.h @@ -13,8 +13,7 @@ #include <__algorithm/comp_ref_type.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> -#include <__iterator/iterator_traits.h> +#include <__type_traits/invoke.h> 
#include <__type_traits/is_callable.h> #include <__utility/move.h> @@ -47,14 +46,14 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __includes( } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2, _Compare __comp) { static_assert( - __is_callable<_Compare, decltype(*__first1), decltype(*__first2)>::value, "Comparator has to be callable"); + __is_callable<_Compare&, decltype(*__first1), decltype(*__first2)>::value, "The comparator has to be callable"); return std::__includes( std::move(__first1), @@ -67,7 +66,7 @@ includes(_InputIterator1 __first1, } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) { return std::includes(std::move(__first1), std::move(__last1), std::move(__first2), std::move(__last2), __less<>()); } diff --git a/lib/libcxx/include/__algorithm/inplace_merge.h b/lib/libcxx/include/__algorithm/inplace_merge.h index a6bcc66a2fa4..1fc31b66f4bd 100644 --- a/lib/libcxx/include/__algorithm/inplace_merge.h +++ b/lib/libcxx/include/__algorithm/inplace_merge.h @@ -18,16 +18,15 @@ #include <__algorithm/rotate.h> #include <__algorithm/upper_bound.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__functional/identity.h> -#include <__iterator/advance.h> -#include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__iterator/reverse_iterator.h> #include <__memory/destruct_n.h> -#include <__memory/temporary_buffer.h> #include <__memory/unique_ptr.h> +#include <__memory/unique_temporary_buffer.h> +#include <__utility/move.h> #include <__utility/pair.h> 
-#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -45,17 +44,17 @@ class __invert // invert the sense of a comparison _Predicate __p_; public: - _LIBCPP_HIDE_FROM_ABI __invert() {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 __invert() {} - _LIBCPP_HIDE_FROM_ABI explicit __invert(_Predicate __p) : __p_(__p) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit __invert(_Predicate __p) : __p_(__p) {} template - _LIBCPP_HIDE_FROM_ABI bool operator()(const _T1& __x) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool operator()(const _T1& __x) { return !__p_(__x); } template - _LIBCPP_HIDE_FROM_ABI bool operator()(const _T1& __x, const _T2& __y) { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 bool operator()(const _T1& __x, const _T2& __y) { return __p_(__y, __x); } }; @@ -67,7 +66,7 @@ template -_LIBCPP_HIDE_FROM_ABI void __half_inplace_merge( +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __half_inplace_merge( _InputIterator1 __first1, _Sent1 __last1, _InputIterator2 __first2, @@ -92,7 +91,7 @@ _LIBCPP_HIDE_FROM_ABI void __half_inplace_merge( } template -_LIBCPP_HIDE_FROM_ABI void __buffered_inplace_merge( +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __buffered_inplace_merge( _BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, @@ -123,7 +122,7 @@ _LIBCPP_HIDE_FROM_ABI void __buffered_inplace_merge( } template -void __inplace_merge( +_LIBCPP_CONSTEXPR_SINCE_CXX26 void __inplace_merge( _BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, @@ -208,16 +207,19 @@ _LIBCPP_HIDE_FROM_ABI void __inplace_merge( _BidirectionalIterator __first, _BidirectionalIterator __middle, _BidirectionalIterator __last, _Compare&& __comp) { typedef typename iterator_traits<_BidirectionalIterator>::value_type value_type; typedef typename iterator_traits<_BidirectionalIterator>::difference_type 
difference_type; - difference_type __len1 = _IterOps<_AlgPolicy>::distance(__first, __middle); - difference_type __len2 = _IterOps<_AlgPolicy>::distance(__middle, __last); - difference_type __buf_size = std::min(__len1, __len2); - // TODO: Remove the use of std::get_temporary_buffer - _LIBCPP_SUPPRESS_DEPRECATED_PUSH - pair __buf = std::get_temporary_buffer(__buf_size); - _LIBCPP_SUPPRESS_DEPRECATED_POP - unique_ptr __h(__buf.first); + difference_type __len1 = _IterOps<_AlgPolicy>::distance(__first, __middle); + difference_type __len2 = _IterOps<_AlgPolicy>::distance(__middle, __last); + difference_type __buf_size = std::min(__len1, __len2); + __unique_temporary_buffer __unique_buf = std::__allocate_unique_temporary_buffer(__buf_size); return std::__inplace_merge<_AlgPolicy>( - std::move(__first), std::move(__middle), std::move(__last), __comp, __len1, __len2, __buf.first, __buf.second); + std::move(__first), + std::move(__middle), + std::move(__last), + __comp, + __len1, + __len2, + __unique_buf.get(), + __unique_buf.get_deleter().__count_); } template diff --git a/lib/libcxx/include/__algorithm/is_heap.h b/lib/libcxx/include/__algorithm/is_heap.h index c589b804a5dc..dfe06200cedc 100644 --- a/lib/libcxx/include/__algorithm/is_heap.h +++ b/lib/libcxx/include/__algorithm/is_heap.h @@ -13,7 +13,6 @@ #include <__algorithm/comp_ref_type.h> #include <__algorithm/is_heap_until.h> #include <__config> -#include <__iterator/iterator_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -22,13 +21,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { return std::__is_heap_until(__first, __last, static_cast<__comp_ref_type<_Compare> >(__comp)) == __last; } template -_LIBCPP_NODISCARD inline 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) { return std::is_heap(__first, __last, __less<>()); } diff --git a/lib/libcxx/include/__algorithm/is_heap_until.h b/lib/libcxx/include/__algorithm/is_heap_until.h index a174f2453cfc..7444d978e37f 100644 --- a/lib/libcxx/include/__algorithm/is_heap_until.h +++ b/lib/libcxx/include/__algorithm/is_heap_until.h @@ -46,13 +46,13 @@ __is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { return std::__is_heap_until(__first, __last, static_cast<__comp_ref_type<_Compare> >(__comp)); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last) { return std::__is_heap_until(__first, __last, __less<>()); } diff --git a/lib/libcxx/include/__algorithm/is_partitioned.h b/lib/libcxx/include/__algorithm/is_partitioned.h index 1f7c8b0b267e..700e452bbfa6 100644 --- a/lib/libcxx/include/__algorithm/is_partitioned.h +++ b/lib/libcxx/include/__algorithm/is_partitioned.h @@ -18,7 +18,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_partitioned(_InputIterator __first, _InputIterator __last, _Predicate __pred) { for (; __first != __last; ++__first) if (!__pred(*__first)) diff --git 
a/lib/libcxx/include/__algorithm/is_permutation.h b/lib/libcxx/include/__algorithm/is_permutation.h index 2ddfb32a212b..1afb11596bc6 100644 --- a/lib/libcxx/include/__algorithm/is_permutation.h +++ b/lib/libcxx/include/__algorithm/is_permutation.h @@ -14,12 +14,13 @@ #include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/concepts.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> -#include <__iterator/next.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_callable.h> +#include <__type_traits/is_same.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -113,7 +114,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation_impl( // 2+1 iterators, predicate. Not used by range algorithms. template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation( +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation( _ForwardIterator1 __first1, _Sentinel1 __last1, _ForwardIterator2 __first2, _BinaryPredicate&& __pred) { // Shorten sequences as much as possible by lopping of any equal prefix. 
for (; __first1 != __last1; ++__first1, (void)++__first2) { @@ -247,17 +248,17 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation( // 2+1 iterators, predicate template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation( +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _BinaryPredicate __pred) { - static_assert(__is_callable<_BinaryPredicate, decltype(*__first1), decltype(*__first2)>::value, - "The predicate has to be callable"); + static_assert(__is_callable<_BinaryPredicate&, decltype(*__first1), decltype(*__first2)>::value, + "The comparator has to be callable"); return std::__is_permutation<_ClassicAlgPolicy>(std::move(__first1), std::move(__last1), std::move(__first2), __pred); } // 2+1 iterators template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2) { return std::is_permutation(__first1, __last1, __first2, __equal_to()); } @@ -266,7 +267,7 @@ is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIt // 2+2 iterators template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) { return std::__is_permutation<_ClassicAlgPolicy>( std::move(__first1), @@ -280,14 +281,14 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 boo // 2+2 iterators, predicate template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation( 
+[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation( _ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _BinaryPredicate __pred) { - static_assert(__is_callable<_BinaryPredicate, decltype(*__first1), decltype(*__first2)>::value, - "The predicate has to be callable"); + static_assert(__is_callable<_BinaryPredicate&, decltype(*__first1), decltype(*__first2)>::value, + "The comparator has to be callable"); return std::__is_permutation<_ClassicAlgPolicy>( std::move(__first1), diff --git a/lib/libcxx/include/__algorithm/is_sorted.h b/lib/libcxx/include/__algorithm/is_sorted.h index 3befb1ac9c26..196ae0beec01 100644 --- a/lib/libcxx/include/__algorithm/is_sorted.h +++ b/lib/libcxx/include/__algorithm/is_sorted.h @@ -13,7 +13,6 @@ #include <__algorithm/comp_ref_type.h> #include <__algorithm/is_sorted_until.h> #include <__config> -#include <__iterator/iterator_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -22,13 +21,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_sorted(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { return std::__is_sorted_until<__comp_ref_type<_Compare> >(__first, __last, __comp) == __last; } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_sorted(_ForwardIterator __first, _ForwardIterator __last) { return std::is_sorted(__first, __last, __less<>()); } diff --git a/lib/libcxx/include/__algorithm/is_sorted_until.h b/lib/libcxx/include/__algorithm/is_sorted_until.h index 53a49f00de31..606641949db9 100644 --- a/lib/libcxx/include/__algorithm/is_sorted_until.h +++ 
b/lib/libcxx/include/__algorithm/is_sorted_until.h @@ -12,7 +12,6 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> #include <__config> -#include <__iterator/iterator_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -35,13 +34,13 @@ __is_sorted_until(_ForwardIterator __first, _ForwardIterator __last, _Compare __ } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator is_sorted_until(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { return std::__is_sorted_until<__comp_ref_type<_Compare> >(__first, __last, __comp); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator is_sorted_until(_ForwardIterator __first, _ForwardIterator __last) { return std::is_sorted_until(__first, __last, __less<>()); } diff --git a/lib/libcxx/include/__algorithm/iterator_operations.h b/lib/libcxx/include/__algorithm/iterator_operations.h index 8ced989233bc..e5c89c1e67e3 100644 --- a/lib/libcxx/include/__algorithm/iterator_operations.h +++ b/lib/libcxx/include/__algorithm/iterator_operations.h @@ -48,13 +48,13 @@ struct _RangeAlgPolicy {}; template <> struct _IterOps<_RangeAlgPolicy> { template - using __value_type = iter_value_t<_Iter>; + using __value_type _LIBCPP_NODEBUG = iter_value_t<_Iter>; template - using __iterator_category = ranges::__iterator_concept<_Iter>; + using __iterator_category _LIBCPP_NODEBUG = ranges::__iterator_concept<_Iter>; template - using __difference_type = iter_difference_t<_Iter>; + using __difference_type _LIBCPP_NODEBUG = iter_difference_t<_Iter>; static constexpr auto advance = ranges::advance; static constexpr auto distance = ranges::distance; @@ -72,13 +72,13 @@ struct _ClassicAlgPolicy 
{}; template <> struct _IterOps<_ClassicAlgPolicy> { template - using __value_type = typename iterator_traits<_Iter>::value_type; + using __value_type _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::value_type; template - using __iterator_category = typename iterator_traits<_Iter>::iterator_category; + using __iterator_category _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::iterator_category; template - using __difference_type = typename iterator_traits<_Iter>::difference_type; + using __difference_type _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::difference_type; // advance template @@ -94,10 +94,10 @@ struct _IterOps<_ClassicAlgPolicy> { } template - using __deref_t = decltype(*std::declval<_Iter&>()); + using __deref_t _LIBCPP_NODEBUG = decltype(*std::declval<_Iter&>()); template - using __move_t = decltype(std::move(*std::declval<_Iter&>())); + using __move_t _LIBCPP_NODEBUG = decltype(std::move(*std::declval<_Iter&>())); template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 static void __validate_iter_reference() { @@ -216,6 +216,9 @@ struct _IterOps<_ClassicAlgPolicy> { } }; +template +using __policy_iter_diff_t _LIBCPP_NODEBUG = typename _IterOps<_AlgPolicy>::template __difference_type<_Iter>; + _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS diff --git a/lib/libcxx/include/__algorithm/lexicographical_compare.h b/lib/libcxx/include/__algorithm/lexicographical_compare.h index edc29e269c88..ebe7e3b56a29 100644 --- a/lib/libcxx/include/__algorithm/lexicographical_compare.h +++ b/lib/libcxx/include/__algorithm/lexicographical_compare.h @@ -10,48 +10,120 @@ #define _LIBCPP___ALGORITHM_LEXICOGRAPHICAL_COMPARE_H #include <__algorithm/comp.h> -#include <__algorithm/comp_ref_type.h> +#include <__algorithm/min.h> +#include <__algorithm/mismatch.h> +#include <__algorithm/simd_utils.h> +#include <__algorithm/unwrap_iter.h> #include <__config> +#include <__functional/identity.h> #include <__iterator/iterator_traits.h> +#include 
<__string/constexpr_c_functions.h> +#include <__type_traits/desugars_to.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_equality_comparable.h> +#include <__type_traits/is_integral.h> +#include <__type_traits/is_trivially_lexicographically_comparable.h> +#include <__type_traits/is_volatile.h> + +#if _LIBCPP_HAS_WIDE_CHARACTERS +# include +#endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD -template +template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __lexicographical_compare( - _InputIterator1 __first1, - _InputIterator1 __last1, - _InputIterator2 __first2, - _InputIterator2 __last2, - _Compare __comp) { - for (; __first2 != __last2; ++__first1, (void)++__first2) { - if (__first1 == __last1 || __comp(*__first1, *__first2)) + _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Comp& __comp, _Proj1& __proj1, _Proj2& __proj2) { + while (__first2 != __last2) { + if (__first1 == __last1 || + std::__invoke(__comp, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) return true; - if (__comp(*__first2, *__first1)) + if (std::__invoke(__comp, std::__invoke(__proj2, *__first2), std::__invoke(__proj1, *__first1))) return false; + ++__first1; + ++__first2; } return false; } +#if _LIBCPP_STD_VER >= 14 + +// If the comparison operation is equivalent to < and that is a total order, we know that we can use equality comparison +// on that type instead to extract some information. Furthermore, if equality comparison on that type is trivial, the +// user can't observe that we're calling it. So instead of using the user-provided total order, we use std::mismatch, +// which uses equality comparison (and is vertorized). 
Additionally, if the type is trivially lexicographically +// comparable, we can go one step further and use std::memcmp directly instead of calling std::mismatch. +template && !is_volatile<_Tp>::value && + __libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value && + __is_identity<_Proj1>::value && __is_identity<_Proj2>::value, + int> = 0> +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +__lexicographical_compare(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Tp* __last2, _Comp&, _Proj1&, _Proj2&) { + if constexpr (__is_trivially_lexicographically_comparable_v<_Tp, _Tp>) { + auto __res = + std::__constexpr_memcmp(__first1, __first2, __element_count(std::min(__last1 - __first1, __last2 - __first2))); + if (__res == 0) + return __last1 - __first1 < __last2 - __first2; + return __res < 0; + } +# if _LIBCPP_HAS_WIDE_CHARACTERS + else if constexpr (is_same<__remove_cv_t<_Tp>, wchar_t>::value) { + auto __res = std::__constexpr_wmemcmp(__first1, __first2, std::min(__last1 - __first1, __last2 - __first2)); + if (__res == 0) + return __last1 - __first1 < __last2 - __first2; + return __res < 0; + } +# endif // _LIBCPP_HAS_WIDE_CHARACTERS + else { + auto __res = std::mismatch(__first1, __last1, __first2, __last2); + if (__res.second == __last2) + return false; + if (__res.first == __last1) + return true; + return *__res.first < *__res.second; + } +} + +#endif // _LIBCPP_STD_VER >= 14 + template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare( _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2, _Compare __comp) { - return std::__lexicographical_compare<__comp_ref_type<_Compare> >(__first1, __last1, __first2, __last2, __comp); + __identity __proj; + return std::__lexicographical_compare( + std::__unwrap_iter(__first1), + std::__unwrap_iter(__last1), + 
std::__unwrap_iter(__first2), + std::__unwrap_iter(__last2), + __comp, + __proj, + __proj); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool lexicographical_compare( _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) { return std::lexicographical_compare(__first1, __last1, __first2, __last2, __less<>()); } _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_LEXICOGRAPHICAL_COMPARE_H diff --git a/lib/libcxx/include/__algorithm/lower_bound.h b/lib/libcxx/include/__algorithm/lower_bound.h index c417d8483549..4fba6748e6d7 100644 --- a/lib/libcxx/include/__algorithm/lower_bound.h +++ b/lib/libcxx/include/__algorithm/lower_bound.h @@ -14,12 +14,11 @@ #include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/advance.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_callable.h> -#include <__type_traits/remove_reference.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -28,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lower_bound_bisecting( +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lower_bound_bisecting( _Iter __first, const _Type& __value, typename iterator_traits<_Iter>::difference_type __len, @@ -58,7 +57,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter __lo // whereas the one-sided version will yield O(n) operations on both counts, with a \Omega(log(n)) bound on the number of // comparisons. 
template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator __lower_bound_onesided(_ForwardIterator __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) { // step = 0, ensuring we can always short-circuit when distance is 1 later on if (__first == __last || !std::__invoke(__comp, std::__invoke(__proj, *__first), __value)) @@ -84,22 +83,22 @@ __lower_bound_onesided(_ForwardIterator __first, _Sent __last, const _Type& __va } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator __lower_bound(_ForwardIterator __first, _Sent __last, const _Type& __value, _Comp& __comp, _Proj& __proj) { const auto __dist = _IterOps<_AlgPolicy>::distance(__first, __last); return std::__lower_bound_bisecting<_AlgPolicy>(__first, __value, __dist, __comp, __proj); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { - static_assert(__is_callable<_Compare, decltype(*__first), const _Tp&>::value, "The comparator has to be callable"); + static_assert(__is_callable<_Compare&, decltype(*__first), const _Tp&>::value, "The comparator has to be callable"); auto __proj = std::__identity(); return std::__lower_bound<_ClassicAlgPolicy>(__first, __last, __value, __comp, __proj); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator lower_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { return 
std::lower_bound(__first, __last, __value, __less<>()); } diff --git a/lib/libcxx/include/__algorithm/make_projected.h b/lib/libcxx/include/__algorithm/make_projected.h index 5245e523f3df..4a2582293875 100644 --- a/lib/libcxx/include/__algorithm/make_projected.h +++ b/lib/libcxx/include/__algorithm/make_projected.h @@ -9,15 +9,13 @@ #ifndef _LIBCPP___ALGORITHM_MAKE_PROJECTED_H #define _LIBCPP___ALGORITHM_MAKE_PROJECTED_H -#include <__concepts/same_as.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> #include <__type_traits/decay.h> #include <__type_traits/enable_if.h> -#include <__type_traits/integral_constant.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_member_pointer.h> -#include <__type_traits/is_same.h> #include <__utility/declval.h> #include <__utility/forward.h> @@ -36,16 +34,16 @@ struct _ProjectedPred { : __pred(__pred_arg), __proj(__proj_arg) {} template - typename __invoke_of<_Pred&, decltype(std::__invoke(std::declval<_Proj&>(), std::declval<_Tp>()))>::type - _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI - operator()(_Tp&& __v) const { + __invoke_result_t<_Pred&, decltype(std::__invoke(std::declval<_Proj&>(), std::declval<_Tp>()))> _LIBCPP_CONSTEXPR + _LIBCPP_HIDE_FROM_ABI + operator()(_Tp&& __v) const { return std::__invoke(__pred, std::__invoke(__proj, std::forward<_Tp>(__v))); } template - typename __invoke_of<_Pred&, - decltype(std::__invoke(std::declval<_Proj&>(), std::declval<_T1>())), - decltype(std::__invoke(std::declval<_Proj&>(), std::declval<_T2>()))>::type _LIBCPP_CONSTEXPR + __invoke_result_t<_Pred&, + decltype(std::__invoke(std::declval<_Proj&>(), std::declval<_T1>())), + decltype(std::__invoke(std::declval<_Proj&>(), std::declval<_T2>()))> _LIBCPP_CONSTEXPR _LIBCPP_HIDE_FROM_ABI operator()(_T1&& __lhs, _T2&& __rhs) const { return std::__invoke( diff --git a/lib/libcxx/include/__algorithm/max.h b/lib/libcxx/include/__algorithm/max.h index d4c99f6f3643..1673e6be9123 100644 --- 
a/lib/libcxx/include/__algorithm/max.h +++ b/lib/libcxx/include/__algorithm/max.h @@ -25,13 +25,13 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& max(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b, _Compare __comp) { return __comp(__a, __b) ? __b : __a; } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& max(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b) { return std::max(__a, __b, __less<>()); } @@ -39,13 +39,13 @@ max(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b) #ifndef _LIBCPP_CXX03_LANG template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp max(initializer_list<_Tp> __t, _Compare __comp) { return *std::__max_element<__comp_ref_type<_Compare> >(__t.begin(), __t.end(), __comp); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp max(initializer_list<_Tp> __t) { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp max(initializer_list<_Tp> __t) { return *std::max_element(__t.begin(), __t.end(), __less<>()); } diff --git a/lib/libcxx/include/__algorithm/max_element.h b/lib/libcxx/include/__algorithm/max_element.h index c036726cbccd..929f337fc10a 100644 --- a/lib/libcxx/include/__algorithm/max_element.h +++ b/lib/libcxx/include/__algorithm/max_element.h @@ -13,6 +13,7 @@ #include <__algorithm/comp_ref_type.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__type_traits/is_callable.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC 
system_header @@ -35,13 +36,15 @@ __max_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator max_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { + static_assert( + __is_callable<_Compare&, decltype(*__first), decltype(*__first)>::value, "The comparator has to be callable"); return std::__max_element<__comp_ref_type<_Compare> >(__first, __last, __comp); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator max_element(_ForwardIterator __first, _ForwardIterator __last) { return std::max_element(__first, __last, __less<>()); } diff --git a/lib/libcxx/include/__algorithm/merge.h b/lib/libcxx/include/__algorithm/merge.h index bad663c4b9f1..ae859b7b63ff 100644 --- a/lib/libcxx/include/__algorithm/merge.h +++ b/lib/libcxx/include/__algorithm/merge.h @@ -13,7 +13,6 @@ #include <__algorithm/comp_ref_type.h> #include <__algorithm/copy.h> #include <__config> -#include <__iterator/iterator_traits.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__algorithm/min.h b/lib/libcxx/include/__algorithm/min.h index 1bafad8a461e..660e0b204e19 100644 --- a/lib/libcxx/include/__algorithm/min.h +++ b/lib/libcxx/include/__algorithm/min.h @@ -25,13 +25,13 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& min(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b, _Compare __comp) { return __comp(__b, __a) ? 
__b : __a; } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp& min(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b) { return std::min(__a, __b, __less<>()); } @@ -39,13 +39,13 @@ min(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b) #ifndef _LIBCPP_CXX03_LANG template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp min(initializer_list<_Tp> __t, _Compare __comp) { return *std::__min_element<__comp_ref_type<_Compare> >(__t.begin(), __t.end(), __comp); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp min(initializer_list<_Tp> __t) { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp min(initializer_list<_Tp> __t) { return *std::min_element(__t.begin(), __t.end(), __less<>()); } diff --git a/lib/libcxx/include/__algorithm/min_element.h b/lib/libcxx/include/__algorithm/min_element.h index 65f3594d630c..db996365bf1d 100644 --- a/lib/libcxx/include/__algorithm/min_element.h +++ b/lib/libcxx/include/__algorithm/min_element.h @@ -13,8 +13,8 @@ #include <__algorithm/comp_ref_type.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/iterator_traits.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_callable.h> #include <__utility/move.h> @@ -48,18 +48,18 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter __min_element(_Iter __ } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator min_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { static_assert( 
__has_forward_iterator_category<_ForwardIterator>::value, "std::min_element requires a ForwardIterator"); static_assert( - __is_callable<_Compare, decltype(*__first), decltype(*__first)>::value, "The comparator has to be callable"); + __is_callable<_Compare&, decltype(*__first), decltype(*__first)>::value, "The comparator has to be callable"); return std::__min_element<__comp_ref_type<_Compare> >(std::move(__first), std::move(__last), __comp); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator min_element(_ForwardIterator __first, _ForwardIterator __last) { return std::min_element(__first, __last, __less<>()); } diff --git a/lib/libcxx/include/__algorithm/minmax.h b/lib/libcxx/include/__algorithm/minmax.h index 9feda2b4c0da..de0bec0ef72f 100644 --- a/lib/libcxx/include/__algorithm/minmax.h +++ b/lib/libcxx/include/__algorithm/minmax.h @@ -24,13 +24,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair minmax(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b, _Compare __comp) { return __comp(__b, __a) ? 
pair(__b, __a) : pair(__a, __b); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair minmax(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __b) { return std::minmax(__a, __b, __less<>()); } @@ -38,16 +38,16 @@ minmax(_LIBCPP_LIFETIMEBOUND const _Tp& __a, _LIBCPP_LIFETIMEBOUND const _Tp& __ #ifndef _LIBCPP_CXX03_LANG template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Tp, _Tp> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Tp, _Tp> minmax(initializer_list<_Tp> __t, _Compare __comp) { - static_assert(__is_callable<_Compare, _Tp, _Tp>::value, "The comparator has to be callable"); + static_assert(__is_callable<_Compare&, _Tp, _Tp>::value, "The comparator has to be callable"); __identity __proj; auto __ret = std::__minmax_element_impl(__t.begin(), __t.end(), __comp, __proj); return pair<_Tp, _Tp>(*__ret.first, *__ret.second); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Tp, _Tp> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Tp, _Tp> minmax(initializer_list<_Tp> __t) { return std::minmax(__t, __less<>()); } diff --git a/lib/libcxx/include/__algorithm/minmax_element.h b/lib/libcxx/include/__algorithm/minmax_element.h index 43cb23347c34..dc0c3a818cd5 100644 --- a/lib/libcxx/include/__algorithm/minmax_element.h +++ b/lib/libcxx/include/__algorithm/minmax_element.h @@ -12,8 +12,8 @@ #include <__algorithm/comp.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/iterator_traits.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_callable.h> #include <__utility/pair.h> @@ -79,18 +79,18 @@ __minmax_element_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) } template 
-_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator> minmax_element(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) { static_assert( __has_forward_iterator_category<_ForwardIterator>::value, "std::minmax_element requires a ForwardIterator"); static_assert( - __is_callable<_Compare, decltype(*__first), decltype(*__first)>::value, "The comparator has to be callable"); + __is_callable<_Compare&, decltype(*__first), decltype(*__first)>::value, "The comparator has to be callable"); auto __proj = __identity(); return std::__minmax_element_impl(__first, __last, __comp, __proj); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_ForwardIterator, _ForwardIterator> minmax_element(_ForwardIterator __first, _ForwardIterator __last) { return std::minmax_element(__first, __last, __less<>()); } diff --git a/lib/libcxx/include/__algorithm/mismatch.h b/lib/libcxx/include/__algorithm/mismatch.h index 632bec02406a..a6836792c058 100644 --- a/lib/libcxx/include/__algorithm/mismatch.h +++ b/lib/libcxx/include/__algorithm/mismatch.h @@ -15,17 +15,18 @@ #include <__algorithm/simd_utils.h> #include <__algorithm/unwrap_iter.h> #include <__config> +#include <__cstddef/size_t.h> #include <__functional/identity.h> #include <__iterator/aliasing_iterator.h> +#include <__iterator/iterator_traits.h> #include <__type_traits/desugars_to.h> +#include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_equality_comparable.h> #include <__type_traits/is_integral.h> #include <__utility/move.h> #include <__utility/pair.h> -#include <__utility/unreachable.h> -#include #if 
!defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -37,7 +38,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> __mismatch_loop(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { while (__first1 != __last1) { if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) @@ -49,7 +50,7 @@ __mismatch_loop(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2); } @@ -57,7 +58,7 @@ __mismatch(_Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Pred& __pred, _Pro #if _LIBCPP_VECTORIZE_ALGORITHMS template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter, _Iter> __mismatch_vectorized(_Iter __first1, _Iter __last1, _Iter __first2) { using __value_type = __iter_value_type<_Iter>; constexpr size_t __unroll_count = 4; @@ -124,7 +125,7 @@ template ::value && __desugars_to_v<__equal_tag, _Pred, _Tp, _Tp> && __is_identity<_Proj1>::value && __is_identity<_Proj2>::value, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> __mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred&, _Proj1&, _Proj2&) { return 
std::__mismatch_vectorized(__first1, __last1, __first2); } @@ -137,7 +138,7 @@ template ::value && __is_identity<_Proj2>::value && __can_map_to_integer_v<_Tp> && __libcpp_is_trivially_equality_comparable<_Tp, _Tp>::value, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> __mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { if (__libcpp_is_constant_evaluated()) { return std::__mismatch_loop(__first1, __last1, __first2, __pred, __proj1, __proj2); @@ -150,7 +151,7 @@ __mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Pred& __pred, _Proj1& __ #endif // _LIBCPP_VECTORIZE_ALGORITHMS template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) { __identity __proj; auto __res = std::__mismatch( @@ -159,14 +160,14 @@ mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __fi } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2) { return std::mismatch(__first1, __last1, __first2, __equal_to()); } #if _LIBCPP_STD_VER >= 14 template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> __mismatch( +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> __mismatch( _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, 
_Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { while (__first1 != __last1 && __first2 != __last2) { if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) @@ -178,14 +179,14 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> __mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Tp* __last2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { auto __len = std::min(__last1 - __first1, __last2 - __first2); return std::__mismatch(__first1, __first1 + __len, __first2, __pred, __proj1, __proj2); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, @@ -204,7 +205,7 @@ mismatch(_InputIterator1 __first1, } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) { return std::mismatch(__first1, __last1, __first2, __last2, __equal_to()); } diff --git a/lib/libcxx/include/__algorithm/move.h b/lib/libcxx/include/__algorithm/move.h index 1716d43e2a61..6f3b0eb5d292 100644 --- a/lib/libcxx/include/__algorithm/move.h +++ b/lib/libcxx/include/__algorithm/move.h @@ -14,8 +14,10 @@ #include <__algorithm/iterator_operations.h> #include <__algorithm/min.h> #include <__config> +#include <__iterator/iterator_traits.h> #include <__iterator/segmented_iterator.h> 
#include <__type_traits/common_type.h> +#include <__type_traits/enable_if.h> #include <__type_traits/is_constructible.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -48,7 +50,7 @@ struct __move_impl { template struct _MoveSegment { - using _Traits = __segmented_iterator_traits<_InIter>; + using _Traits _LIBCPP_NODEBUG = __segmented_iterator_traits<_InIter>; _OutIter& __result_; diff --git a/lib/libcxx/include/__algorithm/move_backward.h b/lib/libcxx/include/__algorithm/move_backward.h index 4beb7bdbaac0..24a8d9b24527 100644 --- a/lib/libcxx/include/__algorithm/move_backward.h +++ b/lib/libcxx/include/__algorithm/move_backward.h @@ -13,8 +13,10 @@ #include <__algorithm/iterator_operations.h> #include <__algorithm/min.h> #include <__config> +#include <__iterator/iterator_traits.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/common_type.h> +#include <__type_traits/enable_if.h> #include <__type_traits/is_constructible.h> #include <__utility/move.h> #include <__utility/pair.h> diff --git a/lib/libcxx/include/__algorithm/none_of.h b/lib/libcxx/include/__algorithm/none_of.h index 50841ba17cc6..e6bd19762229 100644 --- a/lib/libcxx/include/__algorithm/none_of.h +++ b/lib/libcxx/include/__algorithm/none_of.h @@ -19,7 +19,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool none_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) { for (; __first != __last; ++__first) if (__pred(*__first)) diff --git a/lib/libcxx/include/__algorithm/partial_sort_copy.h b/lib/libcxx/include/__algorithm/partial_sort_copy.h index ef7c9d34d949..172f53b290d5 100644 --- a/lib/libcxx/include/__algorithm/partial_sort_copy.h +++ b/lib/libcxx/include/__algorithm/partial_sort_copy.h @@ -18,8 +18,8 @@ #include <__algorithm/sort_heap.h> #include <__config> #include <__functional/identity.h> 
-#include <__functional/invoke.h> #include <__iterator/iterator_traits.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_callable.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -76,8 +76,8 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator _RandomAccessIterator __result_first, _RandomAccessIterator __result_last, _Compare __comp) { - static_assert( - __is_callable<_Compare, decltype(*__first), decltype(*__result_first)>::value, "Comparator has to be callable"); + static_assert(__is_callable<_Compare&, decltype(*__first), decltype(*__result_first)>::value, + "The comparator has to be callable"); auto __result = std::__partial_sort_copy<_ClassicAlgPolicy>( __first, diff --git a/lib/libcxx/include/__algorithm/partition.h b/lib/libcxx/include/__algorithm/partition.h index 824e49b9ec21..669aac3b2755 100644 --- a/lib/libcxx/include/__algorithm/partition.h +++ b/lib/libcxx/include/__algorithm/partition.h @@ -12,6 +12,7 @@ #include <__algorithm/iterator_operations.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__type_traits/remove_cvref.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -29,7 +30,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _Forw __partition_impl(_ForwardIterator __first, _Sentinel __last, _Predicate __pred, forward_iterator_tag) { while (true) { if (__first == __last) - return std::make_pair(std::move(__first), std::move(__first)); + return std::make_pair(__first, __first); if (!__pred(*__first)) break; ++__first; diff --git a/lib/libcxx/include/__algorithm/pstl.h b/lib/libcxx/include/__algorithm/pstl.h index 0bb052b3f97c..aa7b49de933c 100644 --- a/lib/libcxx/include/__algorithm/pstl.h +++ b/lib/libcxx/include/__algorithm/pstl.h @@ -18,7 +18,7 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> -#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +#if _LIBCPP_HAS_EXPERIMENTAL_PSTL && _LIBCPP_STD_VER >= 17 # 
include <__functional/operations.h> # include <__iterator/cpp17_iterator_concepts.h> @@ -352,7 +352,7 @@ template , enable_if_t, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool is_partitioned(_ExecutionPolicy&& __policy, _ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { _LIBCPP_REQUIRE_CPP17_FORWARD_ITERATOR(_ForwardIterator, "is_partitioned requires ForwardIterators"); using _Implementation = __pstl::__dispatch<__pstl::__is_partitioned, __pstl::__current_configuration, _RawPolicy>; @@ -656,7 +656,7 @@ _LIBCPP_HIDE_FROM_ABI _ForwardOutIterator transform( _LIBCPP_END_NAMESPACE_STD -#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_PSTL) && _LIBCPP_STD_VER >= 17 +#endif // _LIBCPP_HAS_EXPERIMENTAL_PSTL && _LIBCPP_STD_VER >= 17 _LIBCPP_POP_MACROS diff --git a/lib/libcxx/include/__algorithm/radix_sort.h b/lib/libcxx/include/__algorithm/radix_sort.h new file mode 100644 index 000000000000..de6927995e74 --- /dev/null +++ b/lib/libcxx/include/__algorithm/radix_sort.h @@ -0,0 +1,332 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RADIX_SORT_H +#define _LIBCPP___ALGORITHM_RADIX_SORT_H + +// This is an implementation of classic LSD radix sort algorithm, running in linear time and using `O(max(N, M))` +// additional memory, where `N` is size of an input range, `M` - maximum value of +// a radix of the sorted integer type. Type of the radix and its maximum value are determined at compile time +// based on type returned by function `__radix`. The default radix is uint8. 
+ +// The algorithm is equivalent to several consecutive calls of counting sort for each +// radix of the sorted numbers from low to high byte. +// The algorithm uses a temporary buffer of size equal to size of the input range. Each `i`-th pass +// of the algorithm sorts values by `i`-th radix and moves values to the temporary buffer (for each even `i`, counted +// from zero), or moves them back to the initial range (for each odd `i`). If there is only one radix in sorted integers +// (e.g. int8), the sorted values are placed to the buffer, and then moved back to the initial range. + +// The implementation also has several optimizations: +// - the counters for the counting sort are calculated in one pass for all radices; +// - if all values of a radix are the same, we do not sort that radix, and just move items to the buffer; +// - if two consecutive radices satisfies condition above, we do nothing for these two radices. + +#include <__algorithm/for_each.h> +#include <__algorithm/move.h> +#include <__bit/bit_log2.h> +#include <__bit/countl.h> +#include <__config> +#include <__functional/identity.h> +#include <__iterator/distance.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/move_iterator.h> +#include <__iterator/next.h> +#include <__iterator/reverse_iterator.h> +#include <__numeric/partial_sum.h> +#include <__type_traits/decay.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_assignable.h> +#include <__type_traits/is_integral.h> +#include <__type_traits/is_unsigned.h> +#include <__type_traits/make_unsigned.h> +#include <__utility/forward.h> +#include <__utility/integer_sequence.h> +#include <__utility/move.h> +#include <__utility/pair.h> +#include +#include +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 14 + +template 
+_LIBCPP_HIDE_FROM_ABI pair<_OutputIterator, __iter_value_type<_InputIterator>> +__partial_sum_max(_InputIterator __first, _InputIterator __last, _OutputIterator __result) { + if (__first == __last) + return {__result, 0}; + + auto __max = *__first; + __iter_value_type<_InputIterator> __sum = *__first; + *__result = __sum; + + while (++__first != __last) { + if (__max < *__first) { + __max = *__first; + } + __sum = std::move(__sum) + *__first; + *++__result = __sum; + } + return {++__result, __max}; +} + +template +struct __radix_sort_traits { + using __image_type _LIBCPP_NODEBUG = decay_t<__invoke_result_t<_Map, _Value>>; + static_assert(is_unsigned<__image_type>::value); + + using __radix_type _LIBCPP_NODEBUG = decay_t<__invoke_result_t<_Radix, __image_type>>; + static_assert(is_integral<__radix_type>::value); + + static constexpr auto __radix_value_range = numeric_limits<__radix_type>::max() + 1; + static constexpr auto __radix_size = std::__bit_log2(__radix_value_range); + static constexpr auto __radix_count = sizeof(__image_type) * CHAR_BIT / __radix_size; +}; + +template +struct __counting_sort_traits { + using __image_type _LIBCPP_NODEBUG = decay_t<__invoke_result_t<_Map, _Value>>; + static_assert(is_unsigned<__image_type>::value); + + static constexpr const auto __value_range = numeric_limits<__image_type>::max() + 1; + static constexpr auto __radix_size = std::__bit_log2(__value_range); +}; + +template +_LIBCPP_HIDE_FROM_ABI auto __nth_radix(size_t __radix_number, _Radix __radix, _Integer __n) { + static_assert(is_unsigned<_Integer>::value); + using __traits = __counting_sort_traits<_Integer, _Radix>; + + return __radix(static_cast<_Integer>(__n >> __traits::__radix_size * __radix_number)); +} + +template +_LIBCPP_HIDE_FROM_ABI void +__collect(_ForwardIterator __first, _ForwardIterator __last, _Map __map, _RandomAccessIterator __counters) { + using __value_type = __iter_value_type<_ForwardIterator>; + using __traits = __counting_sort_traits<__value_type, 
_Map>; + + std::for_each(__first, __last, [&__counters, &__map](const auto& __preimage) { ++__counters[__map(__preimage)]; }); + + const auto __counters_end = __counters + __traits::__value_range; + std::partial_sum(__counters, __counters_end, __counters); +} + +template +_LIBCPP_HIDE_FROM_ABI void +__dispose(_ForwardIterator __first, + _ForwardIterator __last, + _RandomAccessIterator1 __result, + _Map __map, + _RandomAccessIterator2 __counters) { + std::for_each(__first, __last, [&__result, &__counters, &__map](auto&& __preimage) { + auto __index = __counters[__map(__preimage)]++; + __result[__index] = std::move(__preimage); + }); +} + +template +_LIBCPP_HIDE_FROM_ABI bool __collect_impl( + _ForwardIterator __first, + _ForwardIterator __last, + _Map __map, + _Radix __radix, + _RandomAccessIterator1 __counters, + _RandomAccessIterator2 __maximums, + index_sequence<_Radices...>) { + using __value_type = __iter_value_type<_ForwardIterator>; + constexpr auto __radix_value_range = __radix_sort_traits<__value_type, _Map, _Radix>::__radix_value_range; + + auto __previous = numeric_limits<__invoke_result_t<_Map, __value_type>>::min(); + auto __is_sorted = true; + std::for_each(__first, __last, [&__counters, &__map, &__radix, &__previous, &__is_sorted](const auto& __value) { + auto __current = __map(__value); + __is_sorted &= (__current >= __previous); + __previous = __current; + + (++__counters[_Radices][std::__nth_radix(_Radices, __radix, __current)], ...); + }); + + ((__maximums[_Radices] = + std::__partial_sum_max(__counters[_Radices], __counters[_Radices] + __radix_value_range, __counters[_Radices]) + .second), + ...); + + return __is_sorted; +} + +template +_LIBCPP_HIDE_FROM_ABI bool +__collect(_ForwardIterator __first, + _ForwardIterator __last, + _Map __map, + _Radix __radix, + _RandomAccessIterator1 __counters, + _RandomAccessIterator2 __maximums) { + using __value_type = __iter_value_type<_ForwardIterator>; + constexpr auto __radix_count = 
__radix_sort_traits<__value_type, _Map, _Radix>::__radix_count; + return std::__collect_impl( + __first, __last, __map, __radix, __counters, __maximums, make_index_sequence<__radix_count>()); +} + +template +_LIBCPP_HIDE_FROM_ABI void __dispose_backward( + _BidirectionalIterator __first, + _BidirectionalIterator __last, + _RandomAccessIterator1 __result, + _Map __map, + _RandomAccessIterator2 __counters) { + std::for_each(std::make_reverse_iterator(__last), + std::make_reverse_iterator(__first), + [&__result, &__counters, &__map](auto&& __preimage) { + auto __index = --__counters[__map(__preimage)]; + __result[__index] = std::move(__preimage); + }); +} + +template +_LIBCPP_HIDE_FROM_ABI _RandomAccessIterator +__counting_sort_impl(_ForwardIterator __first, _ForwardIterator __last, _RandomAccessIterator __result, _Map __map) { + using __value_type = __iter_value_type<_ForwardIterator>; + using __traits = __counting_sort_traits<__value_type, _Map>; + + __iter_diff_t<_RandomAccessIterator> __counters[__traits::__value_range + 1] = {0}; + + std::__collect(__first, __last, __map, std::next(std::begin(__counters))); + std::__dispose(__first, __last, __result, __map, std::begin(__counters)); + + return __result + __counters[__traits::__value_range]; +} + +template , _Map, _Radix>::__radix_count == 1, + int> = 0> +_LIBCPP_HIDE_FROM_ABI void __radix_sort_impl( + _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, + _RandomAccessIterator2 __buffer, + _Map __map, + _Radix __radix) { + auto __buffer_end = std::__counting_sort_impl(__first, __last, __buffer, [&__map, &__radix](const auto& __value) { + return __radix(__map(__value)); + }); + + std::move(__buffer, __buffer_end, __first); +} + +template < + class _RandomAccessIterator1, + class _RandomAccessIterator2, + class _Map, + class _Radix, + enable_if_t< __radix_sort_traits<__iter_value_type<_RandomAccessIterator1>, _Map, _Radix>::__radix_count % 2 == 0, + int> = 0 > +_LIBCPP_HIDE_FROM_ABI void 
__radix_sort_impl( + _RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, + _RandomAccessIterator2 __buffer_begin, + _Map __map, + _Radix __radix) { + using __value_type = __iter_value_type<_RandomAccessIterator1>; + using __traits = __radix_sort_traits<__value_type, _Map, _Radix>; + + __iter_diff_t<_RandomAccessIterator1> __counters[__traits::__radix_count][__traits::__radix_value_range] = {{0}}; + __iter_diff_t<_RandomAccessIterator1> __maximums[__traits::__radix_count] = {0}; + const auto __is_sorted = std::__collect(__first, __last, __map, __radix, __counters, __maximums); + if (!__is_sorted) { + const auto __range_size = std::distance(__first, __last); + auto __buffer_end = __buffer_begin + __range_size; + for (size_t __radix_number = 0; __radix_number < __traits::__radix_count; __radix_number += 2) { + const auto __n0th_is_single = __maximums[__radix_number] == __range_size; + const auto __n1th_is_single = __maximums[__radix_number + 1] == __range_size; + + if (__n0th_is_single && __n1th_is_single) { + continue; + } + + if (__n0th_is_single) { + std::move(__first, __last, __buffer_begin); + } else { + auto __n0th = [__radix_number, &__map, &__radix](const auto& __v) { + return std::__nth_radix(__radix_number, __radix, __map(__v)); + }; + std::__dispose_backward(__first, __last, __buffer_begin, __n0th, __counters[__radix_number]); + } + + if (__n1th_is_single) { + std::move(__buffer_begin, __buffer_end, __first); + } else { + auto __n1th = [__radix_number, &__map, &__radix](const auto& __v) { + return std::__nth_radix(__radix_number + 1, __radix, __map(__v)); + }; + std::__dispose_backward(__buffer_begin, __buffer_end, __first, __n1th, __counters[__radix_number + 1]); + } + } + } +} + +_LIBCPP_HIDE_FROM_ABI constexpr auto __shift_to_unsigned(bool __b) { return __b; } + +template +_LIBCPP_HIDE_FROM_ABI constexpr auto __shift_to_unsigned(_Ip __n) { + constexpr const auto __min_value = numeric_limits<_Ip>::min(); + return static_cast >(__n ^ 
__min_value); +} + +struct __low_byte_fn { + template + _LIBCPP_HIDE_FROM_ABI constexpr uint8_t operator()(_Ip __integer) const { + static_assert(is_unsigned<_Ip>::value); + + return static_cast(__integer & 0xff); + } +}; + +template +_LIBCPP_HIDE_FROM_ABI void +__radix_sort(_RandomAccessIterator1 __first, + _RandomAccessIterator1 __last, + _RandomAccessIterator2 __buffer, + _Map __map, + _Radix __radix) { + auto __map_to_unsigned = [__map = std::move(__map)](const auto& __x) { return std::__shift_to_unsigned(__map(__x)); }; + std::__radix_sort_impl(__first, __last, __buffer, __map_to_unsigned, __radix); +} + +template +_LIBCPP_HIDE_FROM_ABI void +__radix_sort(_RandomAccessIterator1 __first, _RandomAccessIterator1 __last, _RandomAccessIterator2 __buffer) { + std::__radix_sort(__first, __last, __buffer, __identity{}, __low_byte_fn{}); +} + +#endif // _LIBCPP_STD_VER >= 14 + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ALGORITHM_RADIX_SORT_H diff --git a/lib/libcxx/include/__algorithm/ranges_adjacent_find.h b/lib/libcxx/include/__algorithm/ranges_adjacent_find.h index 3c54f723310a..731142b29e6c 100644 --- a/lib/libcxx/include/__algorithm/ranges_adjacent_find.h +++ b/lib/libcxx/include/__algorithm/ranges_adjacent_find.h @@ -9,9 +9,9 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_ADJACENT_FIND_H #define _LIBCPP___ALGORITHM_RANGES_ADJACENT_FIND_H +#include <__algorithm/adjacent_find.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__functional/ranges_operations.h> #include <__iterator/concepts.h> #include <__iterator/projected.h> @@ -32,30 +32,14 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __adjacent_find { -struct __fn { - template - _LIBCPP_HIDE_FROM_ABI constexpr static _Iter - __adjacent_find_impl(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { - if (__first == __last) - return __first; - - auto __i = __first; - while (++__i != __last) { - if 
(std::invoke(__pred, std::invoke(__proj, *__first), std::invoke(__proj, *__i))) - return __first; - __first = __i; - } - return __i; - } - +struct __adjacent_find { template _Sent, class _Proj = identity, indirect_binary_predicate, projected<_Iter, _Proj>> _Pred = ranges::equal_to> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, _Pred __pred = {}, _Proj __proj = {}) const { - return __adjacent_find_impl(std::move(__first), std::move(__last), __pred, __proj); + return std::__adjacent_find(std::move(__first), std::move(__last), __pred, __proj); } template [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Range> operator()(_Range&& __range, _Pred __pred = {}, _Proj __proj = {}) const { - return __adjacent_find_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); + return std::__adjacent_find(ranges::begin(__range), ranges::end(__range), __pred, __proj); } }; -} // namespace __adjacent_find inline namespace __cpo { -inline constexpr auto adjacent_find = __adjacent_find::__fn{}; +inline constexpr auto adjacent_find = __adjacent_find{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_all_of.h b/lib/libcxx/include/__algorithm/ranges_all_of.h index 2f603b32f32d..c3d6dc08d3c5 100644 --- a/lib/libcxx/include/__algorithm/ranges_all_of.h +++ b/lib/libcxx/include/__algorithm/ranges_all_of.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_ALL_OF_H #define _LIBCPP___ALGORITHM_RANGES_ALL_OF_H +#include <__algorithm/all_of.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -30,24 +31,14 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __all_of { -struct __fn { - template - _LIBCPP_HIDE_FROM_ABI constexpr static bool __all_of_impl(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { - for (; __first != __last; ++__first) { - if (!std::invoke(__pred, std::invoke(__proj, 
*__first))) - return false; - } - return true; - } - +struct __all_of { template _Sent, class _Proj = identity, indirect_unary_predicate> _Pred> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Pred __pred, _Proj __proj = {}) const { - return __all_of_impl(std::move(__first), std::move(__last), __pred, __proj); + return std::__all_of(std::move(__first), std::move(__last), __pred, __proj); } template , _Proj>> _Pred> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { - return __all_of_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); + return std::__all_of(ranges::begin(__range), ranges::end(__range), __pred, __proj); } }; -} // namespace __all_of inline namespace __cpo { -inline constexpr auto all_of = __all_of::__fn{}; +inline constexpr auto all_of = __all_of{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_any_of.h b/lib/libcxx/include/__algorithm/ranges_any_of.h index 205fcecc086e..7f0fd290f87d 100644 --- a/lib/libcxx/include/__algorithm/ranges_any_of.h +++ b/lib/libcxx/include/__algorithm/ranges_any_of.h @@ -9,9 +9,9 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_ANY_OF_H #define _LIBCPP___ALGORITHM_RANGES_ANY_OF_H +#include <__algorithm/any_of.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/concepts.h> #include <__iterator/projected.h> #include <__ranges/access.h> @@ -30,24 +30,14 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __any_of { -struct __fn { - template - _LIBCPP_HIDE_FROM_ABI constexpr static bool __any_of_impl(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { - for (; __first != __last; ++__first) { - if (std::invoke(__pred, std::invoke(__proj, *__first))) - return true; - } - return false; - } - +struct __any_of { template _Sent, class _Proj = identity, 
indirect_unary_predicate> _Pred> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Iter __first, _Sent __last, _Pred __pred = {}, _Proj __proj = {}) const { - return __any_of_impl(std::move(__first), std::move(__last), __pred, __proj); + return std::__any_of(std::move(__first), std::move(__last), __pred, __proj); } template , _Proj>> _Pred> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()(_Range&& __range, _Pred __pred, _Proj __proj = {}) const { - return __any_of_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); + return std::__any_of(ranges::begin(__range), ranges::end(__range), __pred, __proj); } }; -} // namespace __any_of inline namespace __cpo { -inline constexpr auto any_of = __any_of::__fn{}; +inline constexpr auto any_of = __any_of{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_binary_search.h b/lib/libcxx/include/__algorithm/ranges_binary_search.h index 1ef2bd62b599..47bd0997334e 100644 --- a/lib/libcxx/include/__algorithm/ranges_binary_search.h +++ b/lib/libcxx/include/__algorithm/ranges_binary_search.h @@ -32,8 +32,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __binary_search { -struct __fn { +struct __binary_search { template _Sent, class _Type, @@ -57,10 +56,9 @@ struct __fn { return __ret != __last && !std::invoke(__comp, __value, std::invoke(__proj, *__ret)); } }; -} // namespace __binary_search inline namespace __cpo { -inline constexpr auto binary_search = __binary_search::__fn{}; +inline constexpr auto binary_search = __binary_search{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_clamp.h b/lib/libcxx/include/__algorithm/ranges_clamp.h index e6181ef9435e..4bb3e46e73bd 100644 --- a/lib/libcxx/include/__algorithm/ranges_clamp.h +++ b/lib/libcxx/include/__algorithm/ranges_clamp.h @@ -30,8 +30,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { 
-namespace __clamp { -struct __fn { +struct __clamp { template > _Comp = ranges::less> @@ -50,10 +49,9 @@ struct __fn { return __value; } }; -} // namespace __clamp inline namespace __cpo { -inline constexpr auto clamp = __clamp::__fn{}; +inline constexpr auto clamp = __clamp{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_contains.h b/lib/libcxx/include/__algorithm/ranges_contains.h index 4836c3baed17..88de215297e5 100644 --- a/lib/libcxx/include/__algorithm/ranges_contains.h +++ b/lib/libcxx/include/__algorithm/ranges_contains.h @@ -33,8 +33,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __contains { -struct __fn { +struct __contains { template _Sent, class _Type, class _Proj = identity> requires indirect_binary_predicate, const _Type*> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool static @@ -50,10 +49,9 @@ struct __fn { ranges::end(__range); } }; -} // namespace __contains inline namespace __cpo { -inline constexpr auto contains = __contains::__fn{}; +inline constexpr auto contains = __contains{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_contains_subrange.h b/lib/libcxx/include/__algorithm/ranges_contains_subrange.h index 4398c457fd05..e8740d69dbef 100644 --- a/lib/libcxx/include/__algorithm/ranges_contains_subrange.h +++ b/lib/libcxx/include/__algorithm/ranges_contains_subrange.h @@ -35,8 +35,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __contains_subrange { -struct __fn { +struct __contains_subrange { template _Sent1, forward_iterator _Iter2, @@ -81,10 +80,9 @@ struct __fn { return __ret.empty() == false; } }; -} // namespace __contains_subrange inline namespace __cpo { -inline constexpr auto contains_subrange = __contains_subrange::__fn{}; +inline constexpr auto contains_subrange = __contains_subrange{}; } // namespace __cpo } // namespace ranges diff --git 
a/lib/libcxx/include/__algorithm/ranges_copy.h b/lib/libcxx/include/__algorithm/ranges_copy.h index e1d6d32f05f7..a69af9b2bffc 100644 --- a/lib/libcxx/include/__algorithm/ranges_copy.h +++ b/lib/libcxx/include/__algorithm/ranges_copy.h @@ -11,7 +11,6 @@ #include <__algorithm/copy.h> #include <__algorithm/in_out_result.h> -#include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> #include <__iterator/concepts.h> @@ -37,13 +36,12 @@ namespace ranges { template using copy_result = in_out_result<_InIter, _OutIter>; -namespace __copy { -struct __fn { +struct __copy { template _Sent, weakly_incrementable _OutIter> requires indirectly_copyable<_InIter, _OutIter> _LIBCPP_HIDE_FROM_ABI constexpr copy_result<_InIter, _OutIter> operator()(_InIter __first, _Sent __last, _OutIter __result) const { - auto __ret = std::__copy<_RangeAlgPolicy>(std::move(__first), std::move(__last), std::move(__result)); + auto __ret = std::__copy(std::move(__first), std::move(__last), std::move(__result)); return {std::move(__ret.first), std::move(__ret.second)}; } @@ -51,14 +49,13 @@ struct __fn { requires indirectly_copyable, _OutIter> _LIBCPP_HIDE_FROM_ABI constexpr copy_result, _OutIter> operator()(_Range&& __r, _OutIter __result) const { - auto __ret = std::__copy<_RangeAlgPolicy>(ranges::begin(__r), ranges::end(__r), std::move(__result)); + auto __ret = std::__copy(ranges::begin(__r), ranges::end(__r), std::move(__result)); return {std::move(__ret.first), std::move(__ret.second)}; } }; -} // namespace __copy inline namespace __cpo { -inline constexpr auto copy = __copy::__fn{}; +inline constexpr auto copy = __copy{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_copy_backward.h b/lib/libcxx/include/__algorithm/ranges_copy_backward.h index 93e326042503..81d14e465f7f 100644 --- a/lib/libcxx/include/__algorithm/ranges_copy_backward.h +++ b/lib/libcxx/include/__algorithm/ranges_copy_backward.h @@ -35,8 
+35,7 @@ namespace ranges { template using copy_backward_result = in_out_result<_Ip, _Op>; -namespace __copy_backward { -struct __fn { +struct __copy_backward { template _Sent1, bidirectional_iterator _InIter2> requires indirectly_copyable<_InIter1, _InIter2> _LIBCPP_HIDE_FROM_ABI constexpr copy_backward_result<_InIter1, _InIter2> @@ -53,10 +52,9 @@ struct __fn { return {std::move(__ret.first), std::move(__ret.second)}; } }; -} // namespace __copy_backward inline namespace __cpo { -inline constexpr auto copy_backward = __copy_backward::__fn{}; +inline constexpr auto copy_backward = __copy_backward{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_copy_if.h b/lib/libcxx/include/__algorithm/ranges_copy_if.h index 4b41d2154e7f..acf74b669d48 100644 --- a/lib/libcxx/include/__algorithm/ranges_copy_if.h +++ b/lib/libcxx/include/__algorithm/ranges_copy_if.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_COPY_IF_H #define _LIBCPP___ALGORITHM_RANGES_COPY_IF_H +#include <__algorithm/copy_if.h> #include <__algorithm/in_out_result.h> #include <__config> #include <__functional/identity.h> @@ -36,20 +37,7 @@ namespace ranges { template using copy_if_result = in_out_result<_Ip, _Op>; -namespace __copy_if { -struct __fn { - template - _LIBCPP_HIDE_FROM_ABI static constexpr copy_if_result<_InIter, _OutIter> - __copy_if_impl(_InIter __first, _Sent __last, _OutIter __result, _Pred& __pred, _Proj& __proj) { - for (; __first != __last; ++__first) { - if (std::invoke(__pred, std::invoke(__proj, *__first))) { - *__result = *__first; - ++__result; - } - } - return {std::move(__first), std::move(__result)}; - } - +struct __copy_if { template _Sent, weakly_incrementable _OutIter, @@ -58,7 +46,8 @@ struct __fn { requires indirectly_copyable<_Iter, _OutIter> _LIBCPP_HIDE_FROM_ABI constexpr copy_if_result<_Iter, _OutIter> operator()(_Iter __first, _Sent __last, _OutIter __result, _Pred __pred, _Proj __proj = {}) const { - return 
__copy_if_impl(std::move(__first), std::move(__last), std::move(__result), __pred, __proj); + auto __res = std::__copy_if(std::move(__first), std::move(__last), std::move(__result), __pred, __proj); + return {std::move(__res.first), std::move(__res.second)}; } template , _OutIter> _LIBCPP_HIDE_FROM_ABI constexpr copy_if_result, _OutIter> operator()(_Range&& __r, _OutIter __result, _Pred __pred, _Proj __proj = {}) const { - return __copy_if_impl(ranges::begin(__r), ranges::end(__r), std::move(__result), __pred, __proj); + auto __res = std::__copy_if(ranges::begin(__r), ranges::end(__r), std::move(__result), __pred, __proj); + return {std::move(__res.first), std::move(__res.second)}; } }; -} // namespace __copy_if inline namespace __cpo { -inline constexpr auto copy_if = __copy_if::__fn{}; +inline constexpr auto copy_if = __copy_if{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_copy_n.h b/lib/libcxx/include/__algorithm/ranges_copy_n.h index 4353fa99278c..1fbc61674e2d 100644 --- a/lib/libcxx/include/__algorithm/ranges_copy_n.h +++ b/lib/libcxx/include/__algorithm/ranges_copy_n.h @@ -37,8 +37,8 @@ namespace ranges { template using copy_n_result = in_out_result<_Ip, _Op>; -namespace __copy_n { -struct __fn { +// TODO: Merge this with copy_n +struct __copy_n { template _LIBCPP_HIDE_FROM_ABI constexpr static copy_n_result<_InIter, _OutIter> __go(_InIter __first, _DiffType __n, _OutIter __result) { @@ -54,7 +54,7 @@ struct __fn { template _LIBCPP_HIDE_FROM_ABI constexpr static copy_n_result<_InIter, _OutIter> __go(_InIter __first, _DiffType __n, _OutIter __result) { - auto __ret = std::__copy<_RangeAlgPolicy>(__first, __first + __n, __result); + auto __ret = std::__copy(__first, __first + __n, __result); return {__ret.first, __ret.second}; } @@ -65,10 +65,9 @@ struct __fn { return __go(std::move(__first), __n, std::move(__result)); } }; -} // namespace __copy_n inline namespace __cpo { -inline constexpr auto copy_n = 
__copy_n::__fn{}; +inline constexpr auto copy_n = __copy_n{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_count.h b/lib/libcxx/include/__algorithm/ranges_count.h index 4f3511743870..2b3969e76307 100644 --- a/lib/libcxx/include/__algorithm/ranges_count.h +++ b/lib/libcxx/include/__algorithm/ranges_count.h @@ -34,8 +34,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __count { -struct __fn { +struct __count { template _Sent, class _Type, class _Proj = identity> requires indirect_binary_predicate, const _Type*> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Iter> @@ -50,10 +49,9 @@ struct __fn { return std::__count<_RangeAlgPolicy>(ranges::begin(__r), ranges::end(__r), __value, __proj); } }; -} // namespace __count inline namespace __cpo { -inline constexpr auto count = __count::__fn{}; +inline constexpr auto count = __count{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_count_if.h b/lib/libcxx/include/__algorithm/ranges_count_if.h index 5f2396ff7d53..6adeb78582bf 100644 --- a/lib/libcxx/include/__algorithm/ranges_count_if.h +++ b/lib/libcxx/include/__algorithm/ranges_count_if.h @@ -9,9 +9,10 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_COUNT_IF_H #define _LIBCPP___ALGORITHM_RANGES_COUNT_IF_H +#include <__algorithm/count_if.h> +#include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__functional/ranges_operations.h> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> @@ -33,26 +34,14 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -template -_LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Iter> -__count_if_impl(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { - iter_difference_t<_Iter> __counter(0); - for (; __first != __last; ++__first) { - if (std::invoke(__pred, 
std::invoke(__proj, *__first))) - ++__counter; - } - return __counter; -} - -namespace __count_if { -struct __fn { +struct __count_if { template _Sent, class _Proj = identity, indirect_unary_predicate> _Predicate> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Iter> operator()(_Iter __first, _Sent __last, _Predicate __pred, _Proj __proj = {}) const { - return ranges::__count_if_impl(std::move(__first), std::move(__last), __pred, __proj); + return std::__count_if<_RangeAlgPolicy>(std::move(__first), std::move(__last), __pred, __proj); } template , _Proj>> _Predicate> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr range_difference_t<_Range> operator()(_Range&& __r, _Predicate __pred, _Proj __proj = {}) const { - return ranges::__count_if_impl(ranges::begin(__r), ranges::end(__r), __pred, __proj); + return std::__count_if<_RangeAlgPolicy>(ranges::begin(__r), ranges::end(__r), __pred, __proj); } }; -} // namespace __count_if inline namespace __cpo { -inline constexpr auto count_if = __count_if::__fn{}; +inline constexpr auto count_if = __count_if{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_ends_with.h b/lib/libcxx/include/__algorithm/ranges_ends_with.h index 06efdef36b7c..3621bda38912 100644 --- a/lib/libcxx/include/__algorithm/ranges_ends_with.h +++ b/lib/libcxx/include/__algorithm/ranges_ends_with.h @@ -22,6 +22,7 @@ #include <__iterator/reverse_iterator.h> #include <__ranges/access.h> #include <__ranges/concepts.h> +#include <__ranges/size.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -36,8 +37,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __ends_with { -struct __fn { +struct __ends_with { template _LIBCPP_HIDE_FROM_ABI static constexpr bool __ends_with_fn_impl_bidirectional( _Iter1 __first1, @@ -185,10 +185,9 @@ struct __fn { } } }; -} // namespace __ends_with inline namespace __cpo { -inline constexpr auto ends_with = 
__ends_with::__fn{}; +inline constexpr auto ends_with = __ends_with{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_equal.h b/lib/libcxx/include/__algorithm/ranges_equal.h index edbd0e3641c1..c26d13f00220 100644 --- a/lib/libcxx/include/__algorithm/ranges_equal.h +++ b/lib/libcxx/include/__algorithm/ranges_equal.h @@ -34,8 +34,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __equal { -struct __fn { +struct __equal { template _Sent1, input_iterator _Iter2, @@ -93,10 +92,9 @@ struct __fn { return false; } }; -} // namespace __equal inline namespace __cpo { -inline constexpr auto equal = __equal::__fn{}; +inline constexpr auto equal = __equal{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_equal_range.h b/lib/libcxx/include/__algorithm/ranges_equal_range.h index 4a308e016b54..cc765f196648 100644 --- a/lib/libcxx/include/__algorithm/ranges_equal_range.h +++ b/lib/libcxx/include/__algorithm/ranges_equal_range.h @@ -38,9 +38,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __equal_range { - -struct __fn { +struct __equal_range { template _Sent, class _Tp, @@ -64,10 +62,8 @@ struct __fn { } }; -} // namespace __equal_range - inline namespace __cpo { -inline constexpr auto equal_range = __equal_range::__fn{}; +inline constexpr auto equal_range = __equal_range{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_fill.h b/lib/libcxx/include/__algorithm/ranges_fill.h index 7a177d85e9f0..c248009f98fe 100644 --- a/lib/libcxx/include/__algorithm/ranges_fill.h +++ b/lib/libcxx/include/__algorithm/ranges_fill.h @@ -28,8 +28,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __fill { -struct __fn { +struct __fill { template _Iter, sentinel_for<_Iter> _Sent> _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last, const 
_Type& __value) const { if constexpr (random_access_iterator<_Iter> && sized_sentinel_for<_Sent, _Iter>) { @@ -46,10 +45,9 @@ struct __fn { return (*this)(ranges::begin(__range), ranges::end(__range), __value); } }; -} // namespace __fill inline namespace __cpo { -inline constexpr auto fill = __fill::__fn{}; +inline constexpr auto fill = __fill{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_fill_n.h b/lib/libcxx/include/__algorithm/ranges_fill_n.h index a6e988c0089c..1276f13680a9 100644 --- a/lib/libcxx/include/__algorithm/ranges_fill_n.h +++ b/lib/libcxx/include/__algorithm/ranges_fill_n.h @@ -9,9 +9,11 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_FILL_N_H #define _LIBCPP___ALGORITHM_RANGES_FILL_N_H +#include <__algorithm/fill_n.h> #include <__config> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -25,22 +27,16 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __fill_n { -struct __fn { +struct __fill_n { template _Iter> _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, iter_difference_t<_Iter> __n, const _Type& __value) const { - for (; __n != 0; --__n) { - *__first = __value; - ++__first; - } - return __first; + return std::__fill_n(std::move(__first), __n, __value); } }; -} // namespace __fill_n inline namespace __cpo { -inline constexpr auto fill_n = __fill_n::__fn{}; +inline constexpr auto fill_n = __fill_n{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_find.h b/lib/libcxx/include/__algorithm/ranges_find.h index 6b0d5efe37ab..1eac4cfa02a4 100644 --- a/lib/libcxx/include/__algorithm/ranges_find.h +++ b/lib/libcxx/include/__algorithm/ranges_find.h @@ -36,8 +36,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __find { -struct __fn { +struct __find { 
template _LIBCPP_HIDE_FROM_ABI static constexpr _Iter __find_unwrap(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) { @@ -64,10 +63,9 @@ struct __fn { return __find_unwrap(ranges::begin(__r), ranges::end(__r), __value, __proj); } }; -} // namespace __find inline namespace __cpo { -inline constexpr auto find = __find::__fn{}; +inline constexpr auto find = __find{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_find_end.h b/lib/libcxx/include/__algorithm/ranges_find_end.h index e49e66dd4ac0..682724a48cd5 100644 --- a/lib/libcxx/include/__algorithm/ranges_find_end.h +++ b/lib/libcxx/include/__algorithm/ranges_find_end.h @@ -35,8 +35,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __find_end { -struct __fn { +struct __find_end { template _Sent1, forward_iterator _Iter2, @@ -87,10 +86,9 @@ struct __fn { return {__ret.first, __ret.second}; } }; -} // namespace __find_end inline namespace __cpo { -inline constexpr auto find_end = __find_end::__fn{}; +inline constexpr auto find_end = __find_end{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_find_first_of.h b/lib/libcxx/include/__algorithm/ranges_find_first_of.h index d92d9686bc44..102e16dd7a55 100644 --- a/lib/libcxx/include/__algorithm/ranges_find_first_of.h +++ b/lib/libcxx/include/__algorithm/ranges_find_first_of.h @@ -32,8 +32,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __find_first_of { -struct __fn { +struct __find_first_of { template _LIBCPP_HIDE_FROM_ABI constexpr static _Iter1 __find_first_of_impl( _Iter1 __first1, @@ -90,10 +89,9 @@ struct __fn { __proj2); } }; -} // namespace __find_first_of inline namespace __cpo { -inline constexpr auto find_first_of = __find_first_of::__fn{}; +inline constexpr auto find_first_of = __find_first_of{}; } // namespace __cpo } // namespace ranges diff --git 
a/lib/libcxx/include/__algorithm/ranges_find_if.h b/lib/libcxx/include/__algorithm/ranges_find_if.h index 888f9ec3cb2d..ed6406e6186a 100644 --- a/lib/libcxx/include/__algorithm/ranges_find_if.h +++ b/lib/libcxx/include/__algorithm/ranges_find_if.h @@ -42,8 +42,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr _Ip __find_if_impl(_Ip __first, _Sp __last, _Pre return __first; } -namespace __find_if { -struct __fn { +struct __find_if { template _Sp, class _Proj = identity, @@ -59,10 +58,9 @@ struct __fn { return ranges::__find_if_impl(ranges::begin(__r), ranges::end(__r), __pred, __proj); } }; -} // namespace __find_if inline namespace __cpo { -inline constexpr auto find_if = __find_if::__fn{}; +inline constexpr auto find_if = __find_if{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_find_if_not.h b/lib/libcxx/include/__algorithm/ranges_find_if_not.h index ec19545b5a1b..9a359b2afdab 100644 --- a/lib/libcxx/include/__algorithm/ranges_find_if_not.h +++ b/lib/libcxx/include/__algorithm/ranges_find_if_not.h @@ -34,8 +34,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __find_if_not { -struct __fn { +struct __find_if_not { template _Sp, class _Proj = identity, @@ -53,10 +52,9 @@ struct __fn { return ranges::__find_if_impl(ranges::begin(__r), ranges::end(__r), __pred2, __proj); } }; -} // namespace __find_if_not inline namespace __cpo { -inline constexpr auto find_if_not = __find_if_not::__fn{}; +inline constexpr auto find_if_not = __find_if_not{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_find_last.h b/lib/libcxx/include/__algorithm/ranges_find_last.h index 95f7e77b8ccb..e7dae1704c2e 100644 --- a/lib/libcxx/include/__algorithm/ranges_find_last.h +++ b/lib/libcxx/include/__algorithm/ranges_find_last.h @@ -21,6 +21,7 @@ #include <__ranges/access.h> #include <__ranges/concepts.h> #include <__ranges/subrange.h> +#include <__utility/forward.h> #include 
<__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -72,8 +73,7 @@ __find_last_impl(_Iter __first, _Sent __last, _Pred __pred, _Proj& __proj) { } } -namespace __find_last { -struct __fn { +struct __find_last { template struct __op { const _Type& __value; @@ -97,10 +97,8 @@ struct __fn { return ranges::__find_last_impl(ranges::begin(__range), ranges::end(__range), __op<_Type>{__value}, __proj); } }; -} // namespace __find_last -namespace __find_last_if { -struct __fn { +struct __find_last_if { template struct __op { _Pred& __pred; @@ -127,10 +125,8 @@ struct __fn { return ranges::__find_last_impl(ranges::begin(__range), ranges::end(__range), __op<_Pred>{__pred}, __proj); } }; -} // namespace __find_last_if -namespace __find_last_if_not { -struct __fn { +struct __find_last_if_not { template struct __op { _Pred& __pred; @@ -157,12 +153,11 @@ struct __fn { return ranges::__find_last_impl(ranges::begin(__range), ranges::end(__range), __op<_Pred>{__pred}, __proj); } }; -} // namespace __find_last_if_not inline namespace __cpo { -inline constexpr auto find_last = __find_last::__fn{}; -inline constexpr auto find_last_if = __find_last_if::__fn{}; -inline constexpr auto find_last_if_not = __find_last_if_not::__fn{}; +inline constexpr auto find_last = __find_last{}; +inline constexpr auto find_last_if = __find_last_if{}; +inline constexpr auto find_last_if_not = __find_last_if_not{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/fold.h b/lib/libcxx/include/__algorithm/ranges_fold.h similarity index 96% rename from lib/libcxx/include/__algorithm/fold.h rename to lib/libcxx/include/__algorithm/ranges_fold.h index 255658f52324..d2c392139850 100644 --- a/lib/libcxx/include/__algorithm/fold.h +++ b/lib/libcxx/include/__algorithm/ranges_fold.h @@ -7,10 +7,11 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___ALGORITHM_FOLD_H -#define _LIBCPP___ALGORITHM_FOLD_H 
+#ifndef _LIBCPP___ALGORITHM_RANGES_FOLD_H +#define _LIBCPP___ALGORITHM_RANGES_FOLD_H #include <__concepts/assignable.h> +#include <__concepts/constructible.h> #include <__concepts/convertible_to.h> #include <__concepts/invocable.h> #include <__concepts/movable.h> @@ -125,4 +126,4 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS -#endif // _LIBCPP___ALGORITHM_FOLD_H +#endif // _LIBCPP___ALGORITHM_RANGES_FOLD_H diff --git a/lib/libcxx/include/__algorithm/ranges_for_each.h b/lib/libcxx/include/__algorithm/ranges_for_each.h index 225dc774c876..de39bc552275 100644 --- a/lib/libcxx/include/__algorithm/ranges_for_each.h +++ b/lib/libcxx/include/__algorithm/ranges_for_each.h @@ -36,8 +36,7 @@ namespace ranges { template using for_each_result = in_fun_result<_Iter, _Func>; -namespace __for_each { -struct __fn { +struct __for_each { private: template _LIBCPP_HIDE_FROM_ABI constexpr static for_each_result<_Iter, _Func> @@ -65,10 +64,9 @@ struct __fn { return __for_each_impl(ranges::begin(__range), ranges::end(__range), __func, __proj); } }; -} // namespace __for_each inline namespace __cpo { -inline constexpr auto for_each = __for_each::__fn{}; +inline constexpr auto for_each = __for_each{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_for_each_n.h b/lib/libcxx/include/__algorithm/ranges_for_each_n.h index d1fdca34cc5a..603cb723233c 100644 --- a/lib/libcxx/include/__algorithm/ranges_for_each_n.h +++ b/lib/libcxx/include/__algorithm/ranges_for_each_n.h @@ -36,8 +36,7 @@ namespace ranges { template using for_each_n_result = in_fun_result<_Iter, _Func>; -namespace __for_each_n { -struct __fn { +struct __for_each_n { template > _Func> _LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func> operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const { @@ -48,10 +47,9 @@ struct __fn { return {std::move(__first), std::move(__func)}; } }; -} // namespace __for_each_n inline namespace __cpo 
{ -inline constexpr auto for_each_n = __for_each_n::__fn{}; +inline constexpr auto for_each_n = __for_each_n{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_generate.h b/lib/libcxx/include/__algorithm/ranges_generate.h index e6467198e6ba..04333b358eed 100644 --- a/lib/libcxx/include/__algorithm/ranges_generate.h +++ b/lib/libcxx/include/__algorithm/ranges_generate.h @@ -12,12 +12,12 @@ #include <__concepts/constructible.h> #include <__concepts/invocable.h> #include <__config> -#include <__functional/invoke.h> #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__ranges/access.h> #include <__ranges/concepts.h> #include <__ranges/dangling.h> +#include <__type_traits/invoke.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -32,9 +32,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __generate { - -struct __fn { +struct __generate { template _LIBCPP_HIDE_FROM_ABI constexpr static _OutIter __generate_fn_impl(_OutIter __first, _Sent __last, _Func& __gen) { for (; __first != __last; ++__first) { @@ -57,10 +55,8 @@ struct __fn { } }; -} // namespace __generate - inline namespace __cpo { -inline constexpr auto generate = __generate::__fn{}; +inline constexpr auto generate = __generate{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_generate_n.h b/lib/libcxx/include/__algorithm/ranges_generate_n.h index cd5fd7483ab2..a318994d0eaf 100644 --- a/lib/libcxx/include/__algorithm/ranges_generate_n.h +++ b/lib/libcxx/include/__algorithm/ranges_generate_n.h @@ -13,12 +13,12 @@ #include <__concepts/invocable.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> #include <__iterator/iterator_traits.h> #include <__ranges/access.h> #include <__ranges/concepts.h> +#include 
<__type_traits/invoke.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -33,9 +33,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __generate_n { - -struct __fn { +struct __generate_n { template requires invocable<_Func&> && indirectly_writable<_OutIter, invoke_result_t<_Func&>> _LIBCPP_HIDE_FROM_ABI constexpr _OutIter @@ -49,10 +47,8 @@ struct __fn { } }; -} // namespace __generate_n - inline namespace __cpo { -inline constexpr auto generate_n = __generate_n::__fn{}; +inline constexpr auto generate_n = __generate_n{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_includes.h b/lib/libcxx/include/__algorithm/ranges_includes.h index c4c3b8ed088d..9145f3b5564f 100644 --- a/lib/libcxx/include/__algorithm/ranges_includes.h +++ b/lib/libcxx/include/__algorithm/ranges_includes.h @@ -35,9 +35,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __includes { - -struct __fn { +struct __includes { template _Sent1, input_iterator _Iter2, @@ -82,10 +80,8 @@ struct __fn { } }; -} // namespace __includes - inline namespace __cpo { -inline constexpr auto includes = __includes::__fn{}; +inline constexpr auto includes = __includes{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_inplace_merge.h b/lib/libcxx/include/__algorithm/ranges_inplace_merge.h index d94c0ad46567..5879d0e7ef0f 100644 --- a/lib/libcxx/include/__algorithm/ranges_inplace_merge.h +++ b/lib/libcxx/include/__algorithm/ranges_inplace_merge.h @@ -39,9 +39,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __inplace_merge { - -struct __fn { +struct __inplace_merge { template _LIBCPP_HIDE_FROM_ABI static constexpr auto __inplace_merge_impl(_Iter __first, _Iter __middle, _Sent __last, _Comp&& __comp, _Proj&& __proj) { @@ -68,10 +66,8 @@ struct __fn { } }; -} // namespace __inplace_merge - inline 
namespace __cpo { -inline constexpr auto inplace_merge = __inplace_merge::__fn{}; +inline constexpr auto inplace_merge = __inplace_merge{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_is_heap.h b/lib/libcxx/include/__algorithm/ranges_is_heap.h index 3d9e18ce1d90..b4724abfb62a 100644 --- a/lib/libcxx/include/__algorithm/ranges_is_heap.h +++ b/lib/libcxx/include/__algorithm/ranges_is_heap.h @@ -34,9 +34,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __is_heap { - -struct __fn { +struct __is_heap { template _LIBCPP_HIDE_FROM_ABI constexpr static bool __is_heap_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { @@ -65,10 +63,8 @@ struct __fn { } }; -} // namespace __is_heap - inline namespace __cpo { -inline constexpr auto is_heap = __is_heap::__fn{}; +inline constexpr auto is_heap = __is_heap{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_is_heap_until.h b/lib/libcxx/include/__algorithm/ranges_is_heap_until.h index 7a2e1fc7705b..25f3b484faa6 100644 --- a/lib/libcxx/include/__algorithm/ranges_is_heap_until.h +++ b/lib/libcxx/include/__algorithm/ranges_is_heap_until.h @@ -35,9 +35,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __is_heap_until { - -struct __fn { +struct __is_heap_until { template _LIBCPP_HIDE_FROM_ABI constexpr static _Iter __is_heap_until_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { @@ -65,10 +63,8 @@ struct __fn { } }; -} // namespace __is_heap_until - inline namespace __cpo { -inline constexpr auto is_heap_until = __is_heap_until::__fn{}; +inline constexpr auto is_heap_until = __is_heap_until{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_is_partitioned.h b/lib/libcxx/include/__algorithm/ranges_is_partitioned.h index 5be6fba46fd9..8092abfcd1de 100644 --- 
a/lib/libcxx/include/__algorithm/ranges_is_partitioned.h +++ b/lib/libcxx/include/__algorithm/ranges_is_partitioned.h @@ -31,8 +31,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __is_partitioned { -struct __fn { +struct __is_partitioned { template _LIBCPP_HIDE_FROM_ABI constexpr static bool __is_partitioned_impl(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { @@ -70,10 +69,9 @@ struct __fn { return __is_partitioned_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } }; -} // namespace __is_partitioned inline namespace __cpo { -inline constexpr auto is_partitioned = __is_partitioned::__fn{}; +inline constexpr auto is_partitioned = __is_partitioned{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_is_permutation.h b/lib/libcxx/include/__algorithm/ranges_is_permutation.h index 1f8d67007a57..53a431d2ba42 100644 --- a/lib/libcxx/include/__algorithm/ranges_is_permutation.h +++ b/lib/libcxx/include/__algorithm/ranges_is_permutation.h @@ -33,8 +33,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __is_permutation { -struct __fn { +struct __is_permutation { template _LIBCPP_HIDE_FROM_ABI constexpr static bool __is_permutation_func_impl( _Iter1 __first1, @@ -91,10 +90,9 @@ struct __fn { __proj2); } }; -} // namespace __is_permutation inline namespace __cpo { -inline constexpr auto is_permutation = __is_permutation::__fn{}; +inline constexpr auto is_permutation = __is_permutation{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_is_sorted.h b/lib/libcxx/include/__algorithm/ranges_is_sorted.h index 5b88d422b4b0..ab0670688a0e 100644 --- a/lib/libcxx/include/__algorithm/ranges_is_sorted.h +++ b/lib/libcxx/include/__algorithm/ranges_is_sorted.h @@ -31,8 +31,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __is_sorted { -struct __fn { +struct __is_sorted { 
template _Sent, class _Proj = identity, @@ -51,10 +50,9 @@ struct __fn { return ranges::__is_sorted_until_impl(ranges::begin(__range), __last, __comp, __proj) == __last; } }; -} // namespace __is_sorted inline namespace __cpo { -inline constexpr auto is_sorted = __is_sorted::__fn{}; +inline constexpr auto is_sorted = __is_sorted{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_is_sorted_until.h b/lib/libcxx/include/__algorithm/ranges_is_sorted_until.h index 54de530c8b2f..f2e51c264e4a 100644 --- a/lib/libcxx/include/__algorithm/ranges_is_sorted_until.h +++ b/lib/libcxx/include/__algorithm/ranges_is_sorted_until.h @@ -47,8 +47,7 @@ __is_sorted_until_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj return __i; } -namespace __is_sorted_until { -struct __fn { +struct __is_sorted_until { template _Sent, class _Proj = identity, @@ -66,10 +65,9 @@ struct __fn { return ranges::__is_sorted_until_impl(ranges::begin(__range), ranges::end(__range), __comp, __proj); } }; -} // namespace __is_sorted_until inline namespace __cpo { -inline constexpr auto is_sorted_until = __is_sorted_until::__fn{}; +inline constexpr auto is_sorted_until = __is_sorted_until{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_iterator_concept.h b/lib/libcxx/include/__algorithm/ranges_iterator_concept.h index 2af891d3af00..58790e95aa80 100644 --- a/lib/libcxx/include/__algorithm/ranges_iterator_concept.h +++ b/lib/libcxx/include/__algorithm/ranges_iterator_concept.h @@ -44,7 +44,7 @@ consteval auto __get_iterator_concept() { } template -using __iterator_concept = decltype(__get_iterator_concept<_Iter>()); +using __iterator_concept _LIBCPP_NODEBUG = decltype(__get_iterator_concept<_Iter>()); } // namespace ranges _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__algorithm/ranges_lexicographical_compare.h b/lib/libcxx/include/__algorithm/ranges_lexicographical_compare.h index 
6d82017e302a..ec12b0cc29ac 100644 --- a/lib/libcxx/include/__algorithm/ranges_lexicographical_compare.h +++ b/lib/libcxx/include/__algorithm/ranges_lexicographical_compare.h @@ -9,6 +9,8 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_LEXICOGRAPHICAL_COMPARE_H #define _LIBCPP___ALGORITHM_RANGES_LEXICOGRAPHICAL_COMPARE_H +#include <__algorithm/lexicographical_compare.h> +#include <__algorithm/unwrap_range.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -31,10 +33,9 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __lexicographical_compare { -struct __fn { +struct __lexicographical_compare { template - _LIBCPP_HIDE_FROM_ABI constexpr static bool __lexicographical_compare_impl( + static _LIBCPP_HIDE_FROM_ABI constexpr bool __lexicographical_compare_unwrap( _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, @@ -42,15 +43,16 @@ struct __fn { _Comp& __comp, _Proj1& __proj1, _Proj2& __proj2) { - while (__first2 != __last2) { - if (__first1 == __last1 || std::invoke(__comp, std::invoke(__proj1, *__first1), std::invoke(__proj2, *__first2))) - return true; - if (std::invoke(__comp, std::invoke(__proj2, *__first2), std::invoke(__proj1, *__first1))) - return false; - ++__first1; - ++__first2; - } - return false; + auto [__first1_un, __last1_un] = std::__unwrap_range(std::move(__first1), std::move(__last1)); + auto [__first2_un, __last2_un] = std::__unwrap_range(std::move(__first2), std::move(__last2)); + return std::__lexicographical_compare( + std::move(__first1_un), + std::move(__last1_un), + std::move(__first2_un), + std::move(__last2_un), + __comp, + __proj1, + __proj2); } template [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool operator()( _Range1&& __range1, _Range2&& __range2, _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { - return __lexicographical_compare_impl( + return __lexicographical_compare_unwrap( ranges::begin(__range1), ranges::end(__range1), ranges::begin(__range2), @@ 
-90,10 +92,9 @@ struct __fn { __proj2); } }; -} // namespace __lexicographical_compare inline namespace __cpo { -inline constexpr auto lexicographical_compare = __lexicographical_compare::__fn{}; +inline constexpr auto lexicographical_compare = __lexicographical_compare{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_lower_bound.h b/lib/libcxx/include/__algorithm/ranges_lower_bound.h index 0651147e0424..d1b332849b8b 100644 --- a/lib/libcxx/include/__algorithm/ranges_lower_bound.h +++ b/lib/libcxx/include/__algorithm/ranges_lower_bound.h @@ -36,8 +36,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __lower_bound { -struct __fn { +struct __lower_bound { template _Sent, class _Type, @@ -57,10 +56,9 @@ struct __fn { return std::__lower_bound<_RangeAlgPolicy>(ranges::begin(__r), ranges::end(__r), __value, __comp, __proj); } }; -} // namespace __lower_bound inline namespace __cpo { -inline constexpr auto lower_bound = __lower_bound::__fn{}; +inline constexpr auto lower_bound = __lower_bound{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_make_heap.h b/lib/libcxx/include/__algorithm/ranges_make_heap.h index fe9c024fbf8a..97148f77b418 100644 --- a/lib/libcxx/include/__algorithm/ranges_make_heap.h +++ b/lib/libcxx/include/__algorithm/ranges_make_heap.h @@ -40,9 +40,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __make_heap { - -struct __fn { +struct __make_heap { template _LIBCPP_HIDE_FROM_ABI constexpr static _Iter __make_heap_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { @@ -69,10 +67,8 @@ struct __fn { } }; -} // namespace __make_heap - inline namespace __cpo { -inline constexpr auto make_heap = __make_heap::__fn{}; +inline constexpr auto make_heap = __make_heap{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_max.h 
b/lib/libcxx/include/__algorithm/ranges_max.h index d0ee6f314b0c..f631344422ed 100644 --- a/lib/libcxx/include/__algorithm/ranges_max.h +++ b/lib/libcxx/include/__algorithm/ranges_max.h @@ -36,8 +36,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __max { -struct __fn { +struct __max { template > _Comp = ranges::less> @@ -87,10 +86,9 @@ struct __fn { } } }; -} // namespace __max inline namespace __cpo { -inline constexpr auto max = __max::__fn{}; +inline constexpr auto max = __max{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_max_element.h b/lib/libcxx/include/__algorithm/ranges_max_element.h index c57730927116..869f71ecc8d2 100644 --- a/lib/libcxx/include/__algorithm/ranges_max_element.h +++ b/lib/libcxx/include/__algorithm/ranges_max_element.h @@ -32,8 +32,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __max_element { -struct __fn { +struct __max_element { template _Sp, class _Proj = identity, @@ -53,10 +52,9 @@ struct __fn { return ranges::__min_element_impl(ranges::begin(__r), ranges::end(__r), __comp_lhs_rhs_swapped, __proj); } }; -} // namespace __max_element inline namespace __cpo { -inline constexpr auto max_element = __max_element::__fn{}; +inline constexpr auto max_element = __max_element{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_merge.h b/lib/libcxx/include/__algorithm/ranges_merge.h index bdf9a62d90bd..f3e0486fe488 100644 --- a/lib/libcxx/include/__algorithm/ranges_merge.h +++ b/lib/libcxx/include/__algorithm/ranges_merge.h @@ -39,42 +39,7 @@ namespace ranges { template using merge_result = in_in_out_result<_InIter1, _InIter2, _OutIter>; -namespace __merge { - -template < class _InIter1, - class _Sent1, - class _InIter2, - class _Sent2, - class _OutIter, - class _Comp, - class _Proj1, - class _Proj2> -_LIBCPP_HIDE_FROM_ABI constexpr merge_result<__remove_cvref_t<_InIter1>, - 
__remove_cvref_t<_InIter2>, - __remove_cvref_t<_OutIter>> -__merge_impl(_InIter1&& __first1, - _Sent1&& __last1, - _InIter2&& __first2, - _Sent2&& __last2, - _OutIter&& __result, - _Comp&& __comp, - _Proj1&& __proj1, - _Proj2&& __proj2) { - for (; __first1 != __last1 && __first2 != __last2; ++__result) { - if (std::invoke(__comp, std::invoke(__proj2, *__first2), std::invoke(__proj1, *__first1))) { - *__result = *__first2; - ++__first2; - } else { - *__result = *__first1; - ++__first1; - } - } - auto __ret1 = ranges::copy(std::move(__first1), std::move(__last1), std::move(__result)); - auto __ret2 = ranges::copy(std::move(__first2), std::move(__last2), std::move(__ret1.out)); - return {std::move(__ret1.in), std::move(__ret2.in), std::move(__ret2.out)}; -} - -struct __fn { +struct __merge { template _Sent1, input_iterator _InIter2, @@ -120,12 +85,43 @@ struct __fn { __proj1, __proj2); } -}; -} // namespace __merge + template < class _InIter1, + class _Sent1, + class _InIter2, + class _Sent2, + class _OutIter, + class _Comp, + class _Proj1, + class _Proj2> + _LIBCPP_HIDE_FROM_ABI static constexpr merge_result<__remove_cvref_t<_InIter1>, + __remove_cvref_t<_InIter2>, + __remove_cvref_t<_OutIter>> + __merge_impl(_InIter1&& __first1, + _Sent1&& __last1, + _InIter2&& __first2, + _Sent2&& __last2, + _OutIter&& __result, + _Comp&& __comp, + _Proj1&& __proj1, + _Proj2&& __proj2) { + for (; __first1 != __last1 && __first2 != __last2; ++__result) { + if (std::invoke(__comp, std::invoke(__proj2, *__first2), std::invoke(__proj1, *__first1))) { + *__result = *__first2; + ++__first2; + } else { + *__result = *__first1; + ++__first1; + } + } + auto __ret1 = ranges::copy(std::move(__first1), std::move(__last1), std::move(__result)); + auto __ret2 = ranges::copy(std::move(__first2), std::move(__last2), std::move(__ret1.out)); + return {std::move(__ret1.in), std::move(__ret2.in), std::move(__ret2.out)}; + } +}; inline namespace __cpo { -inline constexpr auto merge = __merge::__fn{}; 
+inline constexpr auto merge = __merge{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_min.h b/lib/libcxx/include/__algorithm/ranges_min.h index cc569d2a060c..302b5d7975b0 100644 --- a/lib/libcxx/include/__algorithm/ranges_min.h +++ b/lib/libcxx/include/__algorithm/ranges_min.h @@ -35,8 +35,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __min { -struct __fn { +struct __min { template > _Comp = ranges::less> @@ -79,10 +78,9 @@ struct __fn { } } }; -} // namespace __min inline namespace __cpo { -inline constexpr auto min = __min::__fn{}; +inline constexpr auto min = __min{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_min_element.h b/lib/libcxx/include/__algorithm/ranges_min_element.h index 588ef258e26f..fb92ae56bcd6 100644 --- a/lib/libcxx/include/__algorithm/ranges_min_element.h +++ b/lib/libcxx/include/__algorithm/ranges_min_element.h @@ -46,8 +46,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr _Ip __min_element_impl(_Ip __first, _Sp __last, return __first; } -namespace __min_element { -struct __fn { +struct __min_element { template _Sp, class _Proj = identity, @@ -65,10 +64,9 @@ struct __fn { return ranges::__min_element_impl(ranges::begin(__r), ranges::end(__r), __comp, __proj); } }; -} // namespace __min_element inline namespace __cpo { -inline constexpr auto min_element = __min_element::__fn{}; +inline constexpr auto min_element = __min_element{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_minmax.h b/lib/libcxx/include/__algorithm/ranges_minmax.h index 09cbefd91a8c..5f2e5cb2a1ee 100644 --- a/lib/libcxx/include/__algorithm/ranges_minmax.h +++ b/lib/libcxx/include/__algorithm/ranges_minmax.h @@ -24,6 +24,7 @@ #include <__ranges/access.h> #include <__ranges/concepts.h> #include <__type_traits/desugars_to.h> +#include <__type_traits/is_integral.h> #include <__type_traits/is_reference.h> 
#include <__type_traits/is_trivially_copyable.h> #include <__type_traits/remove_cvref.h> @@ -47,8 +48,7 @@ namespace ranges { template using minmax_result = min_max_result<_T1>; -namespace __minmax { -struct __fn { +struct __minmax { template > _Comp = ranges::less> @@ -159,10 +159,9 @@ struct __fn { } } }; -} // namespace __minmax inline namespace __cpo { -inline constexpr auto minmax = __minmax::__fn{}; +inline constexpr auto minmax = __minmax{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_minmax_element.h b/lib/libcxx/include/__algorithm/ranges_minmax_element.h index 4bf6d2404e46..e1a22dde0955 100644 --- a/lib/libcxx/include/__algorithm/ranges_minmax_element.h +++ b/lib/libcxx/include/__algorithm/ranges_minmax_element.h @@ -40,8 +40,7 @@ namespace ranges { template using minmax_element_result = min_max_result<_T1>; -namespace __minmax_element { -struct __fn { +struct __minmax_element { template _Sp, class _Proj = identity, @@ -61,10 +60,9 @@ struct __fn { return {__ret.first, __ret.second}; } }; -} // namespace __minmax_element inline namespace __cpo { -inline constexpr auto minmax_element = __minmax_element::__fn{}; +inline constexpr auto minmax_element = __minmax_element{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_mismatch.h b/lib/libcxx/include/__algorithm/ranges_mismatch.h index c4bf0022a9bc..b35747dfa43a 100644 --- a/lib/libcxx/include/__algorithm/ranges_mismatch.h +++ b/lib/libcxx/include/__algorithm/ranges_mismatch.h @@ -39,8 +39,7 @@ namespace ranges { template using mismatch_result = in_in_result<_I1, _I2>; -namespace __mismatch { -struct __fn { +struct __mismatch { template static _LIBCPP_HIDE_FROM_ABI constexpr mismatch_result<_I1, _I2> __go(_I1 __first1, _S1 __last1, _I2 __first2, _S2 __last2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { @@ -84,10 +83,9 @@ struct __fn { ranges::begin(__r1), ranges::end(__r1), ranges::begin(__r2), 
ranges::end(__r2), __pred, __proj1, __proj2); } }; -} // namespace __mismatch inline namespace __cpo { -constexpr inline auto mismatch = __mismatch::__fn{}; +constexpr inline auto mismatch = __mismatch{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_move.h b/lib/libcxx/include/__algorithm/ranges_move.h index be869f36c973..02bf7fd00619 100644 --- a/lib/libcxx/include/__algorithm/ranges_move.h +++ b/lib/libcxx/include/__algorithm/ranges_move.h @@ -35,8 +35,7 @@ namespace ranges { template using move_result = in_out_result<_InIter, _OutIter>; -namespace __move { -struct __fn { +struct __move { template _LIBCPP_HIDE_FROM_ABI constexpr static move_result<_InIter, _OutIter> __move_impl(_InIter __first, _Sent __last, _OutIter __result) { @@ -58,10 +57,9 @@ struct __fn { return __move_impl(ranges::begin(__range), ranges::end(__range), std::move(__result)); } }; -} // namespace __move inline namespace __cpo { -inline constexpr auto move = __move::__fn{}; +inline constexpr auto move = __move{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_move_backward.h b/lib/libcxx/include/__algorithm/ranges_move_backward.h index 6d4071a33b81..4737e6c9756d 100644 --- a/lib/libcxx/include/__algorithm/ranges_move_backward.h +++ b/lib/libcxx/include/__algorithm/ranges_move_backward.h @@ -37,8 +37,7 @@ namespace ranges { template using move_backward_result = in_out_result<_InIter, _OutIter>; -namespace __move_backward { -struct __fn { +struct __move_backward { template _LIBCPP_HIDE_FROM_ABI constexpr static move_backward_result<_InIter, _OutIter> __move_backward_impl(_InIter __first, _Sent __last, _OutIter __result) { @@ -60,10 +59,9 @@ struct __fn { return __move_backward_impl(ranges::begin(__range), ranges::end(__range), std::move(__result)); } }; -} // namespace __move_backward inline namespace __cpo { -inline constexpr auto move_backward = __move_backward::__fn{}; +inline constexpr auto 
move_backward = __move_backward{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_next_permutation.h b/lib/libcxx/include/__algorithm/ranges_next_permutation.h index 18535e0a6254..1b485423e892 100644 --- a/lib/libcxx/include/__algorithm/ranges_next_permutation.h +++ b/lib/libcxx/include/__algorithm/ranges_next_permutation.h @@ -40,9 +40,7 @@ namespace ranges { template using next_permutation_result = in_found_result<_InIter>; -namespace __next_permutation { - -struct __fn { +struct __next_permutation { template _Sent, class _Comp = ranges::less, class _Proj = identity> requires sortable<_Iter, _Comp, _Proj> _LIBCPP_HIDE_FROM_ABI constexpr next_permutation_result<_Iter> @@ -62,10 +60,8 @@ struct __fn { } }; -} // namespace __next_permutation - inline namespace __cpo { -constexpr inline auto next_permutation = __next_permutation::__fn{}; +constexpr inline auto next_permutation = __next_permutation{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_none_of.h b/lib/libcxx/include/__algorithm/ranges_none_of.h index 7df3c1829fcf..a1612826220d 100644 --- a/lib/libcxx/include/__algorithm/ranges_none_of.h +++ b/lib/libcxx/include/__algorithm/ranges_none_of.h @@ -30,8 +30,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __none_of { -struct __fn { +struct __none_of { template _LIBCPP_HIDE_FROM_ABI constexpr static bool __none_of_impl(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { @@ -59,10 +58,9 @@ struct __fn { return __none_of_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } }; -} // namespace __none_of inline namespace __cpo { -inline constexpr auto none_of = __none_of::__fn{}; +inline constexpr auto none_of = __none_of{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_nth_element.h b/lib/libcxx/include/__algorithm/ranges_nth_element.h index 90ade9efe10d..e92c51e713cb 
100644 --- a/lib/libcxx/include/__algorithm/ranges_nth_element.h +++ b/lib/libcxx/include/__algorithm/ranges_nth_element.h @@ -39,9 +39,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __nth_element { - -struct __fn { +struct __nth_element { template _LIBCPP_HIDE_FROM_ABI constexpr static _Iter __nth_element_fn_impl(_Iter __first, _Iter __nth, _Sent __last, _Comp& __comp, _Proj& __proj) { @@ -68,10 +66,8 @@ struct __fn { } }; -} // namespace __nth_element - inline namespace __cpo { -inline constexpr auto nth_element = __nth_element::__fn{}; +inline constexpr auto nth_element = __nth_element{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_partial_sort.h b/lib/libcxx/include/__algorithm/ranges_partial_sort.h index c67247d2e0a7..fc8a1f7d9306 100644 --- a/lib/libcxx/include/__algorithm/ranges_partial_sort.h +++ b/lib/libcxx/include/__algorithm/ranges_partial_sort.h @@ -41,9 +41,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __partial_sort { - -struct __fn { +struct __partial_sort { template _LIBCPP_HIDE_FROM_ABI constexpr static _Iter __partial_sort_fn_impl(_Iter __first, _Iter __middle, _Sent __last, _Comp& __comp, _Proj& __proj) { @@ -66,10 +64,8 @@ struct __fn { } }; -} // namespace __partial_sort - inline namespace __cpo { -inline constexpr auto partial_sort = __partial_sort::__fn{}; +inline constexpr auto partial_sort = __partial_sort{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_partial_sort_copy.h b/lib/libcxx/include/__algorithm/ranges_partial_sort_copy.h index b3bdeb78fb6f..f221504a8cae 100644 --- a/lib/libcxx/include/__algorithm/ranges_partial_sort_copy.h +++ b/lib/libcxx/include/__algorithm/ranges_partial_sort_copy.h @@ -42,9 +42,7 @@ namespace ranges { template using partial_sort_copy_result = in_out_result<_InIter, _OutIter>; -namespace __partial_sort_copy { - -struct __fn { +struct 
__partial_sort_copy { template _Sent1, random_access_iterator _Iter2, @@ -98,10 +96,8 @@ struct __fn { } }; -} // namespace __partial_sort_copy - inline namespace __cpo { -inline constexpr auto partial_sort_copy = __partial_sort_copy::__fn{}; +inline constexpr auto partial_sort_copy = __partial_sort_copy{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_partition.h b/lib/libcxx/include/__algorithm/ranges_partition.h index a67ac4c96757..b9cc3c189370 100644 --- a/lib/libcxx/include/__algorithm/ranges_partition.h +++ b/lib/libcxx/include/__algorithm/ranges_partition.h @@ -24,6 +24,7 @@ #include <__ranges/access.h> #include <__ranges/concepts.h> #include <__ranges/subrange.h> +#include <__type_traits/remove_cvref.h> #include <__utility/forward.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -40,9 +41,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __partition { - -struct __fn { +struct __partition { template _LIBCPP_HIDE_FROM_ABI static constexpr subrange<__remove_cvref_t<_Iter>> __partition_fn_impl(_Iter&& __first, _Sent&& __last, _Pred&& __pred, _Proj&& __proj) { @@ -72,10 +71,8 @@ struct __fn { } }; -} // namespace __partition - inline namespace __cpo { -inline constexpr auto partition = __partition::__fn{}; +inline constexpr auto partition = __partition{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_partition_copy.h b/lib/libcxx/include/__algorithm/ranges_partition_copy.h index d60c865dd2a8..47878a401723 100644 --- a/lib/libcxx/include/__algorithm/ranges_partition_copy.h +++ b/lib/libcxx/include/__algorithm/ranges_partition_copy.h @@ -38,9 +38,7 @@ namespace ranges { template using partition_copy_result = in_out_out_result<_InIter, _OutIter1, _OutIter2>; -namespace __partition_copy { - -struct __fn { +struct __partition_copy { // TODO(ranges): delegate to the classic algorithm. 
template _LIBCPP_HIDE_FROM_ABI constexpr static partition_copy_result<__remove_cvref_t<_InIter>, @@ -94,10 +92,8 @@ struct __fn { } }; -} // namespace __partition_copy - inline namespace __cpo { -inline constexpr auto partition_copy = __partition_copy::__fn{}; +inline constexpr auto partition_copy = __partition_copy{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_partition_point.h b/lib/libcxx/include/__algorithm/ranges_partition_point.h index c5b11b5fed19..324efbb86d64 100644 --- a/lib/libcxx/include/__algorithm/ranges_partition_point.h +++ b/lib/libcxx/include/__algorithm/ranges_partition_point.h @@ -35,9 +35,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __partition_point { - -struct __fn { +struct __partition_point { // TODO(ranges): delegate to the classic algorithm. template _LIBCPP_HIDE_FROM_ABI constexpr static _Iter @@ -77,10 +75,8 @@ struct __fn { } }; -} // namespace __partition_point - inline namespace __cpo { -inline constexpr auto partition_point = __partition_point::__fn{}; +inline constexpr auto partition_point = __partition_point{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_pop_heap.h b/lib/libcxx/include/__algorithm/ranges_pop_heap.h index 01f92c0f2288..eccf54c094e3 100644 --- a/lib/libcxx/include/__algorithm/ranges_pop_heap.h +++ b/lib/libcxx/include/__algorithm/ranges_pop_heap.h @@ -40,9 +40,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __pop_heap { - -struct __fn { +struct __pop_heap { template _LIBCPP_HIDE_FROM_ABI constexpr static _Iter __pop_heap_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { @@ -70,10 +68,8 @@ struct __fn { } }; -} // namespace __pop_heap - inline namespace __cpo { -inline constexpr auto pop_heap = __pop_heap::__fn{}; +inline constexpr auto pop_heap = __pop_heap{}; } // namespace __cpo } // namespace ranges diff --git 
a/lib/libcxx/include/__algorithm/ranges_prev_permutation.h b/lib/libcxx/include/__algorithm/ranges_prev_permutation.h index 225cee9b75ec..f2294b1cb00b 100644 --- a/lib/libcxx/include/__algorithm/ranges_prev_permutation.h +++ b/lib/libcxx/include/__algorithm/ranges_prev_permutation.h @@ -40,9 +40,7 @@ namespace ranges { template using prev_permutation_result = in_found_result<_InIter>; -namespace __prev_permutation { - -struct __fn { +struct __prev_permutation { template _Sent, class _Comp = ranges::less, class _Proj = identity> requires sortable<_Iter, _Comp, _Proj> _LIBCPP_HIDE_FROM_ABI constexpr prev_permutation_result<_Iter> @@ -62,10 +60,8 @@ struct __fn { } }; -} // namespace __prev_permutation - inline namespace __cpo { -constexpr inline auto prev_permutation = __prev_permutation::__fn{}; +constexpr inline auto prev_permutation = __prev_permutation{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_push_heap.h b/lib/libcxx/include/__algorithm/ranges_push_heap.h index 9d187af38c53..c5e0465bdcfe 100644 --- a/lib/libcxx/include/__algorithm/ranges_push_heap.h +++ b/lib/libcxx/include/__algorithm/ranges_push_heap.h @@ -40,9 +40,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __push_heap { - -struct __fn { +struct __push_heap { template _LIBCPP_HIDE_FROM_ABI constexpr static _Iter __push_heap_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { @@ -69,10 +67,8 @@ struct __fn { } }; -} // namespace __push_heap - inline namespace __cpo { -inline constexpr auto push_heap = __push_heap::__fn{}; +inline constexpr auto push_heap = __push_heap{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_remove.h b/lib/libcxx/include/__algorithm/ranges_remove.h index 17c3a2c5cd06..6fbc49eba8a7 100644 --- a/lib/libcxx/include/__algorithm/ranges_remove.h +++ b/lib/libcxx/include/__algorithm/ranges_remove.h @@ -33,8 +33,7 @@ _LIBCPP_PUSH_MACROS 
_LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __remove { -struct __fn { +struct __remove { template _Sent, class _Type, class _Proj = identity> requires indirect_binary_predicate, const _Type*> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr subrange<_Iter> @@ -52,10 +51,9 @@ struct __fn { return ranges::__remove_if_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } }; -} // namespace __remove inline namespace __cpo { -inline constexpr auto remove = __remove::__fn{}; +inline constexpr auto remove = __remove{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_remove_copy.h b/lib/libcxx/include/__algorithm/ranges_remove_copy.h index 84529eceac68..764c52ee16b2 100644 --- a/lib/libcxx/include/__algorithm/ranges_remove_copy.h +++ b/lib/libcxx/include/__algorithm/ranges_remove_copy.h @@ -38,9 +38,7 @@ namespace ranges { template using remove_copy_result = in_out_result<_InIter, _OutIter>; -namespace __remove_copy { - -struct __fn { +struct __remove_copy { template _Sent, weakly_incrementable _OutIter, @@ -65,10 +63,8 @@ struct __fn { } }; -} // namespace __remove_copy - inline namespace __cpo { -inline constexpr auto remove_copy = __remove_copy::__fn{}; +inline constexpr auto remove_copy = __remove_copy{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_remove_copy_if.h b/lib/libcxx/include/__algorithm/ranges_remove_copy_if.h index 56fe01753312..87136ae8258d 100644 --- a/lib/libcxx/include/__algorithm/ranges_remove_copy_if.h +++ b/lib/libcxx/include/__algorithm/ranges_remove_copy_if.h @@ -53,9 +53,7 @@ __remove_copy_if_impl(_InIter __first, _Sent __last, _OutIter __result, _Pred& _ return {std::move(__first), std::move(__result)}; } -namespace __remove_copy_if { - -struct __fn { +struct __remove_copy_if { template _Sent, weakly_incrementable _OutIter, @@ -79,10 +77,8 @@ struct __fn { } }; -} // namespace __remove_copy_if - inline namespace __cpo { 
-inline constexpr auto remove_copy_if = __remove_copy_if::__fn{}; +inline constexpr auto remove_copy_if = __remove_copy_if{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_remove_if.h b/lib/libcxx/include/__algorithm/ranges_remove_if.h index 0ea5d9a01b88..384b3d41d080 100644 --- a/lib/libcxx/include/__algorithm/ranges_remove_if.h +++ b/lib/libcxx/include/__algorithm/ranges_remove_if.h @@ -53,8 +53,7 @@ __remove_if_impl(_Iter __first, _Sent __last, _Pred& __pred, _Proj& __proj) { return {__new_end, __i}; } -namespace __remove_if { -struct __fn { +struct __remove_if { template _Sent, class _Proj = identity, @@ -73,10 +72,9 @@ struct __fn { return ranges::__remove_if_impl(ranges::begin(__range), ranges::end(__range), __pred, __proj); } }; -} // namespace __remove_if inline namespace __cpo { -inline constexpr auto remove_if = __remove_if::__fn{}; +inline constexpr auto remove_if = __remove_if{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_replace.h b/lib/libcxx/include/__algorithm/ranges_replace.h index 2b88dc032972..15b1f38554a8 100644 --- a/lib/libcxx/include/__algorithm/ranges_replace.h +++ b/lib/libcxx/include/__algorithm/ranges_replace.h @@ -32,8 +32,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __replace { -struct __fn { +struct __replace { template _Sent, class _Type1, class _Type2, class _Proj = identity> requires indirectly_writable<_Iter, const _Type2&> && indirect_binary_predicate, const _Type1*> @@ -52,10 +51,9 @@ struct __fn { return ranges::__replace_if_impl(ranges::begin(__range), ranges::end(__range), __pred, __new_value, __proj); } }; -} // namespace __replace inline namespace __cpo { -inline constexpr auto replace = __replace::__fn{}; +inline constexpr auto replace = __replace{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_replace_copy.h 
b/lib/libcxx/include/__algorithm/ranges_replace_copy.h index 633f993e5c94..7ab1c71543e2 100644 --- a/lib/libcxx/include/__algorithm/ranges_replace_copy.h +++ b/lib/libcxx/include/__algorithm/ranges_replace_copy.h @@ -38,9 +38,7 @@ namespace ranges { template using replace_copy_result = in_out_result<_InIter, _OutIter>; -namespace __replace_copy { - -struct __fn { +struct __replace_copy { template _Sent, class _OldType, @@ -77,10 +75,8 @@ struct __fn { } }; -} // namespace __replace_copy - inline namespace __cpo { -inline constexpr auto replace_copy = __replace_copy::__fn{}; +inline constexpr auto replace_copy = __replace_copy{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_replace_copy_if.h b/lib/libcxx/include/__algorithm/ranges_replace_copy_if.h index e065c3ac0acc..852ec45edaef 100644 --- a/lib/libcxx/include/__algorithm/ranges_replace_copy_if.h +++ b/lib/libcxx/include/__algorithm/ranges_replace_copy_if.h @@ -52,9 +52,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr replace_copy_if_result<_InIter, _OutIter> __repl return {std::move(__first), std::move(__result)}; } -namespace __replace_copy_if { - -struct __fn { +struct __replace_copy_if { template _Sent, class _Type, @@ -82,10 +80,8 @@ struct __fn { } }; -} // namespace __replace_copy_if - inline namespace __cpo { -inline constexpr auto replace_copy_if = __replace_copy_if::__fn{}; +inline constexpr auto replace_copy_if = __replace_copy_if{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_replace_if.h b/lib/libcxx/include/__algorithm/ranges_replace_if.h index 6445f42aea19..baa566810b5d 100644 --- a/lib/libcxx/include/__algorithm/ranges_replace_if.h +++ b/lib/libcxx/include/__algorithm/ranges_replace_if.h @@ -42,8 +42,7 @@ __replace_if_impl(_Iter __first, _Sent __last, _Pred& __pred, const _Type& __new return __first; } -namespace __replace_if { -struct __fn { +struct __replace_if { template _Sent, class _Type, @@ -65,10 +64,9 @@ 
struct __fn { return ranges::__replace_if_impl(ranges::begin(__range), ranges::end(__range), __pred, __new_value, __proj); } }; -} // namespace __replace_if inline namespace __cpo { -inline constexpr auto replace_if = __replace_if::__fn{}; +inline constexpr auto replace_if = __replace_if{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_reverse.h b/lib/libcxx/include/__algorithm/ranges_reverse.h index 9ec865995b4a..4e8211871977 100644 --- a/lib/libcxx/include/__algorithm/ranges_reverse.h +++ b/lib/libcxx/include/__algorithm/ranges_reverse.h @@ -27,8 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __reverse { -struct __fn { +struct __reverse { template _Sent> requires permutable<_Iter> _LIBCPP_HIDE_FROM_ABI constexpr _Iter operator()(_Iter __first, _Sent __last) const { @@ -65,10 +64,9 @@ struct __fn { return (*this)(ranges::begin(__range), ranges::end(__range)); } }; -} // namespace __reverse inline namespace __cpo { -inline constexpr auto reverse = __reverse::__fn{}; +inline constexpr auto reverse = __reverse{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_reverse_copy.h b/lib/libcxx/include/__algorithm/ranges_reverse_copy.h index 60043787a717..e5ca5cf652dc 100644 --- a/lib/libcxx/include/__algorithm/ranges_reverse_copy.h +++ b/lib/libcxx/include/__algorithm/ranges_reverse_copy.h @@ -37,8 +37,7 @@ namespace ranges { template using reverse_copy_result = in_out_result<_InIter, _OutIter>; -namespace __reverse_copy { -struct __fn { +struct __reverse_copy { template _Sent, weakly_incrementable _OutIter> requires indirectly_copyable<_InIter, _OutIter> _LIBCPP_HIDE_FROM_ABI constexpr reverse_copy_result<_InIter, _OutIter> @@ -54,10 +53,9 @@ struct __fn { return {ranges::next(ranges::begin(__range), ranges::end(__range)), std::move(__ret.out)}; } }; -} // namespace __reverse_copy inline namespace __cpo { -inline constexpr auto reverse_copy = 
__reverse_copy::__fn{}; +inline constexpr auto reverse_copy = __reverse_copy{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_rotate.h b/lib/libcxx/include/__algorithm/ranges_rotate.h index 8d33a6f0799b..c1affc684ae4 100644 --- a/lib/libcxx/include/__algorithm/ranges_rotate.h +++ b/lib/libcxx/include/__algorithm/ranges_rotate.h @@ -33,9 +33,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __rotate { - -struct __fn { +struct __rotate { template _LIBCPP_HIDE_FROM_ABI constexpr static subrange<_Iter> __rotate_fn_impl(_Iter __first, _Iter __middle, _Sent __last) { auto __ret = std::__rotate<_RangeAlgPolicy>(std::move(__first), std::move(__middle), std::move(__last)); @@ -55,10 +53,8 @@ struct __fn { } }; -} // namespace __rotate - inline namespace __cpo { -inline constexpr auto rotate = __rotate::__fn{}; +inline constexpr auto rotate = __rotate{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_rotate_copy.h b/lib/libcxx/include/__algorithm/ranges_rotate_copy.h index 26fe110b5389..c0b4264a1b25 100644 --- a/lib/libcxx/include/__algorithm/ranges_rotate_copy.h +++ b/lib/libcxx/include/__algorithm/ranges_rotate_copy.h @@ -34,8 +34,7 @@ namespace ranges { template using rotate_copy_result = in_out_result<_InIter, _OutIter>; -namespace __rotate_copy { -struct __fn { +struct __rotate_copy { template _Sent, weakly_incrementable _OutIter> requires indirectly_copyable<_InIter, _OutIter> _LIBCPP_HIDE_FROM_ABI constexpr rotate_copy_result<_InIter, _OutIter> @@ -52,10 +51,9 @@ struct __fn { return (*this)(ranges::begin(__range), std::move(__middle), ranges::end(__range), std::move(__result)); } }; -} // namespace __rotate_copy inline namespace __cpo { -inline constexpr auto rotate_copy = __rotate_copy::__fn{}; +inline constexpr auto rotate_copy = __rotate_copy{}; } // namespace __cpo } // namespace ranges diff --git 
a/lib/libcxx/include/__algorithm/ranges_sample.h b/lib/libcxx/include/__algorithm/ranges_sample.h index e4f60a7b66be..a3b29608150d 100644 --- a/lib/libcxx/include/__algorithm/ranges_sample.h +++ b/lib/libcxx/include/__algorithm/ranges_sample.h @@ -35,9 +35,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __sample { - -struct __fn { +struct __sample { template _Sent, weakly_incrementable _OutIter, class _Gen> requires(forward_iterator<_Iter> || random_access_iterator<_OutIter>) && indirectly_copyable<_Iter, _OutIter> && uniform_random_bit_generator> @@ -58,10 +56,8 @@ struct __fn { } }; -} // namespace __sample - inline namespace __cpo { -inline constexpr auto sample = __sample::__fn{}; +inline constexpr auto sample = __sample{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_search.h b/lib/libcxx/include/__algorithm/ranges_search.h index 55294c60631b..b71151203963 100644 --- a/lib/libcxx/include/__algorithm/ranges_search.h +++ b/lib/libcxx/include/__algorithm/ranges_search.h @@ -33,8 +33,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __search { -struct __fn { +struct __search { template _LIBCPP_HIDE_FROM_ABI static constexpr subrange<_Iter1> __ranges_search_impl( _Iter1 __first1, @@ -120,10 +119,9 @@ struct __fn { __proj2); } }; -} // namespace __search inline namespace __cpo { -inline constexpr auto search = __search::__fn{}; +inline constexpr auto search = __search{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_search_n.h b/lib/libcxx/include/__algorithm/ranges_search_n.h index 56e12755b9bf..81b568c0965f 100644 --- a/lib/libcxx/include/__algorithm/ranges_search_n.h +++ b/lib/libcxx/include/__algorithm/ranges_search_n.h @@ -39,8 +39,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __search_n { -struct __fn { +struct __search_n { template _LIBCPP_HIDE_FROM_ABI static constexpr 
subrange<_Iter1> __ranges_search_n_impl( _Iter1 __first, _Sent1 __last, _SizeT __count, const _Type& __value, _Pred& __pred, _Proj& __proj) { @@ -100,10 +99,9 @@ struct __fn { return __ranges_search_n_impl(ranges::begin(__range), ranges::end(__range), __count, __value, __pred, __proj); } }; -} // namespace __search_n inline namespace __cpo { -inline constexpr auto search_n = __search_n::__fn{}; +inline constexpr auto search_n = __search_n{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_set_difference.h b/lib/libcxx/include/__algorithm/ranges_set_difference.h index 0841fb4ffd0c..1c83c7bdd5a3 100644 --- a/lib/libcxx/include/__algorithm/ranges_set_difference.h +++ b/lib/libcxx/include/__algorithm/ranges_set_difference.h @@ -10,7 +10,6 @@ #define _LIBCPP___ALGORITHM_RANGES_SET_DIFFERENCE_H #include <__algorithm/in_out_result.h> -#include <__algorithm/iterator_operations.h> #include <__algorithm/make_projected.h> #include <__algorithm/set_difference.h> #include <__config> @@ -42,9 +41,7 @@ namespace ranges { template using set_difference_result = in_out_result<_InIter, _OutIter>; -namespace __set_difference { - -struct __fn { +struct __set_difference { template _Sent1, input_iterator _InIter2, @@ -63,7 +60,7 @@ struct __fn { _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { - auto __ret = std::__set_difference<_RangeAlgPolicy>( + auto __ret = std::__set_difference( __first1, __last1, __first2, __last2, __result, ranges::__make_projected_comp(__comp, __proj1, __proj2)); return {std::move(__ret.first), std::move(__ret.second)}; } @@ -82,7 +79,7 @@ struct __fn { _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { - auto __ret = std::__set_difference<_RangeAlgPolicy>( + auto __ret = std::__set_difference( ranges::begin(__range1), ranges::end(__range1), ranges::begin(__range2), @@ -93,10 +90,8 @@ struct __fn { } }; -} // namespace __set_difference - inline namespace __cpo { -inline constexpr 
auto set_difference = __set_difference::__fn{}; +inline constexpr auto set_difference = __set_difference{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_set_intersection.h b/lib/libcxx/include/__algorithm/ranges_set_intersection.h index 9427379745b6..068794cf1b14 100644 --- a/lib/libcxx/include/__algorithm/ranges_set_intersection.h +++ b/lib/libcxx/include/__algorithm/ranges_set_intersection.h @@ -40,9 +40,7 @@ namespace ranges { template using set_intersection_result = in_in_out_result<_InIter1, _InIter2, _OutIter>; -namespace __set_intersection { - -struct __fn { +struct __set_intersection { template _Sent1, input_iterator _InIter2, @@ -98,10 +96,8 @@ struct __fn { } }; -} // namespace __set_intersection - inline namespace __cpo { -inline constexpr auto set_intersection = __set_intersection::__fn{}; +inline constexpr auto set_intersection = __set_intersection{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_set_symmetric_difference.h b/lib/libcxx/include/__algorithm/ranges_set_symmetric_difference.h index 995eb0999d94..c0a814043192 100644 --- a/lib/libcxx/include/__algorithm/ranges_set_symmetric_difference.h +++ b/lib/libcxx/include/__algorithm/ranges_set_symmetric_difference.h @@ -10,7 +10,6 @@ #define _LIBCPP___ALGORITHM_RANGES_SET_SYMMETRIC_DIFFERENCE_H #include <__algorithm/in_in_out_result.h> -#include <__algorithm/iterator_operations.h> #include <__algorithm/make_projected.h> #include <__algorithm/set_symmetric_difference.h> #include <__config> @@ -40,9 +39,7 @@ namespace ranges { template using set_symmetric_difference_result = in_in_out_result<_InIter1, _InIter2, _OutIter>; -namespace __set_symmetric_difference { - -struct __fn { +struct __set_symmetric_difference { template _Sent1, input_iterator _InIter2, @@ -61,7 +58,7 @@ struct __fn { _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { - auto __ret = 
std::__set_symmetric_difference<_RangeAlgPolicy>( + auto __ret = std::__set_symmetric_difference( std::move(__first1), std::move(__last1), std::move(__first2), @@ -87,7 +84,7 @@ struct __fn { _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { - auto __ret = std::__set_symmetric_difference<_RangeAlgPolicy>( + auto __ret = std::__set_symmetric_difference( ranges::begin(__range1), ranges::end(__range1), ranges::begin(__range2), @@ -98,10 +95,8 @@ struct __fn { } }; -} // namespace __set_symmetric_difference - inline namespace __cpo { -inline constexpr auto set_symmetric_difference = __set_symmetric_difference::__fn{}; +inline constexpr auto set_symmetric_difference = __set_symmetric_difference{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_set_union.h b/lib/libcxx/include/__algorithm/ranges_set_union.h index e870e390cc66..039ffb5932f3 100644 --- a/lib/libcxx/include/__algorithm/ranges_set_union.h +++ b/lib/libcxx/include/__algorithm/ranges_set_union.h @@ -10,7 +10,6 @@ #define _LIBCPP___ALGORITHM_RANGES_SET_UNION_H #include <__algorithm/in_in_out_result.h> -#include <__algorithm/iterator_operations.h> #include <__algorithm/make_projected.h> #include <__algorithm/set_union.h> #include <__config> @@ -43,9 +42,7 @@ namespace ranges { template using set_union_result = in_in_out_result<_InIter1, _InIter2, _OutIter>; -namespace __set_union { - -struct __fn { +struct __set_union { template _Sent1, input_iterator _InIter2, @@ -64,7 +61,7 @@ struct __fn { _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { - auto __ret = std::__set_union<_RangeAlgPolicy>( + auto __ret = std::__set_union( std::move(__first1), std::move(__last1), std::move(__first2), @@ -88,7 +85,7 @@ struct __fn { _Comp __comp = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { - auto __ret = std::__set_union<_RangeAlgPolicy>( + auto __ret = std::__set_union( ranges::begin(__range1), ranges::end(__range1), 
ranges::begin(__range2), @@ -99,10 +96,8 @@ struct __fn { } }; -} // namespace __set_union - inline namespace __cpo { -inline constexpr auto set_union = __set_union::__fn{}; +inline constexpr auto set_union = __set_union{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_shuffle.h b/lib/libcxx/include/__algorithm/ranges_shuffle.h index ab98ea22caab..87cb3685bb95 100644 --- a/lib/libcxx/include/__algorithm/ranges_shuffle.h +++ b/lib/libcxx/include/__algorithm/ranges_shuffle.h @@ -39,9 +39,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __shuffle { - -struct __fn { +struct __shuffle { template _Sent, class _Gen> requires permutable<_Iter> && uniform_random_bit_generator> _LIBCPP_HIDE_FROM_ABI _Iter operator()(_Iter __first, _Sent __last, _Gen&& __gen) const { @@ -56,10 +54,8 @@ struct __fn { } }; -} // namespace __shuffle - inline namespace __cpo { -inline constexpr auto shuffle = __shuffle::__fn{}; +inline constexpr auto shuffle = __shuffle{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_sort.h b/lib/libcxx/include/__algorithm/ranges_sort.h index 0296c146b3ed..2afad4c41301 100644 --- a/lib/libcxx/include/__algorithm/ranges_sort.h +++ b/lib/libcxx/include/__algorithm/ranges_sort.h @@ -39,9 +39,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __sort { - -struct __fn { +struct __sort { template _LIBCPP_HIDE_FROM_ABI constexpr static _Iter __sort_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { @@ -68,10 +66,8 @@ struct __fn { } }; -} // namespace __sort - inline namespace __cpo { -inline constexpr auto sort = __sort::__fn{}; +inline constexpr auto sort = __sort{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_sort_heap.h b/lib/libcxx/include/__algorithm/ranges_sort_heap.h index bab30df1708c..d3e20874fac5 100644 --- 
a/lib/libcxx/include/__algorithm/ranges_sort_heap.h +++ b/lib/libcxx/include/__algorithm/ranges_sort_heap.h @@ -40,9 +40,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __sort_heap { - -struct __fn { +struct __sort_heap { template _LIBCPP_HIDE_FROM_ABI constexpr static _Iter __sort_heap_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { @@ -69,10 +67,8 @@ struct __fn { } }; -} // namespace __sort_heap - inline namespace __cpo { -inline constexpr auto sort_heap = __sort_heap::__fn{}; +inline constexpr auto sort_heap = __sort_heap{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_stable_partition.h b/lib/libcxx/include/__algorithm/ranges_stable_partition.h index f34027ff772c..cfc02e1e97b3 100644 --- a/lib/libcxx/include/__algorithm/ranges_stable_partition.h +++ b/lib/libcxx/include/__algorithm/ranges_stable_partition.h @@ -42,9 +42,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __stable_partition { - -struct __fn { +struct __stable_partition { template _LIBCPP_HIDE_FROM_ABI static subrange<__remove_cvref_t<_Iter>> __stable_partition_fn_impl(_Iter&& __first, _Sent&& __last, _Pred&& __pred, _Proj&& __proj) { @@ -76,10 +74,8 @@ struct __fn { } }; -} // namespace __stable_partition - inline namespace __cpo { -inline constexpr auto stable_partition = __stable_partition::__fn{}; +inline constexpr auto stable_partition = __stable_partition{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_stable_sort.h b/lib/libcxx/include/__algorithm/ranges_stable_sort.h index 93909e253cc0..9c7df80ae987 100644 --- a/lib/libcxx/include/__algorithm/ranges_stable_sort.h +++ b/lib/libcxx/include/__algorithm/ranges_stable_sort.h @@ -39,9 +39,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __stable_sort { - -struct __fn { +struct __stable_sort { template _LIBCPP_HIDE_FROM_ABI 
static _Iter __stable_sort_fn_impl(_Iter __first, _Sent __last, _Comp& __comp, _Proj& __proj) { auto __last_iter = ranges::next(__first, __last); @@ -66,10 +64,8 @@ struct __fn { } }; -} // namespace __stable_sort - inline namespace __cpo { -inline constexpr auto stable_sort = __stable_sort::__fn{}; +inline constexpr auto stable_sort = __stable_sort{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_starts_with.h b/lib/libcxx/include/__algorithm/ranges_starts_with.h index 17084e4f2433..ae145d59010a 100644 --- a/lib/libcxx/include/__algorithm/ranges_starts_with.h +++ b/lib/libcxx/include/__algorithm/ranges_starts_with.h @@ -32,8 +32,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __starts_with { -struct __fn { +struct __starts_with { template _Sent1, input_iterator _Iter2, @@ -50,7 +49,7 @@ struct __fn { _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) { - return __mismatch::__fn::__go( + return __mismatch::__go( std::move(__first1), std::move(__last1), std::move(__first2), @@ -69,7 +68,7 @@ struct __fn { requires indirectly_comparable, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr bool operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) { - return __mismatch::__fn::__go( + return __mismatch::__go( ranges::begin(__range1), ranges::end(__range1), ranges::begin(__range2), @@ -80,9 +79,8 @@ struct __fn { .in2 == ranges::end(__range2); } }; -} // namespace __starts_with inline namespace __cpo { -inline constexpr auto starts_with = __starts_with::__fn{}; +inline constexpr auto starts_with = __starts_with{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_swap_ranges.h b/lib/libcxx/include/__algorithm/ranges_swap_ranges.h index b6d9f618395a..ab6db50d8a13 100644 --- a/lib/libcxx/include/__algorithm/ranges_swap_ranges.h +++ 
b/lib/libcxx/include/__algorithm/ranges_swap_ranges.h @@ -36,8 +36,7 @@ namespace ranges { template using swap_ranges_result = in_in_result<_I1, _I2>; -namespace __swap_ranges { -struct __fn { +struct __swap_ranges { template _S1, input_iterator _I2, sentinel_for<_I2> _S2> requires indirectly_swappable<_I1, _I2> _LIBCPP_HIDE_FROM_ABI constexpr swap_ranges_result<_I1, _I2> @@ -54,10 +53,9 @@ struct __fn { return operator()(ranges::begin(__r1), ranges::end(__r1), ranges::begin(__r2), ranges::end(__r2)); } }; -} // namespace __swap_ranges inline namespace __cpo { -inline constexpr auto swap_ranges = __swap_ranges::__fn{}; +inline constexpr auto swap_ranges = __swap_ranges{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_transform.h b/lib/libcxx/include/__algorithm/ranges_transform.h index 7850ec4f8465..091311821968 100644 --- a/lib/libcxx/include/__algorithm/ranges_transform.h +++ b/lib/libcxx/include/__algorithm/ranges_transform.h @@ -41,8 +41,7 @@ using unary_transform_result = in_out_result<_Ip, _Op>; template using binary_transform_result = in_in_out_result<_I1, _I2, _O1>; -namespace __transform { -struct __fn { +struct __transform { private: template _LIBCPP_HIDE_FROM_ABI static constexpr unary_transform_result<_InIter, _OutIter> @@ -161,10 +160,9 @@ struct __fn { __projection2); } }; -} // namespace __transform inline namespace __cpo { -inline constexpr auto transform = __transform::__fn{}; +inline constexpr auto transform = __transform{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_unique.h b/lib/libcxx/include/__algorithm/ranges_unique.h index 7a9b78432187..a817359abd88 100644 --- a/lib/libcxx/include/__algorithm/ranges_unique.h +++ b/lib/libcxx/include/__algorithm/ranges_unique.h @@ -40,9 +40,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __unique { - -struct __fn { +struct __unique { template _Sent, class _Proj = identity, @@ 
-66,10 +64,8 @@ struct __fn { } }; -} // namespace __unique - inline namespace __cpo { -inline constexpr auto unique = __unique::__fn{}; +inline constexpr auto unique = __unique{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_unique_copy.h b/lib/libcxx/include/__algorithm/ranges_unique_copy.h index 61133885ae80..ee7f0a0187b7 100644 --- a/lib/libcxx/include/__algorithm/ranges_unique_copy.h +++ b/lib/libcxx/include/__algorithm/ranges_unique_copy.h @@ -44,12 +44,10 @@ namespace ranges { template using unique_copy_result = in_out_result<_InIter, _OutIter>; -namespace __unique_copy { - template concept __can_reread_from_output = (input_iterator<_OutIter> && same_as, iter_value_t<_OutIter>>); -struct __fn { +struct __unique_copy { template static consteval auto __get_algo_tag() { if constexpr (forward_iterator<_InIter>) { @@ -62,7 +60,7 @@ struct __fn { } template - using __algo_tag_t = decltype(__get_algo_tag<_InIter, _OutIter>()); + using __algo_tag_t _LIBCPP_NODEBUG = decltype(__get_algo_tag<_InIter, _OutIter>()); template _Sent, @@ -104,10 +102,8 @@ struct __fn { } }; -} // namespace __unique_copy - inline namespace __cpo { -inline constexpr auto unique_copy = __unique_copy::__fn{}; +inline constexpr auto unique_copy = __unique_copy{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/ranges_upper_bound.h b/lib/libcxx/include/__algorithm/ranges_upper_bound.h index fa6fa7f70ed5..4b2835d4d58d 100644 --- a/lib/libcxx/include/__algorithm/ranges_upper_bound.h +++ b/lib/libcxx/include/__algorithm/ranges_upper_bound.h @@ -30,8 +30,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace ranges { -namespace __upper_bound { -struct __fn { +struct __upper_bound { template _Sent, class _Type, @@ -60,10 +59,9 @@ struct __fn { ranges::begin(__r), ranges::end(__r), __value, __comp_lhs_rhs_swapped, __proj); } }; -} // namespace __upper_bound inline namespace __cpo { -inline constexpr auto upper_bound = 
__upper_bound::__fn{}; +inline constexpr auto upper_bound = __upper_bound{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__algorithm/remove.h b/lib/libcxx/include/__algorithm/remove.h index fd01c23cb670..b2d7023c5b07 100644 --- a/lib/libcxx/include/__algorithm/remove.h +++ b/lib/libcxx/include/__algorithm/remove.h @@ -24,7 +24,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator remove(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { __first = std::find(__first, __last, __value); if (__first != __last) { diff --git a/lib/libcxx/include/__algorithm/remove_if.h b/lib/libcxx/include/__algorithm/remove_if.h index b14f3c0efa7e..56fd745569ee 100644 --- a/lib/libcxx/include/__algorithm/remove_if.h +++ b/lib/libcxx/include/__algorithm/remove_if.h @@ -23,7 +23,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator remove_if(_ForwardIterator __first, _ForwardIterator __last, _Predicate __pred) { __first = std::find_if<_ForwardIterator, _Predicate&>(__first, __last, __pred); if (__first != __last) { diff --git a/lib/libcxx/include/__algorithm/search.h b/lib/libcxx/include/__algorithm/search.h index b82ca7809535..161fd39d861a 100644 --- a/lib/libcxx/include/__algorithm/search.h +++ b/lib/libcxx/include/__algorithm/search.h @@ -14,11 +14,11 @@ #include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/advance.h> #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> #include 
<__type_traits/is_callable.h> #include <__utility/pair.h> @@ -160,20 +160,20 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_Iter1, _Iter1> __searc } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2, _BinaryPredicate __pred) { - static_assert(__is_callable<_BinaryPredicate, decltype(*__first1), decltype(*__first2)>::value, - "BinaryPredicate has to be callable"); + static_assert(__is_callable<_BinaryPredicate&, decltype(*__first1), decltype(*__first2)>::value, + "The comparator has to be callable"); auto __proj = __identity(); return std::__search_impl(__first1, __last1, __first2, __last2, __pred, __proj, __proj).first; } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 search(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) { return std::search(__first1, __last1, __first2, __last2, __equal_to()); } diff --git a/lib/libcxx/include/__algorithm/search_n.h b/lib/libcxx/include/__algorithm/search_n.h index 771647d3168a..38474e1b2379 100644 --- a/lib/libcxx/include/__algorithm/search_n.h +++ b/lib/libcxx/include/__algorithm/search_n.h @@ -14,12 +14,13 @@ #include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/advance.h> #include <__iterator/concepts.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__ranges/concepts.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_callable.h> #include 
<__utility/convert_to_integral.h> #include <__utility/pair.h> @@ -136,16 +137,16 @@ __search_n_impl(_Iter1 __first, _Sent1 __last, _DiffT __count, const _Type& __va } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator search_n( +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator search_n( _ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value, _BinaryPredicate __pred) { static_assert( - __is_callable<_BinaryPredicate, decltype(*__first), const _Tp&>::value, "BinaryPredicate has to be callable"); + __is_callable<_BinaryPredicate&, decltype(*__first), const _Tp&>::value, "The comparator has to be callable"); auto __proj = __identity(); return std::__search_n_impl(__first, __last, std::__convert_to_integral(__count), __value, __pred, __proj).first; } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator search_n(_ForwardIterator __first, _ForwardIterator __last, _Size __count, const _Tp& __value) { return std::search_n(__first, __last, std::__convert_to_integral(__count), __value, __equal_to()); } diff --git a/lib/libcxx/include/__algorithm/set_difference.h b/lib/libcxx/include/__algorithm/set_difference.h index f414bcecb50d..0cd1bc45d64f 100644 --- a/lib/libcxx/include/__algorithm/set_difference.h +++ b/lib/libcxx/include/__algorithm/set_difference.h @@ -12,10 +12,8 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> #include <__algorithm/copy.h> -#include <__algorithm/iterator_operations.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/iterator_traits.h> #include <__type_traits/remove_cvref.h> #include <__utility/move.h> @@ -30,7 +28,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -template +template 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<__remove_cvref_t<_InIter1>, __remove_cvref_t<_OutIter> > __set_difference( _InIter1&& __first1, _Sent1&& __last1, _InIter2&& __first2, _Sent2&& __last2, _OutIter&& __result, _Comp&& __comp) { @@ -46,7 +44,7 @@ __set_difference( ++__first2; } } - return std::__copy<_AlgPolicy>(std::move(__first1), std::move(__last1), std::move(__result)); + return std::__copy(std::move(__first1), std::move(__last1), std::move(__result)); } template @@ -57,8 +55,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_d _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) { - return std::__set_difference<_ClassicAlgPolicy, __comp_ref_type<_Compare> >( - __first1, __last1, __first2, __last2, __result, __comp) + return std::__set_difference<__comp_ref_type<_Compare> >(__first1, __last1, __first2, __last2, __result, __comp) .second; } @@ -69,7 +66,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_d _InputIterator2 __first2, _InputIterator2 __last2, _OutputIterator __result) { - return std::__set_difference<_ClassicAlgPolicy>(__first1, __last1, __first2, __last2, __result, __less<>()).second; + return std::__set_difference(__first1, __last1, __first2, __last2, __result, __less<>()).second; } _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__algorithm/set_intersection.h b/lib/libcxx/include/__algorithm/set_intersection.h index bb0d86cd0f58..6246e24b9ca4 100644 --- a/lib/libcxx/include/__algorithm/set_intersection.h +++ b/lib/libcxx/include/__algorithm/set_intersection.h @@ -19,6 +19,7 @@ #include <__iterator/next.h> #include <__type_traits/is_same.h> #include <__utility/exchange.h> +#include <__utility/forward.h> #include <__utility/move.h> #include <__utility/swap.h> @@ -84,7 +85,7 @@ template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InForwardIter1, 
_InForwardIter2, _OutIter> __set_intersection( _InForwardIter1 __first1, @@ -129,7 +130,7 @@ template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InInputIter1, _InInputIter2, _OutIter> __set_intersection( _InInputIter1 __first1, @@ -160,7 +161,7 @@ __set_intersection( } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_intersection_result<_InIter1, _InIter2, _OutIter> __set_intersection( _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) { diff --git a/lib/libcxx/include/__algorithm/set_symmetric_difference.h b/lib/libcxx/include/__algorithm/set_symmetric_difference.h index db36665a6136..91ea4067c0d0 100644 --- a/lib/libcxx/include/__algorithm/set_symmetric_difference.h +++ b/lib/libcxx/include/__algorithm/set_symmetric_difference.h @@ -12,7 +12,6 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> #include <__algorithm/copy.h> -#include <__algorithm/iterator_operations.h> #include <__config> #include <__iterator/iterator_traits.h> #include <__utility/move.h> @@ -39,13 +38,13 @@ struct __set_symmetric_difference_result { : __in1_(std::move(__in_iter1)), __in2_(std::move(__in_iter2)), __out_(std::move(__out_iter)) {} }; -template +template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_symmetric_difference_result<_InIter1, _InIter2, _OutIter> __set_symmetric_difference( _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) { while (__first1 != __last1) { if (__first2 == __last2) { - auto __ret1 = std::__copy<_AlgPolicy>(std::move(__first1), std::move(__last1), std::move(__result)); + auto __ret1 = std::__copy(std::move(__first1), std::move(__last1), std::move(__result)); return __set_symmetric_difference_result<_InIter1, _InIter2, _OutIter>( 
std::move(__ret1.first), std::move(__first2), std::move((__ret1.second))); } @@ -63,7 +62,7 @@ __set_symmetric_difference( ++__first2; } } - auto __ret2 = std::__copy<_AlgPolicy>(std::move(__first2), std::move(__last2), std::move(__result)); + auto __ret2 = std::__copy(std::move(__first2), std::move(__last2), std::move(__result)); return __set_symmetric_difference_result<_InIter1, _InIter2, _OutIter>( std::move(__first1), std::move(__ret2.first), std::move((__ret2.second))); } @@ -76,7 +75,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_symmetri _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) { - return std::__set_symmetric_difference<_ClassicAlgPolicy, __comp_ref_type<_Compare> >( + return std::__set_symmetric_difference<__comp_ref_type<_Compare> >( std::move(__first1), std::move(__last1), std::move(__first2), diff --git a/lib/libcxx/include/__algorithm/set_union.h b/lib/libcxx/include/__algorithm/set_union.h index a79c50fd3cf2..393dddce4302 100644 --- a/lib/libcxx/include/__algorithm/set_union.h +++ b/lib/libcxx/include/__algorithm/set_union.h @@ -12,7 +12,6 @@ #include <__algorithm/comp.h> #include <__algorithm/comp_ref_type.h> #include <__algorithm/copy.h> -#include <__algorithm/iterator_operations.h> #include <__config> #include <__iterator/iterator_traits.h> #include <__utility/move.h> @@ -39,12 +38,12 @@ struct __set_union_result { : __in1_(std::move(__in_iter1)), __in2_(std::move(__in_iter2)), __out_(std::move(__out_iter)) {} }; -template +template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_union_result<_InIter1, _InIter2, _OutIter> __set_union( _InIter1 __first1, _Sent1 __last1, _InIter2 __first2, _Sent2 __last2, _OutIter __result, _Compare&& __comp) { for (; __first1 != __last1; ++__result) { if (__first2 == __last2) { - auto __ret1 = std::__copy<_AlgPolicy>(std::move(__first1), std::move(__last1), std::move(__result)); + auto __ret1 = std::__copy(std::move(__first1), std::move(__last1), 
std::move(__result)); return __set_union_result<_InIter1, _InIter2, _OutIter>( std::move(__ret1.first), std::move(__first2), std::move((__ret1.second))); } @@ -59,7 +58,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __set_union_result<_InIter1, ++__first1; } } - auto __ret2 = std::__copy<_AlgPolicy>(std::move(__first2), std::move(__last2), std::move(__result)); + auto __ret2 = std::__copy(std::move(__first2), std::move(__last2), std::move(__result)); return __set_union_result<_InIter1, _InIter2, _OutIter>( std::move(__first1), std::move(__ret2.first), std::move((__ret2.second))); } @@ -72,7 +71,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator set_union( _InputIterator2 __last2, _OutputIterator __result, _Compare __comp) { - return std::__set_union<_ClassicAlgPolicy, __comp_ref_type<_Compare> >( + return std::__set_union<__comp_ref_type<_Compare> >( std::move(__first1), std::move(__last1), std::move(__first2), diff --git a/lib/libcxx/include/__algorithm/shuffle.h b/lib/libcxx/include/__algorithm/shuffle.h index c9c56ce8c2c0..7177fbb469ba 100644 --- a/lib/libcxx/include/__algorithm/shuffle.h +++ b/lib/libcxx/include/__algorithm/shuffle.h @@ -11,12 +11,12 @@ #include <__algorithm/iterator_operations.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__iterator/iterator_traits.h> #include <__random/uniform_int_distribution.h> #include <__utility/forward.h> #include <__utility/move.h> #include <__utility/swap.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/lib/libcxx/include/__algorithm/simd_utils.h b/lib/libcxx/include/__algorithm/simd_utils.h index 549197be8018..4e03723a3285 100644 --- a/lib/libcxx/include/__algorithm/simd_utils.h +++ b/lib/libcxx/include/__algorithm/simd_utils.h @@ -14,10 +14,10 @@ #include <__bit/countl.h> #include <__bit/countr.h> #include <__config> +#include <__cstddef/size_t.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_same.h> 
#include <__utility/integer_sequence.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -70,7 +70,7 @@ struct __get_as_integer_type_impl<8> { }; template -using __get_as_integer_type_t = typename __get_as_integer_type_impl::type; +using __get_as_integer_type_t _LIBCPP_NODEBUG = typename __get_as_integer_type_impl::type; // This isn't specialized for 64 byte vectors on purpose. They have the potential to significantly reduce performance // in mixed simd/non-simd workloads and don't provide any performance improvement for currently vectorized algorithms @@ -90,7 +90,7 @@ inline constexpr size_t __native_vector_size = 1; # endif template -using __simd_vector __attribute__((__ext_vector_type__(_Np))) = _ArithmeticT; +using __simd_vector __attribute__((__ext_vector_type__(_Np))) _LIBCPP_NODEBUG = _ArithmeticT; template inline constexpr size_t __simd_vector_size_v = []() -> size_t { @@ -106,23 +106,23 @@ _LIBCPP_HIDE_FROM_ABI _Tp __simd_vector_underlying_type_impl(__simd_vector<_Tp, } template -using __simd_vector_underlying_type_t = decltype(std::__simd_vector_underlying_type_impl(_VecT{})); +using __simd_vector_underlying_type_t _LIBCPP_NODEBUG = decltype(std::__simd_vector_underlying_type_impl(_VecT{})); // This isn't inlined without always_inline when loading chars. 
template -_LIBCPP_NODISCARD _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __load_vector(_Iter __iter) noexcept { +[[__nodiscard__]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _VecT __load_vector(_Iter __iter) noexcept { return [=](index_sequence<_Indices...>) _LIBCPP_ALWAYS_INLINE noexcept { return _VecT{__iter[_Indices]...}; }(make_index_sequence<__simd_vector_size_v<_VecT>>{}); } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __all_of(__simd_vector<_Tp, _Np> __vec) noexcept { return __builtin_reduce_and(__builtin_convertvector(__vec, __simd_vector)); } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_Tp, _Np> __vec) noexcept { using __mask_vec = __simd_vector; // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876 @@ -151,7 +151,7 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_T } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t __find_first_not_set(__simd_vector<_Tp, _Np> __vec) noexcept { return std::__find_first_set(~__vec); } diff --git a/lib/libcxx/include/__algorithm/sort.h b/lib/libcxx/include/__algorithm/sort.h index 07b5814639e9..8dd0721f2c65 100644 --- a/lib/libcxx/include/__algorithm/sort.h +++ b/lib/libcxx/include/__algorithm/sort.h @@ -27,9 +27,14 @@ #include <__functional/ranges_operations.h> #include <__iterator/iterator_traits.h> #include <__type_traits/conditional.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/disjunction.h> +#include <__type_traits/enable_if.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_constant_evaluated.h> +#include <__type_traits/is_same.h> +#include 
<__type_traits/is_trivially_copyable.h> +#include <__type_traits/remove_cvref.h> #include <__utility/move.h> #include <__utility/pair.h> #include @@ -44,110 +49,11 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -// stable, 2-3 compares, 0-2 swaps - -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 unsigned -__sort3(_ForwardIterator __x, _ForwardIterator __y, _ForwardIterator __z, _Compare __c) { - using _Ops = _IterOps<_AlgPolicy>; - - unsigned __r = 0; - if (!__c(*__y, *__x)) // if x <= y - { - if (!__c(*__z, *__y)) // if y <= z - return __r; // x <= y && y <= z - // x <= y && y > z - _Ops::iter_swap(__y, __z); // x <= z && y < z - __r = 1; - if (__c(*__y, *__x)) // if x > y - { - _Ops::iter_swap(__x, __y); // x < y && y <= z - __r = 2; - } - return __r; // x <= y && y < z - } - if (__c(*__z, *__y)) // x > y, if y > z - { - _Ops::iter_swap(__x, __z); // x < y && y < z - __r = 1; - return __r; - } - _Ops::iter_swap(__x, __y); // x > y && y <= z - __r = 1; // x < y && x <= z - if (__c(*__z, *__y)) // if y > z - { - _Ops::iter_swap(__y, __z); // x <= y && y < z - __r = 2; - } - return __r; -} // x <= y && y <= z - -// stable, 3-6 compares, 0-5 swaps - -template -_LIBCPP_HIDE_FROM_ABI void -__sort4(_ForwardIterator __x1, _ForwardIterator __x2, _ForwardIterator __x3, _ForwardIterator __x4, _Compare __c) { - using _Ops = _IterOps<_AlgPolicy>; - std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); - if (__c(*__x4, *__x3)) { - _Ops::iter_swap(__x3, __x4); - if (__c(*__x3, *__x2)) { - _Ops::iter_swap(__x2, __x3); - if (__c(*__x2, *__x1)) { - _Ops::iter_swap(__x1, __x2); - } - } - } -} - -// stable, 4-10 compares, 0-9 swaps - -template -_LIBCPP_HIDE_FROM_ABI void -__sort5(_ForwardIterator __x1, - _ForwardIterator __x2, - _ForwardIterator __x3, - _ForwardIterator __x4, - _ForwardIterator __x5, - _Comp __comp) { - using _Ops = _IterOps<_AlgPolicy>; - - std::__sort4<_AlgPolicy, _Comp>(__x1, __x2, __x3, __x4, __comp); - if (__comp(*__x5, *__x4)) { - 
_Ops::iter_swap(__x4, __x5); - if (__comp(*__x4, *__x3)) { - _Ops::iter_swap(__x3, __x4); - if (__comp(*__x3, *__x2)) { - _Ops::iter_swap(__x2, __x3); - if (__comp(*__x2, *__x1)) { - _Ops::iter_swap(__x1, __x2); - } - } - } - } -} - -// The comparator being simple is a prerequisite for using the branchless optimization. -template -struct __is_simple_comparator : false_type {}; -template <> -struct __is_simple_comparator<__less<>&> : true_type {}; -template -struct __is_simple_comparator&> : true_type {}; -template -struct __is_simple_comparator&> : true_type {}; -#if _LIBCPP_STD_VER >= 20 -template <> -struct __is_simple_comparator : true_type {}; -template <> -struct __is_simple_comparator : true_type {}; -#endif - template ::value_type> -using __use_branchless_sort = - integral_constant::value && sizeof(_Tp) <= sizeof(void*) && - is_arithmetic<_Tp>::value && __is_simple_comparator<_Compare>::value>; +inline const bool __use_branchless_sort = + __libcpp_is_contiguous_iterator<_Iter>::value && __is_cheap_to_copy<_Tp> && is_arithmetic<_Tp>::value && + (__desugars_to_v<__less_tag, __remove_cvref_t<_Compare>, _Tp, _Tp> || + __desugars_to_v<__greater_tag, __remove_cvref_t<_Compare>, _Tp, _Tp>); namespace __detail { @@ -158,59 +64,88 @@ enum { __block_size = sizeof(uint64_t) * 8 }; // Ensures that __c(*__x, *__y) is true by swapping *__x and *__y if necessary. template -inline _LIBCPP_HIDE_FROM_ABI void __cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) { +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__cond_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _Compare __c) { // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; bool __r = __c(*__x, *__y); value_type __tmp = __r ? *__x : *__y; *__y = __r ? 
*__y : *__x; *__x = __tmp; + return !__r; } // Ensures that *__x, *__y and *__z are ordered according to the comparator __c, // under the assumption that *__y and *__z are already ordered. template -inline _LIBCPP_HIDE_FROM_ABI void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool __partially_sorted_swap(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) { // Note: this function behaves correctly even with proxy iterators (because it relies on `value_type`). using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; - bool __r = __c(*__z, *__x); - value_type __tmp = __r ? *__z : *__x; - *__z = __r ? *__x : *__z; - __r = __c(__tmp, *__y); - *__x = __r ? *__x : *__y; - *__y = __r ? *__y : __tmp; + bool __r1 = __c(*__z, *__x); + value_type __tmp = __r1 ? *__z : *__x; + *__z = __r1 ? *__x : *__z; + bool __r2 = __c(__tmp, *__y); + *__x = __r2 ? *__x : *__y; + *__y = __r2 ? *__y : __tmp; + return !__r1 || !__r2; } +// stable, 2-3 compares, 0-2 swaps + template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless( - _RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { - std::__cond_swap<_Compare>(__x2, __x3, __c); - std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c); + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__sort3(_RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { + bool __swapped1 = std::__cond_swap<_Compare>(__x2, __x3, __c); + bool __swapped2 = std::__partially_sorted_swap<_Compare>(__x1, __x2, __x3, __c); + return __swapped1 || __swapped2; } template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort3_maybe_branchless( - _RandomAccessIterator __x1, _RandomAccessIterator __x2, _RandomAccessIterator __x3, _Compare __c) { - 
std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); -} + __enable_if_t, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 bool +__sort3(_RandomAccessIterator __x, _RandomAccessIterator __y, _RandomAccessIterator __z, _Compare __c) { + using _Ops = _IterOps<_AlgPolicy>; + + if (!__c(*__y, *__x)) // if x <= y + { + if (!__c(*__z, *__y)) // if y <= z + return false; // x <= y && y <= z + // x <= y && y > z + _Ops::iter_swap(__y, __z); // x <= z && y < z + if (__c(*__y, *__x)) // if x > y + _Ops::iter_swap(__x, __y); // x < y && y <= z + return true; // x <= y && y < z + } + if (__c(*__z, *__y)) // x > y, if y > z + { + _Ops::iter_swap(__x, __z); // x < y && y < z + return true; + } + _Ops::iter_swap(__x, __y); // x > y && y <= z + // x < y && x <= z + if (__c(*__z, *__y)) // if y > z + _Ops::iter_swap(__y, __z); // x <= y && y < z + return true; +} // x <= y && y <= z + +// stable, 3-6 compares, 0-5 swaps template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _Compare __c) { + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort4(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _Compare __c) { std::__cond_swap<_Compare>(__x1, __x3, __c); std::__cond_swap<_Compare>(__x2, __x4, __c); std::__cond_swap<_Compare>(__x1, __x2, __c); @@ -221,27 +156,39 @@ inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort4_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _Compare __c) { - std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, __x3, __x4, __c); + __enable_if_t, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI 
void +__sort4(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _Compare __c) { + using _Ops = _IterOps<_AlgPolicy>; + std::__sort3<_AlgPolicy, _Compare>(__x1, __x2, __x3, __c); + if (__c(*__x4, *__x3)) { + _Ops::iter_swap(__x3, __x4); + if (__c(*__x3, *__x2)) { + _Ops::iter_swap(__x2, __x3); + if (__c(*__x2, *__x1)) { + _Ops::iter_swap(__x1, __x2); + } + } + } } +// stable, 4-10 compares, 0-9 swaps + template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _RandomAccessIterator __x5, - _Compare __c) { + __enable_if_t<__use_branchless_sort<_Compare, _RandomAccessIterator>, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort5(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _RandomAccessIterator __x5, + _Compare __c) { std::__cond_swap<_Compare>(__x1, __x2, __c); std::__cond_swap<_Compare>(__x4, __x5, __c); std::__partially_sorted_swap<_Compare>(__x3, __x4, __x5, __c); @@ -253,16 +200,29 @@ inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI void __sort5_maybe_branchless( - _RandomAccessIterator __x1, - _RandomAccessIterator __x2, - _RandomAccessIterator __x3, - _RandomAccessIterator __x4, - _RandomAccessIterator __x5, - _Compare __c) { - std::__sort5<_AlgPolicy, _Compare, _RandomAccessIterator>( - std::move(__x1), std::move(__x2), std::move(__x3), std::move(__x4), std::move(__x5), __c); + __enable_if_t, int> = 0> +inline _LIBCPP_HIDE_FROM_ABI void +__sort5(_RandomAccessIterator __x1, + _RandomAccessIterator __x2, + _RandomAccessIterator __x3, + _RandomAccessIterator __x4, + _RandomAccessIterator __x5, + _Compare __comp) { + using _Ops = _IterOps<_AlgPolicy>; + + std::__sort4<_AlgPolicy, _Compare>(__x1, __x2, 
__x3, __x4, __comp); + if (__comp(*__x5, *__x4)) { + _Ops::iter_swap(__x4, __x5); + if (__comp(*__x4, *__x3)) { + _Ops::iter_swap(__x3, __x4); + if (__comp(*__x3, *__x2)) { + _Ops::iter_swap(__x2, __x3); + if (__comp(*__x2, *__x1)) { + _Ops::iter_swap(__x1, __x2); + } + } + } + } } // Assumes size > 0 @@ -280,7 +240,7 @@ __selection_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, // Sort the iterator range [__first, __last) using the comparator __comp using // the insertion sort algorithm. template -_LIBCPP_HIDE_FROM_ABI void +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __insertion_sort(_BidirectionalIterator __first, _BidirectionalIterator __last, _Compare __comp) { using _Ops = _IterOps<_AlgPolicy>; @@ -352,14 +312,14 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator _Ops::iter_swap(__first, __last); return true; case 3: - std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp); + std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), --__last, __comp); return true; case 4: - std::__sort4_maybe_branchless<_AlgPolicy, _Comp>( + std::__sort4<_AlgPolicy, _Comp>( __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp); return true; case 5: - std::__sort5_maybe_branchless<_AlgPolicy, _Comp>( + std::__sort5<_AlgPolicy, _Comp>( __first, __first + difference_type(1), __first + difference_type(2), @@ -370,7 +330,7 @@ __insertion_sort_incomplete(_RandomAccessIterator __first, _RandomAccessIterator } typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; _RandomAccessIterator __j = __first + difference_type(2); - std::__sort3_maybe_branchless<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp); + std::__sort3<_AlgPolicy, _Comp>(__first, __first + difference_type(1), __j, __comp); const unsigned __limit = 8; unsigned __count = 0; for (_RandomAccessIterator __i = __j + 
difference_type(1); __i != __last; ++__i) { @@ -777,14 +737,14 @@ void __introsort(_RandomAccessIterator __first, _Ops::iter_swap(__first, __last); return; case 3: - std::__sort3_maybe_branchless<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); + std::__sort3<_AlgPolicy, _Compare>(__first, __first + difference_type(1), --__last, __comp); return; case 4: - std::__sort4_maybe_branchless<_AlgPolicy, _Compare>( + std::__sort4<_AlgPolicy, _Compare>( __first, __first + difference_type(1), __first + difference_type(2), --__last, __comp); return; case 5: - std::__sort5_maybe_branchless<_AlgPolicy, _Compare>( + std::__sort5<_AlgPolicy, _Compare>( __first, __first + difference_type(1), __first + difference_type(2), @@ -891,7 +851,7 @@ template void __sort(_RandomAccessIterator, _RandomAccessIterator, _Comp); extern template _LIBCPP_EXPORTED_FROM_ABI void __sort<__less&, char*>(char*, char*, __less&); -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS extern template _LIBCPP_EXPORTED_FROM_ABI void __sort<__less&, wchar_t*>(wchar_t*, wchar_t*, __less&); #endif extern template _LIBCPP_EXPORTED_FROM_ABI void @@ -925,20 +885,18 @@ __sort_dispatch(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co // Only use bitset partitioning for arithmetic types. We should also check // that the default comparator is in use so that we are sure that there are no // branches in the comparator. 
- std::__introsort<_AlgPolicy, - _Comp&, - _RandomAccessIterator, - __use_branchless_sort<_Comp, _RandomAccessIterator>::value>(__first, __last, __comp, __depth_limit); + std::__introsort<_AlgPolicy, _Comp&, _RandomAccessIterator, __use_branchless_sort<_Comp, _RandomAccessIterator> >( + __first, __last, __comp, __depth_limit); } template -using __is_any_of = _Or...>; +using __is_any_of _LIBCPP_NODEBUG = _Or...>; template -using __sort_is_specialized_in_library = __is_any_of< +using __sort_is_specialized_in_library _LIBCPP_NODEBUG = __is_any_of< _Type, char, -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS wchar_t, #endif signed char, diff --git a/lib/libcxx/include/__algorithm/stable_partition.h b/lib/libcxx/include/__algorithm/stable_partition.h index 8bb1eaf2d224..2ba7239a3a03 100644 --- a/lib/libcxx/include/__algorithm/stable_partition.h +++ b/lib/libcxx/include/__algorithm/stable_partition.h @@ -12,15 +12,16 @@ #include <__algorithm/iterator_operations.h> #include <__algorithm/rotate.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__iterator/advance.h> #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__memory/destruct_n.h> -#include <__memory/temporary_buffer.h> #include <__memory/unique_ptr.h> +#include <__memory/unique_temporary_buffer.h> +#include <__type_traits/remove_cvref.h> #include <__utility/move.h> #include <__utility/pair.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -132,14 +133,12 @@ __stable_partition_impl(_ForwardIterator __first, _ForwardIterator __last, _Pred // We now have a reduced range [__first, __last) // *__first is known to be false difference_type __len = _IterOps<_AlgPolicy>::distance(__first, __last); + __unique_temporary_buffer __unique_buf; pair __p(0, 0); - unique_ptr __h; if (__len >= __alloc_limit) { - // TODO: Remove the use of std::get_temporary_buffer - _LIBCPP_SUPPRESS_DEPRECATED_PUSH - __p = 
std::get_temporary_buffer(__len); - _LIBCPP_SUPPRESS_DEPRECATED_POP - __h.reset(__p.first); + __unique_buf = std::__allocate_unique_temporary_buffer(__len); + __p.first = __unique_buf.get(); + __p.second = __unique_buf.get_deleter().__count_; } return std::__stable_partition_impl<_AlgPolicy, _Predicate&>( std::move(__first), std::move(__last), __pred, __len, __p, forward_iterator_tag()); @@ -272,14 +271,12 @@ _LIBCPP_HIDE_FROM_ABI _BidirectionalIterator __stable_partition_impl( // *__last is known to be true // __len >= 2 difference_type __len = _IterOps<_AlgPolicy>::distance(__first, __last) + 1; + __unique_temporary_buffer __unique_buf; pair __p(0, 0); - unique_ptr __h; if (__len >= __alloc_limit) { - // TODO: Remove the use of std::get_temporary_buffer - _LIBCPP_SUPPRESS_DEPRECATED_PUSH - __p = std::get_temporary_buffer(__len); - _LIBCPP_SUPPRESS_DEPRECATED_POP - __h.reset(__p.first); + __unique_buf = std::__allocate_unique_temporary_buffer(__len); + __p.first = __unique_buf.get(); + __p.second = __unique_buf.get_deleter().__count_; } return std::__stable_partition_impl<_AlgPolicy, _Predicate&>( std::move(__first), std::move(__last), __pred, __len, __p, bidirectional_iterator_tag()); diff --git a/lib/libcxx/include/__algorithm/stable_sort.h b/lib/libcxx/include/__algorithm/stable_sort.h index 726e7e16b356..3cfbcf08d2c5 100644 --- a/lib/libcxx/include/__algorithm/stable_sort.h +++ b/lib/libcxx/include/__algorithm/stable_sort.h @@ -13,17 +13,24 @@ #include <__algorithm/comp_ref_type.h> #include <__algorithm/inplace_merge.h> #include <__algorithm/iterator_operations.h> +#include <__algorithm/radix_sort.h> #include <__algorithm/sort.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__debug_utils/strict_weak_ordering_check.h> #include <__iterator/iterator_traits.h> +#include <__memory/construct_at.h> #include <__memory/destruct_n.h> -#include <__memory/temporary_buffer.h> #include <__memory/unique_ptr.h> +#include <__memory/unique_temporary_buffer.h> 
+#include <__type_traits/desugars_to.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/is_integral.h> +#include <__type_traits/is_same.h> #include <__type_traits/is_trivially_assignable.h> +#include <__type_traits/remove_cvref.h> #include <__utility/move.h> #include <__utility/pair.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -35,7 +42,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_HIDE_FROM_ABI void __insertion_sort_move( +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __insertion_sort_move( _BidirectionalIterator __first1, _BidirectionalIterator __last1, typename iterator_traits<_BidirectionalIterator>::value_type* __first2, @@ -47,19 +54,19 @@ _LIBCPP_HIDE_FROM_ABI void __insertion_sort_move( __destruct_n __d(0); unique_ptr __h(__first2, __d); value_type* __last2 = __first2; - ::new ((void*)__last2) value_type(_Ops::__iter_move(__first1)); + std::__construct_at(__last2, _Ops::__iter_move(__first1)); __d.template __incr(); for (++__last2; ++__first1 != __last1; ++__last2) { value_type* __j2 = __last2; value_type* __i2 = __j2; if (__comp(*__first1, *--__i2)) { - ::new ((void*)__j2) value_type(std::move(*__i2)); + std::__construct_at(__j2, std::move(*__i2)); __d.template __incr(); for (--__j2; __i2 != __first2 && __comp(*__first1, *--__i2); --__j2) *__j2 = std::move(*__i2); *__j2 = _Ops::__iter_move(__first1); } else { - ::new ((void*)__j2) value_type(_Ops::__iter_move(__first1)); + std::__construct_at(__j2, _Ops::__iter_move(__first1)); __d.template __incr(); } } @@ -68,7 +75,7 @@ _LIBCPP_HIDE_FROM_ABI void __insertion_sort_move( } template -_LIBCPP_HIDE_FROM_ABI void __merge_move_construct( +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __merge_move_construct( _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, @@ -83,22 +90,22 @@ _LIBCPP_HIDE_FROM_ABI void __merge_move_construct( for (; true; ++__result) { if (__first1 == __last1) 
{ for (; __first2 != __last2; ++__first2, (void)++__result, __d.template __incr()) - ::new ((void*)__result) value_type(_Ops::__iter_move(__first2)); + std::__construct_at(__result, _Ops::__iter_move(__first2)); __h.release(); return; } if (__first2 == __last2) { for (; __first1 != __last1; ++__first1, (void)++__result, __d.template __incr()) - ::new ((void*)__result) value_type(_Ops::__iter_move(__first1)); + std::__construct_at(__result, _Ops::__iter_move(__first1)); __h.release(); return; } if (__comp(*__first2, *__first1)) { - ::new ((void*)__result) value_type(_Ops::__iter_move(__first2)); + std::__construct_at(__result, _Ops::__iter_move(__first2)); __d.template __incr(); ++__first2; } else { - ::new ((void*)__result) value_type(_Ops::__iter_move(__first1)); + std::__construct_at(__result, _Ops::__iter_move(__first1)); __d.template __incr(); ++__first1; } @@ -106,7 +113,7 @@ _LIBCPP_HIDE_FROM_ABI void __merge_move_construct( } template -_LIBCPP_HIDE_FROM_ABI void __merge_move_assign( +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __merge_move_assign( _InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, @@ -134,19 +141,21 @@ _LIBCPP_HIDE_FROM_ABI void __merge_move_assign( } template -void __stable_sort(_RandomAccessIterator __first, - _RandomAccessIterator __last, - _Compare __comp, - typename iterator_traits<_RandomAccessIterator>::difference_type __len, - typename iterator_traits<_RandomAccessIterator>::value_type* __buff, - ptrdiff_t __buff_size); +_LIBCPP_CONSTEXPR_SINCE_CXX26 void __stable_sort( + _RandomAccessIterator __first, + _RandomAccessIterator __last, + _Compare __comp, + typename iterator_traits<_RandomAccessIterator>::difference_type __len, + typename iterator_traits<_RandomAccessIterator>::value_type* __buff, + ptrdiff_t __buff_size); template -void __stable_sort_move(_RandomAccessIterator __first1, - _RandomAccessIterator __last1, - _Compare __comp, - typename 
iterator_traits<_RandomAccessIterator>::difference_type __len, - typename iterator_traits<_RandomAccessIterator>::value_type* __first2) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void __stable_sort_move( + _RandomAccessIterator __first1, + _RandomAccessIterator __last1, + _Compare __comp, + typename iterator_traits<_RandomAccessIterator>::difference_type __len, + typename iterator_traits<_RandomAccessIterator>::value_type* __first2) { using _Ops = _IterOps<_AlgPolicy>; typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; @@ -154,21 +163,21 @@ void __stable_sort_move(_RandomAccessIterator __first1, case 0: return; case 1: - ::new ((void*)__first2) value_type(_Ops::__iter_move(__first1)); + std::__construct_at(__first2, _Ops::__iter_move(__first1)); return; case 2: __destruct_n __d(0); unique_ptr __h2(__first2, __d); if (__comp(*--__last1, *__first1)) { - ::new ((void*)__first2) value_type(_Ops::__iter_move(__last1)); + std::__construct_at(__first2, _Ops::__iter_move(__last1)); __d.template __incr(); ++__first2; - ::new ((void*)__first2) value_type(_Ops::__iter_move(__first1)); + std::__construct_at(__first2, _Ops::__iter_move(__first1)); } else { - ::new ((void*)__first2) value_type(_Ops::__iter_move(__first1)); + std::__construct_at(__first2, _Ops::__iter_move(__first1)); __d.template __incr(); ++__first2; - ::new ((void*)__first2) value_type(_Ops::__iter_move(__last1)); + std::__construct_at(__first2, _Ops::__iter_move(__last1)); } __h2.release(); return; @@ -189,13 +198,36 @@ struct __stable_sort_switch { static const unsigned value = 128 * is_trivially_copy_assignable<_Tp>::value; }; +#if _LIBCPP_STD_VER >= 17 +template +_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_min_bound() { + static_assert(is_integral<_Tp>::value); + if constexpr (sizeof(_Tp) == 1) { + return 1 << 8; + } + + return 1 << 10; +} + +template +_LIBCPP_HIDE_FROM_ABI constexpr unsigned __radix_sort_max_bound() { + static_assert(is_integral<_Tp>::value); + if constexpr 
(sizeof(_Tp) >= 8) { + return 1 << 15; + } + + return 1 << 16; +} +#endif // _LIBCPP_STD_VER >= 17 + template -void __stable_sort(_RandomAccessIterator __first, - _RandomAccessIterator __last, - _Compare __comp, - typename iterator_traits<_RandomAccessIterator>::difference_type __len, - typename iterator_traits<_RandomAccessIterator>::value_type* __buff, - ptrdiff_t __buff_size) { +_LIBCPP_CONSTEXPR_SINCE_CXX26 void __stable_sort( + _RandomAccessIterator __first, + _RandomAccessIterator __last, + _Compare __comp, + typename iterator_traits<_RandomAccessIterator>::difference_type __len, + typename iterator_traits<_RandomAccessIterator>::value_type* __buff, + ptrdiff_t __buff_size) { typedef typename iterator_traits<_RandomAccessIterator>::value_type value_type; typedef typename iterator_traits<_RandomAccessIterator>::difference_type difference_type; switch (__len) { @@ -211,6 +243,22 @@ void __stable_sort(_RandomAccessIterator __first, std::__insertion_sort<_AlgPolicy, _Compare>(__first, __last, __comp); return; } + +#if _LIBCPP_STD_VER >= 17 + constexpr auto __default_comp = + __desugars_to_v<__totally_ordered_less_tag, __remove_cvref_t<_Compare>, value_type, value_type >; + constexpr auto __integral_value = + is_integral_v && is_same_v< value_type&, __iter_reference<_RandomAccessIterator>>; + constexpr auto __allowed_radix_sort = __default_comp && __integral_value; + if constexpr (__allowed_radix_sort) { + if (__len <= __buff_size && __len >= static_cast(__radix_sort_min_bound()) && + __len <= static_cast(__radix_sort_max_bound())) { + std::__radix_sort(__first, __last, __buff); + return; + } + } +#endif // _LIBCPP_STD_VER >= 17 + typename iterator_traits<_RandomAccessIterator>::difference_type __l2 = __len / 2; _RandomAccessIterator __m = __first + __l2; if (__len <= __buff_size) { @@ -235,20 +283,18 @@ void __stable_sort(_RandomAccessIterator __first, } template -inline _LIBCPP_HIDE_FROM_ABI void +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void 
__stable_sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare& __comp) { using value_type = typename iterator_traits<_RandomAccessIterator>::value_type; using difference_type = typename iterator_traits<_RandomAccessIterator>::difference_type; difference_type __len = __last - __first; + __unique_temporary_buffer __unique_buf; pair __buf(0, 0); - unique_ptr __h; if (__len > static_cast(__stable_sort_switch::value)) { - // TODO: Remove the use of std::get_temporary_buffer - _LIBCPP_SUPPRESS_DEPRECATED_PUSH - __buf = std::get_temporary_buffer(__len); - _LIBCPP_SUPPRESS_DEPRECATED_POP - __h.reset(__buf.first); + __unique_buf = std::__allocate_unique_temporary_buffer(__len); + __buf.first = __unique_buf.get(); + __buf.second = __unique_buf.get_deleter().__count_; } std::__stable_sort<_AlgPolicy, __comp_ref_type<_Compare> >(__first, __last, __comp, __len, __buf.first, __buf.second); @@ -256,18 +302,18 @@ __stable_sort_impl(_RandomAccessIterator __first, _RandomAccessIterator __last, } template -inline _LIBCPP_HIDE_FROM_ABI void +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) { std::__stable_sort_impl<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __comp); } template -inline _LIBCPP_HIDE_FROM_ABI void stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last) { +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void +stable_sort(_RandomAccessIterator __first, _RandomAccessIterator __last) { std::stable_sort(__first, __last, __less<>()); } _LIBCPP_END_NAMESPACE_STD - _LIBCPP_POP_MACROS #endif // _LIBCPP___ALGORITHM_STABLE_SORT_H diff --git a/lib/libcxx/include/__algorithm/three_way_comp_ref_type.h b/lib/libcxx/include/__algorithm/three_way_comp_ref_type.h index 5702a1fee082..f6f76455e466 100644 --- a/lib/libcxx/include/__algorithm/three_way_comp_ref_type.h +++ b/lib/libcxx/include/__algorithm/three_way_comp_ref_type.h @@ 
-61,10 +61,10 @@ struct __debug_three_way_comp { // Pass the comparator by lvalue reference. Or in the debug mode, using a debugging wrapper that stores a reference. # if _LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_DEBUG template -using __three_way_comp_ref_type = __debug_three_way_comp<_Comp>; +using __three_way_comp_ref_type _LIBCPP_NODEBUG = __debug_three_way_comp<_Comp>; # else template -using __three_way_comp_ref_type = _Comp&; +using __three_way_comp_ref_type _LIBCPP_NODEBUG = _Comp&; # endif #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__algorithm/uniform_random_bit_generator_adaptor.h b/lib/libcxx/include/__algorithm/uniform_random_bit_generator_adaptor.h index aef0fbfb7c28..bc7a8925e128 100644 --- a/lib/libcxx/include/__algorithm/uniform_random_bit_generator_adaptor.h +++ b/lib/libcxx/include/__algorithm/uniform_random_bit_generator_adaptor.h @@ -10,7 +10,7 @@ #define _LIBCPP___ALGORITHM_RANGES_UNIFORM_RANDOM_BIT_GENERATOR_ADAPTOR_H #include <__config> -#include <__functional/invoke.h> +#include <__type_traits/invoke.h> #include <__type_traits/remove_cvref.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/lib/libcxx/include/__algorithm/unique.h b/lib/libcxx/include/__algorithm/unique.h index d597014596f2..307c424a7c2f 100644 --- a/lib/libcxx/include/__algorithm/unique.h +++ b/lib/libcxx/include/__algorithm/unique.h @@ -13,6 +13,7 @@ #include <__algorithm/comp.h> #include <__algorithm/iterator_operations.h> #include <__config> +#include <__functional/identity.h> #include <__iterator/iterator_traits.h> #include <__utility/move.h> #include <__utility/pair.h> @@ -29,9 +30,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD // unique template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 std::pair<_Iter, _Iter> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 std::pair<_Iter, _Iter> __unique(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) { - __first = std::__adjacent_find(__first, 
__last, __pred); + __identity __proj; + __first = std::__adjacent_find(__first, __last, __pred, __proj); if (__first != __last) { // ... a a ? ... // f i @@ -46,13 +48,13 @@ __unique(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) { } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator unique(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) { return std::__unique<_ClassicAlgPolicy>(std::move(__first), std::move(__last), __pred).first; } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator unique(_ForwardIterator __first, _ForwardIterator __last) { return std::unique(__first, __last, __equal_to()); } diff --git a/lib/libcxx/include/__algorithm/unwrap_iter.h b/lib/libcxx/include/__algorithm/unwrap_iter.h index 8cc0d22d4fc2..b66a682e765f 100644 --- a/lib/libcxx/include/__algorithm/unwrap_iter.h +++ b/lib/libcxx/include/__algorithm/unwrap_iter.h @@ -46,7 +46,7 @@ struct __unwrap_iter_impl { // It's a contiguous iterator, so we can use a raw pointer instead template struct __unwrap_iter_impl<_Iter, true> { - using _ToAddressT = decltype(std::__to_address(std::declval<_Iter>())); + using _ToAddressT _LIBCPP_NODEBUG = decltype(std::__to_address(std::declval<_Iter>())); static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Iter __rewrap(_Iter __orig_iter, _ToAddressT __unwrapped_iter) { return __orig_iter + (__unwrapped_iter - std::__to_address(__orig_iter)); diff --git a/lib/libcxx/include/__algorithm/upper_bound.h b/lib/libcxx/include/__algorithm/upper_bound.h index c39dec2e8969..d77286c9e5af 100644 --- a/lib/libcxx/include/__algorithm/upper_bound.h +++ b/lib/libcxx/include/__algorithm/upper_bound.h @@ -18,6 +18,8 @@ #include <__iterator/advance.h> #include 
<__iterator/distance.h> #include <__iterator/iterator_traits.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_callable.h> #include <__type_traits/is_constructible.h> #include <__utility/move.h> @@ -48,15 +50,16 @@ __upper_bound(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) { + static_assert(__is_callable<_Compare&, const _Tp&, decltype(*__first)>::value, "The comparator has to be callable"); static_assert(is_copy_constructible<_ForwardIterator>::value, "Iterator has to be copy constructible"); return std::__upper_bound<_ClassicAlgPolicy>( std::move(__first), std::move(__last), __value, std::move(__comp), std::__identity()); } template -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator upper_bound(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) { return std::upper_bound(std::move(__first), std::move(__last), __value, __less<>()); } diff --git a/lib/libcxx/include/__assert b/lib/libcxx/include/__assert index 49769fb4d449..90eaa6023587 100644 --- a/lib/libcxx/include/__assert +++ b/lib/libcxx/include/__assert @@ -23,10 +23,10 @@ : _LIBCPP_ASSERTION_HANDLER(__FILE__ ":" _LIBCPP_TOSTRING(__LINE__) ": assertion " _LIBCPP_TOSTRING( \ expression) " failed: " message "\n")) -// TODO: __builtin_assume can currently inhibit optimizations. Until this has been fixed and we can add -// assumptions without a clear optimization intent, disable that to avoid worsening the code generation. -// See https://discourse.llvm.org/t/llvm-assume-blocks-optimization/71609 for a discussion. 
-#if 0 && __has_builtin(__builtin_assume) +// WARNING: __builtin_assume can currently inhibit optimizations. Only add assumptions with a clear +// optimization intent. See https://discourse.llvm.org/t/llvm-assume-blocks-optimization/71609 for a +// discussion. +#if __has_builtin(__builtin_assume) # define _LIBCPP_ASSUME(expression) \ (_LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wassume") \ __builtin_assume(static_cast(expression)) _LIBCPP_DIAGNOSTIC_POP) @@ -44,18 +44,18 @@ # define _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(expression, message) _LIBCPP_ASSERT(expression, message) // Disabled checks. // On most modern platforms, dereferencing a null pointer does not lead to an actual memory access. -# define _LIBCPP_ASSERT_NON_NULL(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_NON_NULL(expression, message) ((void)0) // Overlapping ranges will make algorithms produce incorrect results but don't directly lead to a security // vulnerability. -# define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_VALID_DEALLOCATION(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) ((void)0) +# define _LIBCPP_ASSERT_VALID_DEALLOCATION(expression, message) ((void)0) +# define 
_LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(expression, message) ((void)0) +# define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) ((void)0) +# define _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(expression, message) ((void)0) +# define _LIBCPP_ASSERT_PEDANTIC(expression, message) ((void)0) +# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) ((void)0) +# define _LIBCPP_ASSERT_INTERNAL(expression, message) ((void)0) +# define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) ((void)0) // Extensive hardening mode checks. @@ -73,8 +73,8 @@ # define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSERT(expression, message) # define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSERT(expression, message) // Disabled checks. -# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) ((void)0) +# define _LIBCPP_ASSERT_INTERNAL(expression, message) ((void)0) // Debug hardening mode checks. @@ -99,18 +99,18 @@ #else // All checks disabled. 
-# define _LIBCPP_ASSERT_VALID_INPUT_RANGE(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_NON_NULL(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_VALID_DEALLOCATION(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_PEDANTIC(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_INTERNAL(expression, message) _LIBCPP_ASSUME(expression) -# define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) _LIBCPP_ASSUME(expression) +# define _LIBCPP_ASSERT_VALID_INPUT_RANGE(expression, message) ((void)0) +# define _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS(expression, message) ((void)0) +# define _LIBCPP_ASSERT_NON_NULL(expression, message) ((void)0) +# define _LIBCPP_ASSERT_NON_OVERLAPPING_RANGES(expression, message) ((void)0) +# define _LIBCPP_ASSERT_VALID_DEALLOCATION(expression, message) ((void)0) +# define _LIBCPP_ASSERT_VALID_EXTERNAL_API_CALL(expression, message) ((void)0) +# define _LIBCPP_ASSERT_COMPATIBLE_ALLOCATOR(expression, message) ((void)0) +# define _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(expression, message) ((void)0) +# define _LIBCPP_ASSERT_PEDANTIC(expression, message) ((void)0) +# define _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(expression, message) ((void)0) +# define _LIBCPP_ASSERT_INTERNAL(expression, message) ((void)0) +# define _LIBCPP_ASSERT_UNCATEGORIZED(expression, message) ((void)0) #endif // 
_LIBCPP_HARDENING_MODE == _LIBCPP_HARDENING_MODE_FAST // clang-format on diff --git a/lib/libcxx/include/__assertion_handler b/lib/libcxx/include/__assertion_handler index 3b6d6b2cca53..1d6b21fc6bb4 100644 --- a/lib/libcxx/include/__assertion_handler +++ b/lib/libcxx/include/__assertion_handler @@ -10,8 +10,13 @@ #ifndef _LIBCPP___ASSERTION_HANDLER #define _LIBCPP___ASSERTION_HANDLER -#include <__config> -#include <__verbose_abort> +#if __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) +# include <__cxx03/__config> +# include <__cxx03/__verbose_abort> +#else +# include <__config> +# include <__verbose_abort> +#endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -26,7 +31,8 @@ # if __has_builtin(__builtin_verbose_trap) // AppleClang shipped a slightly different version of __builtin_verbose_trap from the upstream // version before upstream Clang actually got the builtin. -# if defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 17000 +// TODO: Remove once AppleClang supports the two-arguments version of the builtin. 
+# if defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1700 # define _LIBCPP_ASSERTION_HANDLER(message) __builtin_verbose_trap(message) # else # define _LIBCPP_ASSERTION_HANDLER(message) __builtin_verbose_trap("libc++", message) diff --git a/lib/libcxx/include/__atomic/aliases.h b/lib/libcxx/include/__atomic/aliases.h index e27e09af6b77..4fccebab2563 100644 --- a/lib/libcxx/include/__atomic/aliases.h +++ b/lib/libcxx/include/__atomic/aliases.h @@ -14,9 +14,10 @@ #include <__atomic/contention_t.h> #include <__atomic/is_always_lock_free.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> +#include <__cstddef/size_t.h> #include <__type_traits/conditional.h> #include <__type_traits/make_unsigned.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -37,12 +38,12 @@ using atomic_long = atomic; using atomic_ulong = atomic; using atomic_llong = atomic; using atomic_ullong = atomic; -#ifndef _LIBCPP_HAS_NO_CHAR8_T +#if _LIBCPP_HAS_CHAR8_T using atomic_char8_t = atomic; #endif using atomic_char16_t = atomic; using atomic_char32_t = atomic; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS using atomic_wchar_t = atomic; #endif @@ -83,19 +84,19 @@ using atomic_uintmax_t = atomic; // C++20 atomic_{signed,unsigned}_lock_free: prefer the contention type most highly, then the largest lock-free type #if _LIBCPP_STD_VER >= 20 # if ATOMIC_LLONG_LOCK_FREE == 2 -using __largest_lock_free_type = long long; +using __largest_lock_free_type _LIBCPP_NODEBUG = long long; # elif ATOMIC_INT_LOCK_FREE == 2 -using __largest_lock_free_type = int; +using __largest_lock_free_type _LIBCPP_NODEBUG = int; # elif ATOMIC_SHORT_LOCK_FREE == 2 -using __largest_lock_free_type = short; +using __largest_lock_free_type _LIBCPP_NODEBUG = short; # elif ATOMIC_CHAR_LOCK_FREE == 2 -using __largest_lock_free_type = char; +using __largest_lock_free_type _LIBCPP_NODEBUG = char; # else # define _LIBCPP_NO_LOCK_FREE_TYPES // There are no lockfree types 
(this can happen on unusual platforms) # endif # ifndef _LIBCPP_NO_LOCK_FREE_TYPES -using __contention_t_or_largest = +using __contention_t_or_largest _LIBCPP_NODEBUG = __conditional_t<__libcpp_is_always_lock_free<__cxx_contention_t>::__value, __cxx_contention_t, __largest_lock_free_type>; diff --git a/lib/libcxx/include/__atomic/atomic.h b/lib/libcxx/include/__atomic/atomic.h index bd3f659c22df..975a479e2040 100644 --- a/lib/libcxx/include/__atomic/atomic.h +++ b/lib/libcxx/include/__atomic/atomic.h @@ -9,21 +9,24 @@ #ifndef _LIBCPP___ATOMIC_ATOMIC_H #define _LIBCPP___ATOMIC_ATOMIC_H -#include <__atomic/atomic_base.h> +#include <__atomic/atomic_sync.h> #include <__atomic/check_memory_order.h> -#include <__atomic/cxx_atomic_impl.h> +#include <__atomic/is_always_lock_free.h> #include <__atomic/memory_order.h> +#include <__atomic/support.h> #include <__config> -#include <__functional/operations.h> +#include <__cstddef/ptrdiff_t.h> #include <__memory/addressof.h> +#include <__type_traits/enable_if.h> #include <__type_traits/is_floating_point.h> #include <__type_traits/is_function.h> +#include <__type_traits/is_integral.h> +#include <__type_traits/is_nothrow_constructible.h> #include <__type_traits/is_same.h> #include <__type_traits/remove_const.h> #include <__type_traits/remove_pointer.h> #include <__type_traits/remove_volatile.h> #include <__utility/forward.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -32,11 +35,202 @@ _LIBCPP_BEGIN_NAMESPACE_STD +template ::value && !is_same<_Tp, bool>::value> +struct __atomic_base // false +{ + mutable __cxx_atomic_impl<_Tp> __a_; + +#if _LIBCPP_STD_VER >= 17 + static constexpr bool is_always_lock_free = __libcpp_is_always_lock_free<__cxx_atomic_impl<_Tp> >::__value; +#endif + + _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const volatile _NOEXCEPT { + return __cxx_atomic_is_lock_free(sizeof(__cxx_atomic_impl<_Tp>)); + } + _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const _NOEXCEPT { + return 
static_cast<__atomic_base const volatile*>(this)->is_lock_free(); + } + _LIBCPP_HIDE_FROM_ABI void store(_Tp __d, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT + _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m) { + std::__cxx_atomic_store(std::addressof(__a_), __d, __m); + } + _LIBCPP_HIDE_FROM_ABI void store(_Tp __d, memory_order __m = memory_order_seq_cst) _NOEXCEPT + _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m) { + std::__cxx_atomic_store(std::addressof(__a_), __d, __m); + } + _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT + _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) { + return std::__cxx_atomic_load(std::addressof(__a_), __m); + } + _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __m = memory_order_seq_cst) const _NOEXCEPT + _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) { + return std::__cxx_atomic_load(std::addressof(__a_), __m); + } + _LIBCPP_HIDE_FROM_ABI operator _Tp() const volatile _NOEXCEPT { return load(); } + _LIBCPP_HIDE_FROM_ABI operator _Tp() const _NOEXCEPT { return load(); } + _LIBCPP_HIDE_FROM_ABI _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { + return std::__cxx_atomic_exchange(std::addressof(__a_), __d, __m); + } + _LIBCPP_HIDE_FROM_ABI _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) _NOEXCEPT { + return std::__cxx_atomic_exchange(std::addressof(__a_), __d, __m); + } + _LIBCPP_HIDE_FROM_ABI bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) volatile _NOEXCEPT + _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { + return std::__cxx_atomic_compare_exchange_weak(std::addressof(__a_), std::addressof(__e), __d, __s, __f); + } + _LIBCPP_HIDE_FROM_ABI bool compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) _NOEXCEPT + _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { + return std::__cxx_atomic_compare_exchange_weak(std::addressof(__a_), std::addressof(__e), __d, __s, __f); + } + _LIBCPP_HIDE_FROM_ABI bool + 
compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) volatile _NOEXCEPT + _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { + return std::__cxx_atomic_compare_exchange_strong(std::addressof(__a_), std::addressof(__e), __d, __s, __f); + } + _LIBCPP_HIDE_FROM_ABI bool compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) _NOEXCEPT + _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { + return std::__cxx_atomic_compare_exchange_strong(std::addressof(__a_), std::addressof(__e), __d, __s, __f); + } + _LIBCPP_HIDE_FROM_ABI bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { + return std::__cxx_atomic_compare_exchange_weak(std::addressof(__a_), std::addressof(__e), __d, __m, __m); + } + _LIBCPP_HIDE_FROM_ABI bool + compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) _NOEXCEPT { + return std::__cxx_atomic_compare_exchange_weak(std::addressof(__a_), std::addressof(__e), __d, __m, __m); + } + _LIBCPP_HIDE_FROM_ABI bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { + return std::__cxx_atomic_compare_exchange_strong(std::addressof(__a_), std::addressof(__e), __d, __m, __m); + } + _LIBCPP_HIDE_FROM_ABI bool + compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) _NOEXCEPT { + return std::__cxx_atomic_compare_exchange_strong(std::addressof(__a_), std::addressof(__e), __d, __m, __m); + } + +#if _LIBCPP_STD_VER >= 20 + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const + volatile _NOEXCEPT { + std::__atomic_wait(*this, __v, __m); + } + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void + wait(_Tp __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT { + std::__atomic_wait(*this, __v, __m); + } + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { + 
std::__atomic_notify_one(*this); + } + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); } + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT { + std::__atomic_notify_all(*this); + } + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); } +#endif // _LIBCPP_STD_VER >= 20 + +#if _LIBCPP_STD_VER >= 20 + _LIBCPP_HIDE_FROM_ABI constexpr __atomic_base() noexcept(is_nothrow_default_constructible_v<_Tp>) : __a_(_Tp()) {} +#else + _LIBCPP_HIDE_FROM_ABI __atomic_base() _NOEXCEPT = default; +#endif + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __atomic_base(_Tp __d) _NOEXCEPT : __a_(__d) {} + + __atomic_base(const __atomic_base&) = delete; +}; + +// atomic + +template +struct __atomic_base<_Tp, true> : public __atomic_base<_Tp, false> { + using __base _LIBCPP_NODEBUG = __atomic_base<_Tp, false>; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __atomic_base() _NOEXCEPT = default; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __atomic_base(_Tp __d) _NOEXCEPT : __base(__d) {} + + _LIBCPP_HIDE_FROM_ABI _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { + return std::__cxx_atomic_fetch_add(std::addressof(this->__a_), __op, __m); + } + _LIBCPP_HIDE_FROM_ABI _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT { + return std::__cxx_atomic_fetch_add(std::addressof(this->__a_), __op, __m); + } + _LIBCPP_HIDE_FROM_ABI _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { + return std::__cxx_atomic_fetch_sub(std::addressof(this->__a_), __op, __m); + } + _LIBCPP_HIDE_FROM_ABI _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT { + return std::__cxx_atomic_fetch_sub(std::addressof(this->__a_), __op, __m); + } + _LIBCPP_HIDE_FROM_ABI _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { + 
return std::__cxx_atomic_fetch_and(std::addressof(this->__a_), __op, __m); + } + _LIBCPP_HIDE_FROM_ABI _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT { + return std::__cxx_atomic_fetch_and(std::addressof(this->__a_), __op, __m); + } + _LIBCPP_HIDE_FROM_ABI _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { + return std::__cxx_atomic_fetch_or(std::addressof(this->__a_), __op, __m); + } + _LIBCPP_HIDE_FROM_ABI _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT { + return std::__cxx_atomic_fetch_or(std::addressof(this->__a_), __op, __m); + } + _LIBCPP_HIDE_FROM_ABI _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { + return std::__cxx_atomic_fetch_xor(std::addressof(this->__a_), __op, __m); + } + _LIBCPP_HIDE_FROM_ABI _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT { + return std::__cxx_atomic_fetch_xor(std::addressof(this->__a_), __op, __m); + } + + _LIBCPP_HIDE_FROM_ABI _Tp operator++(int) volatile _NOEXCEPT { return fetch_add(_Tp(1)); } + _LIBCPP_HIDE_FROM_ABI _Tp operator++(int) _NOEXCEPT { return fetch_add(_Tp(1)); } + _LIBCPP_HIDE_FROM_ABI _Tp operator--(int) volatile _NOEXCEPT { return fetch_sub(_Tp(1)); } + _LIBCPP_HIDE_FROM_ABI _Tp operator--(int) _NOEXCEPT { return fetch_sub(_Tp(1)); } + _LIBCPP_HIDE_FROM_ABI _Tp operator++() volatile _NOEXCEPT { return fetch_add(_Tp(1)) + _Tp(1); } + _LIBCPP_HIDE_FROM_ABI _Tp operator++() _NOEXCEPT { return fetch_add(_Tp(1)) + _Tp(1); } + _LIBCPP_HIDE_FROM_ABI _Tp operator--() volatile _NOEXCEPT { return fetch_sub(_Tp(1)) - _Tp(1); } + _LIBCPP_HIDE_FROM_ABI _Tp operator--() _NOEXCEPT { return fetch_sub(_Tp(1)) - _Tp(1); } + _LIBCPP_HIDE_FROM_ABI _Tp operator+=(_Tp __op) volatile _NOEXCEPT { return fetch_add(__op) + __op; } + _LIBCPP_HIDE_FROM_ABI _Tp operator+=(_Tp __op) _NOEXCEPT { return fetch_add(__op) + __op; } + _LIBCPP_HIDE_FROM_ABI _Tp operator-=(_Tp __op) volatile 
_NOEXCEPT { return fetch_sub(__op) - __op; } + _LIBCPP_HIDE_FROM_ABI _Tp operator-=(_Tp __op) _NOEXCEPT { return fetch_sub(__op) - __op; } + _LIBCPP_HIDE_FROM_ABI _Tp operator&=(_Tp __op) volatile _NOEXCEPT { return fetch_and(__op) & __op; } + _LIBCPP_HIDE_FROM_ABI _Tp operator&=(_Tp __op) _NOEXCEPT { return fetch_and(__op) & __op; } + _LIBCPP_HIDE_FROM_ABI _Tp operator|=(_Tp __op) volatile _NOEXCEPT { return fetch_or(__op) | __op; } + _LIBCPP_HIDE_FROM_ABI _Tp operator|=(_Tp __op) _NOEXCEPT { return fetch_or(__op) | __op; } + _LIBCPP_HIDE_FROM_ABI _Tp operator^=(_Tp __op) volatile _NOEXCEPT { return fetch_xor(__op) ^ __op; } + _LIBCPP_HIDE_FROM_ABI _Tp operator^=(_Tp __op) _NOEXCEPT { return fetch_xor(__op) ^ __op; } +}; + +// Here we need _IsIntegral because the default template argument is not enough +// e.g __atomic_base is __atomic_base, which inherits from +// __atomic_base and the caller of the wait function is +// __atomic_base. So specializing __atomic_base<_Tp> does not work +template +struct __atomic_waitable_traits<__atomic_base<_Tp, _IsIntegral> > { + static _LIBCPP_HIDE_FROM_ABI _Tp __atomic_load(const __atomic_base<_Tp, _IsIntegral>& __a, memory_order __order) { + return __a.load(__order); + } + + static _LIBCPP_HIDE_FROM_ABI _Tp + __atomic_load(const volatile __atomic_base<_Tp, _IsIntegral>& __this, memory_order __order) { + return __this.load(__order); + } + + static _LIBCPP_HIDE_FROM_ABI const __cxx_atomic_impl<_Tp>* + __atomic_contention_address(const __atomic_base<_Tp, _IsIntegral>& __a) { + return std::addressof(__a.__a_); + } + + static _LIBCPP_HIDE_FROM_ABI const volatile __cxx_atomic_impl<_Tp>* + __atomic_contention_address(const volatile __atomic_base<_Tp, _IsIntegral>& __this) { + return std::addressof(__this.__a_); + } +}; + template struct atomic : public __atomic_base<_Tp> { - using __base = __atomic_base<_Tp>; - using value_type = _Tp; - using difference_type = value_type; + using __base _LIBCPP_NODEBUG = __atomic_base<_Tp>; + using 
value_type = _Tp; + using difference_type = value_type; #if _LIBCPP_STD_VER >= 20 _LIBCPP_HIDE_FROM_ABI atomic() = default; @@ -63,9 +257,9 @@ struct atomic : public __atomic_base<_Tp> { template struct atomic<_Tp*> : public __atomic_base<_Tp*> { - using __base = __atomic_base<_Tp*>; - using value_type = _Tp*; - using difference_type = ptrdiff_t; + using __base _LIBCPP_NODEBUG = __atomic_base<_Tp*>; + using value_type = _Tp*; + using difference_type = ptrdiff_t; _LIBCPP_HIDE_FROM_ABI atomic() _NOEXCEPT = default; @@ -121,6 +315,9 @@ struct atomic<_Tp*> : public __atomic_base<_Tp*> { atomic& operator=(const atomic&) volatile = delete; }; +template +struct __atomic_waitable_traits > : __atomic_waitable_traits<__atomic_base<_Tp> > {}; + #if _LIBCPP_STD_VER >= 20 template requires is_floating_point_v<_Tp> @@ -178,7 +375,8 @@ struct atomic<_Tp> : __atomic_base<_Tp> { auto __builtin_op = [](auto __a, auto __builtin_operand, auto __order) { return std::__cxx_atomic_fetch_add(__a, __builtin_operand, __order); }; - return __rmw_op(std::forward<_This>(__self), __operand, __m, std::plus<>{}, __builtin_op); + auto __plus = [](auto __a, auto __b) { return __a + __b; }; + return __rmw_op(std::forward<_This>(__self), __operand, __m, __plus, __builtin_op); } template @@ -186,13 +384,14 @@ struct atomic<_Tp> : __atomic_base<_Tp> { auto __builtin_op = [](auto __a, auto __builtin_operand, auto __order) { return std::__cxx_atomic_fetch_sub(__a, __builtin_operand, __order); }; - return __rmw_op(std::forward<_This>(__self), __operand, __m, std::minus<>{}, __builtin_op); + auto __minus = [](auto __a, auto __b) { return __a - __b; }; + return __rmw_op(std::forward<_This>(__self), __operand, __m, __minus, __builtin_op); } public: - using __base = __atomic_base<_Tp>; - using value_type = _Tp; - using difference_type = value_type; + using __base _LIBCPP_NODEBUG = __atomic_base<_Tp>; + using value_type = _Tp; + using difference_type = value_type; _LIBCPP_HIDE_FROM_ABI constexpr atomic() 
noexcept = default; _LIBCPP_HIDE_FROM_ABI constexpr atomic(_Tp __d) noexcept : __base(__d) {} @@ -429,6 +628,8 @@ _LIBCPP_HIDE_FROM_ABI bool atomic_compare_exchange_strong_explicit( return __o->compare_exchange_strong(*__e, __d, __s, __f); } +#if _LIBCPP_STD_VER >= 20 + // atomic_wait template @@ -462,29 +663,27 @@ atomic_wait_explicit(const atomic<_Tp>* __o, typename atomic<_Tp>::value_type __ // atomic_notify_one template -_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void -atomic_notify_one(volatile atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_one(volatile atomic<_Tp>* __o) _NOEXCEPT { __o->notify_one(); } template -_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void -atomic_notify_one(atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_one(atomic<_Tp>* __o) _NOEXCEPT { __o->notify_one(); } // atomic_notify_all template -_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void -atomic_notify_all(volatile atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_all(volatile atomic<_Tp>* __o) _NOEXCEPT { __o->notify_all(); } template -_LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void -atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT { +_LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void atomic_notify_all(atomic<_Tp>* __o) _NOEXCEPT { __o->notify_all(); } +#endif // _LIBCPP_STD_VER >= 20 + // atomic_fetch_add template diff --git a/lib/libcxx/include/__atomic/atomic_base.h b/lib/libcxx/include/__atomic/atomic_base.h deleted file mode 100644 index 7e26434c9c3a..000000000000 --- a/lib/libcxx/include/__atomic/atomic_base.h +++ /dev/null @@ -1,221 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___ATOMIC_ATOMIC_BASE_H -#define _LIBCPP___ATOMIC_ATOMIC_BASE_H - -#include <__atomic/atomic_sync.h> -#include <__atomic/check_memory_order.h> -#include <__atomic/cxx_atomic_impl.h> -#include <__atomic/is_always_lock_free.h> -#include <__atomic/memory_order.h> -#include <__config> -#include <__memory/addressof.h> -#include <__type_traits/is_integral.h> -#include <__type_traits/is_nothrow_constructible.h> -#include <__type_traits/is_same.h> -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -_LIBCPP_BEGIN_NAMESPACE_STD - -template ::value && !is_same<_Tp, bool>::value> -struct __atomic_base // false -{ - mutable __cxx_atomic_impl<_Tp> __a_; - -#if _LIBCPP_STD_VER >= 17 - static constexpr bool is_always_lock_free = __libcpp_is_always_lock_free<__cxx_atomic_impl<_Tp> >::__value; -#endif - - _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const volatile _NOEXCEPT { - return __cxx_atomic_is_lock_free(sizeof(__cxx_atomic_impl<_Tp>)); - } - _LIBCPP_HIDE_FROM_ABI bool is_lock_free() const _NOEXCEPT { - return static_cast<__atomic_base const volatile*>(this)->is_lock_free(); - } - _LIBCPP_HIDE_FROM_ABI void store(_Tp __d, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT - _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m) { - std::__cxx_atomic_store(std::addressof(__a_), __d, __m); - } - _LIBCPP_HIDE_FROM_ABI void store(_Tp __d, memory_order __m = memory_order_seq_cst) _NOEXCEPT - _LIBCPP_CHECK_STORE_MEMORY_ORDER(__m) { - std::__cxx_atomic_store(std::addressof(__a_), __d, __m); - } - _LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT - _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) { - return std::__cxx_atomic_load(std::addressof(__a_), __m); - } - 
_LIBCPP_HIDE_FROM_ABI _Tp load(memory_order __m = memory_order_seq_cst) const _NOEXCEPT - _LIBCPP_CHECK_LOAD_MEMORY_ORDER(__m) { - return std::__cxx_atomic_load(std::addressof(__a_), __m); - } - _LIBCPP_HIDE_FROM_ABI operator _Tp() const volatile _NOEXCEPT { return load(); } - _LIBCPP_HIDE_FROM_ABI operator _Tp() const _NOEXCEPT { return load(); } - _LIBCPP_HIDE_FROM_ABI _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { - return std::__cxx_atomic_exchange(std::addressof(__a_), __d, __m); - } - _LIBCPP_HIDE_FROM_ABI _Tp exchange(_Tp __d, memory_order __m = memory_order_seq_cst) _NOEXCEPT { - return std::__cxx_atomic_exchange(std::addressof(__a_), __d, __m); - } - _LIBCPP_HIDE_FROM_ABI bool - compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) volatile _NOEXCEPT - _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { - return std::__cxx_atomic_compare_exchange_weak(std::addressof(__a_), std::addressof(__e), __d, __s, __f); - } - _LIBCPP_HIDE_FROM_ABI bool compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) _NOEXCEPT - _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { - return std::__cxx_atomic_compare_exchange_weak(std::addressof(__a_), std::addressof(__e), __d, __s, __f); - } - _LIBCPP_HIDE_FROM_ABI bool - compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) volatile _NOEXCEPT - _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { - return std::__cxx_atomic_compare_exchange_strong(std::addressof(__a_), std::addressof(__e), __d, __s, __f); - } - _LIBCPP_HIDE_FROM_ABI bool compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __s, memory_order __f) _NOEXCEPT - _LIBCPP_CHECK_EXCHANGE_MEMORY_ORDER(__s, __f) { - return std::__cxx_atomic_compare_exchange_strong(std::addressof(__a_), std::addressof(__e), __d, __s, __f); - } - _LIBCPP_HIDE_FROM_ABI bool - compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { - return 
std::__cxx_atomic_compare_exchange_weak(std::addressof(__a_), std::addressof(__e), __d, __m, __m); - } - _LIBCPP_HIDE_FROM_ABI bool - compare_exchange_weak(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) _NOEXCEPT { - return std::__cxx_atomic_compare_exchange_weak(std::addressof(__a_), std::addressof(__e), __d, __m, __m); - } - _LIBCPP_HIDE_FROM_ABI bool - compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { - return std::__cxx_atomic_compare_exchange_strong(std::addressof(__a_), std::addressof(__e), __d, __m, __m); - } - _LIBCPP_HIDE_FROM_ABI bool - compare_exchange_strong(_Tp& __e, _Tp __d, memory_order __m = memory_order_seq_cst) _NOEXCEPT { - return std::__cxx_atomic_compare_exchange_strong(std::addressof(__a_), std::addressof(__e), __d, __m, __m); - } - - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(_Tp __v, memory_order __m = memory_order_seq_cst) const - volatile _NOEXCEPT { - std::__atomic_wait(*this, __v, __m); - } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void - wait(_Tp __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT { - std::__atomic_wait(*this, __v, __m); - } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { - std::__atomic_notify_one(*this); - } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT { - std::__atomic_notify_all(*this); - } - _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); } - -#if _LIBCPP_STD_VER >= 20 - _LIBCPP_HIDE_FROM_ABI constexpr __atomic_base() noexcept(is_nothrow_default_constructible_v<_Tp>) : __a_(_Tp()) {} -#else - _LIBCPP_HIDE_FROM_ABI __atomic_base() _NOEXCEPT = default; -#endif - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __atomic_base(_Tp __d) _NOEXCEPT : __a_(__d) {} - - 
__atomic_base(const __atomic_base&) = delete; -}; - -// atomic - -template -struct __atomic_base<_Tp, true> : public __atomic_base<_Tp, false> { - using __base = __atomic_base<_Tp, false>; - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __atomic_base() _NOEXCEPT = default; - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __atomic_base(_Tp __d) _NOEXCEPT : __base(__d) {} - - _LIBCPP_HIDE_FROM_ABI _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { - return std::__cxx_atomic_fetch_add(std::addressof(this->__a_), __op, __m); - } - _LIBCPP_HIDE_FROM_ABI _Tp fetch_add(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT { - return std::__cxx_atomic_fetch_add(std::addressof(this->__a_), __op, __m); - } - _LIBCPP_HIDE_FROM_ABI _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { - return std::__cxx_atomic_fetch_sub(std::addressof(this->__a_), __op, __m); - } - _LIBCPP_HIDE_FROM_ABI _Tp fetch_sub(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT { - return std::__cxx_atomic_fetch_sub(std::addressof(this->__a_), __op, __m); - } - _LIBCPP_HIDE_FROM_ABI _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { - return std::__cxx_atomic_fetch_and(std::addressof(this->__a_), __op, __m); - } - _LIBCPP_HIDE_FROM_ABI _Tp fetch_and(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT { - return std::__cxx_atomic_fetch_and(std::addressof(this->__a_), __op, __m); - } - _LIBCPP_HIDE_FROM_ABI _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { - return std::__cxx_atomic_fetch_or(std::addressof(this->__a_), __op, __m); - } - _LIBCPP_HIDE_FROM_ABI _Tp fetch_or(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT { - return std::__cxx_atomic_fetch_or(std::addressof(this->__a_), __op, __m); - } - _LIBCPP_HIDE_FROM_ABI _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) volatile _NOEXCEPT { - return 
std::__cxx_atomic_fetch_xor(std::addressof(this->__a_), __op, __m); - } - _LIBCPP_HIDE_FROM_ABI _Tp fetch_xor(_Tp __op, memory_order __m = memory_order_seq_cst) _NOEXCEPT { - return std::__cxx_atomic_fetch_xor(std::addressof(this->__a_), __op, __m); - } - - _LIBCPP_HIDE_FROM_ABI _Tp operator++(int) volatile _NOEXCEPT { return fetch_add(_Tp(1)); } - _LIBCPP_HIDE_FROM_ABI _Tp operator++(int) _NOEXCEPT { return fetch_add(_Tp(1)); } - _LIBCPP_HIDE_FROM_ABI _Tp operator--(int) volatile _NOEXCEPT { return fetch_sub(_Tp(1)); } - _LIBCPP_HIDE_FROM_ABI _Tp operator--(int) _NOEXCEPT { return fetch_sub(_Tp(1)); } - _LIBCPP_HIDE_FROM_ABI _Tp operator++() volatile _NOEXCEPT { return fetch_add(_Tp(1)) + _Tp(1); } - _LIBCPP_HIDE_FROM_ABI _Tp operator++() _NOEXCEPT { return fetch_add(_Tp(1)) + _Tp(1); } - _LIBCPP_HIDE_FROM_ABI _Tp operator--() volatile _NOEXCEPT { return fetch_sub(_Tp(1)) - _Tp(1); } - _LIBCPP_HIDE_FROM_ABI _Tp operator--() _NOEXCEPT { return fetch_sub(_Tp(1)) - _Tp(1); } - _LIBCPP_HIDE_FROM_ABI _Tp operator+=(_Tp __op) volatile _NOEXCEPT { return fetch_add(__op) + __op; } - _LIBCPP_HIDE_FROM_ABI _Tp operator+=(_Tp __op) _NOEXCEPT { return fetch_add(__op) + __op; } - _LIBCPP_HIDE_FROM_ABI _Tp operator-=(_Tp __op) volatile _NOEXCEPT { return fetch_sub(__op) - __op; } - _LIBCPP_HIDE_FROM_ABI _Tp operator-=(_Tp __op) _NOEXCEPT { return fetch_sub(__op) - __op; } - _LIBCPP_HIDE_FROM_ABI _Tp operator&=(_Tp __op) volatile _NOEXCEPT { return fetch_and(__op) & __op; } - _LIBCPP_HIDE_FROM_ABI _Tp operator&=(_Tp __op) _NOEXCEPT { return fetch_and(__op) & __op; } - _LIBCPP_HIDE_FROM_ABI _Tp operator|=(_Tp __op) volatile _NOEXCEPT { return fetch_or(__op) | __op; } - _LIBCPP_HIDE_FROM_ABI _Tp operator|=(_Tp __op) _NOEXCEPT { return fetch_or(__op) | __op; } - _LIBCPP_HIDE_FROM_ABI _Tp operator^=(_Tp __op) volatile _NOEXCEPT { return fetch_xor(__op) ^ __op; } - _LIBCPP_HIDE_FROM_ABI _Tp operator^=(_Tp __op) _NOEXCEPT { return fetch_xor(__op) ^ __op; } -}; - -// Here we need 
_IsIntegral because the default template argument is not enough -// e.g __atomic_base is __atomic_base, which inherits from -// __atomic_base and the caller of the wait function is -// __atomic_base. So specializing __atomic_base<_Tp> does not work -template -struct __atomic_waitable_traits<__atomic_base<_Tp, _IsIntegral> > { - static _LIBCPP_HIDE_FROM_ABI _Tp __atomic_load(const __atomic_base<_Tp, _IsIntegral>& __a, memory_order __order) { - return __a.load(__order); - } - - static _LIBCPP_HIDE_FROM_ABI _Tp - __atomic_load(const volatile __atomic_base<_Tp, _IsIntegral>& __this, memory_order __order) { - return __this.load(__order); - } - - static _LIBCPP_HIDE_FROM_ABI const __cxx_atomic_impl<_Tp>* - __atomic_contention_address(const __atomic_base<_Tp, _IsIntegral>& __a) { - return std::addressof(__a.__a_); - } - - static _LIBCPP_HIDE_FROM_ABI const volatile __cxx_atomic_impl<_Tp>* - __atomic_contention_address(const volatile __atomic_base<_Tp, _IsIntegral>& __this) { - return std::addressof(__this.__a_); - } -}; - -_LIBCPP_END_NAMESPACE_STD - -#endif // _LIBCPP___ATOMIC_ATOMIC_BASE_H diff --git a/lib/libcxx/include/__atomic/atomic_flag.h b/lib/libcxx/include/__atomic/atomic_flag.h index 00b157cdff78..5cc6fb0c55d0 100644 --- a/lib/libcxx/include/__atomic/atomic_flag.h +++ b/lib/libcxx/include/__atomic/atomic_flag.h @@ -11,8 +11,8 @@ #include <__atomic/atomic_sync.h> #include <__atomic/contention_t.h> -#include <__atomic/cxx_atomic_impl.h> #include <__atomic/memory_order.h> +#include <__atomic/support.h> #include <__chrono/duration.h> #include <__config> #include <__memory/addressof.h> @@ -48,26 +48,24 @@ struct atomic_flag { __cxx_atomic_store(&__a_, _LIBCPP_ATOMIC_FLAG_TYPE(false), __m); } - _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void - wait(bool __v, memory_order __m = memory_order_seq_cst) const volatile _NOEXCEPT { +#if _LIBCPP_STD_VER >= 20 + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(bool __v, 
memory_order __m = memory_order_seq_cst) const + volatile _NOEXCEPT { std::__atomic_wait(*this, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m); } - _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void wait(bool __v, memory_order __m = memory_order_seq_cst) const _NOEXCEPT { std::__atomic_wait(*this, _LIBCPP_ATOMIC_FLAG_TYPE(__v), __m); } - _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { - std::__atomic_notify_one(*this); - } - _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() volatile _NOEXCEPT { std::__atomic_notify_one(*this); } + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_one() _NOEXCEPT { std::__atomic_notify_one(*this); } _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() volatile _NOEXCEPT { std::__atomic_notify_all(*this); } - _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { - std::__atomic_notify_all(*this); - } + _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void notify_all() _NOEXCEPT { std::__atomic_notify_all(*this); } +#endif #if _LIBCPP_STD_VER >= 20 _LIBCPP_HIDE_FROM_ABI constexpr atomic_flag() _NOEXCEPT : __a_(false) {} @@ -144,45 +142,45 @@ inline _LIBCPP_HIDE_FROM_ABI void atomic_flag_clear_explicit(atomic_flag* __o, m __o->clear(__m); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +#if _LIBCPP_STD_VER >= 20 +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_wait(const volatile atomic_flag* __o, bool __v) _NOEXCEPT { __o->wait(__v); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_wait(const atomic_flag* 
__o, bool __v) _NOEXCEPT { __o->wait(__v); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_wait_explicit(const volatile atomic_flag* __o, bool __v, memory_order __m) _NOEXCEPT { __o->wait(__v, __m); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_wait_explicit(const atomic_flag* __o, bool __v, memory_order __m) _NOEXCEPT { __o->wait(__v, __m); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_one(volatile atomic_flag* __o) _NOEXCEPT { __o->notify_one(); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void -atomic_flag_notify_one(atomic_flag* __o) _NOEXCEPT { +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_one(atomic_flag* __o) _NOEXCEPT { __o->notify_one(); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_all(volatile atomic_flag* __o) _NOEXCEPT { __o->notify_all(); } -inline _LIBCPP_DEPRECATED_ATOMIC_SYNC _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void -atomic_flag_notify_all(atomic_flag* __o) _NOEXCEPT { +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_SYNC void atomic_flag_notify_all(atomic_flag* __o) _NOEXCEPT { __o->notify_all(); } +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__atomic/atomic_lock_free.h b/lib/libcxx/include/__atomic/atomic_lock_free.h index 0715439db450..3ae9b8856e81 100644 --- a/lib/libcxx/include/__atomic/atomic_lock_free.h +++ b/lib/libcxx/include/__atomic/atomic_lock_free.h @@ -18,7 +18,7 @@ #if defined(__CLANG_ATOMIC_BOOL_LOCK_FREE) # 
define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE # define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE -# ifndef _LIBCPP_HAS_NO_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T # define ATOMIC_CHAR8_T_LOCK_FREE __CLANG_ATOMIC_CHAR8_T_LOCK_FREE # endif # define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE @@ -32,7 +32,7 @@ #elif defined(__GCC_ATOMIC_BOOL_LOCK_FREE) # define ATOMIC_BOOL_LOCK_FREE __GCC_ATOMIC_BOOL_LOCK_FREE # define ATOMIC_CHAR_LOCK_FREE __GCC_ATOMIC_CHAR_LOCK_FREE -# ifndef _LIBCPP_HAS_NO_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T # define ATOMIC_CHAR8_T_LOCK_FREE __GCC_ATOMIC_CHAR8_T_LOCK_FREE # endif # define ATOMIC_CHAR16_T_LOCK_FREE __GCC_ATOMIC_CHAR16_T_LOCK_FREE diff --git a/lib/libcxx/include/__atomic/atomic_ref.h b/lib/libcxx/include/__atomic/atomic_ref.h index b0180a37ab50..177ea646b6cd 100644 --- a/lib/libcxx/include/__atomic/atomic_ref.h +++ b/lib/libcxx/include/__atomic/atomic_ref.h @@ -20,14 +20,16 @@ #include <__assert> #include <__atomic/atomic_sync.h> #include <__atomic/check_memory_order.h> +#include <__atomic/memory_order.h> #include <__atomic/to_gcc_order.h> #include <__concepts/arithmetic.h> #include <__concepts/same_as.h> #include <__config> +#include <__cstddef/byte.h> +#include <__cstddef/ptrdiff_t.h> #include <__memory/addressof.h> #include <__type_traits/has_unique_object_representation.h> #include <__type_traits/is_trivially_copyable.h> -#include #include #include @@ -219,7 +221,7 @@ struct __atomic_ref_base { _LIBCPP_HIDE_FROM_ABI void notify_all() const noexcept { std::__atomic_notify_all(*this); } protected: - typedef _Tp _Aligned_Tp __attribute__((aligned(required_alignment))); + using _Aligned_Tp [[__gnu__::__aligned__(required_alignment), __gnu__::__nodebug__]] = _Tp; _Aligned_Tp* __ptr_; _LIBCPP_HIDE_FROM_ABI __atomic_ref_base(_Tp& __obj) : __ptr_(std::addressof(__obj)) {} @@ -239,7 +241,7 @@ template struct atomic_ref : public __atomic_ref_base<_Tp> { static_assert(is_trivially_copyable_v<_Tp>, 
"std::atomic_ref requires that 'T' be a trivially copyable type"); - using __base = __atomic_ref_base<_Tp>; + using __base _LIBCPP_NODEBUG = __atomic_ref_base<_Tp>; _LIBCPP_HIDE_FROM_ABI explicit atomic_ref(_Tp& __obj) : __base(__obj) { _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN( @@ -257,7 +259,7 @@ struct atomic_ref : public __atomic_ref_base<_Tp> { template requires(std::integral<_Tp> && !std::same_as) struct atomic_ref<_Tp> : public __atomic_ref_base<_Tp> { - using __base = __atomic_ref_base<_Tp>; + using __base _LIBCPP_NODEBUG = __atomic_ref_base<_Tp>; using difference_type = __base::value_type; @@ -303,7 +305,7 @@ struct atomic_ref<_Tp> : public __atomic_ref_base<_Tp> { template requires std::floating_point<_Tp> struct atomic_ref<_Tp> : public __atomic_ref_base<_Tp> { - using __base = __atomic_ref_base<_Tp>; + using __base _LIBCPP_NODEBUG = __atomic_ref_base<_Tp>; using difference_type = __base::value_type; @@ -342,7 +344,7 @@ struct atomic_ref<_Tp> : public __atomic_ref_base<_Tp> { template struct atomic_ref<_Tp*> : public __atomic_ref_base<_Tp*> { - using __base = __atomic_ref_base<_Tp*>; + using __base _LIBCPP_NODEBUG = __atomic_ref_base<_Tp*>; using difference_type = ptrdiff_t; diff --git a/lib/libcxx/include/__atomic/atomic_sync.h b/lib/libcxx/include/__atomic/atomic_sync.h index aaf81f58731a..0dae448d649b 100644 --- a/lib/libcxx/include/__atomic/atomic_sync.h +++ b/lib/libcxx/include/__atomic/atomic_sync.h @@ -10,14 +10,12 @@ #define _LIBCPP___ATOMIC_ATOMIC_SYNC_H #include <__atomic/contention_t.h> -#include <__atomic/cxx_atomic_impl.h> #include <__atomic/memory_order.h> #include <__atomic/to_gcc_order.h> #include <__chrono/duration.h> #include <__config> #include <__memory/addressof.h> #include <__thread/poll_with_backoff.h> -#include <__thread/support.h> #include <__type_traits/conjunction.h> #include <__type_traits/decay.h> #include <__type_traits/invoke.h> @@ -57,19 +55,8 @@ struct __atomic_waitable< _Tp, decltype(__atomic_waitable_traits<__decay_t<_Tp> 
>::__atomic_contention_address( std::declval()))> > : true_type {}; -template -struct __atomic_wait_poll_impl { - const _AtomicWaitable& __a_; - _Poll __poll_; - memory_order __order_; - - _LIBCPP_HIDE_FROM_ABI bool operator()() const { - auto __current_val = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_load(__a_, __order_); - return __poll_(__current_val); - } -}; - -#ifndef _LIBCPP_HAS_NO_THREADS +#if _LIBCPP_STD_VER >= 20 +# if _LIBCPP_HAS_THREADS _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_one(void const volatile*) _NOEXCEPT; _LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI void __cxx_atomic_notify_all(void const volatile*) _NOEXCEPT; @@ -93,7 +80,7 @@ struct __atomic_wait_backoff_impl { _Poll __poll_; memory_order __order_; - using __waitable_traits = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >; + using __waitable_traits _LIBCPP_NODEBUG = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >; _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool @@ -120,15 +107,13 @@ struct __atomic_wait_backoff_impl { _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI bool operator()(chrono::nanoseconds __elapsed) const { - if (__elapsed > chrono::microseconds(64)) { + if (__elapsed > chrono::microseconds(4)) { auto __contention_address = __waitable_traits::__atomic_contention_address(__a_); __cxx_contention_t __monitor_val; if (__update_monitor_val_and_poll(__contention_address, __monitor_val)) return true; std::__libcpp_atomic_wait(__contention_address, __monitor_val); - } else if (__elapsed > chrono::microseconds(4)) - __libcpp_thread_yield(); - else { + } else { } // poll return false; } @@ -144,11 +129,16 @@ struct __atomic_wait_backoff_impl { // value. The predicate function must not return `false` spuriously. 
template _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void -__atomic_wait_unless(const _AtomicWaitable& __a, _Poll&& __poll, memory_order __order) { +__atomic_wait_unless(const _AtomicWaitable& __a, memory_order __order, _Poll&& __poll) { static_assert(__atomic_waitable<_AtomicWaitable>::value, ""); - __atomic_wait_poll_impl<_AtomicWaitable, __decay_t<_Poll> > __poll_impl = {__a, __poll, __order}; __atomic_wait_backoff_impl<_AtomicWaitable, __decay_t<_Poll> > __backoff_fn = {__a, __poll, __order}; - std::__libcpp_thread_poll_with_backoff(__poll_impl, __backoff_fn); + std::__libcpp_thread_poll_with_backoff( + /* poll */ + [&]() { + auto __current_val = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_load(__a, __order); + return __poll(__current_val); + }, + /* backoff */ __backoff_fn); } template @@ -163,12 +153,17 @@ _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const _ std::__cxx_atomic_notify_all(__atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_contention_address(__a)); } -#else // _LIBCPP_HAS_NO_THREADS +# else // _LIBCPP_HAS_THREADS template -_LIBCPP_HIDE_FROM_ABI void __atomic_wait_unless(const _AtomicWaitable& __a, _Poll&& __poll, memory_order __order) { - __atomic_wait_poll_impl<_AtomicWaitable, __decay_t<_Poll> > __poll_fn = {__a, __poll, __order}; - std::__libcpp_thread_poll_with_backoff(__poll_fn, __spinning_backoff_policy()); +_LIBCPP_HIDE_FROM_ABI void __atomic_wait_unless(const _AtomicWaitable& __a, memory_order __order, _Poll&& __poll) { + std::__libcpp_thread_poll_with_backoff( + /* poll */ + [&]() { + auto __current_val = __atomic_waitable_traits<__decay_t<_AtomicWaitable> >::__atomic_load(__a, __order); + return __poll(__current_val); + }, + /* backoff */ __spinning_backoff_policy()); } template @@ -177,29 +172,24 @@ _LIBCPP_HIDE_FROM_ABI void __atomic_notify_one(const _AtomicWaitable&) {} template _LIBCPP_HIDE_FROM_ABI void __atomic_notify_all(const _AtomicWaitable&) {} -#endif // 
_LIBCPP_HAS_NO_THREADS +# endif // _LIBCPP_HAS_THREADS template _LIBCPP_HIDE_FROM_ABI bool __cxx_nonatomic_compare_equal(_Tp const& __lhs, _Tp const& __rhs) { return std::memcmp(std::addressof(__lhs), std::addressof(__rhs), sizeof(_Tp)) == 0; } -template -struct __atomic_compare_unequal_to { - _Tp __val_; - _LIBCPP_HIDE_FROM_ABI bool operator()(const _Tp& __arg) const { - return !std::__cxx_nonatomic_compare_equal(__arg, __val_); - } -}; - -template +template _LIBCPP_AVAILABILITY_SYNC _LIBCPP_HIDE_FROM_ABI void -__atomic_wait(_AtomicWaitable& __a, _Up __val, memory_order __order) { +__atomic_wait(_AtomicWaitable& __a, _Tp __val, memory_order __order) { static_assert(__atomic_waitable<_AtomicWaitable>::value, ""); - __atomic_compare_unequal_to<_Up> __nonatomic_equal = {__val}; - std::__atomic_wait_unless(__a, __nonatomic_equal, __order); + std::__atomic_wait_unless(__a, __order, [&](_Tp const& __current) { + return !std::__cxx_nonatomic_compare_equal(__current, __val); + }); } +#endif // C++20 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___ATOMIC_ATOMIC_SYNC_H diff --git a/lib/libcxx/include/__atomic/contention_t.h b/lib/libcxx/include/__atomic/contention_t.h index 65890f338ce9..5b42a0125f87 100644 --- a/lib/libcxx/include/__atomic/contention_t.h +++ b/lib/libcxx/include/__atomic/contention_t.h @@ -9,7 +9,7 @@ #ifndef _LIBCPP___ATOMIC_CONTENTION_T_H #define _LIBCPP___ATOMIC_CONTENTION_T_H -#include <__atomic/cxx_atomic_impl.h> +#include <__atomic/support.h> #include <__config> #include @@ -20,12 +20,12 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if defined(__linux__) || (defined(_AIX) && !defined(__64BIT__)) -using __cxx_contention_t = int32_t; +using __cxx_contention_t _LIBCPP_NODEBUG = int32_t; #else -using __cxx_contention_t = int64_t; +using __cxx_contention_t _LIBCPP_NODEBUG = int64_t; #endif // __linux__ || (_AIX && !__64BIT__) -using __cxx_atomic_contention_t = __cxx_atomic_impl<__cxx_contention_t>; +using __cxx_atomic_contention_t _LIBCPP_NODEBUG = 
__cxx_atomic_impl<__cxx_contention_t>; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__atomic/fence.h b/lib/libcxx/include/__atomic/fence.h index 8c27ea54d62d..0a63cedddb3f 100644 --- a/lib/libcxx/include/__atomic/fence.h +++ b/lib/libcxx/include/__atomic/fence.h @@ -9,8 +9,8 @@ #ifndef _LIBCPP___ATOMIC_FENCE_H #define _LIBCPP___ATOMIC_FENCE_H -#include <__atomic/cxx_atomic_impl.h> #include <__atomic/memory_order.h> +#include <__atomic/support.h> #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/lib/libcxx/include/__atomic/memory_order.h b/lib/libcxx/include/__atomic/memory_order.h index 294121d1c4e7..44790fe888b3 100644 --- a/lib/libcxx/include/__atomic/memory_order.h +++ b/lib/libcxx/include/__atomic/memory_order.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // to pin the underlying type in C++20. enum __legacy_memory_order { __mo_relaxed, __mo_consume, __mo_acquire, __mo_release, __mo_acq_rel, __mo_seq_cst }; -using __memory_order_underlying_t = underlying_type<__legacy_memory_order>::type; +using __memory_order_underlying_t _LIBCPP_NODEBUG = underlying_type<__legacy_memory_order>::type; #if _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__atomic/support.h b/lib/libcxx/include/__atomic/support.h new file mode 100644 index 000000000000..4b555ab483ca --- /dev/null +++ b/lib/libcxx/include/__atomic/support.h @@ -0,0 +1,124 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ATOMIC_SUPPORT_H +#define _LIBCPP___ATOMIC_SUPPORT_H + +#include <__config> +#include <__type_traits/is_trivially_copyable.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +// +// This file implements base support for atomics on the platform. +// +// The following operations and types must be implemented (where _Atmc +// is __cxx_atomic_base_impl for readability): +// +// clang-format off +// +// template +// struct __cxx_atomic_base_impl; +// +// #define __cxx_atomic_is_lock_free(__size) +// +// void __cxx_atomic_thread_fence(memory_order __order) noexcept; +// void __cxx_atomic_signal_fence(memory_order __order) noexcept; +// +// template +// void __cxx_atomic_init(_Atmc<_Tp> volatile* __a, _Tp __val) noexcept; +// template +// void __cxx_atomic_init(_Atmc<_Tp>* __a, _Tp __val) noexcept; +// +// template +// void __cxx_atomic_store(_Atmc<_Tp> volatile* __a, _Tp __val, memory_order __order) noexcept; +// template +// void __cxx_atomic_store(_Atmc<_Tp>* __a, _Tp __val, memory_order __order) noexcept; +// +// template +// _Tp __cxx_atomic_load(_Atmc<_Tp> const volatile* __a, memory_order __order) noexcept; +// template +// _Tp __cxx_atomic_load(_Atmc<_Tp> const* __a, memory_order __order) noexcept; +// +// template +// void __cxx_atomic_load_inplace(_Atmc<_Tp> const volatile* __a, _Tp* __dst, memory_order __order) noexcept; +// template +// void __cxx_atomic_load_inplace(_Atmc<_Tp> const* __a, _Tp* __dst, memory_order __order) noexcept; +// +// template +// _Tp __cxx_atomic_exchange(_Atmc<_Tp> volatile* __a, _Tp __value, memory_order __order) noexcept; +// template +// _Tp __cxx_atomic_exchange(_Atmc<_Tp>* __a, _Tp __value, memory_order __order) noexcept; +// +// template +// bool __cxx_atomic_compare_exchange_strong(_Atmc<_Tp> volatile* __a, _Tp* 
__expected, _Tp __value, memory_order __success, memory_order __failure) noexcept; +// template +// bool __cxx_atomic_compare_exchange_strong(_Atmc<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order __success, memory_order __failure) noexcept; +// +// template +// bool __cxx_atomic_compare_exchange_weak(_Atmc<_Tp> volatile* __a, _Tp* __expected, _Tp __value, memory_order __success, memory_order __failure) noexcept; +// template +// bool __cxx_atomic_compare_exchange_weak(_Atmc<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order __success, memory_order __failure) noexcept; +// +// template +// _Tp __cxx_atomic_fetch_add(_Atmc<_Tp> volatile* __a, _Tp __delta, memory_order __order) noexcept; +// template +// _Tp __cxx_atomic_fetch_add(_Atmc<_Tp>* __a, _Tp __delta, memory_order __order) noexcept; +// +// template +// _Tp* __cxx_atomic_fetch_add(_Atmc<_Tp*> volatile* __a, ptrdiff_t __delta, memory_order __order) noexcept; +// template +// _Tp* __cxx_atomic_fetch_add(_Atmc<_Tp*>* __a, ptrdiff_t __delta, memory_order __order) noexcept; +// +// template +// _Tp __cxx_atomic_fetch_sub(_Atmc<_Tp> volatile* __a, _Tp __delta, memory_order __order) noexcept; +// template +// _Tp __cxx_atomic_fetch_sub(_Atmc<_Tp>* __a, _Tp __delta, memory_order __order) noexcept; +// template +// _Tp* __cxx_atomic_fetch_sub(_Atmc<_Tp*> volatile* __a, ptrdiff_t __delta, memory_order __order) noexcept; +// template +// _Tp* __cxx_atomic_fetch_sub(_Atmc<_Tp*>* __a, ptrdiff_t __delta, memory_order __order) noexcept; +// +// template +// _Tp __cxx_atomic_fetch_and(_Atmc<_Tp> volatile* __a, _Tp __pattern, memory_order __order) noexcept; +// template +// _Tp __cxx_atomic_fetch_and(_Atmc<_Tp>* __a, _Tp __pattern, memory_order __order) noexcept; +// +// template +// _Tp __cxx_atomic_fetch_or(_Atmc<_Tp> volatile* __a, _Tp __pattern, memory_order __order) noexcept; +// template +// _Tp __cxx_atomic_fetch_or(_Atmc<_Tp>* __a, _Tp __pattern, memory_order __order) noexcept; +// template +// _Tp 
__cxx_atomic_fetch_xor(_Atmc<_Tp> volatile* __a, _Tp __pattern, memory_order __order) noexcept; +// template +// _Tp __cxx_atomic_fetch_xor(_Atmc<_Tp>* __a, _Tp __pattern, memory_order __order) noexcept; +// +// clang-format on +// + +#if _LIBCPP_HAS_GCC_ATOMIC_IMP +# include <__atomic/support/gcc.h> +#elif _LIBCPP_HAS_C_ATOMIC_IMP +# include <__atomic/support/c11.h> +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template > +struct __cxx_atomic_impl : public _Base { + static_assert(is_trivially_copyable<_Tp>::value, "std::atomic requires that 'T' be a trivially copyable type"); + + _LIBCPP_HIDE_FROM_ABI __cxx_atomic_impl() _NOEXCEPT = default; + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit __cxx_atomic_impl(_Tp __value) _NOEXCEPT : _Base(__value) {} +}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___ATOMIC_SUPPORT_H diff --git a/lib/libcxx/include/__atomic/cxx_atomic_impl.h b/lib/libcxx/include/__atomic/support/c11.h similarity index 52% rename from lib/libcxx/include/__atomic/cxx_atomic_impl.h rename to lib/libcxx/include/__atomic/support/c11.h index 18e88aa97bec..177a075be407 100644 --- a/lib/libcxx/include/__atomic/cxx_atomic_impl.h +++ b/lib/libcxx/include/__atomic/support/c11.h @@ -6,275 +6,39 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___ATOMIC_CXX_ATOMIC_IMPL_H -#define _LIBCPP___ATOMIC_CXX_ATOMIC_IMPL_H +#ifndef _LIBCPP___ATOMIC_SUPPORT_C11_H +#define _LIBCPP___ATOMIC_SUPPORT_C11_H #include <__atomic/memory_order.h> -#include <__atomic/to_gcc_order.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__memory/addressof.h> -#include <__type_traits/is_assignable.h> -#include <__type_traits/is_trivially_copyable.h> #include <__type_traits/remove_const.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif -_LIBCPP_BEGIN_NAMESPACE_STD - -#if defined(_LIBCPP_HAS_GCC_ATOMIC_IMP) - -// [atomics.types.generic]p1 guarantees _Tp is trivially 
copyable. Because -// the default operator= in an object is not volatile, a byte-by-byte copy -// is required. -template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI void __cxx_atomic_assign_volatile(_Tp& __a_value, _Tv const& __val) { - __a_value = __val; -} -template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI void __cxx_atomic_assign_volatile(_Tp volatile& __a_value, _Tv volatile const& __val) { - volatile char* __to = reinterpret_cast(std::addressof(__a_value)); - volatile char* __end = __to + sizeof(_Tp); - volatile const char* __from = reinterpret_cast(std::addressof(__val)); - while (__to != __end) - *__to++ = *__from++; -} - -template -struct __cxx_atomic_base_impl { - _LIBCPP_HIDE_FROM_ABI -# ifndef _LIBCPP_CXX03_LANG - __cxx_atomic_base_impl() _NOEXCEPT = default; -# else - __cxx_atomic_base_impl() _NOEXCEPT : __a_value() { - } -# endif // _LIBCPP_CXX03_LANG - _LIBCPP_CONSTEXPR explicit __cxx_atomic_base_impl(_Tp value) _NOEXCEPT : __a_value(value) {} - _Tp __a_value; -}; - -template -_LIBCPP_HIDE_FROM_ABI void __cxx_atomic_init(volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp __val) { - __cxx_atomic_assign_volatile(__a->__a_value, __val); -} - -template -_LIBCPP_HIDE_FROM_ABI void __cxx_atomic_init(__cxx_atomic_base_impl<_Tp>* __a, _Tp __val) { - __a->__a_value = __val; -} - -_LIBCPP_HIDE_FROM_ABI inline void __cxx_atomic_thread_fence(memory_order __order) { - __atomic_thread_fence(__to_gcc_order(__order)); -} - -_LIBCPP_HIDE_FROM_ABI inline void __cxx_atomic_signal_fence(memory_order __order) { - __atomic_signal_fence(__to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI void -__cxx_atomic_store(volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp __val, memory_order __order) { - __atomic_store(std::addressof(__a->__a_value), std::addressof(__val), __to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI void __cxx_atomic_store(__cxx_atomic_base_impl<_Tp>* __a, _Tp __val, memory_order __order) { - __atomic_store(std::addressof(__a->__a_value), 
std::addressof(__val), __to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp __cxx_atomic_load(const volatile __cxx_atomic_base_impl<_Tp>* __a, memory_order __order) { - _Tp __ret; - __atomic_load(std::addressof(__a->__a_value), std::addressof(__ret), __to_gcc_order(__order)); - return __ret; -} - -template -_LIBCPP_HIDE_FROM_ABI void -__cxx_atomic_load_inplace(const volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp* __dst, memory_order __order) { - __atomic_load(std::addressof(__a->__a_value), __dst, __to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI void -__cxx_atomic_load_inplace(const __cxx_atomic_base_impl<_Tp>* __a, _Tp* __dst, memory_order __order) { - __atomic_load(std::addressof(__a->__a_value), __dst, __to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp __cxx_atomic_load(const __cxx_atomic_base_impl<_Tp>* __a, memory_order __order) { - _Tp __ret; - __atomic_load(std::addressof(__a->__a_value), std::addressof(__ret), __to_gcc_order(__order)); - return __ret; -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp -__cxx_atomic_exchange(volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp __value, memory_order __order) { - _Tp __ret; - __atomic_exchange( - std::addressof(__a->__a_value), std::addressof(__value), std::addressof(__ret), __to_gcc_order(__order)); - return __ret; -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp __cxx_atomic_exchange(__cxx_atomic_base_impl<_Tp>* __a, _Tp __value, memory_order __order) { - _Tp __ret; - __atomic_exchange( - std::addressof(__a->__a_value), std::addressof(__value), std::addressof(__ret), __to_gcc_order(__order)); - return __ret; -} - -template -_LIBCPP_HIDE_FROM_ABI bool __cxx_atomic_compare_exchange_strong( - volatile __cxx_atomic_base_impl<_Tp>* __a, - _Tp* __expected, - _Tp __value, - memory_order __success, - memory_order __failure) { - return __atomic_compare_exchange( - std::addressof(__a->__a_value), - __expected, - std::addressof(__value), - false, - __to_gcc_order(__success), - 
__to_gcc_failure_order(__failure)); -} - -template -_LIBCPP_HIDE_FROM_ABI bool __cxx_atomic_compare_exchange_strong( - __cxx_atomic_base_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order __success, memory_order __failure) { - return __atomic_compare_exchange( - std::addressof(__a->__a_value), - __expected, - std::addressof(__value), - false, - __to_gcc_order(__success), - __to_gcc_failure_order(__failure)); -} - -template -_LIBCPP_HIDE_FROM_ABI bool __cxx_atomic_compare_exchange_weak( - volatile __cxx_atomic_base_impl<_Tp>* __a, - _Tp* __expected, - _Tp __value, - memory_order __success, - memory_order __failure) { - return __atomic_compare_exchange( - std::addressof(__a->__a_value), - __expected, - std::addressof(__value), - true, - __to_gcc_order(__success), - __to_gcc_failure_order(__failure)); -} - -template -_LIBCPP_HIDE_FROM_ABI bool __cxx_atomic_compare_exchange_weak( - __cxx_atomic_base_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order __success, memory_order __failure) { - return __atomic_compare_exchange( - std::addressof(__a->__a_value), - __expected, - std::addressof(__value), - true, - __to_gcc_order(__success), - __to_gcc_failure_order(__failure)); -} - -template -struct __skip_amt { - enum { value = 1 }; -}; - -template -struct __skip_amt<_Tp*> { - enum { value = sizeof(_Tp) }; -}; - -// FIXME: Haven't figured out what the spec says about using arrays with -// atomic_fetch_add. Force a failure rather than creating bad behavior. 
-template -struct __skip_amt<_Tp[]> {}; -template -struct __skip_amt<_Tp[n]> {}; - -template -_LIBCPP_HIDE_FROM_ABI _Tp -__cxx_atomic_fetch_add(volatile __cxx_atomic_base_impl<_Tp>* __a, _Td __delta, memory_order __order) { - return __atomic_fetch_add(std::addressof(__a->__a_value), __delta * __skip_amt<_Tp>::value, __to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp __cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp>* __a, _Td __delta, memory_order __order) { - return __atomic_fetch_add(std::addressof(__a->__a_value), __delta * __skip_amt<_Tp>::value, __to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp -__cxx_atomic_fetch_sub(volatile __cxx_atomic_base_impl<_Tp>* __a, _Td __delta, memory_order __order) { - return __atomic_fetch_sub(std::addressof(__a->__a_value), __delta * __skip_amt<_Tp>::value, __to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp __cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp>* __a, _Td __delta, memory_order __order) { - return __atomic_fetch_sub(std::addressof(__a->__a_value), __delta * __skip_amt<_Tp>::value, __to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp -__cxx_atomic_fetch_and(volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { - return __atomic_fetch_and(std::addressof(__a->__a_value), __pattern, __to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp -__cxx_atomic_fetch_and(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { - return __atomic_fetch_and(std::addressof(__a->__a_value), __pattern, __to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp -__cxx_atomic_fetch_or(volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { - return __atomic_fetch_or(std::addressof(__a->__a_value), __pattern, __to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp __cxx_atomic_fetch_or(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { - return 
__atomic_fetch_or(std::addressof(__a->__a_value), __pattern, __to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp -__cxx_atomic_fetch_xor(volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { - return __atomic_fetch_xor(std::addressof(__a->__a_value), __pattern, __to_gcc_order(__order)); -} - -template -_LIBCPP_HIDE_FROM_ABI _Tp -__cxx_atomic_fetch_xor(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { - return __atomic_fetch_xor(std::addressof(__a->__a_value), __pattern, __to_gcc_order(__order)); -} - -# define __cxx_atomic_is_lock_free(__s) __atomic_is_lock_free(__s, 0) +// +// This file implements support for C11-style atomics +// -#elif defined(_LIBCPP_HAS_C_ATOMIC_IMP) +_LIBCPP_BEGIN_NAMESPACE_STD template struct __cxx_atomic_base_impl { _LIBCPP_HIDE_FROM_ABI -# ifndef _LIBCPP_CXX03_LANG +#ifndef _LIBCPP_CXX03_LANG __cxx_atomic_base_impl() _NOEXCEPT = default; -# else +#else __cxx_atomic_base_impl() _NOEXCEPT : __a_value() { } -# endif // _LIBCPP_CXX03_LANG +#endif // _LIBCPP_CXX03_LANG _LIBCPP_CONSTEXPR explicit __cxx_atomic_base_impl(_Tp __value) _NOEXCEPT : __a_value(__value) {} _LIBCPP_DISABLE_EXTENSION_WARNING _Atomic(_Tp) __a_value; }; -# define __cxx_atomic_is_lock_free(__s) __c11_atomic_is_lock_free(__s) +#define __cxx_atomic_is_lock_free(__s) __c11_atomic_is_lock_free(__s) _LIBCPP_HIDE_FROM_ABI inline void __cxx_atomic_thread_fence(memory_order __order) _NOEXCEPT { __c11_atomic_thread_fence(static_cast<__memory_order_underlying_t>(__order)); @@ -495,16 +259,6 @@ __cxx_atomic_fetch_xor(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_o std::addressof(__a->__a_value), __pattern, static_cast<__memory_order_underlying_t>(__order)); } -#endif // _LIBCPP_HAS_GCC_ATOMIC_IMP, _LIBCPP_HAS_C_ATOMIC_IMP - -template > -struct __cxx_atomic_impl : public _Base { - static_assert(is_trivially_copyable<_Tp>::value, "std::atomic requires that 'T' be a trivially copyable type"); - - 
_LIBCPP_HIDE_FROM_ABI __cxx_atomic_impl() _NOEXCEPT = default; - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit __cxx_atomic_impl(_Tp __value) _NOEXCEPT : _Base(__value) {} -}; - _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___ATOMIC_CXX_ATOMIC_IMPL_H +#endif // _LIBCPP___ATOMIC_SUPPORT_C11_H diff --git a/lib/libcxx/include/__atomic/support/gcc.h b/lib/libcxx/include/__atomic/support/gcc.h new file mode 100644 index 000000000000..73c1b1c8070a --- /dev/null +++ b/lib/libcxx/include/__atomic/support/gcc.h @@ -0,0 +1,265 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ATOMIC_SUPPORT_GCC_H +#define _LIBCPP___ATOMIC_SUPPORT_GCC_H + +#include <__atomic/memory_order.h> +#include <__atomic/to_gcc_order.h> +#include <__config> +#include <__memory/addressof.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/is_assignable.h> +#include <__type_traits/remove_const.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +// +// This file implements support for GCC-style atomics +// + +_LIBCPP_BEGIN_NAMESPACE_STD + +// [atomics.types.generic]p1 guarantees _Tp is trivially copyable. Because +// the default operator= in an object is not volatile, a byte-by-byte copy +// is required. 
+template ::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI void __cxx_atomic_assign_volatile(_Tp& __a_value, _Tv const& __val) { + __a_value = __val; +} +template ::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI void __cxx_atomic_assign_volatile(_Tp volatile& __a_value, _Tv volatile const& __val) { + volatile char* __to = reinterpret_cast(std::addressof(__a_value)); + volatile char* __end = __to + sizeof(_Tp); + volatile const char* __from = reinterpret_cast(std::addressof(__val)); + while (__to != __end) + *__to++ = *__from++; +} + +template +struct __cxx_atomic_base_impl { + _LIBCPP_HIDE_FROM_ABI +#ifndef _LIBCPP_CXX03_LANG + __cxx_atomic_base_impl() _NOEXCEPT = default; +#else + __cxx_atomic_base_impl() _NOEXCEPT : __a_value() { + } +#endif // _LIBCPP_CXX03_LANG + _LIBCPP_CONSTEXPR explicit __cxx_atomic_base_impl(_Tp value) _NOEXCEPT : __a_value(value) {} + _Tp __a_value; +}; + +template +_LIBCPP_HIDE_FROM_ABI void __cxx_atomic_init(volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp __val) { + __cxx_atomic_assign_volatile(__a->__a_value, __val); +} + +template +_LIBCPP_HIDE_FROM_ABI void __cxx_atomic_init(__cxx_atomic_base_impl<_Tp>* __a, _Tp __val) { + __a->__a_value = __val; +} + +_LIBCPP_HIDE_FROM_ABI inline void __cxx_atomic_thread_fence(memory_order __order) { + __atomic_thread_fence(__to_gcc_order(__order)); +} + +_LIBCPP_HIDE_FROM_ABI inline void __cxx_atomic_signal_fence(memory_order __order) { + __atomic_signal_fence(__to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI void +__cxx_atomic_store(volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp __val, memory_order __order) { + __atomic_store(std::addressof(__a->__a_value), std::addressof(__val), __to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI void __cxx_atomic_store(__cxx_atomic_base_impl<_Tp>* __a, _Tp __val, memory_order __order) { + __atomic_store(std::addressof(__a->__a_value), std::addressof(__val), __to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp __cxx_atomic_load(const 
volatile __cxx_atomic_base_impl<_Tp>* __a, memory_order __order) { + _Tp __ret; + __atomic_load(std::addressof(__a->__a_value), std::addressof(__ret), __to_gcc_order(__order)); + return __ret; +} + +template +_LIBCPP_HIDE_FROM_ABI void +__cxx_atomic_load_inplace(const volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp* __dst, memory_order __order) { + __atomic_load(std::addressof(__a->__a_value), __dst, __to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI void +__cxx_atomic_load_inplace(const __cxx_atomic_base_impl<_Tp>* __a, _Tp* __dst, memory_order __order) { + __atomic_load(std::addressof(__a->__a_value), __dst, __to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp __cxx_atomic_load(const __cxx_atomic_base_impl<_Tp>* __a, memory_order __order) { + _Tp __ret; + __atomic_load(std::addressof(__a->__a_value), std::addressof(__ret), __to_gcc_order(__order)); + return __ret; +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp +__cxx_atomic_exchange(volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp __value, memory_order __order) { + _Tp __ret; + __atomic_exchange( + std::addressof(__a->__a_value), std::addressof(__value), std::addressof(__ret), __to_gcc_order(__order)); + return __ret; +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp __cxx_atomic_exchange(__cxx_atomic_base_impl<_Tp>* __a, _Tp __value, memory_order __order) { + _Tp __ret; + __atomic_exchange( + std::addressof(__a->__a_value), std::addressof(__value), std::addressof(__ret), __to_gcc_order(__order)); + return __ret; +} + +template +_LIBCPP_HIDE_FROM_ABI bool __cxx_atomic_compare_exchange_strong( + volatile __cxx_atomic_base_impl<_Tp>* __a, + _Tp* __expected, + _Tp __value, + memory_order __success, + memory_order __failure) { + return __atomic_compare_exchange( + std::addressof(__a->__a_value), + __expected, + std::addressof(__value), + false, + __to_gcc_order(__success), + __to_gcc_failure_order(__failure)); +} + +template +_LIBCPP_HIDE_FROM_ABI bool __cxx_atomic_compare_exchange_strong( + 
__cxx_atomic_base_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order __success, memory_order __failure) { + return __atomic_compare_exchange( + std::addressof(__a->__a_value), + __expected, + std::addressof(__value), + false, + __to_gcc_order(__success), + __to_gcc_failure_order(__failure)); +} + +template +_LIBCPP_HIDE_FROM_ABI bool __cxx_atomic_compare_exchange_weak( + volatile __cxx_atomic_base_impl<_Tp>* __a, + _Tp* __expected, + _Tp __value, + memory_order __success, + memory_order __failure) { + return __atomic_compare_exchange( + std::addressof(__a->__a_value), + __expected, + std::addressof(__value), + true, + __to_gcc_order(__success), + __to_gcc_failure_order(__failure)); +} + +template +_LIBCPP_HIDE_FROM_ABI bool __cxx_atomic_compare_exchange_weak( + __cxx_atomic_base_impl<_Tp>* __a, _Tp* __expected, _Tp __value, memory_order __success, memory_order __failure) { + return __atomic_compare_exchange( + std::addressof(__a->__a_value), + __expected, + std::addressof(__value), + true, + __to_gcc_order(__success), + __to_gcc_failure_order(__failure)); +} + +template +struct __skip_amt { + enum { value = 1 }; +}; + +template +struct __skip_amt<_Tp*> { + enum { value = sizeof(_Tp) }; +}; + +// FIXME: Haven't figured out what the spec says about using arrays with +// atomic_fetch_add. Force a failure rather than creating bad behavior. 
+template +struct __skip_amt<_Tp[]> {}; +template +struct __skip_amt<_Tp[n]> {}; + +template +_LIBCPP_HIDE_FROM_ABI _Tp +__cxx_atomic_fetch_add(volatile __cxx_atomic_base_impl<_Tp>* __a, _Td __delta, memory_order __order) { + return __atomic_fetch_add(std::addressof(__a->__a_value), __delta * __skip_amt<_Tp>::value, __to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp __cxx_atomic_fetch_add(__cxx_atomic_base_impl<_Tp>* __a, _Td __delta, memory_order __order) { + return __atomic_fetch_add(std::addressof(__a->__a_value), __delta * __skip_amt<_Tp>::value, __to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp +__cxx_atomic_fetch_sub(volatile __cxx_atomic_base_impl<_Tp>* __a, _Td __delta, memory_order __order) { + return __atomic_fetch_sub(std::addressof(__a->__a_value), __delta * __skip_amt<_Tp>::value, __to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp __cxx_atomic_fetch_sub(__cxx_atomic_base_impl<_Tp>* __a, _Td __delta, memory_order __order) { + return __atomic_fetch_sub(std::addressof(__a->__a_value), __delta * __skip_amt<_Tp>::value, __to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp +__cxx_atomic_fetch_and(volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { + return __atomic_fetch_and(std::addressof(__a->__a_value), __pattern, __to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp +__cxx_atomic_fetch_and(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { + return __atomic_fetch_and(std::addressof(__a->__a_value), __pattern, __to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp +__cxx_atomic_fetch_or(volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { + return __atomic_fetch_or(std::addressof(__a->__a_value), __pattern, __to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp __cxx_atomic_fetch_or(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { + return 
__atomic_fetch_or(std::addressof(__a->__a_value), __pattern, __to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp +__cxx_atomic_fetch_xor(volatile __cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { + return __atomic_fetch_xor(std::addressof(__a->__a_value), __pattern, __to_gcc_order(__order)); +} + +template +_LIBCPP_HIDE_FROM_ABI _Tp +__cxx_atomic_fetch_xor(__cxx_atomic_base_impl<_Tp>* __a, _Tp __pattern, memory_order __order) { + return __atomic_fetch_xor(std::addressof(__a->__a_value), __pattern, __to_gcc_order(__order)); +} + +#define __cxx_atomic_is_lock_free(__s) __atomic_is_lock_free(__s, 0) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___ATOMIC_SUPPORT_GCC_H diff --git a/lib/libcxx/include/__bit/bit_cast.h b/lib/libcxx/include/__bit/bit_cast.h index cd0456738179..735025065a72 100644 --- a/lib/libcxx/include/__bit/bit_cast.h +++ b/lib/libcxx/include/__bit/bit_cast.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #ifndef _LIBCPP_CXX03_LANG template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI constexpr _ToType __bit_cast(const _FromType& __from) noexcept { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr _ToType __bit_cast(const _FromType& __from) noexcept { return __builtin_bit_cast(_ToType, __from); } diff --git a/lib/libcxx/include/__bit/bit_log2.h b/lib/libcxx/include/__bit/bit_log2.h index 62936f678686..94ee6c3b2bb1 100644 --- a/lib/libcxx/include/__bit/bit_log2.h +++ b/lib/libcxx/include/__bit/bit_log2.h @@ -10,8 +10,8 @@ #define _LIBCPP___BIT_BIT_LOG2_H #include <__bit/countl.h> -#include <__concepts/arithmetic.h> #include <__config> +#include <__type_traits/is_unsigned_integer.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -20,14 +20,15 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER >= 20 +#if _LIBCPP_STD_VER >= 14 -template <__libcpp_unsigned_integer _Tp> +template _LIBCPP_HIDE_FROM_ABI constexpr _Tp __bit_log2(_Tp __t) noexcept { - return numeric_limits<_Tp>::digits - 1 - 
std::countl_zero(__t); + static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__bit_log2 requires an unsigned integer type"); + return numeric_limits<_Tp>::digits - 1 - std::__countl_zero(__t); } -#endif // _LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 14 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__bit/byteswap.h b/lib/libcxx/include/__bit/byteswap.h index 6225ecf2f92d..d761e6a6fdb4 100644 --- a/lib/libcxx/include/__bit/byteswap.h +++ b/lib/libcxx/include/__bit/byteswap.h @@ -32,7 +32,7 @@ template return __builtin_bswap32(__val); } else if constexpr (sizeof(_Tp) == 8) { return __builtin_bswap64(__val); -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 } else if constexpr (sizeof(_Tp) == 16) { # if __has_builtin(__builtin_bswap128) return __builtin_bswap128(__val); @@ -40,7 +40,7 @@ template return static_cast<_Tp>(byteswap(static_cast(__val))) << 64 | static_cast<_Tp>(byteswap(static_cast(__val >> 64))); # endif // __has_builtin(__builtin_bswap128) -# endif // _LIBCPP_HAS_NO_INT128 +# endif // _LIBCPP_HAS_INT128 } else { static_assert(sizeof(_Tp) == 0, "byteswap is unimplemented for integral types of this size"); } diff --git a/lib/libcxx/include/__bit/countl.h b/lib/libcxx/include/__bit/countl.h index 998a0b44c19d..d4df1d049b29 100644 --- a/lib/libcxx/include/__bit/countl.h +++ b/lib/libcxx/include/__bit/countl.h @@ -27,19 +27,19 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned __x) _NOEXCEPT { return __builtin_clz(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long __x) _NOEXCEPT { return __builtin_clzl(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(unsigned long long __x) _NOEXCEPT { return __builtin_clzll(__x); } -#ifndef _LIBCPP_HAS_NO_INT128 +#if _LIBCPP_HAS_INT128 inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) _NOEXCEPT { # if __has_builtin(__builtin_clzg) return __builtin_clzg(__x); @@ -57,7 +57,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_clz(__uint128_t __x) : __builtin_clzll(static_cast(__x >> 64)); # endif } -#endif // _LIBCPP_HAS_NO_INT128 +#endif // _LIBCPP_HAS_INT128 template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countl_zero(_Tp __t) _NOEXCEPT { diff --git a/lib/libcxx/include/__bit/countr.h b/lib/libcxx/include/__bit/countr.h index 9e92021fba35..2f7571133bd0 100644 --- a/lib/libcxx/include/__bit/countr.h +++ b/lib/libcxx/include/__bit/countr.h @@ -26,20 +26,20 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned __x) _NOEXCEPT { return __builtin_ctz(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long __x) _NOEXCEPT { return __builtin_ctzl(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR int __libcpp_ctz(unsigned long long __x) _NOEXCEPT { return __builtin_ctzll(__x); } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int __countr_zero(_Tp __t) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 int 
__countr_zero(_Tp __t) _NOEXCEPT { #if __has_builtin(__builtin_ctzg) return __builtin_ctzg(__t, numeric_limits<_Tp>::digits); #else // __has_builtin(__builtin_ctzg) diff --git a/lib/libcxx/include/__bit/rotate.h b/lib/libcxx/include/__bit/rotate.h index 90e430e9d042..d79d98de296a 100644 --- a/lib/libcxx/include/__bit/rotate.h +++ b/lib/libcxx/include/__bit/rotate.h @@ -26,31 +26,31 @@ _LIBCPP_BEGIN_NAMESPACE_STD template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotl(_Tp __x, int __s) _NOEXCEPT { static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotl requires an unsigned integer type"); - const int __N = numeric_limits<_Tp>::digits; - int __r = __s % __N; + const int __n = numeric_limits<_Tp>::digits; + int __r = __s % __n; if (__r == 0) return __x; if (__r > 0) - return (__x << __r) | (__x >> (__N - __r)); + return (__x << __r) | (__x >> (__n - __r)); - return (__x >> -__r) | (__x << (__N + __r)); + return (__x >> -__r) | (__x << (__n + __r)); } template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp __rotr(_Tp __x, int __s) _NOEXCEPT { static_assert(__libcpp_is_unsigned_integer<_Tp>::value, "__rotr requires an unsigned integer type"); - const int __N = numeric_limits<_Tp>::digits; - int __r = __s % __N; + const int __n = numeric_limits<_Tp>::digits; + int __r = __s % __n; if (__r == 0) return __x; if (__r > 0) - return (__x >> __r) | (__x << (__N - __r)); + return (__x >> __r) | (__x << (__n - __r)); - return (__x << -__r) | (__x >> (__N + __r)); + return (__x << -__r) | (__x >> (__n + __r)); } #if _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__bit_reference b/lib/libcxx/include/__bit_reference index 22637d439741..67abb023122e 100644 --- a/lib/libcxx/include/__bit_reference +++ b/lib/libcxx/include/__bit_reference @@ -11,20 +11,20 @@ #define _LIBCPP___BIT_REFERENCE #include <__algorithm/copy_n.h> -#include <__algorithm/fill_n.h> #include <__algorithm/min.h> #include <__bit/countr.h> -#include <__bit/invert_if.h> 
-#include <__bit/popcount.h> #include <__compare/ordering.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> +#include <__cstddef/size_t.h> #include <__fwd/bit_reference.h> #include <__iterator/iterator_traits.h> #include <__memory/construct_at.h> #include <__memory/pointer_traits.h> #include <__type_traits/conditional.h> +#include <__type_traits/is_constant_evaluated.h> +#include <__type_traits/void_t.h> #include <__utility/swap.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -43,10 +43,22 @@ struct __has_storage_type { static const bool value = false; }; +template +struct __size_difference_type_traits { + using difference_type = ptrdiff_t; + using size_type = size_t; +}; + +template +struct __size_difference_type_traits<_Cp, __void_t > { + using difference_type = typename _Cp::difference_type; + using size_type = typename _Cp::size_type; +}; + template ::value> class __bit_reference { - using __storage_type = typename _Cp::__storage_type; - using __storage_pointer = typename _Cp::__storage_pointer; + using __storage_type _LIBCPP_NODEBUG = typename _Cp::__storage_type; + using __storage_pointer _LIBCPP_NODEBUG = typename _Cp::__storage_pointer; __storage_pointer __seg_; __storage_type __mask_; @@ -57,7 +69,7 @@ class __bit_reference { friend class __bit_iterator<_Cp, false>; public: - using __container = typename _Cp::__self; + using __container _LIBCPP_NODEBUG = typename _Cp::__self; _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_reference(const __bit_reference&) = default; @@ -137,8 +149,8 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void swap(bool& __x, template class __bit_const_reference { - using __storage_type = typename _Cp::__storage_type; - using __storage_pointer = typename _Cp::__const_storage_pointer; + using __storage_type _LIBCPP_NODEBUG = typename _Cp::__storage_type; + using __storage_pointer _LIBCPP_NODEBUG = typename _Cp::__const_storage_pointer; __storage_pointer 
__seg_; __storage_type __mask_; @@ -147,7 +159,7 @@ class __bit_const_reference { friend class __bit_iterator<_Cp, true>; public: - using __container = typename _Cp::__self; + using __container _LIBCPP_NODEBUG = typename _Cp::__self; _LIBCPP_HIDE_FROM_ABI __bit_const_reference(const __bit_const_reference&) = default; __bit_const_reference& operator=(const __bit_const_reference&) = delete; @@ -589,10 +601,10 @@ inline _LIBCPP_HIDE_FROM_ABI __bit_iterator<_Cr, false> swap_ranges( template struct __bit_array { - using difference_type = typename _Cp::difference_type; - using __storage_type = typename _Cp::__storage_type; - using __storage_pointer = typename _Cp::__storage_pointer; - using iterator = typename _Cp::iterator; + using difference_type _LIBCPP_NODEBUG = typename __size_difference_type_traits<_Cp>::difference_type; + using __storage_type _LIBCPP_NODEBUG = typename _Cp::__storage_type; + using __storage_pointer _LIBCPP_NODEBUG = typename _Cp::__storage_pointer; + using iterator _LIBCPP_NODEBUG = typename _Cp::iterator; static const unsigned __bits_per_word = _Cp::__bits_per_word; static const unsigned _Np = 4; @@ -781,7 +793,7 @@ equal(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __b template class __bit_iterator { public: - using difference_type = typename _Cp::difference_type; + using difference_type = typename __size_difference_type_traits<_Cp>::difference_type; using value_type = bool; using pointer = __bit_iterator; #ifndef _LIBCPP_ABI_BITSET_VECTOR_BOOL_CONST_SUBSCRIPT_RETURN_BOOL @@ -792,8 +804,8 @@ public: using iterator_category = random_access_iterator_tag; private: - using __storage_type = typename _Cp::__storage_type; - using __storage_pointer = + using __storage_type _LIBCPP_NODEBUG = typename _Cp::__storage_type; + using __storage_pointer _LIBCPP_NODEBUG = __conditional_t<_IsConst, typename _Cp::__const_storage_pointer, typename _Cp::__storage_pointer>; static const unsigned __bits_per_word = _Cp::__bits_per_word; @@ 
-968,7 +980,7 @@ private: template _LIBCPP_CONSTEXPR_SINCE_CXX20 friend void - __fill_n_bool(__bit_iterator<_Dp, false> __first, typename _Dp::size_type __n); + __fill_n_bool(__bit_iterator<_Dp, false> __first, typename __size_difference_type_traits<_Dp>::size_type __n); template _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, false> __copy_aligned( @@ -1011,10 +1023,10 @@ private: equal(__bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC1>, __bit_iterator<_Dp, _IC2>); template _LIBCPP_CONSTEXPR_SINCE_CXX20 friend __bit_iterator<_Dp, _IC> - __find_bool(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); + __find_bool(__bit_iterator<_Dp, _IC>, typename __size_difference_type_traits<_Dp>::size_type); template - friend typename __bit_iterator<_Dp, _IC>::difference_type _LIBCPP_HIDE_FROM_ABI - _LIBCPP_CONSTEXPR_SINCE_CXX20 __count_bool(__bit_iterator<_Dp, _IC>, typename _Dp::size_type); + friend typename __bit_iterator<_Dp, _IC>::difference_type _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 + __count_bool(__bit_iterator<_Dp, _IC>, typename __size_difference_type_traits<_Dp>::size_type); }; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__charconv/from_chars_floating_point.h b/lib/libcxx/include/__charconv/from_chars_floating_point.h new file mode 100644 index 000000000000..811e518a81db --- /dev/null +++ b/lib/libcxx/include/__charconv/from_chars_floating_point.h @@ -0,0 +1,73 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___CHARCONV_FROM_CHARS_FLOATING_POINT_H +#define _LIBCPP___CHARCONV_FROM_CHARS_FLOATING_POINT_H + +#include <__assert> +#include <__charconv/chars_format.h> +#include <__charconv/from_chars_result.h> +#include <__config> +#include <__cstddef/ptrdiff_t.h> +#include <__system_error/errc.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 17 + +template +struct __from_chars_result { + _Fp __value; + ptrdiff_t __n; + errc __ec; +}; + +template +_LIBCPP_EXPORTED_FROM_ABI __from_chars_result<_Fp> __from_chars_floating_point( + _LIBCPP_NOESCAPE const char* __first, _LIBCPP_NOESCAPE const char* __last, chars_format __fmt); + +extern template __from_chars_result __from_chars_floating_point( + _LIBCPP_NOESCAPE const char* __first, _LIBCPP_NOESCAPE const char* __last, chars_format __fmt); + +extern template __from_chars_result __from_chars_floating_point( + _LIBCPP_NOESCAPE const char* __first, _LIBCPP_NOESCAPE const char* __last, chars_format __fmt); + +template +_LIBCPP_HIDE_FROM_ABI from_chars_result +__from_chars(const char* __first, const char* __last, _Fp& __value, chars_format __fmt) { + __from_chars_result<_Fp> __r = std::__from_chars_floating_point<_Fp>(__first, __last, __fmt); + if (__r.__ec != errc::invalid_argument) + __value = __r.__value; + return {__first + __r.__n, __r.__ec}; +} + +_LIBCPP_AVAILABILITY_FROM_CHARS_FLOATING_POINT _LIBCPP_HIDE_FROM_ABI inline from_chars_result +from_chars(const char* __first, const char* __last, float& __value, chars_format __fmt = chars_format::general) { + return std::__from_chars(__first, __last, __value, __fmt); +} + +_LIBCPP_AVAILABILITY_FROM_CHARS_FLOATING_POINT _LIBCPP_HIDE_FROM_ABI inline from_chars_result 
+from_chars(const char* __first, const char* __last, double& __value, chars_format __fmt = chars_format::general) { + return std::__from_chars(__first, __last, __value, __fmt); +} + +#endif // _LIBCPP_STD_VER >= 17 + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___CHARCONV_FROM_CHARS_FLOATING_POINT_H diff --git a/lib/libcxx/include/__charconv/tables.h b/lib/libcxx/include/__charconv/tables.h index 6b93536b8c1b..9568bf841cd0 100644 --- a/lib/libcxx/include/__charconv/tables.h +++ b/lib/libcxx/include/__charconv/tables.h @@ -95,7 +95,7 @@ inline constexpr uint64_t __pow10_64[20] = { UINT64_C(1000000000000000000), UINT64_C(10000000000000000000)}; -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 inline constexpr int __pow10_128_offset = 0; inline constexpr __uint128_t __pow10_128[40] = { UINT64_C(0), diff --git a/lib/libcxx/include/__charconv/to_chars_base_10.h b/lib/libcxx/include/__charconv/to_chars_base_10.h index c49f4f6797aa..06e4e692337d 100644 --- a/lib/libcxx/include/__charconv/to_chars_base_10.h +++ b/lib/libcxx/include/__charconv/to_chars_base_10.h @@ -124,7 +124,7 @@ __base_10_u64(char* __buffer, uint64_t __value) noexcept { return __itoa::__append10(__buffer, __value); } -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 /// \returns 10^\a exp /// /// \pre \a exp [19, 39] diff --git a/lib/libcxx/include/__charconv/to_chars_integral.h b/lib/libcxx/include/__charconv/to_chars_integral.h index 0369f4dfb9bd..710299df9b4d 100644 --- a/lib/libcxx/include/__charconv/to_chars_integral.h +++ b/lib/libcxx/include/__charconv/to_chars_integral.h @@ -18,14 +18,15 @@ #include <__charconv/to_chars_result.h> #include <__charconv/traits.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__system_error/errc.h> #include <__type_traits/enable_if.h> #include <__type_traits/integral_constant.h> +#include <__type_traits/is_integral.h> #include <__type_traits/is_same.h> #include <__type_traits/make_32_64_or_128_bit.h> #include 
<__type_traits/make_unsigned.h> #include <__utility/unreachable.h> -#include #include #include @@ -70,7 +71,7 @@ __to_chars_itoa(char* __first, char* __last, _Tp __value, false_type) { return {__last, errc::value_too_large}; } -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 template <> inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI to_chars_result __to_chars_itoa(char* __first, char* __last, __uint128_t __value, false_type) { diff --git a/lib/libcxx/include/__charconv/traits.h b/lib/libcxx/include/__charconv/traits.h index c91c6da32479..2cb37c8cfb02 100644 --- a/lib/libcxx/include/__charconv/traits.h +++ b/lib/libcxx/include/__charconv/traits.h @@ -88,7 +88,7 @@ struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t struct _LIBCPP_HIDDEN __traits_base<_Tp, __enable_if_t > { using type = __uint128_t; diff --git a/lib/libcxx/include/__chrono/convert_to_tm.h b/lib/libcxx/include/__chrono/convert_to_tm.h index 3a51019b8078..7d06a38d87f2 100644 --- a/lib/libcxx/include/__chrono/convert_to_tm.h +++ b/lib/libcxx/include/__chrono/convert_to_tm.h @@ -24,6 +24,7 @@ #include <__chrono/sys_info.h> #include <__chrono/system_clock.h> #include <__chrono/time_point.h> +#include <__chrono/utc_clock.h> #include <__chrono/weekday.h> #include <__chrono/year.h> #include <__chrono/year_month.h> @@ -98,6 +99,22 @@ _LIBCPP_HIDE_FROM_ABI _Tm __convert_to_tm(const chrono::sys_time<_Duration> __tp return __result; } +# if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB + +template +_LIBCPP_HIDE_FROM_ABI _Tm __convert_to_tm(chrono::utc_time<_Duration> __tp) { + _Tm __result = std::__convert_to_tm<_Tm>(chrono::utc_clock::to_sys(__tp)); + + if (chrono::get_leap_second_info(__tp).is_leap_second) + ++__result.tm_sec; + + return __result; +} + +# endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB +# endif // _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION + // Convert a chrono 
(calendar) time point, or dururation to the given _Tm type, // which must have the same properties as std::tm. template @@ -110,13 +127,19 @@ _LIBCPP_HIDE_FROM_ABI _Tm __convert_to_tm(const _ChronoT& __value) { if constexpr (__is_time_point<_ChronoT>) { if constexpr (same_as) return std::__convert_to_tm<_Tm>(__value); +# if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB + else if constexpr (same_as) + return std::__convert_to_tm<_Tm>(__value); +# endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB +# endif // _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION else if constexpr (same_as) return std::__convert_to_tm<_Tm>(_ChronoT::clock::to_sys(__value)); else if constexpr (same_as) return std::__convert_to_tm<_Tm>(chrono::sys_time{__value.time_since_epoch()}); else static_assert(sizeof(_ChronoT) == 0, "TODO: Add the missing clock specialization"); - } else if constexpr (chrono::__is_duration<_ChronoT>::value) { + } else if constexpr (chrono::__is_duration_v<_ChronoT>) { // [time.format]/6 // ... However, if a flag refers to a "time of day" (e.g. %H, %I, %p, // etc.), then a specialization of duration is interpreted as the time of @@ -175,18 +198,17 @@ _LIBCPP_HIDE_FROM_ABI _Tm __convert_to_tm(const _ChronoT& __value) { if (__value.hours().count() > std::numeric_limits::max()) std::__throw_format_error("Formatting hh_mm_ss, encountered an hour overflow"); __result.tm_hour = __value.hours().count(); -# if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB } else if constexpr (same_as<_ChronoT, chrono::sys_info>) { // Has no time information. } else if constexpr (same_as<_ChronoT, chrono::local_info>) { // Has no time information. 
-# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ - !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION } else if constexpr (__is_specialization_v<_ChronoT, chrono::zoned_time>) { return std::__convert_to_tm<_Tm>( chrono::sys_time{__value.get_local_time().time_since_epoch()}); # endif -# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB } else static_assert(sizeof(_ChronoT) == 0, "Add the missing type specialization"); diff --git a/lib/libcxx/include/__chrono/day.h b/lib/libcxx/include/__chrono/day.h index 7342084b08c8..f5b14689a78a 100644 --- a/lib/libcxx/include/__chrono/day.h +++ b/lib/libcxx/include/__chrono/day.h @@ -11,8 +11,8 @@ #define _LIBCPP___CHRONO_DAY_H #include <__chrono/duration.h> +#include <__compare/ordering.h> #include <__config> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__chrono/duration.h b/lib/libcxx/include/__chrono/duration.h index 1e36d7342836..941aca600959 100644 --- a/lib/libcxx/include/__chrono/duration.h +++ b/lib/libcxx/include/__chrono/duration.h @@ -35,26 +35,25 @@ template > class _LIBCPP_TEMPLATE_VIS duration; template -struct __is_duration : false_type {}; +inline const bool __is_duration_v = false; template -struct __is_duration > : true_type {}; +inline const bool __is_duration_v > = true; template -struct __is_duration > : true_type {}; +inline const bool __is_duration_v > = true; template -struct __is_duration > : true_type {}; +inline const bool __is_duration_v > = true; template -struct __is_duration > : true_type {}; +inline const bool __is_duration_v > = true; } // namespace chrono template struct _LIBCPP_TEMPLATE_VIS common_type, chrono::duration<_Rep2, _Period2> > { - typedef chrono::duration::type, typename __ratio_gcd<_Period1, _Period2>::type> - type; + typedef 
chrono::duration::type, __ratio_gcd<_Period1, _Period2> > type; }; namespace chrono { @@ -102,7 +101,7 @@ struct __duration_cast<_FromDuration, _ToDuration, _Period, false, false> { } }; -template ::value, int> = 0> +template , int> = 0> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration duration_cast(const duration<_Rep, _Period>& __fd) { return __duration_cast, _ToDuration>()(__fd); } @@ -124,7 +123,7 @@ struct _LIBCPP_TEMPLATE_VIS duration_values { }; #if _LIBCPP_STD_VER >= 17 -template ::value, int> = 0> +template , int> = 0> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration floor(const duration<_Rep, _Period>& __d) { _ToDuration __t = chrono::duration_cast<_ToDuration>(__d); if (__t > __d) @@ -132,7 +131,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration floor(const duration< return __t; } -template ::value, int> = 0> +template , int> = 0> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration ceil(const duration<_Rep, _Period>& __d) { _ToDuration __t = chrono::duration_cast<_ToDuration>(__d); if (__t < __d) @@ -140,7 +139,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration ceil(const duration<_ return __t; } -template ::value, int> = 0> +template , int> = 0> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration round(const duration<_Rep, _Period>& __d) { _ToDuration __lower = chrono::floor<_ToDuration>(__d); _ToDuration __upper = __lower + _ToDuration{1}; @@ -158,15 +157,15 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ToDuration round(const duration< template class _LIBCPP_TEMPLATE_VIS duration { - static_assert(!__is_duration<_Rep>::value, "A duration representation can not be a duration"); - static_assert(__is_ratio<_Period>::value, "Second template parameter of duration must be a std::ratio"); + static_assert(!__is_duration_v<_Rep>, "A duration representation can not be a duration"); + static_assert(__is_ratio_v<_Period>, "Second template parameter of duration must be a std::ratio"); 
static_assert(_Period::num > 0, "duration period must be positive"); template struct __no_overflow { private: - static const intmax_t __gcd_n1_n2 = __static_gcd<_R1::num, _R2::num>::value; - static const intmax_t __gcd_d1_d2 = __static_gcd<_R1::den, _R2::den>::value; + static const intmax_t __gcd_n1_n2 = __static_gcd<_R1::num, _R2::num>; + static const intmax_t __gcd_d1_d2 = __static_gcd<_R1::den, _R2::den>; static const intmax_t __n1 = _R1::num / __gcd_n1_n2; static const intmax_t __d1 = _R1::den / __gcd_d1_d2; static const intmax_t __n2 = _R2::num / __gcd_n1_n2; @@ -434,7 +433,7 @@ operator*(const _Rep1& __s, const duration<_Rep2, _Period>& __d) { template ::value && + __enable_if_t && is_convertible::type>::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR duration::type, _Period> @@ -456,7 +455,7 @@ operator/(const duration<_Rep1, _Period1>& __lhs, const duration<_Rep2, _Period2 template ::value && + __enable_if_t && is_convertible::type>::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR duration::type, _Period> @@ -543,8 +542,4 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS -#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 -# include -#endif - #endif // _LIBCPP___CHRONO_DURATION_H diff --git a/lib/libcxx/include/__chrono/exception.h b/lib/libcxx/include/__chrono/exception.h index 266f8fac4417..1eb5b1b62d92 100644 --- a/lib/libcxx/include/__chrono/exception.h +++ b/lib/libcxx/include/__chrono/exception.h @@ -14,7 +14,7 @@ #include // Enable the contents of the header only when libc++ was built with experimental features enabled. 
-#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#if _LIBCPP_HAS_EXPERIMENTAL_TZDB # include <__chrono/calendar.h> # include <__chrono/local_info.h> @@ -71,9 +71,9 @@ class nonexistent_local_time : public runtime_error { }; template -_LIBCPP_NORETURN _LIBCPP_AVAILABILITY_TZDB _LIBCPP_HIDE_FROM_ABI void __throw_nonexistent_local_time( +[[noreturn]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_HIDE_FROM_ABI void __throw_nonexistent_local_time( [[maybe_unused]] const local_time<_Duration>& __time, [[maybe_unused]] const local_info& __info) { -# ifndef _LIBCPP_HAS_NO_EXCEPTIONS +# if _LIBCPP_HAS_EXCEPTIONS throw nonexistent_local_time(__time, __info); # else _LIBCPP_VERBOSE_ABORT("nonexistent_local_time was thrown in -fno-exceptions mode"); @@ -115,9 +115,9 @@ class ambiguous_local_time : public runtime_error { }; template -_LIBCPP_NORETURN _LIBCPP_AVAILABILITY_TZDB _LIBCPP_HIDE_FROM_ABI void __throw_ambiguous_local_time( +[[noreturn]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_HIDE_FROM_ABI void __throw_ambiguous_local_time( [[maybe_unused]] const local_time<_Duration>& __time, [[maybe_unused]] const local_info& __info) { -# ifndef _LIBCPP_HAS_NO_EXCEPTIONS +# if _LIBCPP_HAS_EXCEPTIONS throw ambiguous_local_time(__time, __info); # else _LIBCPP_VERBOSE_ABORT("ambiguous_local_time was thrown in -fno-exceptions mode"); @@ -130,6 +130,6 @@ _LIBCPP_NORETURN _LIBCPP_AVAILABILITY_TZDB _LIBCPP_HIDE_FROM_ABI void __throw_am _LIBCPP_END_NAMESPACE_STD -#endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB #endif // _LIBCPP___CHRONO_EXCEPTION_H diff --git a/lib/libcxx/include/__chrono/file_clock.h b/lib/libcxx/include/__chrono/file_clock.h index 4dd3f88ce5ba..b4b7e9dc14e7 100644 --- a/lib/libcxx/include/__chrono/file_clock.h +++ b/lib/libcxx/include/__chrono/file_clock.h @@ -47,7 +47,7 @@ _LIBCPP_END_NAMESPACE_STD #ifndef _LIBCPP_CXX03_LANG _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM struct _FilesystemClock { -# if !defined(_LIBCPP_HAS_NO_INT128) +# if 
_LIBCPP_HAS_INT128 typedef __int128_t rep; typedef nano period; # else diff --git a/lib/libcxx/include/__chrono/formatter.h b/lib/libcxx/include/__chrono/formatter.h index 449c415e9576..d17acd274e4c 100644 --- a/lib/libcxx/include/__chrono/formatter.h +++ b/lib/libcxx/include/__chrono/formatter.h @@ -10,55 +10,60 @@ #ifndef _LIBCPP___CHRONO_FORMATTER_H #define _LIBCPP___CHRONO_FORMATTER_H -#include <__algorithm/ranges_copy.h> -#include <__chrono/calendar.h> -#include <__chrono/concepts.h> -#include <__chrono/convert_to_tm.h> -#include <__chrono/day.h> -#include <__chrono/duration.h> -#include <__chrono/file_clock.h> -#include <__chrono/hh_mm_ss.h> -#include <__chrono/local_info.h> -#include <__chrono/month.h> -#include <__chrono/month_weekday.h> -#include <__chrono/monthday.h> -#include <__chrono/ostream.h> -#include <__chrono/parser_std_format_spec.h> -#include <__chrono/statically_widen.h> -#include <__chrono/sys_info.h> -#include <__chrono/system_clock.h> -#include <__chrono/time_point.h> -#include <__chrono/weekday.h> -#include <__chrono/year.h> -#include <__chrono/year_month.h> -#include <__chrono/year_month_day.h> -#include <__chrono/year_month_weekday.h> -#include <__chrono/zoned_time.h> -#include <__concepts/arithmetic.h> -#include <__concepts/same_as.h> #include <__config> -#include <__format/concepts.h> -#include <__format/format_error.h> -#include <__format/format_functions.h> -#include <__format/format_parse_context.h> -#include <__format/formatter.h> -#include <__format/parser_std_format_spec.h> -#include <__format/write_escaped.h> -#include <__memory/addressof.h> -#include <__type_traits/is_specialization.h> -#include -#include -#include -#include -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif + +#if _LIBCPP_HAS_LOCALIZATION + +# include <__algorithm/ranges_copy.h> +# include <__chrono/calendar.h> +# include <__chrono/concepts.h> +# include <__chrono/convert_to_tm.h> +# include <__chrono/day.h> +# 
include <__chrono/duration.h> +# include <__chrono/file_clock.h> +# include <__chrono/hh_mm_ss.h> +# include <__chrono/local_info.h> +# include <__chrono/month.h> +# include <__chrono/month_weekday.h> +# include <__chrono/monthday.h> +# include <__chrono/ostream.h> +# include <__chrono/parser_std_format_spec.h> +# include <__chrono/statically_widen.h> +# include <__chrono/sys_info.h> +# include <__chrono/system_clock.h> +# include <__chrono/time_point.h> +# include <__chrono/utc_clock.h> +# include <__chrono/weekday.h> +# include <__chrono/year.h> +# include <__chrono/year_month.h> +# include <__chrono/year_month_day.h> +# include <__chrono/year_month_weekday.h> +# include <__chrono/zoned_time.h> +# include <__concepts/arithmetic.h> +# include <__concepts/same_as.h> +# include <__format/concepts.h> +# include <__format/format_error.h> +# include <__format/format_functions.h> +# include <__format/format_parse_context.h> +# include <__format/formatter.h> +# include <__format/parser_std_format_spec.h> +# include <__format/write_escaped.h> +# include <__memory/addressof.h> +# include <__type_traits/is_specialization.h> +# include +# include +# include +# include +# include +# include + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER >= 20 +# if _LIBCPP_STD_VER >= 20 namespace __formatter { @@ -139,25 +144,23 @@ __format_sub_seconds(basic_stringstream<_CharT>& __sstr, const chrono::hh_mm_ss< __value.fractional_width); } -# if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && \ - !defined(_LIBCPP_HAS_NO_FILESYSTEM) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM template _LIBCPP_HIDE_FROM_ABI void __format_sub_seconds(basic_stringstream<_CharT>& __sstr, const chrono::zoned_time<_Duration, _TimeZonePtr>& __value) { 
__formatter::__format_sub_seconds(__sstr, __value.get_local_time().time_since_epoch()); } -# endif +# endif template consteval bool __use_fraction() { if constexpr (__is_time_point<_Tp>) return chrono::hh_mm_ss::fractional_width; -# if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && \ - !defined(_LIBCPP_HAS_NO_FILESYSTEM) && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>) return chrono::hh_mm_ss::fractional_width; -# endif - else if constexpr (chrono::__is_duration<_Tp>::value) +# endif + else if constexpr (chrono::__is_duration_v<_Tp>) return chrono::hh_mm_ss<_Tp>::fractional_width; else if constexpr (__is_hh_mm_ss<_Tp>) return _Tp::fractional_width; @@ -225,16 +228,15 @@ struct _LIBCPP_HIDE_FROM_ABI __time_zone { template _LIBCPP_HIDE_FROM_ABI __time_zone __convert_to_time_zone([[maybe_unused]] const _Tp& __value) { -# if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB if constexpr (same_as<_Tp, chrono::sys_info>) return {__value.abbrev, __value.offset}; -# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ - !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>) return __formatter::__convert_to_time_zone(__value.get_info()); -# endif +# endif else -# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB return {"UTC", chrono::seconds{0}}; } @@ -272,7 +274,7 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs( } break; case _CharT('j'): - if constexpr (chrono::__is_duration<_Tp>::value) + if constexpr (chrono::__is_duration_v<_Tp>) // Converting a duration where the period has a small ratio to days // may fail to compile. 
This due to loss of precision in the // conversion. In order to avoid that issue convert to seconds as @@ -284,7 +286,7 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs( break; case _CharT('q'): - if constexpr (chrono::__is_duration<_Tp>::value) { + if constexpr (chrono::__is_duration_v<_Tp>) { __sstr << chrono::__units_suffix<_CharT, typename _Tp::period>(); break; } @@ -300,7 +302,7 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs( // MSVC STL ignores precision but uses separator // FMT honours precision and has a bug for separator // https://godbolt.org/z/78b7sMxns - if constexpr (chrono::__is_duration<_Tp>::value) { + if constexpr (chrono::__is_duration_v<_Tp>) { __sstr << std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "{}"), __value.count()); break; } @@ -341,16 +343,16 @@ _LIBCPP_HIDE_FROM_ABI void __format_chrono_using_chrono_specs( // // TODO FMT evaluate the comment above. -# if defined(__GLIBC__) || defined(_AIX) || defined(_WIN32) +# if defined(__GLIBC__) || defined(_AIX) || defined(_WIN32) case _CharT('y'): // Glibc fails for negative values, AIX for positive values too. __sstr << std::format(_LIBCPP_STATICALLY_WIDEN(_CharT, "{:02}"), (std::abs(__t.tm_year + 1900)) % 100); break; -# endif // defined(__GLIBC__) || defined(_AIX) || defined(_WIN32) +# endif // defined(__GLIBC__) || defined(_AIX) || defined(_WIN32) case _CharT('Y'): // Depending on the platform's libc the range of supported years is - // limited. Intead of of testing all conditions use the internal + // limited. Instead of of testing all conditions use the internal // implementation unconditionally. 
__formatter::__format_year(__sstr, __t.tm_year + 1900); break; @@ -442,17 +444,16 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __weekday_ok(const _Tp& __value) { return __value.weekday().ok(); else if constexpr (__is_hh_mm_ss<_Tp>) return true; -# if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB else if constexpr (same_as<_Tp, chrono::sys_info>) return true; else if constexpr (same_as<_Tp, chrono::local_info>) return true; -# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ - !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>) return true; -# endif -# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# endif +# endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB else static_assert(sizeof(_Tp) == 0, "Add the missing type specialization"); } @@ -493,17 +494,16 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __weekday_name_ok(const _Tp& __value) { return __value.weekday().ok(); else if constexpr (__is_hh_mm_ss<_Tp>) return true; -# if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB else if constexpr (same_as<_Tp, chrono::sys_info>) return true; else if constexpr (same_as<_Tp, chrono::local_info>) return true; -# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ - !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>) return true; -# endif -# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# endif +# endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB else static_assert(sizeof(_Tp) == 0, "Add the missing type specialization"); } @@ -544,17 +544,16 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __date_ok(const _Tp& __value) { return __value.ok(); else if constexpr (__is_hh_mm_ss<_Tp>) return true; -# if 
!defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB else if constexpr (same_as<_Tp, chrono::sys_info>) return true; else if constexpr (same_as<_Tp, chrono::local_info>) return true; -# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ - !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>) return true; -# endif -# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# endif +# endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB else static_assert(sizeof(_Tp) == 0, "Add the missing type specialization"); } @@ -595,17 +594,16 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __month_name_ok(const _Tp& __value) { return __value.month().ok(); else if constexpr (__is_hh_mm_ss<_Tp>) return true; -# if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB else if constexpr (same_as<_Tp, chrono::sys_info>) return true; else if constexpr (same_as<_Tp, chrono::local_info>) return true; -# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ - !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM else if constexpr (__is_specialization_v<_Tp, chrono::zoned_time>) return true; -# endif -# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# endif +# endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB else static_assert(sizeof(_Tp) == 0, "Add the missing type specialization"); } @@ -630,7 +628,7 @@ __format_chrono(const _Tp& __value, if (__chrono_specs.empty()) __sstr << __value; else { - if constexpr (chrono::__is_duration<_Tp>::value) { + if constexpr (chrono::__is_duration_v<_Tp>) { // A duration can be a user defined arithmetic type. Users may specialize // numeric_limits, but they may not specialize is_signed. 
if constexpr (numeric_limits::is_signed) { @@ -714,7 +712,7 @@ struct _LIBCPP_TEMPLATE_VIS __formatter_chrono { template struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -722,10 +720,27 @@ struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : pub } }; +# if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB + +template +struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_chrono<_CharT> { +public: + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; + + template + _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { + return _Base::__parse(__ctx, __format_spec::__fields_chrono, __format_spec::__flags::__clock); + } +}; + +# endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB +# endif // _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM + template struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -736,7 +751,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : pu template struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -748,7 +763,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : p template struct formatter, _CharT> : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base 
_LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -770,7 +785,7 @@ struct formatter, _CharT> : public __formatter_c template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -781,7 +796,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_ template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -792,7 +807,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public __formatte template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -803,7 +818,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -814,7 +829,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public __format template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + 
using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -825,7 +840,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -836,7 +851,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public __f template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -847,7 +862,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public __form template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -858,7 +873,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public _ template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -869,7 +884,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public __ template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base 
_LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -880,7 +895,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : publ template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -891,7 +906,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public __for template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -902,7 +917,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public _ template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -913,7 +928,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : pub template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -924,7 +939,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : publ template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = 
__formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -935,7 +950,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter : template struct formatter, _CharT> : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -943,11 +958,11 @@ struct formatter, _CharT> : public __formatter_chron } }; -# if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB template <__fmt_char_type _CharT> struct formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { @@ -958,33 +973,33 @@ struct formatter : public __formatter_chrono<_CharT> { template <__fmt_char_type _CharT> struct formatter : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { return _Base::__parse(__ctx, __format_spec::__fields_chrono, __format_spec::__flags{}); } }; -# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ - !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM // Note due to how libc++'s formatters are implemented there is no need to add // the exposition only local-time-format-t abstraction. 
template struct formatter, _CharT> : public __formatter_chrono<_CharT> { public: - using _Base = __formatter_chrono<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_chrono<_CharT>; template _LIBCPP_HIDE_FROM_ABI constexpr typename _ParseContext::iterator parse(_ParseContext& __ctx) { return _Base::__parse(__ctx, __format_spec::__fields_chrono, __format_spec::__flags::__clock); } }; -# endif // !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && - // !defined(_LIBCPP_HAS_NO_LOCALIZATION) -# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# endif // _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM +# endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB -#endif // if _LIBCPP_STD_VER >= 20 +# endif // if _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_HAS_LOCALIZATION + #endif // _LIBCPP___CHRONO_FORMATTER_H diff --git a/lib/libcxx/include/__chrono/hh_mm_ss.h b/lib/libcxx/include/__chrono/hh_mm_ss.h index 57d2247fe6a3..6ea8a28ee093 100644 --- a/lib/libcxx/include/__chrono/hh_mm_ss.h +++ b/lib/libcxx/include/__chrono/hh_mm_ss.h @@ -29,8 +29,8 @@ namespace chrono { template class hh_mm_ss { private: - static_assert(__is_duration<_Duration>::value, "template parameter of hh_mm_ss must be a std::chrono::duration"); - using __CommonType = common_type_t<_Duration, chrono::seconds>; + static_assert(__is_duration_v<_Duration>, "template parameter of hh_mm_ss must be a std::chrono::duration"); + using __CommonType _LIBCPP_NODEBUG = common_type_t<_Duration, chrono::seconds>; _LIBCPP_HIDE_FROM_ABI static constexpr uint64_t __pow10(unsigned __exp) { uint64_t __ret = 1; diff --git a/lib/libcxx/include/__chrono/high_resolution_clock.h b/lib/libcxx/include/__chrono/high_resolution_clock.h index 0697fd2de9b4..d324c7f0283b 100644 --- a/lib/libcxx/include/__chrono/high_resolution_clock.h +++ b/lib/libcxx/include/__chrono/high_resolution_clock.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace chrono { -#ifndef 
_LIBCPP_HAS_NO_MONOTONIC_CLOCK +#if _LIBCPP_HAS_MONOTONIC_CLOCK typedef steady_clock high_resolution_clock; #else typedef system_clock high_resolution_clock; diff --git a/lib/libcxx/include/__chrono/leap_second.h b/lib/libcxx/include/__chrono/leap_second.h index 1a0e7f3107de..1857bef80376 100644 --- a/lib/libcxx/include/__chrono/leap_second.h +++ b/lib/libcxx/include/__chrono/leap_second.h @@ -14,7 +14,7 @@ #include // Enable the contents of the header only when libc++ was built with experimental features enabled. -#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#if _LIBCPP_HAS_EXPERIMENTAL_TZDB # include <__chrono/duration.h> # include <__chrono/system_clock.h> @@ -43,84 +43,89 @@ class leap_second { _LIBCPP_HIDE_FROM_ABI leap_second(const leap_second&) = default; _LIBCPP_HIDE_FROM_ABI leap_second& operator=(const leap_second&) = default; - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI constexpr sys_seconds date() const noexcept { return __date_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr sys_seconds date() const noexcept { return __date_; } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI constexpr seconds value() const noexcept { return __value_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr seconds value() const noexcept { return __value_; } private: sys_seconds __date_; seconds __value_; -}; -_LIBCPP_HIDE_FROM_ABI inline constexpr bool operator==(const leap_second& __x, const leap_second& __y) { - return __x.date() == __y.date(); -} - -_LIBCPP_HIDE_FROM_ABI inline constexpr strong_ordering operator<=>(const leap_second& __x, const leap_second& __y) { - return __x.date() <=> __y.date(); -} - -template -_LIBCPP_HIDE_FROM_ABI constexpr bool operator==(const leap_second& __x, const sys_time<_Duration>& __y) { - return __x.date() == __y; -} - -template -_LIBCPP_HIDE_FROM_ABI constexpr bool operator<(const leap_second& __x, const sys_time<_Duration>& __y) { - return __x.date() < __y; -} - -template -_LIBCPP_HIDE_FROM_ABI constexpr bool operator<(const 
sys_time<_Duration>& __x, const leap_second& __y) { - return __x < __y.date(); -} - -template -_LIBCPP_HIDE_FROM_ABI constexpr bool operator>(const leap_second& __x, const sys_time<_Duration>& __y) { - return __y < __x; -} - -template -_LIBCPP_HIDE_FROM_ABI constexpr bool operator>(const sys_time<_Duration>& __x, const leap_second& __y) { - return __y < __x; -} - -template -_LIBCPP_HIDE_FROM_ABI constexpr bool operator<=(const leap_second& __x, const sys_time<_Duration>& __y) { - return !(__y < __x); -} - -template -_LIBCPP_HIDE_FROM_ABI constexpr bool operator<=(const sys_time<_Duration>& __x, const leap_second& __y) { - return !(__y < __x); -} - -template -_LIBCPP_HIDE_FROM_ABI constexpr bool operator>=(const leap_second& __x, const sys_time<_Duration>& __y) { - return !(__x < __y); -} - -template -_LIBCPP_HIDE_FROM_ABI constexpr bool operator>=(const sys_time<_Duration>& __x, const leap_second& __y) { - return !(__x < __y); -} - -# ifndef _LIBCPP_COMPILER_GCC -// This requirement cause a compilation loop in GCC-13 and running out of memory. -// TODO TZDB Test whether GCC-14 fixes this. -template - requires three_way_comparable_with> -_LIBCPP_HIDE_FROM_ABI constexpr auto operator<=>(const leap_second& __x, const sys_time<_Duration>& __y) { - return __x.date() <=> __y; -} -# endif + // The function + // template + // requires three_way_comparable_with> + // constexpr auto operator<=>(const leap_second& x, const sys_time& y) noexcept; + // + // Has constraints that are recursive (LWG4139). The proposed resolution is + // to make the funcion a hidden friend. For consistency make this change for + // all comparison functions. 
+ + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const leap_second& __x, const leap_second& __y) { + return __x.date() == __y.date(); + } + + _LIBCPP_HIDE_FROM_ABI friend constexpr strong_ordering operator<=>(const leap_second& __x, const leap_second& __y) { + return __x.date() <=> __y.date(); + } + + template + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(const leap_second& __x, const sys_time<_Duration>& __y) { + return __x.date() == __y; + } + + template + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<(const leap_second& __x, const sys_time<_Duration>& __y) { + return __x.date() < __y; + } + + template + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<(const sys_time<_Duration>& __x, const leap_second& __y) { + return __x < __y.date(); + } + + template + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>(const leap_second& __x, const sys_time<_Duration>& __y) { + return __y < __x; + } + + template + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>(const sys_time<_Duration>& __x, const leap_second& __y) { + return __y < __x; + } + + template + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<=(const leap_second& __x, const sys_time<_Duration>& __y) { + return !(__y < __x); + } + + template + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<=(const sys_time<_Duration>& __x, const leap_second& __y) { + return !(__y < __x); + } + + template + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>=(const leap_second& __x, const sys_time<_Duration>& __y) { + return !(__x < __y); + } + + template + _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>=(const sys_time<_Duration>& __x, const leap_second& __y) { + return !(__x < __y); + } + + template + requires three_way_comparable_with> + _LIBCPP_HIDE_FROM_ABI friend constexpr auto operator<=>(const leap_second& __x, const sys_time<_Duration>& __y) { + return __x.date() <=> __y; + } +}; } // namespace chrono -# endif //_LIBCPP_STD_VER >= 20 +# endif // _LIBCPP_STD_VER >= 20 
_LIBCPP_END_NAMESPACE_STD -#endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB #endif // _LIBCPP___CHRONO_LEAP_SECOND_H diff --git a/lib/libcxx/include/__chrono/local_info.h b/lib/libcxx/include/__chrono/local_info.h index cfe1448904d3..31cf77761d6a 100644 --- a/lib/libcxx/include/__chrono/local_info.h +++ b/lib/libcxx/include/__chrono/local_info.h @@ -14,7 +14,7 @@ #include // Enable the contents of the header only when libc++ was built with experimental features enabled. -#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#if _LIBCPP_HAS_EXPERIMENTAL_TZDB # include <__chrono/sys_info.h> # include <__config> @@ -45,6 +45,6 @@ struct local_info { _LIBCPP_END_NAMESPACE_STD -#endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB #endif // _LIBCPP___CHRONO_LOCAL_INFO_H diff --git a/lib/libcxx/include/__chrono/month.h b/lib/libcxx/include/__chrono/month.h index ce5cc21aab7d..77c67d0954ef 100644 --- a/lib/libcxx/include/__chrono/month.h +++ b/lib/libcxx/include/__chrono/month.h @@ -11,8 +11,8 @@ #define _LIBCPP___CHRONO_MONTH_H #include <__chrono/duration.h> +#include <__compare/ordering.h> #include <__config> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__chrono/monthday.h b/lib/libcxx/include/__chrono/monthday.h index a89d16e51861..57712cf0b65a 100644 --- a/lib/libcxx/include/__chrono/monthday.h +++ b/lib/libcxx/include/__chrono/monthday.h @@ -13,8 +13,8 @@ #include <__chrono/calendar.h> #include <__chrono/day.h> #include <__chrono/month.h> +#include <__compare/ordering.h> #include <__config> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__chrono/ostream.h b/lib/libcxx/include/__chrono/ostream.h index e6c43254eea1..ed9ad8e346ba 100644 --- a/lib/libcxx/include/__chrono/ostream.h +++ b/lib/libcxx/include/__chrono/ostream.h @@ -10,37 +10,42 @@ 
#ifndef _LIBCPP___CHRONO_OSTREAM_H #define _LIBCPP___CHRONO_OSTREAM_H -#include <__chrono/calendar.h> -#include <__chrono/day.h> -#include <__chrono/duration.h> -#include <__chrono/file_clock.h> -#include <__chrono/hh_mm_ss.h> -#include <__chrono/local_info.h> -#include <__chrono/month.h> -#include <__chrono/month_weekday.h> -#include <__chrono/monthday.h> -#include <__chrono/statically_widen.h> -#include <__chrono/sys_info.h> -#include <__chrono/system_clock.h> -#include <__chrono/weekday.h> -#include <__chrono/year.h> -#include <__chrono/year_month.h> -#include <__chrono/year_month_day.h> -#include <__chrono/year_month_weekday.h> -#include <__chrono/zoned_time.h> -#include <__concepts/same_as.h> #include <__config> -#include <__format/format_functions.h> -#include <__fwd/ostream.h> -#include -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +#if _LIBCPP_HAS_LOCALIZATION + +# include <__chrono/calendar.h> +# include <__chrono/day.h> +# include <__chrono/duration.h> +# include <__chrono/file_clock.h> +# include <__chrono/hh_mm_ss.h> +# include <__chrono/local_info.h> +# include <__chrono/month.h> +# include <__chrono/month_weekday.h> +# include <__chrono/monthday.h> +# include <__chrono/statically_widen.h> +# include <__chrono/sys_info.h> +# include <__chrono/system_clock.h> +# include <__chrono/utc_clock.h> +# include <__chrono/weekday.h> +# include <__chrono/year.h> +# include <__chrono/year_month.h> +# include <__chrono/year_month_day.h> +# include <__chrono/year_month_weekday.h> +# include <__chrono/zoned_time.h> +# include <__concepts/same_as.h> +# include <__format/format_functions.h> +# include <__fwd/ostream.h> +# include +# include + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER >= 20 +# if _LIBCPP_STD_VER >= 20 namespace chrono { @@ -57,6 +62,18 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const sys_days& __dp) { return 
__os << year_month_day{__dp}; } +# if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB + +template +_LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& +operator<<(basic_ostream<_CharT, _Traits>& __os, const utc_time<_Duration>& __tp) { + return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%F %T}"), __tp); +} + +# endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB +# endif // _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM + template _LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const file_time<_Duration> __tp) { @@ -82,11 +99,11 @@ _LIBCPP_HIDE_FROM_ABI auto __units_suffix() { else if constexpr (same_as) return _LIBCPP_STATICALLY_WIDEN(_CharT, "ns"); else if constexpr (same_as) -# ifndef _LIBCPP_HAS_NO_UNICODE +# if _LIBCPP_HAS_UNICODE return _LIBCPP_STATICALLY_WIDEN(_CharT, "\u00b5s"); -# else +# else return _LIBCPP_STATICALLY_WIDEN(_CharT, "us"); -# endif +# endif else if constexpr (same_as) return _LIBCPP_STATICALLY_WIDEN(_CharT, "ms"); else if constexpr (same_as) @@ -265,7 +282,7 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const hh_mm_ss<_Duration> __hms return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%T}"), __hms); } -# if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# if _LIBCPP_HAS_EXPERIMENTAL_TZDB template _LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& @@ -303,20 +320,21 @@ operator<<(basic_ostream<_CharT, _Traits>& __os, const local_info& __info) { _LIBCPP_STATICALLY_WIDEN(_CharT, "{}: {{{}, {}}}"), __result(), __info.first, __info.second); } -# if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ - !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM template _LIBCPP_HIDE_FROM_ABI basic_ostream<_CharT, _Traits>& operator<<(basic_ostream<_CharT, _Traits>& __os, const zoned_time<_Duration, 
_TimeZonePtr>& __tp) { return __os << std::format(__os.getloc(), _LIBCPP_STATICALLY_WIDEN(_CharT, "{:L%F %T %Z}"), __tp); } -# endif -# endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +# endif +# endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB } // namespace chrono -#endif // if _LIBCPP_STD_VER >= 20 +# endif // if _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_HAS_LOCALIZATION + #endif // _LIBCPP___CHRONO_OSTREAM_H diff --git a/lib/libcxx/include/__chrono/parser_std_format_spec.h b/lib/libcxx/include/__chrono/parser_std_format_spec.h index 785bbae198e4..4df8e603c6bc 100644 --- a/lib/libcxx/include/__chrono/parser_std_format_spec.h +++ b/lib/libcxx/include/__chrono/parser_std_format_spec.h @@ -11,20 +11,23 @@ #define _LIBCPP___CHRONO_PARSER_STD_FORMAT_SPEC_H #include <__config> -#include <__format/concepts.h> -#include <__format/format_error.h> -#include <__format/format_parse_context.h> -#include <__format/formatter_string.h> -#include <__format/parser_std_format_spec.h> -#include -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif +#if _LIBCPP_HAS_LOCALIZATION + +# include <__format/concepts.h> +# include <__format/format_error.h> +# include <__format/format_parse_context.h> +# include <__format/formatter_string.h> +# include <__format/parser_std_format_spec.h> +# include + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif _LIBCPP_BEGIN_NAMESPACE_STD -#if _LIBCPP_STD_VER >= 20 +# if _LIBCPP_STD_VER >= 20 namespace __format_spec { @@ -137,7 +140,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __validate_time_zone(__flags __flags) { template class _LIBCPP_TEMPLATE_VIS __parser_chrono { - using _ConstIterator = typename basic_format_parse_context<_CharT>::const_iterator; + using _ConstIterator _LIBCPP_NODEBUG = typename basic_format_parse_context<_CharT>::const_iterator; public: template @@ -409,8 +412,10 @@ class _LIBCPP_TEMPLATE_VIS __parser_chrono { } // namespace __format_spec 
-#endif //_LIBCPP_STD_VER >= 20 +# endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_HAS_LOCALIZATION + #endif // _LIBCPP___CHRONO_PARSER_STD_FORMAT_SPEC_H diff --git a/lib/libcxx/include/__chrono/statically_widen.h b/lib/libcxx/include/__chrono/statically_widen.h index a18c46f057a8..40e085633b8c 100644 --- a/lib/libcxx/include/__chrono/statically_widen.h +++ b/lib/libcxx/include/__chrono/statically_widen.h @@ -24,7 +24,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template <__fmt_char_type _CharT> _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* __statically_widen(const char* __str, const wchar_t* __wstr) { if constexpr (same_as<_CharT, char>) @@ -33,7 +33,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* __statically_widen(const char* __s return __wstr; } # define _LIBCPP_STATICALLY_WIDEN(_CharT, __str) ::std::__statically_widen<_CharT>(__str, L##__str) -# else // _LIBCPP_HAS_NO_WIDE_CHARACTERS +# else // _LIBCPP_HAS_WIDE_CHARACTERS // Without this indirection the unit test test/libcxx/modules_include.sh.cpp // fails for the CI build "No wide characters". This seems like a bug. 
@@ -43,9 +43,9 @@ _LIBCPP_HIDE_FROM_ABI constexpr const _CharT* __statically_widen(const char* __s return __str; } # define _LIBCPP_STATICALLY_WIDEN(_CharT, __str) ::std::__statically_widen<_CharT>(__str) -# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +# endif // _LIBCPP_HAS_WIDE_CHARACTERS -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__chrono/steady_clock.h b/lib/libcxx/include/__chrono/steady_clock.h index 612a7f156e63..1b247b2c2860 100644 --- a/lib/libcxx/include/__chrono/steady_clock.h +++ b/lib/libcxx/include/__chrono/steady_clock.h @@ -22,7 +22,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace chrono { -#ifndef _LIBCPP_HAS_NO_MONOTONIC_CLOCK +#if _LIBCPP_HAS_MONOTONIC_CLOCK class _LIBCPP_EXPORTED_FROM_ABI steady_clock { public: typedef nanoseconds duration; diff --git a/lib/libcxx/include/__chrono/sys_info.h b/lib/libcxx/include/__chrono/sys_info.h index 11536cbde3a3..81e37f2b803f 100644 --- a/lib/libcxx/include/__chrono/sys_info.h +++ b/lib/libcxx/include/__chrono/sys_info.h @@ -14,7 +14,7 @@ #include // Enable the contents of the header only when libc++ was built with experimental features enabled. 
-#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#if _LIBCPP_HAS_EXPERIMENTAL_TZDB # include <__chrono/duration.h> # include <__chrono/system_clock.h> @@ -46,6 +46,6 @@ struct sys_info { _LIBCPP_END_NAMESPACE_STD -#endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB #endif // _LIBCPP___CHRONO_SYS_INFO_H diff --git a/lib/libcxx/include/__chrono/time_point.h b/lib/libcxx/include/__chrono/time_point.h index aaf0b098f280..5e79fa5d257f 100644 --- a/lib/libcxx/include/__chrono/time_point.h +++ b/lib/libcxx/include/__chrono/time_point.h @@ -32,8 +32,7 @@ namespace chrono { template class _LIBCPP_TEMPLATE_VIS time_point { - static_assert(__is_duration<_Duration>::value, - "Second template parameter of time_point must be a std::chrono::duration"); + static_assert(__is_duration_v<_Duration>, "Second template parameter of time_point must be a std::chrono::duration"); public: typedef _Clock clock; @@ -91,17 +90,17 @@ time_point_cast(const time_point<_Clock, _Duration>& __t) { } #if _LIBCPP_STD_VER >= 17 -template ::value, int> = 0> +template , int> = 0> inline _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration> floor(const time_point<_Clock, _Duration>& __t) { return time_point<_Clock, _ToDuration>{chrono::floor<_ToDuration>(__t.time_since_epoch())}; } -template ::value, int> = 0> +template , int> = 0> inline _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration> ceil(const time_point<_Clock, _Duration>& __t) { return time_point<_Clock, _ToDuration>{chrono::ceil<_ToDuration>(__t.time_since_epoch())}; } -template ::value, int> = 0> +template , int> = 0> inline _LIBCPP_HIDE_FROM_ABI constexpr time_point<_Clock, _ToDuration> round(const time_point<_Clock, _Duration>& __t) { return time_point<_Clock, _ToDuration>{chrono::round<_ToDuration>(__t.time_since_epoch())}; } diff --git a/lib/libcxx/include/__chrono/time_zone.h b/lib/libcxx/include/__chrono/time_zone.h index de11dac1eef0..d18d59d2736b 100644 --- 
a/lib/libcxx/include/__chrono/time_zone.h +++ b/lib/libcxx/include/__chrono/time_zone.h @@ -14,7 +14,7 @@ #include // Enable the contents of the header only when libc++ was built with experimental features enabled. -#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#if _LIBCPP_HAS_EXPERIMENTAL_TZDB # include <__chrono/calendar.h> # include <__chrono/duration.h> @@ -37,8 +37,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -# if _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ - !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION namespace chrono { @@ -104,10 +103,14 @@ class _LIBCPP_AVAILABILITY_TZDB time_zone { to_sys(const local_time<_Duration>& __time, choose __z) const { local_info __info = get_info(__time); switch (__info.result) { - case local_info::unique: - case local_info::nonexistent: // first and second are the same + case local_info::unique: // first and second are the same return sys_time>{__time.time_since_epoch() - __info.first.offset}; + case local_info::nonexistent: + // first and second are the same + // All non-existing values are converted to the same time. 
+ return sys_time>{__info.first.end}; + case local_info::ambiguous: switch (__z) { case choose::earliest: @@ -170,13 +173,13 @@ operator<=>(const time_zone& __x, const time_zone& __y) noexcept { } // namespace chrono -# endif // _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) - // && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# endif // _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && + // _LIBCPP_HAS_LOCALIZATION _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS -#endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB #endif // _LIBCPP___CHRONO_TIME_ZONE_H diff --git a/lib/libcxx/include/__chrono/time_zone_link.h b/lib/libcxx/include/__chrono/time_zone_link.h index b2d365c5fd08..cae40b07c2ca 100644 --- a/lib/libcxx/include/__chrono/time_zone_link.h +++ b/lib/libcxx/include/__chrono/time_zone_link.h @@ -14,7 +14,7 @@ #include // Enable the contents of the header only when libc++ was built with experimental features enabled. 
-#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#if _LIBCPP_HAS_EXPERIMENTAL_TZDB # include <__compare/strong_order.h> # include <__config> @@ -31,8 +31,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -# if _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ - !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION namespace chrono { @@ -68,12 +67,13 @@ operator<=>(const time_zone_link& __x, const time_zone_link& __y) noexcept { } // namespace chrono -# endif //_LIBCPP_STD_VER >= 20 +# endif // _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && + // _LIBCPP_HAS_LOCALIZATION _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS -#endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB #endif // _LIBCPP___CHRONO_TIME_ZONE_LINK_H diff --git a/lib/libcxx/include/__chrono/tzdb.h b/lib/libcxx/include/__chrono/tzdb.h index f731f8c318be..fb85f66b0196 100644 --- a/lib/libcxx/include/__chrono/tzdb.h +++ b/lib/libcxx/include/__chrono/tzdb.h @@ -14,15 +14,18 @@ #include // Enable the contents of the header only when libc++ was built with experimental features enabled. 
-#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#if _LIBCPP_HAS_EXPERIMENTAL_TZDB # include <__algorithm/ranges_lower_bound.h> # include <__chrono/leap_second.h> # include <__chrono/time_zone.h> # include <__chrono/time_zone_link.h> # include <__config> +# include <__memory/addressof.h> +# include <__vector/vector.h> +# include # include -# include +# include # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -33,8 +36,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -# if _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ - !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION namespace chrono { @@ -82,13 +84,13 @@ struct tzdb { } // namespace chrono -# endif // _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) - // && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# endif // _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && + // _LIBCPP_HAS_LOCALIZATION _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS -#endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB #endif // _LIBCPP___CHRONO_TZDB_H diff --git a/lib/libcxx/include/__chrono/tzdb_list.h b/lib/libcxx/include/__chrono/tzdb_list.h index aeef4fe1aba3..2b83a6df1daf 100644 --- a/lib/libcxx/include/__chrono/tzdb_list.h +++ b/lib/libcxx/include/__chrono/tzdb_list.h @@ -14,13 +14,14 @@ #include // Enable the contents of the header only when libc++ was built with experimental features enabled. 
-#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#if _LIBCPP_HAS_EXPERIMENTAL_TZDB # include <__chrono/time_zone.h> # include <__chrono/tzdb.h> # include <__config> # include <__fwd/string.h> # include +# include # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -28,8 +29,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -# if _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ - !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION namespace chrono { @@ -98,11 +98,11 @@ _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const tzdb& reload_tzdb(); } // namespace chrono -# endif // _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) - // && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# endif // _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && + // _LIBCPP_HAS_LOCALIZATION _LIBCPP_END_NAMESPACE_STD -#endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB #endif // _LIBCPP___CHRONO_TZDB_LIST_H diff --git a/lib/libcxx/include/__chrono/utc_clock.h b/lib/libcxx/include/__chrono/utc_clock.h new file mode 100644 index 000000000000..2207b89c92c5 --- /dev/null +++ b/lib/libcxx/include/__chrono/utc_clock.h @@ -0,0 +1,163 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___CHRONO_UTC_CLOCK_H +#define _LIBCPP___CHRONO_UTC_CLOCK_H + +#include +// Enable the contents of the header only when libc++ was built with experimental features enabled. +#if _LIBCPP_HAS_EXPERIMENTAL_TZDB + +# include <__chrono/duration.h> +# include <__chrono/leap_second.h> +# include <__chrono/system_clock.h> +# include <__chrono/time_point.h> +# include <__chrono/tzdb.h> +# include <__chrono/tzdb_list.h> +# include <__config> +# include <__type_traits/common_type.h> + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +# if _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION + +namespace chrono { + +class utc_clock; + +template +using utc_time = time_point; +using utc_seconds = utc_time; + +class utc_clock { +public: + using rep = system_clock::rep; + using period = system_clock::period; + using duration = chrono::duration; + using time_point = chrono::time_point; + static constexpr bool is_steady = false; // The system_clock is not steady. + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static time_point now() { return from_sys(system_clock::now()); } + + template + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static sys_time> + to_sys(const utc_time<_Duration>& __time); + + template + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI static utc_time> + from_sys(const sys_time<_Duration>& __time) { + using _Rp = utc_time>; + // TODO TZDB investigate optimizations. + // + // The leap second database stores all transitions, this mean to calculate + // the current number of leap seconds the code needs to iterate over all + // leap seconds to accumulate the sum. Then the sum can be used to determine + // the sys_time. Accessing the database involves acquiring a mutex. 
+ // + // The historic entries in the database are immutable. Hard-coding these + // values in a table would allow: + // - To store the sum, allowing a binary search on the data. + // - Avoid acquiring a mutex. + // The disadvantage are: + // - A slightly larger code size. + // + // There are two optimization directions + // - hard-code the database and do a linear search for future entries. This + // search can start at the back, and should probably contain very few + // entries. (Adding leap seconds is quite rare and new release of libc++ + // can add the new entries; they are announced half a year before they are + // added.) + // - During parsing the leap seconds store an additional database in the + // dylib with the list of the sum of the leap seconds. In that case there + // can be a private function __get_utc_to_sys_table that returns the + // table. + // + // Note for to_sys there are no optimizations to be done; it uses + // get_leap_second_info. The function get_leap_second_info could benefit + // from optimizations as described above; again both options apply. + + // Both UTC and the system clock use the same epoch. The Standard + // specifies from 1970-01-01 even when UTC starts at + // 1972-01-01 00:00:10 TAI. So when the sys_time is before epoch we can be + // sure there both clocks return the same value. 
+ + const tzdb& __tzdb = chrono::get_tzdb(); + _Rp __result{__time.time_since_epoch()}; + for (const auto& __leap_second : __tzdb.leap_seconds) { + if (__leap_second > __time) + return __result; + + __result += __leap_second.value(); + } + return __result; + } +}; + +struct leap_second_info { + bool is_leap_second; + seconds elapsed; +}; + +template +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI leap_second_info get_leap_second_info(const utc_time<_Duration>& __time) { + const tzdb& __tzdb = chrono::get_tzdb(); + if (__tzdb.leap_seconds.empty()) [[unlikely]] + return {false, chrono::seconds{0}}; + + sys_seconds __sys{chrono::floor(__time).time_since_epoch()}; + seconds __elapsed{0}; + for (const auto& __leap_second : __tzdb.leap_seconds) { + if (__sys == __leap_second.date() + __elapsed) + // A time point may only be a leap second during a positive leap second + // insertion, since time points that occur during a (theoretical) + // negative leap second don't exist. + return {__leap_second.value() > 0s, __elapsed + __leap_second.value()}; + + if (__sys < __leap_second.date() + __elapsed) + return {false, __elapsed}; + + __elapsed += __leap_second.value(); + } + + return {false, __elapsed}; +} + +template +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI sys_time> +utc_clock::to_sys(const utc_time<_Duration>& __time) { + using _Dp = common_type_t<_Duration, seconds>; + leap_second_info __info = chrono::get_leap_second_info(__time); + + // [time.clock.utc.members]/2 + // Returns: A sys_time t, such that from_sys(t) == u if such a mapping + // exists. Otherwise u represents a time_point during a positive leap + // second insertion, the conversion counts that leap second as not + // inserted, and the last representable value of sys_time prior to the + // insertion of the leap second is returned. 
+ sys_time> __result{__time.time_since_epoch() - __info.elapsed}; + if (__info.is_leap_second) + return chrono::floor(__result) + chrono::seconds{1} - _Dp{1}; + + return __result; +} + +} // namespace chrono + +# endif // _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && + // _LIBCPP_HAS_LOCALIZATION + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB + +#endif // _LIBCPP___CHRONO_UTC_CLOCK_H diff --git a/lib/libcxx/include/__chrono/weekday.h b/lib/libcxx/include/__chrono/weekday.h index 86c780cc7182..728cbb844633 100644 --- a/lib/libcxx/include/__chrono/weekday.h +++ b/lib/libcxx/include/__chrono/weekday.h @@ -79,25 +79,6 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr bool operator==(const weekday& __lhs, con return __lhs.c_encoding() == __rhs.c_encoding(); } -// TODO(LLVM 20): Remove the escape hatch -# ifdef _LIBCPP_ENABLE_REMOVED_WEEKDAY_RELATIONAL_OPERATORS -_LIBCPP_HIDE_FROM_ABI inline constexpr bool operator<(const weekday& __lhs, const weekday& __rhs) noexcept { - return __lhs.c_encoding() < __rhs.c_encoding(); -} - -_LIBCPP_HIDE_FROM_ABI inline constexpr bool operator>(const weekday& __lhs, const weekday& __rhs) noexcept { - return __rhs < __lhs; -} - -_LIBCPP_HIDE_FROM_ABI inline constexpr bool operator<=(const weekday& __lhs, const weekday& __rhs) noexcept { - return !(__rhs < __lhs); -} - -_LIBCPP_HIDE_FROM_ABI inline constexpr bool operator>=(const weekday& __lhs, const weekday& __rhs) noexcept { - return !(__lhs < __rhs); -} -# endif // _LIBCPP_ENABLE_REMOVED_WEEKDAY_RELATIONAL_OPERATORS - _LIBCPP_HIDE_FROM_ABI inline constexpr weekday operator+(const weekday& __lhs, const days& __rhs) noexcept { auto const __mu = static_cast(__lhs.c_encoding()) + __rhs.count(); auto const __yr = (__mu >= 0 ? 
__mu : __mu - 6) / 7; diff --git a/lib/libcxx/include/__chrono/year.h b/lib/libcxx/include/__chrono/year.h index 1899d09f38db..2ae5180cb8fc 100644 --- a/lib/libcxx/include/__chrono/year.h +++ b/lib/libcxx/include/__chrono/year.h @@ -11,8 +11,8 @@ #define _LIBCPP___CHRONO_YEAR_H #include <__chrono/duration.h> +#include <__compare/ordering.h> #include <__config> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/lib/libcxx/include/__chrono/year_month.h b/lib/libcxx/include/__chrono/year_month.h index 369ea38f7560..cf9234bdb462 100644 --- a/lib/libcxx/include/__chrono/year_month.h +++ b/lib/libcxx/include/__chrono/year_month.h @@ -13,8 +13,8 @@ #include <__chrono/duration.h> #include <__chrono/month.h> #include <__chrono/year.h> +#include <__compare/ordering.h> #include <__config> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__chrono/year_month_day.h b/lib/libcxx/include/__chrono/year_month_day.h index b06c0be03e0d..a0510a14f4ed 100644 --- a/lib/libcxx/include/__chrono/year_month_day.h +++ b/lib/libcxx/include/__chrono/year_month_day.h @@ -19,8 +19,8 @@ #include <__chrono/time_point.h> #include <__chrono/year.h> #include <__chrono/year_month.h> +#include <__compare/ordering.h> #include <__config> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/lib/libcxx/include/__chrono/zoned_time.h b/lib/libcxx/include/__chrono/zoned_time.h index 8cfa2122642c..8db687a422ab 100644 --- a/lib/libcxx/include/__chrono/zoned_time.h +++ b/lib/libcxx/include/__chrono/zoned_time.h @@ -14,7 +14,7 @@ #include // Enable the contents of the header only when libc++ was built with experimental features enabled. 
-#if !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#if _LIBCPP_HAS_EXPERIMENTAL_TZDB # include <__chrono/calendar.h> # include <__chrono/duration.h> @@ -22,12 +22,14 @@ # include <__chrono/system_clock.h> # include <__chrono/time_zone.h> # include <__chrono/tzdb_list.h> +# include <__concepts/constructible.h> # include <__config> -# include <__fwd/string_view.h> # include <__type_traits/common_type.h> # include <__type_traits/conditional.h> # include <__type_traits/remove_cvref.h> +# include <__utility/declval.h> # include <__utility/move.h> +# include # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -38,8 +40,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -# if _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \ - !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && _LIBCPP_HAS_LOCALIZATION namespace chrono { @@ -57,7 +58,7 @@ struct zoned_traits { template class zoned_time { // [time.zone.zonedtime.ctor]/2 - static_assert(__is_duration<_Duration>::value, + static_assert(__is_duration_v<_Duration>, "the program is ill-formed since _Duration is not a specialization of std::chrono::duration"); // The wording uses the constraints like @@ -65,7 +66,7 @@ class zoned_time { // Using these constraints in the code causes the compiler to give an // error that the constraint depends on itself. To avoid that issue use // the fact it is possible to create this object from a _TimeZonePtr. 
- using __traits = zoned_traits<_TimeZonePtr>; + using __traits _LIBCPP_NODEBUG = zoned_traits<_TimeZonePtr>; public: using duration = common_type_t<_Duration, seconds>; @@ -185,7 +186,7 @@ template zoned_time(sys_time<_Duration>) -> zoned_time>; template -using __time_zone_representation = +using __time_zone_representation _LIBCPP_NODEBUG = conditional_t, const time_zone*, remove_cvref_t<_TimeZonePtrOrName>>; @@ -201,8 +202,8 @@ template zoned_time(_TimeZonePtrOrName&&, local_time<_Duration>, choose = choose::earliest) -> zoned_time, __time_zone_representation<_TimeZonePtrOrName>>; -template -zoned_time(_TimeZonePtrOrName&&, zoned_time<_Duration, TimeZonePtr2>, choose = choose::earliest) +template +zoned_time(_TimeZonePtrOrName&&, zoned_time<_Duration, _TimeZonePtr2>, choose = choose::earliest) -> zoned_time, __time_zone_representation<_TimeZonePtrOrName>>; using zoned_seconds = zoned_time; @@ -215,13 +216,13 @@ operator==(const zoned_time<_Duration1, _TimeZonePtr>& __lhs, const zoned_time<_ } // namespace chrono -# endif // _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) - // && !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# endif // _LIBCPP_STD_VER >= 20 && _LIBCPP_HAS_TIME_ZONE_DATABASE && _LIBCPP_HAS_FILESYSTEM && + // _LIBCPP_HAS_LOCALIZATION _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS -#endif // !defined(_LIBCPP_HAS_NO_EXPERIMENTAL_TZDB) +#endif // _LIBCPP_HAS_EXPERIMENTAL_TZDB #endif // _LIBCPP___CHRONO_ZONED_TIME_H diff --git a/lib/libcxx/include/__compare/common_comparison_category.h b/lib/libcxx/include/__compare/common_comparison_category.h index 7aeb3da03a4f..215922abad6b 100644 --- a/lib/libcxx/include/__compare/common_comparison_category.h +++ b/lib/libcxx/include/__compare/common_comparison_category.h @@ -11,8 +11,8 @@ #include <__compare/ordering.h> #include <__config> +#include <__cstddef/size_t.h> #include <__type_traits/is_same.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # 
pragma GCC system_header diff --git a/lib/libcxx/include/__compare/compare_partial_order_fallback.h b/lib/libcxx/include/__compare/compare_partial_order_fallback.h index e0efa3ccb88d..80f2aca661fa 100644 --- a/lib/libcxx/include/__compare/compare_partial_order_fallback.h +++ b/lib/libcxx/include/__compare/compare_partial_order_fallback.h @@ -11,6 +11,7 @@ #include <__compare/ordering.h> #include <__compare/partial_order.h> +#include <__concepts/boolean_testable.h> #include <__config> #include <__type_traits/decay.h> #include <__type_traits/is_same.h> @@ -37,18 +38,16 @@ struct __fn { } template - requires is_same_v, decay_t<_Up>> - _LIBCPP_HIDE_FROM_ABI static constexpr auto __go(_Tp&& __t, _Up&& __u, __priority_tag<0>) noexcept(noexcept( - std::forward<_Tp>(__t) == std::forward<_Up>(__u) ? partial_ordering::equivalent - : std::forward<_Tp>(__t) < std::forward<_Up>(__u) ? partial_ordering::less - : std::forward<_Up>(__u) < std::forward<_Tp>(__t) - ? partial_ordering::greater - : partial_ordering::unordered)) - -> decltype(std::forward<_Tp>(__t) == std::forward<_Up>(__u) ? partial_ordering::equivalent - : std::forward<_Tp>(__t) < std::forward<_Up>(__u) ? partial_ordering::less - : std::forward<_Up>(__u) < std::forward<_Tp>(__t) - ? partial_ordering::greater - : partial_ordering::unordered) { + requires is_same_v, decay_t<_Up>> && requires(_Tp&& __t, _Up&& __u) { + { std::forward<_Tp>(__t) == std::forward<_Up>(__u) } -> __boolean_testable; + { std::forward<_Tp>(__t) < std::forward<_Up>(__u) } -> __boolean_testable; + { std::forward<_Up>(__u) < std::forward<_Tp>(__t) } -> __boolean_testable; + } + _LIBCPP_HIDE_FROM_ABI static constexpr partial_ordering __go(_Tp&& __t, _Up&& __u, __priority_tag<0>) noexcept( + noexcept(std::forward<_Tp>(__t) == std::forward<_Up>(__u) ? partial_ordering::equivalent + : std::forward<_Tp>(__t) < std::forward<_Up>(__u) ? partial_ordering::less + : std::forward<_Up>(__u) < std::forward<_Tp>(__t) ? 
partial_ordering::greater + : partial_ordering::unordered)) { return std::forward<_Tp>(__t) == std::forward<_Up>(__u) ? partial_ordering::equivalent : std::forward<_Tp>(__t) < std::forward<_Up>(__u) ? partial_ordering::less : std::forward<_Up>(__u) < std::forward<_Tp>(__t) diff --git a/lib/libcxx/include/__compare/compare_strong_order_fallback.h b/lib/libcxx/include/__compare/compare_strong_order_fallback.h index a94d517ed30f..c41a90c5afa8 100644 --- a/lib/libcxx/include/__compare/compare_strong_order_fallback.h +++ b/lib/libcxx/include/__compare/compare_strong_order_fallback.h @@ -11,6 +11,7 @@ #include <__compare/ordering.h> #include <__compare/strong_order.h> +#include <__concepts/boolean_testable.h> #include <__config> #include <__type_traits/decay.h> #include <__type_traits/is_same.h> @@ -37,16 +38,14 @@ struct __fn { } template - requires is_same_v, decay_t<_Up>> - _LIBCPP_HIDE_FROM_ABI static constexpr auto __go(_Tp&& __t, _Up&& __u, __priority_tag<0>) noexcept(noexcept( - std::forward<_Tp>(__t) == std::forward<_Up>(__u) ? strong_ordering::equal - : std::forward<_Tp>(__t) < std::forward<_Up>(__u) - ? strong_ordering::less - : strong_ordering::greater)) - -> decltype(std::forward<_Tp>(__t) == std::forward<_Up>(__u) ? strong_ordering::equal - : std::forward<_Tp>(__t) < std::forward<_Up>(__u) - ? strong_ordering::less - : strong_ordering::greater) { + requires is_same_v, decay_t<_Up>> && requires(_Tp&& __t, _Up&& __u) { + { std::forward<_Tp>(__t) == std::forward<_Up>(__u) } -> __boolean_testable; + { std::forward<_Tp>(__t) < std::forward<_Up>(__u) } -> __boolean_testable; + } + _LIBCPP_HIDE_FROM_ABI static constexpr strong_ordering __go(_Tp&& __t, _Up&& __u, __priority_tag<0>) noexcept( + noexcept(std::forward<_Tp>(__t) == std::forward<_Up>(__u) ? strong_ordering::equal + : std::forward<_Tp>(__t) < std::forward<_Up>(__u) ? strong_ordering::less + : strong_ordering::greater)) { return std::forward<_Tp>(__t) == std::forward<_Up>(__u) ? 
strong_ordering::equal : std::forward<_Tp>(__t) < std::forward<_Up>(__u) ? strong_ordering::less diff --git a/lib/libcxx/include/__compare/compare_three_way_result.h b/lib/libcxx/include/__compare/compare_three_way_result.h index d7508073433a..6ee2eff00302 100644 --- a/lib/libcxx/include/__compare/compare_three_way_result.h +++ b/lib/libcxx/include/__compare/compare_three_way_result.h @@ -33,7 +33,8 @@ struct _LIBCPP_HIDE_FROM_ABI __compare_three_way_result< }; template -struct _LIBCPP_TEMPLATE_VIS compare_three_way_result : __compare_three_way_result<_Tp, _Up, void> {}; +struct _LIBCPP_TEMPLATE_VIS _LIBCPP_NO_SPECIALIZATIONS compare_three_way_result + : __compare_three_way_result<_Tp, _Up, void> {}; template using compare_three_way_result_t = typename compare_three_way_result<_Tp, _Up>::type; diff --git a/lib/libcxx/include/__compare/compare_weak_order_fallback.h b/lib/libcxx/include/__compare/compare_weak_order_fallback.h index 062b7b582cd7..26689fbd9f44 100644 --- a/lib/libcxx/include/__compare/compare_weak_order_fallback.h +++ b/lib/libcxx/include/__compare/compare_weak_order_fallback.h @@ -11,6 +11,7 @@ #include <__compare/ordering.h> #include <__compare/weak_order.h> +#include <__concepts/boolean_testable.h> #include <__config> #include <__type_traits/decay.h> #include <__type_traits/is_same.h> @@ -37,16 +38,15 @@ struct __fn { } template - requires is_same_v, decay_t<_Up>> - _LIBCPP_HIDE_FROM_ABI static constexpr auto __go(_Tp&& __t, _Up&& __u, __priority_tag<0>) noexcept(noexcept( + requires is_same_v, decay_t<_Up>> && requires(_Tp&& __t, _Up&& __u) { + { std::forward<_Tp>(__t) == std::forward<_Up>(__u) } -> __boolean_testable; + { std::forward<_Tp>(__t) < std::forward<_Up>(__u) } -> __boolean_testable; + } + _LIBCPP_HIDE_FROM_ABI static constexpr weak_ordering __go(_Tp&& __t, _Up&& __u, __priority_tag<0>) noexcept(noexcept( std::forward<_Tp>(__t) == std::forward<_Up>(__u) ? weak_ordering::equivalent : std::forward<_Tp>(__t) < std::forward<_Up>(__u) ? 
weak_ordering::less - : weak_ordering::greater)) - -> decltype(std::forward<_Tp>(__t) == std::forward<_Up>(__u) ? weak_ordering::equivalent - : std::forward<_Tp>(__t) < std::forward<_Up>(__u) - ? weak_ordering::less - : weak_ordering::greater) { + : weak_ordering::greater)) { return std::forward<_Tp>(__t) == std::forward<_Up>(__u) ? weak_ordering::equivalent : std::forward<_Tp>(__t) < std::forward<_Up>(__u) ? weak_ordering::less diff --git a/lib/libcxx/include/__compare/ordering.h b/lib/libcxx/include/__compare/ordering.h index 2995d381304f..902ef5329dd4 100644 --- a/lib/libcxx/include/__compare/ordering.h +++ b/lib/libcxx/include/__compare/ordering.h @@ -24,32 +24,35 @@ _LIBCPP_BEGIN_NAMESPACE_STD // exposition only enum class _OrdResult : signed char { __less = -1, __equiv = 0, __greater = 1 }; -enum class _NCmpResult : signed char { __unordered = -127 }; +enum class _PartialOrdResult : signed char { + __less = static_cast(_OrdResult::__less), + __equiv = static_cast(_OrdResult::__equiv), + __greater = static_cast(_OrdResult::__greater), + __unordered = -127, +}; class partial_ordering; class weak_ordering; class strong_ordering; -template -inline constexpr bool __one_of_v = (is_same_v<_Tp, _Args> || ...); - struct _CmpUnspecifiedParam { - _LIBCPP_HIDE_FROM_ABI constexpr _CmpUnspecifiedParam(int _CmpUnspecifiedParam::*) noexcept {} - - template >> - _CmpUnspecifiedParam(_Tp) = delete; + // If anything other than a literal 0 is provided, the behavior is undefined by the Standard. + // + // The alternative to the `__enable_if__` attribute would be to use the fact that a pointer + // can be constructed from literal 0, but this conflicts with `-Wzero-as-null-pointer-constant`. 
+ template > > + _LIBCPP_HIDE_FROM_ABI consteval _CmpUnspecifiedParam(_Tp __zero) noexcept +# if __has_attribute(__enable_if__) + __attribute__((__enable_if__( + __zero == 0, "Only literal 0 is allowed as the operand of a comparison with one of the ordering types"))) +# endif + { + (void)__zero; + } }; class partial_ordering { - using _ValueT = signed char; - - _LIBCPP_HIDE_FROM_ABI explicit constexpr partial_ordering(_OrdResult __v) noexcept : __value_(_ValueT(__v)) {} - - _LIBCPP_HIDE_FROM_ABI explicit constexpr partial_ordering(_NCmpResult __v) noexcept : __value_(_ValueT(__v)) {} - - _LIBCPP_HIDE_FROM_ABI constexpr bool __is_ordered() const noexcept { - return __value_ != _ValueT(_NCmpResult::__unordered); - } + _LIBCPP_HIDE_FROM_ABI explicit constexpr partial_ordering(_PartialOrdResult __v) noexcept : __value_(__v) {} public: // valid values @@ -62,39 +65,39 @@ class partial_ordering { _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(partial_ordering, partial_ordering) noexcept = default; _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator==(partial_ordering __v, _CmpUnspecifiedParam) noexcept { - return __v.__is_ordered() && __v.__value_ == 0; + return __v.__value_ == _PartialOrdResult::__equiv; } _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<(partial_ordering __v, _CmpUnspecifiedParam) noexcept { - return __v.__is_ordered() && __v.__value_ < 0; + return __v.__value_ == _PartialOrdResult::__less; } _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<=(partial_ordering __v, _CmpUnspecifiedParam) noexcept { - return __v.__is_ordered() && __v.__value_ <= 0; + return __v.__value_ == _PartialOrdResult::__equiv || __v.__value_ == _PartialOrdResult::__less; } _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>(partial_ordering __v, _CmpUnspecifiedParam) noexcept { - return __v.__is_ordered() && __v.__value_ > 0; + return __v.__value_ == _PartialOrdResult::__greater; } _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>=(partial_ordering __v, 
_CmpUnspecifiedParam) noexcept { - return __v.__is_ordered() && __v.__value_ >= 0; + return __v.__value_ == _PartialOrdResult::__equiv || __v.__value_ == _PartialOrdResult::__greater; } _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<(_CmpUnspecifiedParam, partial_ordering __v) noexcept { - return __v.__is_ordered() && 0 < __v.__value_; + return __v.__value_ == _PartialOrdResult::__greater; } _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator<=(_CmpUnspecifiedParam, partial_ordering __v) noexcept { - return __v.__is_ordered() && 0 <= __v.__value_; + return __v.__value_ == _PartialOrdResult::__equiv || __v.__value_ == _PartialOrdResult::__greater; } _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>(_CmpUnspecifiedParam, partial_ordering __v) noexcept { - return __v.__is_ordered() && 0 > __v.__value_; + return __v.__value_ == _PartialOrdResult::__less; } _LIBCPP_HIDE_FROM_ABI friend constexpr bool operator>=(_CmpUnspecifiedParam, partial_ordering __v) noexcept { - return __v.__is_ordered() && 0 >= __v.__value_; + return __v.__value_ == _PartialOrdResult::__equiv || __v.__value_ == _PartialOrdResult::__less; } _LIBCPP_HIDE_FROM_ABI friend constexpr partial_ordering @@ -108,16 +111,16 @@ class partial_ordering { } private: - _ValueT __value_; + _PartialOrdResult __value_; }; -inline constexpr partial_ordering partial_ordering::less(_OrdResult::__less); -inline constexpr partial_ordering partial_ordering::equivalent(_OrdResult::__equiv); -inline constexpr partial_ordering partial_ordering::greater(_OrdResult::__greater); -inline constexpr partial_ordering partial_ordering::unordered(_NCmpResult ::__unordered); +inline constexpr partial_ordering partial_ordering::less(_PartialOrdResult::__less); +inline constexpr partial_ordering partial_ordering::equivalent(_PartialOrdResult::__equiv); +inline constexpr partial_ordering partial_ordering::greater(_PartialOrdResult::__greater); +inline constexpr partial_ordering 
partial_ordering::unordered(_PartialOrdResult::__unordered); class weak_ordering { - using _ValueT = signed char; + using _ValueT _LIBCPP_NODEBUG = signed char; _LIBCPP_HIDE_FROM_ABI explicit constexpr weak_ordering(_OrdResult __v) noexcept : __value_(_ValueT(__v)) {} @@ -187,7 +190,7 @@ inline constexpr weak_ordering weak_ordering::equivalent(_OrdResult::__equiv); inline constexpr weak_ordering weak_ordering::greater(_OrdResult::__greater); class strong_ordering { - using _ValueT = signed char; + using _ValueT _LIBCPP_NODEBUG = signed char; _LIBCPP_HIDE_FROM_ABI explicit constexpr strong_ordering(_OrdResult __v) noexcept : __value_(_ValueT(__v)) {} @@ -269,7 +272,8 @@ inline constexpr strong_ordering strong_ordering::greater(_OrdResult::__greater) /// The types partial_ordering, weak_ordering, and strong_ordering are /// collectively termed the comparison category types. template -concept __comparison_category = __one_of_v<_Tp, partial_ordering, weak_ordering, strong_ordering>; +concept __comparison_category = + is_same_v<_Tp, partial_ordering> || is_same_v<_Tp, weak_ordering> || is_same_v<_Tp, strong_ordering>; #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__compare/synth_three_way.h b/lib/libcxx/include/__compare/synth_three_way.h index e48ce4979983..63bf56d0cf42 100644 --- a/lib/libcxx/include/__compare/synth_three_way.h +++ b/lib/libcxx/include/__compare/synth_three_way.h @@ -43,7 +43,8 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr auto __synth_three_way = [] -using __synth_three_way_result = decltype(std::__synth_three_way(std::declval<_Tp&>(), std::declval<_Up&>())); +using __synth_three_way_result _LIBCPP_NODEBUG = + decltype(std::__synth_three_way(std::declval<_Tp&>(), std::declval<_Up&>())); #endif // _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__concepts/predicate.h b/lib/libcxx/include/__concepts/predicate.h index 00731efc8fcd..e0263a878b06 100644 --- a/lib/libcxx/include/__concepts/predicate.h +++ 
b/lib/libcxx/include/__concepts/predicate.h @@ -12,7 +12,7 @@ #include <__concepts/boolean_testable.h> #include <__concepts/invocable.h> #include <__config> -#include <__functional/invoke.h> +#include <__type_traits/invoke.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__concepts/swappable.h b/lib/libcxx/include/__concepts/swappable.h index d339488a087a..985c733021a0 100644 --- a/lib/libcxx/include/__concepts/swappable.h +++ b/lib/libcxx/include/__concepts/swappable.h @@ -14,6 +14,7 @@ #include <__concepts/common_reference_with.h> #include <__concepts/constructible.h> #include <__config> +#include <__cstddef/size_t.h> #include <__type_traits/extent.h> #include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> @@ -22,7 +23,6 @@ #include <__utility/forward.h> #include <__utility/move.h> #include <__utility/swap.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__condition_variable/condition_variable.h b/lib/libcxx/include/__condition_variable/condition_variable.h index de35aaca1070..4521fe274614 100644 --- a/lib/libcxx/include/__condition_variable/condition_variable.h +++ b/lib/libcxx/include/__condition_variable/condition_variable.h @@ -16,7 +16,7 @@ #include <__config> #include <__mutex/mutex.h> #include <__mutex/unique_lock.h> -#include <__system_error/system_error.h> +#include <__system_error/throw_system_error.h> #include <__thread/support.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_floating_point.h> @@ -33,7 +33,7 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD -#ifndef _LIBCPP_HAS_NO_THREADS +#if _LIBCPP_HAS_THREADS // enum class cv_status _LIBCPP_DECLARE_STRONG_ENUM(cv_status){no_timeout, timeout}; @@ -45,7 +45,7 @@ class _LIBCPP_EXPORTED_FROM_ABI condition_variable { public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR condition_variable() _NOEXCEPT = default; 
-# ifdef _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION +# if _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION ~condition_variable() = default; # else ~condition_variable(); @@ -83,7 +83,7 @@ class _LIBCPP_EXPORTED_FROM_ABI condition_variable { private: void __do_timed_wait(unique_lock& __lk, chrono::time_point) _NOEXCEPT; -# if defined(_LIBCPP_HAS_COND_CLOCKWAIT) +# if _LIBCPP_HAS_COND_CLOCKWAIT _LIBCPP_HIDE_FROM_ABI void __do_timed_wait(unique_lock& __lk, chrono::time_point) _NOEXCEPT; # endif @@ -91,7 +91,7 @@ class _LIBCPP_EXPORTED_FROM_ABI condition_variable { _LIBCPP_HIDE_FROM_ABI void __do_timed_wait(unique_lock& __lk, chrono::time_point<_Clock, chrono::nanoseconds>) _NOEXCEPT; }; -#endif // !_LIBCPP_HAS_NO_THREADS +#endif // _LIBCPP_HAS_THREADS template ::value, int> = 0> inline _LIBCPP_HIDE_FROM_ABI chrono::nanoseconds __safe_nanosecond_cast(chrono::duration<_Rep, _Period> __d) { @@ -140,7 +140,7 @@ inline _LIBCPP_HIDE_FROM_ABI chrono::nanoseconds __safe_nanosecond_cast(chrono:: return nanoseconds(__result); } -#ifndef _LIBCPP_HAS_NO_THREADS +#if _LIBCPP_HAS_THREADS template void condition_variable::wait(unique_lock& __lk, _Predicate __pred) { while (!__pred()) @@ -180,7 +180,7 @@ cv_status condition_variable::wait_for(unique_lock& __lk, const chrono::d using __ns_rep = nanoseconds::rep; steady_clock::time_point __c_now = steady_clock::now(); -# if defined(_LIBCPP_HAS_COND_CLOCKWAIT) +# if _LIBCPP_HAS_COND_CLOCKWAIT using __clock_tp_ns = time_point; __ns_rep __now_count_ns = std::__safe_nanosecond_cast(__c_now.time_since_epoch()).count(); # else @@ -205,7 +205,7 @@ condition_variable::wait_for(unique_lock& __lk, const chrono::duration<_R return wait_until(__lk, chrono::steady_clock::now() + __d, std::move(__pred)); } -# if defined(_LIBCPP_HAS_COND_CLOCKWAIT) +# if _LIBCPP_HAS_COND_CLOCKWAIT inline void condition_variable::__do_timed_wait( unique_lock& __lk, chrono::time_point __tp) _NOEXCEPT { using namespace chrono; @@ -235,7 +235,7 @@ inline void 
condition_variable::__do_timed_wait(unique_lock& __lk, wait_for(__lk, __tp - _Clock::now()); } -#endif // _LIBCPP_HAS_NO_THREADS +#endif // _LIBCPP_HAS_THREADS _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__config b/lib/libcxx/include/__config index 8165dbc54907..fec323f8125f 100644 --- a/lib/libcxx/include/__config +++ b/lib/libcxx/include/__config @@ -14,6 +14,7 @@ #include <__configuration/abi.h> #include <__configuration/availability.h> #include <__configuration/compiler.h> +#include <__configuration/language.h> #include <__configuration/platform.h> #ifndef _LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER @@ -27,10 +28,11 @@ // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM. // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is // defined to XXYYZZ. -# define _LIBCPP_VERSION 190100 +# define _LIBCPP_VERSION 200100 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) +# define _LIBCPP_CONCAT3(X, Y, Z) _LIBCPP_CONCAT(X, _LIBCPP_CONCAT(Y, Z)) # if __STDC_HOSTED__ == 0 # define _LIBCPP_FREESTANDING @@ -38,16 +40,9 @@ // HARDENING { -// This is for backward compatibility -- make enabling `_LIBCPP_ENABLE_ASSERTIONS` (which predates hardening modes) -// equivalent to setting the extensive mode. This is deprecated and will be removed in LLVM 20. +// TODO: Remove in LLVM 21. We're making this an error to catch folks who might not have migrated. 
# ifdef _LIBCPP_ENABLE_ASSERTIONS -# warning "_LIBCPP_ENABLE_ASSERTIONS is deprecated, please use _LIBCPP_HARDENING_MODE instead" -# if _LIBCPP_ENABLE_ASSERTIONS != 0 && _LIBCPP_ENABLE_ASSERTIONS != 1 -# error "_LIBCPP_ENABLE_ASSERTIONS must be set to 0 or 1" -# endif -# if _LIBCPP_ENABLE_ASSERTIONS -# define _LIBCPP_HARDENING_MODE _LIBCPP_HARDENING_MODE_EXTENSIVE -# endif +# error "_LIBCPP_ENABLE_ASSERTIONS has been removed, please use _LIBCPP_HARDENING_MODE instead" # endif // The library provides the macro `_LIBCPP_HARDENING_MODE` which can be set to one of the following values: @@ -191,25 +186,6 @@ _LIBCPP_HARDENING_MODE_DEBUG # error "libc++ only supports C++03 with Clang-based compilers. Please enable C++11" # endif -// FIXME: ABI detection should be done via compiler builtin macros. This -// is just a placeholder until Clang implements such macros. For now assume -// that Windows compilers pretending to be MSVC++ target the Microsoft ABI, -// and allow the user to explicitly specify the ABI to handle cases where this -// heuristic falls short. -# if defined(_LIBCPP_ABI_FORCE_ITANIUM) && defined(_LIBCPP_ABI_FORCE_MICROSOFT) -# error "Only one of _LIBCPP_ABI_FORCE_ITANIUM and _LIBCPP_ABI_FORCE_MICROSOFT can be defined" -# elif defined(_LIBCPP_ABI_FORCE_ITANIUM) -# define _LIBCPP_ABI_ITANIUM -# elif defined(_LIBCPP_ABI_FORCE_MICROSOFT) -# define _LIBCPP_ABI_MICROSOFT -# else -# if defined(_WIN32) && defined(_MSC_VER) -# define _LIBCPP_ABI_MICROSOFT -# else -# define _LIBCPP_ABI_ITANIUM -# endif -# endif - # if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_NO_VCRUNTIME) # define _LIBCPP_ABI_VCRUNTIME # endif @@ -222,13 +198,16 @@ _LIBCPP_HARDENING_MODE_DEBUG // Incomplete features get their own specific disabling flags. This makes it // easier to grep for target specific flags once the feature is complete. 
-# if !defined(_LIBCPP_ENABLE_EXPERIMENTAL) && !defined(_LIBCPP_BUILDING_LIBRARY) -# define _LIBCPP_HAS_NO_INCOMPLETE_PSTL -# define _LIBCPP_HAS_NO_EXPERIMENTAL_STOP_TOKEN -# define _LIBCPP_HAS_NO_EXPERIMENTAL_TZDB -# define _LIBCPP_HAS_NO_EXPERIMENTAL_SYNCSTREAM +# if defined(_LIBCPP_ENABLE_EXPERIMENTAL) || defined(_LIBCPP_BUILDING_LIBRARY) +# define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 1 +# else +# define _LIBCPP_HAS_EXPERIMENTAL_LIBRARY 0 # endif +# define _LIBCPP_HAS_EXPERIMENTAL_PSTL _LIBCPP_HAS_EXPERIMENTAL_LIBRARY +# define _LIBCPP_HAS_EXPERIMENTAL_TZDB _LIBCPP_HAS_EXPERIMENTAL_LIBRARY +# define _LIBCPP_HAS_EXPERIMENTAL_SYNCSTREAM _LIBCPP_HAS_EXPERIMENTAL_LIBRARY + # if defined(__MVS__) # include // for __NATIVE_ASCII_F # endif @@ -244,9 +223,14 @@ _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_MSVCRT // Using Microsoft's C Runtime library # endif # if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_ARM) || defined(__arm__)) -# define _LIBCPP_HAS_BITSCAN64 +# define _LIBCPP_HAS_BITSCAN64 1 +# else +# define _LIBCPP_HAS_BITSCAN64 0 # endif -# define _LIBCPP_HAS_OPEN_WITH_WCHAR +# define _LIBCPP_HAS_OPEN_WITH_WCHAR 1 +# else +# define _LIBCPP_HAS_OPEN_WITH_WCHAR 0 +# define _LIBCPP_HAS_BITSCAN64 0 # endif // defined(_WIN32) # if defined(_AIX) && !defined(__64BIT__) @@ -312,7 +296,6 @@ _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_ALIGNOF(_Tp) alignof(_Tp) # define _ALIGNAS_TYPE(x) alignas(x) # define _ALIGNAS(x) alignas(x) -# define _LIBCPP_NORETURN [[noreturn]] # define _NOEXCEPT noexcept # define _NOEXCEPT_(...) noexcept(__VA_ARGS__) # define _LIBCPP_CONSTEXPR constexpr @@ -322,8 +305,6 @@ _LIBCPP_HARDENING_MODE_DEBUG # define _LIBCPP_ALIGNOF(_Tp) _Alignof(_Tp) # define _ALIGNAS_TYPE(x) __attribute__((__aligned__(_LIBCPP_ALIGNOF(x)))) # define _ALIGNAS(x) __attribute__((__aligned__(x))) -# define _LIBCPP_NORETURN __attribute__((__noreturn__)) -# define _LIBCPP_HAS_NO_NOEXCEPT # define nullptr __nullptr # define _NOEXCEPT throw() # define _NOEXCEPT_(...) 
@@ -340,23 +321,33 @@ typedef __char32_t char32_t; // Objective-C++ features (opt-in) # if __has_feature(objc_arc) -# define _LIBCPP_HAS_OBJC_ARC +# define _LIBCPP_HAS_OBJC_ARC 1 +# else +# define _LIBCPP_HAS_OBJC_ARC 0 # endif # if __has_feature(objc_arc_weak) -# define _LIBCPP_HAS_OBJC_ARC_WEAK +# define _LIBCPP_HAS_OBJC_ARC_WEAK 1 +# else +# define _LIBCPP_HAS_OBJC_ARC_WEAK 0 # endif # if __has_extension(blocks) -# define _LIBCPP_HAS_EXTENSION_BLOCKS +# define _LIBCPP_HAS_EXTENSION_BLOCKS 1 +# else +# define _LIBCPP_HAS_EXTENSION_BLOCKS 0 # endif -# if defined(_LIBCPP_HAS_EXTENSION_BLOCKS) && defined(__APPLE__) -# define _LIBCPP_HAS_BLOCKS_RUNTIME +# if _LIBCPP_HAS_EXTENSION_BLOCKS && defined(__APPLE__) +# define _LIBCPP_HAS_BLOCKS_RUNTIME 1 +# else +# define _LIBCPP_HAS_BLOCKS_RUNTIME 0 # endif -# if !__has_feature(address_sanitizer) -# define _LIBCPP_HAS_NO_ASAN +# if __has_feature(address_sanitizer) +# define _LIBCPP_HAS_ASAN 1 +# else +# define _LIBCPP_HAS_ASAN 0 # endif # define _LIBCPP_ALWAYS_INLINE __attribute__((__always_inline__)) @@ -479,7 +470,7 @@ typedef __char32_t char32_t; # define _LIBCPP_HARDENING_SIG n // "none" # endif -# ifdef _LIBCPP_HAS_NO_EXCEPTIONS +# if !_LIBCPP_HAS_EXCEPTIONS # define _LIBCPP_EXCEPTIONS_SIG n # else # define _LIBCPP_EXCEPTIONS_SIG e @@ -593,6 +584,15 @@ typedef __char32_t char32_t; inline namespace _LIBCPP_ABI_NAMESPACE { # define _LIBCPP_END_NAMESPACE_STD }} _LIBCPP_POP_EXTENSION_DIAGNOSTICS +#define _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL namespace std { namespace experimental { +#define _LIBCPP_END_NAMESPACE_EXPERIMENTAL }} + +#define _LIBCPP_BEGIN_NAMESPACE_LFTS _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL inline namespace fundamentals_v1 { +#define _LIBCPP_END_NAMESPACE_LFTS } _LIBCPP_END_NAMESPACE_EXPERIMENTAL + +#define _LIBCPP_BEGIN_NAMESPACE_LFTS_V2 _LIBCPP_BEGIN_NAMESPACE_EXPERIMENTAL inline namespace fundamentals_v2 { +#define _LIBCPP_END_NAMESPACE_LFTS_V2 } _LIBCPP_END_NAMESPACE_EXPERIMENTAL + #ifdef 
_LIBCPP_ABI_NO_FILESYSTEM_INLINE_NAMESPACE # define _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM _LIBCPP_BEGIN_NAMESPACE_STD namespace filesystem { # define _LIBCPP_END_NAMESPACE_FILESYSTEM } _LIBCPP_END_NAMESPACE_STD @@ -610,7 +610,9 @@ typedef __char32_t char32_t; # endif # if !defined(__SIZEOF_INT128__) || defined(_MSC_VER) -# define _LIBCPP_HAS_NO_INT128 +# define _LIBCPP_HAS_INT128 0 +# else +# define _LIBCPP_HAS_INT128 1 # endif # ifdef _LIBCPP_CXX03_LANG @@ -631,10 +633,6 @@ typedef __char32_t char32_t; # define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x) # endif // _LIBCPP_CXX03_LANG -# if defined(__APPLE__) || defined(__FreeBSD__) || defined(_LIBCPP_MSVCRT_LIKE) || defined(__NetBSD__) -# define _LIBCPP_LOCALE__L_EXTENSIONS 1 -# endif - # ifdef __FreeBSD__ # define _DECLARE_C99_LDBL_MATH 1 # endif @@ -642,29 +640,39 @@ typedef __char32_t char32_t; // If we are getting operator new from the MSVC CRT, then allocation overloads // for align_val_t were added in 19.12, aka VS 2017 version 15.3. # if defined(_LIBCPP_MSVCRT) && defined(_MSC_VER) && _MSC_VER < 1912 -# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION +# define _LIBCPP_HAS_LIBRARY_ALIGNED_ALLOCATION 0 # elif defined(_LIBCPP_ABI_VCRUNTIME) && !defined(__cpp_aligned_new) // We're deferring to Microsoft's STL to provide aligned new et al. We don't // have it unless the language feature test macro is defined. 
-# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION +# define _LIBCPP_HAS_LIBRARY_ALIGNED_ALLOCATION 0 # elif defined(__MVS__) -# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION +# define _LIBCPP_HAS_LIBRARY_ALIGNED_ALLOCATION 0 +# else +# define _LIBCPP_HAS_LIBRARY_ALIGNED_ALLOCATION 1 # endif -# if defined(_LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION) || (!defined(__cpp_aligned_new) || __cpp_aligned_new < 201606) -# define _LIBCPP_HAS_NO_ALIGNED_ALLOCATION +# if !_LIBCPP_HAS_LIBRARY_ALIGNED_ALLOCATION || (!defined(__cpp_aligned_new) || __cpp_aligned_new < 201606) +# define _LIBCPP_HAS_ALIGNED_ALLOCATION 0 +# else +# define _LIBCPP_HAS_ALIGNED_ALLOCATION 1 # endif // It is not yet possible to use aligned_alloc() on all Apple platforms since // 10.15 was the first version to ship an implementation of aligned_alloc(). # if defined(__APPLE__) # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ - __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) -# define _LIBCPP_HAS_NO_C11_ALIGNED_ALLOC + __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && \ + __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) +# define _LIBCPP_HAS_C11_ALIGNED_ALLOC 0 +# else +# define _LIBCPP_HAS_C11_ALIGNED_ALLOC 1 # endif # elif defined(__ANDROID__) && __ANDROID_API__ < 28 // Android only provides aligned_alloc when targeting API 28 or higher. -# define _LIBCPP_HAS_NO_C11_ALIGNED_ALLOC +# define _LIBCPP_HAS_C11_ALIGNED_ALLOC 0 +# else +# define _LIBCPP_HAS_C11_ALIGNED_ALLOC 1 # endif # if defined(__APPLE__) || defined(__FreeBSD__) @@ -676,7 +684,9 @@ typedef __char32_t char32_t; # endif # if _LIBCPP_STD_VER <= 17 || !defined(__cpp_char8_t) -# define _LIBCPP_HAS_NO_CHAR8_T +# define _LIBCPP_HAS_CHAR8_T 0 +# else +# define _LIBCPP_HAS_CHAR8_T 1 # endif // Deprecation macros. 
@@ -699,14 +709,6 @@ typedef __char32_t char32_t; # define _LIBCPP_DEPRECATED_(m) # endif -# if _LIBCPP_STD_VER < 20 -# define _LIBCPP_DEPRECATED_ATOMIC_SYNC \ - _LIBCPP_DEPRECATED_("The C++20 synchronization library has been deprecated prior to C++20. Please update to " \ - "using -std=c++20 if you need to use these facilities.") -# else -# define _LIBCPP_DEPRECATED_ATOMIC_SYNC /* nothing */ -# endif - # if !defined(_LIBCPP_CXX03_LANG) # define _LIBCPP_DEPRECATED_IN_CXX11 _LIBCPP_DEPRECATED # else @@ -743,7 +745,7 @@ typedef __char32_t char32_t; # define _LIBCPP_DEPRECATED_IN_CXX26 # endif -# if !defined(_LIBCPP_HAS_NO_CHAR8_T) +# if _LIBCPP_HAS_CHAR8_T # define _LIBCPP_DEPRECATED_WITH_CHAR8_T _LIBCPP_DEPRECATED # else # define _LIBCPP_DEPRECATED_WITH_CHAR8_T @@ -796,16 +798,22 @@ typedef __char32_t char32_t; # define _LIBCPP_CONSTEXPR_SINCE_CXX23 # endif +# if _LIBCPP_STD_VER >= 26 +# define _LIBCPP_CONSTEXPR_SINCE_CXX26 constexpr +# else +# define _LIBCPP_CONSTEXPR_SINCE_CXX26 +# endif + # ifndef _LIBCPP_WEAK # define _LIBCPP_WEAK __attribute__((__weak__)) # endif // Thread API // clang-format off -# if !defined(_LIBCPP_HAS_NO_THREADS) && \ - !defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && \ - !defined(_LIBCPP_HAS_THREAD_API_WIN32) && \ - !defined(_LIBCPP_HAS_THREAD_API_EXTERNAL) +# if _LIBCPP_HAS_THREADS && \ + !_LIBCPP_HAS_THREAD_API_PTHREAD && \ + !_LIBCPP_HAS_THREAD_API_WIN32 && \ + !_LIBCPP_HAS_THREAD_API_EXTERNAL # if defined(__FreeBSD__) || \ defined(__wasi__) || \ @@ -819,43 +827,49 @@ typedef __char32_t char32_t; defined(_AIX) || \ defined(__EMSCRIPTEN__) // clang-format on -# define _LIBCPP_HAS_THREAD_API_PTHREAD +# undef _LIBCPP_HAS_THREAD_API_PTHREAD +# define _LIBCPP_HAS_THREAD_API_PTHREAD 1 # elif defined(__Fuchsia__) // TODO(44575): Switch to C11 thread API when possible. 
-# define _LIBCPP_HAS_THREAD_API_PTHREAD +# undef _LIBCPP_HAS_THREAD_API_PTHREAD +# define _LIBCPP_HAS_THREAD_API_PTHREAD 1 # elif defined(_LIBCPP_WIN32API) -# define _LIBCPP_HAS_THREAD_API_WIN32 +# undef _LIBCPP_HAS_THREAD_API_WIN32 +# define _LIBCPP_HAS_THREAD_API_WIN32 1 # else # error "No thread API" # endif // _LIBCPP_HAS_THREAD_API -# endif // _LIBCPP_HAS_NO_THREADS +# endif // _LIBCPP_HAS_THREADS -# if defined(_LIBCPP_HAS_THREAD_API_PTHREAD) +# if _LIBCPP_HAS_THREAD_API_PTHREAD # if defined(__ANDROID__) && __ANDROID_API__ >= 30 -# define _LIBCPP_HAS_COND_CLOCKWAIT +# define _LIBCPP_HAS_COND_CLOCKWAIT 1 # elif defined(_LIBCPP_GLIBC_PREREQ) # if _LIBCPP_GLIBC_PREREQ(2, 30) -# define _LIBCPP_HAS_COND_CLOCKWAIT +# define _LIBCPP_HAS_COND_CLOCKWAIT 1 +# else +# define _LIBCPP_HAS_COND_CLOCKWAIT 0 # endif +# else +# define _LIBCPP_HAS_COND_CLOCKWAIT 0 # endif +# else +# define _LIBCPP_HAS_COND_CLOCKWAIT 0 # endif -# if defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_HAS_THREAD_API_PTHREAD) -# error _LIBCPP_HAS_THREAD_API_PTHREAD may only be defined when \ - _LIBCPP_HAS_NO_THREADS is not defined. +# if !_LIBCPP_HAS_THREADS && _LIBCPP_HAS_THREAD_API_PTHREAD +# error _LIBCPP_HAS_THREAD_API_PTHREAD may only be true when _LIBCPP_HAS_THREADS is true. # endif -# if defined(_LIBCPP_HAS_NO_THREADS) && defined(_LIBCPP_HAS_THREAD_API_EXTERNAL) -# error _LIBCPP_HAS_THREAD_API_EXTERNAL may not be defined when \ - _LIBCPP_HAS_NO_THREADS is defined. +# if !_LIBCPP_HAS_THREADS && _LIBCPP_HAS_THREAD_API_EXTERNAL +# error _LIBCPP_HAS_THREAD_API_EXTERNAL may only be true when _LIBCPP_HAS_THREADS is true. # endif -# if defined(_LIBCPP_HAS_NO_MONOTONIC_CLOCK) && !defined(_LIBCPP_HAS_NO_THREADS) -# error _LIBCPP_HAS_NO_MONOTONIC_CLOCK may only be defined when \ - _LIBCPP_HAS_NO_THREADS is defined. +# if !_LIBCPP_HAS_MONOTONIC_CLOCK && _LIBCPP_HAS_THREADS +# error _LIBCPP_HAS_MONOTONIC_CLOCK may only be false when _LIBCPP_HAS_THREADS is false. 
# endif -# if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(__STDCPP_THREADS__) +# if _LIBCPP_HAS_THREADS && !defined(__STDCPP_THREADS__) # define __STDCPP_THREADS__ 1 # endif @@ -870,11 +884,13 @@ typedef __char32_t char32_t; // TODO(EricWF): Enable this optimization on Bionic after speaking to their // respective stakeholders. // clang-format off -# if (defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && defined(__GLIBC__)) || \ - (defined(_LIBCPP_HAS_THREAD_API_C11) && defined(__Fuchsia__)) || \ - defined(_LIBCPP_HAS_THREAD_API_WIN32) +# if (_LIBCPP_HAS_THREAD_API_PTHREAD && defined(__GLIBC__)) || \ + (_LIBCPP_HAS_THREAD_API_C11 && defined(__Fuchsia__)) || \ + _LIBCPP_HAS_THREAD_API_WIN32 // clang-format on -# define _LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION +# define _LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION 1 +# else +# define _LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION 0 # endif // Destroying a condvar is a nop on Windows. @@ -885,25 +901,31 @@ typedef __char32_t char32_t; // // TODO(EricWF): This is potentially true for some pthread implementations // as well. 
-# if (defined(_LIBCPP_HAS_THREAD_API_C11) && defined(__Fuchsia__)) || defined(_LIBCPP_HAS_THREAD_API_WIN32) -# define _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION +# if (_LIBCPP_HAS_THREAD_API_C11 && defined(__Fuchsia__)) || _LIBCPP_HAS_THREAD_API_WIN32 +# define _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION 1 +# else +# define _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION 0 # endif # if defined(__BIONIC__) || defined(__NuttX__) || defined(__Fuchsia__) || defined(__wasi__) || \ - defined(_LIBCPP_HAS_MUSL_LIBC) || defined(__OpenBSD__) + _LIBCPP_HAS_MUSL_LIBC || defined(__OpenBSD__) || defined(__LLVM_LIBC__) # define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE # endif # if __has_feature(cxx_atomic) || __has_extension(c_atomic) || __has_keyword(_Atomic) -# define _LIBCPP_HAS_C_ATOMIC_IMP +# define _LIBCPP_HAS_C_ATOMIC_IMP 1 +# define _LIBCPP_HAS_GCC_ATOMIC_IMP 0 +# define _LIBCPP_HAS_EXTERNAL_ATOMIC_IMP 0 # elif defined(_LIBCPP_COMPILER_GCC) -# define _LIBCPP_HAS_GCC_ATOMIC_IMP +# define _LIBCPP_HAS_C_ATOMIC_IMP 0 +# define _LIBCPP_HAS_GCC_ATOMIC_IMP 1 +# define _LIBCPP_HAS_EXTERNAL_ATOMIC_IMP 0 # endif -# if !defined(_LIBCPP_HAS_C_ATOMIC_IMP) && !defined(_LIBCPP_HAS_GCC_ATOMIC_IMP) && \ - !defined(_LIBCPP_HAS_EXTERNAL_ATOMIC_IMP) -# define _LIBCPP_HAS_NO_ATOMIC_HEADER +# if !_LIBCPP_HAS_C_ATOMIC_IMP && !_LIBCPP_HAS_GCC_ATOMIC_IMP && !_LIBCPP_HAS_EXTERNAL_ATOMIC_IMP +# define _LIBCPP_HAS_ATOMIC_HEADER 0 # else +# define _LIBCPP_HAS_ATOMIC_HEADER 1 # ifndef _LIBCPP_ATOMIC_FLAG_TYPE # define _LIBCPP_ATOMIC_FLAG_TYPE bool # endif @@ -915,19 +937,18 @@ typedef __char32_t char32_t; # define _LIBCPP_NO_THREAD_SAFETY_ANALYSIS # endif -# if defined(_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS) -# if defined(__clang__) && __has_attribute(acquire_capability) // Work around the attribute handling in clang. When both __declspec and // __attribute__ are present, the processing goes awry preventing the definition // of the types. 
In MinGW mode, __declspec evaluates to __attribute__, and thus // combining the two does work. -# if !defined(_MSC_VER) -# define _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS -# endif -# endif +# if defined(_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS) && defined(__clang__) && \ + __has_attribute(acquire_capability) && !defined(_MSC_VER) +# define _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS 1 +# else +# define _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS 0 # endif -# ifdef _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS +# if _LIBCPP_HAS_THREAD_SAFETY_ANNOTATIONS # define _LIBCPP_THREAD_SAFETY_ANNOTATION(x) __attribute__((x)) # else # define _LIBCPP_THREAD_SAFETY_ANNOTATION(x) @@ -962,7 +983,7 @@ typedef __char32_t char32_t; // When wide characters are disabled, it can be useful to have a quick way of // disabling it without having to resort to #if-#endif, which has a larger // impact on readability. -# if defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) +# if !_LIBCPP_HAS_WIDE_CHARACTERS # define _LIBCPP_IF_WIDE_CHARACTERS(...) # else # define _LIBCPP_IF_WIDE_CHARACTERS(...) __VA_ARGS__ @@ -999,28 +1020,16 @@ typedef __char32_t char32_t; // (If/when MSVC breaks its C++ ABI, it will be changed to work as intended.) // However, MSVC implements [[msvc::no_unique_address]] which does what // [[no_unique_address]] is supposed to do, in general. - -// Clang-cl does not yet (14.0) implement either [[no_unique_address]] or -// [[msvc::no_unique_address]] though. If/when it does implement -// [[msvc::no_unique_address]], this should be preferred though. # define _LIBCPP_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] -# elif __has_cpp_attribute(no_unique_address) -# define _LIBCPP_NO_UNIQUE_ADDRESS [[__no_unique_address__]] # else -# define _LIBCPP_NO_UNIQUE_ADDRESS /* nothing */ -// Note that this can be replaced by #error as soon as clang-cl -// implements msvc::no_unique_address, since there should be no C++20 -// compiler that doesn't support one of the two attributes at that point. 
-// We generally don't want to use this macro outside of C++20-only code, -// because using it conditionally in one language version only would make -// the ABI inconsistent. +# define _LIBCPP_NO_UNIQUE_ADDRESS [[__no_unique_address__]] # endif // c8rtomb() and mbrtoc8() were added in C++20 and C23. Support for these // functions is gradually being added to existing C libraries. The conditions // below check for known C library versions and conditions under which these // functions are declared by the C library. -# define _LIBCPP_HAS_NO_C8RTOMB_MBRTOC8 +// // GNU libc 2.36 and newer declare c8rtomb() and mbrtoc8() in C++ modes if // __cpp_char8_t is defined or if C2X extensions are enabled. Determining // the latter depends on internal GNU libc details that are not appropriate @@ -1028,8 +1037,12 @@ typedef __char32_t char32_t; // defined are ignored. # if defined(_LIBCPP_GLIBC_PREREQ) # if _LIBCPP_GLIBC_PREREQ(2, 36) && defined(__cpp_char8_t) -# undef _LIBCPP_HAS_NO_C8RTOMB_MBRTOC8 +# define _LIBCPP_HAS_C8RTOMB_MBRTOC8 1 +# else +# define _LIBCPP_HAS_C8RTOMB_MBRTOC8 0 # endif +# else +# define _LIBCPP_HAS_C8RTOMB_MBRTOC8 0 # endif // There are a handful of public standard library types that are intended to @@ -1124,15 +1137,6 @@ typedef __char32_t char32_t; # define _LIBCPP_USING_IF_EXISTS # endif -# if __has_cpp_attribute(__nodiscard__) -# define _LIBCPP_NODISCARD [[__nodiscard__]] -# else -// We can't use GCC's [[gnu::warn_unused_result]] and -// __attribute__((warn_unused_result)), because GCC does not silence them via -// (void) cast. 
-# define _LIBCPP_NODISCARD -# endif - # if __has_attribute(__no_destroy__) # define _LIBCPP_NO_DESTROY __attribute__((__no_destroy__)) # else @@ -1160,10 +1164,19 @@ typedef __char32_t char32_t; # define _LIBCPP_LIFETIMEBOUND # endif -# if __has_attribute(__nodebug__) -# define _LIBCPP_NODEBUG __attribute__((__nodebug__)) +# if __has_cpp_attribute(_Clang::__noescape__) +# define _LIBCPP_NOESCAPE [[_Clang::__noescape__]] +# else +# define _LIBCPP_NOESCAPE +# endif + +# define _LIBCPP_NODEBUG [[__gnu__::__nodebug__]] + +# if __has_cpp_attribute(_Clang::__no_specializations__) +# define _LIBCPP_NO_SPECIALIZATIONS \ + [[_Clang::__no_specializations__("Users are not allowed to specialize this standard library entity")]] # else -# define _LIBCPP_NODEBUG +# define _LIBCPP_NO_SPECIALIZATIONS # endif # if __has_attribute(__standalone_debug__) @@ -1220,7 +1233,9 @@ typedef __char32_t char32_t; // Clang-18 has support for deducing this, but it does not set the FTM. # if defined(__cpp_explicit_this_parameter) || (defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 1800) -# define _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER +# define _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER 1 +# else +# define _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER 0 # endif #endif // __cplusplus diff --git a/lib/libcxx/include/__configuration/abi.h b/lib/libcxx/include/__configuration/abi.h index cfd878121380..1806dbc7c1b1 100644 --- a/lib/libcxx/include/__configuration/abi.h +++ b/lib/libcxx/include/__configuration/abi.h @@ -18,6 +18,25 @@ # pragma GCC system_header #endif +// FIXME: ABI detection should be done via compiler builtin macros. This +// is just a placeholder until Clang implements such macros. For now assume +// that Windows compilers pretending to be MSVC++ target the Microsoft ABI, +// and allow the user to explicitly specify the ABI to handle cases where this +// heuristic falls short. 
+#if _LIBCPP_ABI_FORCE_ITANIUM && _LIBCPP_ABI_FORCE_MICROSOFT +# error "Only one of _LIBCPP_ABI_FORCE_ITANIUM and _LIBCPP_ABI_FORCE_MICROSOFT can be true" +#elif _LIBCPP_ABI_FORCE_ITANIUM +# define _LIBCPP_ABI_ITANIUM +#elif _LIBCPP_ABI_FORCE_MICROSOFT +# define _LIBCPP_ABI_MICROSOFT +#else +# if defined(_WIN32) && defined(_MSC_VER) +# define _LIBCPP_ABI_MICROSOFT +# else +# define _LIBCPP_ABI_ITANIUM +# endif +#endif + #if _LIBCPP_ABI_VERSION >= 2 // Change short string representation so that string data starts at offset 0, // improving its alignment in some cases. @@ -98,10 +117,13 @@ // and WCHAR_MAX. This ABI setting determines whether we should instead track whether the fill // value has been initialized using a separate boolean, which changes the ABI. # define _LIBCPP_ABI_IOS_ALLOW_ARBITRARY_FILL_VALUE -// Make a std::pair of trivially copyable types trivially copyable. -// While this technically doesn't change the layout of pair itself, other types may decide to programatically change -// their representation based on whether something is trivially copyable. -# define _LIBCPP_ABI_TRIVIALLY_COPYABLE_PAIR +// Historically, libc++ used a type called `__compressed_pair` to reduce storage needs in cases of empty types (e.g. an +// empty allocator in std::vector). We switched to using `[[no_unique_address]]`. However, for ABI compatibility reasons +// we had to add artificial padding in a few places. +// +// This setting disables the addition of such artificial padding, leading to a more optimal +// representation for several types. +# define _LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING #elif _LIBCPP_ABI_VERSION == 1 # if !(defined(_LIBCPP_OBJECT_FORMAT_COFF) || defined(_LIBCPP_OBJECT_FORMAT_XCOFF)) // Enable compiling copies of now inline methods into the dylib to support @@ -154,6 +176,26 @@ // ABI impact: changes the iterator type of `vector` (except `vector`). 
// #define _LIBCPP_ABI_BOUNDED_ITERATORS_IN_VECTOR +// Changes the iterator type of `array` to a bounded iterator that keeps track of whether it's within the bounds of the +// container and asserts it on every dereference and when performing iterator arithmetic. +// +// ABI impact: changes the iterator type of `array`, its size and its layout. +// #define _LIBCPP_ABI_BOUNDED_ITERATORS_IN_STD_ARRAY + +// [[msvc::no_unique_address]] seems to mostly affect empty classes, so the padding scheme for Itanium doesn't work. +#if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING) +# define _LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING +#endif + +// Tracks the bounds of the array owned by std::unique_ptr, allowing it to trap when accessed out-of-bounds. +// Note that limited bounds checking is also available outside of this ABI configuration, but only some categories +// of types can be checked. +// +// ABI impact: This causes the layout of std::unique_ptr to change and its size to increase. +// This also affects the representation of a few library types that use std::unique_ptr +// internally, such as the unordered containers. +// #define _LIBCPP_ABI_BOUNDED_UNIQUE_PTR + #if defined(_LIBCPP_COMPILER_CLANG_BASED) # if defined(__APPLE__) # if defined(__i386__) || defined(__x86_64__) diff --git a/lib/libcxx/include/__configuration/availability.h b/lib/libcxx/include/__configuration/availability.h index ab483a07c9c1..261cf9c1ae9d 100644 --- a/lib/libcxx/include/__configuration/availability.h +++ b/lib/libcxx/include/__configuration/availability.h @@ -67,25 +67,19 @@ // // [1]: https://clang.llvm.org/docs/AttributeReference.html#availability -// For backwards compatibility, allow users to define _LIBCPP_DISABLE_AVAILABILITY -// for a while. 
-#if defined(_LIBCPP_DISABLE_AVAILABILITY) -# if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) -# define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS -# endif -#endif - // Availability markup is disabled when building the library, or when a non-Clang // compiler is used because only Clang supports the necessary attributes. #if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCXXABI_BUILDING_LIBRARY) || !defined(_LIBCPP_COMPILER_CLANG_BASED) -# if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) -# define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS -# endif +# undef _LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS +# define _LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS 0 #endif // When availability annotations are disabled, we take for granted that features introduced // in all versions of the library are available. -#if defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) +#if !_LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS + +# define _LIBCPP_INTRODUCED_IN_LLVM_20 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_20_ATTRIBUTE /* nothing */ # define _LIBCPP_INTRODUCED_IN_LLVM_19 1 # define _LIBCPP_INTRODUCED_IN_LLVM_19_ATTRIBUTE /* nothing */ @@ -93,9 +87,6 @@ # define _LIBCPP_INTRODUCED_IN_LLVM_18 1 # define _LIBCPP_INTRODUCED_IN_LLVM_18_ATTRIBUTE /* nothing */ -# define _LIBCPP_INTRODUCED_IN_LLVM_17 1 -# define _LIBCPP_INTRODUCED_IN_LLVM_17_ATTRIBUTE /* nothing */ - # define _LIBCPP_INTRODUCED_IN_LLVM_16 1 # define _LIBCPP_INTRODUCED_IN_LLVM_16_ATTRIBUTE /* nothing */ @@ -105,26 +96,17 @@ # define _LIBCPP_INTRODUCED_IN_LLVM_14 1 # define _LIBCPP_INTRODUCED_IN_LLVM_14_ATTRIBUTE /* nothing */ -# define _LIBCPP_INTRODUCED_IN_LLVM_13 1 -# define _LIBCPP_INTRODUCED_IN_LLVM_13_ATTRIBUTE /* nothing */ - # define _LIBCPP_INTRODUCED_IN_LLVM_12 1 # define _LIBCPP_INTRODUCED_IN_LLVM_12_ATTRIBUTE /* nothing */ # define _LIBCPP_INTRODUCED_IN_LLVM_11 1 # define _LIBCPP_INTRODUCED_IN_LLVM_11_ATTRIBUTE /* nothing */ -# define _LIBCPP_INTRODUCED_IN_LLVM_10 1 -# define 
_LIBCPP_INTRODUCED_IN_LLVM_10_ATTRIBUTE /* nothing */ - # define _LIBCPP_INTRODUCED_IN_LLVM_9 1 # define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE /* nothing */ # define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_PUSH /* nothing */ # define _LIBCPP_INTRODUCED_IN_LLVM_9_ATTRIBUTE_POP /* nothing */ -# define _LIBCPP_INTRODUCED_IN_LLVM_8 1 -# define _LIBCPP_INTRODUCED_IN_LLVM_8_ATTRIBUTE /* nothing */ - # define _LIBCPP_INTRODUCED_IN_LLVM_4 1 # define _LIBCPP_INTRODUCED_IN_LLVM_4_ATTRIBUTE /* nothing */ @@ -132,36 +114,42 @@ // clang-format off +// LLVM 20 +// TODO: Fill this in +# define _LIBCPP_INTRODUCED_IN_LLVM_20 0 +# define _LIBCPP_INTRODUCED_IN_LLVM_20_ATTRIBUTE __attribute__((unavailable)) + // LLVM 19 // TODO: Fill this in # define _LIBCPP_INTRODUCED_IN_LLVM_19 0 # define _LIBCPP_INTRODUCED_IN_LLVM_19_ATTRIBUTE __attribute__((unavailable)) // LLVM 18 -// TODO: Fill this in -# define _LIBCPP_INTRODUCED_IN_LLVM_18 0 -# define _LIBCPP_INTRODUCED_IN_LLVM_18_ATTRIBUTE __attribute__((unavailable)) - -// LLVM 17 -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 140400) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 170400) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 170400) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 100400) -# define _LIBCPP_INTRODUCED_IN_LLVM_17 0 +# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 150000) || \ + (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 180000) || \ + (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 180000) || \ + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && 
__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 110000) || \ + (defined(__ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__ < 90000) || \ + (defined(__ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__ < 240000) +# define _LIBCPP_INTRODUCED_IN_LLVM_18 0 # else -# define _LIBCPP_INTRODUCED_IN_LLVM_17 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_18 1 # endif -# define _LIBCPP_INTRODUCED_IN_LLVM_17_ATTRIBUTE \ - __attribute__((availability(macos, strict, introduced = 14.4))) \ - __attribute__((availability(ios, strict, introduced = 17.4))) \ - __attribute__((availability(tvos, strict, introduced = 17.4))) \ - __attribute__((availability(watchos, strict, introduced = 10.4))) +# define _LIBCPP_INTRODUCED_IN_LLVM_18_ATTRIBUTE \ + __attribute__((availability(macos, strict, introduced = 15.0))) \ + __attribute__((availability(ios, strict, introduced = 18.0))) \ + __attribute__((availability(tvos, strict, introduced = 18.0))) \ + __attribute__((availability(watchos, strict, introduced = 11.0))) \ + __attribute__((availability(bridgeos, strict, introduced = 9.0))) \ + __attribute__((availability(driverkit, strict, introduced = 24.0))) // LLVM 16 # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 140000) || \ (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 170000) || \ (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 170000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 100000) + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 100000) || \ + (defined(__ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__ < 80000) || \ + 
(defined(__ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__ < 230000) # define _LIBCPP_INTRODUCED_IN_LLVM_16 0 # else # define _LIBCPP_INTRODUCED_IN_LLVM_16 1 @@ -170,13 +158,17 @@ __attribute__((availability(macos, strict, introduced = 14.0))) \ __attribute__((availability(ios, strict, introduced = 17.0))) \ __attribute__((availability(tvos, strict, introduced = 17.0))) \ - __attribute__((availability(watchos, strict, introduced = 10.0))) + __attribute__((availability(watchos, strict, introduced = 10.0))) \ + __attribute__((availability(bridgeos, strict, introduced = 8.0))) \ + __attribute__((availability(driverkit, strict, introduced = 23.0))) // LLVM 15 # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 130400) || \ (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 160500) || \ (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 160500) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 90500) + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 90500) || \ + (defined(__ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__ < 70500) || \ + (defined(__ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__ < 220400) # define _LIBCPP_INTRODUCED_IN_LLVM_15 0 # else # define _LIBCPP_INTRODUCED_IN_LLVM_15 1 @@ -185,32 +177,21 @@ __attribute__((availability(macos, strict, introduced = 13.4))) \ __attribute__((availability(ios, strict, introduced = 16.5))) \ __attribute__((availability(tvos, strict, introduced = 16.5))) \ - __attribute__((availability(watchos, strict, introduced = 9.5))) + __attribute__((availability(watchos, strict, introduced = 9.5))) \ + 
__attribute__((availability(bridgeos, strict, introduced = 7.5))) \ + __attribute__((availability(driverkit, strict, introduced = 22.4))) // LLVM 14 # define _LIBCPP_INTRODUCED_IN_LLVM_14 _LIBCPP_INTRODUCED_IN_LLVM_15 # define _LIBCPP_INTRODUCED_IN_LLVM_14_ATTRIBUTE _LIBCPP_INTRODUCED_IN_LLVM_15_ATTRIBUTE -// LLVM 13 -# if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 130000) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 160000) || \ - (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 160000) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 90000) -# define _LIBCPP_INTRODUCED_IN_LLVM_13 0 -# else -# define _LIBCPP_INTRODUCED_IN_LLVM_13 1 -# endif -# define _LIBCPP_INTRODUCED_IN_LLVM_13_ATTRIBUTE \ - __attribute__((availability(macos, strict, introduced = 13.0))) \ - __attribute__((availability(ios, strict, introduced = 16.0))) \ - __attribute__((availability(tvos, strict, introduced = 16.0))) \ - __attribute__((availability(watchos, strict, introduced = 9.0))) - // LLVM 12 # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 120300) || \ (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 150300) || \ (defined(__ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ < 150300) || \ - (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 80300) + (defined(__ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ < 80300) || \ + (defined(__ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_BRIDGE_OS_VERSION_MIN_REQUIRED__ < 60000) || \ + 
(defined(__ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_DRIVERKIT_VERSION_MIN_REQUIRED__ < 210300) # define _LIBCPP_INTRODUCED_IN_LLVM_12 0 # else # define _LIBCPP_INTRODUCED_IN_LLVM_12 1 @@ -219,7 +200,9 @@ __attribute__((availability(macos, strict, introduced = 12.3))) \ __attribute__((availability(ios, strict, introduced = 15.3))) \ __attribute__((availability(tvos, strict, introduced = 15.3))) \ - __attribute__((availability(watchos, strict, introduced = 8.3))) + __attribute__((availability(watchos, strict, introduced = 8.3))) \ + __attribute__((availability(bridgeos, strict, introduced = 6.0))) \ + __attribute__((availability(driverkit, strict, introduced = 21.3))) // LLVM 11 # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 110000) || \ @@ -236,10 +219,6 @@ __attribute__((availability(tvos, strict, introduced = 14.0))) \ __attribute__((availability(watchos, strict, introduced = 7.0))) -// LLVM 10 -# define _LIBCPP_INTRODUCED_IN_LLVM_10 _LIBCPP_INTRODUCED_IN_LLVM_11 -# define _LIBCPP_INTRODUCED_IN_LLVM_10_ATTRIBUTE _LIBCPP_INTRODUCED_IN_LLVM_11_ATTRIBUTE - // LLVM 9 # if (defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 101500) || \ (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 130000) || \ @@ -375,10 +354,15 @@ #define _LIBCPP_AVAILABILITY_HAS_BAD_EXPECTED_ACCESS_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19 #define _LIBCPP_AVAILABILITY_BAD_EXPECTED_ACCESS_KEY_FUNCTION _LIBCPP_INTRODUCED_IN_LLVM_19_ATTRIBUTE -// Define availability attributes that depend on _LIBCPP_HAS_NO_EXCEPTIONS. +// This controls the availability of floating-point std::from_chars functions. +// These overloads were added later than the integer overloads. 
+#define _LIBCPP_AVAILABILITY_HAS_FROM_CHARS_FLOATING_POINT _LIBCPP_INTRODUCED_IN_LLVM_20 +#define _LIBCPP_AVAILABILITY_FROM_CHARS_FLOATING_POINT _LIBCPP_INTRODUCED_IN_LLVM_20_ATTRIBUTE + +// Define availability attributes that depend on _LIBCPP_HAS_EXCEPTIONS. // Those are defined in terms of the availability attributes above, and // should not be vendor-specific. -#if defined(_LIBCPP_HAS_NO_EXCEPTIONS) +#if !_LIBCPP_HAS_EXCEPTIONS # define _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST # define _LIBCPP_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS # define _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS @@ -389,8 +373,8 @@ #endif // Define availability attributes that depend on both -// _LIBCPP_HAS_NO_EXCEPTIONS and _LIBCPP_HAS_NO_RTTI. -#if defined(_LIBCPP_HAS_NO_EXCEPTIONS) || defined(_LIBCPP_HAS_NO_RTTI) +// _LIBCPP_HAS_EXCEPTIONS and _LIBCPP_HAS_RTTI. +#if !_LIBCPP_HAS_EXCEPTIONS || !_LIBCPP_HAS_RTTI # undef _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION # undef _LIBCPP_AVAILABILITY_INIT_PRIMARY_EXCEPTION # define _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION 0 diff --git a/lib/libcxx/include/__configuration/compiler.h b/lib/libcxx/include/__configuration/compiler.h index d109aa748f6a..caedfa9fd884 100644 --- a/lib/libcxx/include/__configuration/compiler.h +++ b/lib/libcxx/include/__configuration/compiler.h @@ -33,8 +33,8 @@ // Warn if a compiler version is used that is not supported anymore // LLVM RELEASE Update the minimum compiler versions # if defined(_LIBCPP_CLANG_VER) -# if _LIBCPP_CLANG_VER < 1700 -# warning "Libc++ only supports Clang 17 and later" +# if _LIBCPP_CLANG_VER < 1800 +# warning "Libc++ only supports Clang 18 and later" # endif # elif defined(_LIBCPP_APPLE_CLANG_VER) # if _LIBCPP_APPLE_CLANG_VER < 1500 diff --git a/lib/libcxx/include/__configuration/language.h b/lib/libcxx/include/__configuration/language.h index cca6c71486b7..6cf5805f2b8e 100644 --- a/lib/libcxx/include/__configuration/language.h +++ b/lib/libcxx/include/__configuration/language.h @@ 
-35,12 +35,16 @@ #endif // __cplusplus // NOLINTEND(libcpp-cpp-version-check) -#if !defined(__cpp_rtti) || __cpp_rtti < 199711L -# define _LIBCPP_HAS_NO_RTTI +#if defined(__cpp_rtti) && __cpp_rtti >= 199711L +# define _LIBCPP_HAS_RTTI 1 +#else +# define _LIBCPP_HAS_RTTI 0 #endif -#if !defined(__cpp_exceptions) || __cpp_exceptions < 199711L -# define _LIBCPP_HAS_NO_EXCEPTIONS +#if defined(__cpp_exceptions) && __cpp_exceptions >= 199711L +# define _LIBCPP_HAS_EXCEPTIONS 1 +#else +# define _LIBCPP_HAS_EXCEPTIONS 0 #endif #endif // _LIBCPP___CONFIGURATION_LANGUAGE_H diff --git a/lib/libcxx/include/__configuration/platform.h b/lib/libcxx/include/__configuration/platform.h index 540b30c55859..b03d7c0f39a3 100644 --- a/lib/libcxx/include/__configuration/platform.h +++ b/lib/libcxx/include/__configuration/platform.h @@ -31,14 +31,16 @@ #endif // Need to detect which libc we're using if we're on Linux. -#if defined(__linux__) -# include -# if defined(__GLIBC_PREREQ) -# define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b) -# else -# define _LIBCPP_GLIBC_PREREQ(a, b) 0 -# endif // defined(__GLIBC_PREREQ) -#endif // defined(__linux__) +#if defined(__linux__) || defined(__AMDGPU__) || defined(__NVPTX__) +# if __has_include() +# include +# if defined(__GLIBC_PREREQ) +# define _LIBCPP_GLIBC_PREREQ(a, b) __GLIBC_PREREQ(a, b) +# else +# define _LIBCPP_GLIBC_PREREQ(a, b) 0 +# endif // defined(__GLIBC_PREREQ) +# endif +#endif #ifndef __BYTE_ORDER__ # error \ diff --git a/lib/libcxx/include/__coroutine/coroutine_handle.h b/lib/libcxx/include/__coroutine/coroutine_handle.h index 4557a6643c23..e2cde20498d8 100644 --- a/lib/libcxx/include/__coroutine/coroutine_handle.h +++ b/lib/libcxx/include/__coroutine/coroutine_handle.h @@ -11,11 +11,12 @@ #include <__assert> #include <__config> +#include <__cstddef/nullptr_t.h> +#include <__cstddef/size_t.h> #include <__functional/hash.h> #include <__memory/addressof.h> #include <__type_traits/remove_cv.h> #include -#include #if 
!defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__cstddef/byte.h b/lib/libcxx/include/__cstddef/byte.h new file mode 100644 index 000000000000..09e1d75e0b41 --- /dev/null +++ b/lib/libcxx/include/__cstddef/byte.h @@ -0,0 +1,85 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___CSTDDEF_BYTE_H +#define _LIBCPP___CSTDDEF_BYTE_H + +#include <__config> +#include <__fwd/byte.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/is_integral.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER >= 17 +namespace std { // purposefully not versioned + +enum class byte : unsigned char {}; + +_LIBCPP_HIDE_FROM_ABI inline constexpr byte operator|(byte __lhs, byte __rhs) noexcept { + return static_cast( + static_cast(static_cast(__lhs) | static_cast(__rhs))); +} + +_LIBCPP_HIDE_FROM_ABI inline constexpr byte& operator|=(byte& __lhs, byte __rhs) noexcept { + return __lhs = __lhs | __rhs; +} + +_LIBCPP_HIDE_FROM_ABI inline constexpr byte operator&(byte __lhs, byte __rhs) noexcept { + return static_cast( + static_cast(static_cast(__lhs) & static_cast(__rhs))); +} + +_LIBCPP_HIDE_FROM_ABI inline constexpr byte& operator&=(byte& __lhs, byte __rhs) noexcept { + return __lhs = __lhs & __rhs; +} + +_LIBCPP_HIDE_FROM_ABI inline constexpr byte operator^(byte __lhs, byte __rhs) noexcept { + return static_cast( + static_cast(static_cast(__lhs) ^ static_cast(__rhs))); +} + +_LIBCPP_HIDE_FROM_ABI inline constexpr byte& operator^=(byte& __lhs, byte __rhs) noexcept { + return __lhs = __lhs ^ __rhs; +} + +_LIBCPP_HIDE_FROM_ABI 
inline constexpr byte operator~(byte __b) noexcept { + return static_cast(static_cast(~static_cast(__b))); +} + +template ::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI constexpr byte& operator<<=(byte& __lhs, _Integer __shift) noexcept { + return __lhs = __lhs << __shift; +} + +template ::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI constexpr byte operator<<(byte __lhs, _Integer __shift) noexcept { + return static_cast(static_cast(static_cast(__lhs) << __shift)); +} + +template ::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI constexpr byte& operator>>=(byte& __lhs, _Integer __shift) noexcept { + return __lhs = __lhs >> __shift; +} + +template ::value, int> = 0> +_LIBCPP_HIDE_FROM_ABI constexpr byte operator>>(byte __lhs, _Integer __shift) noexcept { + return static_cast(static_cast(static_cast(__lhs) >> __shift)); +} + +template ::value, int> = 0> +[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Integer to_integer(byte __b) noexcept { + return static_cast<_Integer>(__b); +} + +} // namespace std +#endif // _LIBCPP_STD_VER >= 17 + +#endif // _LIBCPP___CSTDDEF_BYTE_H diff --git a/lib/libcxx/include/__cstddef/max_align_t.h b/lib/libcxx/include/__cstddef/max_align_t.h new file mode 100644 index 000000000000..7c09c7e7f301 --- /dev/null +++ b/lib/libcxx/include/__cstddef/max_align_t.h @@ -0,0 +1,27 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___CSTDDEF_MAX_ALIGN_T_H +#define _LIBCPP___CSTDDEF_MAX_ALIGN_T_H + +#include <__config> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if !defined(_LIBCPP_CXX03_LANG) +using ::max_align_t _LIBCPP_USING_IF_EXISTS; +#endif + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___CSTDDEF_MAX_ALIGN_T_H diff --git a/lib/libcxx/include/__cstddef/nullptr_t.h b/lib/libcxx/include/__cstddef/nullptr_t.h new file mode 100644 index 000000000000..7eaae0175396 --- /dev/null +++ b/lib/libcxx/include/__cstddef/nullptr_t.h @@ -0,0 +1,24 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___CSTDDEF_NULLPTR_T_H +#define _LIBCPP___CSTDDEF_NULLPTR_T_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +using nullptr_t = decltype(nullptr); + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___CSTDDEF_NULLPTR_T_H diff --git a/lib/libcxx/include/__cstddef/ptrdiff_t.h b/lib/libcxx/include/__cstddef/ptrdiff_t.h new file mode 100644 index 000000000000..146f345a2c30 --- /dev/null +++ b/lib/libcxx/include/__cstddef/ptrdiff_t.h @@ -0,0 +1,24 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___CSTDDEF_PTRDIFF_T_H +#define _LIBCPP___CSTDDEF_PTRDIFF_T_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +using ptrdiff_t = decltype(static_cast(nullptr) - static_cast(nullptr)); + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___CSTDDEF_PTRDIFF_T_H diff --git a/lib/libcxx/include/__cstddef/size_t.h b/lib/libcxx/include/__cstddef/size_t.h new file mode 100644 index 000000000000..59bad9367198 --- /dev/null +++ b/lib/libcxx/include/__cstddef/size_t.h @@ -0,0 +1,24 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___CSTDDEF_SIZE_T_H +#define _LIBCPP___CSTDDEF_SIZE_T_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +using size_t = decltype(sizeof(int)); + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___CSTDDEF_SIZE_T_H diff --git a/lib/libcxx/include/__debug_utils/sanitizers.h b/lib/libcxx/include/__debug_utils/sanitizers.h index d8547e324933..73d192711eab 100644 --- a/lib/libcxx/include/__debug_utils/sanitizers.h +++ b/lib/libcxx/include/__debug_utils/sanitizers.h @@ -17,7 +17,7 @@ # pragma GCC system_header #endif -#ifndef _LIBCPP_HAS_NO_ASAN +#if _LIBCPP_HAS_ASAN extern "C" { _LIBCPP_EXPORTED_FROM_ABI void @@ -28,12 +28,12 @@ _LIBCPP_EXPORTED_FROM_ABI int __sanitizer_verify_double_ended_contiguous_container(const void*, const void*, const void*, const void*); } 
-#endif // _LIBCPP_HAS_NO_ASAN +#endif // _LIBCPP_HAS_ASAN _LIBCPP_BEGIN_NAMESPACE_STD // ASan choices -#ifndef _LIBCPP_HAS_NO_ASAN +#if _LIBCPP_HAS_ASAN # define _LIBCPP_HAS_ASAN_CONTAINER_ANNOTATIONS_FOR_ALL_ALLOCATORS 1 #endif @@ -57,7 +57,7 @@ _LIBCPP_HIDE_FROM_ABI void __annotate_double_ended_contiguous_container( const void* __last_old_contained, const void* __first_new_contained, const void* __last_new_contained) { -#ifdef _LIBCPP_HAS_NO_ASAN +#if !_LIBCPP_HAS_ASAN (void)__first_storage; (void)__last_storage; (void)__first_old_contained; @@ -86,7 +86,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void __annotate_contiguous_c const void* __last_storage, const void* __old_last_contained, const void* __new_last_contained) { -#ifdef _LIBCPP_HAS_NO_ASAN +#if !_LIBCPP_HAS_ASAN (void)__first_storage; (void)__last_storage; (void)__old_last_contained; diff --git a/lib/libcxx/include/__exception/exception_ptr.h b/lib/libcxx/include/__exception/exception_ptr.h index beadd9212abd..6257e6f729bf 100644 --- a/lib/libcxx/include/__exception/exception_ptr.h +++ b/lib/libcxx/include/__exception/exception_ptr.h @@ -10,13 +10,12 @@ #define _LIBCPP___EXCEPTION_EXCEPTION_PTR_H #include <__config> +#include <__cstddef/nullptr_t.h> #include <__exception/operations.h> #include <__memory/addressof.h> #include <__memory/construct_at.h> #include <__type_traits/decay.h> -#include #include -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -67,7 +66,7 @@ class _LIBCPP_EXPORTED_FROM_ABI exception_ptr { public: // exception_ptr is basically a COW string. 
- using __trivially_relocatable = exception_ptr; + using __trivially_relocatable _LIBCPP_NODEBUG = exception_ptr; _LIBCPP_HIDE_FROM_ABI exception_ptr() _NOEXCEPT : __ptr_() {} _LIBCPP_HIDE_FROM_ABI exception_ptr(nullptr_t) _NOEXCEPT : __ptr_() {} @@ -92,7 +91,7 @@ class _LIBCPP_EXPORTED_FROM_ABI exception_ptr { template _LIBCPP_HIDE_FROM_ABI exception_ptr make_exception_ptr(_Ep __e) _NOEXCEPT { -# ifndef _LIBCPP_HAS_NO_EXCEPTIONS +# if _LIBCPP_HAS_EXCEPTIONS # if _LIBCPP_AVAILABILITY_HAS_INIT_PRIMARY_EXCEPTION && __cplusplus >= 201103L using _Ep2 = __decay_t<_Ep>; @@ -159,7 +158,7 @@ _LIBCPP_EXPORTED_FROM_ABI void swap(exception_ptr&, exception_ptr&) _NOEXCEPT; _LIBCPP_EXPORTED_FROM_ABI exception_ptr __copy_exception_ptr(void* __except, const void* __ptr); _LIBCPP_EXPORTED_FROM_ABI exception_ptr current_exception() _NOEXCEPT; -_LIBCPP_NORETURN _LIBCPP_EXPORTED_FROM_ABI void rethrow_exception(exception_ptr); +[[__noreturn__]] _LIBCPP_EXPORTED_FROM_ABI void rethrow_exception(exception_ptr); // This is a built-in template function which automagically extracts the required // information. 
diff --git a/lib/libcxx/include/__exception/nested_exception.h b/lib/libcxx/include/__exception/nested_exception.h index feb489f87f62..d560b6bbc35a 100644 --- a/lib/libcxx/include/__exception/nested_exception.h +++ b/lib/libcxx/include/__exception/nested_exception.h @@ -13,6 +13,8 @@ #include <__exception/exception_ptr.h> #include <__memory/addressof.h> #include <__type_traits/decay.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/integral_constant.h> #include <__type_traits/is_base_of.h> #include <__type_traits/is_class.h> #include <__type_traits/is_constructible.h> @@ -20,7 +22,6 @@ #include <__type_traits/is_final.h> #include <__type_traits/is_polymorphic.h> #include <__utility/forward.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -38,7 +39,7 @@ class _LIBCPP_EXPORTED_FROM_ABI nested_exception { virtual ~nested_exception() _NOEXCEPT; // access functions - _LIBCPP_NORETURN void rethrow_nested() const; + [[__noreturn__]] void rethrow_nested() const; _LIBCPP_HIDE_FROM_ABI exception_ptr nested_ptr() const _NOEXCEPT { return __ptr_; } }; @@ -47,26 +48,26 @@ struct __nested : public _Tp, public nested_exception { _LIBCPP_HIDE_FROM_ABI explicit __nested(const _Tp& __t) : _Tp(__t) {} }; -#ifndef _LIBCPP_HAS_NO_EXCEPTIONS +#if _LIBCPP_HAS_EXCEPTIONS template struct __throw_with_nested; template struct __throw_with_nested<_Tp, _Up, true> { - _LIBCPP_NORETURN static inline _LIBCPP_HIDE_FROM_ABI void __do_throw(_Tp&& __t) { + [[__noreturn__]] static inline _LIBCPP_HIDE_FROM_ABI void __do_throw(_Tp&& __t) { throw __nested<_Up>(std::forward<_Tp>(__t)); } }; template struct __throw_with_nested<_Tp, _Up, false> { - _LIBCPP_NORETURN static inline _LIBCPP_HIDE_FROM_ABI void __do_throw(_Tp&& __t) { throw std::forward<_Tp>(__t); } + [[__noreturn__]] static inline _LIBCPP_HIDE_FROM_ABI void __do_throw(_Tp&& __t) { throw std::forward<_Tp>(__t); } }; #endif template -_LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI void 
throw_with_nested(_Tp&& __t) { -#ifndef _LIBCPP_HAS_NO_EXCEPTIONS +[[__noreturn__]] _LIBCPP_HIDE_FROM_ABI void throw_with_nested(_Tp&& __t) { +#if _LIBCPP_HAS_EXCEPTIONS using _Up = __decay_t<_Tp>; static_assert(is_copy_constructible<_Up>::value, "type thrown must be CopyConstructible"); __throw_with_nested<_Tp, diff --git a/lib/libcxx/include/__exception/operations.h b/lib/libcxx/include/__exception/operations.h index 0a9c7a7c7f0d..15520c558a0b 100644 --- a/lib/libcxx/include/__exception/operations.h +++ b/lib/libcxx/include/__exception/operations.h @@ -10,7 +10,6 @@ #define _LIBCPP___EXCEPTION_OPERATIONS_H #include <__config> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -22,20 +21,22 @@ namespace std { // purposefully not using versioning namespace using unexpected_handler = void (*)(); _LIBCPP_EXPORTED_FROM_ABI unexpected_handler set_unexpected(unexpected_handler) _NOEXCEPT; _LIBCPP_EXPORTED_FROM_ABI unexpected_handler get_unexpected() _NOEXCEPT; -_LIBCPP_NORETURN _LIBCPP_EXPORTED_FROM_ABI void unexpected(); +[[__noreturn__]] _LIBCPP_EXPORTED_FROM_ABI void unexpected(); #endif using terminate_handler = void (*)(); _LIBCPP_EXPORTED_FROM_ABI terminate_handler set_terminate(terminate_handler) _NOEXCEPT; _LIBCPP_EXPORTED_FROM_ABI terminate_handler get_terminate() _NOEXCEPT; -_LIBCPP_EXPORTED_FROM_ABI bool uncaught_exception() _NOEXCEPT; +#if _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_UNCAUGHT_EXCEPTION) +_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_DEPRECATED_IN_CXX17 bool uncaught_exception() _NOEXCEPT; +#endif // _LIBCPP_STD_VER <= 17 || defined(_LIBCPP_ENABLE_CXX20_REMOVED_UNCAUGHT_EXCEPTION) _LIBCPP_EXPORTED_FROM_ABI int uncaught_exceptions() _NOEXCEPT; class _LIBCPP_EXPORTED_FROM_ABI exception_ptr; _LIBCPP_EXPORTED_FROM_ABI exception_ptr current_exception() _NOEXCEPT; -_LIBCPP_NORETURN _LIBCPP_EXPORTED_FROM_ABI void rethrow_exception(exception_ptr); +[[__noreturn__]] _LIBCPP_EXPORTED_FROM_ABI void 
rethrow_exception(exception_ptr); } // namespace std #endif // _LIBCPP___EXCEPTION_OPERATIONS_H diff --git a/lib/libcxx/include/__exception/terminate.h b/lib/libcxx/include/__exception/terminate.h index e672471dc526..0bfc3506d379 100644 --- a/lib/libcxx/include/__exception/terminate.h +++ b/lib/libcxx/include/__exception/terminate.h @@ -16,7 +16,7 @@ #endif namespace std { // purposefully not using versioning namespace -_LIBCPP_NORETURN _LIBCPP_EXPORTED_FROM_ABI void terminate() _NOEXCEPT; +[[__noreturn__]] _LIBCPP_EXPORTED_FROM_ABI void terminate() _NOEXCEPT; } // namespace std #endif // _LIBCPP___EXCEPTION_TERMINATE_H diff --git a/lib/libcxx/include/__expected/expected.h b/lib/libcxx/include/__expected/expected.h index f618b20603e6..03bbd1623ed5 100644 --- a/lib/libcxx/include/__expected/expected.h +++ b/lib/libcxx/include/__expected/expected.h @@ -17,9 +17,11 @@ #include <__functional/invoke.h> #include <__memory/addressof.h> #include <__memory/construct_at.h> +#include <__type_traits/conditional.h> #include <__type_traits/conjunction.h> #include <__type_traits/disjunction.h> #include <__type_traits/integral_constant.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_assignable.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_convertible.h> @@ -71,7 +73,7 @@ struct __expected_construct_unexpected_from_invoke_tag {}; template _LIBCPP_HIDE_FROM_ABI void __throw_bad_expected_access(_Arg&& __arg) { -# ifndef _LIBCPP_HAS_NO_EXCEPTIONS +# if _LIBCPP_HAS_EXCEPTIONS throw bad_expected_access<_Err>(std::forward<_Arg>(__arg)); # else (void)__arg; @@ -457,14 +459,14 @@ class expected : private __expected_base<_Tp, _Err> { template friend class expected; - using __base = __expected_base<_Tp, _Err>; + using __base _LIBCPP_NODEBUG = __expected_base<_Tp, _Err>; public: using value_type = _Tp; using error_type = _Err; using unexpected_type = unexpected<_Err>; - using __trivially_relocatable = + using __trivially_relocatable _LIBCPP_NODEBUG 
= __conditional_t<__libcpp_is_trivially_relocatable<_Tp>::value && __libcpp_is_trivially_relocatable<_Err>::value, expected, void>; @@ -503,25 +505,24 @@ class expected : private __expected_base<_Tp, _Err> { private: template - using __can_convert = - _And< is_constructible<_Tp, _UfQual>, - is_constructible<_Err, _OtherErrQual>, - _If<_Not, bool>>::value, - _And< - _Not<_And, is_same<_Err, _OtherErr>>>, // use the copy constructor instead, see #92676 - _Not&>>, - _Not>>, - _Not&>>, - _Not>>, - _Not&, _Tp>>, - _Not&&, _Tp>>, - _Not&, _Tp>>, - _Not&&, _Tp>>>, - true_type>, - _Not, expected<_Up, _OtherErr>&>>, - _Not, expected<_Up, _OtherErr>>>, - _Not, const expected<_Up, _OtherErr>&>>, - _Not, const expected<_Up, _OtherErr>>> >; + using __can_convert _LIBCPP_NODEBUG = _And< + is_constructible<_Tp, _UfQual>, + is_constructible<_Err, _OtherErrQual>, + _If<_Not, bool>>::value, + _And< _Not<_And, is_same<_Err, _OtherErr>>>, // use the copy constructor instead, see #92676 + _Not&>>, + _Not>>, + _Not&>>, + _Not>>, + _Not&, _Tp>>, + _Not&&, _Tp>>, + _Not&, _Tp>>, + _Not&&, _Tp>>>, + true_type>, + _Not, expected<_Up, _OtherErr>&>>, + _Not, expected<_Up, _OtherErr>>>, + _Not, const expected<_Up, _OtherErr>&>>, + _Not, const expected<_Up, _OtherErr>>> >; template _LIBCPP_HIDE_FROM_ABI constexpr explicit expected( @@ -918,9 +919,9 @@ class expected : private __expected_base<_Tp, _Err> { requires is_constructible_v<_Err, _Err&> _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) & { using _Up = remove_cvref_t>; - static_assert(__is_std_expected<_Up>::value, "The result of f(**this) must be a specialization of std::expected"); + static_assert(__is_std_expected<_Up>::value, "The result of f(value()) must be a specialization of std::expected"); static_assert(is_same_v, - "The result of f(**this) must have the same error_type as this expected"); + "The result of f(value()) must have the same error_type as this expected"); if (has_value()) { return 
std::invoke(std::forward<_Func>(__f), this->__val()); } @@ -931,9 +932,9 @@ class expected : private __expected_base<_Tp, _Err> { requires is_constructible_v<_Err, const _Err&> _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const& { using _Up = remove_cvref_t>; - static_assert(__is_std_expected<_Up>::value, "The result of f(**this) must be a specialization of std::expected"); + static_assert(__is_std_expected<_Up>::value, "The result of f(value()) must be a specialization of std::expected"); static_assert(is_same_v, - "The result of f(**this) must have the same error_type as this expected"); + "The result of f(value()) must have the same error_type as this expected"); if (has_value()) { return std::invoke(std::forward<_Func>(__f), this->__val()); } @@ -945,9 +946,9 @@ class expected : private __expected_base<_Tp, _Err> { _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) && { using _Up = remove_cvref_t>; static_assert( - __is_std_expected<_Up>::value, "The result of f(std::move(**this)) must be a specialization of std::expected"); + __is_std_expected<_Up>::value, "The result of f(std::move(value())) must be a specialization of std::expected"); static_assert(is_same_v, - "The result of f(std::move(**this)) must have the same error_type as this expected"); + "The result of f(std::move(value())) must have the same error_type as this expected"); if (has_value()) { return std::invoke(std::forward<_Func>(__f), std::move(this->__val())); } @@ -959,9 +960,9 @@ class expected : private __expected_base<_Tp, _Err> { _LIBCPP_HIDE_FROM_ABI constexpr auto and_then(_Func&& __f) const&& { using _Up = remove_cvref_t>; static_assert( - __is_std_expected<_Up>::value, "The result of f(std::move(**this)) must be a specialization of std::expected"); + __is_std_expected<_Up>::value, "The result of f(std::move(value())) must be a specialization of std::expected"); static_assert(is_same_v, - "The result of f(std::move(**this)) must have the same error_type as this 
expected"); + "The result of f(std::move(value())) must have the same error_type as this expected"); if (has_value()) { return std::invoke(std::forward<_Func>(__f), std::move(this->__val())); } @@ -1362,7 +1363,7 @@ class expected<_Tp, _Err> : private __expected_void_base<_Err> { friend class expected; template - using __can_convert = + using __can_convert _LIBCPP_NODEBUG = _And< is_void<_Up>, is_constructible<_Err, _OtherErrQual>, _Not, expected<_Up, _OtherErr>&>>, @@ -1370,7 +1371,7 @@ class expected<_Tp, _Err> : private __expected_void_base<_Err> { _Not, const expected<_Up, _OtherErr>&>>, _Not, const expected<_Up, _OtherErr>>>>; - using __base = __expected_void_base<_Err>; + using __base _LIBCPP_NODEBUG = __expected_void_base<_Err>; public: using value_type = _Tp; @@ -1492,8 +1493,6 @@ class expected<_Tp, _Err> : private __expected_void_base<_Err> { return *this; } - _LIBCPP_HIDE_FROM_ABI constexpr expected& operator=(expected&&) = delete; - _LIBCPP_HIDE_FROM_ABI constexpr expected& operator=(expected&& __rhs) noexcept(is_nothrow_move_assignable_v<_Err> && is_nothrow_move_constructible_v<_Err>) requires(is_move_assignable_v<_Err> && is_move_constructible_v<_Err>) diff --git a/lib/libcxx/include/__expected/unexpected.h b/lib/libcxx/include/__expected/unexpected.h index c7fe3c52e431..6904889b8c6b 100644 --- a/lib/libcxx/include/__expected/unexpected.h +++ b/lib/libcxx/include/__expected/unexpected.h @@ -48,12 +48,12 @@ template struct __is_std_unexpected> : true_type {}; template -using __valid_std_unexpected = _BoolConstant< // - is_object_v<_Tp> && // - !is_array_v<_Tp> && // - !__is_std_unexpected<_Tp>::value && // - !is_const_v<_Tp> && // - !is_volatile_v<_Tp> // +using __valid_std_unexpected _LIBCPP_NODEBUG = _BoolConstant< // + is_object_v<_Tp> && // + !is_array_v<_Tp> && // + !__is_std_unexpected<_Tp>::value && // + !is_const_v<_Tp> && // + !is_volatile_v<_Tp> // >; template @@ -108,7 +108,7 @@ class unexpected { template _LIBCPP_HIDE_FROM_ABI friend 
constexpr bool operator==(const unexpected& __x, const unexpected<_Err2>& __y) { - return __x.__unex_ == __y.__unex_; + return __x.__unex_ == __y.error(); } private: diff --git a/lib/libcxx/include/__filesystem/directory_entry.h b/lib/libcxx/include/__filesystem/directory_entry.h index 96d88dcd90b4..11e07acdbe00 100644 --- a/lib/libcxx/include/__filesystem/directory_entry.h +++ b/lib/libcxx/include/__filesystem/directory_entry.h @@ -20,8 +20,11 @@ #include <__filesystem/operations.h> #include <__filesystem/path.h> #include <__filesystem/perms.h> +#include <__fwd/ostream.h> #include <__system_error/errc.h> +#include <__system_error/error_category.h> #include <__system_error/error_code.h> +#include <__system_error/error_condition.h> #include <__utility/move.h> #include <__utility/unreachable.h> #include @@ -33,7 +36,7 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> -#if _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM) +#if _LIBCPP_STD_VER >= 17 && _LIBCPP_HAS_FILESYSTEM _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM @@ -201,7 +204,9 @@ class directory_entry { _IterNonSymlink, _RefreshSymlink, _RefreshSymlinkUnresolved, - _RefreshNonSymlink + _RefreshNonSymlink, + _IterCachedSymlink, + _IterCachedNonSymlink }; struct __cached_data { @@ -240,6 +245,29 @@ class directory_entry { return __data; } + _LIBCPP_HIDE_FROM_ABI static __cached_data + __create_iter_cached_result(file_type __ft, uintmax_t __size, perms __perm, file_time_type __write_time) { + __cached_data __data; + __data.__type_ = __ft; + __data.__size_ = __size; + __data.__write_time_ = __write_time; + if (__ft == file_type::symlink) + __data.__sym_perms_ = __perm; + else + __data.__non_sym_perms_ = __perm; + __data.__cache_type_ = [&]() { + switch (__ft) { + case file_type::none: + return _Empty; + case file_type::symlink: + return _IterCachedSymlink; + default: + return _IterCachedNonSymlink; + } + }(); + return __data; + } + _LIBCPP_HIDE_FROM_ABI void __assign_iter_entry(_Path&& __p, __cached_data __dt) { 
__p_ = std::move(__p); __data_ = __dt; @@ -248,15 +276,7 @@ class directory_entry { _LIBCPP_EXPORTED_FROM_ABI error_code __do_refresh() noexcept; _LIBCPP_HIDE_FROM_ABI static bool __is_dne_error(error_code const& __ec) { - if (!__ec) - return true; - switch (static_cast(__ec.value())) { - case errc::no_such_file_or_directory: - case errc::not_a_directory: - return true; - default: - return false; - } + return !__ec || __ec == errc::no_such_file_or_directory || __ec == errc::not_a_directory; } _LIBCPP_HIDE_FROM_ABI void @@ -281,13 +301,15 @@ class directory_entry { case _Empty: return __symlink_status(__p_, __ec).type(); case _IterSymlink: + case _IterCachedSymlink: case _RefreshSymlink: case _RefreshSymlinkUnresolved: if (__ec) __ec->clear(); return file_type::symlink; + case _IterCachedNonSymlink: case _IterNonSymlink: - case _RefreshNonSymlink: + case _RefreshNonSymlink: { file_status __st(__data_.__type_); if (__ec && !filesystem::exists(__st)) *__ec = make_error_code(errc::no_such_file_or_directory); @@ -295,6 +317,7 @@ class directory_entry { __ec->clear(); return __data_.__type_; } + } __libcpp_unreachable(); } @@ -302,8 +325,10 @@ class directory_entry { switch (__data_.__cache_type_) { case _Empty: case _IterSymlink: + case _IterCachedSymlink: case _RefreshSymlinkUnresolved: return __status(__p_, __ec).type(); + case _IterCachedNonSymlink: case _IterNonSymlink: case _RefreshNonSymlink: case _RefreshSymlink: { @@ -323,8 +348,10 @@ class directory_entry { case _Empty: case _IterNonSymlink: case _IterSymlink: + case _IterCachedSymlink: case _RefreshSymlinkUnresolved: return __status(__p_, __ec); + case _IterCachedNonSymlink: case _RefreshNonSymlink: case _RefreshSymlink: return file_status(__get_ft(__ec), __data_.__non_sym_perms_); @@ -338,8 +365,10 @@ class directory_entry { case _IterNonSymlink: case _IterSymlink: return __symlink_status(__p_, __ec); + case _IterCachedNonSymlink: case _RefreshNonSymlink: return file_status(__get_sym_ft(__ec), 
__data_.__non_sym_perms_); + case _IterCachedSymlink: case _RefreshSymlink: case _RefreshSymlinkUnresolved: return file_status(__get_sym_ft(__ec), __data_.__sym_perms_); @@ -352,8 +381,10 @@ class directory_entry { case _Empty: case _IterNonSymlink: case _IterSymlink: + case _IterCachedSymlink: case _RefreshSymlinkUnresolved: return filesystem::__file_size(__p_, __ec); + case _IterCachedNonSymlink: case _RefreshSymlink: case _RefreshNonSymlink: { error_code __m_ec; @@ -374,6 +405,8 @@ class directory_entry { case _Empty: case _IterNonSymlink: case _IterSymlink: + case _IterCachedNonSymlink: + case _IterCachedSymlink: case _RefreshSymlinkUnresolved: return filesystem::__hard_link_count(__p_, __ec); case _RefreshSymlink: @@ -392,8 +425,10 @@ class directory_entry { case _Empty: case _IterNonSymlink: case _IterSymlink: + case _IterCachedSymlink: case _RefreshSymlinkUnresolved: return filesystem::__last_write_time(__p_, __ec); + case _IterCachedNonSymlink: case _RefreshSymlink: case _RefreshNonSymlink: { error_code __m_ec; @@ -428,7 +463,7 @@ _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP _LIBCPP_END_NAMESPACE_FILESYSTEM -#endif // _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM) +#endif // _LIBCPP_STD_VER >= 17 && _LIBCPP_HAS_FILESYSTEM _LIBCPP_POP_MACROS diff --git a/lib/libcxx/include/__filesystem/directory_iterator.h b/lib/libcxx/include/__filesystem/directory_iterator.h index e0246d8001e1..f5085b39ebf9 100644 --- a/lib/libcxx/include/__filesystem/directory_iterator.h +++ b/lib/libcxx/include/__filesystem/directory_iterator.h @@ -22,7 +22,6 @@ #include <__ranges/enable_view.h> #include <__system_error/error_code.h> #include <__utility/move.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -31,7 +30,7 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> -#if _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM) +#if _LIBCPP_STD_VER >= 17 && _LIBCPP_HAS_FILESYSTEM _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM @@ -144,7 
+143,7 @@ _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool # endif // _LIBCPP_STD_VER >= 20 -#endif // _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM) +#endif // _LIBCPP_STD_VER >= 17 && _LIBCPP_HAS_FILESYSTEM _LIBCPP_POP_MACROS diff --git a/lib/libcxx/include/__filesystem/filesystem_error.h b/lib/libcxx/include/__filesystem/filesystem_error.h index 80a11e3b1932..73592bba31da 100644 --- a/lib/libcxx/include/__filesystem/filesystem_error.h +++ b/lib/libcxx/include/__filesystem/filesystem_error.h @@ -67,15 +67,15 @@ class _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY _LIBCPP_EXPORTED_FROM_ABI filesyst shared_ptr<_Storage> __storage_; }; -# ifndef _LIBCPP_HAS_NO_EXCEPTIONS +# if _LIBCPP_HAS_EXCEPTIONS template -_LIBCPP_NORETURN inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY void +[[__noreturn__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY void __throw_filesystem_error(_Args&&... __args) { throw filesystem_error(std::forward<_Args>(__args)...); } # else template -_LIBCPP_NORETURN inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY void +[[__noreturn__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY void __throw_filesystem_error(_Args&&...) 
{ _LIBCPP_VERBOSE_ABORT("filesystem_error was thrown in -fno-exceptions mode"); } diff --git a/lib/libcxx/include/__filesystem/operations.h b/lib/libcxx/include/__filesystem/operations.h index f588189ed1d9..904023d2fb33 100644 --- a/lib/libcxx/include/__filesystem/operations.h +++ b/lib/libcxx/include/__filesystem/operations.h @@ -27,7 +27,7 @@ # pragma GCC system_header #endif -#if _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM) +#if _LIBCPP_STD_VER >= 17 && _LIBCPP_HAS_FILESYSTEM _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM @@ -305,6 +305,6 @@ _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_POP _LIBCPP_END_NAMESPACE_FILESYSTEM -#endif // _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM) +#endif // _LIBCPP_STD_VER >= 17 && _LIBCPP_HAS_FILESYSTEM #endif // _LIBCPP___FILESYSTEM_OPERATIONS_H diff --git a/lib/libcxx/include/__filesystem/path.h b/lib/libcxx/include/__filesystem/path.h index ff468d517722..0a751ba32954 100644 --- a/lib/libcxx/include/__filesystem/path.h +++ b/lib/libcxx/include/__filesystem/path.h @@ -21,11 +21,11 @@ #include <__type_traits/is_pointer.h> #include <__type_traits/remove_const.h> #include <__type_traits/remove_pointer.h> -#include +#include <__utility/move.h> #include #include -#if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +#if _LIBCPP_HAS_LOCALIZATION # include // for quoted # include #endif @@ -51,30 +51,30 @@ template struct __can_convert_char : public __can_convert_char<_Tp> {}; template <> struct __can_convert_char { - static const bool value = true; - using __char_type = char; + static const bool value = true; + using __char_type _LIBCPP_NODEBUG = char; }; template <> struct __can_convert_char { - static const bool value = true; - using __char_type = wchar_t; + static const bool value = true; + using __char_type _LIBCPP_NODEBUG = wchar_t; }; -# ifndef _LIBCPP_HAS_NO_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T template <> struct __can_convert_char { - static const bool value = true; - using __char_type = char8_t; + static const bool value = 
true; + using __char_type _LIBCPP_NODEBUG = char8_t; }; # endif template <> struct __can_convert_char { - static const bool value = true; - using __char_type = char16_t; + static const bool value = true; + using __char_type _LIBCPP_NODEBUG = char16_t; }; template <> struct __can_convert_char { - static const bool value = true; - using __char_type = char32_t; + static const bool value = true; + using __char_type _LIBCPP_NODEBUG = char32_t; }; template ::value, int> = 0> @@ -86,7 +86,7 @@ _LIBCPP_HIDE_FROM_ABI bool __is_separator(_ECharT __e) { # endif } -# ifndef _LIBCPP_HAS_NO_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T typedef u8string __u8_string; # else typedef string __u8_string; @@ -95,7 +95,7 @@ typedef string __u8_string; struct _NullSentinel {}; template -using _Void = void; +using _Void _LIBCPP_NODEBUG = void; template struct __is_pathable_string : public false_type {}; @@ -104,7 +104,7 @@ template struct __is_pathable_string< basic_string<_ECharT, _Traits, _Alloc>, _Void::__char_type> > : public __can_convert_char<_ECharT> { - using _Str = basic_string<_ECharT, _Traits, _Alloc>; + using _Str _LIBCPP_NODEBUG = basic_string<_ECharT, _Traits, _Alloc>; _LIBCPP_HIDE_FROM_ABI static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } @@ -117,7 +117,7 @@ template struct __is_pathable_string< basic_string_view<_ECharT, _Traits>, _Void::__char_type> > : public __can_convert_char<_ECharT> { - using _Str = basic_string_view<_ECharT, _Traits>; + using _Str _LIBCPP_NODEBUG = basic_string_view<_ECharT, _Traits>; _LIBCPP_HIDE_FROM_ABI static _ECharT const* __range_begin(_Str const& __s) { return __s.data(); } @@ -157,7 +157,7 @@ struct __is_pathable_iter< true, _Void::value_type>::__char_type> > : __can_convert_char::value_type> { - using _ECharT = typename iterator_traits<_Iter>::value_type; + using _ECharT _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::value_type; _LIBCPP_HIDE_FROM_ABI static _Iter __range_begin(_Iter __b) { return __b; } @@ -199,7 +199,7 
@@ _LIBCPP_EXPORTED_FROM_ABI size_t __char_to_wide(const string&, wchar_t*, size_t) template struct _PathCVT; -# if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_LOCALIZATION template struct _PathCVT { static_assert(__can_convert_char<_ECharT>::value, "Char type not convertible"); @@ -258,7 +258,7 @@ struct _PathCVT { __append_range(__dest, _Traits::__range_begin(__s), _Traits::__range_end(__s)); } }; -# endif // !_LIBCPP_HAS_NO_LOCALIZATION +# endif // _LIBCPP_HAS_LOCALIZATION template <> struct _PathCVT<__path_value> { @@ -365,7 +365,7 @@ struct _PathExport { } }; -# ifndef _LIBCPP_HAS_NO_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T template <> struct _PathExport { typedef __narrow_to_utf8 _Narrower; @@ -375,18 +375,18 @@ struct _PathExport { _Narrower()(back_inserter(__dest), __src.data(), __src.data() + __src.size()); } }; -# endif /* !_LIBCPP_HAS_NO_CHAR8_T */ +# endif // _LIBCPP_HAS_CHAR8_T # endif /* _LIBCPP_WIN32API */ class _LIBCPP_EXPORTED_FROM_ABI path { template - using _EnableIfPathable = __enable_if_t<__is_pathable<_SourceOrIter>::value, _Tp>; + using _EnableIfPathable _LIBCPP_NODEBUG = __enable_if_t<__is_pathable<_SourceOrIter>::value, _Tp>; template - using _SourceChar = typename __is_pathable<_Tp>::__char_type; + using _SourceChar _LIBCPP_NODEBUG = typename __is_pathable<_Tp>::__char_type; template - using _SourceCVT = _PathCVT<_SourceChar<_Tp> >; + using _SourceCVT _LIBCPP_NODEBUG = _PathCVT<_SourceChar<_Tp> >; public: # if defined(_LIBCPP_WIN32API) @@ -420,7 +420,7 @@ class _LIBCPP_EXPORTED_FROM_ABI path { } /* - #if !defined(_LIBCPP_HAS_NO_LOCALIZATION) + #if _LIBCPP_HAS_LOCALIZATION // TODO Implement locale conversions. 
template > path(const _Source& __src, const locale& __loc, format = format::auto_format); @@ -682,7 +682,7 @@ class _LIBCPP_EXPORTED_FROM_ABI path { return __s; } -# if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_LOCALIZATION template , class _Allocator = allocator<_ECharT> > _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> string(const _Allocator& __a = _Allocator()) const { using _Str = basic_string<_ECharT, _Traits, _Allocator>; @@ -725,17 +725,17 @@ class _LIBCPP_EXPORTED_FROM_ABI path { std::replace(__s.begin(), __s.end(), '\\', '/'); return __s; } -# endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ +# endif // _LIBCPP_HAS_LOCALIZATION # else /* _LIBCPP_WIN32API */ _LIBCPP_HIDE_FROM_ABI std::string string() const { return __pn_; } -# ifndef _LIBCPP_HAS_NO_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T _LIBCPP_HIDE_FROM_ABI std::u8string u8string() const { return std::u8string(__pn_.begin(), __pn_.end()); } # else _LIBCPP_HIDE_FROM_ABI std::string u8string() const { return __pn_; } # endif -# if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_LOCALIZATION template , class _Allocator = allocator<_ECharT> > _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> string(const _Allocator& __a = _Allocator()) const { using _CVT = __widen_from_utf8; @@ -746,34 +746,34 @@ class _LIBCPP_EXPORTED_FROM_ABI path { return __s; } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS _LIBCPP_HIDE_FROM_ABI std::wstring wstring() const { return string(); } # endif _LIBCPP_HIDE_FROM_ABI std::u16string u16string() const { return string(); } _LIBCPP_HIDE_FROM_ABI std::u32string u32string() const { return string(); } -# endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ +# endif // _LIBCPP_HAS_LOCALIZATION // generic format observers _LIBCPP_HIDE_FROM_ABI std::string generic_string() const { return __pn_; } -# ifndef _LIBCPP_HAS_NO_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T _LIBCPP_HIDE_FROM_ABI std::u8string generic_u8string() const { return 
std::u8string(__pn_.begin(), __pn_.end()); } # else _LIBCPP_HIDE_FROM_ABI std::string generic_u8string() const { return __pn_; } # endif -# if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_LOCALIZATION template , class _Allocator = allocator<_ECharT> > _LIBCPP_HIDE_FROM_ABI basic_string<_ECharT, _Traits, _Allocator> generic_string(const _Allocator& __a = _Allocator()) const { return string<_ECharT, _Traits, _Allocator>(__a); } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS _LIBCPP_HIDE_FROM_ABI std::wstring generic_wstring() const { return string(); } # endif _LIBCPP_HIDE_FROM_ABI std::u16string generic_u16string() const { return string(); } _LIBCPP_HIDE_FROM_ABI std::u32string generic_u32string() const { return string(); } -# endif /* !_LIBCPP_HAS_NO_LOCALIZATION */ +# endif // _LIBCPP_HAS_LOCALIZATION # endif /* !_LIBCPP_WIN32API */ private: @@ -811,7 +811,7 @@ class _LIBCPP_EXPORTED_FROM_ABI path { _LIBCPP_HIDE_FROM_ABI path extension() const { return string_type(__extension()); } // query - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __pn_.empty(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __pn_.empty(); } _LIBCPP_HIDE_FROM_ABI bool has_root_name() const { return !__root_name().empty(); } _LIBCPP_HIDE_FROM_ABI bool has_root_directory() const { return !__root_directory().empty(); } @@ -866,7 +866,7 @@ class _LIBCPP_EXPORTED_FROM_ABI path { iterator begin() const; iterator end() const; -# if !defined(_LIBCPP_HAS_NO_LOCALIZATION) +# if _LIBCPP_HAS_LOCALIZATION template < class _CharT, class _Traits, @@ -895,7 +895,7 @@ class _LIBCPP_EXPORTED_FROM_ABI path { __p = __tmp; return __is; } -# endif // !_LIBCPP_HAS_NO_LOCALIZATION +# endif // _LIBCPP_HAS_LOCALIZATION private: inline _LIBCPP_HIDE_FROM_ABI path& __assign_view(__string_view const& __s) { diff --git a/lib/libcxx/include/__filesystem/path_iterator.h b/lib/libcxx/include/__filesystem/path_iterator.h 
index f4d486d86cf3..e0f601662d46 100644 --- a/lib/libcxx/include/__filesystem/path_iterator.h +++ b/lib/libcxx/include/__filesystem/path_iterator.h @@ -14,9 +14,6 @@ #include <__config> #include <__filesystem/path.h> #include <__iterator/iterator_traits.h> -#include -#include -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__filesystem/recursive_directory_iterator.h b/lib/libcxx/include/__filesystem/recursive_directory_iterator.h index caa1396eb301..ad01a9982b69 100644 --- a/lib/libcxx/include/__filesystem/recursive_directory_iterator.h +++ b/lib/libcxx/include/__filesystem/recursive_directory_iterator.h @@ -21,7 +21,6 @@ #include <__ranges/enable_view.h> #include <__system_error/error_code.h> #include <__utility/move.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -30,7 +29,7 @@ _LIBCPP_PUSH_MACROS #include <__undef_macros> -#if _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM) +#if _LIBCPP_STD_VER >= 17 && _LIBCPP_HAS_FILESYSTEM _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM @@ -157,7 +156,7 @@ _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY inline constexpr bool # endif // _LIBCPP_STD_VER >= 20 -#endif // _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_FILESYSTEM) +#endif // _LIBCPP_STD_VER >= 17 && _LIBCPP_HAS_FILESYSTEM _LIBCPP_POP_MACROS diff --git a/lib/libcxx/include/__filesystem/u8path.h b/lib/libcxx/include/__filesystem/u8path.h index dae5823128f0..e13980298d9e 100644 --- a/lib/libcxx/include/__filesystem/u8path.h +++ b/lib/libcxx/include/__filesystem/u8path.h @@ -34,7 +34,7 @@ _LIBCPP_AVAILABILITY_FILESYSTEM_LIBRARY_PUSH template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _InputIt __l) { static_assert( -# ifndef _LIBCPP_HAS_NO_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T is_same::__char_type, char8_t>::value || # endif is_same::__char_type, char>::value, @@ -56,7 +56,7 @@ _LIBCPP_HIDE_FROM_ABI 
_LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, _NullSentinel) { static_assert( -# ifndef _LIBCPP_HAS_NO_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T is_same::__char_type, char8_t>::value || # endif is_same::__char_type, char>::value, @@ -77,7 +77,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(_InputIt __f, template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_WITH_CHAR8_T path u8path(const _Source& __s) { static_assert( -# ifndef _LIBCPP_HAS_NO_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T is_same::__char_type, char8_t>::value || # endif is_same::__char_type, char>::value, diff --git a/lib/libcxx/include/__flat_map/flat_map.h b/lib/libcxx/include/__flat_map/flat_map.h new file mode 100644 index 000000000000..a0594ed9dc41 --- /dev/null +++ b/lib/libcxx/include/__flat_map/flat_map.h @@ -0,0 +1,1199 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FLAT_MAP_FLAT_MAP_H +#define _LIBCPP___FLAT_MAP_FLAT_MAP_H + +#include <__algorithm/lexicographical_compare_three_way.h> +#include <__algorithm/min.h> +#include <__algorithm/ranges_adjacent_find.h> +#include <__algorithm/ranges_equal.h> +#include <__algorithm/ranges_inplace_merge.h> +#include <__algorithm/ranges_lower_bound.h> +#include <__algorithm/ranges_partition_point.h> +#include <__algorithm/ranges_sort.h> +#include <__algorithm/ranges_unique.h> +#include <__algorithm/ranges_upper_bound.h> +#include <__algorithm/remove_if.h> +#include <__assert> +#include <__compare/synth_three_way.h> +#include <__concepts/swappable.h> +#include <__config> +#include <__cstddef/byte.h> +#include <__cstddef/ptrdiff_t.h> +#include <__flat_map/key_value_iterator.h> +#include <__flat_map/sorted_unique.h> +#include <__flat_map/utils.h> +#include <__functional/invoke.h> +#include <__functional/is_transparent.h> +#include <__functional/operations.h> +#include <__fwd/vector.h> +#include <__iterator/concepts.h> +#include <__iterator/distance.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/next.h> +#include <__iterator/ranges_iterator_traits.h> +#include <__iterator/reverse_iterator.h> +#include <__memory/allocator_traits.h> +#include <__memory/uses_allocator.h> +#include <__memory/uses_allocator_construction.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/container_compatible_range.h> +#include <__ranges/drop_view.h> +#include <__ranges/from_range.h> +#include <__ranges/ref_view.h> +#include <__ranges/size.h> +#include <__ranges/subrange.h> +#include <__ranges/zip_view.h> +#include <__type_traits/conjunction.h> +#include <__type_traits/container_traits.h> +#include <__type_traits/invoke.h> +#include <__type_traits/is_allocator.h> +#include 
<__type_traits/is_nothrow_constructible.h> +#include <__type_traits/is_same.h> +#include <__utility/exception_guard.h> +#include <__utility/move.h> +#include <__utility/pair.h> +#include <__utility/scope_guard.h> +#include <__vector/vector.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#if _LIBCPP_STD_VER >= 23 + +_LIBCPP_BEGIN_NAMESPACE_STD + +template , + class _KeyContainer = vector<_Key>, + class _MappedContainer = vector<_Tp>> +class flat_map { + template + friend class flat_map; + + static_assert(is_same_v<_Key, typename _KeyContainer::value_type>); + static_assert(is_same_v<_Tp, typename _MappedContainer::value_type>); + static_assert(!is_same_v<_KeyContainer, std::vector>, "vector is not a sequence container"); + static_assert(!is_same_v<_MappedContainer, std::vector>, "vector is not a sequence container"); + + template + using __iterator _LIBCPP_NODEBUG = __key_value_iterator; + +public: + // types + using key_type = _Key; + using mapped_type = _Tp; + using value_type = pair; + using key_compare = __type_identity_t<_Compare>; + using reference = pair; + using const_reference = pair; + using size_type = size_t; + using difference_type = ptrdiff_t; + using iterator = __iterator; // see [container.requirements] + using const_iterator = __iterator; // see [container.requirements] + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + using key_container_type = _KeyContainer; + using mapped_container_type = _MappedContainer; + + class value_compare { + private: + key_compare __comp_; + _LIBCPP_HIDE_FROM_ABI value_compare(key_compare __c) : __comp_(__c) {} + friend flat_map; + + public: + _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const { + return __comp_(__x.first, __y.first); + } + }; + + struct containers { + key_container_type keys; + 
mapped_container_type values; + }; + +private: + template + _LIBCPP_HIDE_FROM_ABI static constexpr bool __allocator_ctor_constraint = + _And, uses_allocator>::value; + + _LIBCPP_HIDE_FROM_ABI static constexpr bool __is_compare_transparent = __is_transparent_v<_Compare>; + +public: + // [flat.map.cons], construct/copy/destroy + _LIBCPP_HIDE_FROM_ABI flat_map() noexcept( + is_nothrow_default_constructible_v<_KeyContainer> && is_nothrow_default_constructible_v<_MappedContainer> && + is_nothrow_default_constructible_v<_Compare>) + : __containers_(), __compare_() {} + + _LIBCPP_HIDE_FROM_ABI flat_map(const flat_map&) = default; + + _LIBCPP_HIDE_FROM_ABI flat_map(flat_map&& __other) noexcept( + is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_MappedContainer> && + is_nothrow_move_constructible_v<_Compare>) +# if _LIBCPP_HAS_EXCEPTIONS + try +# endif // _LIBCPP_HAS_EXCEPTIONS + : __containers_(std::move(__other.__containers_)), __compare_(std::move(__other.__compare_)) { + __other.clear(); +# if _LIBCPP_HAS_EXCEPTIONS + } catch (...) 
{ + __other.clear(); + // gcc does not like the `throw` keyword in a conditionally noexcept function + if constexpr (!(is_nothrow_move_constructible_v<_KeyContainer> && + is_nothrow_move_constructible_v<_MappedContainer> && is_nothrow_move_constructible_v<_Compare>)) { + throw; + } +# endif // _LIBCPP_HAS_EXCEPTIONS + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(const flat_map& __other, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_tag{}, + __alloc, + __other.__containers_.keys, + __other.__containers_.values, + __other.__compare_) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(flat_map&& __other, const _Allocator& __alloc) +# if _LIBCPP_HAS_EXCEPTIONS + try +# endif // _LIBCPP_HAS_EXCEPTIONS + : flat_map(__ctor_uses_allocator_tag{}, + __alloc, + std::move(__other.__containers_.keys), + std::move(__other.__containers_.values), + std::move(__other.__compare_)) { + __other.clear(); +# if _LIBCPP_HAS_EXCEPTIONS + } catch (...) 
{ + __other.clear(); + throw; +# endif // _LIBCPP_HAS_EXCEPTIONS + } + + _LIBCPP_HIDE_FROM_ABI flat_map( + key_container_type __key_cont, mapped_container_type __mapped_cont, const key_compare& __comp = key_compare()) + : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_map keys and mapped containers have different size"); + __sort_and_unique(); + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(const key_container_type& __key_cont, const mapped_container_type& __mapped_cont, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_map keys and mapped containers have different size"); + __sort_and_unique(); + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(const key_container_type& __key_cont, + const mapped_container_type& __mapped_cont, + const key_compare& __comp, + const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_map keys and mapped containers have different size"); + __sort_and_unique(); + } + + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, + key_container_type __key_cont, + mapped_container_type __mapped_cont, + const key_compare& __comp = key_compare()) + : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_map keys and mapped containers have different size"); + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + 
__is_sorted_and_unique(__containers_.keys), "Either the key container is not sorted or it contains duplicates"); + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, + const key_container_type& __key_cont, + const mapped_container_type& __mapped_cont, + const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_map keys and mapped containers have different size"); + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__containers_.keys), "Either the key container is not sorted or it contains duplicates"); + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, + const key_container_type& __key_cont, + const mapped_container_type& __mapped_cont, + const key_compare& __comp, + const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_map keys and mapped containers have different size"); + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__containers_.keys), "Either the key container is not sorted or it contains duplicates"); + } + + _LIBCPP_HIDE_FROM_ABI explicit flat_map(const key_compare& __comp) : __containers_(), __compare_(__comp) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(const key_compare& __comp, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI explicit flat_map(const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) {} + + template + requires 
__has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI + flat_map(_InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) + : __containers_(), __compare_(__comp) { + insert(__first, __last); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) + _LIBCPP_HIDE_FROM_ABI + flat_map(_InputIterator __first, _InputIterator __last, const key_compare& __comp, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { + insert(__first, __last); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) + _LIBCPP_HIDE_FROM_ABI flat_map(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) { + insert(__first, __last); + } + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t __fr, _Range&& __rg) + : flat_map(__fr, std::forward<_Range>(__rg), key_compare()) {} + + template <_ContainerCompatibleRange _Range, class _Allocator> + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t, _Range&& __rg, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) { + insert_range(std::forward<_Range>(__rg)); + } + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t, _Range&& __rg, const key_compare& __comp) : flat_map(__comp) { + insert_range(std::forward<_Range>(__rg)); + } + + template <_ContainerCompatibleRange _Range, class _Allocator> + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { + insert_range(std::forward<_Range>(__rg)); + } + + template + requires 
__has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, _InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) + : __containers_(), __compare_(__comp) { + insert(sorted_unique, __first, __last); + } + template + requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, + _InputIterator __first, + _InputIterator __last, + const key_compare& __comp, + const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { + insert(sorted_unique, __first, __last); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, _InputIterator __first, _InputIterator __last, const _Allocator& __alloc) + : flat_map(__ctor_uses_allocator_empty_tag{}, __alloc) { + insert(sorted_unique, __first, __last); + } + + _LIBCPP_HIDE_FROM_ABI flat_map(initializer_list __il, const key_compare& __comp = key_compare()) + : flat_map(__il.begin(), __il.end(), __comp) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) + : flat_map(__il.begin(), __il.end(), __comp, __alloc) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(initializer_list __il, const _Allocator& __alloc) + : flat_map(__il.begin(), __il.end(), __alloc) {} + + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, initializer_list __il, const key_compare& __comp = key_compare()) + : flat_map(sorted_unique, __il.begin(), __il.end(), __comp) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(sorted_unique_t, initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) + : 
flat_map(sorted_unique, __il.begin(), __il.end(), __comp, __alloc) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(sorted_unique_t, initializer_list __il, const _Allocator& __alloc) + : flat_map(sorted_unique, __il.begin(), __il.end(), __alloc) {} + + _LIBCPP_HIDE_FROM_ABI flat_map& operator=(initializer_list __il) { + clear(); + insert(__il); + return *this; + } + + _LIBCPP_HIDE_FROM_ABI flat_map& operator=(const flat_map&) = default; + + _LIBCPP_HIDE_FROM_ABI flat_map& operator=(flat_map&& __other) noexcept( + is_nothrow_move_assignable_v<_KeyContainer> && is_nothrow_move_assignable_v<_MappedContainer> && + is_nothrow_move_assignable_v<_Compare>) { + // No matter what happens, we always want to clear the other container before returning + // since we moved from it + auto __clear_other_guard = std::__make_scope_guard([&]() noexcept { __other.clear() /* noexcept */; }); + { + // If an exception is thrown, we have no choice but to clear *this to preserve invariants + auto __on_exception = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + __containers_ = std::move(__other.__containers_); + __compare_ = std::move(__other.__compare_); + __on_exception.__complete(); + } + return *this; + } + + // iterators + _LIBCPP_HIDE_FROM_ABI iterator begin() noexcept { + return iterator(__containers_.keys.begin(), __containers_.values.begin()); + } + + _LIBCPP_HIDE_FROM_ABI const_iterator begin() const noexcept { + return const_iterator(__containers_.keys.begin(), __containers_.values.begin()); + } + + _LIBCPP_HIDE_FROM_ABI iterator end() noexcept { + return iterator(__containers_.keys.end(), __containers_.values.end()); + } + + _LIBCPP_HIDE_FROM_ABI const_iterator end() const noexcept { + return const_iterator(__containers_.keys.end(), __containers_.values.end()); + } + + _LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } + _LIBCPP_HIDE_FROM_ABI 
const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } + _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() noexcept { return reverse_iterator(begin()); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); } + + _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const noexcept { return begin(); } + _LIBCPP_HIDE_FROM_ABI const_iterator cend() const noexcept { return end(); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); } + + // [flat.map.capacity], capacity + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __containers_.keys.empty(); } + + _LIBCPP_HIDE_FROM_ABI size_type size() const noexcept { return __containers_.keys.size(); } + + _LIBCPP_HIDE_FROM_ABI size_type max_size() const noexcept { + return std::min(__containers_.keys.max_size(), __containers_.values.max_size()); + } + + // [flat.map.access], element access + _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](const key_type& __x) + requires is_constructible_v + { + return try_emplace(__x).first->second; + } + + _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](key_type&& __x) + requires is_constructible_v + { + return try_emplace(std::move(__x)).first->second; + } + + template + requires(__is_compare_transparent && is_constructible_v && is_constructible_v && + !is_convertible_v<_Kp &&, const_iterator> && !is_convertible_v<_Kp &&, iterator>) + _LIBCPP_HIDE_FROM_ABI mapped_type& operator[](_Kp&& __x) { + return try_emplace(std::forward<_Kp>(__x)).first->second; + } + + _LIBCPP_HIDE_FROM_ABI mapped_type& at(const key_type& __x) { + auto __it = find(__x); + if (__it == end()) { + std::__throw_out_of_range("flat_map::at(const key_type&): Key does not exist"); + } + return __it->second; + } + + _LIBCPP_HIDE_FROM_ABI const mapped_type& 
at(const key_type& __x) const { + auto __it = find(__x); + if (__it == end()) { + std::__throw_out_of_range("flat_map::at(const key_type&) const: Key does not exist"); + } + return __it->second; + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI mapped_type& at(const _Kp& __x) { + auto __it = find(__x); + if (__it == end()) { + std::__throw_out_of_range("flat_map::at(const K&): Key does not exist"); + } + return __it->second; + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI const mapped_type& at(const _Kp& __x) const { + auto __it = find(__x); + if (__it == end()) { + std::__throw_out_of_range("flat_map::at(const K&) const: Key does not exist"); + } + return __it->second; + } + + // [flat.map.modifiers], modifiers + template + requires is_constructible_v, _Args...> + _LIBCPP_HIDE_FROM_ABI pair emplace(_Args&&... __args) { + std::pair __pair(std::forward<_Args>(__args)...); + return __try_emplace(std::move(__pair.first), std::move(__pair.second)); + } + + template + requires is_constructible_v, _Args...> + _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __hint, _Args&&... 
__args) { + std::pair __pair(std::forward<_Args>(__args)...); + return __try_emplace_hint(__hint, std::move(__pair.first), std::move(__pair.second)).first; + } + + _LIBCPP_HIDE_FROM_ABI pair insert(const value_type& __x) { return emplace(__x); } + + _LIBCPP_HIDE_FROM_ABI pair insert(value_type&& __x) { return emplace(std::move(__x)); } + + _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, const value_type& __x) { + return emplace_hint(__hint, __x); + } + + _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, value_type&& __x) { + return emplace_hint(__hint, std::move(__x)); + } + + template + requires is_constructible_v, _PairLike> + _LIBCPP_HIDE_FROM_ABI pair insert(_PairLike&& __x) { + return emplace(std::forward<_PairLike>(__x)); + } + + template + requires is_constructible_v, _PairLike> + _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, _PairLike&& __x) { + return emplace_hint(__hint, std::forward<_PairLike>(__x)); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) { + if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { + __reserve(__last - __first); + } + __append_sort_merge_unique(std::move(__first), std::move(__last)); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI void insert(sorted_unique_t, _InputIterator __first, _InputIterator __last) { + if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { + __reserve(__last - __first); + } + + __append_sort_merge_unique(std::move(__first), std::move(__last)); + } + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) { + if constexpr (ranges::sized_range<_Range>) { + __reserve(ranges::size(__range)); + } + + __append_sort_merge_unique(ranges::begin(__range), ranges::end(__range)); + } + + _LIBCPP_HIDE_FROM_ABI void 
insert(initializer_list __il) { insert(__il.begin(), __il.end()); } + + _LIBCPP_HIDE_FROM_ABI void insert(sorted_unique_t, initializer_list __il) { + insert(sorted_unique, __il.begin(), __il.end()); + } + + _LIBCPP_HIDE_FROM_ABI containers extract() && { + auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; }); + auto __ret = std::move(__containers_); + return __ret; + } + + _LIBCPP_HIDE_FROM_ABI void replace(key_container_type&& __key_cont, mapped_container_type&& __mapped_cont) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE( + __key_cont.size() == __mapped_cont.size(), "flat_map keys and mapped containers have different size"); + + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__key_cont), "Either the key container is not sorted or it contains duplicates"); + auto __guard = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + __containers_.keys = std::move(__key_cont); + __containers_.values = std::move(__mapped_cont); + __guard.__complete(); + } + + template + requires is_constructible_v + _LIBCPP_HIDE_FROM_ABI pair try_emplace(const key_type& __key, _Args&&... __args) { + return __try_emplace(__key, std::forward<_Args>(__args)...); + } + + template + requires is_constructible_v + _LIBCPP_HIDE_FROM_ABI pair try_emplace(key_type&& __key, _Args&&... __args) { + return __try_emplace(std::move(__key), std::forward<_Args>(__args)...); + } + + template + requires(__is_compare_transparent && is_constructible_v && + is_constructible_v && !is_convertible_v<_Kp &&, const_iterator> && + !is_convertible_v<_Kp &&, iterator>) + _LIBCPP_HIDE_FROM_ABI pair try_emplace(_Kp&& __key, _Args&&... __args) { + return __try_emplace(std::forward<_Kp>(__key), std::forward<_Args>(__args)...); + } + + template + requires is_constructible_v + _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __hint, const key_type& __key, _Args&&... 
__args) { + return __try_emplace_hint(__hint, __key, std::forward<_Args>(__args)...).first; + } + + template + requires is_constructible_v + _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __hint, key_type&& __key, _Args&&... __args) { + return __try_emplace_hint(__hint, std::move(__key), std::forward<_Args>(__args)...).first; + } + + template + requires __is_compare_transparent && is_constructible_v && is_constructible_v + _LIBCPP_HIDE_FROM_ABI iterator try_emplace(const_iterator __hint, _Kp&& __key, _Args&&... __args) { + return __try_emplace_hint(__hint, std::forward<_Kp>(__key), std::forward<_Args>(__args)...).first; + } + + template + requires is_assignable_v && is_constructible_v + _LIBCPP_HIDE_FROM_ABI pair insert_or_assign(const key_type& __key, _Mapped&& __obj) { + return __insert_or_assign(__key, std::forward<_Mapped>(__obj)); + } + + template + requires is_assignable_v && is_constructible_v + _LIBCPP_HIDE_FROM_ABI pair insert_or_assign(key_type&& __key, _Mapped&& __obj) { + return __insert_or_assign(std::move(__key), std::forward<_Mapped>(__obj)); + } + + template + requires __is_compare_transparent && is_constructible_v && is_assignable_v && + is_constructible_v + _LIBCPP_HIDE_FROM_ABI pair insert_or_assign(_Kp&& __key, _Mapped&& __obj) { + return __insert_or_assign(std::forward<_Kp>(__key), std::forward<_Mapped>(__obj)); + } + + template + requires is_assignable_v && is_constructible_v + _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __hint, const key_type& __key, _Mapped&& __obj) { + return __insert_or_assign(__hint, __key, std::forward<_Mapped>(__obj)); + } + + template + requires is_assignable_v && is_constructible_v + _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __hint, key_type&& __key, _Mapped&& __obj) { + return __insert_or_assign(__hint, std::move(__key), std::forward<_Mapped>(__obj)); + } + + template + requires __is_compare_transparent && is_constructible_v && is_assignable_v && + is_constructible_v 
+ _LIBCPP_HIDE_FROM_ABI iterator insert_or_assign(const_iterator __hint, _Kp&& __key, _Mapped&& __obj) { + return __insert_or_assign(__hint, std::forward<_Kp>(__key), std::forward<_Mapped>(__obj)); + } + + _LIBCPP_HIDE_FROM_ABI iterator erase(iterator __position) { + return __erase(__position.__key_iter_, __position.__mapped_iter_); + } + + _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __position) { + return __erase(__position.__key_iter_, __position.__mapped_iter_); + } + + _LIBCPP_HIDE_FROM_ABI size_type erase(const key_type& __x) { + auto __iter = find(__x); + if (__iter != end()) { + erase(__iter); + return 1; + } + return 0; + } + + template + requires(__is_compare_transparent && !is_convertible_v<_Kp &&, iterator> && + !is_convertible_v<_Kp &&, const_iterator>) + _LIBCPP_HIDE_FROM_ABI size_type erase(_Kp&& __x) { + auto [__first, __last] = equal_range(__x); + auto __res = __last - __first; + erase(__first, __last); + return __res; + } + + _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __first, const_iterator __last) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + auto __key_it = __containers_.keys.erase(__first.__key_iter_, __last.__key_iter_); + auto __mapped_it = __containers_.values.erase(__first.__mapped_iter_, __last.__mapped_iter_); + __on_failure.__complete(); + return iterator(std::move(__key_it), std::move(__mapped_it)); + } + + _LIBCPP_HIDE_FROM_ABI void swap(flat_map& __y) noexcept { + // warning: The spec has unconditional noexcept, which means that + // if any of the following functions throw an exception, + // std::terminate will be called. + // This is discussed in P2767, which hasn't been voted on yet. 
+ ranges::swap(__compare_, __y.__compare_); + ranges::swap(__containers_.keys, __y.__containers_.keys); + ranges::swap(__containers_.values, __y.__containers_.values); + } + + _LIBCPP_HIDE_FROM_ABI void clear() noexcept { + __containers_.keys.clear(); + __containers_.values.clear(); + } + + // observers + _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __compare_; } + _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return value_compare(__compare_); } + + _LIBCPP_HIDE_FROM_ABI const key_container_type& keys() const noexcept { return __containers_.keys; } + _LIBCPP_HIDE_FROM_ABI const mapped_container_type& values() const noexcept { return __containers_.values; } + + // map operations + _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __x) { return __find_impl(*this, __x); } + + _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI iterator find(const _Kp& __x) { + return __find_impl(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Kp& __x) const { + return __find_impl(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI size_type count(const key_type& __x) const { return contains(__x) ? 1 : 0; } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI size_type count(const _Kp& __x) const { + return contains(__x) ? 
1 : 0; + } + + _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __x) const { return find(__x) != end(); } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI bool contains(const _Kp& __x) const { + return find(__x) != end(); + } + + _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __x) { return __lower_bound(*this, __x); } + + _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __x) const { + return __lower_bound(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _Kp& __x) { + return __lower_bound(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _Kp& __x) const { + return __lower_bound(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __x) { return __upper_bound(*this, __x); } + + _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __x) const { + return __upper_bound(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _Kp& __x) { + return __upper_bound(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _Kp& __x) const { + return __upper_bound(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) { + return __equal_range_impl(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) const { + return __equal_range_impl(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) { + return __equal_range_impl(*this, __x); + } + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) const { + return __equal_range_impl(*this, __x); + } + + friend _LIBCPP_HIDE_FROM_ABI bool operator==(const flat_map& __x, const flat_map& __y) { + return ranges::equal(__x, 
__y); + } + + friend _LIBCPP_HIDE_FROM_ABI auto operator<=>(const flat_map& __x, const flat_map& __y) { + return std::lexicographical_compare_three_way( + __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); + } + + friend _LIBCPP_HIDE_FROM_ABI void swap(flat_map& __x, flat_map& __y) noexcept { __x.swap(__y); } + +private: + struct __ctor_uses_allocator_tag { + explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_tag() = default; + }; + struct __ctor_uses_allocator_empty_tag { + explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_empty_tag() = default; + }; + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_map(__ctor_uses_allocator_tag, + const _Allocator& __alloc, + _KeyCont&& __key_cont, + _MappedCont&& __mapped_cont, + _CompArg&&... __comp) + : __containers_{.keys = std::make_obj_using_allocator( + __alloc, std::forward<_KeyCont>(__key_cont)), + .values = std::make_obj_using_allocator( + __alloc, std::forward<_MappedCont>(__mapped_cont))}, + __compare_(std::forward<_CompArg>(__comp)...) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_map(__ctor_uses_allocator_empty_tag, const _Allocator& __alloc, _CompArg&&... __comp) + : __containers_{.keys = std::make_obj_using_allocator(__alloc), + .values = std::make_obj_using_allocator(__alloc)}, + __compare_(std::forward<_CompArg>(__comp)...) {} + + _LIBCPP_HIDE_FROM_ABI bool __is_sorted_and_unique(auto&& __key_container) const { + auto __greater_or_equal_to = [this](const auto& __x, const auto& __y) { return !__compare_(__x, __y); }; + return ranges::adjacent_find(__key_container, __greater_or_equal_to) == ranges::end(__key_container); + } + + // This function is only used in constructors. So there is not exception handling in this function. 
+ // If the function exits via an exception, there will be no flat_map object constructed, thus, there + // is no invariant state to preserve + _LIBCPP_HIDE_FROM_ABI void __sort_and_unique() { + auto __zv = ranges::views::zip(__containers_.keys, __containers_.values); + ranges::sort(__zv, __compare_, [](const auto& __p) -> decltype(auto) { return std::get<0>(__p); }); + auto __dup_start = ranges::unique(__zv, __key_equiv(__compare_)).begin(); + auto __dist = ranges::distance(__zv.begin(), __dup_start); + __containers_.keys.erase(__containers_.keys.begin() + __dist, __containers_.keys.end()); + __containers_.values.erase(__containers_.values.begin() + __dist, __containers_.values.end()); + } + + template + _LIBCPP_HIDE_FROM_ABI void __append_sort_merge_unique(_InputIterator __first, _Sentinel __last) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + size_t __num_of_appended = __flat_map_utils::__append(*this, std::move(__first), std::move(__last)); + if (__num_of_appended != 0) { + auto __zv = ranges::views::zip(__containers_.keys, __containers_.values); + auto __append_start_offset = __containers_.keys.size() - __num_of_appended; + auto __end = __zv.end(); + auto __compare_key = [this](const auto& __p1, const auto& __p2) { + return __compare_(std::get<0>(__p1), std::get<0>(__p2)); + }; + if constexpr (!_WasSorted) { + ranges::sort(__zv.begin() + __append_start_offset, __end, __compare_key); + } else { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted_and_unique(__containers_.keys | ranges::views::drop(__append_start_offset)), + "Either the key container is not sorted or it contains duplicates"); + } + ranges::inplace_merge(__zv.begin(), __zv.begin() + __append_start_offset, __end, __compare_key); + + auto __dup_start = ranges::unique(__zv, __key_equiv(__compare_)).begin(); + auto __dist = ranges::distance(__zv.begin(), __dup_start); + __containers_.keys.erase(__containers_.keys.begin() + __dist, 
__containers_.keys.end()); + __containers_.values.erase(__containers_.values.begin() + __dist, __containers_.values.end()); + } + __on_failure.__complete(); + } + + template + _LIBCPP_HIDE_FROM_ABI static auto __find_impl(_Self&& __self, const _Kp& __key) { + auto __it = __self.lower_bound(__key); + auto __last = __self.end(); + if (__it == __last || __self.__compare_(__key, __it->first)) { + return __last; + } + return __it; + } + + template + _LIBCPP_HIDE_FROM_ABI static auto __key_equal_range(_Self&& __self, const _Kp& __key) { + auto __it = ranges::lower_bound(__self.__containers_.keys, __key, __self.__compare_); + auto __last = __self.__containers_.keys.end(); + if (__it == __last || __self.__compare_(__key, *__it)) { + return std::make_pair(__it, __it); + } + return std::make_pair(__it, std::next(__it)); + } + + template + _LIBCPP_HIDE_FROM_ABI static auto __equal_range_impl(_Self&& __self, const _Kp& __key) { + auto [__key_first, __key_last] = __key_equal_range(__self, __key); + + const auto __make_mapped_iter = [&](const auto& __key_iter) { + return __self.__containers_.values.begin() + + static_cast>( + ranges::distance(__self.__containers_.keys.begin(), __key_iter)); + }; + + using __iterator_type = ranges::iterator_t; + return std::make_pair(__iterator_type(__key_first, __make_mapped_iter(__key_first)), + __iterator_type(__key_last, __make_mapped_iter(__key_last))); + } + + template + _LIBCPP_HIDE_FROM_ABI static _Res __lower_bound(_Self&& __self, _Kp& __x) { + return __binary_search<_Res>(__self, ranges::lower_bound, __x); + } + + template + _LIBCPP_HIDE_FROM_ABI static _Res __upper_bound(_Self&& __self, _Kp& __x) { + return __binary_search<_Res>(__self, ranges::upper_bound, __x); + } + + template + _LIBCPP_HIDE_FROM_ABI static _Res __binary_search(_Self&& __self, _Fn __search_fn, _Kp& __x) { + auto __key_iter = __search_fn(__self.__containers_.keys, __x, __self.__compare_); + auto __mapped_iter = + __self.__containers_.values.begin() + + static_cast>( 
+ ranges::distance(__self.__containers_.keys.begin(), __key_iter)); + + return _Res(std::move(__key_iter), std::move(__mapped_iter)); + } + + template + _LIBCPP_HIDE_FROM_ABI pair __try_emplace(_KeyArg&& __key, _MArgs&&... __mapped_args) { + auto __key_it = ranges::lower_bound(__containers_.keys, __key, __compare_); + auto __mapped_it = __containers_.values.begin() + ranges::distance(__containers_.keys.begin(), __key_it); + + if (__key_it == __containers_.keys.end() || __compare_(__key, *__key_it)) { + return pair( + __flat_map_utils::__emplace_exact_pos( + *this, + std::move(__key_it), + std::move(__mapped_it), + std::forward<_KeyArg>(__key), + std::forward<_MArgs>(__mapped_args)...), + true); + } else { + return pair(iterator(std::move(__key_it), std::move(__mapped_it)), false); + } + } + + template + _LIBCPP_HIDE_FROM_ABI bool __is_hint_correct(const_iterator __hint, _Kp&& __key) { + if (__hint != cbegin() && !__compare_((__hint - 1)->first, __key)) { + return false; + } + if (__hint != cend() && __compare_(__hint->first, __key)) { + return false; + } + return true; + } + + template + _LIBCPP_HIDE_FROM_ABI pair __try_emplace_hint(const_iterator __hint, _Kp&& __key, _Args&&... 
__args) { + if (__is_hint_correct(__hint, __key)) { + if (__hint == cend() || __compare_(__key, __hint->first)) { + return {__flat_map_utils::__emplace_exact_pos( + *this, + __hint.__key_iter_, + __hint.__mapped_iter_, + std::forward<_Kp>(__key), + std::forward<_Args>(__args)...), + true}; + } else { + // key equals + auto __dist = __hint - cbegin(); + return {iterator(__containers_.keys.begin() + __dist, __containers_.values.begin() + __dist), false}; + } + } else { + return __try_emplace(std::forward<_Kp>(__key), std::forward<_Args>(__args)...); + } + } + + template + _LIBCPP_HIDE_FROM_ABI pair __insert_or_assign(_Kp&& __key, _Mapped&& __mapped) { + auto __r = try_emplace(std::forward<_Kp>(__key), std::forward<_Mapped>(__mapped)); + if (!__r.second) { + __r.first->second = std::forward<_Mapped>(__mapped); + } + return __r; + } + + template + _LIBCPP_HIDE_FROM_ABI iterator __insert_or_assign(const_iterator __hint, _Kp&& __key, _Mapped&& __mapped) { + auto __r = __try_emplace_hint(__hint, std::forward<_Kp>(__key), std::forward<_Mapped>(__mapped)); + if (!__r.second) { + __r.first->second = std::forward<_Mapped>(__mapped); + } + return __r.first; + } + + _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __size) { + if constexpr (requires { __containers_.keys.reserve(__size); }) { + __containers_.keys.reserve(__size); + } + + if constexpr (requires { __containers_.values.reserve(__size); }) { + __containers_.values.reserve(__size); + } + } + + template + _LIBCPP_HIDE_FROM_ABI iterator __erase(_KIter __key_iter_to_remove, _MIter __mapped_iter_to_remove) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + auto __key_iter = __containers_.keys.erase(__key_iter_to_remove); + auto __mapped_iter = __containers_.values.erase(__mapped_iter_to_remove); + __on_failure.__complete(); + return iterator(std::move(__key_iter), std::move(__mapped_iter)); + } + + template + friend typename flat_map<_Key2, _Tp2, _Compare2, _KeyContainer2, 
_MappedContainer2>::size_type + erase_if(flat_map<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>&, _Predicate); + + friend __flat_map_utils; + + containers __containers_; + _LIBCPP_NO_UNIQUE_ADDRESS key_compare __compare_; + + struct __key_equiv { + _LIBCPP_HIDE_FROM_ABI __key_equiv(key_compare __c) : __comp_(__c) {} + _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const { + return !__comp_(std::get<0>(__x), std::get<0>(__y)) && !__comp_(std::get<0>(__y), std::get<0>(__x)); + } + key_compare __comp_; + }; +}; + +template > + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + !__is_allocator<_MappedContainer>::value && + is_invocable_v) +flat_map(_KeyContainer, _MappedContainer, _Compare = _Compare()) + -> flat_map; + +template + requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> && + !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value) +flat_map(_KeyContainer, _MappedContainer, _Allocator) + -> flat_map, + _KeyContainer, + _MappedContainer>; + +template + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> && + uses_allocator_v<_MappedContainer, _Allocator> && + is_invocable_v) +flat_map(_KeyContainer, _MappedContainer, _Compare, _Allocator) + -> flat_map; + +template > + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + !__is_allocator<_MappedContainer>::value && + is_invocable_v) +flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Compare = _Compare()) + -> flat_map; + +template + requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> && + !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value) +flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, 
_Allocator) + -> flat_map, + _KeyContainer, + _MappedContainer>; + +template + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> && + uses_allocator_v<_MappedContainer, _Allocator> && + is_invocable_v) +flat_map(sorted_unique_t, _KeyContainer, _MappedContainer, _Compare, _Allocator) + -> flat_map; + +template >> + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +flat_map(_InputIterator, _InputIterator, _Compare = _Compare()) + -> flat_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>; + +template >> + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +flat_map(sorted_unique_t, _InputIterator, _InputIterator, _Compare = _Compare()) + -> flat_map<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>; + +template >, + class _Allocator = allocator, + class = __enable_if_t::value && __is_allocator<_Allocator>::value>> +flat_map(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_map< + __range_key_type<_Range>, + __range_mapped_type<_Range>, + _Compare, + vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>, + vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>; + +template ::value>> +flat_map(from_range_t, _Range&&, _Allocator) -> flat_map< + __range_key_type<_Range>, + __range_mapped_type<_Range>, + less<__range_key_type<_Range>>, + vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>, + vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>; + +template > + requires(!__is_allocator<_Compare>::value) +flat_map(initializer_list>, _Compare = _Compare()) -> 
flat_map<_Key, _Tp, _Compare>; + +template > + requires(!__is_allocator<_Compare>::value) +flat_map(sorted_unique_t, initializer_list>, _Compare = _Compare()) -> flat_map<_Key, _Tp, _Compare>; + +template +struct uses_allocator, _Allocator> + : bool_constant && uses_allocator_v<_MappedContainer, _Allocator>> {}; + +template +_LIBCPP_HIDE_FROM_ABI typename flat_map<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::size_type +erase_if(flat_map<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>& __flat_map, _Predicate __pred) { + auto __zv = ranges::views::zip(__flat_map.__containers_.keys, __flat_map.__containers_.values); + auto __first = __zv.begin(); + auto __last = __zv.end(); + auto __guard = std::__make_exception_guard([&] { __flat_map.clear(); }); + auto __it = std::remove_if(__first, __last, [&](auto&& __zipped) -> bool { + using _Ref = typename flat_map<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::const_reference; + return __pred(_Ref(std::get<0>(__zipped), std::get<1>(__zipped))); + }); + auto __res = __last - __it; + auto __offset = __it - __first; + + const auto __erase_container = [&](auto& __cont) { __cont.erase(__cont.begin() + __offset, __cont.end()); }; + + __erase_container(__flat_map.__containers_.keys); + __erase_container(__flat_map.__containers_.values); + + __guard.__complete(); + return __res; +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___FLAT_MAP_FLAT_MAP_H diff --git a/lib/libcxx/include/__flat_map/flat_multimap.h b/lib/libcxx/include/__flat_map/flat_multimap.h new file mode 100644 index 000000000000..ea77fb5d79bd --- /dev/null +++ b/lib/libcxx/include/__flat_map/flat_multimap.h @@ -0,0 +1,1010 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FLAT_MAP_FLAT_MULTIMAP_H +#define _LIBCPP___FLAT_MAP_FLAT_MULTIMAP_H + +#include <__algorithm/lexicographical_compare_three_way.h> +#include <__algorithm/min.h> +#include <__algorithm/ranges_equal.h> +#include <__algorithm/ranges_equal_range.h> +#include <__algorithm/ranges_inplace_merge.h> +#include <__algorithm/ranges_is_sorted.h> +#include <__algorithm/ranges_lower_bound.h> +#include <__algorithm/ranges_partition_point.h> +#include <__algorithm/ranges_sort.h> +#include <__algorithm/ranges_unique.h> +#include <__algorithm/ranges_upper_bound.h> +#include <__algorithm/remove_if.h> +#include <__assert> +#include <__compare/synth_three_way.h> +#include <__concepts/convertible_to.h> +#include <__concepts/swappable.h> +#include <__config> +#include <__cstddef/byte.h> +#include <__cstddef/ptrdiff_t.h> +#include <__flat_map/key_value_iterator.h> +#include <__flat_map/sorted_equivalent.h> +#include <__flat_map/utils.h> +#include <__functional/invoke.h> +#include <__functional/is_transparent.h> +#include <__functional/operations.h> +#include <__fwd/vector.h> +#include <__iterator/concepts.h> +#include <__iterator/distance.h> +#include <__iterator/iterator_traits.h> +#include <__iterator/ranges_iterator_traits.h> +#include <__iterator/reverse_iterator.h> +#include <__memory/allocator_traits.h> +#include <__memory/uses_allocator.h> +#include <__memory/uses_allocator_construction.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/container_compatible_range.h> +#include <__ranges/drop_view.h> +#include <__ranges/from_range.h> +#include <__ranges/ref_view.h> +#include <__ranges/size.h> +#include <__ranges/subrange.h> +#include <__ranges/zip_view.h> +#include <__type_traits/conjunction.h> +#include <__type_traits/container_traits.h> +#include <__type_traits/invoke.h> +#include 
<__type_traits/is_allocator.h> +#include <__type_traits/is_nothrow_constructible.h> +#include <__type_traits/is_same.h> +#include <__type_traits/maybe_const.h> +#include <__utility/exception_guard.h> +#include <__utility/move.h> +#include <__utility/pair.h> +#include <__utility/scope_guard.h> +#include <__vector/vector.h> +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#if _LIBCPP_STD_VER >= 23 + +_LIBCPP_BEGIN_NAMESPACE_STD + +template , + class _KeyContainer = vector<_Key>, + class _MappedContainer = vector<_Tp>> +class flat_multimap { + template + friend class flat_multimap; + + static_assert(is_same_v<_Key, typename _KeyContainer::value_type>); + static_assert(is_same_v<_Tp, typename _MappedContainer::value_type>); + static_assert(!is_same_v<_KeyContainer, std::vector>, "vector is not a sequence container"); + static_assert(!is_same_v<_MappedContainer, std::vector>, "vector is not a sequence container"); + + template + using __iterator _LIBCPP_NODEBUG = __key_value_iterator; + +public: + // types + using key_type = _Key; + using mapped_type = _Tp; + using value_type = pair; + using key_compare = __type_identity_t<_Compare>; + using reference = pair; + using const_reference = pair; + using size_type = size_t; + using difference_type = ptrdiff_t; + using iterator = __iterator; // see [container.requirements] + using const_iterator = __iterator; // see [container.requirements] + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + using key_container_type = _KeyContainer; + using mapped_container_type = _MappedContainer; + + class value_compare { + private: + key_compare __comp_; + _LIBCPP_HIDE_FROM_ABI value_compare(key_compare __c) : __comp_(__c) {} + friend flat_multimap; + + public: + _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const { + return 
__comp_(__x.first, __y.first); + } + }; + + struct containers { + key_container_type keys; + mapped_container_type values; + }; + +private: + template + _LIBCPP_HIDE_FROM_ABI static constexpr bool __allocator_ctor_constraint = + _And, uses_allocator>::value; + + _LIBCPP_HIDE_FROM_ABI static constexpr bool __is_compare_transparent = __is_transparent_v<_Compare>; + +public: + // [flat.map.cons], construct/copy/destroy + _LIBCPP_HIDE_FROM_ABI flat_multimap() noexcept( + is_nothrow_default_constructible_v<_KeyContainer> && is_nothrow_default_constructible_v<_MappedContainer> && + is_nothrow_default_constructible_v<_Compare>) + : __containers_(), __compare_() {} + + _LIBCPP_HIDE_FROM_ABI flat_multimap(const flat_multimap&) = default; + + // The copy/move constructors are not specified in the spec, which means they should be defaulted. + // However, the move constructor can potentially leave a moved-from object in an inconsistent + // state if an exception is thrown. + _LIBCPP_HIDE_FROM_ABI flat_multimap(flat_multimap&& __other) noexcept( + is_nothrow_move_constructible_v<_KeyContainer> && is_nothrow_move_constructible_v<_MappedContainer> && + is_nothrow_move_constructible_v<_Compare>) +# if _LIBCPP_HAS_EXCEPTIONS + try +# endif // _LIBCPP_HAS_EXCEPTIONS + : __containers_(std::move(__other.__containers_)), __compare_(std::move(__other.__compare_)) { + __other.clear(); +# if _LIBCPP_HAS_EXCEPTIONS + } catch (...) 
{ + __other.clear(); + // gcc does not like the `throw` keyword in a conditionally noexcept function + if constexpr (!(is_nothrow_move_constructible_v<_KeyContainer> && + is_nothrow_move_constructible_v<_MappedContainer> && is_nothrow_move_constructible_v<_Compare>)) { + throw; + } +# endif // _LIBCPP_HAS_EXCEPTIONS + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_multimap(const flat_multimap& __other, const _Allocator& __alloc) + : flat_multimap(__ctor_uses_allocator_tag{}, + __alloc, + __other.__containers_.keys, + __other.__containers_.values, + __other.__compare_) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_multimap(flat_multimap&& __other, const _Allocator& __alloc) +# if _LIBCPP_HAS_EXCEPTIONS + try +# endif // _LIBCPP_HAS_EXCEPTIONS + : flat_multimap(__ctor_uses_allocator_tag{}, + __alloc, + std::move(__other.__containers_.keys), + std::move(__other.__containers_.values), + std::move(__other.__compare_)) { + __other.clear(); +# if _LIBCPP_HAS_EXCEPTIONS + } catch (...) 
{ + __other.clear(); + throw; +# endif // _LIBCPP_HAS_EXCEPTIONS + } + + _LIBCPP_HIDE_FROM_ABI flat_multimap( + key_container_type __key_cont, mapped_container_type __mapped_cont, const key_compare& __comp = key_compare()) + : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_multimap keys and mapped containers have different size"); + __sort(); + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_multimap( + const key_container_type& __key_cont, const mapped_container_type& __mapped_cont, const _Allocator& __alloc) + : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_multimap keys and mapped containers have different size"); + __sort(); + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_multimap(const key_container_type& __key_cont, + const mapped_container_type& __mapped_cont, + const key_compare& __comp, + const _Allocator& __alloc) + : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_multimap keys and mapped containers have different size"); + __sort(); + } + + _LIBCPP_HIDE_FROM_ABI + flat_multimap(sorted_equivalent_t, + key_container_type __key_cont, + mapped_container_type __mapped_cont, + const key_compare& __comp = key_compare()) + : __containers_{.keys = std::move(__key_cont), .values = std::move(__mapped_cont)}, __compare_(__comp) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_multimap keys and mapped containers have different size"); + 
_LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(__is_sorted(__containers_.keys), "Key container is not sorted"); + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_multimap(sorted_equivalent_t, + const key_container_type& __key_cont, + const mapped_container_type& __mapped_cont, + const _Allocator& __alloc) + : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_multimap keys and mapped containers have different size"); + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(__is_sorted(__containers_.keys), "Key container is not sorted"); + } + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_multimap(sorted_equivalent_t, + const key_container_type& __key_cont, + const mapped_container_type& __mapped_cont, + const key_compare& __comp, + const _Allocator& __alloc) + : flat_multimap(__ctor_uses_allocator_tag{}, __alloc, __key_cont, __mapped_cont, __comp) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE(__containers_.keys.size() == __containers_.values.size(), + "flat_multimap keys and mapped containers have different size"); + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(__is_sorted(__containers_.keys), "Key container is not sorted"); + } + + _LIBCPP_HIDE_FROM_ABI explicit flat_multimap(const key_compare& __comp) : __containers_(), __compare_(__comp) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_multimap(const key_compare& __comp, const _Allocator& __alloc) + : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI explicit flat_multimap(const _Allocator& __alloc) + : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) {} + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI + 
flat_multimap(_InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) + : __containers_(), __compare_(__comp) { + insert(__first, __last); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) + _LIBCPP_HIDE_FROM_ABI + flat_multimap(_InputIterator __first, _InputIterator __last, const key_compare& __comp, const _Allocator& __alloc) + : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { + insert(__first, __last); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) + _LIBCPP_HIDE_FROM_ABI flat_multimap(_InputIterator __first, _InputIterator __last, const _Allocator& __alloc) + : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) { + insert(__first, __last); + } + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t __fr, _Range&& __rg) + : flat_multimap(__fr, std::forward<_Range>(__rg), key_compare()) {} + + template <_ContainerCompatibleRange _Range, class _Allocator> + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t, _Range&& __rg, const _Allocator& __alloc) + : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) { + insert_range(std::forward<_Range>(__rg)); + } + + template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t, _Range&& __rg, const key_compare& __comp) : flat_multimap(__comp) { + insert_range(std::forward<_Range>(__rg)); + } + + template <_ContainerCompatibleRange _Range, class _Allocator> + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_multimap(from_range_t, _Range&& __rg, const key_compare& __comp, const _Allocator& __alloc) + : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { + insert_range(std::forward<_Range>(__rg)); + } + + template + requires 
__has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI flat_multimap( + sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const key_compare& __comp = key_compare()) + : __containers_(), __compare_(__comp) { + insert(sorted_equivalent, __first, __last); + } + template + requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) + _LIBCPP_HIDE_FROM_ABI + flat_multimap(sorted_equivalent_t, + _InputIterator __first, + _InputIterator __last, + const key_compare& __comp, + const _Allocator& __alloc) + : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc, __comp) { + insert(sorted_equivalent, __first, __last); + } + + template + requires(__has_input_iterator_category<_InputIterator>::value && __allocator_ctor_constraint<_Allocator>) + _LIBCPP_HIDE_FROM_ABI + flat_multimap(sorted_equivalent_t, _InputIterator __first, _InputIterator __last, const _Allocator& __alloc) + : flat_multimap(__ctor_uses_allocator_empty_tag{}, __alloc) { + insert(sorted_equivalent, __first, __last); + } + + _LIBCPP_HIDE_FROM_ABI flat_multimap(initializer_list __il, const key_compare& __comp = key_compare()) + : flat_multimap(__il.begin(), __il.end(), __comp) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_multimap(initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) + : flat_multimap(__il.begin(), __il.end(), __comp, __alloc) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_multimap(initializer_list __il, const _Allocator& __alloc) + : flat_multimap(__il.begin(), __il.end(), __alloc) {} + + _LIBCPP_HIDE_FROM_ABI + flat_multimap(sorted_equivalent_t, initializer_list __il, const key_compare& __comp = key_compare()) + : flat_multimap(sorted_equivalent, __il.begin(), __il.end(), __comp) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_multimap( + 
sorted_equivalent_t, initializer_list __il, const key_compare& __comp, const _Allocator& __alloc) + : flat_multimap(sorted_equivalent, __il.begin(), __il.end(), __comp, __alloc) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_multimap(sorted_equivalent_t, initializer_list __il, const _Allocator& __alloc) + : flat_multimap(sorted_equivalent, __il.begin(), __il.end(), __alloc) {} + + _LIBCPP_HIDE_FROM_ABI flat_multimap& operator=(initializer_list __il) { + clear(); + insert(__il); + return *this; + } + + // copy/move assignment are not specified in the spec (defaulted) + // but move assignment can potentially leave moved from object in an inconsistent + // state if an exception is thrown + _LIBCPP_HIDE_FROM_ABI flat_multimap& operator=(const flat_multimap&) = default; + + _LIBCPP_HIDE_FROM_ABI flat_multimap& operator=(flat_multimap&& __other) noexcept( + is_nothrow_move_assignable_v<_KeyContainer> && is_nothrow_move_assignable_v<_MappedContainer> && + is_nothrow_move_assignable_v<_Compare>) { + auto __clear_other_guard = std::__make_scope_guard([&]() noexcept { __other.clear() /* noexcept */; }); + auto __clear_self_guard = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + __containers_ = std::move(__other.__containers_); + __compare_ = std::move(__other.__compare_); + __clear_self_guard.__complete(); + return *this; + } + + // iterators + _LIBCPP_HIDE_FROM_ABI iterator begin() noexcept { + return iterator(__containers_.keys.begin(), __containers_.values.begin()); + } + + _LIBCPP_HIDE_FROM_ABI const_iterator begin() const noexcept { + return const_iterator(__containers_.keys.begin(), __containers_.values.begin()); + } + + _LIBCPP_HIDE_FROM_ABI iterator end() noexcept { + return iterator(__containers_.keys.end(), __containers_.values.end()); + } + + _LIBCPP_HIDE_FROM_ABI const_iterator end() const noexcept { + return const_iterator(__containers_.keys.end(), __containers_.values.end()); + } + + 
_LIBCPP_HIDE_FROM_ABI reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); } + _LIBCPP_HIDE_FROM_ABI reverse_iterator rend() noexcept { return reverse_iterator(begin()); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); } + + _LIBCPP_HIDE_FROM_ABI const_iterator cbegin() const noexcept { return begin(); } + _LIBCPP_HIDE_FROM_ABI const_iterator cend() const noexcept { return end(); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); } + _LIBCPP_HIDE_FROM_ABI const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); } + + // [flat.map.capacity], capacity + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool empty() const noexcept { return __containers_.keys.empty(); } + + _LIBCPP_HIDE_FROM_ABI size_type size() const noexcept { return __containers_.keys.size(); } + + _LIBCPP_HIDE_FROM_ABI size_type max_size() const noexcept { + return std::min(__containers_.keys.max_size(), __containers_.values.max_size()); + } + + // [flat.map.modifiers], modifiers + template + requires is_constructible_v, _Args...> && is_move_constructible_v && + is_move_constructible_v + _LIBCPP_HIDE_FROM_ABI iterator emplace(_Args&&... __args) { + std::pair __pair(std::forward<_Args>(__args)...); + auto __key_it = ranges::upper_bound(__containers_.keys, __pair.first, __compare_); + auto __mapped_it = __corresponding_mapped_it(*this, __key_it); + + return __flat_map_utils::__emplace_exact_pos( + *this, std::move(__key_it), std::move(__mapped_it), std::move(__pair.first), std::move(__pair.second)); + } + + template + requires is_constructible_v, _Args...> + _LIBCPP_HIDE_FROM_ABI iterator emplace_hint(const_iterator __hint, _Args&&... 
__args) { + std::pair __pair(std::forward<_Args>(__args)...); + + auto __prev_larger = __hint != cbegin() && __compare_(__pair.first, (__hint - 1)->first); + auto __next_smaller = __hint != cend() && __compare_(__hint->first, __pair.first); + + auto __hint_distance = __hint.__key_iter_ - __containers_.keys.cbegin(); + auto __key_iter = __containers_.keys.begin() + __hint_distance; + auto __mapped_iter = __containers_.values.begin() + __hint_distance; + + if (!__prev_larger && !__next_smaller) [[likely]] { + // hint correct, just use exact hint iterators + } else if (__prev_larger && !__next_smaller) { + // the hint position is more to the right than the key should have been. + // we want to emplace the element to a position as right as possible + // e.g. Insert new element "2" in the following range + // 1, 1, 2, 2, 2, 3, 4, 6 + // ^ + // | + // hint + // We want to insert "2" after the last existing "2" + __key_iter = ranges::upper_bound(__containers_.keys.begin(), __key_iter, __pair.first, __compare_); + __mapped_iter = __corresponding_mapped_it(*this, __key_iter); + } else { + _LIBCPP_ASSERT_INTERNAL(!__prev_larger && __next_smaller, "this means that the multimap is not sorted"); + + // the hint position is more to the left than the key should have been. 
+ // we want to emplace the element to a position as left as possible + // 1, 1, 2, 2, 2, 3, 4, 6 + // ^ + // | + // hint + // We want to insert "2" before the first existing "2" + __key_iter = ranges::lower_bound(__key_iter, __containers_.keys.end(), __pair.first, __compare_); + __mapped_iter = __corresponding_mapped_it(*this, __key_iter); + } + return __flat_map_utils::__emplace_exact_pos( + *this, __key_iter, __mapped_iter, std::move(__pair.first), std::move(__pair.second)); + } + + _LIBCPP_HIDE_FROM_ABI iterator insert(const value_type& __x) { return emplace(__x); } + + _LIBCPP_HIDE_FROM_ABI iterator insert(value_type&& __x) { return emplace(std::move(__x)); } + + _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, const value_type& __x) { + return emplace_hint(__hint, __x); + } + + _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, value_type&& __x) { + return emplace_hint(__hint, std::move(__x)); + } + + template + requires is_constructible_v, _PairLike> + _LIBCPP_HIDE_FROM_ABI iterator insert(_PairLike&& __x) { + return emplace(std::forward<_PairLike>(__x)); + } + + template + requires is_constructible_v, _PairLike> + _LIBCPP_HIDE_FROM_ABI iterator insert(const_iterator __hint, _PairLike&& __x) { + return emplace_hint(__hint, std::forward<_PairLike>(__x)); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI void insert(_InputIterator __first, _InputIterator __last) { + if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { + __reserve(__last - __first); + } + __append_sort_merge(std::move(__first), std::move(__last)); + } + + template + requires __has_input_iterator_category<_InputIterator>::value + _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, _InputIterator __first, _InputIterator __last) { + if constexpr (sized_sentinel_for<_InputIterator, _InputIterator>) { + __reserve(__last - __first); + } + + __append_sort_merge(std::move(__first), std::move(__last)); + } + + 
template <_ContainerCompatibleRange _Range> + _LIBCPP_HIDE_FROM_ABI void insert_range(_Range&& __range) { + if constexpr (ranges::sized_range<_Range>) { + __reserve(ranges::size(__range)); + } + + __append_sort_merge(ranges::begin(__range), ranges::end(__range)); + } + + _LIBCPP_HIDE_FROM_ABI void insert(initializer_list __il) { insert(__il.begin(), __il.end()); } + + _LIBCPP_HIDE_FROM_ABI void insert(sorted_equivalent_t, initializer_list __il) { + insert(sorted_equivalent, __il.begin(), __il.end()); + } + + _LIBCPP_HIDE_FROM_ABI containers extract() && { + auto __guard = std::__make_scope_guard([&]() noexcept { clear() /* noexcept */; }); + auto __ret = std::move(__containers_); + return __ret; + } + + _LIBCPP_HIDE_FROM_ABI void replace(key_container_type&& __key_cont, mapped_container_type&& __mapped_cont) { + _LIBCPP_ASSERT_VALID_INPUT_RANGE( + __key_cont.size() == __mapped_cont.size(), "flat_multimap keys and mapped containers have different size"); + + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(__is_sorted(__key_cont), "Key container is not sorted"); + auto __guard = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + __containers_.keys = std::move(__key_cont); + __containers_.values = std::move(__mapped_cont); + __guard.__complete(); + } + + _LIBCPP_HIDE_FROM_ABI iterator erase(iterator __position) { + return __erase(__position.__key_iter_, __position.__mapped_iter_); + } + + _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __position) { + return __erase(__position.__key_iter_, __position.__mapped_iter_); + } + + _LIBCPP_HIDE_FROM_ABI size_type erase(const key_type& __x) { + auto [__first, __last] = equal_range(__x); + auto __res = __last - __first; + erase(__first, __last); + return __res; + } + + template + requires(__is_compare_transparent && !is_convertible_v<_Kp &&, iterator> && + !is_convertible_v<_Kp &&, const_iterator>) + _LIBCPP_HIDE_FROM_ABI size_type erase(_Kp&& __x) { + auto [__first, __last] = equal_range(__x); + auto __res = 
__last - __first; + erase(__first, __last); + return __res; + } + + _LIBCPP_HIDE_FROM_ABI iterator erase(const_iterator __first, const_iterator __last) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + auto __key_it = __containers_.keys.erase(__first.__key_iter_, __last.__key_iter_); + auto __mapped_it = __containers_.values.erase(__first.__mapped_iter_, __last.__mapped_iter_); + __on_failure.__complete(); + return iterator(std::move(__key_it), std::move(__mapped_it)); + } + + _LIBCPP_HIDE_FROM_ABI void swap(flat_multimap& __y) noexcept { + // warning: The spec has unconditional noexcept, which means that + // if any of the following functions throw an exception, + // std::terminate will be called + ranges::swap(__compare_, __y.__compare_); + ranges::swap(__containers_.keys, __y.__containers_.keys); + ranges::swap(__containers_.values, __y.__containers_.values); + } + + _LIBCPP_HIDE_FROM_ABI void clear() noexcept { + __containers_.keys.clear(); + __containers_.values.clear(); + } + + // observers + _LIBCPP_HIDE_FROM_ABI key_compare key_comp() const { return __compare_; } + _LIBCPP_HIDE_FROM_ABI value_compare value_comp() const { return value_compare(__compare_); } + + _LIBCPP_HIDE_FROM_ABI const key_container_type& keys() const noexcept { return __containers_.keys; } + _LIBCPP_HIDE_FROM_ABI const mapped_container_type& values() const noexcept { return __containers_.values; } + + // map operations + _LIBCPP_HIDE_FROM_ABI iterator find(const key_type& __x) { return __find_impl(*this, __x); } + + _LIBCPP_HIDE_FROM_ABI const_iterator find(const key_type& __x) const { return __find_impl(*this, __x); } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI iterator find(const _Kp& __x) { + return __find_impl(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI const_iterator find(const _Kp& __x) const { + return __find_impl(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI 
size_type count(const key_type& __x) const { + auto [__first, __last] = equal_range(__x); + return __last - __first; + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI size_type count(const _Kp& __x) const { + auto [__first, __last] = equal_range(__x); + return __last - __first; + } + + _LIBCPP_HIDE_FROM_ABI bool contains(const key_type& __x) const { return find(__x) != end(); } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI bool contains(const _Kp& __x) const { + return find(__x) != end(); + } + + _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const key_type& __x) { return __lower_bound(*this, __x); } + + _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const key_type& __x) const { + return __lower_bound(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI iterator lower_bound(const _Kp& __x) { + return __lower_bound(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI const_iterator lower_bound(const _Kp& __x) const { + return __lower_bound(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const key_type& __x) { return __upper_bound(*this, __x); } + + _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const key_type& __x) const { + return __upper_bound(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI iterator upper_bound(const _Kp& __x) { + return __upper_bound(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI const_iterator upper_bound(const _Kp& __x) const { + return __upper_bound(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) { + return __equal_range_impl(*this, __x); + } + + _LIBCPP_HIDE_FROM_ABI pair equal_range(const key_type& __x) const { + return __equal_range_impl(*this, __x); + } + + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) { + return 
__equal_range_impl(*this, __x); + } + template + requires __is_compare_transparent + _LIBCPP_HIDE_FROM_ABI pair equal_range(const _Kp& __x) const { + return __equal_range_impl(*this, __x); + } + + friend _LIBCPP_HIDE_FROM_ABI bool operator==(const flat_multimap& __x, const flat_multimap& __y) { + return ranges::equal(__x, __y); + } + + friend _LIBCPP_HIDE_FROM_ABI auto operator<=>(const flat_multimap& __x, const flat_multimap& __y) { + return std::lexicographical_compare_three_way( + __x.begin(), __x.end(), __y.begin(), __y.end(), std::__synth_three_way); + } + + friend _LIBCPP_HIDE_FROM_ABI void swap(flat_multimap& __x, flat_multimap& __y) noexcept { __x.swap(__y); } + +private: + struct __ctor_uses_allocator_tag { + explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_tag() = default; + }; + struct __ctor_uses_allocator_empty_tag { + explicit _LIBCPP_HIDE_FROM_ABI __ctor_uses_allocator_empty_tag() = default; + }; + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI + flat_multimap(__ctor_uses_allocator_tag, + const _Allocator& __alloc, + _KeyCont&& __key_cont, + _MappedCont&& __mapped_cont, + _CompArg&&... __comp) + : __containers_{.keys = std::make_obj_using_allocator( + __alloc, std::forward<_KeyCont>(__key_cont)), + .values = std::make_obj_using_allocator( + __alloc, std::forward<_MappedCont>(__mapped_cont))}, + __compare_(std::forward<_CompArg>(__comp)...) {} + + template + requires __allocator_ctor_constraint<_Allocator> + _LIBCPP_HIDE_FROM_ABI flat_multimap(__ctor_uses_allocator_empty_tag, const _Allocator& __alloc, _CompArg&&... __comp) + : __containers_{.keys = std::make_obj_using_allocator(__alloc), + .values = std::make_obj_using_allocator(__alloc)}, + __compare_(std::forward<_CompArg>(__comp)...) 
{} + + _LIBCPP_HIDE_FROM_ABI bool __is_sorted(auto&& __key_container) const { + return ranges::is_sorted(__key_container, __compare_); + } + + _LIBCPP_HIDE_FROM_ABI void __sort() { + auto __zv = ranges::views::zip(__containers_.keys, __containers_.values); + ranges::sort(__zv, __compare_, [](const auto& __p) -> decltype(auto) { return std::get<0>(__p); }); + } + + template + _LIBCPP_HIDE_FROM_ABI static auto __corresponding_mapped_it(_Self&& __self, _KeyIter&& __key_iter) { + return __self.__containers_.values.begin() + + static_cast>( + ranges::distance(__self.__containers_.keys.begin(), __key_iter)); + } + + template + _LIBCPP_HIDE_FROM_ABI void __append_sort_merge(_InputIterator __first, _Sentinel __last) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + size_t __num_appended = __flat_map_utils::__append(*this, std::move(__first), std::move(__last)); + if (__num_appended != 0) { + auto __zv = ranges::views::zip(__containers_.keys, __containers_.values); + auto __append_start_offset = __containers_.keys.size() - __num_appended; + auto __end = __zv.end(); + auto __compare_key = [this](const auto& __p1, const auto& __p2) { + return __compare_(std::get<0>(__p1), std::get<0>(__p2)); + }; + if constexpr (!_WasSorted) { + ranges::sort(__zv.begin() + __append_start_offset, __end, __compare_key); + } else { + _LIBCPP_ASSERT_SEMANTIC_REQUIREMENT( + __is_sorted(__containers_.keys | ranges::views::drop(__append_start_offset)), + "Key container is not sorted"); + } + ranges::inplace_merge(__zv.begin(), __zv.begin() + __append_start_offset, __end, __compare_key); + } + __on_failure.__complete(); + } + + template + _LIBCPP_HIDE_FROM_ABI static auto __find_impl(_Self&& __self, const _Kp& __key) { + auto __it = __self.lower_bound(__key); + auto __last = __self.end(); + if (__it == __last || __self.__compare_(__key, __it->first)) { + return __last; + } + return __it; + } + + template + _LIBCPP_HIDE_FROM_ABI static auto 
__equal_range_impl(_Self&& __self, const _Kp& __key) { + auto [__key_first, __key_last] = ranges::equal_range(__self.__containers_.keys, __key, __self.__compare_); + + using __iterator_type = ranges::iterator_t; + return std::make_pair(__iterator_type(__key_first, __corresponding_mapped_it(__self, __key_first)), + __iterator_type(__key_last, __corresponding_mapped_it(__self, __key_last))); + } + + template + _LIBCPP_HIDE_FROM_ABI static _Res __lower_bound(_Self&& __self, _Kp& __x) { + auto __key_iter = ranges::lower_bound(__self.__containers_.keys, __x, __self.__compare_); + auto __mapped_iter = __corresponding_mapped_it(__self, __key_iter); + return _Res(std::move(__key_iter), std::move(__mapped_iter)); + } + + template + _LIBCPP_HIDE_FROM_ABI static _Res __upper_bound(_Self&& __self, _Kp& __x) { + auto __key_iter = ranges::upper_bound(__self.__containers_.keys, __x, __self.__compare_); + auto __mapped_iter = __corresponding_mapped_it(__self, __key_iter); + return _Res(std::move(__key_iter), std::move(__mapped_iter)); + } + + _LIBCPP_HIDE_FROM_ABI void __reserve(size_t __size) { + if constexpr (requires { __containers_.keys.reserve(__size); }) { + __containers_.keys.reserve(__size); + } + + if constexpr (requires { __containers_.values.reserve(__size); }) { + __containers_.values.reserve(__size); + } + } + + template + _LIBCPP_HIDE_FROM_ABI iterator __erase(_KIter __key_iter_to_remove, _MIter __mapped_iter_to_remove) { + auto __on_failure = std::__make_exception_guard([&]() noexcept { clear() /* noexcept */; }); + auto __key_iter = __containers_.keys.erase(__key_iter_to_remove); + auto __mapped_iter = __containers_.values.erase(__mapped_iter_to_remove); + __on_failure.__complete(); + return iterator(std::move(__key_iter), std::move(__mapped_iter)); + } + + template + friend typename flat_multimap<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>::size_type + erase_if(flat_multimap<_Key2, _Tp2, _Compare2, _KeyContainer2, _MappedContainer2>&, _Predicate); + 
+ friend __flat_map_utils; + + containers __containers_; + _LIBCPP_NO_UNIQUE_ADDRESS key_compare __compare_; + + struct __key_equiv { + _LIBCPP_HIDE_FROM_ABI __key_equiv(key_compare __c) : __comp_(__c) {} + _LIBCPP_HIDE_FROM_ABI bool operator()(const_reference __x, const_reference __y) const { + return !__comp_(std::get<0>(__x), std::get<0>(__y)) && !__comp_(std::get<0>(__y), std::get<0>(__x)); + } + key_compare __comp_; + }; +}; + +template > + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + !__is_allocator<_MappedContainer>::value && + is_invocable_v) +flat_multimap(_KeyContainer, _MappedContainer, _Compare = _Compare()) + -> flat_multimap; + +template + requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> && + !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value) +flat_multimap(_KeyContainer, _MappedContainer, _Allocator) + -> flat_multimap, + _KeyContainer, + _MappedContainer>; + +template + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> && + uses_allocator_v<_MappedContainer, _Allocator> && + is_invocable_v) +flat_multimap(_KeyContainer, _MappedContainer, _Compare, _Allocator) + -> flat_multimap; + +template > + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + !__is_allocator<_MappedContainer>::value && + is_invocable_v) +flat_multimap(sorted_equivalent_t, _KeyContainer, _MappedContainer, _Compare = _Compare()) + -> flat_multimap; + +template + requires(uses_allocator_v<_KeyContainer, _Allocator> && uses_allocator_v<_MappedContainer, _Allocator> && + !__is_allocator<_KeyContainer>::value && !__is_allocator<_MappedContainer>::value) +flat_multimap(sorted_equivalent_t, _KeyContainer, _MappedContainer, _Allocator) + -> flat_multimap, + _KeyContainer, + _MappedContainer>; + 
+template + requires(!__is_allocator<_Compare>::value && !__is_allocator<_KeyContainer>::value && + !__is_allocator<_MappedContainer>::value && uses_allocator_v<_KeyContainer, _Allocator> && + uses_allocator_v<_MappedContainer, _Allocator> && + is_invocable_v) +flat_multimap(sorted_equivalent_t, _KeyContainer, _MappedContainer, _Compare, _Allocator) + -> flat_multimap; + +template >> + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +flat_multimap(_InputIterator, _InputIterator, _Compare = _Compare()) + -> flat_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>; + +template >> + requires(__has_input_iterator_category<_InputIterator>::value && !__is_allocator<_Compare>::value) +flat_multimap(sorted_equivalent_t, _InputIterator, _InputIterator, _Compare = _Compare()) + -> flat_multimap<__iter_key_type<_InputIterator>, __iter_mapped_type<_InputIterator>, _Compare>; + +template >, + class _Allocator = allocator, + class = __enable_if_t::value && __is_allocator<_Allocator>::value>> +flat_multimap(from_range_t, _Range&&, _Compare = _Compare(), _Allocator = _Allocator()) -> flat_multimap< + __range_key_type<_Range>, + __range_mapped_type<_Range>, + _Compare, + vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>, + vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>; + +template ::value>> +flat_multimap(from_range_t, _Range&&, _Allocator) -> flat_multimap< + __range_key_type<_Range>, + __range_mapped_type<_Range>, + less<__range_key_type<_Range>>, + vector<__range_key_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_key_type<_Range>>>, + vector<__range_mapped_type<_Range>, __allocator_traits_rebind_t<_Allocator, __range_mapped_type<_Range>>>>; + +template > + requires(!__is_allocator<_Compare>::value) +flat_multimap(initializer_list>, _Compare = _Compare()) -> 
flat_multimap<_Key, _Tp, _Compare>; + +template > + requires(!__is_allocator<_Compare>::value) +flat_multimap(sorted_equivalent_t, initializer_list>, _Compare = _Compare()) + -> flat_multimap<_Key, _Tp, _Compare>; + +template +struct uses_allocator, _Allocator> + : bool_constant && uses_allocator_v<_MappedContainer, _Allocator>> {}; + +template +_LIBCPP_HIDE_FROM_ABI typename flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::size_type +erase_if(flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>& __flat_multimap, _Predicate __pred) { + auto __zv = ranges::views::zip(__flat_multimap.__containers_.keys, __flat_multimap.__containers_.values); + auto __first = __zv.begin(); + auto __last = __zv.end(); + auto __guard = std::__make_exception_guard([&] { __flat_multimap.clear(); }); + auto __it = std::remove_if(__first, __last, [&](auto&& __zipped) -> bool { + using _Ref = typename flat_multimap<_Key, _Tp, _Compare, _KeyContainer, _MappedContainer>::const_reference; + return __pred(_Ref(std::get<0>(__zipped), std::get<1>(__zipped))); + }); + auto __res = __last - __it; + auto __offset = __it - __first; + + const auto __erase_container = [&](auto& __cont) { __cont.erase(__cont.begin() + __offset, __cont.end()); }; + + __erase_container(__flat_multimap.__containers_.keys); + __erase_container(__flat_multimap.__containers_.values); + + __guard.__complete(); + return __res; +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___FLAT_MAP_FLAT_MULTIMAP_H diff --git a/lib/libcxx/include/__flat_map/key_value_iterator.h b/lib/libcxx/include/__flat_map/key_value_iterator.h new file mode 100644 index 000000000000..3ebb653deb19 --- /dev/null +++ b/lib/libcxx/include/__flat_map/key_value_iterator.h @@ -0,0 +1,176 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FLAT_MAP_KEY_VALUE_ITERATOR_H +#define _LIBCPP___FLAT_MAP_KEY_VALUE_ITERATOR_H + +#include <__compare/three_way_comparable.h> +#include <__concepts/convertible_to.h> +#include <__config> +#include <__iterator/iterator_traits.h> +#include <__memory/addressof.h> +#include <__type_traits/conditional.h> +#include <__utility/move.h> +#include <__utility/pair.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#if _LIBCPP_STD_VER >= 23 + +_LIBCPP_BEGIN_NAMESPACE_STD + +/** + * __key_value_iterator is a proxy iterator which zips the underlying + * _KeyContainer::iterator and the underlying _MappedContainer::iterator. + * The two underlying iterators will be incremented/decremented together. + * And the reference is a pair of the const key reference and the value reference. + */ +template +struct __key_value_iterator { +private: + using __key_iterator _LIBCPP_NODEBUG = typename _KeyContainer::const_iterator; + using __mapped_iterator _LIBCPP_NODEBUG = + _If<_Const, typename _MappedContainer::const_iterator, typename _MappedContainer::iterator>; + using __reference _LIBCPP_NODEBUG = _If<_Const, typename _Owner::const_reference, typename _Owner::reference>; + + struct __arrow_proxy { + __reference __ref_; + _LIBCPP_HIDE_FROM_ABI __reference* operator->() { return std::addressof(__ref_); } + }; + + __key_iterator __key_iter_; + __mapped_iterator __mapped_iter_; + + friend _Owner; + + template + friend struct __key_value_iterator; + +public: + using iterator_concept = random_access_iterator_tag; + // `__key_value_iterator` only satisfy "Cpp17InputIterator" named requirements, because + // its `reference` is not a reference type. 
+ // However, to avoid surprising runtime behaviour when it is used with the + // Cpp17 algorithms or operations, iterator_category is set to random_access_iterator_tag. + using iterator_category = random_access_iterator_tag; + using value_type = typename _Owner::value_type; + using difference_type = typename _Owner::difference_type; + + _LIBCPP_HIDE_FROM_ABI __key_value_iterator() = default; + + _LIBCPP_HIDE_FROM_ABI __key_value_iterator(__key_value_iterator<_Owner, _KeyContainer, _MappedContainer, !_Const> __i) + requires _Const && convertible_to && + convertible_to + : __key_iter_(std::move(__i.__key_iter_)), __mapped_iter_(std::move(__i.__mapped_iter_)) {} + + _LIBCPP_HIDE_FROM_ABI __key_value_iterator(__key_iterator __key_iter, __mapped_iterator __mapped_iter) + : __key_iter_(std::move(__key_iter)), __mapped_iter_(std::move(__mapped_iter)) {} + + _LIBCPP_HIDE_FROM_ABI __reference operator*() const { return __reference(*__key_iter_, *__mapped_iter_); } + _LIBCPP_HIDE_FROM_ABI __arrow_proxy operator->() const { return __arrow_proxy{**this}; } + + _LIBCPP_HIDE_FROM_ABI __key_value_iterator& operator++() { + ++__key_iter_; + ++__mapped_iter_; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI __key_value_iterator operator++(int) { + __key_value_iterator __tmp(*this); + ++*this; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI __key_value_iterator& operator--() { + --__key_iter_; + --__mapped_iter_; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI __key_value_iterator operator--(int) { + __key_value_iterator __tmp(*this); + --*this; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI __key_value_iterator& operator+=(difference_type __x) { + __key_iter_ += __x; + __mapped_iter_ += __x; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI __key_value_iterator& operator-=(difference_type __x) { + __key_iter_ -= __x; + __mapped_iter_ -= __x; + return *this; + } + + _LIBCPP_HIDE_FROM_ABI __reference operator[](difference_type __n) const { return *(*this + __n); } + + _LIBCPP_HIDE_FROM_ABI 
friend constexpr bool + operator==(const __key_value_iterator& __x, const __key_value_iterator& __y) { + return __x.__key_iter_ == __y.__key_iter_; + } + + _LIBCPP_HIDE_FROM_ABI friend bool operator<(const __key_value_iterator& __x, const __key_value_iterator& __y) { + return __x.__key_iter_ < __y.__key_iter_; + } + + _LIBCPP_HIDE_FROM_ABI friend bool operator>(const __key_value_iterator& __x, const __key_value_iterator& __y) { + return __y < __x; + } + + _LIBCPP_HIDE_FROM_ABI friend bool operator<=(const __key_value_iterator& __x, const __key_value_iterator& __y) { + return !(__y < __x); + } + + _LIBCPP_HIDE_FROM_ABI friend bool operator>=(const __key_value_iterator& __x, const __key_value_iterator& __y) { + return !(__x < __y); + } + + _LIBCPP_HIDE_FROM_ABI friend auto operator<=>(const __key_value_iterator& __x, const __key_value_iterator& __y) + requires three_way_comparable<__key_iterator> + { + return __x.__key_iter_ <=> __y.__key_iter_; + } + + _LIBCPP_HIDE_FROM_ABI friend __key_value_iterator operator+(const __key_value_iterator& __i, difference_type __n) { + auto __tmp = __i; + __tmp += __n; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI friend __key_value_iterator operator+(difference_type __n, const __key_value_iterator& __i) { + return __i + __n; + } + + _LIBCPP_HIDE_FROM_ABI friend __key_value_iterator operator-(const __key_value_iterator& __i, difference_type __n) { + auto __tmp = __i; + __tmp -= __n; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI friend difference_type + operator-(const __key_value_iterator& __x, const __key_value_iterator& __y) { + return difference_type(__x.__key_iter_ - __y.__key_iter_); + } +}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___FLAT_MAP_KEY_VALUE_ITERATOR_H diff --git a/lib/libcxx/include/__flat_map/sorted_equivalent.h b/lib/libcxx/include/__flat_map/sorted_equivalent.h new file mode 100644 index 000000000000..1db935cc6ee7 --- /dev/null +++ 
b/lib/libcxx/include/__flat_map/sorted_equivalent.h @@ -0,0 +1,31 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef _LIBCPP___FLAT_MAP_SORTED_EQUIVALENT_H +#define _LIBCPP___FLAT_MAP_SORTED_EQUIVALENT_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER >= 23 + +_LIBCPP_BEGIN_NAMESPACE_STD + +struct sorted_equivalent_t { + explicit sorted_equivalent_t() = default; +}; +inline constexpr sorted_equivalent_t sorted_equivalent{}; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 23 + +#endif // _LIBCPP___FLAT_MAP_SORTED_EQUIVALENT_H diff --git a/lib/libcxx/include/__type_traits/add_cv.h b/lib/libcxx/include/__flat_map/sorted_unique.h similarity index 62% rename from lib/libcxx/include/__type_traits/add_cv.h rename to lib/libcxx/include/__flat_map/sorted_unique.h index 9e23e5ceb7a3..0189a5ff1d56 100644 --- a/lib/libcxx/include/__type_traits/add_cv.h +++ b/lib/libcxx/include/__flat_map/sorted_unique.h @@ -1,3 +1,4 @@ +// -*- C++ -*- //===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
@@ -5,9 +6,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___TYPE_TRAITS_ADD_CV_H -#define _LIBCPP___TYPE_TRAITS_ADD_CV_H +#ifndef _LIBCPP___FLAT_MAP_SORTED_UNIQUE_H +#define _LIBCPP___FLAT_MAP_SORTED_UNIQUE_H #include <__config> @@ -15,18 +15,17 @@ # pragma GCC system_header #endif +#if _LIBCPP_STD_VER >= 23 + _LIBCPP_BEGIN_NAMESPACE_STD -template -struct _LIBCPP_TEMPLATE_VIS add_cv { - typedef _LIBCPP_NODEBUG const volatile _Tp type; +struct sorted_unique_t { + explicit sorted_unique_t() = default; }; - -#if _LIBCPP_STD_VER >= 14 -template -using add_cv_t = typename add_cv<_Tp>::type; -#endif +inline constexpr sorted_unique_t sorted_unique{}; _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___TYPE_TRAITS_ADD_CV_H +#endif // _LIBCPP_STD_VER >= 23 + +#endif // _LIBCPP___FLAT_MAP_SORTED_UNIQUE_H diff --git a/lib/libcxx/include/__flat_map/utils.h b/lib/libcxx/include/__flat_map/utils.h new file mode 100644 index 000000000000..acb7dca7ffe9 --- /dev/null +++ b/lib/libcxx/include/__flat_map/utils.h @@ -0,0 +1,103 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___FLAT_MAP_UTILS_H +#define _LIBCPP___FLAT_MAP_UTILS_H + +#include <__config> +#include <__type_traits/container_traits.h> +#include <__utility/exception_guard.h> +#include <__utility/forward.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +#if _LIBCPP_STD_VER >= 23 + +_LIBCPP_BEGIN_NAMESPACE_STD + +// These utilities are defined in a class instead of a namespace so that this class can be befriended more easily. +struct __flat_map_utils { + // Emplace a {key: value} into a flat_{multi}map, at the exact position that + // __it_key and __it_mapped point to, assuming that the key is not already present in the map. + // When an exception is thrown during the emplacement, the function will try its best to + // roll back the changes it made to the map. If it cannot roll back the changes, it will + // clear the map. + template + _LIBCPP_HIDE_FROM_ABI static typename _Map::iterator __emplace_exact_pos( + _Map& __map, _IterK&& __it_key, _IterM&& __it_mapped, _KeyArg&& __key, _MArgs&&... __mapped_args) { + auto __on_key_failed = std::__make_exception_guard([&]() noexcept { + using _KeyContainer = typename _Map::key_container_type; + if constexpr (__container_traits<_KeyContainer>::__emplacement_has_strong_exception_safety_guarantee) { + // Nothing to roll back! 
+ } else { + // we need to clear both because we don't know the state of our keys anymore + __map.clear() /* noexcept */; + } + }); + auto __key_it = __map.__containers_.keys.emplace(__it_key, std::forward<_KeyArg>(__key)); + __on_key_failed.__complete(); + + auto __on_value_failed = std::__make_exception_guard([&]() noexcept { + using _MappedContainer = typename _Map::mapped_container_type; + if constexpr (!__container_traits<_MappedContainer>::__emplacement_has_strong_exception_safety_guarantee) { + // we need to clear both because we don't know the state of our values anymore + __map.clear() /* noexcept */; + } else { + // In this case, we know the values are just like before we attempted emplacement, + // and we also know that the keys have been emplaced successfully. Just roll back the keys. +# if _LIBCPP_HAS_EXCEPTIONS + try { +# endif // _LIBCPP_HAS_EXCEPTIONS + __map.__containers_.keys.erase(__key_it); +# if _LIBCPP_HAS_EXCEPTIONS + } catch (...) { + // Now things are funky for real. We're failing to rollback the keys. + // Just give up and clear the whole thing. + // + // Also, swallow the exception that happened during the rollback and let the + // original value-emplacement exception propagate normally. 
+ __map.clear() /* noexcept */; + } +# endif // _LIBCPP_HAS_EXCEPTIONS + } + }); + auto __mapped_it = __map.__containers_.values.emplace(__it_mapped, std::forward<_MArgs>(__mapped_args)...); + __on_value_failed.__complete(); + + return typename _Map::iterator(std::move(__key_it), std::move(__mapped_it)); + } + + // TODO: We could optimize this, see + // https://github.com/llvm/llvm-project/issues/108624 + template + _LIBCPP_HIDE_FROM_ABI static typename _Map::size_type + __append(_Map& __map, _InputIterator __first, _Sentinel __last) { + typename _Map::size_type __num_appended = 0; + for (; __first != __last; ++__first) { + typename _Map::value_type __kv = *__first; + __map.__containers_.keys.insert(__map.__containers_.keys.end(), std::move(__kv.first)); + __map.__containers_.values.insert(__map.__containers_.values.end(), std::move(__kv.second)); + ++__num_appended; + } + return __num_appended; + } +}; +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_STD_VER >= 23 + +_LIBCPP_POP_MACROS + +#endif // #define _LIBCPP___FLAT_MAP_UTILS_H diff --git a/lib/libcxx/include/__format/buffer.h b/lib/libcxx/include/__format/buffer.h index 8598f0a1c039..0c054bbc3a1d 100644 --- a/lib/libcxx/include/__format/buffer.h +++ b/lib/libcxx/include/__format/buffer.h @@ -14,6 +14,7 @@ #include <__algorithm/fill_n.h> #include <__algorithm/max.h> #include <__algorithm/min.h> +#include <__algorithm/ranges_copy.h> #include <__algorithm/ranges_copy_n.h> #include <__algorithm/transform.h> #include <__algorithm/unwrap_iter.h> @@ -29,6 +30,7 @@ #include <__iterator/wrap_iter.h> #include <__memory/addressof.h> #include <__memory/allocate_at_least.h> +#include <__memory/allocator.h> #include <__memory/allocator_traits.h> #include <__memory/construct_at.h> #include <__memory/ranges_construct_at.h> @@ -37,7 +39,7 @@ #include <__type_traits/conditional.h> #include <__utility/exception_guard.h> #include <__utility/move.h> -#include +#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ 
-53,24 +55,147 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace __format { +// A helper to limit the total size of code units written. +class _LIBCPP_HIDE_FROM_ABI __max_output_size { +public: + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __max_output_size(size_t __max_size) : __max_size_{__max_size} {} + + // This function adjusts the size of a (bulk) write operations. It ensures the + // number of code units written by a __output_buffer never exceeds + // __max_size_ code units. + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __write_request(size_t __code_units) { + size_t __result = + __code_units_written_ < __max_size_ ? std::min(__code_units, __max_size_ - __code_units_written_) : 0; + __code_units_written_ += __code_units; + return __result; + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __code_units_written() const noexcept { return __code_units_written_; } + +private: + size_t __max_size_; + // The code units that would have been written if there was no limit. + // format_to_n returns this value. + size_t __code_units_written_{0}; +}; + /// A "buffer" that handles writing to the proper iterator. /// /// This helper is used together with the @ref back_insert_iterator to offer /// type-erasure for the formatting functions. This reduces the number to /// template instantiations. +/// +/// The design is the following: +/// - There is an external object that connects the buffer to the output. +/// - This buffer object: +/// - inherits publicly from this class. +/// - has a static or dynamic buffer. +/// - has a static member function to make space in its buffer write +/// operations. This can be done by increasing the size of the internal +/// buffer or by writing the contents of the buffer to the output iterator. +/// +/// This member function is a constructor argument, so its name is not +/// fixed. The code uses the name __prepare_write. +/// - The number of output code units can be limited by a __max_output_size +/// object. 
This is used in format_to_n This object: +/// - Contains the maximum number of code units to be written. +/// - Contains the number of code units that are requested to be written. +/// This number is returned to the user of format_to_n. +/// - The write functions call the object's __request_write member function. +/// This function: +/// - Updates the number of code units that are requested to be written. +/// - Returns the number of code units that can be written without +/// exceeding the maximum number of code units to be written. +/// +/// Documentation for the buffer usage members: +/// - __ptr_ +/// The start of the buffer. +/// - __capacity_ +/// The number of code units that can be written. This means +/// [__ptr_, __ptr_ + __capacity_) is a valid range to write to. +/// - __size_ +/// The number of code units written in the buffer. The next code unit will +/// be written at __ptr_ + __size_. This __size_ may NOT contain the total +/// number of code units written by the __output_buffer. Whether or not it +/// does depends on the sub-class used. Typically the total number of code +/// units written is not interesting. It is interesting for format_to_n which +/// has its own way to track this number. +/// +/// Documentation for the modifying buffer operations: +/// The subclasses have a function with the following signature: +/// +/// static void __prepare_write( +/// __output_buffer<_CharT>& __buffer, size_t __code_units); +/// +/// This function is called when a write function writes more code units than +/// the buffer's available space. When an __max_output_size object is provided +/// the number of code units is the number of code units returned from +/// __max_output_size::__request_write function. +/// +/// - The __buffer contains *this. Since the class containing this function +/// inherits from __output_buffer it's safe to cast it to the subclass being +/// used. +/// - The __code_units is the number of code units the caller will write + 1. 
+/// - This value does not take the available space of the buffer into account. +/// - The push_back function is more efficient when writing before resizing, +/// this means the buffer should always have room for one code unit. Hence +/// the + 1 is the size. +/// - When the function returns there is room for at least one additional code +/// unit. There is no requirement there is room for __code_units code units: +/// - The class has some "bulk" operations. For example, __copy which copies +/// the contents of a basic_string_view to the output. If the sub-class has +/// a fixed size buffer the size of the basic_string_view may be larger +/// than the buffer. In that case it's impossible to honor the requested +/// size. +/// - When the buffer has room for at least one code unit the function may be +/// a no-op. +/// - When the function makes space for more code units it uses one for these +/// functions to signal the change: +/// - __buffer_flushed() +/// - This function is typically used for a fixed sized buffer. +/// - The current contents of [__ptr_, __ptr_ + __size_) have been +/// processed. +/// - __ptr_ remains unchanged. +/// - __capacity_ remains unchanged. +/// - __size_ will be set to 0. +/// - __buffer_moved(_CharT* __ptr, size_t __capacity) +/// - This function is typically used for a dynamic sized buffer. There the +/// location of the buffer changes due to reallocations. +/// - __ptr_ will be set to __ptr. (This value may be the old value of +/// __ptr_). +/// - __capacity_ will be set to __capacity. (This value may be the old +/// value of __capacity_). +/// - __size_ remains unchanged, +/// - The range [__ptr, __ptr + __size_) contains the original data of the +/// range [__ptr_, __ptr_ + __size_). +/// +/// The push_back function expects a valid buffer and a capacity of at least 1. 
+/// This means: +/// - The class is constructed with a valid buffer, +/// - __buffer_moved is called with a valid buffer is used before the first +/// write operation, +/// - no write function is ever called, or +/// - the class is constructed with a __max_output_size object with __max_size 0. +/// +/// The latter option allows formatted_size to use the output buffer without +/// ever writing anything to the buffer. template <__fmt_char_type _CharT> class _LIBCPP_TEMPLATE_VIS __output_buffer { public: - using value_type = _CharT; + using value_type _LIBCPP_NODEBUG = _CharT; + using __prepare_write_type _LIBCPP_NODEBUG = void (*)(__output_buffer<_CharT>&, size_t); - template - _LIBCPP_HIDE_FROM_ABI explicit __output_buffer(_CharT* __ptr, size_t __capacity, _Tp* __obj) - : __ptr_(__ptr), - __capacity_(__capacity), - __flush_([](_CharT* __p, size_t __n, void* __o) { static_cast<_Tp*>(__o)->__flush(__p, __n); }), - __obj_(__obj) {} + [[nodiscard]] + _LIBCPP_HIDE_FROM_ABI explicit __output_buffer(_CharT* __ptr, size_t __capacity, __prepare_write_type __function) + : __output_buffer{__ptr, __capacity, __function, nullptr} {} - _LIBCPP_HIDE_FROM_ABI void __reset(_CharT* __ptr, size_t __capacity) { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __output_buffer( + _CharT* __ptr, size_t __capacity, __prepare_write_type __function, __max_output_size* __max_output_size) + : __ptr_(__ptr), __capacity_(__capacity), __prepare_write_(__function), __max_output_size_(__max_output_size) {} + + _LIBCPP_HIDE_FROM_ABI void __buffer_flushed() { __size_ = 0; } + + _LIBCPP_HIDE_FROM_ABI void __buffer_moved(_CharT* __ptr, size_t __capacity) { __ptr_ = __ptr; __capacity_ = __capacity; } @@ -79,12 +204,18 @@ class _LIBCPP_TEMPLATE_VIS __output_buffer { // Used in std::back_insert_iterator. 
_LIBCPP_HIDE_FROM_ABI void push_back(_CharT __c) { + if (__max_output_size_ && __max_output_size_->__write_request(1) == 0) + return; + + _LIBCPP_ASSERT_INTERNAL( + __ptr_ && __size_ < __capacity_ && __available() >= 1, "attempted to write outside the buffer"); + __ptr_[__size_++] = __c; // Profiling showed flushing after adding is more efficient than flushing // when entering the function. if (__size_ == __capacity_) - __flush(); + __prepare_write(0); } /// Copies the input __str to the buffer. @@ -105,25 +236,20 @@ class _LIBCPP_TEMPLATE_VIS __output_buffer { // upper case. For integral these strings are short. // TODO FMT Look at the improvements above. size_t __n = __str.size(); - - __flush_on_overflow(__n); - if (__n < __capacity_) { // push_back requires the buffer to have room for at least one character (so use <). - std::copy_n(__str.data(), __n, std::addressof(__ptr_[__size_])); - __size_ += __n; - return; + if (__max_output_size_) { + __n = __max_output_size_->__write_request(__n); + if (__n == 0) + return; } - // The output doesn't fit in the internal buffer. - // Copy the data in "__capacity_" sized chunks. - _LIBCPP_ASSERT_INTERNAL(__size_ == 0, "the buffer should be flushed by __flush_on_overflow"); const _InCharT* __first = __str.data(); do { - size_t __chunk = std::min(__n, __capacity_); + __prepare_write(__n); + size_t __chunk = std::min(__n, __available()); std::copy_n(__first, __chunk, std::addressof(__ptr_[__size_])); - __size_ = __chunk; + __size_ += __chunk; __first += __chunk; __n -= __chunk; - __flush(); } while (__n); } @@ -137,121 +263,59 @@ class _LIBCPP_TEMPLATE_VIS __output_buffer { _LIBCPP_ASSERT_INTERNAL(__first <= __last, "not a valid range"); size_t __n = static_cast(__last - __first); - __flush_on_overflow(__n); - if (__n < __capacity_) { // push_back requires the buffer to have room for at least one character (so use <). 
- std::transform(__first, __last, std::addressof(__ptr_[__size_]), std::move(__operation)); - __size_ += __n; - return; + if (__max_output_size_) { + __n = __max_output_size_->__write_request(__n); + if (__n == 0) + return; } - // The output doesn't fit in the internal buffer. - // Transform the data in "__capacity_" sized chunks. - _LIBCPP_ASSERT_INTERNAL(__size_ == 0, "the buffer should be flushed by __flush_on_overflow"); do { - size_t __chunk = std::min(__n, __capacity_); + __prepare_write(__n); + size_t __chunk = std::min(__n, __available()); std::transform(__first, __first + __chunk, std::addressof(__ptr_[__size_]), __operation); - __size_ = __chunk; + __size_ += __chunk; __first += __chunk; __n -= __chunk; - __flush(); } while (__n); } /// A \c fill_n wrapper. _LIBCPP_HIDE_FROM_ABI void __fill(size_t __n, _CharT __value) { - __flush_on_overflow(__n); - if (__n < __capacity_) { // push_back requires the buffer to have room for at least one character (so use <). - std::fill_n(std::addressof(__ptr_[__size_]), __n, __value); - __size_ += __n; - return; + if (__max_output_size_) { + __n = __max_output_size_->__write_request(__n); + if (__n == 0) + return; } - // The output doesn't fit in the internal buffer. - // Fill the buffer in "__capacity_" sized chunks. 
- _LIBCPP_ASSERT_INTERNAL(__size_ == 0, "the buffer should be flushed by __flush_on_overflow"); do { - size_t __chunk = std::min(__n, __capacity_); + __prepare_write(__n); + size_t __chunk = std::min(__n, __available()); std::fill_n(std::addressof(__ptr_[__size_]), __chunk, __value); - __size_ = __chunk; + __size_ += __chunk; __n -= __chunk; - __flush(); } while (__n); } - _LIBCPP_HIDE_FROM_ABI void __flush() { - __flush_(__ptr_, __size_, __obj_); - __size_ = 0; - } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __capacity() const { return __capacity_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __size() const { return __size_; } private: _CharT* __ptr_; size_t __capacity_; size_t __size_{0}; - void (*__flush_)(_CharT*, size_t, void*); - void* __obj_; + void (*__prepare_write_)(__output_buffer<_CharT>&, size_t); + __max_output_size* __max_output_size_; - /// Flushes the buffer when the output operation would overflow the buffer. - /// - /// A simple approach for the overflow detection would be something along the - /// lines: - /// \code - /// // The internal buffer is large enough. - /// if (__n <= __capacity_) { - /// // Flush when we really would overflow. - /// if (__size_ + __n >= __capacity_) - /// __flush(); - /// ... - /// } - /// \endcode - /// - /// This approach works for all cases but one: - /// A __format_to_n_buffer_base where \ref __enable_direct_output is true. - /// In that case the \ref __capacity_ of the buffer changes during the first - /// \ref __flush. During that operation the output buffer switches from its - /// __writer_ to its __storage_. The \ref __capacity_ of the former depends - /// on the value of n, of the latter is a fixed size. For example: - /// - a format_to_n call with a 10'000 char buffer, - /// - the buffer is filled with 9'500 chars, - /// - adding 1'000 elements would overflow the buffer so the buffer gets - /// changed and the \ref __capacity_ decreases from 10'000 to - /// __buffer_size (256 at the time of writing). 
- /// - /// This means that the \ref __flush for this class may need to copy a part of - /// the internal buffer to the proper output. In this example there will be - /// 500 characters that need this copy operation. - /// - /// Note it would be more efficient to write 500 chars directly and then swap - /// the buffers. This would make the code more complex and \ref format_to_n is - /// not the most common use case. Therefore the optimization isn't done. - _LIBCPP_HIDE_FROM_ABI void __flush_on_overflow(size_t __n) { - if (__size_ + __n >= __capacity_) - __flush(); - } -}; - -/// A storage using an internal buffer. -/// -/// This storage is used when writing a single element to the output iterator -/// is expensive. -template <__fmt_char_type _CharT> -class _LIBCPP_TEMPLATE_VIS __internal_storage { -public: - _LIBCPP_HIDE_FROM_ABI _CharT* __begin() { return __buffer_; } - - static constexpr size_t __buffer_size = 256 / sizeof(_CharT); + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __available() const { return __capacity_ - __size_; } -private: - _CharT __buffer_[__buffer_size]; + _LIBCPP_HIDE_FROM_ABI void __prepare_write(size_t __code_units) { + // Always have space for one additional code unit. This is a precondition of the push_back function. + __code_units += 1; + if (__available() < __code_units) + __prepare_write_(*this, __code_units + 1); + } }; -/// A storage writing directly to the storage. -/// -/// This requires the storage to be a contiguous buffer of \a _CharT. -/// Since the output is directly written to the underlying storage this class -/// is just an empty class. -template <__fmt_char_type _CharT> -class _LIBCPP_TEMPLATE_VIS __direct_storage {}; - template concept __enable_direct_output = __fmt_char_type<_CharT> && @@ -260,40 +324,6 @@ concept __enable_direct_output = // `#ifdef`. || same_as<_OutIt, __wrap_iter<_CharT*>>); -/// Write policy for directly writing to the underlying output. 
-template -class _LIBCPP_TEMPLATE_VIS __writer_direct { -public: - _LIBCPP_HIDE_FROM_ABI explicit __writer_direct(_OutIt __out_it) : __out_it_(__out_it) {} - - _LIBCPP_HIDE_FROM_ABI _OutIt __out_it() { return __out_it_; } - - _LIBCPP_HIDE_FROM_ABI void __flush(_CharT*, size_t __n) { - // _OutIt can be a __wrap_iter. Therefore the original iterator - // is adjusted. - __out_it_ += __n; - } - -private: - _OutIt __out_it_; -}; - -/// Write policy for copying the buffer to the output. -template -class _LIBCPP_TEMPLATE_VIS __writer_iterator { -public: - _LIBCPP_HIDE_FROM_ABI explicit __writer_iterator(_OutIt __out_it) : __out_it_{std::move(__out_it)} {} - - _LIBCPP_HIDE_FROM_ABI _OutIt __out_it() && { return std::move(__out_it_); } - - _LIBCPP_HIDE_FROM_ABI void __flush(_CharT* __ptr, size_t __n) { - __out_it_ = std::ranges::copy_n(__ptr, __n, std::move(__out_it_)).out; - } - -private: - _OutIt __out_it_; -}; - /// Concept to see whether a \a _Container is insertable. /// /// The concept is used to validate whether multiple calls to a @@ -311,196 +341,220 @@ concept __insertable = /// Extract the container type of a \ref back_insert_iterator. template struct _LIBCPP_TEMPLATE_VIS __back_insert_iterator_container { - using type = void; + using type _LIBCPP_NODEBUG = void; }; template <__insertable _Container> struct _LIBCPP_TEMPLATE_VIS __back_insert_iterator_container> { - using type = _Container; + using type _LIBCPP_NODEBUG = _Container; }; -/// Write policy for inserting the buffer in a container. -template -class _LIBCPP_TEMPLATE_VIS __writer_container { +// A dynamically growing buffer. 
+template <__fmt_char_type _CharT> +class _LIBCPP_TEMPLATE_VIS __allocating_buffer : public __output_buffer<_CharT> { public: - using _CharT = typename _Container::value_type; + __allocating_buffer(const __allocating_buffer&) = delete; + __allocating_buffer& operator=(const __allocating_buffer&) = delete; - _LIBCPP_HIDE_FROM_ABI explicit __writer_container(back_insert_iterator<_Container> __out_it) - : __container_{__out_it.__get_container()} {} + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __allocating_buffer() : __allocating_buffer{nullptr} {} - _LIBCPP_HIDE_FROM_ABI auto __out_it() { return std::back_inserter(*__container_); } + [[nodiscard]] + _LIBCPP_HIDE_FROM_ABI explicit __allocating_buffer(__max_output_size* __max_output_size) + : __output_buffer<_CharT>{__small_buffer_, __buffer_size_, __prepare_write, __max_output_size} {} - _LIBCPP_HIDE_FROM_ABI void __flush(_CharT* __ptr, size_t __n) { - __container_->insert(__container_->end(), __ptr, __ptr + __n); + _LIBCPP_HIDE_FROM_ABI ~__allocating_buffer() { + if (__ptr_ != __small_buffer_) + _Alloc{}.deallocate(__ptr_, this->__capacity()); } -private: - _Container* __container_; -}; + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI basic_string_view<_CharT> __view() { return {__ptr_, this->__size()}; } -/// Selects the type of the writer used for the output iterator. -template -class _LIBCPP_TEMPLATE_VIS __writer_selector { - using _Container = typename __back_insert_iterator_container<_OutIt>::type; +private: + using _Alloc _LIBCPP_NODEBUG = allocator<_CharT>; -public: - using type = - conditional_t, - __writer_container<_Container>, - conditional_t<__enable_direct_output<_OutIt, _CharT>, - __writer_direct<_OutIt, _CharT>, - __writer_iterator<_OutIt, _CharT>>>; -}; + // Since allocating is expensive the class has a small internal buffer. When + // its capacity is exceeded a dynamic buffer will be allocated. + static constexpr size_t __buffer_size_ = 256; + _CharT __small_buffer_[__buffer_size_]; -/// The generic formatting buffer. 
-template - requires(output_iterator<_OutIt, const _CharT&>) -class _LIBCPP_TEMPLATE_VIS __format_buffer { - using _Storage = - conditional_t<__enable_direct_output<_OutIt, _CharT>, __direct_storage<_CharT>, __internal_storage<_CharT>>; + _CharT* __ptr_{__small_buffer_}; -public: - _LIBCPP_HIDE_FROM_ABI explicit __format_buffer(_OutIt __out_it) - requires(same_as<_Storage, __internal_storage<_CharT>>) - : __output_(__storage_.__begin(), __storage_.__buffer_size, this), __writer_(std::move(__out_it)) {} + _LIBCPP_HIDE_FROM_ABI void __grow_buffer(size_t __capacity) { + if (__capacity < __buffer_size_) + return; - _LIBCPP_HIDE_FROM_ABI explicit __format_buffer(_OutIt __out_it) - requires(same_as<_Storage, __direct_storage<_CharT>>) - : __output_(std::__unwrap_iter(__out_it), size_t(-1), this), __writer_(std::move(__out_it)) {} + _LIBCPP_ASSERT_INTERNAL(__capacity > this->__capacity(), "the buffer must grow"); - _LIBCPP_HIDE_FROM_ABI auto __make_output_iterator() { return __output_.__make_output_iterator(); } + // _CharT is an implicit lifetime type so can be used without explicit + // construction or destruction. 
+ _Alloc __alloc; + auto __result = std::__allocate_at_least(__alloc, __capacity); + std::copy_n(__ptr_, this->__size(), __result.ptr); + if (__ptr_ != __small_buffer_) + __alloc.deallocate(__ptr_, this->__capacity()); - _LIBCPP_HIDE_FROM_ABI void __flush(_CharT* __ptr, size_t __n) { __writer_.__flush(__ptr, __n); } + __ptr_ = __result.ptr; + this->__buffer_moved(__ptr_, __result.count); + } - _LIBCPP_HIDE_FROM_ABI _OutIt __out_it() && { - __output_.__flush(); - return std::move(__writer_).__out_it(); + _LIBCPP_HIDE_FROM_ABI void __prepare_write(size_t __size_hint) { + __grow_buffer(std::max(this->__capacity() + __size_hint, this->__capacity() * 1.6)); } -private: - _LIBCPP_NO_UNIQUE_ADDRESS _Storage __storage_; - __output_buffer<_CharT> __output_; - typename __writer_selector<_OutIt, _CharT>::type __writer_; + _LIBCPP_HIDE_FROM_ABI static void __prepare_write(__output_buffer<_CharT>& __buffer, size_t __size_hint) { + static_cast<__allocating_buffer<_CharT>&>(__buffer).__prepare_write(__size_hint); + } }; -/// A buffer that counts the number of insertions. -/// -/// Since \ref formatted_size only needs to know the size, the output itself is -/// discarded. -template <__fmt_char_type _CharT> -class _LIBCPP_TEMPLATE_VIS __formatted_size_buffer { +// A buffer that directly writes to the underlying buffer. 
+template +class _LIBCPP_TEMPLATE_VIS __direct_iterator_buffer : public __output_buffer<_CharT> { public: - _LIBCPP_HIDE_FROM_ABI auto __make_output_iterator() { return __output_.__make_output_iterator(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __direct_iterator_buffer(_OutIt __out_it) + : __direct_iterator_buffer{__out_it, nullptr} {} - _LIBCPP_HIDE_FROM_ABI void __flush(const _CharT*, size_t __n) { __size_ += __n; } + [[nodiscard]] + _LIBCPP_HIDE_FROM_ABI explicit __direct_iterator_buffer(_OutIt __out_it, __max_output_size* __max_output_size) + : __output_buffer<_CharT>{std::__unwrap_iter(__out_it), __buffer_size, __prepare_write, __max_output_size}, + __out_it_(__out_it) {} - _LIBCPP_HIDE_FROM_ABI size_t __result() && { - __output_.__flush(); - return __size_; - } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI _OutIt __out_it() && { return __out_it_ + this->__size(); } private: - __internal_storage<_CharT> __storage_; - __output_buffer<_CharT> __output_{__storage_.__begin(), __storage_.__buffer_size, this}; - size_t __size_{0}; -}; + // The function format_to expects a buffer large enough for the output. The + // function format_to_n has its own helper class that restricts the number of + // write options. So this function class can pretend to have an infinite + // buffer. + static constexpr size_t __buffer_size = -1; + + _OutIt __out_it_; -/// The base of a buffer that counts and limits the number of insertions. -template - requires(output_iterator<_OutIt, const _CharT&>) -struct _LIBCPP_TEMPLATE_VIS __format_to_n_buffer_base { - using _Size = iter_difference_t<_OutIt>; + _LIBCPP_HIDE_FROM_ABI static void + __prepare_write([[maybe_unused]] __output_buffer<_CharT>& __buffer, [[maybe_unused]] size_t __size_hint) { + std::__throw_length_error("__direct_iterator_buffer"); + } +}; +// A buffer that writes its output to the end of a container. 
+template +class _LIBCPP_TEMPLATE_VIS __container_inserter_buffer : public __output_buffer<_CharT> { public: - _LIBCPP_HIDE_FROM_ABI explicit __format_to_n_buffer_base(_OutIt __out_it, _Size __max_size) - : __writer_(std::move(__out_it)), __max_size_(std::max(_Size(0), __max_size)) {} + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __container_inserter_buffer(_OutIt __out_it) + : __container_inserter_buffer{__out_it, nullptr} {} - _LIBCPP_HIDE_FROM_ABI void __flush(_CharT* __ptr, size_t __n) { - if (_Size(__size_) <= __max_size_) - __writer_.__flush(__ptr, std::min(_Size(__n), __max_size_ - __size_)); - __size_ += __n; + [[nodiscard]] + _LIBCPP_HIDE_FROM_ABI explicit __container_inserter_buffer(_OutIt __out_it, __max_output_size* __max_output_size) + : __output_buffer<_CharT>{__small_buffer_, __buffer_size, __prepare_write, __max_output_size}, + __container_{__out_it.__get_container()} {} + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto __out_it() && { + __container_->insert(__container_->end(), __small_buffer_, __small_buffer_ + this->__size()); + return std::back_inserter(*__container_); } -protected: - __internal_storage<_CharT> __storage_; - __output_buffer<_CharT> __output_{__storage_.__begin(), __storage_.__buffer_size, this}; - typename __writer_selector<_OutIt, _CharT>::type __writer_; +private: + typename __back_insert_iterator_container<_OutIt>::type* __container_; + + // This class uses a fixed size buffer and appends the elements in + // __buffer_size chunks. An alternative would be to use an allocating buffer + // and append the output in a single write operation. Benchmarking showed no + // performance difference. 
+ static constexpr size_t __buffer_size = 256; + _CharT __small_buffer_[__buffer_size]; + + _LIBCPP_HIDE_FROM_ABI void __prepare_write() { + __container_->insert(__container_->end(), __small_buffer_, __small_buffer_ + this->__size()); + this->__buffer_flushed(); + } - _Size __max_size_; - _Size __size_{0}; + _LIBCPP_HIDE_FROM_ABI static void + __prepare_write(__output_buffer<_CharT>& __buffer, [[maybe_unused]] size_t __size_hint) { + static_cast<__container_inserter_buffer<_OutIt, _CharT>&>(__buffer).__prepare_write(); + } }; -/// The base of a buffer that counts and limits the number of insertions. -/// -/// This version is used when \c __enable_direct_output<_OutIt, _CharT> == true. -/// -/// This class limits the size available to the direct writer so it will not -/// exceed the maximum number of code units. +// A buffer that writes to an iterator. +// +// Unlike the __container_inserter_buffer this class' performance does benefit +// from allocating and then inserting. template - requires(output_iterator<_OutIt, const _CharT&>) -class _LIBCPP_TEMPLATE_VIS __format_to_n_buffer_base<_OutIt, _CharT, true> { - using _Size = iter_difference_t<_OutIt>; - +class _LIBCPP_TEMPLATE_VIS __iterator_buffer : public __allocating_buffer<_CharT> { public: - _LIBCPP_HIDE_FROM_ABI explicit __format_to_n_buffer_base(_OutIt __out_it, _Size __max_size) - : __output_(std::__unwrap_iter(__out_it), __max_size, this), - __writer_(std::move(__out_it)), - __max_size_(__max_size) { - if (__max_size <= 0) [[unlikely]] - __output_.__reset(__storage_.__begin(), __storage_.__buffer_size); - } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __iterator_buffer(_OutIt __out_it) + : __allocating_buffer<_CharT>{}, __out_it_{std::move(__out_it)} {} - _LIBCPP_HIDE_FROM_ABI void __flush(_CharT* __ptr, size_t __n) { - // A __flush to the direct writer happens in the following occasions: - // - The format function has written the maximum number of allowed code - // units. 
At this point it's no longer valid to write to this writer. So - // switch to the internal storage. This internal storage doesn't need to - // be written anywhere so the __flush for that storage writes no output. - // - Like above, but the next "mass write" operation would overflow the - // buffer. In that case the buffer is pre-emptively switched. The still - // valid code units will be written separately. - // - The format_to_n function is finished. In this case there's no need to - // switch the buffer, but for simplicity the buffers are still switched. - // When the __max_size <= 0 the constructor already switched the buffers. - if (__size_ == 0 && __ptr != __storage_.__begin()) { - __writer_.__flush(__ptr, __n); - __output_.__reset(__storage_.__begin(), __storage_.__buffer_size); - } else if (__size_ < __max_size_) { - // Copies a part of the internal buffer to the output up to n characters. - // See __output_buffer<_CharT>::__flush_on_overflow for more information. - _Size __s = std::min(_Size(__n), __max_size_ - __size_); - std::copy_n(__ptr, __s, __writer_.__out_it()); - __writer_.__flush(__ptr, __s); - } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI explicit __iterator_buffer(_OutIt __out_it, __max_output_size* __max_output_size) + : __allocating_buffer<_CharT>{__max_output_size}, __out_it_{std::move(__out_it)} {} - __size_ += __n; + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto __out_it() && { + return std::ranges::copy(this->__view(), std::move(__out_it_)).out; } -protected: - __internal_storage<_CharT> __storage_; - __output_buffer<_CharT> __output_; - __writer_direct<_OutIt, _CharT> __writer_; +private: + _OutIt __out_it_; +}; + +// Selects the type of the buffer used for the output iterator. 
+template +class _LIBCPP_TEMPLATE_VIS __buffer_selector { + using _Container _LIBCPP_NODEBUG = __back_insert_iterator_container<_OutIt>::type; - _Size __max_size_; - _Size __size_{0}; +public: + using type _LIBCPP_NODEBUG = + conditional_t, + __container_inserter_buffer<_OutIt, _CharT>, + conditional_t<__enable_direct_output<_OutIt, _CharT>, + __direct_iterator_buffer<_OutIt, _CharT>, + __iterator_buffer<_OutIt, _CharT>>>; }; -/// The buffer that counts and limits the number of insertions. +// A buffer that counts and limits the number of insertions. template - requires(output_iterator<_OutIt, const _CharT&>) -struct _LIBCPP_TEMPLATE_VIS __format_to_n_buffer final - : public __format_to_n_buffer_base< _OutIt, _CharT, __enable_direct_output<_OutIt, _CharT>> { - using _Base = __format_to_n_buffer_base<_OutIt, _CharT, __enable_direct_output<_OutIt, _CharT>>; - using _Size = iter_difference_t<_OutIt>; +class _LIBCPP_TEMPLATE_VIS __format_to_n_buffer : private __buffer_selector<_OutIt, _CharT>::type { +public: + using _Base _LIBCPP_NODEBUG = __buffer_selector<_OutIt, _CharT>::type; + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __format_to_n_buffer(_OutIt __out_it, iter_difference_t<_OutIt> __n) + : _Base{std::move(__out_it), std::addressof(__max_output_size_)}, + __max_output_size_{__n < 0 ? size_t{0} : static_cast(__n)} {} + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto __make_output_iterator() { return _Base::__make_output_iterator(); } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> __result() && { + return {static_cast<_Base&&>(*this).__out_it(), + static_cast>(__max_output_size_.__code_units_written())}; + } + +private: + __max_output_size __max_output_size_; +}; +// A buffer that counts the number of insertions. +// +// Since formatted_size only needs to know the size, the output itself is +// discarded. 
+template <__fmt_char_type _CharT> +class _LIBCPP_TEMPLATE_VIS __formatted_size_buffer : private __output_buffer<_CharT> { public: - _LIBCPP_HIDE_FROM_ABI explicit __format_to_n_buffer(_OutIt __out_it, _Size __max_size) - : _Base(std::move(__out_it), __max_size) {} - _LIBCPP_HIDE_FROM_ABI auto __make_output_iterator() { return this->__output_.__make_output_iterator(); } + using _Base _LIBCPP_NODEBUG = __output_buffer<_CharT>; + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __formatted_size_buffer() + : _Base{nullptr, 0, __prepare_write, std::addressof(__max_output_size_)} {} + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI auto __make_output_iterator() { return _Base::__make_output_iterator(); } + + // This function does not need to be r-value qualified, however this is + // consistent with similar objects. + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI size_t __result() && { return __max_output_size_.__code_units_written(); } + +private: + __max_output_size __max_output_size_{0}; - _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> __result() && { - this->__output_.__flush(); - return {std::move(this->__writer_).__out_it(), this->__size_}; + _LIBCPP_HIDE_FROM_ABI static void + __prepare_write([[maybe_unused]] __output_buffer<_CharT>& __buffer, [[maybe_unused]] size_t __size_hint) { + // Note this function does not satisfy the requirement of giving a 1 code unit buffer. + _LIBCPP_ASSERT_INTERNAL( + false, "Since __max_output_size_.__max_size_ == 0 there should never be call to this function."); } }; @@ -524,14 +578,14 @@ struct _LIBCPP_TEMPLATE_VIS __format_to_n_buffer final // would lead to a circular include with formatter for vector. 
template <__fmt_char_type _CharT> class _LIBCPP_TEMPLATE_VIS __retarget_buffer { - using _Alloc = allocator<_CharT>; + using _Alloc _LIBCPP_NODEBUG = allocator<_CharT>; public: - using value_type = _CharT; + using value_type _LIBCPP_NODEBUG = _CharT; struct __iterator { - using difference_type = ptrdiff_t; - using value_type = _CharT; + using difference_type _LIBCPP_NODEBUG = ptrdiff_t; + using value_type _LIBCPP_NODEBUG = _CharT; _LIBCPP_HIDE_FROM_ABI constexpr explicit __iterator(__retarget_buffer& __buffer) : __buffer_(std::addressof(__buffer)) {} @@ -646,7 +700,7 @@ class _LIBCPP_TEMPLATE_VIS __retarget_buffer { } // namespace __format -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/concepts.h b/lib/libcxx/include/__format/concepts.h index 13380e9b91af..28297c612db7 100644 --- a/lib/libcxx/include/__format/concepts.h +++ b/lib/libcxx/include/__format/concepts.h @@ -34,7 +34,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template concept __fmt_char_type = same_as<_CharT, char> -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS || same_as<_CharT, wchar_t> # endif ; @@ -44,7 +44,7 @@ concept __fmt_char_type = // (Note testing for (w)format_context would be a valid choice, but requires // selecting the proper one depending on the type of _CharT.) 
template -using __fmt_iter_for = _CharT*; +using __fmt_iter_for _LIBCPP_NODEBUG = _CharT*; template >> concept __formattable_with = @@ -75,8 +75,8 @@ template concept __fmt_pair_like = __is_specialization_v<_Tp, pair> || (__is_specialization_v<_Tp, tuple> && tuple_size_v<_Tp> == 2); -# endif //_LIBCPP_STD_VER >= 23 -#endif //_LIBCPP_STD_VER >= 20 +# endif // _LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/container_adaptor.h b/lib/libcxx/include/__format/container_adaptor.h index 9f49ca03bf4f..48d42ee7d901 100644 --- a/lib/libcxx/include/__format/container_adaptor.h +++ b/lib/libcxx/include/__format/container_adaptor.h @@ -37,8 +37,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD template struct _LIBCPP_TEMPLATE_VIS __formatter_container_adaptor { private: - using __maybe_const_container = __fmt_maybe_const; - using __maybe_const_adaptor = __maybe_const, _Adaptor>; + using __maybe_const_container _LIBCPP_NODEBUG = __fmt_maybe_const; + using __maybe_const_adaptor _LIBCPP_NODEBUG = __maybe_const, _Adaptor>; formatter, _CharT> __underlying_; public: @@ -66,7 +66,7 @@ template _Container> struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_container_adaptor, _CharT> {}; -#endif //_LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 23 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/enable_insertable.h b/lib/libcxx/include/__format/enable_insertable.h index 86ef94a325b1..29fe566ff06a 100644 --- a/lib/libcxx/include/__format/enable_insertable.h +++ b/lib/libcxx/include/__format/enable_insertable.h @@ -28,7 +28,7 @@ inline constexpr bool __enable_insertable = false; } // namespace __format -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/escaped_output_table.h b/lib/libcxx/include/__format/escaped_output_table.h index f7be2dc61f21..7a0b35239861 100644 --- 
a/lib/libcxx/include/__format/escaped_output_table.h +++ b/lib/libcxx/include/__format/escaped_output_table.h @@ -63,7 +63,7 @@ #include <__algorithm/ranges_upper_bound.h> #include <__config> -#include +#include <__cstddef/ptrdiff_t.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -856,7 +856,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[711] = { // clang-format on } // namespace __escaped_output_table -#endif //_LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 23 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/extended_grapheme_cluster_table.h b/lib/libcxx/include/__format/extended_grapheme_cluster_table.h index 48581d8a5dde..7653a9e03b81 100644 --- a/lib/libcxx/include/__format/extended_grapheme_cluster_table.h +++ b/lib/libcxx/include/__format/extended_grapheme_cluster_table.h @@ -63,8 +63,8 @@ #include <__algorithm/ranges_upper_bound.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__iterator/access.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -1656,7 +1656,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[1496] = { } // namespace __extended_grapheme_custer_property_boundary -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/format_arg.h b/lib/libcxx/include/__format/format_arg.h index aa02f81dc40e..10f0ba9928ce 100644 --- a/lib/libcxx/include/__format/format_arg.h +++ b/lib/libcxx/include/__format/format_arg.h @@ -13,6 +13,7 @@ #include <__assert> #include <__concepts/arithmetic.h> #include <__config> +#include <__cstddef/size_t.h> #include <__format/concepts.h> #include <__format/format_parse_context.h> #include <__functional/invoke.h> @@ -113,7 +114,7 @@ _LIBCPP_HIDE_FROM_ABI decltype(auto) __visit_format_arg(_Visitor&& __vis, basic_ case __format::__arg_t::__long_long: return std::invoke(std::forward<_Visitor>(__vis), __arg.__value_.__long_long_); case 
__format::__arg_t::__i128: -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 return std::invoke(std::forward<_Visitor>(__vis), __arg.__value_.__i128_); # else __libcpp_unreachable(); @@ -123,7 +124,7 @@ _LIBCPP_HIDE_FROM_ABI decltype(auto) __visit_format_arg(_Visitor&& __vis, basic_ case __format::__arg_t::__unsigned_long_long: return std::invoke(std::forward<_Visitor>(__vis), __arg.__value_.__unsigned_long_long_); case __format::__arg_t::__u128: -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 return std::invoke(std::forward<_Visitor>(__vis), __arg.__value_.__u128_); # else __libcpp_unreachable(); @@ -148,7 +149,7 @@ _LIBCPP_HIDE_FROM_ABI decltype(auto) __visit_format_arg(_Visitor&& __vis, basic_ __libcpp_unreachable(); } -# if _LIBCPP_STD_VER >= 26 && defined(_LIBCPP_HAS_EXPLICIT_THIS_PARAMETER) +# if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER template _LIBCPP_HIDE_FROM_ABI _Rp __visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { @@ -164,7 +165,7 @@ _LIBCPP_HIDE_FROM_ABI _Rp __visit_format_arg(_Visitor&& __vis, basic_format_arg< case __format::__arg_t::__long_long: return std::invoke_r<_Rp>(std::forward<_Visitor>(__vis), __arg.__value_.__long_long_); case __format::__arg_t::__i128: -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 return std::invoke_r<_Rp>(std::forward<_Visitor>(__vis), __arg.__value_.__i128_); # else __libcpp_unreachable(); @@ -174,7 +175,7 @@ _LIBCPP_HIDE_FROM_ABI _Rp __visit_format_arg(_Visitor&& __vis, basic_format_arg< case __format::__arg_t::__unsigned_long_long: return std::invoke_r<_Rp>(std::forward<_Visitor>(__vis), __arg.__value_.__unsigned_long_long_); case __format::__arg_t::__u128: -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 return std::invoke_r<_Rp>(std::forward<_Visitor>(__vis), __arg.__value_.__u128_); # else __libcpp_unreachable(); @@ -199,7 +200,7 @@ _LIBCPP_HIDE_FROM_ABI _Rp __visit_format_arg(_Visitor&& __vis, basic_format_arg< __libcpp_unreachable(); } 
-# endif // _LIBCPP_STD_VER >= 26 && defined(_LIBCPP_HAS_EXPLICIT_THIS_PARAMETER) +# endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER /// Contains the values used in basic_format_arg. /// @@ -207,7 +208,7 @@ _LIBCPP_HIDE_FROM_ABI _Rp __visit_format_arg(_Visitor&& __vis, basic_format_arg< /// separate arrays. template class __basic_format_arg_value { - using _CharT = typename _Context::char_type; + using _CharT _LIBCPP_NODEBUG = typename _Context::char_type; public: /// Contains the implementation for basic_format_arg::handle. @@ -237,7 +238,7 @@ class __basic_format_arg_value { unsigned __unsigned_; long long __long_long_; unsigned long long __unsigned_long_long_; -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 __int128_t __i128_; __uint128_t __u128_; # endif @@ -261,7 +262,7 @@ class __basic_format_arg_value { _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(long long __value) noexcept : __long_long_(__value) {} _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(unsigned long long __value) noexcept : __unsigned_long_long_(__value) {} -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(__int128_t __value) noexcept : __i128_(__value) {} _LIBCPP_HIDE_FROM_ABI __basic_format_arg_value(__uint128_t __value) noexcept : __u128_(__value) {} # endif @@ -276,7 +277,7 @@ class __basic_format_arg_value { }; template -class _LIBCPP_TEMPLATE_VIS basic_format_arg { +class _LIBCPP_TEMPLATE_VIS _LIBCPP_NO_SPECIALIZATIONS basic_format_arg { public: class _LIBCPP_TEMPLATE_VIS handle; @@ -284,14 +285,14 @@ class _LIBCPP_TEMPLATE_VIS basic_format_arg { _LIBCPP_HIDE_FROM_ABI explicit operator bool() const noexcept { return __type_ != __format::__arg_t::__none; } -# if _LIBCPP_STD_VER >= 26 && defined(_LIBCPP_HAS_EXPLICIT_THIS_PARAMETER) +# if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER // This function is user facing, so it must wrap the non-standard types of // the "variant" in a handle to stay 
conforming. See __arg_t for more details. template _LIBCPP_HIDE_FROM_ABI decltype(auto) visit(this basic_format_arg __arg, _Visitor&& __vis) { switch (__arg.__type_) { -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 case __format::__arg_t::__i128: { typename __basic_format_arg_value<_Context>::__handle __h{__arg.__value_.__i128_}; return std::invoke(std::forward<_Visitor>(__vis), typename basic_format_arg<_Context>::handle{__h}); @@ -312,7 +313,7 @@ class _LIBCPP_TEMPLATE_VIS basic_format_arg { template _LIBCPP_HIDE_FROM_ABI _Rp visit(this basic_format_arg __arg, _Visitor&& __vis) { switch (__arg.__type_) { -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 case __format::__arg_t::__i128: { typename __basic_format_arg_value<_Context>::__handle __h{__arg.__value_.__i128_}; return std::invoke_r<_Rp>(std::forward<_Visitor>(__vis), typename basic_format_arg<_Context>::handle{__h}); @@ -328,7 +329,7 @@ class _LIBCPP_TEMPLATE_VIS basic_format_arg { } } -# endif // _LIBCPP_STD_VER >= 26 && defined(_LIBCPP_HAS_EXPLICIT_THIS_PARAMETER) +# endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER private: using char_type = typename _Context::char_type; @@ -370,13 +371,13 @@ class _LIBCPP_TEMPLATE_VIS basic_format_arg<_Context>::handle { // This function is user facing, so it must wrap the non-standard types of // the "variant" in a handle to stay conforming. See __arg_t for more details. 
template -# if _LIBCPP_STD_VER >= 26 && defined(_LIBCPP_HAS_EXPLICIT_THIS_PARAMETER) +# if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER _LIBCPP_DEPRECATED_IN_CXX26 # endif _LIBCPP_HIDE_FROM_ABI decltype(auto) visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) { switch (__arg.__type_) { -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 case __format::__arg_t::__i128: { typename __basic_format_arg_value<_Context>::__handle __h{__arg.__value_.__i128_}; return std::invoke(std::forward<_Visitor>(__vis), typename basic_format_arg<_Context>::handle{__h}); @@ -386,13 +387,13 @@ _LIBCPP_DEPRECATED_IN_CXX26 typename __basic_format_arg_value<_Context>::__handle __h{__arg.__value_.__u128_}; return std::invoke(std::forward<_Visitor>(__vis), typename basic_format_arg<_Context>::handle{__h}); } -# endif // _LIBCPP_STD_VER >= 26 && defined(_LIBCPP_HAS_EXPLICIT_THIS_PARAMETER) +# endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER default: return std::__visit_format_arg(std::forward<_Visitor>(__vis), __arg); } } -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/format_arg_store.h b/lib/libcxx/include/__format/format_arg_store.h index 23a599e99575..4c5ee9e9e4fd 100644 --- a/lib/libcxx/include/__format/format_arg_store.h +++ b/lib/libcxx/include/__format/format_arg_store.h @@ -22,6 +22,7 @@ #include <__type_traits/conditional.h> #include <__type_traits/extent.h> #include <__type_traits/remove_const.h> +#include #include #include @@ -48,7 +49,7 @@ template _Tp> consteval __arg_t __determine_arg_t() { return __arg_t::__char_type; } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template requires(same_as && same_as<_CharT, char>) consteval __arg_t __determine_arg_t() { @@ -63,7 +64,7 @@ consteval __arg_t __determine_arg_t() { return __arg_t::__int; else if constexpr (sizeof(_Tp) <= sizeof(long long)) return 
__arg_t::__long_long; -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 else if constexpr (sizeof(_Tp) == sizeof(__int128_t)) return __arg_t::__i128; # endif @@ -78,7 +79,7 @@ consteval __arg_t __determine_arg_t() { return __arg_t::__unsigned; else if constexpr (sizeof(_Tp) <= sizeof(unsigned long long)) return __arg_t::__unsigned_long_long; -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 else if constexpr (sizeof(_Tp) == sizeof(__uint128_t)) return __arg_t::__u128; # endif @@ -172,7 +173,7 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu // final else requires no adjustment. if constexpr (__arg == __arg_t::__char_type) -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS if constexpr (same_as && same_as<_Dp, char>) return basic_format_arg<_Context>{__arg, static_cast(static_cast(__value))}; else @@ -233,6 +234,11 @@ struct __packed_format_arg_store { uint64_t __types_ = 0; }; +template +struct __packed_format_arg_store<_Context, 0> { + uint64_t __types_ = 0; +}; + template struct __unpacked_format_arg_store { basic_format_arg<_Context> __args_[_Np]; @@ -251,7 +257,7 @@ struct _LIBCPP_TEMPLATE_VIS __format_arg_store { } } - using _Storage = + using _Storage _LIBCPP_NODEBUG = conditional_t<__format::__use_packed_format_arg_store(sizeof...(_Args)), __format::__packed_format_arg_store<_Context, sizeof...(_Args)>, __format::__unpacked_format_arg_store<_Context, sizeof...(_Args)>>; @@ -259,7 +265,7 @@ struct _LIBCPP_TEMPLATE_VIS __format_arg_store { _Storage __storage; }; -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/format_args.h b/lib/libcxx/include/__format/format_args.h index 07923570f389..b98663c06ea4 100644 --- a/lib/libcxx/include/__format/format_args.h +++ b/lib/libcxx/include/__format/format_args.h @@ -11,10 +11,10 @@ #define _LIBCPP___FORMAT_FORMAT_ARGS_H #include <__config> +#include 
<__cstddef/size_t.h> #include <__format/format_arg.h> #include <__format/format_arg_store.h> #include <__fwd/format.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -71,7 +71,7 @@ class _LIBCPP_TEMPLATE_VIS basic_format_args { template basic_format_args(__format_arg_store<_Context, _Args...>) -> basic_format_args<_Context>; -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/format_context.h b/lib/libcxx/include/__format/format_context.h index 20c07559eae4..4dbfdbc02a26 100644 --- a/lib/libcxx/include/__format/format_context.h +++ b/lib/libcxx/include/__format/format_context.h @@ -23,9 +23,8 @@ #include <__memory/addressof.h> #include <__utility/move.h> #include <__variant/monostate.h> -#include -#ifndef _LIBCPP_HAS_NO_LOCALIZATION +#if _LIBCPP_HAS_LOCALIZATION # include <__locale> # include #endif @@ -45,7 +44,7 @@ template requires output_iterator<_OutIt, const _CharT&> class _LIBCPP_TEMPLATE_VIS basic_format_context; -# ifndef _LIBCPP_HAS_NO_LOCALIZATION +# if _LIBCPP_HAS_LOCALIZATION /** * Helper to create a basic_format_context. * @@ -67,7 +66,7 @@ __format_context_create(_OutIt __out_it, basic_format_args>, char>; -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS using wformat_context = basic_format_context< back_insert_iterator<__format::__output_buffer>, wchar_t>; # endif @@ -89,7 +88,7 @@ class _LIBCPP_HIDE_FROM_ABI basic_format_arg arg(size_t __id) const noexcept { return __args_.get(__id); } -# ifndef _LIBCPP_HAS_NO_LOCALIZATION +# if _LIBCPP_HAS_LOCALIZATION _LIBCPP_HIDE_FROM_ABI std::locale locale() { if (!__loc_) __loc_ = std::locale{}; @@ -102,7 +101,7 @@ class private: iterator __out_it_; basic_format_args __args_; -# ifndef _LIBCPP_HAS_NO_LOCALIZATION +# if _LIBCPP_HAS_LOCALIZATION // The Standard doesn't specify how the locale is stored. 
// [format.context]/6 @@ -132,6 +131,7 @@ class : __out_it_(std::move(__out_it)), __args_(__args) {} # endif +public: basic_format_context(const basic_format_context&) = delete; basic_format_context& operator=(const basic_format_context&) = delete; }; @@ -163,7 +163,7 @@ class _LIBCPP_TEMPLATE_VIS basic_format_context _LIBCPP_HIDE_FROM_ABI explicit basic_format_context(iterator __out_it, _Context& __ctx) : __out_it_(std::move(__out_it)), -# ifndef _LIBCPP_HAS_NO_LOCALIZATION +# if _LIBCPP_HAS_LOCALIZATION __loc_([](void* __c) { return static_cast<_Context*>(__c)->locale(); }), # endif __ctx_(std::addressof(__ctx)), @@ -180,20 +180,20 @@ class _LIBCPP_TEMPLATE_VIS basic_format_context(), __basic_format_arg_value(__arg)}; }; -# if _LIBCPP_STD_VER >= 26 && defined(_LIBCPP_HAS_EXPLICIT_THIS_PARAMETER) +# if _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER return static_cast<_Context*>(__c)->arg(__id).visit(std::move(__visitor)); # else _LIBCPP_SUPPRESS_DEPRECATED_PUSH return std::visit_format_arg(std::move(__visitor), static_cast<_Context*>(__c)->arg(__id)); _LIBCPP_SUPPRESS_DEPRECATED_POP -# endif // _LIBCPP_STD_VER >= 26 && defined(_LIBCPP_HAS_EXPLICIT_THIS_PARAMETER) +# endif // _LIBCPP_STD_VER >= 26 && _LIBCPP_HAS_EXPLICIT_THIS_PARAMETER }) { } _LIBCPP_HIDE_FROM_ABI basic_format_arg arg(size_t __id) const noexcept { return __arg_(__ctx_, __id); } -# ifndef _LIBCPP_HAS_NO_LOCALIZATION +# if _LIBCPP_HAS_LOCALIZATION _LIBCPP_HIDE_FROM_ABI std::locale locale() { return __loc_(__ctx_); } # endif _LIBCPP_HIDE_FROM_ABI iterator out() { return std::move(__out_it_); } @@ -202,7 +202,7 @@ class _LIBCPP_TEMPLATE_VIS basic_format_context= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/format_error.h b/lib/libcxx/include/__format/format_error.h index ed40e395d6af..b92e6d1de00e 100644 --- a/lib/libcxx/include/__format/format_error.h +++ b/lib/libcxx/include/__format/format_error.h @@ -35,15 +35,15 @@ class 
_LIBCPP_EXPORTED_FROM_ABI format_error : public runtime_error { }; _LIBCPP_DIAGNOSTIC_POP -_LIBCPP_NORETURN inline _LIBCPP_HIDE_FROM_ABI void __throw_format_error(const char* __s) { -# ifndef _LIBCPP_HAS_NO_EXCEPTIONS +[[noreturn]] inline _LIBCPP_HIDE_FROM_ABI void __throw_format_error(const char* __s) { +# if _LIBCPP_HAS_EXCEPTIONS throw format_error(__s); # else _LIBCPP_VERBOSE_ABORT("format_error was thrown in -fno-exceptions mode with message \"%s\"", __s); # endif } -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/format_functions.h b/lib/libcxx/include/__format/format_functions.h index d14b49aff149..5feaf7e5a064 100644 --- a/lib/libcxx/include/__format/format_functions.h +++ b/lib/libcxx/include/__format/format_functions.h @@ -31,7 +31,6 @@ #include <__format/formatter_pointer.h> #include <__format/formatter_string.h> #include <__format/parser_std_format_spec.h> -#include <__iterator/back_insert_iterator.h> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> #include <__iterator/iterator_traits.h> // iter_value_t @@ -40,7 +39,7 @@ #include #include -#ifndef _LIBCPP_HAS_NO_LOCALIZATION +#if _LIBCPP_HAS_LOCALIZATION # include <__locale> #endif @@ -61,7 +60,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // to do this optimization now. using format_args = basic_format_args; -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS using wformat_args = basic_format_args; # endif @@ -70,7 +69,7 @@ template return std::__format_arg_store<_Context, _Args...>(__args...); } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template [[nodiscard]] _LIBCPP_HIDE_FROM_ABI __format_arg_store make_wformat_args(_Args&... 
__args) { return std::__format_arg_store(__args...); @@ -206,7 +205,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __compile_time_visit_format_arg( case __arg_t::__long_long: return __format::__compile_time_validate_argument<_CharT, long long>(__parse_ctx, __ctx); case __arg_t::__i128: -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 return __format::__compile_time_validate_argument<_CharT, __int128_t>(__parse_ctx, __ctx); # else std::__throw_format_error("Invalid argument"); @@ -217,7 +216,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr void __compile_time_visit_format_arg( case __arg_t::__unsigned_long_long: return __format::__compile_time_validate_argument<_CharT, unsigned long long>(__parse_ctx, __ctx); case __arg_t::__u128: -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 return __format::__compile_time_validate_argument<_CharT, __uint128_t>(__parse_ctx, __ctx); # else std::__throw_format_error("Invalid argument"); @@ -355,12 +354,12 @@ struct _LIBCPP_TEMPLATE_VIS __runtime_format_string { }; _LIBCPP_HIDE_FROM_ABI inline __runtime_format_string runtime_format(string_view __fmt) noexcept { return __fmt; } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS _LIBCPP_HIDE_FROM_ABI inline __runtime_format_string runtime_format(wstring_view __fmt) noexcept { return __fmt; } # endif -# endif //_LIBCPP_STD_VER >= 26 +# endif // _LIBCPP_STD_VER >= 26 template struct _LIBCPP_TEMPLATE_VIS basic_format_string { @@ -379,7 +378,7 @@ struct _LIBCPP_TEMPLATE_VIS basic_format_string { private: basic_string_view<_CharT> __str_; - using _Context = __format::__compile_time_basic_format_context<_CharT>; + using _Context _LIBCPP_NODEBUG = __format::__compile_time_basic_format_context<_CharT>; static constexpr array<__format::__arg_t, sizeof...(_Args)> __types_{ __format::__determine_arg_t<_Context, remove_cvref_t<_Args>>()...}; @@ -397,7 +396,7 @@ struct _LIBCPP_TEMPLATE_VIS basic_format_string { template using format_string = basic_format_string...>; -# 
ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template using wformat_string = basic_format_string...>; # endif @@ -411,7 +410,7 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __vformat_to(_OutIt __out_it, return std::__format::__vformat_to( basic_format_parse_context{__fmt, __args.__size()}, std::__format_context_create(std::move(__out_it), __args)); else { - __format::__format_buffer<_OutIt, _CharT> __buffer{std::move(__out_it)}; + typename __format::__buffer_selector<_OutIt, _CharT>::type __buffer{std::move(__out_it)}; std::__format::__vformat_to(basic_format_parse_context{__fmt, __args.__size()}, std::__format_context_create(__buffer.__make_output_iterator(), __args)); return std::move(__buffer).__out_it(); @@ -426,7 +425,7 @@ _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt vformat_to(_OutIt __out_it, s return std::__vformat_to(std::move(__out_it), __fmt, __args); } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template _OutIt> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt vformat_to(_OutIt __out_it, wstring_view __fmt, wformat_args __args) { @@ -440,7 +439,7 @@ format_to(_OutIt __out_it, format_string<_Args...> __fmt, _Args&&... __args) { return std::vformat_to(std::move(__out_it), __fmt.get(), std::make_format_args(__args...)); } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template _OutIt, class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt format_to(_OutIt __out_it, wformat_string<_Args...> __fmt, _Args&&... __args) { @@ -452,20 +451,20 @@ format_to(_OutIt __out_it, wformat_string<_Args...> __fmt, _Args&&... __args) { // fires too eagerly, see http://llvm.org/PR61563. 
template [[nodiscard]] _LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI string vformat(string_view __fmt, format_args __args) { - string __res; - std::vformat_to(std::back_inserter(__res), __fmt, __args); - return __res; + __format::__allocating_buffer __buffer; + std::vformat_to(__buffer.__make_output_iterator(), __fmt, __args); + return string{__buffer.__view()}; } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS // TODO FMT This needs to be a template or std::to_chars(floating-point) availability markup // fires too eagerly, see http://llvm.org/PR61563. template [[nodiscard]] _LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI wstring vformat(wstring_view __fmt, wformat_args __args) { - wstring __res; - std::vformat_to(std::back_inserter(__res), __fmt, __args); - return __res; + __format::__allocating_buffer __buffer; + std::vformat_to(__buffer.__make_output_iterator(), __fmt, __args); + return wstring{__buffer.__view()}; } # endif @@ -475,7 +474,7 @@ format(format_string<_Args...> __fmt, _Args&&... __args) { return std::vformat(__fmt.get(), std::make_format_args(__args...)); } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template [[nodiscard]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI wstring format(wformat_string<_Args...> __fmt, _Args&&... __args) { @@ -501,7 +500,7 @@ format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, format_string<_Args. return std::__vformat_to_n(std::move(__out_it), __n, __fmt.get(), std::make_format_args(__args...)); } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template _OutIt, class... _Args> _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> format_to_n(_OutIt __out_it, iter_difference_t<_OutIt> __n, wformat_string<_Args...> __fmt, _Args&&... __args) { @@ -523,7 +522,7 @@ formatted_size(format_string<_Args...> __fmt, _Args&&... 
__args) { return std::__vformatted_size(__fmt.get(), basic_format_args{std::make_format_args(__args...)}); } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template [[nodiscard]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI size_t formatted_size(wformat_string<_Args...> __fmt, _Args&&... __args) { @@ -531,7 +530,7 @@ formatted_size(wformat_string<_Args...> __fmt, _Args&&... __args) { } # endif -# ifndef _LIBCPP_HAS_NO_LOCALIZATION +# if _LIBCPP_HAS_LOCALIZATION template requires(output_iterator<_OutIt, const _CharT&>) @@ -544,7 +543,7 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __vformat_to( return std::__format::__vformat_to(basic_format_parse_context{__fmt, __args.__size()}, std::__format_context_create(std::move(__out_it), __args, std::move(__loc))); else { - __format::__format_buffer<_OutIt, _CharT> __buffer{std::move(__out_it)}; + typename __format::__buffer_selector<_OutIt, _CharT>::type __buffer{std::move(__out_it)}; std::__format::__vformat_to( basic_format_parse_context{__fmt, __args.__size()}, std::__format_context_create(__buffer.__make_output_iterator(), __args, std::move(__loc))); @@ -558,7 +557,7 @@ vformat_to(_OutIt __out_it, locale __loc, string_view __fmt, format_args __args) return std::__vformat_to(std::move(__out_it), std::move(__loc), __fmt, __args); } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template _OutIt> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt vformat_to(_OutIt __out_it, locale __loc, wstring_view __fmt, wformat_args __args) { @@ -572,7 +571,7 @@ format_to(_OutIt __out_it, locale __loc, format_string<_Args...> __fmt, _Args&&. return std::vformat_to(std::move(__out_it), std::move(__loc), __fmt.get(), std::make_format_args(__args...)); } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template _OutIt, class... _Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI _OutIt format_to(_OutIt __out_it, locale __loc, wformat_string<_Args...> __fmt, _Args&&... 
__args) { @@ -585,20 +584,20 @@ format_to(_OutIt __out_it, locale __loc, wformat_string<_Args...> __fmt, _Args&& template [[nodiscard]] _LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI string vformat(locale __loc, string_view __fmt, format_args __args) { - string __res; - std::vformat_to(std::back_inserter(__res), std::move(__loc), __fmt, __args); - return __res; + __format::__allocating_buffer __buffer; + std::vformat_to(__buffer.__make_output_iterator(), std::move(__loc), __fmt, __args); + return string{__buffer.__view()}; } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS // TODO FMT This needs to be a template or std::to_chars(floating-point) availability markup // fires too eagerly, see http://llvm.org/PR61563. template [[nodiscard]] _LIBCPP_ALWAYS_INLINE inline _LIBCPP_HIDE_FROM_ABI wstring vformat(locale __loc, wstring_view __fmt, wformat_args __args) { - wstring __res; - std::vformat_to(std::back_inserter(__res), std::move(__loc), __fmt, __args); - return __res; + __format::__allocating_buffer __buffer; + std::vformat_to(__buffer.__make_output_iterator(), std::move(__loc), __fmt, __args); + return wstring{__buffer.__view()}; } # endif @@ -608,7 +607,7 @@ format(locale __loc, format_string<_Args...> __fmt, _Args&&... __args) { return std::vformat(std::move(__loc), __fmt.get(), std::make_format_args(__args...)); } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template [[nodiscard]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI wstring format(locale __loc, wformat_string<_Args...> __fmt, _Args&&... __args) { @@ -637,7 +636,7 @@ _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> format_to std::move(__out_it), __n, std::move(__loc), __fmt.get(), std::make_format_args(__args...)); } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template _OutIt, class... 
_Args> _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI format_to_n_result<_OutIt> format_to_n( _OutIt __out_it, iter_difference_t<_OutIt> __n, locale __loc, wformat_string<_Args...> __fmt, _Args&&... __args) { @@ -661,7 +660,7 @@ formatted_size(locale __loc, format_string<_Args...> __fmt, _Args&&... __args) { return std::__vformatted_size(std::move(__loc), __fmt.get(), basic_format_args{std::make_format_args(__args...)}); } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template [[nodiscard]] _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI size_t formatted_size(locale __loc, wformat_string<_Args...> __fmt, _Args&&... __args) { @@ -669,9 +668,9 @@ formatted_size(locale __loc, wformat_string<_Args...> __fmt, _Args&&... __args) } # endif -# endif // _LIBCPP_HAS_NO_LOCALIZATION +# endif // _LIBCPP_HAS_LOCALIZATION -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/format_parse_context.h b/lib/libcxx/include/__format/format_parse_context.h index aefcd5497f3b..459db751c9df 100644 --- a/lib/libcxx/include/__format/format_parse_context.h +++ b/lib/libcxx/include/__format/format_parse_context.h @@ -94,11 +94,11 @@ class _LIBCPP_TEMPLATE_VIS basic_format_parse_context { _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(basic_format_parse_context); using format_parse_context = basic_format_parse_context; -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS using wformat_parse_context = basic_format_parse_context; # endif -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/format_string.h b/lib/libcxx/include/__format/format_string.h index bdf3cff7f49b..5db5973dd588 100644 --- a/lib/libcxx/include/__format/format_string.h +++ b/lib/libcxx/include/__format/format_string.h @@ -12,10 +12,10 @@ #include <__assert> #include <__config> +#include <__cstddef/size_t.h> #include <__format/format_error.h> 
#include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> // iter_value_t -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -153,7 +153,7 @@ __parse_arg_id(_Iterator __begin, _Iterator __end, auto& __parse_ctx) { } // namespace __format -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/format_to_n_result.h b/lib/libcxx/include/__format/format_to_n_result.h index 6f30546dec08..344299e32f0e 100644 --- a/lib/libcxx/include/__format/format_to_n_result.h +++ b/lib/libcxx/include/__format/format_to_n_result.h @@ -28,7 +28,7 @@ struct _LIBCPP_TEMPLATE_VIS format_to_n_result { }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(format_to_n_result); -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/formatter.h b/lib/libcxx/include/__format/formatter.h index e2f418f936ee..39c2670dd843 100644 --- a/lib/libcxx/include/__format/formatter.h +++ b/lib/libcxx/include/__format/formatter.h @@ -39,6 +39,9 @@ struct _LIBCPP_TEMPLATE_VIS formatter { # if _LIBCPP_STD_VER >= 23 +template +constexpr bool enable_nonlocking_formatter_optimization = false; + template _LIBCPP_HIDE_FROM_ABI constexpr void __set_debug_format(_Tp& __formatter) { if constexpr (requires { __formatter.set_debug_format(); }) diff --git a/lib/libcxx/include/__format/formatter_bool.h b/lib/libcxx/include/__format/formatter_bool.h index 17dc69541e8f..d08acd474439 100644 --- a/lib/libcxx/include/__format/formatter_bool.h +++ b/lib/libcxx/include/__format/formatter_bool.h @@ -20,7 +20,7 @@ #include <__format/parser_std_format_spec.h> #include <__utility/unreachable.h> -#ifndef _LIBCPP_HAS_NO_LOCALIZATION +#if _LIBCPP_HAS_LOCALIZATION # include <__locale> #endif @@ -69,7 +69,11 @@ struct _LIBCPP_TEMPLATE_VIS formatter { __format_spec::__parser<_CharT> __parser_; }; -#endif //_LIBCPP_STD_VER >= 20 +# if _LIBCPP_STD_VER >= 23 +template <> 
+inline constexpr bool enable_nonlocking_formatter_optimization = true; +# endif // _LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/formatter_char.h b/lib/libcxx/include/__format/formatter_char.h index d33e84368a76..8b8fd2d42c9f 100644 --- a/lib/libcxx/include/__format/formatter_char.h +++ b/lib/libcxx/include/__format/formatter_char.h @@ -77,16 +77,24 @@ struct _LIBCPP_TEMPLATE_VIS __formatter_char { template <> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_char {}; -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template <> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_char {}; template <> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_char {}; +# endif // _LIBCPP_HAS_WIDE_CHARACTERS -# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_STD_VER >= 23 +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +# if _LIBCPP_HAS_WIDE_CHARACTERS +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +# endif // _LIBCPP_HAS_WIDE_CHARACTERS +# endif // _LIBCPP_STD_VER >= 23 -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/formatter_floating_point.h b/lib/libcxx/include/__format/formatter_floating_point.h index fa42ba203b0b..ac4be9b61935 100644 --- a/lib/libcxx/include/__format/formatter_floating_point.h +++ b/lib/libcxx/include/__format/formatter_floating_point.h @@ -23,6 +23,7 @@ #include <__concepts/arithmetic.h> #include <__concepts/same_as.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__format/concepts.h> #include <__format/format_parse_context.h> #include <__format/formatter.h> @@ -36,9 +37,8 @@ #include <__utility/move.h> #include <__utility/unreachable.h> #include -#include -#ifndef _LIBCPP_HAS_NO_LOCALIZATION +#if _LIBCPP_HAS_LOCALIZATION # include <__locale> 
#endif @@ -141,7 +141,7 @@ struct __traits { /// on the stack or the heap. template class _LIBCPP_TEMPLATE_VIS __float_buffer { - using _Traits = __traits<_Fp>; + using _Traits _LIBCPP_NODEBUG = __traits<_Fp>; public: // TODO FMT Improve this constructor to do a better estimate. @@ -491,7 +491,7 @@ _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer( } } -# ifndef _LIBCPP_HAS_NO_LOCALIZATION +# if _LIBCPP_HAS_LOCALIZATION template _LIBCPP_HIDE_FROM_ABI _OutIt __format_locale_specific_form( _OutIt __out_it, @@ -576,7 +576,7 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __format_locale_specific_form( // alignment return __formatter::__fill(std::move(__out_it), __padding.__after_, __specs.__fill_); } -# endif // _LIBCPP_HAS_NO_LOCALIZATION +# endif // _LIBCPP_HAS_LOCALIZATION template _LIBCPP_HIDE_FROM_ABI _OutIt __format_floating_point_non_finite( @@ -705,7 +705,7 @@ __format_floating_point(_Tp __value, _FormatContext& __ctx, __format_spec::__par } } -# ifndef _LIBCPP_HAS_NO_LOCALIZATION +# if _LIBCPP_HAS_LOCALIZATION if (__specs.__std_.__locale_specific_form_) return __formatter::__format_locale_specific_form(__ctx.out(), __buffer, __result, __ctx.locale(), __specs); # endif @@ -774,7 +774,15 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_float template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_floating_point<_CharT> {}; -#endif //_LIBCPP_STD_VER >= 20 +# if _LIBCPP_STD_VER >= 23 +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +# endif // _LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/formatter_integer.h b/lib/libcxx/include/__format/formatter_integer.h index 41400f00478e..3f51b10d75aa 100644 --- a/lib/libcxx/include/__format/formatter_integer.h 
+++ b/lib/libcxx/include/__format/formatter_integer.h @@ -67,7 +67,7 @@ template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter<__int128_t, _CharT> : public __formatter_integer<_CharT> {}; # endif @@ -83,12 +83,43 @@ template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_integer<_CharT> {}; -# ifndef _LIBCPP_HAS_NO_INT128 +# if _LIBCPP_HAS_INT128 template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter<__uint128_t, _CharT> : public __formatter_integer<_CharT> {}; # endif -#endif //_LIBCPP_STD_VER >= 20 +# if _LIBCPP_STD_VER >= 23 +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +# if _LIBCPP_HAS_INT128 +template <> +inline constexpr bool enable_nonlocking_formatter_optimization<__int128_t> = true; +# endif + +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +# if 
_LIBCPP_HAS_INT128 +template <> +inline constexpr bool enable_nonlocking_formatter_optimization<__uint128_t> = true; +# endif +# endif // _LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/formatter_integral.h b/lib/libcxx/include/__format/formatter_integral.h index eca966f8886f..996b7620b3e3 100644 --- a/lib/libcxx/include/__format/formatter_integral.h +++ b/lib/libcxx/include/__format/formatter_integral.h @@ -27,11 +27,12 @@ #include <__type_traits/make_unsigned.h> #include <__utility/unreachable.h> #include +#include #include #include #include -#ifndef _LIBCPP_HAS_NO_LOCALIZATION +#if _LIBCPP_HAS_LOCALIZATION # include <__locale> #endif @@ -297,7 +298,7 @@ _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator __format_integer( _Iterator __last = __formatter::__to_buffer(__first, __end, __value, __base); -# ifndef _LIBCPP_HAS_NO_LOCALIZATION +# if _LIBCPP_HAS_LOCALIZATION if (__specs.__std_.__locale_specific_form_) { const auto& __np = std::use_facet>(__ctx.locale()); string __grouping = __np.grouping(); @@ -411,7 +412,7 @@ struct _LIBCPP_TEMPLATE_VIS __bool_strings { static constexpr string_view __false{"false"}; }; -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template <> struct _LIBCPP_TEMPLATE_VIS __bool_strings { static constexpr wstring_view __true{L"true"}; @@ -422,7 +423,7 @@ struct _LIBCPP_TEMPLATE_VIS __bool_strings { template _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator __format_bool(bool __value, _FormatContext& __ctx, __format_spec::__parsed_specifications<_CharT> __specs) { -# ifndef _LIBCPP_HAS_NO_LOCALIZATION +# if _LIBCPP_HAS_LOCALIZATION if (__specs.__std_.__locale_specific_form_) { const auto& __np = std::use_facet>(__ctx.locale()); basic_string<_CharT> __str = __value ? 
__np.truename() : __np.falsename(); @@ -436,7 +437,7 @@ __format_bool(bool __value, _FormatContext& __ctx, __format_spec::__parsed_speci } // namespace __formatter -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/formatter_output.h b/lib/libcxx/include/__format/formatter_output.h index 1498f64c4aef..e1f1309cd2c5 100644 --- a/lib/libcxx/include/__format/formatter_output.h +++ b/lib/libcxx/include/__format/formatter_output.h @@ -16,6 +16,8 @@ #include <__bit/countl.h> #include <__concepts/same_as.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> +#include <__cstddef/size_t.h> #include <__format/buffer.h> #include <__format/concepts.h> #include <__format/formatter.h> @@ -28,7 +30,6 @@ #include <__memory/pointer_traits.h> #include <__utility/move.h> #include <__utility/unreachable.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -168,7 +169,7 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, _CharT __value) } } -# ifndef _LIBCPP_HAS_NO_UNICODE +# if _LIBCPP_HAS_UNICODE template <__fmt_char_type _CharT, output_iterator _OutIt> requires(same_as<_CharT, char>) _LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) { @@ -182,7 +183,7 @@ _LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec:: return __out_it; } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template <__fmt_char_type _CharT, output_iterator _OutIt> requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) _LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) { @@ -200,13 +201,13 @@ template <__fmt_char_type _CharT, output_iterator _OutIt> _LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) { return __formatter::__fill(std::move(__out_it), __n, 
__value.__data[0]); } -# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS -# else // _LIBCPP_HAS_NO_UNICODE +# endif // _LIBCPP_HAS_WIDE_CHARACTERS +# else // _LIBCPP_HAS_UNICODE template <__fmt_char_type _CharT, output_iterator _OutIt> _LIBCPP_HIDE_FROM_ABI _OutIt __fill(_OutIt __out_it, size_t __n, __format_spec::__code_point<_CharT> __value) { return __formatter::__fill(std::move(__out_it), __n, __value.__data[0]); } -# endif // _LIBCPP_HAS_NO_UNICODE +# endif // _LIBCPP_HAS_UNICODE /// Writes the input to the output with the required padding. /// @@ -294,8 +295,7 @@ _LIBCPP_HIDE_FROM_ABI auto __write_transformed( /// /// \pre !__specs.__has_precision() /// -/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the -/// input is ASCII. +/// \note When \c _LIBCPP_HAS_UNICODE is false the function assumes the input is ASCII. template _LIBCPP_HIDE_FROM_ABI auto __write_string_no_precision( basic_string_view<_CharT> __str, @@ -326,7 +326,7 @@ _LIBCPP_HIDE_FROM_ABI int __truncate(basic_string_view<_CharT>& __str, int __pre } // namespace __formatter -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/formatter_pointer.h b/lib/libcxx/include/__format/formatter_pointer.h index 6941343efd91..4ef48c168d0d 100644 --- a/lib/libcxx/include/__format/formatter_pointer.h +++ b/lib/libcxx/include/__format/formatter_pointer.h @@ -11,13 +11,13 @@ #define _LIBCPP___FORMAT_FORMATTER_POINTER_H #include <__config> +#include <__cstddef/nullptr_t.h> #include <__format/concepts.h> #include <__format/format_parse_context.h> #include <__format/formatter.h> #include <__format/formatter_integral.h> #include <__format/formatter_output.h> #include <__format/parser_std_format_spec.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -65,7 +65,15 @@ struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_pointe template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter 
: public __formatter_pointer<_CharT> {}; -#endif //_LIBCPP_STD_VER >= 20 +# if _LIBCPP_STD_VER >= 23 +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +# endif // _LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/formatter_string.h b/lib/libcxx/include/__format/formatter_string.h index 347439fc8dff..30084e582214 100644 --- a/lib/libcxx/include/__format/formatter_string.h +++ b/lib/libcxx/include/__format/formatter_string.h @@ -59,44 +59,26 @@ struct _LIBCPP_TEMPLATE_VIS __formatter_string { // Formatter const char*. template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter : public __formatter_string<_CharT> { - using _Base = __formatter_string<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_string<_CharT>; template _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator format(const _CharT* __str, _FormatContext& __ctx) const { _LIBCPP_ASSERT_INTERNAL(__str, "The basic_format_arg constructor should have prevented an invalid pointer."); - - __format_spec::__parsed_specifications<_CharT> __specs = _Base::__parser_.__get_parsed_std_specifications(__ctx); -# if _LIBCPP_STD_VER >= 23 - if (_Base::__parser_.__type_ == __format_spec::__type::__debug) - return __formatter::__format_escaped_string(basic_string_view<_CharT>{__str}, __ctx.out(), __specs); -# endif - - // When using a center or right alignment and the width option the length - // of __str must be known to add the padding upfront. This case is handled - // by the base class by converting the argument to a basic_string_view. + // Converting the input to a basic_string_view means the data is looped over twice; + // - once to determine the length, and + // - once to process the data. 
// - // When using left alignment and the width option the padding is added - // after outputting __str so the length can be determined while outputting - // __str. The same holds true for the precision, during outputting __str it - // can be validated whether the precision threshold has been reached. For - // now these optimizations aren't implemented. Instead the base class - // handles these options. - // TODO FMT Implement these improvements. - if (__specs.__has_width() || __specs.__has_precision()) - return __formatter::__write_string(basic_string_view<_CharT>{__str}, __ctx.out(), __specs); - - // No formatting required, copy the string to the output. - auto __out_it = __ctx.out(); - while (*__str) - *__out_it++ = *__str++; - return __out_it; + // This sounds slower than writing the output directly. However internally + // the output algorithms have optimizations for "bulk" operations, which + // makes this faster than a single-pass character-by-character output. + return _Base::format(basic_string_view<_CharT>(__str), __ctx); } }; // Formatter char*. 
template <__fmt_char_type _CharT> struct _LIBCPP_TEMPLATE_VIS formatter<_CharT*, _CharT> : public formatter { - using _Base = formatter; + using _Base _LIBCPP_NODEBUG = formatter; template _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator format(_CharT* __str, _FormatContext& __ctx) const { @@ -107,7 +89,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter<_CharT*, _CharT> : public formatter struct _LIBCPP_TEMPLATE_VIS formatter<_CharT[_Size], _CharT> : public __formatter_string<_CharT> { - using _Base = __formatter_string<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_string<_CharT>; template _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator @@ -120,7 +102,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter<_CharT[_Size], _CharT> : public __formatte template <__fmt_char_type _CharT, class _Traits, class _Allocator> struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_string<_CharT> { - using _Base = __formatter_string<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_string<_CharT>; template _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator @@ -133,7 +115,7 @@ struct _LIBCPP_TEMPLATE_VIS formatter, // Formatter std::string_view. 
template <__fmt_char_type _CharT, class _Traits> struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_string<_CharT> { - using _Base = __formatter_string<_CharT>; + using _Base _LIBCPP_NODEBUG = __formatter_string<_CharT>; template _LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator @@ -143,7 +125,32 @@ struct _LIBCPP_TEMPLATE_VIS formatter, _CharT } }; -#endif //_LIBCPP_STD_VER >= 20 +# if _LIBCPP_STD_VER >= 23 +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template +inline constexpr bool enable_nonlocking_formatter_optimization> = true; +template +inline constexpr bool enable_nonlocking_formatter_optimization> = true; + +# if _LIBCPP_HAS_WIDE_CHARACTERS +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template <> +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template +inline constexpr bool enable_nonlocking_formatter_optimization = true; +template +inline constexpr bool enable_nonlocking_formatter_optimization> = true; +template +inline constexpr bool enable_nonlocking_formatter_optimization> = true; +# endif // _LIBCPP_HAS_WIDE_CHARACTERS +# endif // _LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/formatter_tuple.h b/lib/libcxx/include/__format/formatter_tuple.h index 030097a8797d..bb841ef11440 100644 --- a/lib/libcxx/include/__format/formatter_tuple.h +++ b/lib/libcxx/include/__format/formatter_tuple.h @@ -143,7 +143,7 @@ template <__fmt_char_type _CharT, formattable<_CharT>... 
_Args> struct _LIBCPP_TEMPLATE_VIS formatter, _CharT> : public __formatter_tuple<_CharT, tuple<_Args...>, _Args...> {}; -#endif //_LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 23 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/indic_conjunct_break_table.h b/lib/libcxx/include/__format/indic_conjunct_break_table.h index 44521d27498c..df6cfe6a02f3 100644 --- a/lib/libcxx/include/__format/indic_conjunct_break_table.h +++ b/lib/libcxx/include/__format/indic_conjunct_break_table.h @@ -63,8 +63,8 @@ #include <__algorithm/ranges_upper_bound.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__iterator/access.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -343,7 +343,7 @@ _LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[201] = { } // namespace __indic_conjunct_break -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/parser_std_format_spec.h b/lib/libcxx/include/__format/parser_std_format_spec.h index 150bdde89f3b..415261acf0ff 100644 --- a/lib/libcxx/include/__format/parser_std_format_spec.h +++ b/lib/libcxx/include/__format/parser_std_format_spec.h @@ -52,13 +52,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD namespace __format_spec { -_LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI inline void +[[noreturn]] _LIBCPP_HIDE_FROM_ABI inline void __throw_invalid_option_format_error(const char* __id, const char* __option) { std::__throw_format_error( (string("The format specifier for ") + __id + " does not allow the " + __option + " option").c_str()); } -_LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI inline void __throw_invalid_type_format_error(const char* __id) { +[[noreturn]] _LIBCPP_HIDE_FROM_ABI inline void __throw_invalid_type_format_error(const char* __id) { std::__throw_format_error( (string("The type option contains an invalid value for ") + __id + " formatting argument").c_str()); } @@ -268,7 +268,7 @@ struct __code_point { char __data[4] = 
{' '}; }; -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template <> struct __code_point { wchar_t __data[4 / sizeof(wchar_t)] = {L' '}; @@ -321,7 +321,7 @@ struct __parsed_specifications { // value in formatting functions. static_assert(sizeof(__parsed_specifications) == 16); static_assert(is_trivially_copyable_v<__parsed_specifications>); -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS static_assert(sizeof(__parsed_specifications) == 16); static_assert(is_trivially_copyable_v<__parsed_specifications>); # endif @@ -580,11 +580,11 @@ class _LIBCPP_TEMPLATE_VIS __parser { std::__throw_format_error("The fill option contains an invalid value"); } -# ifndef _LIBCPP_HAS_NO_UNICODE +# if _LIBCPP_HAS_UNICODE // range-fill and tuple-fill are identical template requires same_as<_CharT, char> -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS || (same_as<_CharT, wchar_t> && sizeof(wchar_t) == 2) # endif _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) { @@ -617,7 +617,7 @@ class _LIBCPP_TEMPLATE_VIS __parser { return true; } -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS template requires(same_as<_CharT, wchar_t> && sizeof(wchar_t) == 4) _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) { @@ -643,9 +643,9 @@ class _LIBCPP_TEMPLATE_VIS __parser { return true; } -# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +# endif // _LIBCPP_HAS_WIDE_CHARACTERS -# else // _LIBCPP_HAS_NO_UNICODE +# else // _LIBCPP_HAS_UNICODE // range-fill and tuple-fill are identical template _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_fill_align(_Iterator& __begin, _Iterator __end) { @@ -670,7 +670,7 @@ class _LIBCPP_TEMPLATE_VIS __parser { return true; } -# endif // _LIBCPP_HAS_NO_UNICODE +# endif // _LIBCPP_HAS_UNICODE template _LIBCPP_HIDE_FROM_ABI constexpr bool __parse_sign(_Iterator& __begin) { @@ -874,7 +874,7 
@@ class _LIBCPP_TEMPLATE_VIS __parser { // Validates whether the reserved bitfields don't change the size. static_assert(sizeof(__parser) == 16); -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS static_assert(sizeof(__parser) == 16); # endif @@ -1026,7 +1026,7 @@ __column_width_result(size_t, _Iterator) -> __column_width_result<_Iterator>; /// "rounded up". enum class __column_width_rounding { __down, __up }; -# ifndef _LIBCPP_HAS_NO_UNICODE +# if _LIBCPP_HAS_UNICODE namespace __detail { template @@ -1148,7 +1148,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result<_Iterator> __estimate_colu __result.__width_ += __ascii_size; return __result; } -# else // !defined(_LIBCPP_HAS_NO_UNICODE) +# else // _LIBCPP_HAS_UNICODE template _LIBCPP_HIDE_FROM_ABI constexpr __column_width_result::const_iterator> __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __column_width_rounding) noexcept { @@ -1159,11 +1159,11 @@ __estimate_column_width(basic_string_view<_CharT> __str, size_t __maximum, __col return {__width, __str.begin() + __width}; } -# endif // !defined(_LIBCPP_HAS_NO_UNICODE) +# endif // _LIBCPP_HAS_UNICODE } // namespace __format_spec -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/range_default_formatter.h b/lib/libcxx/include/__format/range_default_formatter.h index b35223ae9332..bb4c520f5ea1 100644 --- a/lib/libcxx/include/__format/range_default_formatter.h +++ b/lib/libcxx/include/__format/range_default_formatter.h @@ -40,7 +40,7 @@ concept __const_formattable_range = ranges::input_range && formattable, _CharT>; template -using __fmt_maybe_const = conditional_t<__const_formattable_range<_Rp, _CharT>, const _Rp, _Rp>; +using __fmt_maybe_const _LIBCPP_NODEBUG = conditional_t<__const_formattable_range<_Rp, _CharT>, const _Rp, _Rp>; _LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wshadow") @@ -95,7 +95,7 @@ struct 
_LIBCPP_TEMPLATE_VIS __range_default_formatter; template struct _LIBCPP_TEMPLATE_VIS __range_default_formatter { private: - using __maybe_const_r = __fmt_maybe_const<_Rp, _CharT>; + using __maybe_const_r _LIBCPP_NODEBUG = __fmt_maybe_const<_Rp, _CharT>; range_formatter>, _CharT> __underlying_; public: @@ -122,8 +122,8 @@ struct _LIBCPP_TEMPLATE_VIS __range_default_formatter struct _LIBCPP_TEMPLATE_VIS __range_default_formatter { private: - using __maybe_const_map = __fmt_maybe_const<_Rp, _CharT>; - using __element_type = remove_cvref_t>; + using __maybe_const_map _LIBCPP_NODEBUG = __fmt_maybe_const<_Rp, _CharT>; + using __element_type _LIBCPP_NODEBUG = remove_cvref_t>; range_formatter<__element_type, _CharT> __underlying_; public: @@ -150,8 +150,8 @@ struct _LIBCPP_TEMPLATE_VIS __range_default_formatter struct _LIBCPP_TEMPLATE_VIS __range_default_formatter { private: - using __maybe_const_set = __fmt_maybe_const<_Rp, _CharT>; - using __element_type = remove_cvref_t>; + using __maybe_const_set _LIBCPP_NODEBUG = __fmt_maybe_const<_Rp, _CharT>; + using __element_type _LIBCPP_NODEBUG = remove_cvref_t>; range_formatter<__element_type, _CharT> __underlying_; public: @@ -207,7 +207,7 @@ template requires(format_kind<_Rp> != range_format::disabled && formattable, _CharT>) struct _LIBCPP_TEMPLATE_VIS formatter<_Rp, _CharT> : __range_default_formatter, _Rp, _CharT> {}; -#endif //_LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 23 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/range_formatter.h b/lib/libcxx/include/__format/range_formatter.h index 691563074349..def55c86ce51 100644 --- a/lib/libcxx/include/__format/range_formatter.h +++ b/lib/libcxx/include/__format/range_formatter.h @@ -257,7 +257,7 @@ struct _LIBCPP_TEMPLATE_VIS range_formatter { basic_string_view<_CharT> __closing_bracket_ = _LIBCPP_STATICALLY_WIDEN(_CharT, "]"); }; -#endif //_LIBCPP_STD_VER >= 23 +#endif // _LIBCPP_STD_VER >= 23 _LIBCPP_END_NAMESPACE_STD diff --git 
a/lib/libcxx/include/__format/unicode.h b/lib/libcxx/include/__format/unicode.h index de7d0fea1df5..46096fda1e8a 100644 --- a/lib/libcxx/include/__format/unicode.h +++ b/lib/libcxx/include/__format/unicode.h @@ -54,7 +54,7 @@ struct __consume_result { }; static_assert(sizeof(__consume_result) == sizeof(char32_t)); -# ifndef _LIBCPP_HAS_NO_UNICODE +# if _LIBCPP_HAS_UNICODE /// Implements the grapheme cluster boundary rules /// @@ -123,7 +123,7 @@ class __code_point_view; /// UTF-8 specialization. template <> class __code_point_view { - using _Iterator = basic_string_view::const_iterator; + using _Iterator _LIBCPP_NODEBUG = basic_string_view::const_iterator; public: _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(_Iterator __first, _Iterator __last) @@ -235,7 +235,7 @@ class __code_point_view { _Iterator __last_; }; -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS _LIBCPP_HIDE_FROM_ABI constexpr bool __is_surrogate_pair_high(wchar_t __value) { return __value >= 0xd800 && __value <= 0xdbff; } @@ -249,7 +249,7 @@ _LIBCPP_HIDE_FROM_ABI constexpr bool __is_surrogate_pair_low(wchar_t __value) { /// - 4 UTF-32 (for example Linux) template <> class __code_point_view { - using _Iterator = typename basic_string_view::const_iterator; + using _Iterator _LIBCPP_NODEBUG = typename basic_string_view::const_iterator; public: static_assert(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4, "sizeof(wchar_t) has a not implemented value"); @@ -292,7 +292,7 @@ class __code_point_view { _Iterator __first_; _Iterator __last_; }; -# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +# endif // _LIBCPP_HAS_WIDE_CHARACTERS // State machine to implement the Extended Grapheme Cluster Boundary // @@ -300,8 +300,8 @@ class __code_point_view { // This implements the extended rules see // https://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries class __extended_grapheme_cluster_break { - using __EGC_property = __extended_grapheme_custer_property_boundary::__property; 
- using __inCB_property = __indic_conjunct_break::__property; + using __EGC_property _LIBCPP_NODEBUG = __extended_grapheme_custer_property_boundary::__property; + using __inCB_property _LIBCPP_NODEBUG = __indic_conjunct_break::__property; public: _LIBCPP_HIDE_FROM_ABI constexpr explicit __extended_grapheme_cluster_break(char32_t __first_code_point) @@ -527,7 +527,7 @@ class __extended_grapheme_cluster_break { /// Therefore only this code point is extracted. template class __extended_grapheme_cluster_view { - using _Iterator = typename basic_string_view<_CharT>::const_iterator; + using _Iterator _LIBCPP_NODEBUG = typename basic_string_view<_CharT>::const_iterator; public: _LIBCPP_HIDE_FROM_ABI constexpr explicit __extended_grapheme_cluster_view(_Iterator __first, _Iterator __last) @@ -566,13 +566,13 @@ class __extended_grapheme_cluster_view { template __extended_grapheme_cluster_view(_Iterator, _Iterator) -> __extended_grapheme_cluster_view>; -# else // _LIBCPP_HAS_NO_UNICODE +# else // _LIBCPP_HAS_UNICODE // For ASCII every character is a "code point". -// This makes it easier to write code agnostic of the _LIBCPP_HAS_NO_UNICODE define. +// This makes it easier to write code agnostic of the _LIBCPP_HAS_UNICODE define. 
template class __code_point_view { - using _Iterator = typename basic_string_view<_CharT>::const_iterator; + using _Iterator _LIBCPP_NODEBUG = typename basic_string_view<_CharT>::const_iterator; public: _LIBCPP_HIDE_FROM_ABI constexpr explicit __code_point_view(_Iterator __first, _Iterator __last) @@ -591,11 +591,11 @@ class __code_point_view { _Iterator __last_; }; -# endif // _LIBCPP_HAS_NO_UNICODE +# endif // _LIBCPP_HAS_UNICODE } // namespace __unicode -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/width_estimation_table.h b/lib/libcxx/include/__format/width_estimation_table.h index 11f61dea18d6..5b4b3950c6a1 100644 --- a/lib/libcxx/include/__format/width_estimation_table.h +++ b/lib/libcxx/include/__format/width_estimation_table.h @@ -63,7 +63,7 @@ #include <__algorithm/ranges_upper_bound.h> #include <__config> -#include +#include <__cstddef/ptrdiff_t.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -263,7 +263,7 @@ inline constexpr uint32_t __table_upper_bound = 0x0003fffd; } // namespace __width_estimation_table -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__format/write_escaped.h b/lib/libcxx/include/__format/write_escaped.h index 052ea98c3c3b..aa74940032f7 100644 --- a/lib/libcxx/include/__format/write_escaped.h +++ b/lib/libcxx/include/__format/write_escaped.h @@ -16,6 +16,7 @@ #include <__charconv/to_chars_result.h> #include <__chrono/statically_widen.h> #include <__format/escaped_output_table.h> +#include <__format/extended_grapheme_cluster_table.h> #include <__format/formatter_output.h> #include <__format/parser_std_format_spec.h> #include <__format/unicode.h> @@ -41,8 +42,7 @@ namespace __formatter { /// Writes a string using format's width estimation algorithm. /// -/// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the -/// input is ASCII. 
+/// \note When \c _LIBCPP_HAS_UNICODE is false the function assumes the input is ASCII. template _LIBCPP_HIDE_FROM_ABI auto __write_string(basic_string_view<_CharT> __str, @@ -103,7 +103,7 @@ _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_Cha template [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(basic_string<_CharT>& __str, bool __last_escaped, char32_t __value) { -# ifdef _LIBCPP_HAS_NO_UNICODE +# if !_LIBCPP_HAS_UNICODE // For ASCII assume everything above 127 is printable. if (__value > 127) return false; diff --git a/lib/libcxx/include/__functional/binary_function.h b/lib/libcxx/include/__functional/binary_function.h index ddee3b170311..bde8b03ef828 100644 --- a/lib/libcxx/include/__functional/binary_function.h +++ b/lib/libcxx/include/__functional/binary_function.h @@ -42,11 +42,11 @@ struct __binary_function_keep_layout_base { _LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wdeprecated-declarations") template -using __binary_function = binary_function<_Arg1, _Arg2, _Result>; +using __binary_function _LIBCPP_NODEBUG = binary_function<_Arg1, _Arg2, _Result>; _LIBCPP_DIAGNOSTIC_POP #else template -using __binary_function = __binary_function_keep_layout_base<_Arg1, _Arg2, _Result>; +using __binary_function _LIBCPP_NODEBUG = __binary_function_keep_layout_base<_Arg1, _Arg2, _Result>; #endif _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__functional/bind.h b/lib/libcxx/include/__functional/bind.h index b4f46441da50..a3c327ab40cc 100644 --- a/lib/libcxx/include/__functional/bind.h +++ b/lib/libcxx/include/__functional/bind.h @@ -11,13 +11,12 @@ #define _LIBCPP___FUNCTIONAL_BIND_H #include <__config> -#include <__functional/invoke.h> #include <__functional/weak_result_type.h> #include <__fwd/functional.h> #include <__type_traits/decay.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_reference_wrapper.h> #include <__type_traits/is_void.h> -#include #include #if 
!defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -83,13 +82,13 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp& __mu(reference_w } template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename __invoke_of<_Ti&, _Uj...>::type +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __invoke_result_t<_Ti&, _Uj...> __mu_expand(_Ti& __ti, tuple<_Uj...>& __uj, __tuple_indices<_Indx...>) { return __ti(std::forward<_Uj>(std::get<_Indx>(__uj))...); } template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename __invoke_of<_Ti&, _Uj...>::type +inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __invoke_result_t<_Ti&, _Uj...> __mu(_Ti& __ti, tuple<_Uj...>& __uj) { typedef typename __make_tuple_indices::type __indices; return std::__mu_expand(__ti, __uj, __indices()); @@ -131,12 +130,12 @@ struct __mu_return_invokable // false template struct __mu_return_invokable { - typedef typename __invoke_of<_Ti&, _Uj...>::type type; + using type = __invoke_result_t<_Ti&, _Uj...>; }; template struct __mu_return_impl<_Ti, false, true, false, tuple<_Uj...> > - : public __mu_return_invokable<__invokable<_Ti&, _Uj...>::value, _Ti, _Uj...> {}; + : public __mu_return_invokable<__is_invocable_v<_Ti&, _Uj...>, _Ti, _Uj...> {}; template struct __mu_return_impl<_Ti, false, false, true, _TupleUj> { @@ -169,12 +168,12 @@ struct __is_valid_bind_return { template struct __is_valid_bind_return<_Fp, tuple<_BoundArgs...>, _TupleUj> { - static const bool value = __invokable<_Fp, typename __mu_return<_BoundArgs, _TupleUj>::type...>::value; + static const bool value = __is_invocable_v<_Fp, typename __mu_return<_BoundArgs, _TupleUj>::type...>; }; template struct __is_valid_bind_return<_Fp, const tuple<_BoundArgs...>, _TupleUj> { - static const bool value = __invokable<_Fp, typename __mu_return::type...>::value; + static const bool value = __is_invocable_v<_Fp, typename __mu_return::type...>; }; template ::value> @@ -182,12 
+181,12 @@ struct __bind_return; template struct __bind_return<_Fp, tuple<_BoundArgs...>, _TupleUj, true> { - typedef typename __invoke_of< _Fp&, typename __mu_return< _BoundArgs, _TupleUj >::type... >::type type; + using type = __invoke_result_t< _Fp&, typename __mu_return< _BoundArgs, _TupleUj >::type... >; }; template struct __bind_return<_Fp, const tuple<_BoundArgs...>, _TupleUj, true> { - typedef typename __invoke_of< _Fp&, typename __mu_return< const _BoundArgs, _TupleUj >::type... >::type type; + using type = __invoke_result_t< _Fp&, typename __mu_return< const _BoundArgs, _TupleUj >::type... >; }; template @@ -199,7 +198,7 @@ __apply_functor(_Fp& __f, _BoundArgs& __bound_args, __tuple_indices<_Indx...>, _ template class __bind : public __weak_result_type<__decay_t<_Fp> > { protected: - using _Fd = __decay_t<_Fp>; + using _Fd _LIBCPP_NODEBUG = __decay_t<_Fp>; typedef tuple<__decay_t<_BoundArgs>...> _Td; private: @@ -257,8 +256,7 @@ class __bind_r : public __bind<_Fp, _BoundArgs...> { is_void<_Rp>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 result_type operator()(_Args&&... __args) { - typedef __invoke_void_return_wrapper<_Rp> _Invoker; - return _Invoker::__call(static_cast(*this), std::forward<_Args>(__args)...); + return std::__invoke_r<_Rp>(static_cast(*this), std::forward<_Args>(__args)...); } template { is_void<_Rp>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 result_type operator()(_Args&&... 
__args) const { - typedef __invoke_void_return_wrapper<_Rp> _Invoker; - return _Invoker::__call(static_cast(*this), std::forward<_Args>(__args)...); + return std::__invoke_r<_Rp>(static_cast(*this), std::forward<_Args>(__args)...); } }; diff --git a/lib/libcxx/include/__functional/boyer_moore_searcher.h b/lib/libcxx/include/__functional/boyer_moore_searcher.h index 648b60c50521..1e49cc5464be 100644 --- a/lib/libcxx/include/__functional/boyer_moore_searcher.h +++ b/lib/libcxx/include/__functional/boyer_moore_searcher.h @@ -22,9 +22,10 @@ #include <__memory/shared_ptr.h> #include <__type_traits/make_unsigned.h> #include <__utility/pair.h> +#include <__vector/vector.h> #include +#include #include -#include #if _LIBCPP_STD_VER >= 17 @@ -91,7 +92,7 @@ class _LIBCPP_TEMPLATE_VIS boyer_moore_searcher { private: using difference_type = typename std::iterator_traits<_RandomAccessIterator1>::difference_type; using value_type = typename std::iterator_traits<_RandomAccessIterator1>::value_type; - using __skip_table_type = + using __skip_table_type _LIBCPP_NODEBUG = _BMSkipTable::difference_type; using value_type = typename iterator_traits<_RandomAccessIterator1>::value_type; - using __skip_table_type = + using __skip_table_type _LIBCPP_NODEBUG = _BMSkipTable #include <__config> +#include <__cstddef/nullptr_t.h> #include <__exception/exception.h> #include <__functional/binary_function.h> #include <__functional/invoke.h> @@ -21,7 +22,6 @@ #include <__memory/allocator.h> #include <__memory/allocator_destructor.h> #include <__memory/allocator_traits.h> -#include <__memory/builtin_new_allocator.h> #include <__memory/compressed_pair.h> #include <__memory/unique_ptr.h> #include <__type_traits/aligned_storage.h> @@ -37,7 +37,6 @@ #include <__utility/piecewise_construct.h> #include <__utility/swap.h> #include <__verbose_abort> -#include #include #include @@ -78,8 +77,8 @@ class _LIBCPP_EXPORTED_FROM_ABI bad_function_call : public exception { }; _LIBCPP_DIAGNOSTIC_POP -_LIBCPP_NORETURN 
inline _LIBCPP_HIDE_FROM_ABI void __throw_bad_function_call() { -# ifndef _LIBCPP_HAS_NO_EXCEPTIONS +[[__noreturn__]] inline _LIBCPP_HIDE_FROM_ABI void __throw_bad_function_call() { +# if _LIBCPP_HAS_EXCEPTIONS throw bad_function_call(); # else _LIBCPP_VERBOSE_ABORT("bad_function_call was thrown in -fno-exceptions mode"); @@ -123,7 +122,7 @@ _LIBCPP_HIDE_FROM_ABI bool __not_null(function<_Fp> const& __f) { return !!__f; } -# ifdef _LIBCPP_HAS_EXTENSION_BLOCKS +# if _LIBCPP_HAS_EXTENSION_BLOCKS template _LIBCPP_HIDE_FROM_ABI bool __not_null(_Rp (^__p)(_Args...)) { return __p; @@ -143,45 +142,45 @@ class __default_alloc_func; template class __alloc_func<_Fp, _Ap, _Rp(_ArgTypes...)> { - __compressed_pair<_Fp, _Ap> __f_; + _LIBCPP_COMPRESSED_PAIR(_Fp, __func_, _Ap, __alloc_); public: - typedef _LIBCPP_NODEBUG _Fp _Target; - typedef _LIBCPP_NODEBUG _Ap _Alloc; + using _Target _LIBCPP_NODEBUG = _Fp; + using _Alloc _LIBCPP_NODEBUG = _Ap; - _LIBCPP_HIDE_FROM_ABI const _Target& __target() const { return __f_.first(); } + _LIBCPP_HIDE_FROM_ABI const _Target& __target() const { return __func_; } // WIN32 APIs may define __allocator, so use __get_allocator instead. 
- _LIBCPP_HIDE_FROM_ABI const _Alloc& __get_allocator() const { return __f_.second(); } + _LIBCPP_HIDE_FROM_ABI const _Alloc& __get_allocator() const { return __alloc_; } - _LIBCPP_HIDE_FROM_ABI explicit __alloc_func(_Target&& __f) - : __f_(piecewise_construct, std::forward_as_tuple(std::move(__f)), std::forward_as_tuple()) {} + _LIBCPP_HIDE_FROM_ABI explicit __alloc_func(_Target&& __f) : __func_(std::move(__f)), __alloc_() {} - _LIBCPP_HIDE_FROM_ABI explicit __alloc_func(const _Target& __f, const _Alloc& __a) - : __f_(piecewise_construct, std::forward_as_tuple(__f), std::forward_as_tuple(__a)) {} + _LIBCPP_HIDE_FROM_ABI explicit __alloc_func(const _Target& __f, const _Alloc& __a) : __func_(__f), __alloc_(__a) {} _LIBCPP_HIDE_FROM_ABI explicit __alloc_func(const _Target& __f, _Alloc&& __a) - : __f_(piecewise_construct, std::forward_as_tuple(__f), std::forward_as_tuple(std::move(__a))) {} + : __func_(__f), __alloc_(std::move(__a)) {} _LIBCPP_HIDE_FROM_ABI explicit __alloc_func(_Target&& __f, _Alloc&& __a) - : __f_(piecewise_construct, std::forward_as_tuple(std::move(__f)), std::forward_as_tuple(std::move(__a))) {} + : __func_(std::move(__f)), __alloc_(std::move(__a)) {} _LIBCPP_HIDE_FROM_ABI _Rp operator()(_ArgTypes&&... 
__arg) { - typedef __invoke_void_return_wrapper<_Rp> _Invoker; - return _Invoker::__call(__f_.first(), std::forward<_ArgTypes>(__arg)...); + return std::__invoke_r<_Rp>(__func_, std::forward<_ArgTypes>(__arg)...); } _LIBCPP_HIDE_FROM_ABI __alloc_func* __clone() const { typedef allocator_traits<_Alloc> __alloc_traits; typedef __rebind_alloc<__alloc_traits, __alloc_func> _AA; - _AA __a(__f_.second()); + _AA __a(__alloc_); typedef __allocator_destructor<_AA> _Dp; unique_ptr<__alloc_func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new ((void*)__hold.get()) __alloc_func(__f_.first(), _Alloc(__a)); + ::new ((void*)__hold.get()) __alloc_func(__func_, _Alloc(__a)); return __hold.release(); } - _LIBCPP_HIDE_FROM_ABI void destroy() _NOEXCEPT { __f_.~__compressed_pair<_Target, _Alloc>(); } + _LIBCPP_HIDE_FROM_ABI void destroy() _NOEXCEPT { + __func_.~_Fp(); + __alloc_.~_Alloc(); + } _LIBCPP_HIDE_FROM_ABI static void __destroy_and_delete(__alloc_func* __f) { typedef allocator_traits<_Alloc> __alloc_traits; @@ -192,12 +191,19 @@ class __alloc_func<_Fp, _Ap, _Rp(_ArgTypes...)> { } }; +template +struct __deallocating_deleter { + _LIBCPP_HIDE_FROM_ABI void operator()(void* __p) const { + std::__libcpp_deallocate<_Tp>(static_cast<_Tp*>(__p), __element_count(1)); + } +}; + template class __default_alloc_func<_Fp, _Rp(_ArgTypes...)> { _Fp __f_; public: - typedef _LIBCPP_NODEBUG _Fp _Target; + using _Target _LIBCPP_NODEBUG = _Fp; _LIBCPP_HIDE_FROM_ABI const _Target& __target() const { return __f_; } @@ -206,13 +212,13 @@ class __default_alloc_func<_Fp, _Rp(_ArgTypes...)> { _LIBCPP_HIDE_FROM_ABI explicit __default_alloc_func(const _Target& __f) : __f_(__f) {} _LIBCPP_HIDE_FROM_ABI _Rp operator()(_ArgTypes&&... 
__arg) { - typedef __invoke_void_return_wrapper<_Rp> _Invoker; - return _Invoker::__call(__f_, std::forward<_ArgTypes>(__arg)...); + return std::__invoke_r<_Rp>(__f_, std::forward<_ArgTypes>(__arg)...); } _LIBCPP_HIDE_FROM_ABI __default_alloc_func* __clone() const { - __builtin_new_allocator::__holder_t __hold = __builtin_new_allocator::__allocate_type<__default_alloc_func>(1); - __default_alloc_func* __res = ::new ((void*)__hold.get()) __default_alloc_func(__f_); + using _Self = __default_alloc_func; + unique_ptr<_Self, __deallocating_deleter<_Self>> __hold(std::__libcpp_allocate<_Self>(__element_count(1))); + _Self* __res = ::new ((void*)__hold.get()) _Self(__f_); (void)__hold.release(); return __res; } @@ -221,7 +227,7 @@ class __default_alloc_func<_Fp, _Rp(_ArgTypes...)> { _LIBCPP_HIDE_FROM_ABI static void __destroy_and_delete(__default_alloc_func* __f) { __f->destroy(); - __builtin_new_allocator::__deallocate_type<__default_alloc_func>(__f, 1); + std::__libcpp_deallocate<__default_alloc_func>(__f, __element_count(1)); } }; @@ -243,10 +249,10 @@ class __base<_Rp(_ArgTypes...)> { virtual void destroy() _NOEXCEPT = 0; virtual void destroy_deallocate() _NOEXCEPT = 0; virtual _Rp operator()(_ArgTypes&&...) = 0; -# ifndef _LIBCPP_HAS_NO_RTTI +# if _LIBCPP_HAS_RTTI virtual const void* target(const type_info&) const _NOEXCEPT = 0; virtual const std::type_info& target_type() const _NOEXCEPT = 0; -# endif // _LIBCPP_HAS_NO_RTTI +# endif // _LIBCPP_HAS_RTTI }; // __func implements __base for a given functor type. @@ -272,10 +278,10 @@ class __func<_Fp, _Alloc, _Rp(_ArgTypes...)> : public __base<_Rp(_ArgTypes...)> _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void destroy() _NOEXCEPT; _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void destroy_deallocate() _NOEXCEPT; _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual _Rp operator()(_ArgTypes&&... 
__arg); -# ifndef _LIBCPP_HAS_NO_RTTI +# if _LIBCPP_HAS_RTTI _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual const void* target(const type_info&) const _NOEXCEPT; _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual const std::type_info& target_type() const _NOEXCEPT; -# endif // _LIBCPP_HAS_NO_RTTI +# endif // _LIBCPP_HAS_RTTI }; template @@ -313,7 +319,7 @@ _Rp __func<_Fp, _Alloc, _Rp(_ArgTypes...)>::operator()(_ArgTypes&&... __arg) { return __f_(std::forward<_ArgTypes>(__arg)...); } -# ifndef _LIBCPP_HAS_NO_RTTI +# if _LIBCPP_HAS_RTTI template const void* __func<_Fp, _Alloc, _Rp(_ArgTypes...)>::target(const type_info& __ti) const _NOEXCEPT { @@ -327,7 +333,7 @@ const std::type_info& __func<_Fp, _Alloc, _Rp(_ArgTypes...)>::target_type() cons return typeid(_Fp); } -# endif // _LIBCPP_HAS_NO_RTTI +# endif // _LIBCPP_HAS_RTTI // __value_func creates a value-type from a __func. @@ -464,7 +470,7 @@ class __value_func<_Rp(_ArgTypes...)> { _LIBCPP_HIDE_FROM_ABI explicit operator bool() const _NOEXCEPT { return __f_ != nullptr; } -# ifndef _LIBCPP_HAS_NO_RTTI +# if _LIBCPP_HAS_RTTI _LIBCPP_HIDE_FROM_ABI const std::type_info& target_type() const _NOEXCEPT { if (__f_ == nullptr) return typeid(void); @@ -477,7 +483,7 @@ class __value_func<_Rp(_ArgTypes...)> { return nullptr; return (const _Tp*)__f_->target(typeid(_Tp)); } -# endif // _LIBCPP_HAS_NO_RTTI +# endif // _LIBCPP_HAS_RTTI }; // Storage for a functor object, to be used with __policy to manage copy and @@ -520,7 +526,7 @@ struct __policy { nullptr, nullptr, true, -# ifndef _LIBCPP_HAS_NO_RTTI +# if _LIBCPP_HAS_RTTI &typeid(void) # else nullptr @@ -547,7 +553,7 @@ struct __policy { &__large_clone<_Fun>, &__large_destroy<_Fun>, false, -# ifndef _LIBCPP_HAS_NO_RTTI +# if _LIBCPP_HAS_RTTI &typeid(typename _Fun::_Target) # else nullptr @@ -562,7 +568,7 @@ struct __policy { nullptr, nullptr, false, -# ifndef _LIBCPP_HAS_NO_RTTI +# if _LIBCPP_HAS_RTTI &typeid(typename _Fun::_Target) # else nullptr @@ -575,7 +581,7 @@ struct __policy { // Used to 
choose between perfect forwarding or pass-by-value. Pass-by-value is // faster for types that can be passed in registers. template -using __fast_forward = __conditional_t::value, _Tp, _Tp&&>; +using __fast_forward _LIBCPP_NODEBUG = __conditional_t::value, _Tp, _Tp&&>; // __policy_invoker calls an instance of __alloc_func held in __policy_storage. @@ -667,8 +673,8 @@ class __policy_func<_Rp(_ArgTypes...)> { if (__use_small_storage<_Fun>()) { ::new ((void*)&__buf_.__small) _Fun(std::move(__f)); } else { - __builtin_new_allocator::__holder_t __hold = __builtin_new_allocator::__allocate_type<_Fun>(1); - __buf_.__large = ::new ((void*)__hold.get()) _Fun(std::move(__f)); + unique_ptr<_Fun, __deallocating_deleter<_Fun>> __hold(std::__libcpp_allocate<_Fun>(__element_count(1))); + __buf_.__large = ::new ((void*)__hold.get()) _Fun(std::move(__f)); (void)__hold.release(); } } @@ -724,7 +730,7 @@ class __policy_func<_Rp(_ArgTypes...)> { _LIBCPP_HIDE_FROM_ABI explicit operator bool() const _NOEXCEPT { return !__policy_->__is_null; } -# ifndef _LIBCPP_HAS_NO_RTTI +# if _LIBCPP_HAS_RTTI _LIBCPP_HIDE_FROM_ABI const std::type_info& target_type() const _NOEXCEPT { return *__policy_->__type_info; } template @@ -736,10 +742,10 @@ class __policy_func<_Rp(_ArgTypes...)> { else return reinterpret_cast(&__buf_.__small); } -# endif // _LIBCPP_HAS_NO_RTTI +# endif // _LIBCPP_HAS_RTTI }; -# if defined(_LIBCPP_HAS_BLOCKS_RUNTIME) +# if _LIBCPP_HAS_BLOCKS_RUNTIME extern "C" void* _Block_copy(const void*); extern "C" void _Block_release(const void*); @@ -751,7 +757,7 @@ class __func<_Rp1 (^)(_ArgTypes1...), _Alloc, _Rp(_ArgTypes...)> : public __base public: _LIBCPP_HIDE_FROM_ABI explicit __func(__block_type const& __f) -# ifdef _LIBCPP_HAS_OBJC_ARC +# if _LIBCPP_HAS_OBJC_ARC : __f_(__f) # else : __f_(reinterpret_cast<__block_type>(__f ? 
_Block_copy(__f) : nullptr)) @@ -762,7 +768,7 @@ class __func<_Rp1 (^)(_ArgTypes1...), _Alloc, _Rp(_ArgTypes...)> : public __base // [TODO] add && to save on a retain _LIBCPP_HIDE_FROM_ABI explicit __func(__block_type __f, const _Alloc& /* unused */) -# ifdef _LIBCPP_HAS_OBJC_ARC +# if _LIBCPP_HAS_OBJC_ARC : __f_(__f) # else : __f_(reinterpret_cast<__block_type>(__f ? _Block_copy(__f) : nullptr)) @@ -784,7 +790,7 @@ class __func<_Rp1 (^)(_ArgTypes1...), _Alloc, _Rp(_ArgTypes...)> : public __base } _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual void destroy() _NOEXCEPT { -# ifndef _LIBCPP_HAS_OBJC_ARC +# if !_LIBCPP_HAS_OBJC_ARC if (__f_) _Block_release(__f_); # endif @@ -803,7 +809,7 @@ class __func<_Rp1 (^)(_ArgTypes1...), _Alloc, _Rp(_ArgTypes...)> : public __base return std::__invoke(__f_, std::forward<_ArgTypes>(__arg)...); } -# ifndef _LIBCPP_HAS_NO_RTTI +# if _LIBCPP_HAS_RTTI _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual const void* target(type_info const& __ti) const _NOEXCEPT { if (__ti == typeid(__func::__block_type)) return &__f_; @@ -813,7 +819,7 @@ class __func<_Rp1 (^)(_ArgTypes1...), _Alloc, _Rp(_ArgTypes...)> : public __base _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual const std::type_info& target_type() const _NOEXCEPT { return typeid(__func::__block_type); } -# endif // _LIBCPP_HAS_NO_RTTI +# endif // _LIBCPP_HAS_RTTI }; # endif // _LIBCPP_HAS_EXTENSION_BLOCKS @@ -833,12 +839,12 @@ class _LIBCPP_TEMPLATE_VIS function<_Rp(_ArgTypes...)> __func __f_; template , function>, __invokable<_Fp, _ArgTypes...> >::value> + bool = _And<_IsNotSame<__remove_cvref_t<_Fp>, function>, __is_invocable<_Fp, _ArgTypes...> >::value> struct __callable; template struct __callable<_Fp, true> { static const bool value = - is_void<_Rp>::value || __is_core_convertible::type, _Rp>::value; + is_void<_Rp>::value || __is_core_convertible<__invoke_result_t<_Fp, _ArgTypes...>, _Rp>::value; }; template struct __callable<_Fp, false> { @@ -846,14 +852,14 @@ class _LIBCPP_TEMPLATE_VIS 
function<_Rp(_ArgTypes...)> }; template - using _EnableIfLValueCallable = __enable_if_t<__callable<_Fp&>::value>; + using _EnableIfLValueCallable _LIBCPP_NODEBUG = __enable_if_t<__callable<_Fp&>::value>; public: typedef _Rp result_type; // construct/copy/destroy: _LIBCPP_HIDE_FROM_ABI function() _NOEXCEPT {} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDE_FROM_ABI function(nullptr_t) _NOEXCEPT {} + _LIBCPP_HIDE_FROM_ABI function(nullptr_t) _NOEXCEPT {} _LIBCPP_HIDE_FROM_ABI function(const function&); _LIBCPP_HIDE_FROM_ABI function(function&&) _NOEXCEPT; template > @@ -905,14 +911,14 @@ class _LIBCPP_TEMPLATE_VIS function<_Rp(_ArgTypes...)> // function invocation: _LIBCPP_HIDE_FROM_ABI _Rp operator()(_ArgTypes...) const; -# ifndef _LIBCPP_HAS_NO_RTTI +# if _LIBCPP_HAS_RTTI // function target access: _LIBCPP_HIDE_FROM_ABI const std::type_info& target_type() const _NOEXCEPT; template _LIBCPP_HIDE_FROM_ABI _Tp* target() _NOEXCEPT; template _LIBCPP_HIDE_FROM_ABI const _Tp* target() const _NOEXCEPT; -# endif // _LIBCPP_HAS_NO_RTTI +# endif // _LIBCPP_HAS_RTTI }; # if _LIBCPP_STD_VER >= 17 @@ -989,7 +995,7 @@ _Rp function<_Rp(_ArgTypes...)>::operator()(_ArgTypes... 
__arg) const { return __f_(std::forward<_ArgTypes>(__arg)...); } -# ifndef _LIBCPP_HAS_NO_RTTI +# if _LIBCPP_HAS_RTTI template const std::type_info& function<_Rp(_ArgTypes...)>::target_type() const _NOEXCEPT { @@ -1008,7 +1014,7 @@ const _Tp* function<_Rp(_ArgTypes...)>::target() const _NOEXCEPT { return __f_.template target<_Tp>(); } -# endif // _LIBCPP_HAS_NO_RTTI +# endif // _LIBCPP_HAS_RTTI template inline _LIBCPP_HIDE_FROM_ABI bool operator==(const function<_Rp(_ArgTypes...)>& __f, nullptr_t) _NOEXCEPT { diff --git a/lib/libcxx/include/__functional/hash.h b/lib/libcxx/include/__functional/hash.h index a9e450edd39f..28b2635ab125 100644 --- a/lib/libcxx/include/__functional/hash.h +++ b/lib/libcxx/include/__functional/hash.h @@ -10,16 +10,17 @@ #define _LIBCPP___FUNCTIONAL_HASH_H #include <__config> +#include <__cstddef/nullptr_t.h> #include <__functional/unary_function.h> #include <__fwd/functional.h> #include <__type_traits/conjunction.h> +#include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_enum.h> #include <__type_traits/underlying_type.h> #include <__utility/pair.h> #include <__utility/swap.h> -#include #include #include @@ -355,12 +356,12 @@ struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function(__v); } }; -#ifndef _LIBCPP_HAS_NO_CHAR8_T +#if _LIBCPP_HAS_CHAR8_T template <> struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { _LIBCPP_HIDE_FROM_ABI size_t operator()(char8_t __v) const _NOEXCEPT { return static_cast(__v); } }; -#endif // !_LIBCPP_HAS_NO_CHAR8_T +#endif // _LIBCPP_HAS_CHAR8_T template <> struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { @@ -372,12 +373,12 @@ struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function(__v); } }; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS template <> struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { _LIBCPP_HIDE_FROM_ABI size_t operator()(wchar_t __v) const 
_NOEXCEPT { return static_cast(__v); } }; -#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +#endif // _LIBCPP_HAS_WIDE_CHARACTERS template <> struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { @@ -406,7 +407,11 @@ struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { template <> struct _LIBCPP_TEMPLATE_VIS hash : public __unary_function { - _LIBCPP_HIDE_FROM_ABI size_t operator()(unsigned long __v) const _NOEXCEPT { return static_cast(__v); } + _LIBCPP_HIDE_FROM_ABI size_t operator()(unsigned long __v) const _NOEXCEPT { + static_assert(sizeof(size_t) >= sizeof(unsigned long), + "This would be a terrible hash function on a platform where size_t is smaller than unsigned long"); + return static_cast(__v); + } }; template <> @@ -415,7 +420,7 @@ struct _LIBCPP_TEMPLATE_VIS hash : public __scalar_hash {} template <> struct _LIBCPP_TEMPLATE_VIS hash : public __scalar_hash {}; -#ifndef _LIBCPP_HAS_NO_INT128 +#if _LIBCPP_HAS_INT128 template <> struct _LIBCPP_TEMPLATE_VIS hash<__int128_t> : public __scalar_hash<__int128_t> {}; @@ -517,7 +522,7 @@ template using __check_hash_requirements _LIBCPP_NODEBUG = integral_constant::value && is_move_constructible<_Hash>::value && - __invokable_r::value >; + __is_invocable_r_v >; template > using __has_enabled_hash _LIBCPP_NODEBUG = diff --git a/lib/libcxx/include/__functional/identity.h b/lib/libcxx/include/__functional/identity.h index 8468de3dae26..1b1c6cf73c37 100644 --- a/lib/libcxx/include/__functional/identity.h +++ b/lib/libcxx/include/__functional/identity.h @@ -26,7 +26,7 @@ struct __is_identity : false_type {}; struct __identity { template - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&& operator()(_Tp&& __t) const _NOEXCEPT { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _Tp&& operator()(_Tp&& __t) const _NOEXCEPT { return std::forward<_Tp>(__t); } diff --git a/lib/libcxx/include/__functional/invoke.h b/lib/libcxx/include/__functional/invoke.h index ef4bf25f0775..ab201e94206e 100644 
--- a/lib/libcxx/include/__functional/invoke.h +++ b/lib/libcxx/include/__functional/invoke.h @@ -12,6 +12,7 @@ #include <__config> #include <__type_traits/invoke.h> +#include <__type_traits/is_void.h> #include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/lib/libcxx/include/__functional/is_transparent.h b/lib/libcxx/include/__functional/is_transparent.h index b2d62f2e3ead..567df1a662f5 100644 --- a/lib/libcxx/include/__functional/is_transparent.h +++ b/lib/libcxx/include/__functional/is_transparent.h @@ -21,11 +21,11 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 14 -template +template inline const bool __is_transparent_v = false; -template -inline const bool __is_transparent_v<_Tp, _Up, __void_t > = true; +template +inline const bool __is_transparent_v<_Tp, _Key, __void_t > = true; #endif diff --git a/lib/libcxx/include/__functional/mem_fn.h b/lib/libcxx/include/__functional/mem_fn.h index ee07a71774f9..690393988c5a 100644 --- a/lib/libcxx/include/__functional/mem_fn.h +++ b/lib/libcxx/include/__functional/mem_fn.h @@ -12,8 +12,8 @@ #include <__config> #include <__functional/binary_function.h> -#include <__functional/invoke.h> #include <__functional/weak_result_type.h> +#include <__type_traits/invoke.h> #include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -36,10 +36,8 @@ class __mem_fn : public __weak_result_type<_Tp> { // invoke template - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 - - typename __invoke_return::type - operator()(_ArgTypes&&... __args) const { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __invoke_result_t + operator()(_ArgTypes&&... 
__args) const _NOEXCEPT_(__is_nothrow_invocable_v) { return std::__invoke(__f_, std::forward<_ArgTypes>(__args)...); } }; diff --git a/lib/libcxx/include/__functional/not_fn.h b/lib/libcxx/include/__functional/not_fn.h index 4b3ce5524a74..e6f14be799db 100644 --- a/lib/libcxx/include/__functional/not_fn.h +++ b/lib/libcxx/include/__functional/not_fn.h @@ -16,6 +16,8 @@ #include <__type_traits/decay.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_constructible.h> +#include <__type_traits/is_member_pointer.h> +#include <__type_traits/is_pointer.h> #include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -48,6 +50,27 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 auto not_fn(_Fn&& __f) { #endif // _LIBCPP_STD_VER >= 17 +#if _LIBCPP_STD_VER >= 26 + +template +struct __nttp_not_fn_t { + template + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Args&&... __args) const + noexcept(noexcept(!std::invoke(_Fn, std::forward<_Args>(__args)...))) + -> decltype(!std::invoke(_Fn, std::forward<_Args>(__args)...)) { + return !std::invoke(_Fn, std::forward<_Args>(__args)...); + } +}; + +template +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI constexpr auto not_fn() noexcept { + if constexpr (using _Ty = decltype(_Fn); is_pointer_v<_Ty> || is_member_pointer_v<_Ty>) + static_assert(_Fn != nullptr, "f cannot be equal to nullptr"); + return __nttp_not_fn_t<_Fn>(); +} + +#endif // _LIBCPP_STD_VER >= 26 + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___FUNCTIONAL_NOT_FN_H diff --git a/lib/libcxx/include/__functional/operations.h b/lib/libcxx/include/__functional/operations.h index 0a6320f19de3..67d9da289aea 100644 --- a/lib/libcxx/include/__functional/operations.h +++ b/lib/libcxx/include/__functional/operations.h @@ -14,6 +14,7 @@ #include <__functional/binary_function.h> #include <__functional/unary_function.h> #include <__type_traits/desugars_to.h> +#include <__type_traits/is_integral.h> #include <__utility/forward.h> #if 
!defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -364,6 +365,9 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(less); template inline const bool __desugars_to_v<__less_tag, less<_Tp>, _Tp, _Tp> = true; +template +inline const bool __desugars_to_v<__totally_ordered_less_tag, less<_Tp>, _Tp, _Tp> = is_integral<_Tp>::value; + #if _LIBCPP_STD_VER >= 14 template <> struct _LIBCPP_TEMPLATE_VIS less { @@ -376,8 +380,11 @@ struct _LIBCPP_TEMPLATE_VIS less { typedef void is_transparent; }; +template +inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Up> = true; + template -inline const bool __desugars_to_v<__less_tag, less<>, _Tp, _Tp> = true; +inline const bool __desugars_to_v<__totally_ordered_less_tag, less<>, _Tp, _Tp> = is_integral<_Tp>::value; #endif #if _LIBCPP_STD_VER >= 14 @@ -445,6 +452,9 @@ struct _LIBCPP_TEMPLATE_VIS greater : __binary_function<_Tp, _Tp, bool> { }; _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(greater); +template +inline const bool __desugars_to_v<__greater_tag, greater<_Tp>, _Tp, _Tp> = true; + #if _LIBCPP_STD_VER >= 14 template <> struct _LIBCPP_TEMPLATE_VIS greater { @@ -456,6 +466,9 @@ struct _LIBCPP_TEMPLATE_VIS greater { } typedef void is_transparent; }; + +template +inline const bool __desugars_to_v<__greater_tag, greater<>, _Tp, _Up> = true; #endif // Logical operations diff --git a/lib/libcxx/include/__functional/perfect_forward.h b/lib/libcxx/include/__functional/perfect_forward.h index 74177c789b4a..37c3d15b4bec 100644 --- a/lib/libcxx/include/__functional/perfect_forward.h +++ b/lib/libcxx/include/__functional/perfect_forward.h @@ -11,6 +11,7 @@ #define _LIBCPP___FUNCTIONAL_PERFECT_FORWARD_H #include <__config> +#include <__cstddef/size_t.h> #include <__type_traits/enable_if.h> #include <__type_traits/invoke.h> #include <__type_traits/is_constructible.h> @@ -93,7 +94,7 @@ struct __perfect_forward_impl<_Op, index_sequence<_Idx...>, _BoundArgs...> { // __perfect_forward implements a perfect-forwarding call wrapper as explained in [func.require]. 
template -using __perfect_forward = __perfect_forward_impl<_Op, index_sequence_for<_Args...>, _Args...>; +using __perfect_forward _LIBCPP_NODEBUG = __perfect_forward_impl<_Op, index_sequence_for<_Args...>, _Args...>; #endif // _LIBCPP_STD_VER >= 17 diff --git a/lib/libcxx/include/__functional/ranges_operations.h b/lib/libcxx/include/__functional/ranges_operations.h index 27f06eadd0eb..df95843e7c9a 100644 --- a/lib/libcxx/include/__functional/ranges_operations.h +++ b/lib/libcxx/include/__functional/ranges_operations.h @@ -99,9 +99,15 @@ struct greater_equal { template inline const bool __desugars_to_v<__equal_tag, ranges::equal_to, _Tp, _Up> = true; +template +inline const bool __desugars_to_v<__totally_ordered_less_tag, ranges::less, _Tp, _Up> = true; + template inline const bool __desugars_to_v<__less_tag, ranges::less, _Tp, _Up> = true; +template +inline const bool __desugars_to_v<__greater_tag, ranges::greater, _Tp, _Up> = true; + #endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__functional/reference_wrapper.h b/lib/libcxx/include/__functional/reference_wrapper.h index 3570e2673c80..d6cd6428f22d 100644 --- a/lib/libcxx/include/__functional/reference_wrapper.h +++ b/lib/libcxx/include/__functional/reference_wrapper.h @@ -13,10 +13,10 @@ #include <__compare/synth_three_way.h> #include <__concepts/boolean_testable.h> #include <__config> -#include <__functional/invoke.h> #include <__functional/weak_result_type.h> #include <__memory/addressof.h> #include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_const.h> #include <__type_traits/remove_cvref.h> #include <__type_traits/void_t.h> @@ -57,7 +57,7 @@ class _LIBCPP_TEMPLATE_VIS reference_wrapper : public __weak_result_type<_Tp> { // invoke template - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 typename __invoke_of::type + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __invoke_result_t operator()(_ArgTypes&&... 
__args) const #if _LIBCPP_STD_VER >= 17 // Since is_nothrow_invocable requires C++17 LWG3764 is not backported diff --git a/lib/libcxx/include/__functional/unary_function.h b/lib/libcxx/include/__functional/unary_function.h index 69b1bc94220a..769ffc9893a7 100644 --- a/lib/libcxx/include/__functional/unary_function.h +++ b/lib/libcxx/include/__functional/unary_function.h @@ -39,11 +39,11 @@ struct __unary_function_keep_layout_base { _LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wdeprecated-declarations") template -using __unary_function = unary_function<_Arg, _Result>; +using __unary_function _LIBCPP_NODEBUG = unary_function<_Arg, _Result>; _LIBCPP_DIAGNOSTIC_POP #else template -using __unary_function = __unary_function_keep_layout_base<_Arg, _Result>; +using __unary_function _LIBCPP_NODEBUG = __unary_function_keep_layout_base<_Arg, _Result>; #endif _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__functional/weak_result_type.h b/lib/libcxx/include/__functional/weak_result_type.h index ad7a8395186c..233d86009a20 100644 --- a/lib/libcxx/include/__functional/weak_result_type.h +++ b/lib/libcxx/include/__functional/weak_result_type.h @@ -12,9 +12,9 @@ #include <__config> #include <__functional/binary_function.h> -#include <__functional/invoke.h> #include <__functional/unary_function.h> #include <__type_traits/integral_constant.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_same.h> #include <__utility/declval.h> @@ -221,11 +221,6 @@ struct __weak_result_type<_Rp (_Cp::*)(_A1, _A2, _A3...) 
const volatile> { #endif }; -template -struct __invoke_return { - typedef decltype(std::__invoke(std::declval<_Tp>(), std::declval<_Args>()...)) type; -}; - _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___FUNCTIONAL_WEAK_RESULT_TYPE_H diff --git a/lib/libcxx/include/__fwd/array.h b/lib/libcxx/include/__fwd/array.h index b429d0c5a954..794779ae46ab 100644 --- a/lib/libcxx/include/__fwd/array.h +++ b/lib/libcxx/include/__fwd/array.h @@ -10,7 +10,8 @@ #define _LIBCPP___FWD_ARRAY_H #include <__config> -#include +#include <__cstddef/size_t.h> +#include <__type_traits/integral_constant.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -35,11 +36,11 @@ template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 const _Tp&& get(const array<_Tp, _Size>&&) _NOEXCEPT; #endif -template -struct __is_std_array : false_type {}; +template +inline const bool __is_std_array_v = false; template -struct __is_std_array > : true_type {}; +inline const bool __is_std_array_v > = true; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__fwd/bit_reference.h b/lib/libcxx/include/__fwd/bit_reference.h index 237efb6db664..30462b6ce4c9 100644 --- a/lib/libcxx/include/__fwd/bit_reference.h +++ b/lib/libcxx/include/__fwd/bit_reference.h @@ -20,6 +20,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD template class __bit_iterator; +template +struct __size_difference_type_traits; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___FWD_BIT_REFERENCE_H diff --git a/lib/libcxx/include/__fwd/byte.h b/lib/libcxx/include/__fwd/byte.h new file mode 100644 index 000000000000..0301833d93cf --- /dev/null +++ b/lib/libcxx/include/__fwd/byte.h @@ -0,0 +1,26 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___FWD_BYTE_H +#define _LIBCPP___FWD_BYTE_H + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if _LIBCPP_STD_VER >= 17 +namespace std { // purposefully not versioned + +enum class byte : unsigned char; + +} // namespace std +#endif // _LIBCPP_STD_VER >= 17 + +#endif // _LIBCPP___FWD_BYTE_H diff --git a/lib/libcxx/include/__fwd/complex.h b/lib/libcxx/include/__fwd/complex.h index 22c78c5cc3c7..092d2e10b12b 100644 --- a/lib/libcxx/include/__fwd/complex.h +++ b/lib/libcxx/include/__fwd/complex.h @@ -10,7 +10,7 @@ #define _LIBCPP___FWD_COMPLEX_H #include <__config> -#include +#include <__cstddef/size_t.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__fwd/format.h b/lib/libcxx/include/__fwd/format.h index b30c220f8a04..815e3e1922c6 100644 --- a/lib/libcxx/include/__fwd/format.h +++ b/lib/libcxx/include/__fwd/format.h @@ -31,7 +31,7 @@ class _LIBCPP_TEMPLATE_VIS basic_format_context; template struct _LIBCPP_TEMPLATE_VIS formatter; -#endif //_LIBCPP_STD_VER >= 20 +#endif // _LIBCPP_STD_VER >= 20 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__fwd/fstream.h b/lib/libcxx/include/__fwd/fstream.h index b4a112bfd4de..e6c430dbf75b 100644 --- a/lib/libcxx/include/__fwd/fstream.h +++ b/lib/libcxx/include/__fwd/fstream.h @@ -32,7 +32,7 @@ using ifstream = basic_ifstream; using ofstream = basic_ofstream; using fstream = basic_fstream; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS using wfilebuf = basic_filebuf; using wifstream = basic_ifstream; using wofstream = basic_ofstream; diff --git a/lib/libcxx/include/__fwd/get.h b/lib/libcxx/include/__fwd/get.h new file mode 100644 index 000000000000..6121ed0efd2b --- /dev/null +++ 
b/lib/libcxx/include/__fwd/get.h @@ -0,0 +1,24 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___FWD_GET_H +#define _LIBCPP___FWD_GET_H + +#include <__config> +#include <__fwd/array.h> +#include <__fwd/complex.h> +#include <__fwd/pair.h> +#include <__fwd/subrange.h> +#include <__fwd/tuple.h> +#include <__fwd/variant.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#endif // _LIBCPP___FWD_GET_H diff --git a/lib/libcxx/include/__fwd/ios.h b/lib/libcxx/include/__fwd/ios.h index 48350709d4ce..bb0c6eb49b52 100644 --- a/lib/libcxx/include/__fwd/ios.h +++ b/lib/libcxx/include/__fwd/ios.h @@ -24,7 +24,7 @@ template > class _LIBCPP_TEMPLATE_VIS basic_ios; using ios = basic_ios; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS using wios = basic_ios; #endif diff --git a/lib/libcxx/include/__fwd/istream.h b/lib/libcxx/include/__fwd/istream.h index a06907a6c8ef..66a6708544e5 100644 --- a/lib/libcxx/include/__fwd/istream.h +++ b/lib/libcxx/include/__fwd/istream.h @@ -27,7 +27,7 @@ class _LIBCPP_TEMPLATE_VIS basic_iostream; using istream = basic_istream; using iostream = basic_iostream; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS using wistream = basic_istream; using wiostream = basic_iostream; #endif diff --git a/lib/libcxx/include/__fwd/memory.h b/lib/libcxx/include/__fwd/memory.h index b9e151855ad7..564000997dec 100644 --- a/lib/libcxx/include/__fwd/memory.h +++ b/lib/libcxx/include/__fwd/memory.h @@ -20,6 +20,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD template class _LIBCPP_TEMPLATE_VIS allocator; +template +class _LIBCPP_TEMPLATE_VIS shared_ptr; 
+ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___FWD_MEMORY_H diff --git a/lib/libcxx/include/__fwd/memory_resource.h b/lib/libcxx/include/__fwd/memory_resource.h index d68b2c2b6315..ca9d3770945c 100644 --- a/lib/libcxx/include/__fwd/memory_resource.h +++ b/lib/libcxx/include/__fwd/memory_resource.h @@ -15,6 +15,8 @@ # pragma GCC system_header #endif +#if _LIBCPP_STD_VER >= 17 + _LIBCPP_BEGIN_NAMESPACE_STD namespace pmr { @@ -24,4 +26,6 @@ class _LIBCPP_AVAILABILITY_PMR _LIBCPP_TEMPLATE_VIS polymorphic_allocator; _LIBCPP_END_NAMESPACE_STD +#endif // _LIBCPP_STD_VER >= 17 + #endif // _LIBCPP___FWD_MEMORY_RESOURCE_H diff --git a/lib/libcxx/include/__fwd/ostream.h b/lib/libcxx/include/__fwd/ostream.h index 3347e0f71d7a..ff5a3612ef87 100644 --- a/lib/libcxx/include/__fwd/ostream.h +++ b/lib/libcxx/include/__fwd/ostream.h @@ -23,7 +23,7 @@ class _LIBCPP_TEMPLATE_VIS basic_ostream; using ostream = basic_ostream; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS using wostream = basic_ostream; #endif diff --git a/lib/libcxx/include/__fwd/pair.h b/lib/libcxx/include/__fwd/pair.h index af32628fe1e0..b8ba2b7e9232 100644 --- a/lib/libcxx/include/__fwd/pair.h +++ b/lib/libcxx/include/__fwd/pair.h @@ -10,8 +10,8 @@ #define _LIBCPP___FWD_PAIR_H #include <__config> +#include <__cstddef/size_t.h> #include <__fwd/tuple.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__fwd/span.h b/lib/libcxx/include/__fwd/span.h index 8dafa742c19d..5d473ee51c6b 100644 --- a/lib/libcxx/include/__fwd/span.h +++ b/lib/libcxx/include/__fwd/span.h @@ -11,7 +11,7 @@ #define _LIBCPP___FWD_SPAN_H #include <__config> -#include +#include <__cstddef/size_t.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/lib/libcxx/include/__fwd/sstream.h b/lib/libcxx/include/__fwd/sstream.h index 39a9c3faf1f8..c176db6e5ada 100644 --- a/lib/libcxx/include/__fwd/sstream.h +++ 
b/lib/libcxx/include/__fwd/sstream.h @@ -34,7 +34,7 @@ using istringstream = basic_istringstream; using ostringstream = basic_ostringstream; using stringstream = basic_stringstream; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS using wstringbuf = basic_stringbuf; using wistringstream = basic_istringstream; using wostringstream = basic_ostringstream; diff --git a/lib/libcxx/include/__fwd/streambuf.h b/lib/libcxx/include/__fwd/streambuf.h index b35afa6afe34..aee0ebb3ce0f 100644 --- a/lib/libcxx/include/__fwd/streambuf.h +++ b/lib/libcxx/include/__fwd/streambuf.h @@ -23,7 +23,7 @@ class _LIBCPP_TEMPLATE_VIS basic_streambuf; using streambuf = basic_streambuf; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS using wstreambuf = basic_streambuf; #endif diff --git a/lib/libcxx/include/__fwd/string.h b/lib/libcxx/include/__fwd/string.h index 2418e1f9b23d..89dec82d6ffc 100644 --- a/lib/libcxx/include/__fwd/string.h +++ b/lib/libcxx/include/__fwd/string.h @@ -24,7 +24,7 @@ struct _LIBCPP_TEMPLATE_VIS char_traits; template <> struct char_traits; -#ifndef _LIBCPP_HAS_NO_CHAR8_T +#if _LIBCPP_HAS_CHAR8_T template <> struct char_traits; #endif @@ -34,7 +34,7 @@ struct char_traits; template <> struct char_traits; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS template <> struct char_traits; #endif @@ -44,11 +44,11 @@ class _LIBCPP_TEMPLATE_VIS basic_string; using string = basic_string; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS using wstring = basic_string; #endif -#ifndef _LIBCPP_HAS_NO_CHAR8_T +#if _LIBCPP_HAS_CHAR8_T using u8string = basic_string; #endif @@ -63,11 +63,11 @@ using basic_string _LIBCPP_AVAILABILITY_PMR = std::basic_string<_CharT, _Traits, using string _LIBCPP_AVAILABILITY_PMR = basic_string; -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS using wstring _LIBCPP_AVAILABILITY_PMR = basic_string; # endif -# ifndef _LIBCPP_HAS_NO_CHAR8_T 
+# if _LIBCPP_HAS_CHAR8_T using u8string _LIBCPP_AVAILABILITY_PMR = basic_string; # endif @@ -80,20 +80,20 @@ using u32string _LIBCPP_AVAILABILITY_PMR = basic_string; // clang-format off template class _LIBCPP_PREFERRED_NAME(string) -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS _LIBCPP_PREFERRED_NAME(wstring) #endif -#ifndef _LIBCPP_HAS_NO_CHAR8_T +#if _LIBCPP_HAS_CHAR8_T _LIBCPP_PREFERRED_NAME(u8string) #endif _LIBCPP_PREFERRED_NAME(u16string) _LIBCPP_PREFERRED_NAME(u32string) #if _LIBCPP_STD_VER >= 17 _LIBCPP_PREFERRED_NAME(pmr::string) -# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +# if _LIBCPP_HAS_WIDE_CHARACTERS _LIBCPP_PREFERRED_NAME(pmr::wstring) # endif -# ifndef _LIBCPP_HAS_NO_CHAR8_T +# if _LIBCPP_HAS_CHAR8_T _LIBCPP_PREFERRED_NAME(pmr::u8string) # endif _LIBCPP_PREFERRED_NAME(pmr::u16string) diff --git a/lib/libcxx/include/__fwd/string_view.h b/lib/libcxx/include/__fwd/string_view.h index 72a64be5b00b..b848cb7f60f5 100644 --- a/lib/libcxx/include/__fwd/string_view.h +++ b/lib/libcxx/include/__fwd/string_view.h @@ -23,22 +23,22 @@ template > class _LIBCPP_TEMPLATE_VIS basic_string_view; typedef basic_string_view string_view; -#ifndef _LIBCPP_HAS_NO_CHAR8_T +#if _LIBCPP_HAS_CHAR8_T typedef basic_string_view u8string_view; #endif typedef basic_string_view u16string_view; typedef basic_string_view u32string_view; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS typedef basic_string_view wstring_view; #endif // clang-format off template class _LIBCPP_PREFERRED_NAME(string_view) -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS _LIBCPP_PREFERRED_NAME(wstring_view) #endif -#ifndef _LIBCPP_HAS_NO_CHAR8_T +#if _LIBCPP_HAS_CHAR8_T _LIBCPP_PREFERRED_NAME(u8string_view) #endif _LIBCPP_PREFERRED_NAME(u16string_view) diff --git a/lib/libcxx/include/__fwd/subrange.h b/lib/libcxx/include/__fwd/subrange.h index 60a41da23dd4..5b3a07e55348 100644 --- a/lib/libcxx/include/__fwd/subrange.h +++ 
b/lib/libcxx/include/__fwd/subrange.h @@ -11,8 +11,8 @@ #include <__concepts/copyable.h> #include <__config> +#include <__cstddef/size_t.h> #include <__iterator/concepts.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__fwd/tuple.h b/lib/libcxx/include/__fwd/tuple.h index 902770c29555..2ed32bc0df4e 100644 --- a/lib/libcxx/include/__fwd/tuple.h +++ b/lib/libcxx/include/__fwd/tuple.h @@ -10,7 +10,7 @@ #define _LIBCPP___FWD_TUPLE_H #include <__config> -#include +#include <__cstddef/size_t.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__fwd/variant.h b/lib/libcxx/include/__fwd/variant.h new file mode 100644 index 000000000000..71c792f46a90 --- /dev/null +++ b/lib/libcxx/include/__fwd/variant.h @@ -0,0 +1,77 @@ +//===---------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LIBCPP___FWD_VARIANT_H +#define _LIBCPP___FWD_VARIANT_H + +#include <__config> +#include <__cstddef/size_t.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER >= 17 + +template +class _LIBCPP_TEMPLATE_VIS variant; + +template +struct _LIBCPP_TEMPLATE_VIS variant_size; + +template +inline constexpr size_t variant_size_v = variant_size<_Tp>::value; + +template +struct _LIBCPP_TEMPLATE_VIS variant_alternative; + +template +using variant_alternative_t = typename variant_alternative<_Ip, _Tp>::type; + +inline constexpr size_t variant_npos = static_cast(-1); + +template +_LIBCPP_HIDE_FROM_ABI +_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr variant_alternative_t<_Ip, variant<_Types...>>& +get(variant<_Types...>&); + +template +_LIBCPP_HIDE_FROM_ABI +_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr variant_alternative_t<_Ip, variant<_Types...>>&& +get(variant<_Types...>&&); + +template +_LIBCPP_HIDE_FROM_ABI +_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const variant_alternative_t<_Ip, variant<_Types...>>& +get(const variant<_Types...>&); + +template +_LIBCPP_HIDE_FROM_ABI +_LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const variant_alternative_t<_Ip, variant<_Types...>>&& +get(const variant<_Types...>&&); + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr _Tp& get(variant<_Types...>&); + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr _Tp&& get(variant<_Types...>&&); + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const _Tp& get(const variant<_Types...>&); + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS constexpr const _Tp&& +get(const 
variant<_Types...>&&); + +#endif // _LIBCPP_STD_VER >= 17 + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___FWD_VARIANT_H diff --git a/lib/libcxx/include/__fwd/vector.h b/lib/libcxx/include/__fwd/vector.h index c9cc96137449..6980e40ec918 100644 --- a/lib/libcxx/include/__fwd/vector.h +++ b/lib/libcxx/include/__fwd/vector.h @@ -21,6 +21,9 @@ _LIBCPP_BEGIN_NAMESPACE_STD template > class _LIBCPP_TEMPLATE_VIS vector; +template +class vector; + _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP___FWD_VECTOR_H diff --git a/lib/libcxx/include/__hash_table b/lib/libcxx/include/__hash_table index 025758528573..9a82ec51daee 100644 --- a/lib/libcxx/include/__hash_table +++ b/lib/libcxx/include/__hash_table @@ -15,9 +15,11 @@ #include <__assert> #include <__bit/countl.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> +#include <__cstddef/size_t.h> #include <__functional/hash.h> -#include <__functional/invoke.h> #include <__iterator/iterator_traits.h> +#include <__math/rounding_functions.h> #include <__memory/addressof.h> #include <__memory/allocator_traits.h> #include <__memory/compressed_pair.h> @@ -25,14 +27,16 @@ #include <__memory/pointer_traits.h> #include <__memory/swap_allocator.h> #include <__memory/unique_ptr.h> +#include <__new/launder.h> #include <__type_traits/can_extract_key.h> -#include <__type_traits/conditional.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_const.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_nothrow_assignable.h> #include <__type_traits/is_nothrow_constructible.h> -#include <__type_traits/is_pointer.h> #include <__type_traits/is_reference.h> +#include <__type_traits/is_same.h> #include <__type_traits/is_swappable.h> #include <__type_traits/remove_const.h> #include <__type_traits/remove_cvref.h> @@ -40,10 +44,7 @@ #include <__utility/move.h> #include <__utility/pair.h> #include <__utility/swap.h> -#include -#include -#include -#include // __launder +#include 
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -77,11 +78,18 @@ struct __hash_node_base { typedef __hash_node_base __first_node; typedef __rebind_pointer_t<_NodePtr, __first_node> __node_base_pointer; typedef _NodePtr __node_pointer; - -#if defined(_LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB) typedef __node_base_pointer __next_pointer; -#else - typedef __conditional_t::value, __node_base_pointer, __node_pointer> __next_pointer; + +// TODO(LLVM 22): Remove this check +#ifndef _LIBCPP_ABI_FIX_UNORDERED_NODE_POINTER_UB + static_assert(sizeof(__node_base_pointer) == sizeof(__node_pointer) && _LIBCPP_ALIGNOF(__node_base_pointer) == + _LIBCPP_ALIGNOF(__node_pointer), + "It looks like you are using std::__hash_table (an implementation detail for the unordered containers) " + "with a fancy pointer type that thas a different representation depending on whether it points to a " + "__hash_table base pointer or a __hash_table node pointer (both of which are implementation details of " + "the standard library). This means that your ABI is being broken between LLVM 19 and LLVM 20. 
If you " + "don't care about your ABI being broken, define the _LIBCPP_ABI_TREE_REMOVE_NODE_POINTER_UB macro to " + "silence this diagnostic."); #endif __next_pointer __next_; @@ -103,8 +111,8 @@ struct __hash_node_base { template struct __hash_node : public __hash_node_base< __rebind_pointer_t<_VoidPtr, __hash_node<_Tp, _VoidPtr> > > { typedef _Tp __node_value_type; - using _Base = __hash_node_base<__rebind_pointer_t<_VoidPtr, __hash_node<_Tp, _VoidPtr> > >; - using __next_pointer = typename _Base::__next_pointer; + using _Base _LIBCPP_NODEBUG = __hash_node_base<__rebind_pointer_t<_VoidPtr, __hash_node<_Tp, _VoidPtr> > >; + using __next_pointer _LIBCPP_NODEBUG = typename _Base::__next_pointer; size_t __hash_; @@ -554,29 +562,29 @@ class __bucket_list_deallocator { typedef allocator_traits __alloc_traits; typedef typename __alloc_traits::size_type size_type; - __compressed_pair __data_; + _LIBCPP_COMPRESSED_PAIR(size_type, __size_, allocator_type, __alloc_); public: typedef typename __alloc_traits::pointer pointer; _LIBCPP_HIDE_FROM_ABI __bucket_list_deallocator() _NOEXCEPT_(is_nothrow_default_constructible::value) - : __data_(0, __default_init_tag()) {} + : __size_(0) {} _LIBCPP_HIDE_FROM_ABI __bucket_list_deallocator(const allocator_type& __a, size_type __size) _NOEXCEPT_(is_nothrow_copy_constructible::value) - : __data_(__size, __a) {} + : __size_(__size), __alloc_(__a) {} _LIBCPP_HIDE_FROM_ABI __bucket_list_deallocator(__bucket_list_deallocator&& __x) _NOEXCEPT_(is_nothrow_move_constructible::value) - : __data_(std::move(__x.__data_)) { + : __size_(std::move(__x.__size_)), __alloc_(std::move(__x.__alloc_)) { __x.size() = 0; } - _LIBCPP_HIDE_FROM_ABI size_type& size() _NOEXCEPT { return __data_.first(); } - _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __data_.first(); } + _LIBCPP_HIDE_FROM_ABI size_type& size() _NOEXCEPT { return __size_; } + _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; } - _LIBCPP_HIDE_FROM_ABI 
allocator_type& __alloc() _NOEXCEPT { return __data_.second(); } - _LIBCPP_HIDE_FROM_ABI const allocator_type& __alloc() const _NOEXCEPT { return __data_.second(); } + _LIBCPP_HIDE_FROM_ABI allocator_type& __alloc() _NOEXCEPT { return __alloc_; } + _LIBCPP_HIDE_FROM_ABI const allocator_type& __alloc() const _NOEXCEPT { return __alloc_; } _LIBCPP_HIDE_FROM_ABI void operator()(pointer __p) _NOEXCEPT { __alloc_traits::deallocate(__alloc(), __p, size()); } }; @@ -642,9 +650,9 @@ struct __enforce_unordered_container_requirements { template #ifndef _LIBCPP_CXX03_LANG -_LIBCPP_DIAGNOSE_WARNING(!__invokable<_Equal const&, _Key const&, _Key const&>::value, +_LIBCPP_DIAGNOSE_WARNING(!__is_invocable_v<_Equal const&, _Key const&, _Key const&>, "the specified comparator type does not provide a viable const call operator") -_LIBCPP_DIAGNOSE_WARNING(!__invokable<_Hash const&, _Key const&>::value, +_LIBCPP_DIAGNOSE_WARNING(!__is_invocable_v<_Hash const&, _Key const&>, "the specified hash functor does not provide a viable const call operator") #endif typename __enforce_unordered_container_requirements<_Key, _Hash, _Equal>::type @@ -716,27 +724,27 @@ private: // --- Member data begin --- __bucket_list __bucket_list_; - __compressed_pair<__first_node, __node_allocator> __p1_; - __compressed_pair __p2_; - __compressed_pair __p3_; + _LIBCPP_COMPRESSED_PAIR(__first_node, __first_node_, __node_allocator, __node_alloc_); + _LIBCPP_COMPRESSED_PAIR(size_type, __size_, hasher, __hasher_); + _LIBCPP_COMPRESSED_PAIR(float, __max_load_factor_, key_equal, __key_eq_); // --- Member data end --- - _LIBCPP_HIDE_FROM_ABI size_type& size() _NOEXCEPT { return __p2_.first(); } + _LIBCPP_HIDE_FROM_ABI size_type& size() _NOEXCEPT { return __size_; } public: - _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __p2_.first(); } + _LIBCPP_HIDE_FROM_ABI size_type size() const _NOEXCEPT { return __size_; } - _LIBCPP_HIDE_FROM_ABI hasher& hash_function() _NOEXCEPT { return __p2_.second(); } - 
_LIBCPP_HIDE_FROM_ABI const hasher& hash_function() const _NOEXCEPT { return __p2_.second(); } + _LIBCPP_HIDE_FROM_ABI hasher& hash_function() _NOEXCEPT { return __hasher_; } + _LIBCPP_HIDE_FROM_ABI const hasher& hash_function() const _NOEXCEPT { return __hasher_; } - _LIBCPP_HIDE_FROM_ABI float& max_load_factor() _NOEXCEPT { return __p3_.first(); } - _LIBCPP_HIDE_FROM_ABI float max_load_factor() const _NOEXCEPT { return __p3_.first(); } + _LIBCPP_HIDE_FROM_ABI float& max_load_factor() _NOEXCEPT { return __max_load_factor_; } + _LIBCPP_HIDE_FROM_ABI float max_load_factor() const _NOEXCEPT { return __max_load_factor_; } - _LIBCPP_HIDE_FROM_ABI key_equal& key_eq() _NOEXCEPT { return __p3_.second(); } - _LIBCPP_HIDE_FROM_ABI const key_equal& key_eq() const _NOEXCEPT { return __p3_.second(); } + _LIBCPP_HIDE_FROM_ABI key_equal& key_eq() _NOEXCEPT { return __key_eq_; } + _LIBCPP_HIDE_FROM_ABI const key_equal& key_eq() const _NOEXCEPT { return __key_eq_; } - _LIBCPP_HIDE_FROM_ABI __node_allocator& __node_alloc() _NOEXCEPT { return __p1_.second(); } - _LIBCPP_HIDE_FROM_ABI const __node_allocator& __node_alloc() const _NOEXCEPT { return __p1_.second(); } + _LIBCPP_HIDE_FROM_ABI __node_allocator& __node_alloc() _NOEXCEPT { return __node_alloc_; } + _LIBCPP_HIDE_FROM_ABI const __node_allocator& __node_alloc() const _NOEXCEPT { return __node_alloc_; } public: typedef __hash_iterator<__node_pointer> iterator; @@ -875,10 +883,10 @@ public: _LIBCPP_HIDE_FROM_ABI void __rehash_unique(size_type __n) { __rehash(__n); } _LIBCPP_HIDE_FROM_ABI void __rehash_multi(size_type __n) { __rehash(__n); } _LIBCPP_HIDE_FROM_ABI void __reserve_unique(size_type __n) { - __rehash_unique(static_cast(std::ceil(__n / max_load_factor()))); + __rehash_unique(static_cast(__math::ceil(__n / max_load_factor()))); } _LIBCPP_HIDE_FROM_ABI void __reserve_multi(size_type __n) { - __rehash_multi(static_cast(std::ceil(__n / max_load_factor()))); + __rehash_multi(static_cast(__math::ceil(__n / 
max_load_factor()))); } _LIBCPP_HIDE_FROM_ABI size_type bucket_count() const _NOEXCEPT { return __bucket_list_.get_deleter().size(); } @@ -1022,26 +1030,34 @@ inline __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table() _NOEXCEPT_( is_nothrow_default_constructible<__bucket_list>::value&& is_nothrow_default_constructible<__first_node>::value&& is_nothrow_default_constructible<__node_allocator>::value&& is_nothrow_default_constructible::value&& is_nothrow_default_constructible::value) - : __p2_(0, __default_init_tag()), __p3_(1.0f, __default_init_tag()) {} + : __size_(0), __max_load_factor_(1.0f) {} template inline __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const hasher& __hf, const key_equal& __eql) - : __bucket_list_(nullptr, __bucket_list_deleter()), __p1_(), __p2_(0, __hf), __p3_(1.0f, __eql) {} + : __bucket_list_(nullptr, __bucket_list_deleter()), + __first_node_(), + __node_alloc_(), + __size_(0), + __hasher_(__hf), + __max_load_factor_(1.0f), + __key_eq_(__eql) {} template __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table( const hasher& __hf, const key_equal& __eql, const allocator_type& __a) : __bucket_list_(nullptr, __bucket_list_deleter(__pointer_allocator(__a), 0)), - __p1_(__default_init_tag(), __node_allocator(__a)), - __p2_(0, __hf), - __p3_(1.0f, __eql) {} + __node_alloc_(__node_allocator(__a)), + __size_(0), + __hasher_(__hf), + __max_load_factor_(1.0f), + __key_eq_(__eql) {} template __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const allocator_type& __a) : __bucket_list_(nullptr, __bucket_list_deleter(__pointer_allocator(__a), 0)), - __p1_(__default_init_tag(), __node_allocator(__a)), - __p2_(0, __default_init_tag()), - __p3_(1.0f, __default_init_tag()) {} + __node_alloc_(__node_allocator(__a)), + __size_(0), + __max_load_factor_(1.0f) {} template __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const __hash_table& __u) @@ -1049,17 +1065,20 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const 
__hash_table& __u) __bucket_list_deleter(allocator_traits<__pointer_allocator>::select_on_container_copy_construction( __u.__bucket_list_.get_deleter().__alloc()), 0)), - __p1_(__default_init_tag(), - allocator_traits<__node_allocator>::select_on_container_copy_construction(__u.__node_alloc())), - __p2_(0, __u.hash_function()), - __p3_(__u.__p3_) {} + __node_alloc_(allocator_traits<__node_allocator>::select_on_container_copy_construction(__u.__node_alloc())), + __size_(0), + __hasher_(__u.hash_function()), + __max_load_factor_(__u.__max_load_factor_), + __key_eq_(__u.__key_eq_) {} template __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const __hash_table& __u, const allocator_type& __a) : __bucket_list_(nullptr, __bucket_list_deleter(__pointer_allocator(__a), 0)), - __p1_(__default_init_tag(), __node_allocator(__a)), - __p2_(0, __u.hash_function()), - __p3_(__u.__p3_) {} + __node_alloc_(__node_allocator(__a)), + __size_(0), + __hasher_(__u.hash_function()), + __max_load_factor_(__u.__max_load_factor_), + __key_eq_(__u.__key_eq_) {} template __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(__hash_table&& __u) _NOEXCEPT_( @@ -1067,12 +1086,15 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(__hash_table&& __u) _NOEX is_nothrow_move_constructible<__node_allocator>::value&& is_nothrow_move_constructible::value&& is_nothrow_move_constructible::value) : __bucket_list_(std::move(__u.__bucket_list_)), - __p1_(std::move(__u.__p1_)), - __p2_(std::move(__u.__p2_)), - __p3_(std::move(__u.__p3_)) { + __first_node_(std::move(__u.__first_node_)), + __node_alloc_(std::move(__u.__node_alloc_)), + __size_(std::move(__u.__size_)), + __hasher_(std::move(__u.__hasher_)), + __max_load_factor_(__u.__max_load_factor_), + __key_eq_(std::move(__u.__key_eq_)) { if (size() > 0) { - __bucket_list_[std::__constrain_hash(__p1_.first().__next_->__hash(), bucket_count())] = __p1_.first().__ptr(); - __u.__p1_.first().__next_ = nullptr; + 
__bucket_list_[std::__constrain_hash(__first_node_.__next_->__hash(), bucket_count())] = __first_node_.__ptr(); + __u.__first_node_.__next_ = nullptr; __u.size() = 0; } } @@ -1080,17 +1102,19 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(__hash_table&& __u) _NOEX template __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(__hash_table&& __u, const allocator_type& __a) : __bucket_list_(nullptr, __bucket_list_deleter(__pointer_allocator(__a), 0)), - __p1_(__default_init_tag(), __node_allocator(__a)), - __p2_(0, std::move(__u.hash_function())), - __p3_(std::move(__u.__p3_)) { + __node_alloc_(__node_allocator(__a)), + __size_(0), + __hasher_(std::move(__u.__hasher_)), + __max_load_factor_(__u.__max_load_factor_), + __key_eq_(std::move(__u.__key_eq_)) { if (__a == allocator_type(__u.__node_alloc())) { __bucket_list_.reset(__u.__bucket_list_.release()); __bucket_list_.get_deleter().size() = __u.__bucket_list_.get_deleter().size(); __u.__bucket_list_.get_deleter().size() = 0; if (__u.size() > 0) { - __p1_.first().__next_ = __u.__p1_.first().__next_; - __u.__p1_.first().__next_ = nullptr; - __bucket_list_[std::__constrain_hash(__p1_.first().__next_->__hash(), bucket_count())] = __p1_.first().__ptr(); + __first_node_.__next_ = __u.__first_node_.__next_; + __u.__first_node_.__next_ = nullptr; + __bucket_list_[std::__constrain_hash(__first_node_.__next_->__hash(), bucket_count())] = __first_node_.__ptr(); size() = __u.size(); __u.size() = 0; } @@ -1104,7 +1128,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::~__hash_table() { static_assert(is_copy_constructible::value, "Hasher must be copy-constructible."); #endif - __deallocate_node(__p1_.first().__next_); + __deallocate_node(__first_node_.__next_); } template @@ -1150,8 +1174,8 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__detach() _NOEXCEPT { for (size_type __i = 0; __i < __bc; ++__i) __bucket_list_[__i] = nullptr; size() = 0; - __next_pointer __cache = __p1_.first().__next_; - __p1_.first().__next_ = 
nullptr; + __next_pointer __cache = __first_node_.__next_; + __first_node_.__next_ = nullptr; return __cache; } @@ -1168,10 +1192,10 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(__hash_table& __u, hash_function() = std::move(__u.hash_function()); max_load_factor() = __u.max_load_factor(); key_eq() = std::move(__u.key_eq()); - __p1_.first().__next_ = __u.__p1_.first().__next_; + __first_node_.__next_ = __u.__first_node_.__next_; if (size() > 0) { - __bucket_list_[std::__constrain_hash(__p1_.first().__next_->__hash(), bucket_count())] = __p1_.first().__ptr(); - __u.__p1_.first().__next_ = nullptr; + __bucket_list_[std::__constrain_hash(__first_node_.__next_->__hash(), bucket_count())] = __first_node_.__ptr(); + __u.__first_node_.__next_ = nullptr; __u.size() = 0; } } @@ -1186,9 +1210,9 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(__hash_table& __u, max_load_factor() = __u.max_load_factor(); if (bucket_count() != 0) { __next_pointer __cache = __detach(); -#ifndef _LIBCPP_HAS_NO_EXCEPTIONS +#if _LIBCPP_HAS_EXCEPTIONS try { -#endif // _LIBCPP_HAS_NO_EXCEPTIONS +#endif // _LIBCPP_HAS_EXCEPTIONS const_iterator __i = __u.begin(); while (__cache != nullptr && __u.size() != 0) { __cache->__upcast()->__get_value() = std::move(__u.remove(__i++)->__get_value()); @@ -1196,12 +1220,12 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(__hash_table& __u, __node_insert_multi(__cache->__upcast()); __cache = __next; } -#ifndef _LIBCPP_HAS_NO_EXCEPTIONS +#if _LIBCPP_HAS_EXCEPTIONS } catch (...) 
{ __deallocate_node(__cache); throw; } -#endif // _LIBCPP_HAS_NO_EXCEPTIONS +#endif // _LIBCPP_HAS_EXCEPTIONS __deallocate_node(__cache); } const_iterator __i = __u.begin(); @@ -1232,21 +1256,21 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_unique(_InputIterator __ if (bucket_count() != 0) { __next_pointer __cache = __detach(); -#ifndef _LIBCPP_HAS_NO_EXCEPTIONS +#if _LIBCPP_HAS_EXCEPTIONS try { -#endif // _LIBCPP_HAS_NO_EXCEPTIONS +#endif // _LIBCPP_HAS_EXCEPTIONS for (; __cache != nullptr && __first != __last; ++__first) { __cache->__upcast()->__get_value() = *__first; __next_pointer __next = __cache->__next_; __node_insert_unique(__cache->__upcast()); __cache = __next; } -#ifndef _LIBCPP_HAS_NO_EXCEPTIONS +#if _LIBCPP_HAS_EXCEPTIONS } catch (...) { __deallocate_node(__cache); throw; } -#endif // _LIBCPP_HAS_NO_EXCEPTIONS +#endif // _LIBCPP_HAS_EXCEPTIONS __deallocate_node(__cache); } for (; __first != __last; ++__first) @@ -1264,21 +1288,21 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_multi(_InputIterator __f " or the nodes value type"); if (bucket_count() != 0) { __next_pointer __cache = __detach(); -#ifndef _LIBCPP_HAS_NO_EXCEPTIONS +#if _LIBCPP_HAS_EXCEPTIONS try { -#endif // _LIBCPP_HAS_NO_EXCEPTIONS +#endif // _LIBCPP_HAS_EXCEPTIONS for (; __cache != nullptr && __first != __last; ++__first) { __cache->__upcast()->__get_value() = *__first; __next_pointer __next = __cache->__next_; __node_insert_multi(__cache->__upcast()); __cache = __next; } -#ifndef _LIBCPP_HAS_NO_EXCEPTIONS +#if _LIBCPP_HAS_EXCEPTIONS } catch (...) 
{ __deallocate_node(__cache); throw; } -#endif // _LIBCPP_HAS_NO_EXCEPTIONS +#endif // _LIBCPP_HAS_EXCEPTIONS __deallocate_node(__cache); } for (; __first != __last; ++__first) @@ -1288,7 +1312,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__assign_multi(_InputIterator __f template inline typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::begin() _NOEXCEPT { - return iterator(__p1_.first().__next_); + return iterator(__first_node_.__next_); } template @@ -1300,7 +1324,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::end() _NOEXCEPT { template inline typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::const_iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::begin() const _NOEXCEPT { - return const_iterator(__p1_.first().__next_); + return const_iterator(__first_node_.__next_); } template @@ -1312,8 +1336,8 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::end() const _NOEXCEPT { template void __hash_table<_Tp, _Hash, _Equal, _Alloc>::clear() _NOEXCEPT { if (size() > 0) { - __deallocate_node(__p1_.first().__next_); - __p1_.first().__next_ = nullptr; + __deallocate_node(__first_node_.__next_); + __first_node_.__next_ = nullptr; size_type __bc = bucket_count(); for (size_type __i = 0; __i < __bc; ++__i) __bucket_list_[__i] = nullptr; @@ -1348,7 +1372,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_prepare(size_t __ } if (size() + 1 > __bc * max_load_factor() || __bc == 0) { __rehash_unique(std::max( - 2 * __bc + !std::__is_hash_power2(__bc), size_type(std::ceil(float(size() + 1) / max_load_factor())))); + 2 * __bc + !std::__is_hash_power2(__bc), size_type(__math::ceil(float(size() + 1) / max_load_factor())))); } return nullptr; } @@ -1365,7 +1389,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique_perform(__node_po // insert_after __bucket_list_[__chash], or __first_node if bucket is null __next_pointer __pn = __bucket_list_[__chash]; if (__pn == nullptr) { - __pn = 
__p1_.first().__ptr(); + __pn = __first_node_.__ptr(); __nd->__next_ = __pn->__next_; __pn->__next_ = __nd->__ptr(); // fix up __bucket_list_ @@ -1408,7 +1432,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_prepare(size_t __c size_type __bc = bucket_count(); if (size() + 1 > __bc * max_load_factor() || __bc == 0) { __rehash_multi(std::max( - 2 * __bc + !std::__is_hash_power2(__bc), size_type(std::ceil(float(size() + 1) / max_load_factor())))); + 2 * __bc + !std::__is_hash_power2(__bc), size_type(__math::ceil(float(size() + 1) / max_load_factor())))); __bc = bucket_count(); } size_t __chash = std::__constrain_hash(__cp_hash, __bc); @@ -1445,7 +1469,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi_perform( size_type __bc = bucket_count(); size_t __chash = std::__constrain_hash(__cp->__hash_, __bc); if (__pn == nullptr) { - __pn = __p1_.first().__ptr(); + __pn = __first_node_.__ptr(); __cp->__next_ = __pn->__next_; __pn->__next_ = __cp->__ptr(); // fix up __bucket_list_ @@ -1483,7 +1507,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi(const_iterator __p size_type __bc = bucket_count(); if (size() + 1 > __bc * max_load_factor() || __bc == 0) { __rehash_multi(std::max( - 2 * __bc + !std::__is_hash_power2(__bc), size_type(std::ceil(float(size() + 1) / max_load_factor())))); + 2 * __bc + !std::__is_hash_power2(__bc), size_type(__math::ceil(float(size() + 1) / max_load_factor())))); __bc = bucket_count(); } size_t __chash = std::__constrain_hash(__cp->__hash_, __bc); @@ -1523,14 +1547,14 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_unique_key_args(_Key const& __node_holder __h = __construct_node_hash(__hash, std::forward<_Args>(__args)...); if (size() + 1 > __bc * max_load_factor() || __bc == 0) { __rehash_unique(std::max( - 2 * __bc + !std::__is_hash_power2(__bc), size_type(std::ceil(float(size() + 1) / max_load_factor())))); + 2 * __bc + !std::__is_hash_power2(__bc), size_type(__math::ceil(float(size() 
+ 1) / max_load_factor())))); __bc = bucket_count(); __chash = std::__constrain_hash(__hash, __bc); } // insert_after __bucket_list_[__chash], or __first_node if bucket is null __next_pointer __pn = __bucket_list_[__chash]; if (__pn == nullptr) { - __pn = __p1_.first().__ptr(); + __pn = __first_node_.__ptr(); __h->__next_ = __pn->__next_; __pn->__next_ = __h.get()->__ptr(); // fix up __bucket_list_ @@ -1692,8 +1716,8 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __n) _LIBCPP_D else if (__n < __bc) { __n = std::max( __n, - std::__is_hash_power2(__bc) ? std::__next_hash_pow2(size_t(std::ceil(float(size()) / max_load_factor()))) - : std::__next_prime(size_t(std::ceil(float(size()) / max_load_factor())))); + std::__is_hash_power2(__bc) ? std::__next_hash_pow2(size_t(__math::ceil(float(size()) / max_load_factor()))) + : std::__next_prime(size_t(__math::ceil(float(size()) / max_load_factor())))); if (__n < __bc) __do_rehash<_UniqueKeys>(__n); } @@ -1708,7 +1732,7 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__do_rehash(size_type __nbc) { if (__nbc > 0) { for (size_type __i = 0; __i < __nbc; ++__i) __bucket_list_[__i] = nullptr; - __next_pointer __pp = __p1_.first().__ptr(); + __next_pointer __pp = __first_node_.__ptr(); __next_pointer __cp = __pp->__next_; if (__cp != nullptr) { size_type __chash = std::__constrain_hash(__cp->__hash(), __nbc); @@ -1885,7 +1909,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::remove(const_iterator __p) _NOEXCEPT { // Fix up __bucket_list_ // if __pn is not in same bucket (before begin is not in same bucket) && // if __cn->__next_ is not in same bucket (nullptr is not in same bucket) - if (__pn == __p1_.first().__ptr() || std::__constrain_hash(__pn->__hash(), __bc) != __chash) { + if (__pn == __first_node_.__ptr() || std::__constrain_hash(__pn->__hash(), __bc) != __chash) { if (__cn->__next_ == nullptr || std::__constrain_hash(__cn->__next_->__hash(), __bc) != __chash) __bucket_list_[__chash] = nullptr; } @@ 
-2004,14 +2028,17 @@ void __hash_table<_Tp, _Hash, _Equal, _Alloc>::swap(__hash_table& __u) std::swap(__bucket_list_.get_deleter().size(), __u.__bucket_list_.get_deleter().size()); std::__swap_allocator(__bucket_list_.get_deleter().__alloc(), __u.__bucket_list_.get_deleter().__alloc()); std::__swap_allocator(__node_alloc(), __u.__node_alloc()); - std::swap(__p1_.first().__next_, __u.__p1_.first().__next_); - __p2_.swap(__u.__p2_); - __p3_.swap(__u.__p3_); + std::swap(__first_node_.__next_, __u.__first_node_.__next_); + using std::swap; + swap(__size_, __u.__size_); + swap(__hasher_, __u.__hasher_); + swap(__max_load_factor_, __u.__max_load_factor_); + swap(__key_eq_, __u.__key_eq_); if (size() > 0) - __bucket_list_[std::__constrain_hash(__p1_.first().__next_->__hash(), bucket_count())] = __p1_.first().__ptr(); + __bucket_list_[std::__constrain_hash(__first_node_.__next_->__hash(), bucket_count())] = __first_node_.__ptr(); if (__u.size() > 0) - __u.__bucket_list_[std::__constrain_hash(__u.__p1_.first().__next_->__hash(), __u.bucket_count())] = - __u.__p1_.first().__ptr(); + __u.__bucket_list_[std::__constrain_hash(__u.__first_node_.__next_->__hash(), __u.bucket_count())] = + __u.__first_node_.__ptr(); } template diff --git a/lib/libcxx/include/__iterator/access.h b/lib/libcxx/include/__iterator/access.h index acc4f60bf697..d42855f92548 100644 --- a/lib/libcxx/include/__iterator/access.h +++ b/lib/libcxx/include/__iterator/access.h @@ -11,7 +11,7 @@ #define _LIBCPP___ITERATOR_ACCESS_H #include <__config> -#include +#include <__cstddef/size_t.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__iterator/advance.h b/lib/libcxx/include/__iterator/advance.h index 296db1aaab65..57b1b845f1af 100644 --- a/lib/libcxx/include/__iterator/advance.h +++ b/lib/libcxx/include/__iterator/advance.h @@ -76,9 +76,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 void advance(_InputIter& __i // [range.iter.op.advance] 
namespace ranges { -namespace __advance { - -struct __fn { +struct __advance { private: template _LIBCPP_HIDE_FROM_ABI static constexpr void __advance_forward(_Ip& __i, iter_difference_t<_Ip> __n) { @@ -189,10 +187,8 @@ struct __fn { } }; -} // namespace __advance - inline namespace __cpo { -inline constexpr auto advance = __advance::__fn{}; +inline constexpr auto advance = __advance{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__iterator/aliasing_iterator.h b/lib/libcxx/include/__iterator/aliasing_iterator.h index 94ba577078b5..e01127142ae9 100644 --- a/lib/libcxx/include/__iterator/aliasing_iterator.h +++ b/lib/libcxx/include/__iterator/aliasing_iterator.h @@ -10,10 +10,10 @@ #define _LIBCPP___ITERATOR_ALIASING_ITERATOR_H #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__iterator/iterator_traits.h> #include <__memory/pointer_traits.h> #include <__type_traits/is_trivial.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -31,8 +31,8 @@ struct __aliasing_iterator_wrapper { class __iterator { _BaseIter __base_ = nullptr; - using __iter_traits = iterator_traits<_BaseIter>; - using __base_value_type = typename __iter_traits::value_type; + using __iter_traits _LIBCPP_NODEBUG = iterator_traits<_BaseIter>; + using __base_value_type _LIBCPP_NODEBUG = typename __iter_traits::value_type; static_assert(__has_random_access_iterator_category<_BaseIter>::value, "The base iterator has to be a random access iterator!"); @@ -120,7 +120,7 @@ struct __aliasing_iterator_wrapper { // This is required to avoid ADL instantiations on _BaseT template -using __aliasing_iterator = typename __aliasing_iterator_wrapper<_BaseT, _Alias>::__iterator; +using __aliasing_iterator _LIBCPP_NODEBUG = typename __aliasing_iterator_wrapper<_BaseT, _Alias>::__iterator; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__iterator/back_insert_iterator.h b/lib/libcxx/include/__iterator/back_insert_iterator.h 
index 6d3dd4b12966..9a5948753388 100644 --- a/lib/libcxx/include/__iterator/back_insert_iterator.h +++ b/lib/libcxx/include/__iterator/back_insert_iterator.h @@ -11,11 +11,11 @@ #define _LIBCPP___ITERATOR_BACK_INSERT_ITERATOR_H #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__iterator/iterator.h> #include <__iterator/iterator_traits.h> #include <__memory/addressof.h> #include <__utility/move.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__iterator/bounded_iter.h b/lib/libcxx/include/__iterator/bounded_iter.h index 8a81c9ffbfc3..d12750d1f81a 100644 --- a/lib/libcxx/include/__iterator/bounded_iter.h +++ b/lib/libcxx/include/__iterator/bounded_iter.h @@ -16,9 +16,13 @@ #include <__config> #include <__iterator/iterator_traits.h> #include <__memory/pointer_traits.h> +#include <__type_traits/conjunction.h> +#include <__type_traits/disjunction.h> #include <__type_traits/enable_if.h> #include <__type_traits/integral_constant.h> #include <__type_traits/is_convertible.h> +#include <__type_traits/is_same.h> +#include <__type_traits/make_const_lvalue_ref.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -47,8 +51,11 @@ _LIBCPP_BEGIN_NAMESPACE_STD // pointer, it is undefined at the language level (see [expr.add]). If // bounded iterators exhibited this undefined behavior, we risk compiler // optimizations deleting non-redundant bounds checks. -template ::value > > +template struct __bounded_iter { + static_assert(__libcpp_is_contiguous_iterator<_Iterator>::value, + "Only contiguous iterators can be adapted by __bounded_iter."); + using value_type = typename iterator_traits<_Iterator>::value_type; using difference_type = typename iterator_traits<_Iterator>::difference_type; using pointer = typename iterator_traits<_Iterator>::pointer; @@ -60,14 +67,19 @@ struct __bounded_iter { // Create a singular iterator. 
// - // Such an iterator points past the end of an empty span, so it is not dereferenceable. - // Observing operations like comparison and assignment are valid. + // Such an iterator points past the end of an empty range, so it is not dereferenceable. + // Operations like comparison and assignment are valid. _LIBCPP_HIDE_FROM_ABI __bounded_iter() = default; _LIBCPP_HIDE_FROM_ABI __bounded_iter(__bounded_iter const&) = default; _LIBCPP_HIDE_FROM_ABI __bounded_iter(__bounded_iter&&) = default; - template ::value, int> = 0> + template < class _OtherIterator, + __enable_if_t< + _And< is_convertible, + _Or >, + is_same > > > >::value, + int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __bounded_iter(__bounded_iter<_OtherIterator> const& __other) _NOEXCEPT : __current_(__other.__current_), __begin_(__other.__begin_), @@ -209,9 +221,7 @@ struct __bounded_iter { operator!=(__bounded_iter const& __x, __bounded_iter const& __y) _NOEXCEPT { return __x.__current_ != __y.__current_; } -#endif - // TODO(mordante) disable these overloads in the LLVM 20 release. 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR friend bool operator<(__bounded_iter const& __x, __bounded_iter const& __y) _NOEXCEPT { return __x.__current_ < __y.__current_; @@ -229,7 +239,7 @@ struct __bounded_iter { return __x.__current_ >= __y.__current_; } -#if _LIBCPP_STD_VER >= 20 +#else _LIBCPP_HIDE_FROM_ABI constexpr friend strong_ordering operator<=>(__bounded_iter const& __x, __bounded_iter const& __y) noexcept { if constexpr (three_way_comparable<_Iterator, strong_ordering>) { @@ -249,7 +259,7 @@ struct __bounded_iter { private: template friend struct pointer_traits; - template + template friend struct __bounded_iter; _Iterator __current_; // current iterator _Iterator __begin_, __end_; // valid range represented as [begin, end] diff --git a/lib/libcxx/include/__iterator/common_iterator.h b/lib/libcxx/include/__iterator/common_iterator.h index 199de2cc7337..31fc8267e5af 100644 --- a/lib/libcxx/include/__iterator/common_iterator.h +++ b/lib/libcxx/include/__iterator/common_iterator.h @@ -26,6 +26,7 @@ #include <__iterator/iterator_traits.h> #include <__iterator/readable_traits.h> #include <__memory/addressof.h> +#include <__type_traits/conditional.h> #include <__type_traits/is_pointer.h> #include <__utility/declval.h> #include @@ -235,7 +236,7 @@ class common_iterator { return std::__unchecked_get<_Sent>(__x.__hold_) - std::__unchecked_get<_I2>(__y.__hold_); } - _LIBCPP_HIDE_FROM_ABI friend constexpr iter_rvalue_reference_t<_Iter> + _LIBCPP_HIDE_FROM_ABI friend constexpr decltype(auto) iter_move(const common_iterator& __i) noexcept(noexcept(ranges::iter_move(std::declval()))) requires input_iterator<_Iter> { diff --git a/lib/libcxx/include/__iterator/concepts.h b/lib/libcxx/include/__iterator/concepts.h index 0a4878308d55..6e5ac1d3af37 100644 --- a/lib/libcxx/include/__iterator/concepts.h +++ b/lib/libcxx/include/__iterator/concepts.h @@ -26,7 +26,6 @@ #include <__concepts/semiregular.h> #include <__concepts/totally_ordered.h> #include <__config> -#include 
<__functional/invoke.h> #include <__iterator/incrementable_traits.h> #include <__iterator/iter_move.h> #include <__iterator/iterator_traits.h> @@ -34,7 +33,10 @@ #include <__memory/pointer_traits.h> #include <__type_traits/add_pointer.h> #include <__type_traits/common_reference.h> +#include <__type_traits/integral_constant.h> +#include <__type_traits/invoke.h> #include <__type_traits/is_pointer.h> +#include <__type_traits/is_primary_template.h> #include <__type_traits/is_reference.h> #include <__type_traits/remove_cv.h> #include <__type_traits/remove_cvref.h> @@ -64,8 +66,33 @@ concept __indirectly_readable_impl = template concept indirectly_readable = __indirectly_readable_impl>; +template +using __projected_iterator_t _LIBCPP_NODEBUG = typename _Tp::__projected_iterator; + +template +using __projected_projection_t _LIBCPP_NODEBUG = typename _Tp::__projected_projection; + +template +concept __specialization_of_projected = requires { + typename __projected_iterator_t<_Tp>; + typename __projected_projection_t<_Tp>; +} && __is_primary_template<_Tp>::value; + +template +struct __indirect_value_t_impl { + using type = iter_value_t<_Tp>&; +}; +template <__specialization_of_projected _Tp> +struct __indirect_value_t_impl<_Tp> { + using type = invoke_result_t<__projected_projection_t<_Tp>&, + typename __indirect_value_t_impl<__projected_iterator_t<_Tp>>::type>; +}; + +template +using __indirect_value_t _LIBCPP_NODEBUG = typename __indirect_value_t_impl<_Tp>::type; + template -using iter_common_reference_t = common_reference_t, iter_value_t<_Tp>&>; +using iter_common_reference_t = common_reference_t, __indirect_value_t<_Tp>>; // [iterator.concept.writable] template @@ -176,43 +203,45 @@ concept __has_arrow = input_iterator<_Ip> && (is_pointer_v<_Ip> || requires(_Ip // [indirectcallable.indirectinvocable] template concept indirectly_unary_invocable = - indirectly_readable<_It> && copy_constructible<_Fp> && invocable<_Fp&, iter_value_t<_It>&> && + indirectly_readable<_It> && 
copy_constructible<_Fp> && invocable<_Fp&, __indirect_value_t<_It>> && invocable<_Fp&, iter_reference_t<_It>> && - common_reference_with< invoke_result_t<_Fp&, iter_value_t<_It>&>, invoke_result_t<_Fp&, iter_reference_t<_It>>>; + common_reference_with< invoke_result_t<_Fp&, __indirect_value_t<_It>>, + invoke_result_t<_Fp&, iter_reference_t<_It>>>; template concept indirectly_regular_unary_invocable = - indirectly_readable<_It> && copy_constructible<_Fp> && regular_invocable<_Fp&, iter_value_t<_It>&> && + indirectly_readable<_It> && copy_constructible<_Fp> && regular_invocable<_Fp&, __indirect_value_t<_It>> && regular_invocable<_Fp&, iter_reference_t<_It>> && - common_reference_with< invoke_result_t<_Fp&, iter_value_t<_It>&>, invoke_result_t<_Fp&, iter_reference_t<_It>>>; + common_reference_with< invoke_result_t<_Fp&, __indirect_value_t<_It>>, + invoke_result_t<_Fp&, iter_reference_t<_It>>>; template concept indirect_unary_predicate = - indirectly_readable<_It> && copy_constructible<_Fp> && predicate<_Fp&, iter_value_t<_It>&> && + indirectly_readable<_It> && copy_constructible<_Fp> && predicate<_Fp&, __indirect_value_t<_It>> && predicate<_Fp&, iter_reference_t<_It>>; template concept indirect_binary_predicate = indirectly_readable<_It1> && indirectly_readable<_It2> && copy_constructible<_Fp> && - predicate<_Fp&, iter_value_t<_It1>&, iter_value_t<_It2>&> && - predicate<_Fp&, iter_value_t<_It1>&, iter_reference_t<_It2>> && - predicate<_Fp&, iter_reference_t<_It1>, iter_value_t<_It2>&> && + predicate<_Fp&, __indirect_value_t<_It1>, __indirect_value_t<_It2>> && + predicate<_Fp&, __indirect_value_t<_It1>, iter_reference_t<_It2>> && + predicate<_Fp&, iter_reference_t<_It1>, __indirect_value_t<_It2>> && predicate<_Fp&, iter_reference_t<_It1>, iter_reference_t<_It2>>; template concept indirect_equivalence_relation = indirectly_readable<_It1> && indirectly_readable<_It2> && copy_constructible<_Fp> && - equivalence_relation<_Fp&, iter_value_t<_It1>&, iter_value_t<_It2>&> && - 
equivalence_relation<_Fp&, iter_value_t<_It1>&, iter_reference_t<_It2>> && - equivalence_relation<_Fp&, iter_reference_t<_It1>, iter_value_t<_It2>&> && + equivalence_relation<_Fp&, __indirect_value_t<_It1>, __indirect_value_t<_It2>> && + equivalence_relation<_Fp&, __indirect_value_t<_It1>, iter_reference_t<_It2>> && + equivalence_relation<_Fp&, iter_reference_t<_It1>, __indirect_value_t<_It2>> && equivalence_relation<_Fp&, iter_reference_t<_It1>, iter_reference_t<_It2>>; template concept indirect_strict_weak_order = indirectly_readable<_It1> && indirectly_readable<_It2> && copy_constructible<_Fp> && - strict_weak_order<_Fp&, iter_value_t<_It1>&, iter_value_t<_It2>&> && - strict_weak_order<_Fp&, iter_value_t<_It1>&, iter_reference_t<_It2>> && - strict_weak_order<_Fp&, iter_reference_t<_It1>, iter_value_t<_It2>&> && + strict_weak_order<_Fp&, __indirect_value_t<_It1>, __indirect_value_t<_It2>> && + strict_weak_order<_Fp&, __indirect_value_t<_It1>, iter_reference_t<_It2>> && + strict_weak_order<_Fp&, iter_reference_t<_It1>, __indirect_value_t<_It2>> && strict_weak_order<_Fp&, iter_reference_t<_It1>, iter_reference_t<_It2>>; template @@ -245,7 +274,7 @@ concept indirectly_copyable_storable = #endif // _LIBCPP_STD_VER >= 20 template -using __has_random_access_iterator_category_or_concept +using __has_random_access_iterator_category_or_concept _LIBCPP_NODEBUG #if _LIBCPP_STD_VER >= 20 = integral_constant>; #else // _LIBCPP_STD_VER < 20 diff --git a/lib/libcxx/include/__iterator/counted_iterator.h b/lib/libcxx/include/__iterator/counted_iterator.h index ea2832e3b978..65e178bc0cf2 100644 --- a/lib/libcxx/include/__iterator/counted_iterator.h +++ b/lib/libcxx/include/__iterator/counted_iterator.h @@ -11,6 +11,7 @@ #define _LIBCPP___ITERATOR_COUNTED_ITERATOR_H #include <__assert> +#include <__compare/ordering.h> #include <__concepts/assignable.h> #include <__concepts/common_with.h> #include <__concepts/constructible.h> @@ -28,7 +29,6 @@ #include <__type_traits/add_pointer.h> 
#include <__type_traits/conditional.h> #include <__utility/move.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -132,7 +132,7 @@ class counted_iterator _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator++(int) { _LIBCPP_ASSERT_UNCATEGORIZED(__count_ > 0, "Iterator already at or past end."); --__count_; -# ifndef _LIBCPP_HAS_NO_EXCEPTIONS +# if _LIBCPP_HAS_EXCEPTIONS try { return __current_++; } catch (...) { @@ -141,7 +141,7 @@ class counted_iterator } # else return __current_++; -# endif // _LIBCPP_HAS_NO_EXCEPTIONS +# endif // _LIBCPP_HAS_EXCEPTIONS } _LIBCPP_HIDE_FROM_ABI constexpr counted_iterator operator++(int) @@ -249,7 +249,7 @@ class counted_iterator return __rhs.__count_ <=> __lhs.__count_; } - _LIBCPP_HIDE_FROM_ABI friend constexpr iter_rvalue_reference_t<_Iter> + _LIBCPP_HIDE_FROM_ABI friend constexpr decltype(auto) iter_move(const counted_iterator& __i) noexcept(noexcept(ranges::iter_move(__i.__current_))) requires input_iterator<_Iter> { diff --git a/lib/libcxx/include/__iterator/data.h b/lib/libcxx/include/__iterator/data.h index b7c1603652b0..5f2624c2b819 100644 --- a/lib/libcxx/include/__iterator/data.h +++ b/lib/libcxx/include/__iterator/data.h @@ -11,7 +11,6 @@ #define _LIBCPP___ITERATOR_DATA_H #include <__config> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/lib/libcxx/include/__iterator/distance.h b/lib/libcxx/include/__iterator/distance.h index 75bd49c9ae73..1732aa527f64 100644 --- a/lib/libcxx/include/__iterator/distance.h +++ b/lib/libcxx/include/__iterator/distance.h @@ -52,9 +52,7 @@ distance(_InputIter __first, _InputIter __last) { // [range.iter.op.distance] namespace ranges { -namespace __distance { - -struct __fn { +struct __distance { template _Sp> requires(!sized_sentinel_for<_Sp, _Ip>) _LIBCPP_HIDE_FROM_ABI constexpr iter_difference_t<_Ip> operator()(_Ip __first, _Sp __last) const { @@ -85,10 +83,8 @@ struct __fn { } }; -} // namespace __distance - 
inline namespace __cpo { -inline constexpr auto distance = __distance::__fn{}; +inline constexpr auto distance = __distance{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__iterator/empty.h b/lib/libcxx/include/__iterator/empty.h index 773f2776955b..f2c653bcb329 100644 --- a/lib/libcxx/include/__iterator/empty.h +++ b/lib/libcxx/include/__iterator/empty.h @@ -11,7 +11,6 @@ #define _LIBCPP___ITERATOR_EMPTY_H #include <__config> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/lib/libcxx/include/__iterator/front_insert_iterator.h b/lib/libcxx/include/__iterator/front_insert_iterator.h index 7f2c54ec8744..80819cd22ae6 100644 --- a/lib/libcxx/include/__iterator/front_insert_iterator.h +++ b/lib/libcxx/include/__iterator/front_insert_iterator.h @@ -11,11 +11,11 @@ #define _LIBCPP___ITERATOR_FRONT_INSERT_ITERATOR_H #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__iterator/iterator.h> #include <__iterator/iterator_traits.h> #include <__memory/addressof.h> #include <__utility/move.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__iterator/incrementable_traits.h b/lib/libcxx/include/__iterator/incrementable_traits.h index a228b228f6e5..37c8daddf8a8 100644 --- a/lib/libcxx/include/__iterator/incrementable_traits.h +++ b/lib/libcxx/include/__iterator/incrementable_traits.h @@ -12,13 +12,13 @@ #include <__concepts/arithmetic.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__type_traits/conditional.h> #include <__type_traits/is_object.h> #include <__type_traits/is_primary_template.h> #include <__type_traits/make_signed.h> #include <__type_traits/remove_cvref.h> #include <__utility/declval.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__iterator/insert_iterator.h b/lib/libcxx/include/__iterator/insert_iterator.h index 
8b7574dc9ec0..e0ee0ce035e2 100644 --- a/lib/libcxx/include/__iterator/insert_iterator.h +++ b/lib/libcxx/include/__iterator/insert_iterator.h @@ -11,12 +11,12 @@ #define _LIBCPP___ITERATOR_INSERT_ITERATOR_H #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__iterator/iterator.h> #include <__iterator/iterator_traits.h> #include <__memory/addressof.h> #include <__ranges/access.h> #include <__utility/move.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -29,10 +29,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -using __insert_iterator_iter_t = ranges::iterator_t<_Container>; +using __insert_iterator_iter_t _LIBCPP_NODEBUG = ranges::iterator_t<_Container>; #else template -using __insert_iterator_iter_t = typename _Container::iterator; +using __insert_iterator_iter_t _LIBCPP_NODEBUG = typename _Container::iterator; #endif _LIBCPP_SUPPRESS_DEPRECATED_PUSH diff --git a/lib/libcxx/include/__iterator/istream_iterator.h b/lib/libcxx/include/__iterator/istream_iterator.h index 58c9ac6d4ccc..a6c74d00178d 100644 --- a/lib/libcxx/include/__iterator/istream_iterator.h +++ b/lib/libcxx/include/__iterator/istream_iterator.h @@ -11,13 +11,13 @@ #define _LIBCPP___ITERATOR_ISTREAM_ITERATOR_H #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__fwd/istream.h> #include <__fwd/string.h> #include <__iterator/default_sentinel.h> #include <__iterator/iterator.h> #include <__iterator/iterator_traits.h> #include <__memory/addressof.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__iterator/istreambuf_iterator.h b/lib/libcxx/include/__iterator/istreambuf_iterator.h index 51c4ecff351f..162873b9559e 100644 --- a/lib/libcxx/include/__iterator/istreambuf_iterator.h +++ b/lib/libcxx/include/__iterator/istreambuf_iterator.h @@ -16,6 +16,8 @@ #include <__iterator/default_sentinel.h> #include <__iterator/iterator.h> #include 
<__iterator/iterator_traits.h> +#include <__string/char_traits.h> +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__iterator/iterator.h b/lib/libcxx/include/__iterator/iterator.h index ba9308f3c224..1591655313dd 100644 --- a/lib/libcxx/include/__iterator/iterator.h +++ b/lib/libcxx/include/__iterator/iterator.h @@ -11,7 +11,7 @@ #define _LIBCPP___ITERATOR_ITERATOR_H #include <__config> -#include +#include <__cstddef/ptrdiff_t.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__iterator/iterator_traits.h b/lib/libcxx/include/__iterator/iterator_traits.h index 11af9e301842..db68dd2c377a 100644 --- a/lib/libcxx/include/__iterator/iterator_traits.h +++ b/lib/libcxx/include/__iterator/iterator_traits.h @@ -18,12 +18,15 @@ #include <__concepts/same_as.h> #include <__concepts/totally_ordered.h> #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__fwd/pair.h> #include <__iterator/incrementable_traits.h> #include <__iterator/readable_traits.h> #include <__type_traits/common_reference.h> #include <__type_traits/conditional.h> #include <__type_traits/disjunction.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/integral_constant.h> #include <__type_traits/is_convertible.h> #include <__type_traits/is_object.h> #include <__type_traits/is_primary_template.h> @@ -34,7 +37,6 @@ #include <__type_traits/remove_cvref.h> #include <__type_traits/void_t.h> #include <__utility/declval.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -45,7 +47,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 20 template -using __with_reference = _Tp&; +using __with_reference _LIBCPP_NODEBUG = _Tp&; template concept __can_reference = requires { typename __with_reference<_Tp>; }; @@ -78,19 +80,20 @@ struct __iter_traits_cache { using type = _If< __is_primary_template >::value, _Iter, 
iterator_traits<_Iter> >; }; template -using _ITER_TRAITS = typename __iter_traits_cache<_Iter>::type; +using _ITER_TRAITS _LIBCPP_NODEBUG = typename __iter_traits_cache<_Iter>::type; struct __iter_concept_concept_test { template - using _Apply = typename _ITER_TRAITS<_Iter>::iterator_concept; + using _Apply _LIBCPP_NODEBUG = typename _ITER_TRAITS<_Iter>::iterator_concept; }; struct __iter_concept_category_test { template - using _Apply = typename _ITER_TRAITS<_Iter>::iterator_category; + using _Apply _LIBCPP_NODEBUG = typename _ITER_TRAITS<_Iter>::iterator_category; }; struct __iter_concept_random_fallback { template - using _Apply = __enable_if_t< __is_primary_template >::value, random_access_iterator_tag >; + using _Apply _LIBCPP_NODEBUG = + __enable_if_t<__is_primary_template >::value, random_access_iterator_tag>; }; template @@ -104,7 +107,7 @@ struct __iter_concept_cache { }; template -using _ITER_CONCEPT = typename __iter_concept_cache<_Iter>::type::template _Apply<_Iter>; +using _ITER_CONCEPT _LIBCPP_NODEBUG = typename __iter_concept_cache<_Iter>::type::template _Apply<_Iter>; template struct __has_iterator_typedefs { @@ -362,7 +365,7 @@ struct __iterator_traits<_Ip> { template struct iterator_traits : __iterator_traits<_Ip> { - using __primary_template = iterator_traits; + using __primary_template _LIBCPP_NODEBUG = iterator_traits; }; #else // _LIBCPP_STD_VER >= 20 @@ -395,7 +398,7 @@ struct __iterator_traits<_Iter, true> template struct _LIBCPP_TEMPLATE_VIS iterator_traits : __iterator_traits<_Iter, __has_iterator_typedefs<_Iter>::value> { - using __primary_template = iterator_traits; + using __primary_template _LIBCPP_NODEBUG = iterator_traits; }; #endif // _LIBCPP_STD_VER >= 20 @@ -428,16 +431,19 @@ template struct __has_iterator_concept_convertible_to<_Tp, _Up, false> : false_type {}; template -using __has_input_iterator_category = __has_iterator_category_convertible_to<_Tp, input_iterator_tag>; +using __has_input_iterator_category _LIBCPP_NODEBUG = 
__has_iterator_category_convertible_to<_Tp, input_iterator_tag>; template -using __has_forward_iterator_category = __has_iterator_category_convertible_to<_Tp, forward_iterator_tag>; +using __has_forward_iterator_category _LIBCPP_NODEBUG = + __has_iterator_category_convertible_to<_Tp, forward_iterator_tag>; template -using __has_bidirectional_iterator_category = __has_iterator_category_convertible_to<_Tp, bidirectional_iterator_tag>; +using __has_bidirectional_iterator_category _LIBCPP_NODEBUG = + __has_iterator_category_convertible_to<_Tp, bidirectional_iterator_tag>; template -using __has_random_access_iterator_category = __has_iterator_category_convertible_to<_Tp, random_access_iterator_tag>; +using __has_random_access_iterator_category _LIBCPP_NODEBUG = + __has_iterator_category_convertible_to<_Tp, random_access_iterator_tag>; // __libcpp_is_contiguous_iterator determines if an iterator is known by // libc++ to be contiguous, either because it advertises itself as such @@ -464,48 +470,49 @@ template class __wrap_iter; template -using __has_exactly_input_iterator_category = +using __has_exactly_input_iterator_category _LIBCPP_NODEBUG = integral_constant::value && !__has_iterator_category_convertible_to<_Tp, forward_iterator_tag>::value>; template -using __has_exactly_forward_iterator_category = +using __has_exactly_forward_iterator_category _LIBCPP_NODEBUG = integral_constant::value && !__has_iterator_category_convertible_to<_Tp, bidirectional_iterator_tag>::value>; template -using __has_exactly_bidirectional_iterator_category = +using __has_exactly_bidirectional_iterator_category _LIBCPP_NODEBUG = integral_constant::value && !__has_iterator_category_convertible_to<_Tp, random_access_iterator_tag>::value>; template -using __iter_value_type = typename iterator_traits<_InputIterator>::value_type; +using __iter_value_type _LIBCPP_NODEBUG = typename iterator_traits<_InputIterator>::value_type; template -using __iter_key_type = 
__remove_const_t::value_type::first_type>; +using __iter_key_type _LIBCPP_NODEBUG = + __remove_const_t::value_type::first_type>; template -using __iter_mapped_type = typename iterator_traits<_InputIterator>::value_type::second_type; +using __iter_mapped_type _LIBCPP_NODEBUG = typename iterator_traits<_InputIterator>::value_type::second_type; template -using __iter_to_alloc_type = +using __iter_to_alloc_type _LIBCPP_NODEBUG = pair::value_type::first_type, typename iterator_traits<_InputIterator>::value_type::second_type>; template -using __iterator_category_type = typename iterator_traits<_Iter>::iterator_category; +using __iterator_category_type _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::iterator_category; template -using __iterator_pointer_type = typename iterator_traits<_Iter>::pointer; +using __iterator_pointer_type _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::pointer; template -using __iter_diff_t = typename iterator_traits<_Iter>::difference_type; +using __iter_diff_t _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::difference_type; template -using __iter_reference = typename iterator_traits<_Iter>::reference; +using __iter_reference _LIBCPP_NODEBUG = typename iterator_traits<_Iter>::reference; #if _LIBCPP_STD_VER >= 20 diff --git a/lib/libcxx/include/__iterator/next.h b/lib/libcxx/include/__iterator/next.h index 21d3688ad9eb..1f68a5bec8f3 100644 --- a/lib/libcxx/include/__iterator/next.h +++ b/lib/libcxx/include/__iterator/next.h @@ -25,7 +25,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _InputIter +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _InputIter next(_InputIter __x, typename iterator_traits<_InputIter>::difference_type __n = 1) { // Calling `advance` with a negative value on a non-bidirectional iterator is a no-op in the current implementation. // Note that this check duplicates the similar check in `std::advance`. 
@@ -41,38 +41,35 @@ next(_InputIter __x, typename iterator_traits<_InputIter>::difference_type __n = // [range.iter.op.next] namespace ranges { -namespace __next { - -struct __fn { +struct __next { template - _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __x) const { ++__x; return __x; } template - _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __x, iter_difference_t<_Ip> __n) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __x, iter_difference_t<_Ip> __n) const { ranges::advance(__x, __n); return __x; } template _Sp> - _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __x, _Sp __bound_sentinel) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __x, _Sp __bound_sentinel) const { ranges::advance(__x, __bound_sentinel); return __x; } template _Sp> - _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __x, iter_difference_t<_Ip> __n, _Sp __bound_sentinel) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Ip + operator()(_Ip __x, iter_difference_t<_Ip> __n, _Sp __bound_sentinel) const { ranges::advance(__x, __n, __bound_sentinel); return __x; } }; -} // namespace __next - inline namespace __cpo { -inline constexpr auto next = __next::__fn{}; +inline constexpr auto next = __next{}; } // namespace __cpo } // namespace ranges diff --git a/lib/libcxx/include/__iterator/ostream_iterator.h b/lib/libcxx/include/__iterator/ostream_iterator.h index 05697e62d9dc..93ecc03010d0 100644 --- a/lib/libcxx/include/__iterator/ostream_iterator.h +++ b/lib/libcxx/include/__iterator/ostream_iterator.h @@ -11,12 +11,12 @@ #define _LIBCPP___ITERATOR_OSTREAM_ITERATOR_H #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__fwd/ostream.h> #include <__fwd/string.h> #include <__iterator/iterator.h> #include <__iterator/iterator_traits.h> #include <__memory/addressof.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma 
GCC system_header diff --git a/lib/libcxx/include/__iterator/ostreambuf_iterator.h b/lib/libcxx/include/__iterator/ostreambuf_iterator.h index dda0094dc3f5..f00449355e4e 100644 --- a/lib/libcxx/include/__iterator/ostreambuf_iterator.h +++ b/lib/libcxx/include/__iterator/ostreambuf_iterator.h @@ -11,10 +11,13 @@ #define _LIBCPP___ITERATOR_OSTREAMBUF_ITERATOR_H #include <__config> +#include <__cstddef/ptrdiff_t.h> +#include <__fwd/ios.h> +#include <__fwd/ostream.h> +#include <__fwd/streambuf.h> #include <__iterator/iterator.h> #include <__iterator/iterator_traits.h> -#include -#include // for forward declaration of basic_streambuf +#include // for forward declaration of ostreambuf_iterator #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -62,9 +65,11 @@ class _LIBCPP_TEMPLATE_VIS ostreambuf_iterator _LIBCPP_HIDE_FROM_ABI ostreambuf_iterator& operator++(int) { return *this; } _LIBCPP_HIDE_FROM_ABI bool failed() const _NOEXCEPT { return __sbuf_ == nullptr; } +#if _LIBCPP_HAS_LOCALIZATION template friend _LIBCPP_HIDE_FROM_ABI ostreambuf_iterator<_Ch, _Tr> __pad_and_output( ostreambuf_iterator<_Ch, _Tr> __s, const _Ch* __ob, const _Ch* __op, const _Ch* __oe, ios_base& __iob, _Ch __fl); +#endif // _LIBCPP_HAS_LOCALIZATION }; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__iterator/prev.h b/lib/libcxx/include/__iterator/prev.h index 2f0e6a088edb..bffd5527dc95 100644 --- a/lib/libcxx/include/__iterator/prev.h +++ b/lib/libcxx/include/__iterator/prev.h @@ -17,16 +17,20 @@ #include <__iterator/incrementable_traits.h> #include <__iterator/iterator_traits.h> #include <__type_traits/enable_if.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD template ::value, int> = 0> -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _InputIter -prev(_InputIter __x, typename 
iterator_traits<_InputIter>::difference_type __n = 1) { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _InputIter +prev(_InputIter __x, typename iterator_traits<_InputIter>::difference_type __n) { // Calling `advance` with a negative value on a non-bidirectional iterator is a no-op in the current implementation. // Note that this check duplicates the similar check in `std::advance`. _LIBCPP_ASSERT_PEDANTIC(__n <= 0 || __has_bidirectional_iterator_category<_InputIter>::value, @@ -35,37 +39,44 @@ prev(_InputIter __x, typename iterator_traits<_InputIter>::difference_type __n = return __x; } +// LWG 3197 +// It is unclear what the implications of "BidirectionalIterator" in the standard are. +// However, calling std::prev(non-bidi-iterator) is obviously an error and we should catch it at compile time. +template ::value, int> = 0> +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 _InputIter prev(_InputIter __it) { + static_assert(__has_bidirectional_iterator_category<_InputIter>::value, + "Attempt to prev(it) with a non-bidirectional iterator"); + return std::prev(std::move(__it), 1); +} + #if _LIBCPP_STD_VER >= 20 // [range.iter.op.prev] namespace ranges { -namespace __prev { - -struct __fn { +struct __prev { template - _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __x) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __x) const { --__x; return __x; } template - _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __x, iter_difference_t<_Ip> __n) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __x, iter_difference_t<_Ip> __n) const { ranges::advance(__x, -__n); return __x; } template - _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __x, iter_difference_t<_Ip> __n, _Ip __bound_iter) const { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _Ip + operator()(_Ip __x, iter_difference_t<_Ip> __n, _Ip __bound_iter) const { ranges::advance(__x, -__n, __bound_iter); 
return __x; } }; -} // namespace __prev - inline namespace __cpo { -inline constexpr auto prev = __prev::__fn{}; +inline constexpr auto prev = __prev{}; } // namespace __cpo } // namespace ranges @@ -73,4 +84,6 @@ inline constexpr auto prev = __prev::__fn{}; _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ITERATOR_PREV_H diff --git a/lib/libcxx/include/__iterator/projected.h b/lib/libcxx/include/__iterator/projected.h index 463d07b0d33c..d12f0167de1d 100644 --- a/lib/libcxx/include/__iterator/projected.h +++ b/lib/libcxx/include/__iterator/projected.h @@ -26,6 +26,10 @@ _LIBCPP_BEGIN_NAMESPACE_STD template struct __projected_impl { struct __type { + using __primary_template _LIBCPP_NODEBUG = __type; + using __projected_iterator _LIBCPP_NODEBUG = _It; + using __projected_projection _LIBCPP_NODEBUG = _Proj; + using value_type = remove_cvref_t>; indirect_result_t<_Proj&, _It> operator*() const; // not defined }; @@ -34,6 +38,10 @@ struct __projected_impl { template struct __projected_impl<_It, _Proj> { struct __type { + using __primary_template _LIBCPP_NODEBUG = __type; + using __projected_iterator _LIBCPP_NODEBUG = _It; + using __projected_projection _LIBCPP_NODEBUG = _Proj; + using value_type = remove_cvref_t>; using difference_type = iter_difference_t<_It>; indirect_result_t<_Proj&, _It> operator*() const; // not defined diff --git a/lib/libcxx/include/__iterator/ranges_iterator_traits.h b/lib/libcxx/include/__iterator/ranges_iterator_traits.h index 859e7082048a..9a31b651eb5d 100644 --- a/lib/libcxx/include/__iterator/ranges_iterator_traits.h +++ b/lib/libcxx/include/__iterator/ranges_iterator_traits.h @@ -24,13 +24,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 23 template -using __range_key_type = __remove_const_t::first_type>; +using __range_key_type _LIBCPP_NODEBUG = __remove_const_t::first_type>; template -using __range_mapped_type = typename ranges::range_value_t<_Range>::second_type; +using __range_mapped_type _LIBCPP_NODEBUG = 
typename ranges::range_value_t<_Range>::second_type; template -using __range_to_alloc_type = +using __range_to_alloc_type _LIBCPP_NODEBUG = pair::first_type, typename ranges::range_value_t<_Range>::second_type>; #endif diff --git a/lib/libcxx/include/__iterator/reverse_access.h b/lib/libcxx/include/__iterator/reverse_access.h index 54d7270b04a5..f6e60c3fb75b 100644 --- a/lib/libcxx/include/__iterator/reverse_access.h +++ b/lib/libcxx/include/__iterator/reverse_access.h @@ -12,7 +12,6 @@ #include <__config> #include <__iterator/reverse_iterator.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/lib/libcxx/include/__iterator/reverse_iterator.h b/lib/libcxx/include/__iterator/reverse_iterator.h index 50c0f21eaa28..5bd1f868d3ff 100644 --- a/lib/libcxx/include/__iterator/reverse_iterator.h +++ b/lib/libcxx/include/__iterator/reverse_iterator.h @@ -136,10 +136,12 @@ class _LIBCPP_TEMPLATE_VIS reverse_iterator _LIBCPP_HIDE_FROM_ABI constexpr pointer operator->() const requires is_pointer_v<_Iter> || requires(const _Iter __i) { __i.operator->(); } { + _Iter __tmp = current; + --__tmp; if constexpr (is_pointer_v<_Iter>) { - return std::prev(current); + return __tmp; } else { - return std::prev(current).operator->(); + return __tmp.operator->(); } } #else @@ -327,8 +329,8 @@ __reverse_range(_Range&& __range) { template struct __unwrap_iter_impl >, __b> { - using _UnwrappedIter = decltype(__unwrap_iter_impl<_Iter>::__unwrap(std::declval<_Iter>())); - using _ReverseWrapper = reverse_iterator >; + using _UnwrappedIter _LIBCPP_NODEBUG = decltype(__unwrap_iter_impl<_Iter>::__unwrap(std::declval<_Iter>())); + using _ReverseWrapper _LIBCPP_NODEBUG = reverse_iterator >; static _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _ReverseWrapper __rewrap(_ReverseWrapper __orig_iter, _UnwrappedIter __unwrapped_iter) { diff --git a/lib/libcxx/include/__iterator/segmented_iterator.h b/lib/libcxx/include/__iterator/segmented_iterator.h index 
f3cd1e5fa1f5..7a8e1addeacd 100644 --- a/lib/libcxx/include/__iterator/segmented_iterator.h +++ b/lib/libcxx/include/__iterator/segmented_iterator.h @@ -41,8 +41,8 @@ // Returns the iterator composed of the segment iterator and local iterator. #include <__config> +#include <__cstddef/size_t.h> #include <__type_traits/integral_constant.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -72,7 +72,7 @@ template struct __has_specialization<_Tp, sizeof(_Tp) * 0> : true_type {}; template -using __is_segmented_iterator = __has_specialization<__segmented_iterator_traits<_Iterator> >; +using __is_segmented_iterator _LIBCPP_NODEBUG = __has_specialization<__segmented_iterator_traits<_Iterator> >; _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__iterator/size.h b/lib/libcxx/include/__iterator/size.h index 876e6963f77d..84e2e3b21f1d 100644 --- a/lib/libcxx/include/__iterator/size.h +++ b/lib/libcxx/include/__iterator/size.h @@ -11,9 +11,10 @@ #define _LIBCPP___ITERATOR_SIZE_H #include <__config> +#include <__cstddef/ptrdiff_t.h> +#include <__cstddef/size_t.h> #include <__type_traits/common_type.h> #include <__type_traits/make_signed.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__iterator/static_bounded_iter.h b/lib/libcxx/include/__iterator/static_bounded_iter.h new file mode 100644 index 000000000000..8f4fbdf6dff9 --- /dev/null +++ b/lib/libcxx/include/__iterator/static_bounded_iter.h @@ -0,0 +1,318 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ITERATOR_STATIC_BOUNDED_ITER_H +#define _LIBCPP___ITERATOR_STATIC_BOUNDED_ITER_H + +#include <__assert> +#include <__compare/ordering.h> +#include <__compare/three_way_comparable.h> +#include <__config> +#include <__cstddef/size_t.h> +#include <__iterator/iterator_traits.h> +#include <__memory/pointer_traits.h> +#include <__type_traits/conjunction.h> +#include <__type_traits/disjunction.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_convertible.h> +#include <__type_traits/is_same.h> +#include <__type_traits/make_const_lvalue_ref.h> +#include <__utility/move.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +struct __static_bounded_iter_storage { + _LIBCPP_HIDE_FROM_ABI __static_bounded_iter_storage() = default; + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __static_bounded_iter_storage(_Iterator __current, _Iterator __begin) + : __current_(__current), __begin_(__begin) {} + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iterator& __current() _NOEXCEPT { return __current_; } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iterator __current() const _NOEXCEPT { return __current_; } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iterator __begin() const _NOEXCEPT { return __begin_; } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iterator __end() const _NOEXCEPT { return __begin_ + _Size; } + +private: + _Iterator __current_; // current iterator + _Iterator __begin_; // start of the valid range, which is [__begin_, __begin_ + _Size) +}; + +template +struct __static_bounded_iter_storage<_Iterator, 0> { + _LIBCPP_HIDE_FROM_ABI __static_bounded_iter_storage() 
= default; + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __static_bounded_iter_storage(_Iterator __current, _Iterator /* __begin */) + : __current_(__current) {} + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iterator& __current() _NOEXCEPT { return __current_; } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iterator __current() const _NOEXCEPT { return __current_; } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iterator __begin() const _NOEXCEPT { return __current_; } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iterator __end() const _NOEXCEPT { return __current_; } + +private: + _Iterator __current_; // current iterator +}; + +// This is an iterator wrapper for contiguous iterators that points within a range +// whose size is known at compile-time. This is very similar to `__bounded_iter`, +// except that we don't have to store the end of the range in physical memory since +// it can be computed from the start of the range. +// +// The operations on which this iterator wrapper traps are the same as `__bounded_iter`. +template +struct __static_bounded_iter { + static_assert(__libcpp_is_contiguous_iterator<_Iterator>::value, + "Only contiguous iterators can be adapted by __static_bounded_iter."); + + using value_type = typename iterator_traits<_Iterator>::value_type; + using difference_type = typename iterator_traits<_Iterator>::difference_type; + using pointer = typename iterator_traits<_Iterator>::pointer; + using reference = typename iterator_traits<_Iterator>::reference; + using iterator_category = typename iterator_traits<_Iterator>::iterator_category; +#if _LIBCPP_STD_VER >= 20 + using iterator_concept = contiguous_iterator_tag; +#endif + + // Create a singular iterator. + // + // Such an iterator points past the end of an empty range, so it is not dereferenceable. + // Operations like comparison and assignment are valid. 
+ _LIBCPP_HIDE_FROM_ABI __static_bounded_iter() = default; + + _LIBCPP_HIDE_FROM_ABI __static_bounded_iter(__static_bounded_iter const&) = default; + _LIBCPP_HIDE_FROM_ABI __static_bounded_iter(__static_bounded_iter&&) = default; + + template , + _Or >, + is_same > > > >::value, + int> = 0> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR + __static_bounded_iter(__static_bounded_iter<_OtherIterator, _Size> const& __other) _NOEXCEPT + : __storage_(__other.__storage_.__current(), __other.__storage_.__begin()) {} + + // Assign a bounded iterator to another one, rebinding the bounds of the iterator as well. + _LIBCPP_HIDE_FROM_ABI __static_bounded_iter& operator=(__static_bounded_iter const&) = default; + _LIBCPP_HIDE_FROM_ABI __static_bounded_iter& operator=(__static_bounded_iter&&) = default; + +private: + // Create an iterator wrapping the given iterator, and whose bounds are described + // by the provided [begin, begin + _Size] range. + _LIBCPP_HIDE_FROM_ABI + _LIBCPP_CONSTEXPR_SINCE_CXX14 explicit __static_bounded_iter(_Iterator __current, _Iterator __begin) + : __storage_(__current, __begin) { + _LIBCPP_ASSERT_INTERNAL( + __begin <= __current, "__static_bounded_iter(current, begin): current and begin are inconsistent"); + _LIBCPP_ASSERT_INTERNAL( + __current <= __end(), "__static_bounded_iter(current, begin): current and (begin + Size) are inconsistent"); + } + + template + friend _LIBCPP_CONSTEXPR __static_bounded_iter<_It, _Sz> __make_static_bounded_iter(_It, _It); + +public: + // Dereference and indexing operations. 
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 reference operator*() const _NOEXCEPT { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __current() != __end(), "__static_bounded_iter::operator*: Attempt to dereference an iterator at the end"); + return *__current(); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pointer operator->() const _NOEXCEPT { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __current() != __end(), "__static_bounded_iter::operator->: Attempt to dereference an iterator at the end"); + return std::__to_address(__current()); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 reference operator[](difference_type __n) const _NOEXCEPT { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __n >= __begin() - __current(), + "__static_bounded_iter::operator[]: Attempt to index an iterator past the start"); + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __n < __end() - __current(), + "__static_bounded_iter::operator[]: Attempt to index an iterator at or past the end"); + return __current()[__n]; + } + + // Arithmetic operations. + // + // These operations check that the iterator remains within `[begin, end]`. 
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __static_bounded_iter& operator++() _NOEXCEPT { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __current() != __end(), "__static_bounded_iter::operator++: Attempt to advance an iterator past the end"); + ++__current(); + return *this; + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __static_bounded_iter operator++(int) _NOEXCEPT { + __static_bounded_iter __tmp(*this); + ++*this; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __static_bounded_iter& operator--() _NOEXCEPT { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __current() != __begin(), "__static_bounded_iter::operator--: Attempt to rewind an iterator past the start"); + --__current(); + return *this; + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __static_bounded_iter operator--(int) _NOEXCEPT { + __static_bounded_iter __tmp(*this); + --*this; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __static_bounded_iter& operator+=(difference_type __n) _NOEXCEPT { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __n >= __begin() - __current(), + "__static_bounded_iter::operator+=: Attempt to rewind an iterator past the start"); + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __n <= __end() - __current(), "__static_bounded_iter::operator+=: Attempt to advance an iterator past the end"); + __current() += __n; + return *this; + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 friend __static_bounded_iter + operator+(__static_bounded_iter const& __self, difference_type __n) _NOEXCEPT { + __static_bounded_iter __tmp(__self); + __tmp += __n; + return __tmp; + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 friend __static_bounded_iter + operator+(difference_type __n, __static_bounded_iter const& __self) _NOEXCEPT { + __static_bounded_iter __tmp(__self); + __tmp += __n; + return __tmp; + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __static_bounded_iter& operator-=(difference_type __n) 
_NOEXCEPT { + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __n <= __current() - __begin(), + "__static_bounded_iter::operator-=: Attempt to rewind an iterator past the start"); + _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( + __n >= __current() - __end(), "__static_bounded_iter::operator-=: Attempt to advance an iterator past the end"); + __current() -= __n; + return *this; + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 friend __static_bounded_iter + operator-(__static_bounded_iter const& __self, difference_type __n) _NOEXCEPT { + __static_bounded_iter __tmp(__self); + __tmp -= __n; + return __tmp; + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 friend difference_type + operator-(__static_bounded_iter const& __x, __static_bounded_iter const& __y) _NOEXCEPT { + return __x.__current() - __y.__current(); + } + + // Comparison operations. + // + // These operations do not check whether the iterators are within their bounds. + // The valid range for each iterator is also not considered as part of the comparison, + // i.e. two iterators pointing to the same location will be considered equal even + // if they have different validity ranges. 
+ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR friend bool + operator==(__static_bounded_iter const& __x, __static_bounded_iter const& __y) _NOEXCEPT { + return __x.__current() == __y.__current(); + } + +#if _LIBCPP_STD_VER <= 17 + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR friend bool + operator!=(__static_bounded_iter const& __x, __static_bounded_iter const& __y) _NOEXCEPT { + return __x.__current() != __y.__current(); + } + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR friend bool + operator<(__static_bounded_iter const& __x, __static_bounded_iter const& __y) _NOEXCEPT { + return __x.__current() < __y.__current(); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR friend bool + operator>(__static_bounded_iter const& __x, __static_bounded_iter const& __y) _NOEXCEPT { + return __x.__current() > __y.__current(); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR friend bool + operator<=(__static_bounded_iter const& __x, __static_bounded_iter const& __y) _NOEXCEPT { + return __x.__current() <= __y.__current(); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR friend bool + operator>=(__static_bounded_iter const& __x, __static_bounded_iter const& __y) _NOEXCEPT { + return __x.__current() >= __y.__current(); + } + +#else + _LIBCPP_HIDE_FROM_ABI constexpr friend strong_ordering + operator<=>(__static_bounded_iter const& __x, __static_bounded_iter const& __y) noexcept { + if constexpr (three_way_comparable<_Iterator, strong_ordering>) { + return __x.__current() <=> __y.__current(); + } else { + if (__x.__current() < __y.__current()) + return strong_ordering::less; + + if (__x.__current() == __y.__current()) + return strong_ordering::equal; + + return strong_ordering::greater; + } + } +#endif // _LIBCPP_STD_VER >= 20 + +private: + template + friend struct pointer_traits; + template + friend struct __static_bounded_iter; + __static_bounded_iter_storage<_Iterator, _Size> __storage_; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iterator& __current() _NOEXCEPT { + return 
__storage_.__current(); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iterator __current() const _NOEXCEPT { + return __storage_.__current(); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iterator __begin() const _NOEXCEPT { + return __storage_.__begin(); + } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iterator __end() const _NOEXCEPT { return __storage_.__end(); } +}; + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __static_bounded_iter<_It, _Size> +__make_static_bounded_iter(_It __it, _It __begin) { + return __static_bounded_iter<_It, _Size>(std::move(__it), std::move(__begin)); +} + +#if _LIBCPP_STD_VER <= 17 +template +struct __libcpp_is_contiguous_iterator<__static_bounded_iter<_Iterator, _Size> > : true_type {}; +#endif + +template +struct pointer_traits<__static_bounded_iter<_Iterator, _Size> > { + using pointer = __static_bounded_iter<_Iterator, _Size>; + using element_type = typename pointer_traits<_Iterator>::element_type; + using difference_type = typename pointer_traits<_Iterator>::difference_type; + + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR static element_type* to_address(pointer __it) _NOEXCEPT { + return std::__to_address(__it.__current()); + } +}; + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP___ITERATOR_STATIC_BOUNDED_ITER_H diff --git a/lib/libcxx/include/__iterator/wrap_iter.h b/lib/libcxx/include/__iterator/wrap_iter.h index 56183c0ee794..966c4675b704 100644 --- a/lib/libcxx/include/__iterator/wrap_iter.h +++ b/lib/libcxx/include/__iterator/wrap_iter.h @@ -13,12 +13,17 @@ #include <__compare/ordering.h> #include <__compare/three_way_comparable.h> #include <__config> +#include <__cstddef/size_t.h> #include <__iterator/iterator_traits.h> #include <__memory/addressof.h> #include <__memory/pointer_traits.h> +#include <__type_traits/conjunction.h> +#include <__type_traits/disjunction.h> #include <__type_traits/enable_if.h> +#include <__type_traits/integral_constant.h> #include 
<__type_traits/is_convertible.h> -#include +#include <__type_traits/is_same.h> +#include <__type_traits/make_const_lvalue_ref.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -44,9 +49,14 @@ class __wrap_iter { public: _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __wrap_iter() _NOEXCEPT : __i_() {} - template ::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __wrap_iter(const __wrap_iter<_Up>& __u) _NOEXCEPT - : __i_(__u.base()) {} + template < + class _OtherIter, + __enable_if_t< _And< is_convertible, + _Or >, + is_same > > > >::value, + int> = 0> + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 __wrap_iter(const __wrap_iter<_OtherIter>& __u) _NOEXCEPT + : __i_(__u.__i_) {} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 reference operator*() const _NOEXCEPT { return *__i_; } _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pointer operator->() const _NOEXCEPT { return std::__to_address(__i_); @@ -145,9 +155,6 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator!=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEXCEPT { return !(__x == __y); } -#endif - -// TODO(mordante) disable these overloads in the LLVM 20 release. 
template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR bool operator>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter1>& __y) _NOEXCEPT { @@ -184,7 +191,7 @@ operator<=(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) _NOEX return !(__y < __x); } -#if _LIBCPP_STD_VER >= 20 +#else template _LIBCPP_HIDE_FROM_ABI constexpr strong_ordering operator<=>(const __wrap_iter<_Iter1>& __x, const __wrap_iter<_Iter2>& __y) noexcept { diff --git a/lib/libcxx/include/__locale b/lib/libcxx/include/__locale index 4b382764b446..dfe79d5e506f 100644 --- a/lib/libcxx/include/__locale +++ b/lib/libcxx/include/__locale @@ -12,7 +12,7 @@ #include <__config> #include <__locale_dir/locale_base_api.h> -#include <__memory/shared_ptr.h> // __shared_count +#include <__memory/shared_count.h> #include <__mutex/once_flag.h> #include <__type_traits/make_unsigned.h> #include <__utility/no_destroy.h> @@ -27,7 +27,7 @@ #include #include -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS # include #else # include <__std_mbstate_t.h> @@ -50,7 +50,7 @@ _LIBCPP_HIDE_FROM_ABI const _Facet& use_facet(const locale&); class _LIBCPP_EXPORTED_FROM_ABI locale { public: // locale is essentially a shared_ptr that doesn't support weak_ptrs and never got a move constructor. 
- using __trivially_relocatable = locale; + using __trivially_relocatable _LIBCPP_NODEBUG = locale; // types: class _LIBCPP_EXPORTED_FROM_ABI facet; @@ -60,8 +60,9 @@ public: static const category // values assigned here are for exposition only none = 0, - collate = LC_COLLATE_MASK, ctype = LC_CTYPE_MASK, monetary = LC_MONETARY_MASK, numeric = LC_NUMERIC_MASK, - time = LC_TIME_MASK, messages = LC_MESSAGES_MASK, all = collate | ctype | monetary | numeric | time | messages; + collate = _LIBCPP_COLLATE_MASK, ctype = _LIBCPP_CTYPE_MASK, monetary = _LIBCPP_MONETARY_MASK, + numeric = _LIBCPP_NUMERIC_MASK, time = _LIBCPP_TIME_MASK, messages = _LIBCPP_MESSAGES_MASK, + all = collate | ctype | monetary | numeric | time | messages; // construct/copy/destroy: locale() _NOEXCEPT; @@ -236,7 +237,7 @@ long collate<_CharT>::do_hash(const char_type* __lo, const char_type* __hi) cons } extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate; #endif @@ -247,7 +248,7 @@ class _LIBCPP_TEMPLATE_VIS collate_byname; template <> class _LIBCPP_EXPORTED_FROM_ABI collate_byname : public collate { - locale_t __l_; + __locale::__locale_t __l_; public: typedef char char_type; @@ -263,10 +264,10 @@ protected: string_type do_transform(const char_type* __lo, const char_type* __hi) const override; }; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI collate_byname : public collate { - locale_t __l_; + __locale::__locale_t __l_; public: typedef wchar_t char_type; @@ -348,7 +349,7 @@ public: # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA #elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__NetBSD__) # ifdef __APPLE__ - typedef __uint32_t mask; + typedef uint32_t mask; # elif defined(__FreeBSD__) typedef unsigned long mask; # elif defined(__NetBSD__) @@ -449,7 +450,7 @@ public: template 
class _LIBCPP_TEMPLATE_VIS ctype; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI ctype : public locale::facet, public ctype_base { public: @@ -514,7 +515,9 @@ protected: virtual const char_type* do_narrow(const char_type* __low, const char_type* __high, char __dfault, char* __dest) const; }; -#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +#endif // _LIBCPP_HAS_WIDE_CHARACTERS + +inline _LIBCPP_HIDE_FROM_ABI bool __libcpp_isascii(int __c) { return (__c & ~0x7F) == 0; } template <> class _LIBCPP_EXPORTED_FROM_ABI ctype : public locale::facet, public ctype_base { @@ -527,25 +530,25 @@ public: explicit ctype(const mask* __tab = nullptr, bool __del = false, size_t __refs = 0); _LIBCPP_HIDE_FROM_ABI bool is(mask __m, char_type __c) const { - return isascii(__c) ? (__tab_[static_cast(__c)] & __m) != 0 : false; + return std::__libcpp_isascii(__c) ? (__tab_[static_cast(__c)] & __m) != 0 : false; } _LIBCPP_HIDE_FROM_ABI const char_type* is(const char_type* __low, const char_type* __high, mask* __vec) const { for (; __low != __high; ++__low, ++__vec) - *__vec = isascii(*__low) ? __tab_[static_cast(*__low)] : 0; + *__vec = std::__libcpp_isascii(*__low) ? 
__tab_[static_cast(*__low)] : 0; return __low; } _LIBCPP_HIDE_FROM_ABI const char_type* scan_is(mask __m, const char_type* __low, const char_type* __high) const { for (; __low != __high; ++__low) - if (isascii(*__low) && (__tab_[static_cast(*__low)] & __m)) + if (std::__libcpp_isascii(*__low) && (__tab_[static_cast(*__low)] & __m)) break; return __low; } _LIBCPP_HIDE_FROM_ABI const char_type* scan_not(mask __m, const char_type* __low, const char_type* __high) const { for (; __low != __high; ++__low) - if (!isascii(*__low) || !(__tab_[static_cast(*__low)] & __m)) + if (!std::__libcpp_isascii(*__low) || !(__tab_[static_cast(*__low)] & __m)) break; return __low; } @@ -616,7 +619,7 @@ class _LIBCPP_TEMPLATE_VIS ctype_byname; template <> class _LIBCPP_EXPORTED_FROM_ABI ctype_byname : public ctype { - locale_t __l_; + __locale::__locale_t __l_; public: explicit ctype_byname(const char*, size_t = 0); @@ -630,10 +633,10 @@ protected: const char_type* do_tolower(char_type* __low, const char_type* __high) const override; }; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI ctype_byname : public ctype { - locale_t __l_; + __locale::__locale_t __l_; public: explicit ctype_byname(const char*, size_t = 0); @@ -655,7 +658,7 @@ protected: const char_type* do_narrow(const char_type* __low, const char_type* __high, char __dfault, char* __dest) const override; }; -#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +#endif // _LIBCPP_HAS_WIDE_CHARACTERS template inline _LIBCPP_HIDE_FROM_ABI bool isspace(_CharT __c, const locale& __loc) { @@ -821,10 +824,10 @@ protected: // template <> class codecvt -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI codecvt : public locale::facet, public codecvt_base { - locale_t __l_; + __locale::__locale_t __l_; public: typedef wchar_t intern_type; @@ -900,7 +903,7 @@ protected: virtual int do_length(state_type&, const 
extern_type* __frm, const extern_type* __end, size_t __mx) const; virtual int do_max_length() const _NOEXCEPT; }; -#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +#endif // _LIBCPP_HAS_WIDE_CHARACTERS // template <> class codecvt // deprecated in C++20 @@ -982,7 +985,7 @@ protected: virtual int do_max_length() const _NOEXCEPT; }; -#ifndef _LIBCPP_HAS_NO_CHAR8_T +#if _LIBCPP_HAS_CHAR8_T // template <> class codecvt // C++20 @@ -1145,7 +1148,7 @@ protected: virtual int do_max_length() const _NOEXCEPT; }; -#ifndef _LIBCPP_HAS_NO_CHAR8_T +#if _LIBCPP_HAS_CHAR8_T // template <> class codecvt // C++20 @@ -1248,14 +1251,14 @@ codecvt_byname<_InternT, _ExternT, _StateT>::~codecvt_byname() {} _LIBCPP_SUPPRESS_DEPRECATED_POP extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; #endif extern template class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; // deprecated in C++20 extern template class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; // deprecated in C++20 -#ifndef _LIBCPP_HAS_NO_CHAR8_T +#if _LIBCPP_HAS_CHAR8_T extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; // C++20 extern template class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname; // C++20 #endif @@ -1438,7 +1441,7 @@ protected: string __grouping_; }; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI numpunct : public locale::facet { public: @@ -1467,7 +1470,7 @@ protected: char_type __thousands_sep_; string __grouping_; }; -#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +#endif // _LIBCPP_HAS_WIDE_CHARACTERS // template class numpunct_byname @@ -1490,7 +1493,7 @@ private: void __init(const char*); }; -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS template <> class _LIBCPP_EXPORTED_FROM_ABI 
numpunct_byname : public numpunct { public: @@ -1506,7 +1509,7 @@ protected: private: void __init(const char*); }; -#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +#endif // _LIBCPP_HAS_WIDE_CHARACTERS _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__locale_dir/locale_base_api.h b/lib/libcxx/include/__locale_dir/locale_base_api.h index 8c000c558c52..c1e73caeecce 100644 --- a/lib/libcxx/include/__locale_dir/locale_base_api.h +++ b/lib/libcxx/include/__locale_dir/locale_base_api.h @@ -9,90 +9,315 @@ #ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_H #define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_H -#if defined(_LIBCPP_MSVCRT_LIKE) -# include <__locale_dir/locale_base_api/win32.h> -#elif defined(_AIX) || defined(__MVS__) -# include <__locale_dir/locale_base_api/ibm.h> -#elif defined(__ANDROID__) -# include <__locale_dir/locale_base_api/android.h> -#elif defined(__sun__) -# include <__locale_dir/locale_base_api/solaris.h> -#elif defined(_NEWLIB_VERSION) -# include <__locale_dir/locale_base_api/newlib.h> -#elif defined(__OpenBSD__) -# include <__locale_dir/locale_base_api/openbsd.h> -#elif defined(__Fuchsia__) -# include <__locale_dir/locale_base_api/fuchsia.h> -#elif defined(__wasi__) || defined(_LIBCPP_HAS_MUSL_LIBC) -# include <__locale_dir/locale_base_api/musl.h> -#elif defined(__APPLE__) || defined(__FreeBSD__) -# include -#endif +#include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif -/* -The platform-specific headers have to provide the following interface: - -// TODO: rename this to __libcpp_locale_t -using locale_t = implementation-defined; - -implementation-defined __libcpp_mb_cur_max_l(locale_t); -wint_t __libcpp_btowc_l(int, locale_t); -int __libcpp_wctob_l(wint_t, locale_t); -size_t __libcpp_wcsnrtombs_l(char* dest, const wchar_t** src, size_t wide_char_count, size_t len, mbstate_t, locale_t); -size_t __libcpp_wcrtomb_l(char* str, wchar_t wide_char, mbstate_t*, locale_t); -size_t __libcpp_mbsnrtowcs_l(wchar_t* 
dest, const char** src, size_t max_out, size_t len, mbstate_t*, locale_t); -size_t __libcpp_mbrtowc_l(wchar_t* dest, cosnt char* src, size_t count, mbstate_t*, locale_t); -int __libcpp_mbtowc_l(wchar_t* dest, const char* src, size_t count, locale_t); -size_t __libcpp_mbrlen_l(const char* str, size_t count, mbstate_t*, locale_t); -lconv* __libcpp_localeconv_l(locale_t); -size_t __libcpp_mbsrtowcs_l(wchar_t* dest, const char** src, size_t len, mbstate_t*, locale_t); -int __libcpp_snprintf_l(char* dest, size_t buff_size, locale_t, const char* format, ...); -int __libcpp_asprintf_l(char** dest, locale_t, const char* format, ...); -int __libcpp_sscanf_l(const char* dest, locale_t, const char* format, ...); - -// TODO: change these to reserved names -float strtof_l(const char* str, char** str_end, locale_t); -double strtod_l(const char* str, char** str_end, locale_t); -long double strtold_l(const char* str, char** str_end, locale_t); -long long strtoll_l(const char* str, char** str_end, locale_t); -unsigned long long strtoull_l(const char* str, char** str_end, locale_t); - -locale_t newlocale(int category_mask, const char* locale, locale_t base); -void freelocale(locale_t); - -int islower_l(int ch, locale_t); -int isupper_l(int ch, locale_t); -int isdigit_l(int ch, locale_t); -int isxdigit_l(int ch, locale_t); -int strcoll_l(const char* lhs, const char* rhs, locale_t); -size_t strxfrm_l(char* dst, const char* src, size_t n, locale_t); -int wcscoll_l(const char* lhs, const char* rhs, locale_t); -size_t wcsxfrm_l(wchar_t* dst, const wchar_t* src, size_t n, locale_t); -int toupper_l(int ch, locale_t); -int tolower_l(int ch, locale_t); -int iswspace_l(wint_t ch, locale_t); -int iswprint_l(wint_t ch, locale_t); -int iswcntrl_l(wint_t ch, locale_t); -int iswupper_l(wint_t ch, locale_t); -int iswlower_l(wint_t ch, locale_t); -int iswalpha_l(wint_t ch, locale_t); -int iswblank_l(wint_t ch, locale_t); -int iswdigit_l(wint_t ch, locale_t); -int iswpunct_l(wint_t ch, locale_t); 
-int iswxdigit_l(wint_t ch, locale_t); -wint_t towupper_l(wint_t ch, locale_t); -wint_t towlower_l(wint_t ch, locale_t); -size_t strftime_l(char* str, size_t len, const char* format, const tm*, locale_t); - - -These functions are equivalent to their C counterparts, -except that locale_t is used instead of the current global locale. - -The variadic functions may be implemented as templates with a parameter pack instead of variadic functions. -*/ +// The platform-specific headers have to provide the following interface. +// +// These functions are equivalent to their C counterparts, except that __locale::__locale_t +// is used instead of the current global locale. +// +// Variadic functions may be implemented as templates with a parameter pack instead +// of C-style variadic functions. +// +// Most of these functions are only required when building the library. Functions that are also +// required when merely using the headers are marked as such below. +// +// TODO: __localeconv shouldn't take a reference, but the Windows implementation doesn't allow copying __locale_t +// TODO: Eliminate the need for any of these functions from the headers. 
+// +// Locale management +// ----------------- +// namespace __locale { +// using __locale_t = implementation-defined; // required by the headers +// using __lconv_t = implementation-defined; +// __locale_t __newlocale(int, const char*, __locale_t); +// void __freelocale(__locale_t); +// char* __setlocale(int, const char*); +// __lconv_t* __localeconv(__locale_t&); +// } +// +// // required by the headers +// #define _LIBCPP_COLLATE_MASK /* implementation-defined */ +// #define _LIBCPP_CTYPE_MASK /* implementation-defined */ +// #define _LIBCPP_MONETARY_MASK /* implementation-defined */ +// #define _LIBCPP_NUMERIC_MASK /* implementation-defined */ +// #define _LIBCPP_TIME_MASK /* implementation-defined */ +// #define _LIBCPP_MESSAGES_MASK /* implementation-defined */ +// #define _LIBCPP_ALL_MASK /* implementation-defined */ +// #define _LIBCPP_LC_ALL /* implementation-defined */ +// +// Strtonum functions +// ------------------ +// namespace __locale { +// // required by the headers +// float __strtof(const char*, char**, __locale_t); +// double __strtod(const char*, char**, __locale_t); +// long double __strtold(const char*, char**, __locale_t); +// long long __strtoll(const char*, char**, __locale_t); +// unsigned long long __strtoull(const char*, char**, __locale_t); +// } +// +// Character manipulation functions +// -------------------------------- +// namespace __locale { +// int __islower(int, __locale_t); +// int __isupper(int, __locale_t); +// int __isdigit(int, __locale_t); // required by the headers +// int __isxdigit(int, __locale_t); // required by the headers +// int __toupper(int, __locale_t); +// int __tolower(int, __locale_t); +// int __strcoll(const char*, const char*, __locale_t); +// size_t __strxfrm(char*, const char*, size_t, __locale_t); +// +// int __iswctype(wint_t, wctype_t, __locale_t); +// int __iswspace(wint_t, __locale_t); +// int __iswprint(wint_t, __locale_t); +// int __iswcntrl(wint_t, __locale_t); +// int __iswupper(wint_t, 
__locale_t); +// int __iswlower(wint_t, __locale_t); +// int __iswalpha(wint_t, __locale_t); +// int __iswblank(wint_t, __locale_t); +// int __iswdigit(wint_t, __locale_t); +// int __iswpunct(wint_t, __locale_t); +// int __iswxdigit(wint_t, __locale_t); +// wint_t __towupper(wint_t, __locale_t); +// wint_t __towlower(wint_t, __locale_t); +// int __wcscoll(const wchar_t*, const wchar_t*, __locale_t); +// size_t __wcsxfrm(wchar_t*, const wchar_t*, size_t, __locale_t); +// +// size_t __strftime(char*, size_t, const char*, const tm*, __locale_t); +// } +// +// Other functions +// --------------- +// namespace __locale { +// implementation-defined __mb_len_max(__locale_t); +// wint_t __btowc(int, __locale_t); +// int __wctob(wint_t, __locale_t); +// size_t __wcsnrtombs(char*, const wchar_t**, size_t, size_t, mbstate_t*, __locale_t); +// size_t __wcrtomb(char*, wchar_t, mbstate_t*, __locale_t); +// size_t __mbsnrtowcs(wchar_t*, const char**, size_t, size_t, mbstate_t*, __locale_t); +// size_t __mbrtowc(wchar_t*, const char*, size_t, mbstate_t*, __locale_t); +// int __mbtowc(wchar_t*, const char*, size_t, __locale_t); +// size_t __mbrlen(const char*, size_t, mbstate_t*, __locale_t); +// size_t __mbsrtowcs(wchar_t*, const char**, size_t, mbstate_t*, __locale_t); +// +// int __snprintf(char*, size_t, __locale_t, const char*, ...); // required by the headers +// int __asprintf(char**, __locale_t, const char*, ...); // required by the headers +// int __sscanf(const char*, __locale_t, const char*, ...); // required by the headers +// } + +#if defined(__APPLE__) +# include <__locale_dir/support/apple.h> +#elif defined(__FreeBSD__) +# include <__locale_dir/support/freebsd.h> +#elif defined(_LIBCPP_MSVCRT_LIKE) +# include <__locale_dir/support/windows.h> +#elif defined(__Fuchsia__) +# include <__locale_dir/support/fuchsia.h> +#else + +// TODO: This is a temporary definition to bridge between the old way we defined the locale base API +// (by providing global non-reserved names) 
and the new API. As we move individual platforms +// towards the new way of defining the locale base API, this should disappear since each platform +// will define those directly. +# if defined(_AIX) || defined(__MVS__) +# include <__locale_dir/locale_base_api/ibm.h> +# elif defined(__ANDROID__) +# include <__locale_dir/locale_base_api/android.h> +# elif defined(__OpenBSD__) +# include <__locale_dir/locale_base_api/openbsd.h> +# elif defined(__wasi__) || _LIBCPP_HAS_MUSL_LIBC +# include <__locale_dir/locale_base_api/musl.h> +# endif + +# include <__locale_dir/locale_base_api/bsd_locale_fallbacks.h> + +# include <__cstddef/size_t.h> +# include <__utility/forward.h> +# include +# include +# include +# if _LIBCPP_HAS_WIDE_CHARACTERS +# include +# endif +_LIBCPP_BEGIN_NAMESPACE_STD +namespace __locale { +// +// Locale management +// +# define _LIBCPP_COLLATE_MASK LC_COLLATE_MASK +# define _LIBCPP_CTYPE_MASK LC_CTYPE_MASK +# define _LIBCPP_MONETARY_MASK LC_MONETARY_MASK +# define _LIBCPP_NUMERIC_MASK LC_NUMERIC_MASK +# define _LIBCPP_TIME_MASK LC_TIME_MASK +# define _LIBCPP_MESSAGES_MASK LC_MESSAGES_MASK +# define _LIBCPP_ALL_MASK LC_ALL_MASK +# define _LIBCPP_LC_ALL LC_ALL + +using __locale_t _LIBCPP_NODEBUG = locale_t; + +# if defined(_LIBCPP_BUILDING_LIBRARY) +using __lconv_t _LIBCPP_NODEBUG = lconv; + +inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __category_mask, const char* __name, __locale_t __loc) { + return newlocale(__category_mask, __name, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, char const* __locale) { + return ::setlocale(__category, __locale); +} + +inline _LIBCPP_HIDE_FROM_ABI void __freelocale(__locale_t __loc) { freelocale(__loc); } + +inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { return __libcpp_localeconv_l(__loc); } +# endif // _LIBCPP_BUILDING_LIBRARY + +// +// Strtonum functions +// +inline _LIBCPP_HIDE_FROM_ABI float __strtof(const char* __nptr, char** __endptr, __locale_t 
__loc) { + return strtof_l(__nptr, __endptr, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __nptr, char** __endptr, __locale_t __loc) { + return strtod_l(__nptr, __endptr, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __endptr, __locale_t __loc) { + return strtold_l(__nptr, __endptr, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { + return strtoll_l(__nptr, __endptr, __base, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI unsigned long long +__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { + return strtoull_l(__nptr, __endptr, __base, __loc); +} + +// +// Character manipulation functions +// +# if defined(_LIBCPP_BUILDING_LIBRARY) +inline _LIBCPP_HIDE_FROM_ABI int __islower(int __ch, __locale_t __loc) { return islower_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __ch, __locale_t __loc) { return isupper_l(__ch, __loc); } +# endif + +inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __ch, __locale_t __loc) { return isdigit_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __ch, __locale_t __loc) { return isxdigit_l(__ch, __loc); } + +# if defined(_LIBCPP_BUILDING_LIBRARY) +inline _LIBCPP_HIDE_FROM_ABI int __strcoll(const char* __s1, const char* __s2, __locale_t __loc) { + return strcoll_l(__s1, __s2, __loc); +} +inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, size_t __n, __locale_t __loc) { + return strxfrm_l(__dest, __src, __n, __loc); +} +inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __ch, __locale_t __loc) { return toupper_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __tolower(int __ch, __locale_t __loc) { return tolower_l(__ch, __loc); } + +# if _LIBCPP_HAS_WIDE_CHARACTERS +inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __s1, const wchar_t* __s2, __locale_t __loc) { + return wcscoll_l(__s1, __s2, 
__loc); +} +inline _LIBCPP_HIDE_FROM_ABI size_t __wcsxfrm(wchar_t* __dest, const wchar_t* __src, size_t __n, __locale_t __loc) { + return wcsxfrm_l(__dest, __src, __n, __loc); +} +inline _LIBCPP_HIDE_FROM_ABI int __iswctype(wint_t __ch, wctype_t __type, __locale_t __loc) { + return iswctype_l(__ch, __type, __loc); +} +inline _LIBCPP_HIDE_FROM_ABI int __iswspace(wint_t __ch, __locale_t __loc) { return iswspace_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswprint(wint_t __ch, __locale_t __loc) { return iswprint_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswcntrl(wint_t __ch, __locale_t __loc) { return iswcntrl_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswupper(wint_t __ch, __locale_t __loc) { return iswupper_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswlower(wint_t __ch, __locale_t __loc) { return iswlower_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswalpha(wint_t __ch, __locale_t __loc) { return iswalpha_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswblank(wint_t __ch, __locale_t __loc) { return iswblank_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswdigit(wint_t __ch, __locale_t __loc) { return iswdigit_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswpunct(wint_t __ch, __locale_t __loc) { return iswpunct_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswxdigit(wint_t __ch, __locale_t __loc) { return iswxdigit_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI wint_t __towupper(wint_t __ch, __locale_t __loc) { return towupper_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI wint_t __towlower(wint_t __ch, __locale_t __loc) { return towlower_l(__ch, __loc); } +# endif + +inline _LIBCPP_HIDE_FROM_ABI size_t +__strftime(char* __s, size_t __max, const char* __format, const tm* __tm, __locale_t __loc) { + return strftime_l(__s, __max, __format, __tm, __loc); +} + +// +// Other functions +// +inline _LIBCPP_HIDE_FROM_ABI decltype(__libcpp_mb_cur_max_l(__locale_t())) 
__mb_len_max(__locale_t __loc) { + return __libcpp_mb_cur_max_l(__loc); +} +# if _LIBCPP_HAS_WIDE_CHARACTERS +inline _LIBCPP_HIDE_FROM_ABI wint_t __btowc(int __ch, __locale_t __loc) { return __libcpp_btowc_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __wctob(wint_t __ch, __locale_t __loc) { return __libcpp_wctob_l(__ch, __loc); } +inline _LIBCPP_HIDE_FROM_ABI size_t +__wcsnrtombs(char* __dest, const wchar_t** __src, size_t __nwc, size_t __len, mbstate_t* __ps, __locale_t __loc) { + return __libcpp_wcsnrtombs_l(__dest, __src, __nwc, __len, __ps, __loc); +} +inline _LIBCPP_HIDE_FROM_ABI size_t __wcrtomb(char* __s, wchar_t __ch, mbstate_t* __ps, __locale_t __loc) { + return __libcpp_wcrtomb_l(__s, __ch, __ps, __loc); +} +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbsnrtowcs(wchar_t* __dest, const char** __src, size_t __nms, size_t __len, mbstate_t* __ps, __locale_t __loc) { + return __libcpp_mbsnrtowcs_l(__dest, __src, __nms, __len, __ps, __loc); +} +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbrtowc(wchar_t* __pwc, const char* __s, size_t __n, mbstate_t* __ps, __locale_t __loc) { + return __libcpp_mbrtowc_l(__pwc, __s, __n, __ps, __loc); +} +inline _LIBCPP_HIDE_FROM_ABI int __mbtowc(wchar_t* __pwc, const char* __pmb, size_t __max, __locale_t __loc) { + return __libcpp_mbtowc_l(__pwc, __pmb, __max, __loc); +} +inline _LIBCPP_HIDE_FROM_ABI size_t __mbrlen(const char* __s, size_t __n, mbstate_t* __ps, __locale_t __loc) { + return __libcpp_mbrlen_l(__s, __n, __ps, __loc); +} +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, __locale_t __loc) { + return __libcpp_mbsrtowcs_l(__dest, __src, __len, __ps, __loc); +} +# endif // _LIBCPP_HAS_WIDE_CHARACTERS +# endif // _LIBCPP_BUILDING_LIBRARY + +_LIBCPP_DIAGNOSTIC_PUSH +_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat") +_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral") // GCC doesn't support [[gnu::format]] on variadic templates +# ifdef 
_LIBCPP_COMPILER_CLANG_BASED +# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) _LIBCPP_ATTRIBUTE_FORMAT(__VA_ARGS__) +# else +# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) /* nothing */ +# endif + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snprintf( + char* __s, size_t __n, __locale_t __loc, const char* __format, _Args&&... __args) { + return std::__libcpp_snprintf_l(__s, __n, __loc, __format, std::forward<_Args>(__args)...); +} +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf( + char** __s, __locale_t __loc, const char* __format, _Args&&... __args) { + return std::__libcpp_asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...); +} +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( + const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) { + return std::__libcpp_sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...); +} +_LIBCPP_DIAGNOSTIC_POP +# undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT + +} // namespace __locale +_LIBCPP_END_NAMESPACE_STD + +#endif // Compatibility definition of locale base APIs #endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_H diff --git a/lib/libcxx/include/__locale_dir/locale_base_api/android.h b/lib/libcxx/include/__locale_dir/locale_base_api/android.h index 9965d8bbf6a2..36b8d93e1b22 100644 --- a/lib/libcxx/include/__locale_dir/locale_base_api/android.h +++ b/lib/libcxx/include/__locale_dir/locale_base_api/android.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_ANDROID_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_ANDROID_H #include @@ -18,9 +18,6 @@ extern "C" { } #include -#if __ANDROID_API__ < 21 -# include <__support/xlocale/__posix_l_fallback.h> -#endif 
// If we do not have this header, we are in a platform build rather than an NDK // build, which will always be at least as new as the ToT NDK, in which case we @@ -30,9 +27,7 @@ extern "C" { // In NDK versions later than 16, locale-aware functions are provided by // legacy_stdlib_inlines.h # if __NDK_MAJOR__ <= 16 -# if __ANDROID_API__ < 21 -# include <__support/xlocale/__strtonum_fallback.h> -# elif __ANDROID_API__ < 26 +# if __ANDROID_API__ < 26 inline _LIBCPP_HIDE_FROM_ABI float strtof_l(const char* __nptr, char** __endptr, locale_t) { return ::strtof(__nptr, __endptr); @@ -47,4 +42,4 @@ inline _LIBCPP_HIDE_FROM_ABI double strtod_l(const char* __nptr, char** __endptr # endif // __NDK_MAJOR__ <= 16 #endif // __has_include() -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_ANDROID_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_ANDROID_H diff --git a/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h b/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h deleted file mode 100644 index 1f9607209842..000000000000 --- a/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_defaults.h +++ /dev/null @@ -1,36 +0,0 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// The BSDs have lots of *_l functions. We don't want to define those symbols -// on other platforms though, for fear of conflicts with user code. So here, -// we will define the mapping from an internal macro to the real BSD symbol. 
-//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -#define __libcpp_mb_cur_max_l(loc) MB_CUR_MAX_L(loc) -#define __libcpp_btowc_l(ch, loc) btowc_l(ch, loc) -#define __libcpp_wctob_l(wch, loc) wctob_l(wch, loc) -#define __libcpp_wcsnrtombs_l(dst, src, nwc, len, ps, loc) wcsnrtombs_l(dst, src, nwc, len, ps, loc) -#define __libcpp_wcrtomb_l(src, wc, ps, loc) wcrtomb_l(src, wc, ps, loc) -#define __libcpp_mbsnrtowcs_l(dst, src, nms, len, ps, loc) mbsnrtowcs_l(dst, src, nms, len, ps, loc) -#define __libcpp_mbrtowc_l(pwc, s, n, ps, l) mbrtowc_l(pwc, s, n, ps, l) -#define __libcpp_mbtowc_l(pwc, pmb, max, l) mbtowc_l(pwc, pmb, max, l) -#define __libcpp_mbrlen_l(s, n, ps, l) mbrlen_l(s, n, ps, l) -#define __libcpp_localeconv_l(l) localeconv_l(l) -#define __libcpp_mbsrtowcs_l(dest, src, len, ps, l) mbsrtowcs_l(dest, src, len, ps, l) -#define __libcpp_snprintf_l(...) snprintf_l(__VA_ARGS__) -#define __libcpp_asprintf_l(...) asprintf_l(__VA_ARGS__) -#define __libcpp_sscanf_l(...) sscanf_l(__VA_ARGS__) - -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_DEFAULTS_H diff --git a/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h b/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h index 76b94287cd6c..b62a1b737e97 100644 --- a/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h +++ b/lib/libcxx/include/__locale_dir/locale_base_api/bsd_locale_fallbacks.h @@ -10,15 +10,15 @@ // of those functions for non-BSD platforms. 
//===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H -#include <__locale_dir/locale_base_api/locale_guard.h> -#include +#include #include +#include #include -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS # include #endif @@ -28,65 +28,79 @@ _LIBCPP_BEGIN_NAMESPACE_STD +struct __locale_guard { + _LIBCPP_HIDE_FROM_ABI __locale_guard(locale_t& __loc) : __old_loc_(::uselocale(__loc)) {} + + _LIBCPP_HIDE_FROM_ABI ~__locale_guard() { + if (__old_loc_) + ::uselocale(__old_loc_); + } + + locale_t __old_loc_; + + __locale_guard(__locale_guard const&) = delete; + __locale_guard& operator=(__locale_guard const&) = delete; +}; + inline _LIBCPP_HIDE_FROM_ABI decltype(MB_CUR_MAX) __libcpp_mb_cur_max_l(locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return MB_CUR_MAX; } -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI wint_t __libcpp_btowc_l(int __c, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return btowc(__c); } inline _LIBCPP_HIDE_FROM_ABI int __libcpp_wctob_l(wint_t __c, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return wctob(__c); } inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_wcsnrtombs_l(char* __dest, const wchar_t** __src, size_t __nwc, size_t __len, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return wcsnrtombs(__dest, __src, __nwc, __len, __ps); } inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_wcrtomb_l(char* __s, wchar_t __wc, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + 
__locale_guard __current(__l); return wcrtomb(__s, __wc, __ps); } inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_mbsnrtowcs_l(wchar_t* __dest, const char** __src, size_t __nms, size_t __len, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbsnrtowcs(__dest, __src, __nms, __len, __ps); } inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_mbrtowc_l(wchar_t* __pwc, const char* __s, size_t __n, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbrtowc(__pwc, __s, __n, __ps); } inline _LIBCPP_HIDE_FROM_ABI int __libcpp_mbtowc_l(wchar_t* __pwc, const char* __pmb, size_t __max, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbtowc(__pwc, __pmb, __max); } inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_mbrlen_l(const char* __s, size_t __n, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbrlen(__s, __n, __ps); } -#endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS +#endif // _LIBCPP_HAS_WIDE_CHARACTERS -inline _LIBCPP_HIDE_FROM_ABI lconv* __libcpp_localeconv_l(locale_t __l) { - __libcpp_locale_guard __current(__l); +inline _LIBCPP_HIDE_FROM_ABI lconv* __libcpp_localeconv_l(locale_t& __l) { + __locale_guard __current(__l); return localeconv(); } -#ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS +#if _LIBCPP_HAS_WIDE_CHARACTERS inline _LIBCPP_HIDE_FROM_ABI size_t __libcpp_mbsrtowcs_l(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, locale_t __l) { - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); return mbsrtowcs(__dest, __src, __len, __ps); } #endif @@ -95,7 +109,7 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __libcpp_snprintf_l( char* __s, size_t __n, locale_t __l, const char* __format, ...) 
{ va_list __va; va_start(__va, __format); - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); int __res = vsnprintf(__s, __n, __format, __va); va_end(__va); return __res; @@ -105,7 +119,7 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __libcpp_asprintf_l( char** __s, locale_t __l, const char* __format, ...) { va_list __va; va_start(__va, __format); - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); int __res = vasprintf(__s, __format, __va); va_end(__va); return __res; @@ -115,7 +129,7 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l( const char* __s, locale_t __l, const char* __format, ...) { va_list __va; va_start(__va, __format); - __libcpp_locale_guard __current(__l); + __locale_guard __current(__l); int __res = vsscanf(__s, __format, __va); va_end(__va); return __res; @@ -123,4 +137,4 @@ inline _LIBCPP_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __libcpp_sscanf_l( _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_BSD_LOCALE_FALLBACKS_H diff --git a/lib/libcxx/include/__locale_dir/locale_base_api/ibm.h b/lib/libcxx/include/__locale_dir/locale_base_api/ibm.h index 01af20194428..1d1d15df9f79 100644 --- a/lib/libcxx/include/__locale_dir/locale_base_api/ibm.h +++ b/lib/libcxx/include/__locale_dir/locale_base_api/ibm.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_IBM_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_IBM_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_IBM_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_IBM_H #if defined(__MVS__) # include <__support/ibm/locale_mgmt_zos.h> @@ -82,7 +82,7 @@ strtoull_l(const char* __nptr, char** __endptr, int __base, locale_t locale) { inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 0) int vasprintf(char** strp, const char* fmt, va_list ap) { const 
size_t buff_size = 256; - if ((*strp = (char*)malloc(buff_size)) == NULL) { + if ((*strp = (char*)malloc(buff_size)) == nullptr) { return -1; } @@ -97,7 +97,7 @@ _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 0) int vasprintf(char** strp, const char va_end(ap_copy); if ((size_t)str_size >= buff_size) { - if ((*strp = (char*)realloc(*strp, str_size + 1)) == NULL) { + if ((*strp = (char*)realloc(*strp, str_size + 1)) == nullptr) { return -1; } str_size = vsnprintf(*strp, str_size + 1, fmt, ap); @@ -105,4 +105,4 @@ _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 2, 0) int vasprintf(char** strp, const char return str_size; } -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_IBM_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_IBM_H diff --git a/lib/libcxx/include/__locale_dir/locale_base_api/locale_guard.h b/lib/libcxx/include/__locale_dir/locale_base_api/locale_guard.h deleted file mode 100644 index 2baacb51cd06..000000000000 --- a/lib/libcxx/include/__locale_dir/locale_base_api/locale_guard.h +++ /dev/null @@ -1,78 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H - -#include <__config> -#include <__locale> // for locale_t -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -_LIBCPP_BEGIN_NAMESPACE_STD - -#if !defined(_LIBCPP_LOCALE__L_EXTENSIONS) -struct __libcpp_locale_guard { - _LIBCPP_HIDE_FROM_ABI __libcpp_locale_guard(locale_t& __loc) : __old_loc_(uselocale(__loc)) {} - - _LIBCPP_HIDE_FROM_ABI ~__libcpp_locale_guard() { - if (__old_loc_) - uselocale(__old_loc_); - } - - locale_t __old_loc_; - - __libcpp_locale_guard(__libcpp_locale_guard const&) = delete; - __libcpp_locale_guard& operator=(__libcpp_locale_guard const&) = delete; -}; -#elif defined(_LIBCPP_MSVCRT_LIKE) -struct __libcpp_locale_guard { - __libcpp_locale_guard(locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) { - // Setting the locale can be expensive even when the locale given is - // already the current locale, so do an explicit check to see if the - // current locale is already the one we want. - const char* __lc = __setlocale(nullptr); - // If every category is the same, the locale string will simply be the - // locale name, otherwise it will be a semicolon-separated string listing - // each category. In the second case, we know at least one category won't - // be what we want, so we only have to check the first case. 
- if (std::strcmp(__l.__get_locale(), __lc) != 0) { - __locale_all = _strdup(__lc); - if (__locale_all == nullptr) - __throw_bad_alloc(); - __setlocale(__l.__get_locale()); - } - } - ~__libcpp_locale_guard() { - // The CRT documentation doesn't explicitly say, but setlocale() does the - // right thing when given a semicolon-separated list of locale settings - // for the different categories in the same format as returned by - // setlocale(LC_ALL, nullptr). - if (__locale_all != nullptr) { - __setlocale(__locale_all); - free(__locale_all); - } - _configthreadlocale(__status); - } - static const char* __setlocale(const char* __locale) { - const char* __new_locale = setlocale(LC_ALL, __locale); - if (__new_locale == nullptr) - __throw_bad_alloc(); - return __new_locale; - } - int __status; - char* __locale_all = nullptr; -}; -#endif - -_LIBCPP_END_NAMESPACE_STD - -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_LOCALE_GUARD_H diff --git a/lib/libcxx/include/__locale_dir/locale_base_api/musl.h b/lib/libcxx/include/__locale_dir/locale_base_api/musl.h index bf7b849d5863..1653214cdba1 100644 --- a/lib/libcxx/include/__locale_dir/locale_base_api/musl.h +++ b/lib/libcxx/include/__locale_dir/locale_base_api/musl.h @@ -14,8 +14,8 @@ // in Musl. 
//===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_MUSL_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_MUSL_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H #include #include @@ -28,4 +28,4 @@ inline _LIBCPP_HIDE_FROM_ABI unsigned long long strtoull_l(const char* __nptr, c return ::strtoull(__nptr, __endptr, __base); } -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_MUSL_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_MUSL_H diff --git a/lib/libcxx/include/__locale_dir/locale_base_api/newlib.h b/lib/libcxx/include/__locale_dir/locale_base_api/newlib.h deleted file mode 100644 index a8c1cff16e6d..000000000000 --- a/lib/libcxx/include/__locale_dir/locale_base_api/newlib.h +++ /dev/null @@ -1,12 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H - -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_NEWLIB_H diff --git a/lib/libcxx/include/__locale_dir/locale_base_api/openbsd.h b/lib/libcxx/include/__locale_dir/locale_base_api/openbsd.h index 0c05d6a0f788..d4fb224e0c80 100644 --- a/lib/libcxx/include/__locale_dir/locale_base_api/openbsd.h +++ b/lib/libcxx/include/__locale_dir/locale_base_api/openbsd.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_OPENBSD_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_OPENBSD_H +#ifndef _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_OPENBSD_H +#define _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_OPENBSD_H #include <__support/xlocale/__strtonum_fallback.h> #include @@ -16,4 +16,4 @@ #include #include -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_OPENBSD_H +#endif // _LIBCPP___LOCALE_DIR_LOCALE_BASE_API_OPENBSD_H diff --git a/lib/libcxx/include/__locale_dir/locale_base_api/win32.h b/lib/libcxx/include/__locale_dir/locale_base_api/win32.h deleted file mode 100644 index f66baffb6920..000000000000 --- a/lib/libcxx/include/__locale_dir/locale_base_api/win32.h +++ /dev/null @@ -1,235 +0,0 @@ -// -*- C++ -*- -//===-----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_WIN32_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_WIN32_H - -#include <__config> -#include -#include // _locale_t -#include -#include - -#define _X_ALL LC_ALL -#define _X_COLLATE LC_COLLATE -#define _X_CTYPE LC_CTYPE -#define _X_MONETARY LC_MONETARY -#define _X_NUMERIC LC_NUMERIC -#define _X_TIME LC_TIME -#define _X_MAX LC_MAX -#define _X_MESSAGES 6 -#define _NCAT (_X_MESSAGES + 1) - -#define _CATMASK(n) ((1 << (n)) >> 1) -#define _M_COLLATE _CATMASK(_X_COLLATE) -#define _M_CTYPE _CATMASK(_X_CTYPE) -#define _M_MONETARY _CATMASK(_X_MONETARY) -#define _M_NUMERIC _CATMASK(_X_NUMERIC) -#define _M_TIME _CATMASK(_X_TIME) -#define _M_MESSAGES _CATMASK(_X_MESSAGES) -#define _M_ALL (_CATMASK(_NCAT) - 1) - -#define LC_COLLATE_MASK _M_COLLATE -#define LC_CTYPE_MASK _M_CTYPE -#define LC_MONETARY_MASK _M_MONETARY -#define LC_NUMERIC_MASK _M_NUMERIC -#define LC_TIME_MASK _M_TIME -#define LC_MESSAGES_MASK _M_MESSAGES -#define LC_ALL_MASK \ - (LC_COLLATE_MASK | LC_CTYPE_MASK | LC_MESSAGES_MASK | LC_MONETARY_MASK | LC_NUMERIC_MASK | LC_TIME_MASK) - -class __lconv_storage { -public: - __lconv_storage(const lconv* __lc_input) { - __lc_ = *__lc_input; - - __decimal_point_ = __lc_input->decimal_point; - __thousands_sep_ = __lc_input->thousands_sep; - __grouping_ = __lc_input->grouping; - __int_curr_symbol_ = __lc_input->int_curr_symbol; - __currency_symbol_ = __lc_input->currency_symbol; - __mon_decimal_point_ = __lc_input->mon_decimal_point; - __mon_thousands_sep_ = __lc_input->mon_thousands_sep; - __mon_grouping_ = __lc_input->mon_grouping; - __positive_sign_ = __lc_input->positive_sign; - __negative_sign_ = __lc_input->negative_sign; - - __lc_.decimal_point = const_cast(__decimal_point_.c_str()); - __lc_.thousands_sep = const_cast(__thousands_sep_.c_str()); - __lc_.grouping = 
const_cast(__grouping_.c_str()); - __lc_.int_curr_symbol = const_cast(__int_curr_symbol_.c_str()); - __lc_.currency_symbol = const_cast(__currency_symbol_.c_str()); - __lc_.mon_decimal_point = const_cast(__mon_decimal_point_.c_str()); - __lc_.mon_thousands_sep = const_cast(__mon_thousands_sep_.c_str()); - __lc_.mon_grouping = const_cast(__mon_grouping_.c_str()); - __lc_.positive_sign = const_cast(__positive_sign_.c_str()); - __lc_.negative_sign = const_cast(__negative_sign_.c_str()); - } - - lconv* __get() { return &__lc_; } - -private: - lconv __lc_; - std::string __decimal_point_; - std::string __thousands_sep_; - std::string __grouping_; - std::string __int_curr_symbol_; - std::string __currency_symbol_; - std::string __mon_decimal_point_; - std::string __mon_thousands_sep_; - std::string __mon_grouping_; - std::string __positive_sign_; - std::string __negative_sign_; -}; - -class locale_t { -public: - locale_t() : __locale_(nullptr), __locale_str_(nullptr), __lc_(nullptr) {} - locale_t(std::nullptr_t) : __locale_(nullptr), __locale_str_(nullptr), __lc_(nullptr) {} - locale_t(_locale_t __xlocale, const char* __xlocale_str) - : __locale_(__xlocale), __locale_str_(__xlocale_str), __lc_(nullptr) {} - locale_t(const locale_t& __l) : __locale_(__l.__locale_), __locale_str_(__l.__locale_str_), __lc_(nullptr) {} - - ~locale_t() { delete __lc_; } - - locale_t& operator=(const locale_t& __l) { - __locale_ = __l.__locale_; - __locale_str_ = __l.__locale_str_; - // __lc_ not copied - return *this; - } - - friend bool operator==(const locale_t& __left, const locale_t& __right) { - return __left.__locale_ == __right.__locale_; - } - - friend bool operator==(const locale_t& __left, int __right) { return __left.__locale_ == nullptr && __right == 0; } - - friend bool operator==(const locale_t& __left, long long __right) { - return __left.__locale_ == nullptr && __right == 0; - } - - friend bool operator==(const locale_t& __left, std::nullptr_t) { return __left.__locale_ == 
nullptr; } - - friend bool operator==(int __left, const locale_t& __right) { return __left == 0 && nullptr == __right.__locale_; } - - friend bool operator==(std::nullptr_t, const locale_t& __right) { return nullptr == __right.__locale_; } - - friend bool operator!=(const locale_t& __left, const locale_t& __right) { return !(__left == __right); } - - friend bool operator!=(const locale_t& __left, int __right) { return !(__left == __right); } - - friend bool operator!=(const locale_t& __left, long long __right) { return !(__left == __right); } - - friend bool operator!=(const locale_t& __left, std::nullptr_t __right) { return !(__left == __right); } - - friend bool operator!=(int __left, const locale_t& __right) { return !(__left == __right); } - - friend bool operator!=(std::nullptr_t __left, const locale_t& __right) { return !(__left == __right); } - - operator bool() const { return __locale_ != nullptr; } - - const char* __get_locale() const { return __locale_str_; } - - operator _locale_t() const { return __locale_; } - - lconv* __store_lconv(const lconv* __input_lc) { - delete __lc_; - __lc_ = new __lconv_storage(__input_lc); - return __lc_->__get(); - } - -private: - _locale_t __locale_; - const char* __locale_str_; - __lconv_storage* __lc_ = nullptr; -}; - -// Locale management functions -#define freelocale _free_locale -// FIXME: base currently unused. Needs manual work to construct the new locale -locale_t newlocale(int __mask, const char* __locale, locale_t __base); -// uselocale can't be implemented on Windows because Windows allows partial modification -// of thread-local locale and so _get_current_locale() returns a copy while uselocale does -// not create any copies. -// We can still implement raii even without uselocale though. 
- -lconv* localeconv_l(locale_t& __loc); -size_t mbrlen_l(const char* __restrict __s, size_t __n, mbstate_t* __restrict __ps, locale_t __loc); -size_t mbsrtowcs_l( - wchar_t* __restrict __dst, const char** __restrict __src, size_t __len, mbstate_t* __restrict __ps, locale_t __loc); -size_t wcrtomb_l(char* __restrict __s, wchar_t __wc, mbstate_t* __restrict __ps, locale_t __loc); -size_t mbrtowc_l( - wchar_t* __restrict __pwc, const char* __restrict __s, size_t __n, mbstate_t* __restrict __ps, locale_t __loc); -size_t mbsnrtowcs_l(wchar_t* __restrict __dst, - const char** __restrict __src, - size_t __nms, - size_t __len, - mbstate_t* __restrict __ps, - locale_t __loc); -size_t wcsnrtombs_l(char* __restrict __dst, - const wchar_t** __restrict __src, - size_t __nwc, - size_t __len, - mbstate_t* __restrict __ps, - locale_t __loc); -wint_t btowc_l(int __c, locale_t __loc); -int wctob_l(wint_t __c, locale_t __loc); - -decltype(MB_CUR_MAX) MB_CUR_MAX_L(locale_t __l); - -// the *_l functions are prefixed on Windows, only available for msvcr80+, VS2005+ -#define mbtowc_l _mbtowc_l -#define strtoll_l _strtoi64_l -#define strtoull_l _strtoui64_l -#define strtod_l _strtod_l -#if defined(_LIBCPP_MSVCRT) -# define strtof_l _strtof_l -# define strtold_l _strtold_l -#else -_LIBCPP_EXPORTED_FROM_ABI float strtof_l(const char*, char**, locale_t); -_LIBCPP_EXPORTED_FROM_ABI long double strtold_l(const char*, char**, locale_t); -#endif -inline _LIBCPP_HIDE_FROM_ABI int islower_l(int __c, _locale_t __loc) { return _islower_l((int)__c, __loc); } - -inline _LIBCPP_HIDE_FROM_ABI int isupper_l(int __c, _locale_t __loc) { return _isupper_l((int)__c, __loc); } - -#define isdigit_l _isdigit_l -#define isxdigit_l _isxdigit_l -#define strcoll_l _strcoll_l -#define strxfrm_l _strxfrm_l -#define wcscoll_l _wcscoll_l -#define wcsxfrm_l _wcsxfrm_l -#define toupper_l _toupper_l -#define tolower_l _tolower_l -#define iswspace_l _iswspace_l -#define iswprint_l _iswprint_l -#define iswcntrl_l 
_iswcntrl_l -#define iswupper_l _iswupper_l -#define iswlower_l _iswlower_l -#define iswalpha_l _iswalpha_l -#define iswdigit_l _iswdigit_l -#define iswpunct_l _iswpunct_l -#define iswxdigit_l _iswxdigit_l -#define towupper_l _towupper_l -#define towlower_l _towlower_l -#if defined(__MINGW32__) && __MSVCRT_VERSION__ < 0x0800 -_LIBCPP_EXPORTED_FROM_ABI size_t strftime_l(char* ret, size_t n, const char* format, const struct tm* tm, locale_t loc); -#else -# define strftime_l _strftime_l -#endif -#define sscanf_l(__s, __l, __f, ...) _sscanf_l(__s, __f, __l, __VA_ARGS__) -_LIBCPP_EXPORTED_FROM_ABI int snprintf_l(char* __ret, size_t __n, locale_t __loc, const char* __format, ...); -_LIBCPP_EXPORTED_FROM_ABI int asprintf_l(char** __ret, locale_t __loc, const char* __format, ...); -_LIBCPP_EXPORTED_FROM_ABI int vasprintf_l(char** __ret, locale_t __loc, const char* __format, va_list __ap); - -// not-so-pressing FIXME: use locale to determine blank characters -inline int iswblank_l(wint_t __c, locale_t /*loc*/) { return (__c == L' ' || __c == L'\t'); } - -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_WIN32_H diff --git a/lib/libcxx/include/__locale_dir/pad_and_output.h b/lib/libcxx/include/__locale_dir/pad_and_output.h new file mode 100644 index 000000000000..a1cb37d0786d --- /dev/null +++ b/lib/libcxx/include/__locale_dir/pad_and_output.h @@ -0,0 +1,88 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_PAD_AND_OUTPUT_H +#define _LIBCPP___LOCALE_DIR_PAD_AND_OUTPUT_H + +#include <__config> + +#if _LIBCPP_HAS_LOCALIZATION + +# include + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +template +_LIBCPP_HIDE_FROM_ABI _OutputIterator __pad_and_output( + _OutputIterator __s, const _CharT* __ob, const _CharT* __op, const _CharT* __oe, ios_base& __iob, _CharT __fl) { + streamsize __sz = __oe - __ob; + streamsize __ns = __iob.width(); + if (__ns > __sz) + __ns -= __sz; + else + __ns = 0; + for (; __ob < __op; ++__ob, ++__s) + *__s = *__ob; + for (; __ns; --__ns, ++__s) + *__s = __fl; + for (; __ob < __oe; ++__ob, ++__s) + *__s = *__ob; + __iob.width(0); + return __s; +} + +template +_LIBCPP_HIDE_FROM_ABI ostreambuf_iterator<_CharT, _Traits> __pad_and_output( + ostreambuf_iterator<_CharT, _Traits> __s, + const _CharT* __ob, + const _CharT* __op, + const _CharT* __oe, + ios_base& __iob, + _CharT __fl) { + if (__s.__sbuf_ == nullptr) + return __s; + streamsize __sz = __oe - __ob; + streamsize __ns = __iob.width(); + if (__ns > __sz) + __ns -= __sz; + else + __ns = 0; + streamsize __np = __op - __ob; + if (__np > 0) { + if (__s.__sbuf_->sputn(__ob, __np) != __np) { + __s.__sbuf_ = nullptr; + return __s; + } + } + if (__ns > 0) { + basic_string<_CharT, _Traits> __sp(__ns, __fl); + if (__s.__sbuf_->sputn(__sp.data(), __ns) != __ns) { + __s.__sbuf_ = nullptr; + return __s; + } + } + __np = __oe - __op; + if (__np > 0) { + if (__s.__sbuf_->sputn(__op, __np) != __np) { + __s.__sbuf_ = nullptr; + return __s; + } + } + __iob.width(0); + return __s; +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_HAS_LOCALIZATION + +#endif // _LIBCPP___LOCALE_DIR_PAD_AND_OUTPUT_H diff --git a/lib/libcxx/include/locale.h 
b/lib/libcxx/include/__locale_dir/support/apple.h similarity index 56% rename from lib/libcxx/include/locale.h rename to lib/libcxx/include/__locale_dir/support/apple.h index 425bf47d437a..62eb79c30d43 100644 --- a/lib/libcxx/include/locale.h +++ b/lib/libcxx/include/__locale_dir/support/apple.h @@ -1,5 +1,4 @@ -// -*- C++ -*- -//===----------------------------------------------------------------------===// +//===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,31 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP_LOCALE_H -#define _LIBCPP_LOCALE_H - -/* - locale.h synopsis - -Macros: - - LC_ALL - LC_COLLATE - LC_CTYPE - LC_MONETARY - LC_NUMERIC - LC_TIME - -Types: - - lconv - -Functions: - - setlocale - localeconv - -*/ +#ifndef _LIBCPP___LOCALE_DIR_SUPPORT_APPLE_H +#define _LIBCPP___LOCALE_DIR_SUPPORT_APPLE_H #include <__config> @@ -39,8 +15,6 @@ # pragma GCC system_header #endif -#if __has_include_next() -# include_next -#endif +#include <__locale_dir/support/bsd_like.h> -#endif // _LIBCPP_LOCALE_H +#endif // _LIBCPP___LOCALE_DIR_SUPPORT_APPLE_H diff --git a/lib/libcxx/include/__locale_dir/support/bsd_like.h b/lib/libcxx/include/__locale_dir/support/bsd_like.h new file mode 100644 index 000000000000..405f1589c8c9 --- /dev/null +++ b/lib/libcxx/include/__locale_dir/support/bsd_like.h @@ -0,0 +1,234 @@ +//===-----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_SUPPORT_BSD_LIKE_H +#define _LIBCPP___LOCALE_DIR_SUPPORT_BSD_LIKE_H + +#include <__config> +#include <__cstddef/size_t.h> +#include <__std_mbstate_t.h> +#include <__utility/forward.h> +#include // std::lconv +#include +#include +#include +#include +#include +#if _LIBCPP_HAS_WIDE_CHARACTERS +# include +# include +#endif + +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD +namespace __locale { + +// +// Locale management +// +#define _LIBCPP_COLLATE_MASK LC_COLLATE_MASK +#define _LIBCPP_CTYPE_MASK LC_CTYPE_MASK +#define _LIBCPP_MONETARY_MASK LC_MONETARY_MASK +#define _LIBCPP_NUMERIC_MASK LC_NUMERIC_MASK +#define _LIBCPP_TIME_MASK LC_TIME_MASK +#define _LIBCPP_MESSAGES_MASK LC_MESSAGES_MASK +#define _LIBCPP_ALL_MASK LC_ALL_MASK +#define _LIBCPP_LC_ALL LC_ALL + +using __locale_t = ::locale_t; +#if defined(_LIBCPP_BUILDING_LIBRARY) +using __lconv_t = std::lconv; + +inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __category_mask, const char* __locale, __locale_t __base) { + return ::newlocale(__category_mask, __locale, __base); +} + +inline _LIBCPP_HIDE_FROM_ABI void __freelocale(__locale_t __loc) { ::freelocale(__loc); } + +inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, char const* __locale) { + return ::setlocale(__category, __locale); +} + +inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { return ::localeconv_l(__loc); } +#endif // _LIBCPP_BUILDING_LIBRARY + +// +// Strtonum functions +// +inline _LIBCPP_HIDE_FROM_ABI float __strtof(const char* __nptr, char** __endptr, __locale_t __loc) { + return ::strtof_l(__nptr, __endptr, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __nptr, char** __endptr, __locale_t __loc) { + return ::strtod_l(__nptr, 
__endptr, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __endptr, __locale_t __loc) { + return ::strtold_l(__nptr, __endptr, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { + return ::strtoll_l(__nptr, __endptr, __base, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI unsigned long long +__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { + return ::strtoull_l(__nptr, __endptr, __base, __loc); +} + +// +// Character manipulation functions +// +#if defined(_LIBCPP_BUILDING_LIBRARY) +inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t __loc) { return ::islower_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t __loc) { return ::isupper_l(__c, __loc); } +#endif + +inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return ::isdigit_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return ::isxdigit_l(__c, __loc); } + +#if defined(_LIBCPP_BUILDING_LIBRARY) +inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __loc) { return ::toupper_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __tolower(int __c, __locale_t __loc) { return ::tolower_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __strcoll(const char* __s1, const char* __s2, __locale_t __loc) { + return ::strcoll_l(__s1, __s2, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, size_t __n, __locale_t __loc) { + return ::strxfrm_l(__dest, __src, __n, __loc); +} + +# if _LIBCPP_HAS_WIDE_CHARACTERS +inline _LIBCPP_HIDE_FROM_ABI int __iswctype(wint_t __c, wctype_t __type, __locale_t __loc) { + return ::iswctype_l(__c, __type, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI int __iswspace(wint_t __c, __locale_t __loc) { return ::iswspace_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswprint(wint_t __c, 
__locale_t __loc) { return ::iswprint_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswcntrl(wint_t __c, __locale_t __loc) { return ::iswcntrl_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswupper(wint_t __c, __locale_t __loc) { return ::iswupper_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswlower(wint_t __c, __locale_t __loc) { return ::iswlower_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswalpha(wint_t __c, __locale_t __loc) { return ::iswalpha_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswblank(wint_t __c, __locale_t __loc) { return ::iswblank_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswdigit(wint_t __c, __locale_t __loc) { return ::iswdigit_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswpunct(wint_t __c, __locale_t __loc) { return ::iswpunct_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswxdigit(wint_t __c, __locale_t __loc) { return ::iswxdigit_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI wint_t __towupper(wint_t __c, __locale_t __loc) { return ::towupper_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI wint_t __towlower(wint_t __c, __locale_t __loc) { return ::towlower_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __ws1, const wchar_t* __ws2, __locale_t __loc) { + return ::wcscoll_l(__ws1, __ws2, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __wcsxfrm(wchar_t* __dest, const wchar_t* __src, size_t __n, __locale_t __loc) { + return ::wcsxfrm_l(__dest, __src, __n, __loc); +} +# endif // _LIBCPP_HAS_WIDE_CHARACTERS + +inline _LIBCPP_HIDE_FROM_ABI size_t +__strftime(char* __s, size_t __max, const char* __format, const struct tm* __tm, __locale_t __loc) { + return ::strftime_l(__s, __max, __format, __tm, __loc); +} + +// +// Other functions +// +inline _LIBCPP_HIDE_FROM_ABI decltype(MB_CUR_MAX) __mb_len_max(__locale_t __loc) { return MB_CUR_MAX_L(__loc); } + +# if _LIBCPP_HAS_WIDE_CHARACTERS +inline _LIBCPP_HIDE_FROM_ABI wint_t 
__btowc(int __c, __locale_t __loc) { return ::btowc_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __wctob(wint_t __c, __locale_t __loc) { return ::wctob_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI size_t +__wcsnrtombs(char* __dest, const wchar_t** __src, size_t __nwc, size_t __len, mbstate_t* __ps, __locale_t __loc) { + return ::wcsnrtombs_l(__dest, __src, __nwc, __len, __ps, __loc); // wcsnrtombs is a POSIX extension +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __wcrtomb(char* __s, wchar_t __wc, mbstate_t* __ps, __locale_t __loc) { + return ::wcrtomb_l(__s, __wc, __ps, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbsnrtowcs(wchar_t* __dest, const char** __src, size_t __nms, size_t __len, mbstate_t* __ps, __locale_t __loc) { + return ::mbsnrtowcs_l(__dest, __src, __nms, __len, __ps, __loc); // mbsnrtowcs is a POSIX extension +} + +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbrtowc(wchar_t* __pwc, const char* __s, size_t __n, mbstate_t* __ps, __locale_t __loc) { + return ::mbrtowc_l(__pwc, __s, __n, __ps, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI int __mbtowc(wchar_t* __pwc, const char* __pmb, size_t __max, __locale_t __loc) { + return ::mbtowc_l(__pwc, __pmb, __max, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __mbrlen(const char* __s, size_t __n, mbstate_t* __ps, __locale_t __loc) { + return ::mbrlen_l(__s, __n, __ps, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, __locale_t __loc) { + return ::mbsrtowcs_l(__dest, __src, __len, __ps, __loc); +} +# endif // _LIBCPP_HAS_WIDE_CHARACTERS +#endif // _LIBCPP_BUILDING_LIBRARY + +_LIBCPP_DIAGNOSTIC_PUSH +_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat") +_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral") // GCC doesn't support [[gnu::format]] on variadic templates +#ifdef _LIBCPP_COMPILER_CLANG_BASED +# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) 
_LIBCPP_ATTRIBUTE_FORMAT(__VA_ARGS__) +#else +# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) /* nothing */ +#endif + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snprintf( + char* __s, size_t __n, __locale_t __loc, const char* __format, _Args&&... __args) { + return ::snprintf_l(__s, __n, __loc, __format, std::forward<_Args>(__args)...); +} + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf( + char** __s, __locale_t __loc, const char* __format, _Args&&... __args) { + return ::asprintf_l(__s, __loc, __format, std::forward<_Args>(__args)...); // non-standard +} + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( + const char* __s, __locale_t __loc, const char* __format, _Args&&... __args) { + return ::sscanf_l(__s, __loc, __format, std::forward<_Args>(__args)...); +} +_LIBCPP_DIAGNOSTIC_POP +#undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT + +} // namespace __locale +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___LOCALE_DIR_SUPPORT_BSD_LIKE_H diff --git a/lib/libcxx/include/__locale_dir/locale_base_api/fuchsia.h b/lib/libcxx/include/__locale_dir/support/freebsd.h similarity index 54% rename from lib/libcxx/include/__locale_dir/locale_base_api/fuchsia.h rename to lib/libcxx/include/__locale_dir/support/freebsd.h index 4c3440f981c6..5c6e21e38727 100644 --- a/lib/libcxx/include/__locale_dir/locale_base_api/fuchsia.h +++ b/lib/libcxx/include/__locale_dir/support/freebsd.h @@ -1,4 +1,3 @@ -// -*- C++ -*- //===-----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
@@ -7,12 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H -#define _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H +#ifndef _LIBCPP___LOCALE_DIR_SUPPORT_FREEBSD_H +#define _LIBCPP___LOCALE_DIR_SUPPORT_FREEBSD_H -#include <__support/xlocale/__posix_l_fallback.h> -#include <__support/xlocale/__strtonum_fallback.h> -#include -#include +#include <__config> -#endif // _LIBCPP___LOCALE_LOCALE_BASE_API_FUCHSIA_H +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#include <__locale_dir/support/bsd_like.h> + +#endif // _LIBCPP___LOCALE_DIR_SUPPORT_FREEBSD_H diff --git a/lib/libcxx/include/__locale_dir/support/fuchsia.h b/lib/libcxx/include/__locale_dir/support/fuchsia.h new file mode 100644 index 000000000000..fb9de74ab7c7 --- /dev/null +++ b/lib/libcxx/include/__locale_dir/support/fuchsia.h @@ -0,0 +1,160 @@ +//===-----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_SUPPORT_FUCHSIA_H +#define _LIBCPP___LOCALE_DIR_SUPPORT_FUCHSIA_H + +#include <__config> +#include <__utility/forward.h> +#include // uselocale & friends +#include +#include +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD +namespace __locale { + +struct __locale_guard { + _LIBCPP_HIDE_FROM_ABI __locale_guard(locale_t& __loc) : __old_loc_(::uselocale(__loc)) {} + + _LIBCPP_HIDE_FROM_ABI ~__locale_guard() { + if (__old_loc_) + ::uselocale(__old_loc_); + } + + locale_t __old_loc_; + + __locale_guard(__locale_guard const&) = delete; + __locale_guard& operator=(__locale_guard const&) = delete; +}; + +// +// Locale management +// +#define _LIBCPP_COLLATE_MASK LC_COLLATE_MASK +#define _LIBCPP_CTYPE_MASK LC_CTYPE_MASK +#define _LIBCPP_MONETARY_MASK LC_MONETARY_MASK +#define _LIBCPP_NUMERIC_MASK LC_NUMERIC_MASK +#define _LIBCPP_TIME_MASK LC_TIME_MASK +#define _LIBCPP_MESSAGES_MASK LC_MESSAGES_MASK +#define _LIBCPP_ALL_MASK LC_ALL_MASK +#define _LIBCPP_LC_ALL LC_ALL + +using __locale_t = locale_t; + +#if defined(_LIBCPP_BUILDING_LIBRARY) +using __lconv_t = std::lconv; + +inline _LIBCPP_HIDE_FROM_ABI __locale_t __newlocale(int __category_mask, const char* __name, __locale_t __loc) { + return ::newlocale(__category_mask, __name, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI void __freelocale(__locale_t __loc) { ::freelocale(__loc); } + +inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, char const* __locale) { + return ::setlocale(__category, __locale); +} + +inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { + __locale_guard __current(__loc); + return std::localeconv(); +} + +// +// Other functions +// +inline _LIBCPP_HIDE_FROM_ABI decltype(MB_CUR_MAX) __mb_len_max(__locale_t __loc) { + 
__locale_guard __current(__loc); + return MB_CUR_MAX; +} +# if _LIBCPP_HAS_WIDE_CHARACTERS +inline _LIBCPP_HIDE_FROM_ABI wint_t __btowc(int __ch, __locale_t __loc) { + __locale_guard __current(__loc); + return std::btowc(__ch); +} +inline _LIBCPP_HIDE_FROM_ABI int __wctob(wint_t __ch, __locale_t __loc) { + __locale_guard __current(__loc); + return std::wctob(__ch); +} +inline _LIBCPP_HIDE_FROM_ABI size_t +__wcsnrtombs(char* __dest, const wchar_t** __src, size_t __nwc, size_t __len, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return ::wcsnrtombs(__dest, __src, __nwc, __len, __ps); // non-standard +} +inline _LIBCPP_HIDE_FROM_ABI size_t __wcrtomb(char* __s, wchar_t __ch, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return std::wcrtomb(__s, __ch, __ps); +} +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbsnrtowcs(wchar_t* __dest, const char** __src, size_t __nms, size_t __len, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return ::mbsnrtowcs(__dest, __src, __nms, __len, __ps); // non-standard +} +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbrtowc(wchar_t* __pwc, const char* __s, size_t __n, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return std::mbrtowc(__pwc, __s, __n, __ps); +} +inline _LIBCPP_HIDE_FROM_ABI int __mbtowc(wchar_t* __pwc, const char* __pmb, size_t __max, __locale_t __loc) { + __locale_guard __current(__loc); + return std::mbtowc(__pwc, __pmb, __max); +} +inline _LIBCPP_HIDE_FROM_ABI size_t __mbrlen(const char* __s, size_t __n, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return std::mbrlen(__s, __n, __ps); +} +inline _LIBCPP_HIDE_FROM_ABI size_t +__mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, __locale_t __loc) { + __locale_guard __current(__loc); + return ::mbsrtowcs(__dest, __src, __len, __ps); +} +# endif // _LIBCPP_HAS_WIDE_CHARACTERS +#endif // _LIBCPP_BUILDING_LIBRARY + 
+_LIBCPP_DIAGNOSTIC_PUSH +_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat") +_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral") // GCC doesn't support [[gnu::format]] on variadic templates +#ifdef _LIBCPP_COMPILER_CLANG_BASED +# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) _LIBCPP_ATTRIBUTE_FORMAT(__VA_ARGS__) +#else +# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) /* nothing */ +#endif + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snprintf( + char* __s, size_t __n, __locale_t __loc, const char* __format, _Args&&... __args) { + __locale_guard __current(__loc); + return std::snprintf(__s, __n, __format, std::forward<_Args>(__args)...); +} +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf( + char** __s, __locale_t __loc, const char* __format, _Args&&... __args) { + __locale_guard __current(__loc); + return ::asprintf(__s, __format, std::forward<_Args>(__args)...); // non-standard +} +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( + const char* __s, __locale_t __loc, const char* __format, _Args&&... 
__args) { + __locale_guard __current(__loc); + return std::sscanf(__s, __format, std::forward<_Args>(__args)...); +} + +_LIBCPP_DIAGNOSTIC_POP +#undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT + +} // namespace __locale +_LIBCPP_END_NAMESPACE_STD + +#include <__locale_dir/support/no_locale/characters.h> +#include <__locale_dir/support/no_locale/strtonum.h> + +#endif // _LIBCPP___LOCALE_DIR_SUPPORT_FUCHSIA_H diff --git a/lib/libcxx/include/__locale_dir/support/no_locale/characters.h b/lib/libcxx/include/__locale_dir/support/no_locale/characters.h new file mode 100644 index 000000000000..4fb48ed9ceac --- /dev/null +++ b/lib/libcxx/include/__locale_dir/support/no_locale/characters.h @@ -0,0 +1,102 @@ +//===-----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_SUPPORT_NO_LOCALE_CHARACTERS_H +#define _LIBCPP___LOCALE_DIR_SUPPORT_NO_LOCALE_CHARACTERS_H + +#include <__config> +#include <__cstddef/size_t.h> +#include +#include +#include +#include +#if _LIBCPP_HAS_WIDE_CHARACTERS +# include +#endif + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD +namespace __locale { + +// +// Character manipulation functions +// +#if defined(_LIBCPP_BUILDING_LIBRARY) +inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t) { return std::islower(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t) { return std::isupper(__c); } +#endif + +inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t) { return std::isdigit(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t) { return std::isxdigit(__c); } + +#if 
defined(_LIBCPP_BUILDING_LIBRARY) +inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t) { return std::toupper(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __tolower(int __c, __locale_t) { return std::tolower(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __strcoll(const char* __s1, const char* __s2, __locale_t) { + return std::strcoll(__s1, __s2); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, size_t __n, __locale_t) { + return std::strxfrm(__dest, __src, __n); +} + +# if _LIBCPP_HAS_WIDE_CHARACTERS +inline _LIBCPP_HIDE_FROM_ABI int __iswctype(wint_t __c, wctype_t __type, __locale_t) { + return std::iswctype(__c, __type); +} + +inline _LIBCPP_HIDE_FROM_ABI int __iswspace(wint_t __c, __locale_t) { return std::iswspace(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswprint(wint_t __c, __locale_t) { return std::iswprint(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswcntrl(wint_t __c, __locale_t) { return std::iswcntrl(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswupper(wint_t __c, __locale_t) { return std::iswupper(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswlower(wint_t __c, __locale_t) { return std::iswlower(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswalpha(wint_t __c, __locale_t) { return std::iswalpha(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswblank(wint_t __c, __locale_t) { return std::iswblank(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswdigit(wint_t __c, __locale_t) { return std::iswdigit(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswpunct(wint_t __c, __locale_t) { return std::iswpunct(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __iswxdigit(wint_t __c, __locale_t) { return std::iswxdigit(__c); } + +inline _LIBCPP_HIDE_FROM_ABI wint_t __towupper(wint_t __c, __locale_t) { return std::towupper(__c); } + +inline _LIBCPP_HIDE_FROM_ABI wint_t __towlower(wint_t __c, __locale_t) { return std::towlower(__c); } + +inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __ws1, const wchar_t* __ws2, 
__locale_t) { + return std::wcscoll(__ws1, __ws2); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __wcsxfrm(wchar_t* __dest, const wchar_t* __src, size_t __n, __locale_t) { + return std::wcsxfrm(__dest, __src, __n); +} +# endif // _LIBCPP_HAS_WIDE_CHARACTERS + +inline _LIBCPP_HIDE_FROM_ABI size_t +__strftime(char* __s, size_t __max, const char* __format, const struct tm* __tm, __locale_t) { + return std::strftime(__s, __max, __format, __tm); +} +#endif // _LIBCPP_BUILDING_LIBRARY + +} // namespace __locale +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___LOCALE_DIR_SUPPORT_NO_LOCALE_CHARACTERS_H diff --git a/lib/libcxx/include/__locale_dir/support/no_locale/strtonum.h b/lib/libcxx/include/__locale_dir/support/no_locale/strtonum.h new file mode 100644 index 000000000000..0e7a32993e73 --- /dev/null +++ b/lib/libcxx/include/__locale_dir/support/no_locale/strtonum.h @@ -0,0 +1,49 @@ +//===-----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_SUPPORT_NO_LOCALE_STRTONUM_H +#define _LIBCPP___LOCALE_DIR_SUPPORT_NO_LOCALE_STRTONUM_H + +#include <__config> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD +namespace __locale { + +// +// Strtonum functions +// +inline _LIBCPP_HIDE_FROM_ABI float __strtof(const char* __nptr, char** __endptr, __locale_t) { + return std::strtof(__nptr, __endptr); +} + +inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __nptr, char** __endptr, __locale_t) { + return std::strtod(__nptr, __endptr); +} + +inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __endptr, __locale_t) { + return std::strtold(__nptr, __endptr); +} + +inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t) { + return std::strtoll(__nptr, __endptr, __base); +} + +inline _LIBCPP_HIDE_FROM_ABI unsigned long long +__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t) { + return std::strtoull(__nptr, __endptr, __base); +} + +} // namespace __locale +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___LOCALE_DIR_SUPPORT_NO_LOCALE_STRTONUM_H diff --git a/lib/libcxx/include/__locale_dir/support/windows.h b/lib/libcxx/include/__locale_dir/support/windows.h new file mode 100644 index 000000000000..56d34c6f0e6c --- /dev/null +++ b/lib/libcxx/include/__locale_dir/support/windows.h @@ -0,0 +1,343 @@ +//===-----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___LOCALE_DIR_SUPPORT_WINDOWS_H +#define _LIBCPP___LOCALE_DIR_SUPPORT_WINDOWS_H + +#include <__config> +#include <__cstddef/nullptr_t.h> +#include <__utility/forward.h> +#include // std::lconv & friends +#include +#include // ::_isupper_l & friends +#include // ::_locale_t +#include // ::_sscanf_l +#include // ::_strtod_l & friends +#include // ::_strcoll_l +#include +#include // ::_strftime_l + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD +namespace __locale { + +using __lconv_t = std::lconv; + +class __lconv_storage { +public: + __lconv_storage(const __lconv_t* __lc_input) { + __lc_ = *__lc_input; + + __decimal_point_ = __lc_input->decimal_point; + __thousands_sep_ = __lc_input->thousands_sep; + __grouping_ = __lc_input->grouping; + __int_curr_symbol_ = __lc_input->int_curr_symbol; + __currency_symbol_ = __lc_input->currency_symbol; + __mon_decimal_point_ = __lc_input->mon_decimal_point; + __mon_thousands_sep_ = __lc_input->mon_thousands_sep; + __mon_grouping_ = __lc_input->mon_grouping; + __positive_sign_ = __lc_input->positive_sign; + __negative_sign_ = __lc_input->negative_sign; + + __lc_.decimal_point = const_cast(__decimal_point_.c_str()); + __lc_.thousands_sep = const_cast(__thousands_sep_.c_str()); + __lc_.grouping = const_cast(__grouping_.c_str()); + __lc_.int_curr_symbol = const_cast(__int_curr_symbol_.c_str()); + __lc_.currency_symbol = const_cast(__currency_symbol_.c_str()); + __lc_.mon_decimal_point = const_cast(__mon_decimal_point_.c_str()); + __lc_.mon_thousands_sep = const_cast(__mon_thousands_sep_.c_str()); + __lc_.mon_grouping = const_cast(__mon_grouping_.c_str()); + __lc_.positive_sign = const_cast(__positive_sign_.c_str()); + __lc_.negative_sign = const_cast(__negative_sign_.c_str()); + } + + __lconv_t* __get() 
{ return &__lc_; } + +private: + __lconv_t __lc_; + std::string __decimal_point_; + std::string __thousands_sep_; + std::string __grouping_; + std::string __int_curr_symbol_; + std::string __currency_symbol_; + std::string __mon_decimal_point_; + std::string __mon_thousands_sep_; + std::string __mon_grouping_; + std::string __positive_sign_; + std::string __negative_sign_; +}; + +// +// Locale management +// +#define _CATMASK(n) ((1 << (n)) >> 1) +#define _LIBCPP_COLLATE_MASK _CATMASK(LC_COLLATE) +#define _LIBCPP_CTYPE_MASK _CATMASK(LC_CTYPE) +#define _LIBCPP_MONETARY_MASK _CATMASK(LC_MONETARY) +#define _LIBCPP_NUMERIC_MASK _CATMASK(LC_NUMERIC) +#define _LIBCPP_TIME_MASK _CATMASK(LC_TIME) +#define _LIBCPP_MESSAGES_MASK _CATMASK(6) +#define _LIBCPP_ALL_MASK \ + (_LIBCPP_COLLATE_MASK | _LIBCPP_CTYPE_MASK | _LIBCPP_MESSAGES_MASK | _LIBCPP_MONETARY_MASK | _LIBCPP_NUMERIC_MASK | \ + _LIBCPP_TIME_MASK) +#define _LIBCPP_LC_ALL LC_ALL + +class __locale_t { +public: + __locale_t() : __locale_(nullptr), __locale_str_(nullptr), __lc_(nullptr) {} + __locale_t(std::nullptr_t) : __locale_(nullptr), __locale_str_(nullptr), __lc_(nullptr) {} + __locale_t(::_locale_t __loc, const char* __loc_str) : __locale_(__loc), __locale_str_(__loc_str), __lc_(nullptr) {} + __locale_t(const __locale_t& __loc) + : __locale_(__loc.__locale_), __locale_str_(__loc.__locale_str_), __lc_(nullptr) {} + + ~__locale_t() { delete __lc_; } + + __locale_t& operator=(const __locale_t& __loc) { + __locale_ = __loc.__locale_; + __locale_str_ = __loc.__locale_str_; + // __lc_ not copied + return *this; + } + + friend bool operator==(const __locale_t& __left, const __locale_t& __right) { + return __left.__locale_ == __right.__locale_; + } + + friend bool operator==(const __locale_t& __left, int __right) { return __left.__locale_ == nullptr && __right == 0; } + + friend bool operator==(const __locale_t& __left, long long __right) { + return __left.__locale_ == nullptr && __right == 0; + } + + friend bool 
operator==(const __locale_t& __left, std::nullptr_t) { return __left.__locale_ == nullptr; } + + friend bool operator==(int __left, const __locale_t& __right) { return __left == 0 && nullptr == __right.__locale_; } + + friend bool operator==(std::nullptr_t, const __locale_t& __right) { return nullptr == __right.__locale_; } + + friend bool operator!=(const __locale_t& __left, const __locale_t& __right) { return !(__left == __right); } + + friend bool operator!=(const __locale_t& __left, int __right) { return !(__left == __right); } + + friend bool operator!=(const __locale_t& __left, long long __right) { return !(__left == __right); } + + friend bool operator!=(const __locale_t& __left, std::nullptr_t __right) { return !(__left == __right); } + + friend bool operator!=(int __left, const __locale_t& __right) { return !(__left == __right); } + + friend bool operator!=(std::nullptr_t __left, const __locale_t& __right) { return !(__left == __right); } + + operator bool() const { return __locale_ != nullptr; } + + const char* __get_locale() const { return __locale_str_; } + + operator ::_locale_t() const { return __locale_; } + + __lconv_t* __store_lconv(const __lconv_t* __input_lc) { + delete __lc_; + __lc_ = new __lconv_storage(__input_lc); + return __lc_->__get(); + } + +private: + ::_locale_t __locale_; + const char* __locale_str_; + __lconv_storage* __lc_ = nullptr; +}; + +#if defined(_LIBCPP_BUILDING_LIBRARY) +_LIBCPP_EXPORTED_FROM_ABI __locale_t __newlocale(int __mask, const char* __locale, __locale_t __base); +inline _LIBCPP_HIDE_FROM_ABI void __freelocale(__locale_t __loc) { ::_free_locale(__loc); } +inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, const char* __locale) { + char* __new_locale = ::setlocale(__category, __locale); + if (__new_locale == nullptr) + std::__throw_bad_alloc(); + return __new_locale; +} +_LIBCPP_EXPORTED_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc); +#endif // _LIBCPP_BUILDING_LIBRARY + +// +// Strtonum functions 
+// + +// the *_l functions are prefixed on Windows, only available for msvcr80+, VS2005+ +#if defined(_LIBCPP_MSVCRT) +inline _LIBCPP_HIDE_FROM_ABI float __strtof(const char* __nptr, char** __endptr, __locale_t __loc) { + return ::_strtof_l(__nptr, __endptr, __loc); +} +inline _LIBCPP_HIDE_FROM_ABI long double __strtold(const char* __nptr, char** __endptr, __locale_t __loc) { + return ::_strtold_l(__nptr, __endptr, __loc); +} +#else +_LIBCPP_EXPORTED_FROM_ABI float __strtof(const char*, char**, __locale_t); +_LIBCPP_EXPORTED_FROM_ABI long double __strtold(const char*, char**, __locale_t); +#endif + +inline _LIBCPP_HIDE_FROM_ABI double __strtod(const char* __nptr, char** __endptr, __locale_t __loc) { + return ::_strtod_l(__nptr, __endptr, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI long long __strtoll(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { + return ::_strtoi64_l(__nptr, __endptr, __base, __loc); +} +inline _LIBCPP_HIDE_FROM_ABI unsigned long long +__strtoull(const char* __nptr, char** __endptr, int __base, __locale_t __loc) { + return ::_strtoui64_l(__nptr, __endptr, __base, __loc); +} + +// +// Character manipulation functions +// +#if defined(_LIBCPP_BUILDING_LIBRARY) +inline _LIBCPP_HIDE_FROM_ABI int __islower(int __c, __locale_t __loc) { return _islower_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __isupper(int __c, __locale_t __loc) { return _isupper_l(__c, __loc); } +#endif + +inline _LIBCPP_HIDE_FROM_ABI int __isdigit(int __c, __locale_t __loc) { return _isdigit_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __isxdigit(int __c, __locale_t __loc) { return _isxdigit_l(__c, __loc); } + +#if defined(_LIBCPP_BUILDING_LIBRARY) +inline _LIBCPP_HIDE_FROM_ABI int __toupper(int __c, __locale_t __loc) { return ::_toupper_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __tolower(int __c, __locale_t __loc) { return ::_tolower_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __strcoll(const char* __s1, const char* __s2, 
__locale_t __loc) { + return ::_strcoll_l(__s1, __s2, __loc); +} + +inline _LIBCPP_HIDE_FROM_ABI size_t __strxfrm(char* __dest, const char* __src, size_t __n, __locale_t __loc) { + return ::_strxfrm_l(__dest, __src, __n, __loc); +} + +# if _LIBCPP_HAS_WIDE_CHARACTERS +inline _LIBCPP_HIDE_FROM_ABI int __iswctype(wint_t __c, wctype_t __type, __locale_t __loc) { + return ::_iswctype_l(__c, __type, __loc); +} +inline _LIBCPP_HIDE_FROM_ABI int __iswspace(wint_t __c, __locale_t __loc) { return ::_iswspace_l(__c, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswprint(wint_t __c, __locale_t __loc) { return ::_iswprint_l(__c, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswcntrl(wint_t __c, __locale_t __loc) { return ::_iswcntrl_l(__c, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswupper(wint_t __c, __locale_t __loc) { return ::_iswupper_l(__c, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswlower(wint_t __c, __locale_t __loc) { return ::_iswlower_l(__c, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswalpha(wint_t __c, __locale_t __loc) { return ::_iswalpha_l(__c, __loc); } +// TODO: use locale to determine blank characters +inline _LIBCPP_HIDE_FROM_ABI int __iswblank(wint_t __c, __locale_t /*loc*/) { return (__c == L' ' || __c == L'\t'); } +inline _LIBCPP_HIDE_FROM_ABI int __iswdigit(wint_t __c, __locale_t __loc) { return ::_iswdigit_l(__c, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswpunct(wint_t __c, __locale_t __loc) { return ::_iswpunct_l(__c, __loc); } +inline _LIBCPP_HIDE_FROM_ABI int __iswxdigit(wint_t __c, __locale_t __loc) { return ::_iswxdigit_l(__c, __loc); } +inline _LIBCPP_HIDE_FROM_ABI wint_t __towupper(wint_t __c, __locale_t __loc) { return ::_towupper_l(__c, __loc); } +inline _LIBCPP_HIDE_FROM_ABI wint_t __towlower(wint_t __c, __locale_t __loc) { return ::_towlower_l(__c, __loc); } + +inline _LIBCPP_HIDE_FROM_ABI int __wcscoll(const wchar_t* __ws1, const wchar_t* __ws2, __locale_t __loc) { + return ::_wcscoll_l(__ws1, __ws2, __loc); +} + +inline 
_LIBCPP_HIDE_FROM_ABI size_t __wcsxfrm(wchar_t* __dest, const wchar_t* __src, size_t __n, __locale_t __loc) { + return ::_wcsxfrm_l(__dest, __src, __n, __loc); +} +# endif // _LIBCPP_HAS_WIDE_CHARACTERS + +# if defined(__MINGW32__) && __MSVCRT_VERSION__ < 0x0800 +_LIBCPP_EXPORTED_FROM_ABI size_t __strftime(char*, size_t, const char*, const struct tm*, __locale_t); +# else +inline _LIBCPP_HIDE_FROM_ABI size_t +__strftime(char* __ret, size_t __n, const char* __format, const struct tm* __tm, __locale_t __loc) { + return ::_strftime_l(__ret, __n, __format, __tm, __loc); +} +# endif + +// +// Other functions +// +_LIBCPP_EXPORTED_FROM_ABI decltype(MB_CUR_MAX) __mb_len_max(__locale_t); +_LIBCPP_EXPORTED_FROM_ABI wint_t __btowc(int, __locale_t); +_LIBCPP_EXPORTED_FROM_ABI int __wctob(wint_t, __locale_t); +_LIBCPP_EXPORTED_FROM_ABI size_t +__wcsnrtombs(char* __restrict, const wchar_t** __restrict, size_t, size_t, mbstate_t* __restrict, __locale_t); +_LIBCPP_EXPORTED_FROM_ABI size_t __wcrtomb(char* __restrict, wchar_t, mbstate_t* __restrict, __locale_t); +_LIBCPP_EXPORTED_FROM_ABI size_t +__mbsnrtowcs(wchar_t* __restrict, const char** __restrict, size_t, size_t, mbstate_t* __restrict, __locale_t); +_LIBCPP_EXPORTED_FROM_ABI size_t +__mbrtowc(wchar_t* __restrict, const char* __restrict, size_t, mbstate_t* __restrict, __locale_t); + +inline _LIBCPP_HIDE_FROM_ABI int __mbtowc(wchar_t* __pwc, const char* __pmb, size_t __max, __locale_t __loc) { + return ::_mbtowc_l(__pwc, __pmb, __max, __loc); +} + +_LIBCPP_EXPORTED_FROM_ABI size_t __mbrlen(const char* __restrict, size_t, mbstate_t* __restrict, __locale_t); + +_LIBCPP_EXPORTED_FROM_ABI size_t +__mbsrtowcs(wchar_t* __restrict, const char** __restrict, size_t, mbstate_t* __restrict, __locale_t); +#endif // _LIBCPP_BUILDING_LIBRARY + +_LIBCPP_EXPORTED_FROM_ABI _LIBCPP_ATTRIBUTE_FORMAT(__printf__, 4, 5) int __snprintf( + char* __ret, size_t __n, __locale_t __loc, const char* __format, ...); + +_LIBCPP_EXPORTED_FROM_ABI 
+_LIBCPP_ATTRIBUTE_FORMAT(__printf__, 3, 4) int __asprintf(char** __ret, __locale_t __loc, const char* __format, ...); + +_LIBCPP_DIAGNOSTIC_PUSH +_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat") +_LIBCPP_GCC_DIAGNOSTIC_IGNORED("-Wformat-nonliteral") // GCC doesn't support [[gnu::format]] on variadic templates +#ifdef _LIBCPP_COMPILER_CLANG_BASED +# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) _LIBCPP_ATTRIBUTE_FORMAT(__VA_ARGS__) +#else +# define _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(...) /* nothing */ +#endif + +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT(__scanf__, 3, 4) int __sscanf( + const char* __dest, __locale_t __loc, const char* __format, _Args&&... __args) { + return ::_sscanf_l(__dest, __format, __loc, std::forward<_Args>(__args)...); +} +_LIBCPP_DIAGNOSTIC_POP +#undef _LIBCPP_VARIADIC_ATTRIBUTE_FORMAT + +#if defined(_LIBCPP_BUILDING_LIBRARY) +struct __locale_guard { + _LIBCPP_HIDE_FROM_ABI __locale_guard(__locale_t __l) : __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) { + // Setting the locale can be expensive even when the locale given is + // already the current locale, so do an explicit check to see if the + // current locale is already the one we want. + const char* __lc = __locale::__setlocale(LC_ALL, nullptr); + // If every category is the same, the locale string will simply be the + // locale name, otherwise it will be a semicolon-separated string listing + // each category. In the second case, we know at least one category won't + // be what we want, so we only have to check the first case. 
+ if (std::strcmp(__l.__get_locale(), __lc) != 0) { + __locale_all = _strdup(__lc); + if (__locale_all == nullptr) + __throw_bad_alloc(); + __locale::__setlocale(LC_ALL, __l.__get_locale()); + } + } + _LIBCPP_HIDE_FROM_ABI ~__locale_guard() { + // The CRT documentation doesn't explicitly say, but setlocale() does the + // right thing when given a semicolon-separated list of locale settings + // for the different categories in the same format as returned by + // setlocale(LC_ALL, nullptr). + if (__locale_all != nullptr) { + __locale::__setlocale(LC_ALL, __locale_all); + free(__locale_all); + } + _configthreadlocale(__status); + } + int __status; + char* __locale_all = nullptr; +}; +#endif // _LIBCPP_BUILDING_LIBRARY + +} // namespace __locale +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___LOCALE_DIR_SUPPORT_WINDOWS_H diff --git a/lib/libcxx/include/__math/abs.h b/lib/libcxx/include/__math/abs.h index ab82a2800f53..fc3bf3a2c7c3 100644 --- a/lib/libcxx/include/__math/abs.h +++ b/lib/libcxx/include/__math/abs.h @@ -23,19 +23,19 @@ namespace __math { // fabs -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float fabs(float __x) _NOEXCEPT { return __builtin_fabsf(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float fabs(float __x) _NOEXCEPT { return __builtin_fabsf(__x); } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double fabs(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double fabs(double __x) _NOEXCEPT { return __builtin_fabs(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double fabs(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double fabs(long double __x) _NOEXCEPT { return __builtin_fabsl(__x); } template ::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double fabs(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double fabs(_A1 __x) _NOEXCEPT { return __builtin_fabs((double)__x); } diff --git a/lib/libcxx/include/__math/copysign.h 
b/lib/libcxx/include/__math/copysign.h index b38690bb581a..c3ca6a3b0370 100644 --- a/lib/libcxx/include/__math/copysign.h +++ b/lib/libcxx/include/__math/copysign.h @@ -13,7 +13,6 @@ #include <__type_traits/enable_if.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/promote.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -25,16 +24,16 @@ namespace __math { // copysign -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float copysign(float __x, float __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float copysign(float __x, float __y) _NOEXCEPT { return ::__builtin_copysignf(__x, __y); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double copysign(long double __x, long double __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double copysign(long double __x, long double __y) _NOEXCEPT { return ::__builtin_copysignl(__x, __y); } template ::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type copysign(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type copysign(_A1 __x, _A2 __y) _NOEXCEPT { return ::__builtin_copysign(__x, __y); } diff --git a/lib/libcxx/include/__math/hypot.h b/lib/libcxx/include/__math/hypot.h index b99216371101..b2bf8e11c8ec 100644 --- a/lib/libcxx/include/__math/hypot.h +++ b/lib/libcxx/include/__math/hypot.h @@ -9,16 +9,15 @@ #ifndef _LIBCPP___MATH_HYPOT_H #define _LIBCPP___MATH_HYPOT_H -#include <__algorithm/max.h> #include <__config> #include <__math/abs.h> #include <__math/exponential_functions.h> +#include <__math/min_max.h> #include <__math/roots.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_same.h> #include <__type_traits/promote.h> -#include <__utility/pair.h> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -63,7 +62,7 @@ 
_LIBCPP_HIDE_FROM_ABI _Real __hypot(_Real __x, _Real __y, _Real __z) { const _Real __overflow_scale = __math::ldexp(_Real(1), -(__exp + 20)); // Scale arguments depending on their size - const _Real __max_abs = std::max(__math::fabs(__x), std::max(__math::fabs(__y), __math::fabs(__z))); + const _Real __max_abs = __math::fmax(__math::fabs(__x), __math::fmax(__math::fabs(__y), __math::fabs(__z))); _Real __scale; if (__max_abs > __overflow_threshold) { // x*x + y*y + z*z might overflow __scale = __overflow_scale; diff --git a/lib/libcxx/include/__math/min_max.h b/lib/libcxx/include/__math/min_max.h index 27997b44910a..db900c849e72 100644 --- a/lib/libcxx/include/__math/min_max.h +++ b/lib/libcxx/include/__math/min_max.h @@ -25,21 +25,21 @@ namespace __math { // fmax -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float fmax(float __x, float __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float fmax(float __x, float __y) _NOEXCEPT { return __builtin_fmaxf(__x, __y); } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double fmax(double __x, double __y) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double fmax(double __x, double __y) _NOEXCEPT { return __builtin_fmax(__x, __y); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double fmax(long double __x, long double __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double fmax(long double __x, long double __y) _NOEXCEPT { return __builtin_fmaxl(__x, __y); } template ::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmax(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmax(_A1 __x, _A2 __y) _NOEXCEPT { using __result_type = typename __promote<_A1, _A2>::type; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::fmax((__result_type)__x, (__result_type)__y); @@ -47,21 
+47,21 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::typ // fmin -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float fmin(float __x, float __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float fmin(float __x, float __y) _NOEXCEPT { return __builtin_fminf(__x, __y); } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double fmin(double __x, double __y) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double fmin(double __x, double __y) _NOEXCEPT { return __builtin_fmin(__x, __y); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double fmin(long double __x, long double __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double fmin(long double __x, long double __y) _NOEXCEPT { return __builtin_fminl(__x, __y); } template ::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmin(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI typename __promote<_A1, _A2>::type fmin(_A1 __x, _A2 __y) _NOEXCEPT { using __result_type = typename __promote<_A1, _A2>::type; static_assert(!(_IsSame<_A1, __result_type>::value && _IsSame<_A2, __result_type>::value), ""); return __math::fmin((__result_type)__x, (__result_type)__y); diff --git a/lib/libcxx/include/__math/remainder.h b/lib/libcxx/include/__math/remainder.h index 0fbf0b8ef97b..0adb7f3af5de 100644 --- a/lib/libcxx/include/__math/remainder.h +++ b/lib/libcxx/include/__math/remainder.h @@ -14,7 +14,6 @@ #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_same.h> #include <__type_traits/promote.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__math/roots.h b/lib/libcxx/include/__math/roots.h index 359fd747cfbe..cef376fb008c 100644 --- a/lib/libcxx/include/__math/roots.h +++ b/lib/libcxx/include/__math/roots.h @@ -39,19 +39,19 @@ inline _LIBCPP_HIDE_FROM_ABI 
double sqrt(_A1 __x) _NOEXCEPT { // cbrt -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float cbrt(float __x) _NOEXCEPT { return __builtin_cbrtf(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float cbrt(float __x) _NOEXCEPT { return __builtin_cbrtf(__x); } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double cbrt(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double cbrt(double __x) _NOEXCEPT { return __builtin_cbrt(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double cbrt(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double cbrt(long double __x) _NOEXCEPT { return __builtin_cbrtl(__x); } template ::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double cbrt(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double cbrt(_A1 __x) _NOEXCEPT { return __builtin_cbrt((double)__x); } diff --git a/lib/libcxx/include/__math/rounding_functions.h b/lib/libcxx/include/__math/rounding_functions.h index f7246ba7fed0..474f585a62f1 100644 --- a/lib/libcxx/include/__math/rounding_functions.h +++ b/lib/libcxx/include/__math/rounding_functions.h @@ -26,37 +26,37 @@ namespace __math { // ceil -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float ceil(float __x) _NOEXCEPT { return __builtin_ceilf(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float ceil(float __x) _NOEXCEPT { return __builtin_ceilf(__x); } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double ceil(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double ceil(double __x) _NOEXCEPT { return __builtin_ceil(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double ceil(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double ceil(long double __x) _NOEXCEPT { return __builtin_ceill(__x); } template ::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double ceil(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline 
_LIBCPP_HIDE_FROM_ABI double ceil(_A1 __x) _NOEXCEPT { return __builtin_ceil((double)__x); } // floor -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float floor(float __x) _NOEXCEPT { return __builtin_floorf(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float floor(float __x) _NOEXCEPT { return __builtin_floorf(__x); } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double floor(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double floor(double __x) _NOEXCEPT { return __builtin_floor(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double floor(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double floor(long double __x) _NOEXCEPT { return __builtin_floorl(__x); } template ::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double floor(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double floor(_A1 __x) _NOEXCEPT { return __builtin_floor((double)__x); } @@ -126,21 +126,21 @@ inline _LIBCPP_HIDE_FROM_ABI long lround(_A1 __x) _NOEXCEPT { // nearbyint -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float nearbyint(float __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float nearbyint(float __x) _NOEXCEPT { return __builtin_nearbyintf(__x); } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double nearbyint(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double nearbyint(double __x) _NOEXCEPT { return __builtin_nearbyint(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double nearbyint(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double nearbyint(long double __x) _NOEXCEPT { return __builtin_nearbyintl(__x); } template ::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double nearbyint(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double nearbyint(_A1 __x) _NOEXCEPT { return __builtin_nearbyint((double)__x); } @@ -186,55 +186,55 @@ 
inline _LIBCPP_HIDE_FROM_ABI double nexttoward(_A1 __x, long double __y) _NOEXCE // rint -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float rint(float __x) _NOEXCEPT { return __builtin_rintf(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float rint(float __x) _NOEXCEPT { return __builtin_rintf(__x); } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double rint(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double rint(double __x) _NOEXCEPT { return __builtin_rint(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double rint(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double rint(long double __x) _NOEXCEPT { return __builtin_rintl(__x); } template ::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double rint(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double rint(_A1 __x) _NOEXCEPT { return __builtin_rint((double)__x); } // round -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float round(float __x) _NOEXCEPT { return __builtin_round(__x); } +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI float round(float __x) _NOEXCEPT { return __builtin_round(__x); } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double round(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double round(double __x) _NOEXCEPT { return __builtin_round(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double round(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double round(long double __x) _NOEXCEPT { return __builtin_roundl(__x); } template ::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double round(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double round(_A1 __x) _NOEXCEPT { return __builtin_round((double)__x); } // trunc -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI float trunc(float __x) _NOEXCEPT { return __builtin_trunc(__x); } +[[__nodiscard__]] inline 
_LIBCPP_HIDE_FROM_ABI float trunc(float __x) _NOEXCEPT { return __builtin_trunc(__x); } template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI double trunc(double __x) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double trunc(double __x) _NOEXCEPT { return __builtin_trunc(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI long double trunc(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI long double trunc(long double __x) _NOEXCEPT { return __builtin_truncl(__x); } template ::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI double trunc(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI double trunc(_A1 __x) _NOEXCEPT { return __builtin_trunc((double)__x); } diff --git a/lib/libcxx/include/__math/traits.h b/lib/libcxx/include/__math/traits.h index 27ec52ecef02..0c96f766a767 100644 --- a/lib/libcxx/include/__math/traits.h +++ b/lib/libcxx/include/__math/traits.h @@ -12,11 +12,9 @@ #include <__config> #include <__type_traits/enable_if.h> #include <__type_traits/is_arithmetic.h> -#include <__type_traits/is_floating_point.h> #include <__type_traits/is_integral.h> #include <__type_traits/is_signed.h> #include <__type_traits/promote.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -28,115 +26,131 @@ namespace __math { // signbit -template ::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT { +// TODO(LLVM 22): Remove conditional once support for Clang 19 is dropped. +#if defined(_LIBCPP_COMPILER_GCC) || __has_constexpr_builtin(__builtin_signbit) +# define _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_CONSTEXPR_SINCE_CXX23 +#else +# define _LIBCPP_SIGNBIT_CONSTEXPR +#endif + +// The universal C runtime (UCRT) in the WinSDK provides floating point overloads +// for std::signbit(). By defining our overloads as templates, we can work around +// this issue as templates are less preferred than non-template functions. 
+template +[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(float __x) _NOEXCEPT { + return __builtin_signbit(__x); +} + +template +[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(double __x) _NOEXCEPT { + return __builtin_signbit(__x); +} + +template +[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(long double __x) _NOEXCEPT { return __builtin_signbit(__x); } template ::value && is_signed<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1 __x) _NOEXCEPT { return __x < 0; } template ::value && !is_signed<_A1>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool signbit(_A1) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_SIGNBIT_CONSTEXPR _LIBCPP_HIDE_FROM_ABI bool signbit(_A1) _NOEXCEPT { return false; } // isfinite -template ::value && numeric_limits<_A1>::has_infinity, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(_A1 __x) _NOEXCEPT { - return __builtin_isfinite((typename __promote<_A1>::type)__x); -} - -template ::value && !numeric_limits<_A1>::has_infinity, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(_A1) _NOEXCEPT { +template ::value, int> = 0> +[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(_A1) _NOEXCEPT { return true; } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(float __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(float __x) _NOEXCEPT { return __builtin_isfinite(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 
_LIBCPP_HIDE_FROM_ABI bool isfinite(double __x) _NOEXCEPT { return __builtin_isfinite(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isfinite(long double __x) _NOEXCEPT { return __builtin_isfinite(__x); } // isinf -template ::value && numeric_limits<_A1>::has_infinity, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(_A1 __x) _NOEXCEPT { - return __builtin_isinf((typename __promote<_A1>::type)__x); -} - -template ::value && !numeric_limits<_A1>::has_infinity, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(_A1) _NOEXCEPT { +template ::value, int> = 0> +[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(_A1) _NOEXCEPT { return false; } -#ifdef _LIBCPP_PREFERRED_OVERLOAD -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(float __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(float __x) _NOEXCEPT { return __builtin_isinf(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD bool -isinf(double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI +#ifdef _LIBCPP_PREFERRED_OVERLOAD +_LIBCPP_PREFERRED_OVERLOAD +#endif + bool + isinf(double __x) _NOEXCEPT { return __builtin_isinf(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isinf(long double __x) _NOEXCEPT { return __builtin_isinf(__x); } -#endif // isnan -template ::value, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(_A1 __x) _NOEXCEPT { - return 
__builtin_isnan(__x); -} - template ::value, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(_A1) _NOEXCEPT { +[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(_A1) _NOEXCEPT { return false; } -#ifdef _LIBCPP_PREFERRED_OVERLOAD -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(float __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(float __x) _NOEXCEPT { return __builtin_isnan(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI _LIBCPP_PREFERRED_OVERLOAD bool -isnan(double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI +#ifdef _LIBCPP_PREFERRED_OVERLOAD +_LIBCPP_PREFERRED_OVERLOAD +#endif + bool + isnan(double __x) _NOEXCEPT { return __builtin_isnan(__x); } -_LIBCPP_NODISCARD inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(long double __x) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnan(long double __x) _NOEXCEPT { return __builtin_isnan(__x); } -#endif // isnormal -template ::value, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(_A1 __x) _NOEXCEPT { +template ::value, int> = 0> +[[__nodiscard__]] _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(_A1 __x) _NOEXCEPT { + return __x != 0; +} + +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(float __x) _NOEXCEPT { return __builtin_isnormal(__x); } -template ::value, int> = 0> -_LIBCPP_NODISCARD _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(_A1 __x) _NOEXCEPT { - return __x != 0; +[[__nodiscard__]] inline _LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(double __x) _NOEXCEPT { + return __builtin_isnormal(__x); +} + +[[__nodiscard__]] inline 
_LIBCPP_CONSTEXPR_SINCE_CXX23 _LIBCPP_HIDE_FROM_ABI bool isnormal(long double __x) _NOEXCEPT { + return __builtin_isnormal(__x); } // isgreater template ::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isgreater(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isgreater(_A1 __x, _A2 __y) _NOEXCEPT { using type = typename __promote<_A1, _A2>::type; return __builtin_isgreater((type)__x, (type)__y); } @@ -144,7 +158,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isgreater(_A1 __x, _A2 __y) // isgreaterequal template ::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isgreaterequal(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isgreaterequal(_A1 __x, _A2 __y) _NOEXCEPT { using type = typename __promote<_A1, _A2>::type; return __builtin_isgreaterequal((type)__x, (type)__y); } @@ -152,7 +166,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isgreaterequal(_A1 __x, _A2 // isless template ::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isless(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isless(_A1 __x, _A2 __y) _NOEXCEPT { using type = typename __promote<_A1, _A2>::type; return __builtin_isless((type)__x, (type)__y); } @@ -160,7 +174,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isless(_A1 __x, _A2 __y) _NO // islessequal template ::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool islessequal(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool islessequal(_A1 __x, _A2 __y) _NOEXCEPT { using type = typename __promote<_A1, _A2>::type; return __builtin_islessequal((type)__x, (type)__y); } @@ -168,7 +182,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool islessequal(_A1 __x, _A2 __y // islessgreater 
template ::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool islessgreater(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool islessgreater(_A1 __x, _A2 __y) _NOEXCEPT { using type = typename __promote<_A1, _A2>::type; return __builtin_islessgreater((type)__x, (type)__y); } @@ -176,7 +190,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool islessgreater(_A1 __x, _A2 _ // isunordered template ::value && is_arithmetic<_A2>::value, int> = 0> -_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI bool isunordered(_A1 __x, _A2 __y) _NOEXCEPT { +[[__nodiscard__]] inline _LIBCPP_HIDE_FROM_ABI bool isunordered(_A1 __x, _A2 __y) _NOEXCEPT { using type = typename __promote<_A1, _A2>::type; return __builtin_isunordered((type)__x, (type)__y); } diff --git a/lib/libcxx/include/__mbstate_t.h b/lib/libcxx/include/__mbstate_t.h index bfa6d617e2b8..e013384454b4 100644 --- a/lib/libcxx/include/__mbstate_t.h +++ b/lib/libcxx/include/__mbstate_t.h @@ -35,7 +35,7 @@ # define __CORRECT_ISO_CPP_WCHAR_H_PROTO #endif -#if defined(_LIBCPP_HAS_MUSL_LIBC) +#if _LIBCPP_HAS_MUSL_LIBC # define __NEED_mbstate_t # include # undef __NEED_mbstate_t @@ -43,7 +43,7 @@ # include // works on most Unixes #elif __has_include() # include // works on Darwin -#elif !defined(_LIBCPP_HAS_NO_WIDE_CHARACTERS) && __has_include_next() +#elif _LIBCPP_HAS_WIDE_CHARACTERS && __has_include_next() # include_next // fall back to the C standard provider of mbstate_t #elif __has_include_next() # include_next // is also required to make mbstate_t visible diff --git a/lib/libcxx/include/__mdspan/default_accessor.h b/lib/libcxx/include/__mdspan/default_accessor.h index 1cc5f15545fc..d6f3ddb998e9 100644 --- a/lib/libcxx/include/__mdspan/default_accessor.h +++ b/lib/libcxx/include/__mdspan/default_accessor.h @@ -18,12 +18,11 @@ #define _LIBCPP___MDSPAN_DEFAULT_ACCESSOR_H #include <__config> +#include <__cstddef/size_t.h> #include 
<__type_traits/is_abstract.h> #include <__type_traits/is_array.h> #include <__type_traits/is_convertible.h> #include <__type_traits/remove_const.h> -#include -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__mdspan/extents.h b/lib/libcxx/include/__mdspan/extents.h index 95082ef3d11a..65a697769bda 100644 --- a/lib/libcxx/include/__mdspan/extents.h +++ b/lib/libcxx/include/__mdspan/extents.h @@ -19,6 +19,9 @@ #include <__assert> #include <__config> + +#include <__concepts/arithmetic.h> +#include <__cstddef/byte.h> #include <__type_traits/common_type.h> #include <__type_traits/is_convertible.h> #include <__type_traits/is_nothrow_constructible.h> @@ -27,9 +30,7 @@ #include <__utility/integer_sequence.h> #include <__utility/unreachable.h> #include -#include #include -#include #include #include @@ -128,14 +129,14 @@ struct __maybe_static_array { // Static values member static constexpr size_t __size_ = sizeof...(_Values); static constexpr size_t __size_dynamic_ = ((_Values == _DynTag) + ... 
+ 0); - using _StaticValues = __static_array<_TStatic, _Values...>; - using _DynamicValues = __possibly_empty_array<_TDynamic, __size_dynamic_>; + using _StaticValues _LIBCPP_NODEBUG = __static_array<_TStatic, _Values...>; + using _DynamicValues _LIBCPP_NODEBUG = __possibly_empty_array<_TDynamic, __size_dynamic_>; // Dynamic values member _LIBCPP_NO_UNIQUE_ADDRESS _DynamicValues __dyn_vals_; // static mapping of indices to the position in the dynamic values array - using _DynamicIdxMap = __static_partial_sums(_Values == _DynTag)...>; + using _DynamicIdxMap _LIBCPP_NODEBUG = __static_partial_sums(_Values == _DynTag)...>; template _LIBCPP_HIDE_FROM_ABI static constexpr _DynamicValues __zeros(index_sequence<_Indices...>) noexcept { @@ -282,8 +283,7 @@ class extents { using size_type = make_unsigned_t; using rank_type = size_t; - static_assert(is_integral::value && !is_same::value, - "extents::index_type must be a signed or unsigned integer type"); + static_assert(__libcpp_integer, "extents::index_type must be a signed or unsigned integer type"); static_assert(((__mdspan_detail::__is_representable_as(_Extents) || (_Extents == dynamic_extent)) && ...), "extents ctor: arguments must be representable as index_type and nonnegative"); @@ -292,7 +292,8 @@ class extents { static constexpr rank_type __rank_dynamic_ = ((_Extents == dynamic_extent) + ... 
+ 0); // internal storage type using __maybe_static_array - using _Values = __mdspan_detail::__maybe_static_array<_IndexType, size_t, dynamic_extent, _Extents...>; + using _Values _LIBCPP_NODEBUG = + __mdspan_detail::__maybe_static_array<_IndexType, size_t, dynamic_extent, _Extents...>; [[no_unique_address]] _Values __vals_; public: @@ -448,7 +449,7 @@ struct __make_dextents< _IndexType, 0, extents<_IndexType, _ExtentsPack...>> { using type = extents<_IndexType, _ExtentsPack...>; }; -} // end namespace __mdspan_detail +} // namespace __mdspan_detail // [mdspan.extents.dextents], alias template template diff --git a/lib/libcxx/include/__mdspan/layout_left.h b/lib/libcxx/include/__mdspan/layout_left.h index d058cbccffd9..288b3dd8038e 100644 --- a/lib/libcxx/include/__mdspan/layout_left.h +++ b/lib/libcxx/include/__mdspan/layout_left.h @@ -21,14 +21,12 @@ #include <__config> #include <__fwd/mdspan.h> #include <__mdspan/extents.h> +#include <__type_traits/common_type.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_convertible.h> #include <__type_traits/is_nothrow_constructible.h> #include <__utility/integer_sequence.h> #include -#include -#include -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__mdspan/layout_right.h b/lib/libcxx/include/__mdspan/layout_right.h index 6842e9dc37fd..72922d1049c7 100644 --- a/lib/libcxx/include/__mdspan/layout_right.h +++ b/lib/libcxx/include/__mdspan/layout_right.h @@ -19,15 +19,14 @@ #include <__assert> #include <__config> +#include <__cstddef/size_t.h> #include <__fwd/mdspan.h> #include <__mdspan/extents.h> +#include <__type_traits/common_type.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_convertible.h> #include <__type_traits/is_nothrow_constructible.h> #include <__utility/integer_sequence.h> -#include -#include -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git 
a/lib/libcxx/include/__mdspan/layout_stride.h b/lib/libcxx/include/__mdspan/layout_stride.h index 86148ac849ec..bb93de977514 100644 --- a/lib/libcxx/include/__mdspan/layout_stride.h +++ b/lib/libcxx/include/__mdspan/layout_stride.h @@ -18,19 +18,22 @@ #define _LIBCPP___MDSPAN_LAYOUT_STRIDE_H #include <__assert> +#include <__concepts/same_as.h> #include <__config> #include <__fwd/mdspan.h> #include <__mdspan/extents.h> +#include <__type_traits/common_type.h> #include <__type_traits/is_constructible.h> #include <__type_traits/is_convertible.h> +#include <__type_traits/is_integral.h> #include <__type_traits/is_nothrow_constructible.h> +#include <__type_traits/is_same.h> #include <__utility/as_const.h> #include <__utility/integer_sequence.h> #include <__utility/swap.h> #include -#include -#include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__mdspan/mdspan.h b/lib/libcxx/include/__mdspan/mdspan.h index 1ff4fd4ba4a8..3f9b35b185b1 100644 --- a/lib/libcxx/include/__mdspan/mdspan.h +++ b/lib/libcxx/include/__mdspan/mdspan.h @@ -37,9 +37,6 @@ #include <__type_traits/remove_reference.h> #include <__utility/integer_sequence.h> #include -#include -#include -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/lib/libcxx/include/__memory/addressof.h b/lib/libcxx/include/__memory/addressof.h index fa590212c49b..98b08958a6a9 100644 --- a/lib/libcxx/include/__memory/addressof.h +++ b/lib/libcxx/include/__memory/addressof.h @@ -23,17 +23,15 @@ inline _LIBCPP_CONSTEXPR_SINCE_CXX17 _LIBCPP_NO_CFI _LIBCPP_HIDE_FROM_ABI _Tp* a return __builtin_addressof(__x); } -#if defined(_LIBCPP_HAS_OBJC_ARC) && !defined(_LIBCPP_PREDEFINED_OBJC_ARC_ADDRESSOF) +#if _LIBCPP_HAS_OBJC_ARC // Objective-C++ Automatic Reference Counting uses qualified pointers -// that require special addressof() signatures. 
When -// _LIBCPP_PREDEFINED_OBJC_ARC_ADDRESSOF is defined, the compiler -// itself is providing these definitions. Otherwise, we provide them. +// that require special addressof() signatures. template inline _LIBCPP_HIDE_FROM_ABI __strong _Tp* addressof(__strong _Tp& __x) _NOEXCEPT { return &__x; } -# ifdef _LIBCPP_HAS_OBJC_ARC_WEAK +# if _LIBCPP_HAS_OBJC_ARC_WEAK template inline _LIBCPP_HIDE_FROM_ABI __weak _Tp* addressof(__weak _Tp& __x) _NOEXCEPT { return &__x; diff --git a/lib/libcxx/include/__memory/align.h b/lib/libcxx/include/__memory/align.h index bbb995f4a8c8..402eac338092 100644 --- a/lib/libcxx/include/__memory/align.h +++ b/lib/libcxx/include/__memory/align.h @@ -10,7 +10,7 @@ #define _LIBCPP___MEMORY_ALIGN_H #include <__config> -#include +#include <__cstddef/size_t.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git a/lib/libcxx/include/__memory/aligned_alloc.h b/lib/libcxx/include/__memory/aligned_alloc.h index cb424328bcaf..fb36983d9c3d 100644 --- a/lib/libcxx/include/__memory/aligned_alloc.h +++ b/lib/libcxx/include/__memory/aligned_alloc.h @@ -10,7 +10,6 @@ #define _LIBCPP___MEMORY_ALIGNED_ALLOC_H #include <__config> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -19,7 +18,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#ifndef _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION +#if _LIBCPP_HAS_LIBRARY_ALIGNED_ALLOCATION // Low-level helpers to call the aligned allocation and deallocation functions // on the target platform. 
This is used to implement libc++'s own memory @@ -30,7 +29,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD inline _LIBCPP_HIDE_FROM_ABI void* __libcpp_aligned_alloc(std::size_t __alignment, std::size_t __size) { # if defined(_LIBCPP_MSVCRT_LIKE) return ::_aligned_malloc(__size, __alignment); -# elif _LIBCPP_STD_VER >= 17 && !defined(_LIBCPP_HAS_NO_C11_ALIGNED_ALLOC) +# elif _LIBCPP_STD_VER >= 17 && _LIBCPP_HAS_C11_ALIGNED_ALLOC // aligned_alloc() requires that __size is a multiple of __alignment, // but for C++ [new.delete.general], only states "if the value of an // alignment argument passed to any of these functions is not a valid @@ -57,7 +56,7 @@ inline _LIBCPP_HIDE_FROM_ABI void __libcpp_aligned_free(void* __ptr) { # endif } -#endif // !_LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION +#endif // _LIBCPP_HAS_LIBRARY_ALIGNED_ALLOCATION _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__memory/allocate_at_least.h b/lib/libcxx/include/__memory/allocate_at_least.h index df73d9a2e94a..9b5a8bcbd459 100644 --- a/lib/libcxx/include/__memory/allocate_at_least.h +++ b/lib/libcxx/include/__memory/allocate_at_least.h @@ -10,8 +10,8 @@ #define _LIBCPP___MEMORY_ALLOCATE_AT_LEAST_H #include <__config> +#include <__cstddef/size_t.h> #include <__memory/allocator_traits.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -35,7 +35,7 @@ struct __allocation_result { }; template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR __allocation_result::pointer> __allocate_at_least(_Alloc& __alloc, size_t __n) { return {__alloc.allocate(__n), __n}; diff --git a/lib/libcxx/include/__memory/allocation_guard.h b/lib/libcxx/include/__memory/allocation_guard.h index cb870af7be67..66edcd92ed61 100644 --- a/lib/libcxx/include/__memory/allocation_guard.h +++ b/lib/libcxx/include/__memory/allocation_guard.h @@ -14,7 +14,6 @@ #include <__memory/addressof.h> #include <__memory/allocator_traits.h> #include 
<__utility/move.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -46,8 +45,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD // custom allocator. template struct __allocation_guard { - using _Pointer = typename allocator_traits<_Alloc>::pointer; - using _Size = typename allocator_traits<_Alloc>::size_type; + using _Pointer _LIBCPP_NODEBUG = typename allocator_traits<_Alloc>::pointer; + using _Size _LIBCPP_NODEBUG = typename allocator_traits<_Alloc>::size_type; template // we perform the allocator conversion inside the constructor _LIBCPP_HIDE_FROM_ABI explicit __allocation_guard(_AllocT __alloc, _Size __n) diff --git a/lib/libcxx/include/__memory/allocator.h b/lib/libcxx/include/__memory/allocator.h index 2d8624e771bc..191a59e6614a 100644 --- a/lib/libcxx/include/__memory/allocator.h +++ b/lib/libcxx/include/__memory/allocator.h @@ -11,17 +11,19 @@ #define _LIBCPP___MEMORY_ALLOCATOR_H #include <__config> +#include <__cstddef/ptrdiff_t.h> +#include <__cstddef/size_t.h> #include <__memory/addressof.h> #include <__memory/allocate_at_least.h> #include <__memory/allocator_traits.h> +#include <__new/allocate.h> +#include <__new/exceptions.h> #include <__type_traits/is_const.h> #include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_same.h> #include <__type_traits/is_void.h> #include <__type_traits/is_volatile.h> #include <__utility/forward.h> -#include -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -47,23 +49,7 @@ class _LIBCPP_TEMPLATE_VIS allocator { typedef allocator<_Up> other; }; }; - -// TODO(LLVM 20): Remove the escape hatch -# ifdef _LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST -template <> -class _LIBCPP_TEMPLATE_VIS allocator { -public: - _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* pointer; - _LIBCPP_DEPRECATED_IN_CXX17 typedef const void* const_pointer; - _LIBCPP_DEPRECATED_IN_CXX17 typedef const void value_type; - - template - struct _LIBCPP_DEPRECATED_IN_CXX17 rebind { - 
typedef allocator<_Up> other; - }; -}; -# endif // _LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST -#endif // _LIBCPP_STD_VER <= 17 +#endif // _LIBCPP_STD_VER <= 17 // This class provides a non-trivial default constructor to the class that derives from it // if the condition is satisfied. @@ -109,18 +95,20 @@ class _LIBCPP_TEMPLATE_VIS allocator : private __non_trivial_if::v template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 allocator(const allocator<_Up>&) _NOEXCEPT {} - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp* allocate(size_t __n) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp* allocate(size_t __n) { + static_assert(sizeof(_Tp) >= 0, "cannot allocate memory for an incomplete type"); if (__n > allocator_traits::max_size(*this)) __throw_bad_array_new_length(); if (__libcpp_is_constant_evaluated()) { return static_cast<_Tp*>(::operator new(__n * sizeof(_Tp))); } else { - return static_cast<_Tp*>(std::__libcpp_allocate(__n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp))); + return std::__libcpp_allocate<_Tp>(__element_count(__n)); } } #if _LIBCPP_STD_VER >= 23 [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr allocation_result<_Tp*> allocate_at_least(size_t __n) { + static_assert(sizeof(_Tp) >= 0, "cannot allocate memory for an incomplete type"); return {allocate(__n), __n}; } #endif @@ -129,7 +117,7 @@ class _LIBCPP_TEMPLATE_VIS allocator : private __non_trivial_if::v if (__libcpp_is_constant_evaluated()) { ::operator delete(__p); } else { - std::__libcpp_deallocate((void*)__p, __n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp)); + std::__libcpp_deallocate<_Tp>(__p, __element_count(__n)); } } @@ -152,7 +140,7 @@ class _LIBCPP_TEMPLATE_VIS allocator : private __non_trivial_if::v return std::addressof(__x); } - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_IN_CXX17 _Tp* allocate(size_t __n, const void*) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_IN_CXX17 _Tp* allocate(size_t __n, const void*) { 
return allocate(__n); } @@ -169,85 +157,6 @@ class _LIBCPP_TEMPLATE_VIS allocator : private __non_trivial_if::v #endif }; -// TODO(LLVM 20): Remove the escape hatch -#ifdef _LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST -template -class _LIBCPP_TEMPLATE_VIS allocator - : private __non_trivial_if::value, allocator > { - static_assert(!is_volatile<_Tp>::value, "std::allocator does not support volatile types"); - -public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef const _Tp value_type; - typedef true_type propagate_on_container_move_assignment; -# if _LIBCPP_STD_VER <= 23 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_ALLOCATOR_MEMBERS) - _LIBCPP_DEPRECATED_IN_CXX23 typedef true_type is_always_equal; -# endif - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 allocator() _NOEXCEPT = default; - - template - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 allocator(const allocator<_Up>&) _NOEXCEPT {} - - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 const _Tp* allocate(size_t __n) { - if (__n > allocator_traits::max_size(*this)) - __throw_bad_array_new_length(); - if (__libcpp_is_constant_evaluated()) { - return static_cast(::operator new(__n * sizeof(_Tp))); - } else { - return static_cast(std::__libcpp_allocate(__n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp))); - } - } - -# if _LIBCPP_STD_VER >= 23 - [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr allocation_result allocate_at_least(size_t __n) { - return {allocate(__n), __n}; - } -# endif - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void deallocate(const _Tp* __p, size_t __n) { - if (__libcpp_is_constant_evaluated()) { - ::operator delete(const_cast<_Tp*>(__p)); - } else { - std::__libcpp_deallocate((void*)const_cast<_Tp*>(__p), __n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp)); - } - } - - // C++20 Removed members -# if _LIBCPP_STD_VER <= 17 - _LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp* pointer; - _LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp* const_pointer; - 
_LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp& reference; - _LIBCPP_DEPRECATED_IN_CXX17 typedef const _Tp& const_reference; - - template - struct _LIBCPP_DEPRECATED_IN_CXX17 rebind { - typedef allocator<_Up> other; - }; - - _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI const_pointer address(const_reference __x) const _NOEXCEPT { - return std::addressof(__x); - } - - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_DEPRECATED_IN_CXX17 const _Tp* allocate(size_t __n, const void*) { - return allocate(__n); - } - - _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI size_type max_size() const _NOEXCEPT { - return size_type(~0) / sizeof(_Tp); - } - - template - _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI void construct(_Up* __p, _Args&&... __args) { - ::new ((void*)__p) _Up(std::forward<_Args>(__args)...); - } - - _LIBCPP_DEPRECATED_IN_CXX17 _LIBCPP_HIDE_FROM_ABI void destroy(pointer __p) { __p->~_Tp(); } -# endif -}; -#endif // _LIBCPP_ENABLE_REMOVED_ALLOCATOR_CONST - template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool operator==(const allocator<_Tp>&, const allocator<_Up>&) _NOEXCEPT { diff --git a/lib/libcxx/include/__memory/allocator_arg_t.h b/lib/libcxx/include/__memory/allocator_arg_t.h index 7e66da740cd4..72a0a9c399bd 100644 --- a/lib/libcxx/include/__memory/allocator_arg_t.h +++ b/lib/libcxx/include/__memory/allocator_arg_t.h @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___FUNCTIONAL_ALLOCATOR_ARG_T_H -#define _LIBCPP___FUNCTIONAL_ALLOCATOR_ARG_T_H +#ifndef _LIBCPP___MEMORY_ALLOCATOR_ARG_T_H +#define _LIBCPP___MEMORY_ALLOCATOR_ARG_T_H #include <__config> #include <__memory/uses_allocator.h> @@ -39,10 +39,10 @@ constexpr allocator_arg_t allocator_arg = allocator_arg_t(); template struct __uses_alloc_ctor_imp { - typedef _LIBCPP_NODEBUG __remove_cvref_t<_Alloc> _RawAlloc; - static const bool __ua = uses_allocator<_Tp, _RawAlloc>::value; - static const bool __ic = 
is_constructible<_Tp, allocator_arg_t, _Alloc, _Args...>::value; - static const int value = __ua ? 2 - __ic : 0; + using _RawAlloc _LIBCPP_NODEBUG = __remove_cvref_t<_Alloc>; + static const bool __ua = uses_allocator<_Tp, _RawAlloc>::value; + static const bool __ic = is_constructible<_Tp, allocator_arg_t, _Alloc, _Args...>::value; + static const int value = __ua ? 2 - __ic : 0; }; template @@ -72,4 +72,4 @@ __user_alloc_construct_impl(integral_constant, _Tp* __storage, const _Al _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___FUNCTIONAL_ALLOCATOR_ARG_T_H +#endif // _LIBCPP___MEMORY_ALLOCATOR_ARG_T_H diff --git a/lib/libcxx/include/__memory/allocator_destructor.h b/lib/libcxx/include/__memory/allocator_destructor.h index ed3d8918f5fe..aac92a23fa0d 100644 --- a/lib/libcxx/include/__memory/allocator_destructor.h +++ b/lib/libcxx/include/__memory/allocator_destructor.h @@ -20,11 +20,11 @@ _LIBCPP_BEGIN_NAMESPACE_STD template class __allocator_destructor { - typedef _LIBCPP_NODEBUG allocator_traits<_Alloc> __alloc_traits; + using __alloc_traits _LIBCPP_NODEBUG = allocator_traits<_Alloc>; public: - typedef _LIBCPP_NODEBUG typename __alloc_traits::pointer pointer; - typedef _LIBCPP_NODEBUG typename __alloc_traits::size_type size_type; + using pointer _LIBCPP_NODEBUG = typename __alloc_traits::pointer; + using size_type _LIBCPP_NODEBUG = typename __alloc_traits::size_type; private: _Alloc& __alloc_; diff --git a/lib/libcxx/include/__memory/allocator_traits.h b/lib/libcxx/include/__memory/allocator_traits.h index c5fcc89327b8..2d9ab847e9f2 100644 --- a/lib/libcxx/include/__memory/allocator_traits.h +++ b/lib/libcxx/include/__memory/allocator_traits.h @@ -11,8 +11,11 @@ #define _LIBCPP___MEMORY_ALLOCATOR_TRAITS_H #include <__config> +#include <__cstddef/size_t.h> +#include <__fwd/memory.h> #include <__memory/construct_at.h> #include <__memory/pointer_traits.h> +#include <__type_traits/detected_or.h> #include <__type_traits/enable_if.h> #include 
<__type_traits/is_constructible.h> #include <__type_traits/is_empty.h> @@ -22,7 +25,6 @@ #include <__type_traits/void_t.h> #include <__utility/declval.h> #include <__utility/forward.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -41,17 +43,11 @@ _LIBCPP_BEGIN_NAMESPACE_STD struct NAME<_Tp, __void_t > : true_type {} // __pointer -template , - bool = __has_pointer<_RawAlloc>::value> -struct __pointer { - using type _LIBCPP_NODEBUG = typename _RawAlloc::pointer; -}; -template -struct __pointer<_Tp, _Alloc, _RawAlloc, false> { - using type _LIBCPP_NODEBUG = _Tp*; -}; +template +using __pointer_member _LIBCPP_NODEBUG = typename _Tp::pointer; + +template +using __pointer _LIBCPP_NODEBUG = __detected_or_t<_Tp*, __pointer_member, __libcpp_remove_reference_t<_Alloc> >; // __const_pointer _LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_const_pointer, const_pointer); @@ -62,7 +58,7 @@ struct __const_pointer { template struct __const_pointer<_Tp, _Ptr, _Alloc, false> { #ifdef _LIBCPP_CXX03_LANG - using type = typename pointer_traits<_Ptr>::template rebind::other; + using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind::other; #else using type _LIBCPP_NODEBUG = typename pointer_traits<_Ptr>::template rebind; #endif @@ -99,13 +95,11 @@ struct __const_void_pointer<_Ptr, _Alloc, false> { }; // __size_type -_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_size_type, size_type); -template ::value> -struct __size_type : make_unsigned<_DiffType> {}; +template +using __size_type_member _LIBCPP_NODEBUG = typename _Tp::size_type; + template -struct __size_type<_Alloc, _DiffType, true> { - using type _LIBCPP_NODEBUG = typename _Alloc::size_type; -}; +using __size_type _LIBCPP_NODEBUG = __detected_or_t<__make_unsigned_t<_DiffType>, __size_type_member, _Alloc>; // __alloc_traits_difference_type _LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_alloc_traits_difference_type, difference_type); @@ -119,40 +113,38 @@ struct __alloc_traits_difference_type<_Alloc, _Ptr, true> { 
}; // __propagate_on_container_copy_assignment -_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_propagate_on_container_copy_assignment, propagate_on_container_copy_assignment); -template ::value> -struct __propagate_on_container_copy_assignment : false_type {}; +template +using __propagate_on_container_copy_assignment_member _LIBCPP_NODEBUG = + typename _Tp::propagate_on_container_copy_assignment; + template -struct __propagate_on_container_copy_assignment<_Alloc, true> { - using type _LIBCPP_NODEBUG = typename _Alloc::propagate_on_container_copy_assignment; -}; +using __propagate_on_container_copy_assignment _LIBCPP_NODEBUG = + __detected_or_t; // __propagate_on_container_move_assignment -_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_propagate_on_container_move_assignment, propagate_on_container_move_assignment); -template ::value> -struct __propagate_on_container_move_assignment : false_type {}; +template +using __propagate_on_container_move_assignment_member _LIBCPP_NODEBUG = + typename _Tp::propagate_on_container_move_assignment; + template -struct __propagate_on_container_move_assignment<_Alloc, true> { - using type _LIBCPP_NODEBUG = typename _Alloc::propagate_on_container_move_assignment; -}; +using __propagate_on_container_move_assignment _LIBCPP_NODEBUG = + __detected_or_t; // __propagate_on_container_swap -_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_propagate_on_container_swap, propagate_on_container_swap); -template ::value> -struct __propagate_on_container_swap : false_type {}; +template +using __propagate_on_container_swap_member _LIBCPP_NODEBUG = typename _Tp::propagate_on_container_swap; + template -struct __propagate_on_container_swap<_Alloc, true> { - using type _LIBCPP_NODEBUG = typename _Alloc::propagate_on_container_swap; -}; +using __propagate_on_container_swap _LIBCPP_NODEBUG = + __detected_or_t; // __is_always_equal -_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_is_always_equal, is_always_equal); -template ::value> -struct __is_always_equal : is_empty<_Alloc> {}; 
+template +using __is_always_equal_member _LIBCPP_NODEBUG = typename _Tp::is_always_equal; + template -struct __is_always_equal<_Alloc, true> { - using type _LIBCPP_NODEBUG = typename _Alloc::is_always_equal; -}; +using __is_always_equal _LIBCPP_NODEBUG = + __detected_or_t::type, __is_always_equal_member, _Alloc>; // __allocator_traits_rebind _LIBCPP_SUPPRESS_DEPRECATED_PUSH @@ -177,7 +169,7 @@ struct __allocator_traits_rebind<_Alloc<_Tp, _Args...>, _Up, false> { _LIBCPP_SUPPRESS_DEPRECATED_POP template -using __allocator_traits_rebind_t = typename __allocator_traits_rebind<_Alloc, _Tp>::type; +using __allocator_traits_rebind_t _LIBCPP_NODEBUG = typename __allocator_traits_rebind<_Alloc, _Tp>::type; _LIBCPP_SUPPRESS_DEPRECATED_PUSH @@ -244,20 +236,18 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(allocation_result); template struct _LIBCPP_TEMPLATE_VIS allocator_traits { - using allocator_type = _Alloc; - using value_type = typename allocator_type::value_type; - using pointer = typename __pointer::type; - using const_pointer = typename __const_pointer::type; - using void_pointer = typename __void_pointer::type; - using const_void_pointer = typename __const_void_pointer::type; - using difference_type = typename __alloc_traits_difference_type::type; - using size_type = typename __size_type::type; - using propagate_on_container_copy_assignment = - typename __propagate_on_container_copy_assignment::type; - using propagate_on_container_move_assignment = - typename __propagate_on_container_move_assignment::type; - using propagate_on_container_swap = typename __propagate_on_container_swap::type; - using is_always_equal = typename __is_always_equal::type; + using allocator_type = _Alloc; + using value_type = typename allocator_type::value_type; + using pointer = __pointer; + using const_pointer = typename __const_pointer::type; + using void_pointer = typename __void_pointer::type; + using const_void_pointer = typename __const_void_pointer::type; + using difference_type = typename 
__alloc_traits_difference_type::type; + using size_type = __size_type; + using propagate_on_container_copy_assignment = __propagate_on_container_copy_assignment; + using propagate_on_container_move_assignment = __propagate_on_container_move_assignment; + using propagate_on_container_swap = __propagate_on_container_swap; + using is_always_equal = __is_always_equal; #ifndef _LIBCPP_CXX03_LANG template @@ -275,13 +265,13 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits { }; #endif // _LIBCPP_CXX03_LANG - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer allocate(allocator_type& __a, size_type __n) { return __a.allocate(__n); } template ::value, int> = 0> - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer allocate(allocator_type& __a, size_type __n, const_void_pointer __hint) { _LIBCPP_SUPPRESS_DEPRECATED_PUSH return __a.allocate(__n, __hint); @@ -290,7 +280,7 @@ struct _LIBCPP_TEMPLATE_VIS allocator_traits { template ::value, int> = 0> - _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 static pointer allocate(allocator_type& __a, size_type __n, const_void_pointer) { return __a.allocate(__n); } @@ -369,12 +359,12 @@ template using __rebind_alloc _LIBCPP_NODEBUG = typename _Traits::template rebind_alloc<_Tp>; #else template -using __rebind_alloc = typename _Traits::template rebind_alloc<_Tp>::other; +using __rebind_alloc _LIBCPP_NODEBUG = typename _Traits::template rebind_alloc<_Tp>::other; #endif template struct __check_valid_allocator : true_type { - using _Traits = std::allocator_traits<_Alloc>; + using _Traits _LIBCPP_NODEBUG = std::allocator_traits<_Alloc>; static_assert(is_same<_Alloc, __rebind_alloc<_Traits, typename 
_Traits::value_type> >::value, "[allocator.requirements] states that rebinding an allocator to the same type should result in the " "original allocator"); diff --git a/lib/libcxx/include/__memory/array_cookie.h b/lib/libcxx/include/__memory/array_cookie.h new file mode 100644 index 000000000000..806a9e99ecaf --- /dev/null +++ b/lib/libcxx/include/__memory/array_cookie.h @@ -0,0 +1,55 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___MEMORY_ARRAY_COOKIE_H +#define _LIBCPP___MEMORY_ARRAY_COOKIE_H + +#include <__config> +#include <__configuration/abi.h> +#include <__cstddef/size_t.h> +#include <__type_traits/integral_constant.h> +#include <__type_traits/is_trivially_destructible.h> +#include <__type_traits/negation.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// Trait representing whether a type requires an array cookie at the start of its allocation when +// allocated as `new T[n]` and deallocated as `delete[] array`. +// +// Under the Itanium C++ ABI [1], we know that an array cookie is available unless `T` is trivially +// destructible and the call to `operator delete[]` is not a sized operator delete. Under ABIs other +// than the Itanium ABI, we assume there are no array cookies. +// +// [1]: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#array-cookies +#ifdef _LIBCPP_ABI_ITANIUM +// TODO: Use a builtin instead +// TODO: We should factor in the choice of the usual deallocation function in this determination. 
+template +struct __has_array_cookie : _Not > {}; +#else +template +struct __has_array_cookie : false_type {}; +#endif + +template +// Avoid failures when -fsanitize-address-poison-custom-array-cookie is enabled +_LIBCPP_HIDE_FROM_ABI _LIBCPP_NO_SANITIZE("address") size_t __get_array_cookie(_Tp const* __ptr) { + static_assert( + __has_array_cookie<_Tp>::value, "Trying to access the array cookie of a type that is not guaranteed to have one"); + size_t const* __cookie = reinterpret_cast(__ptr) - 1; // TODO: Use a builtin instead + return *__cookie; +} + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___MEMORY_ARRAY_COOKIE_H diff --git a/lib/libcxx/include/__memory/assume_aligned.h b/lib/libcxx/include/__memory/assume_aligned.h index 526eb3334f95..08f1772cd6df 100644 --- a/lib/libcxx/include/__memory/assume_aligned.h +++ b/lib/libcxx/include/__memory/assume_aligned.h @@ -12,8 +12,8 @@ #include <__assert> #include <__config> +#include <__cstddef/size_t.h> #include <__type_traits/is_constant_evaluated.h> -#include #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -23,7 +23,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __assume_aligned(_Tp* __ptr) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __assume_aligned(_Tp* __ptr) { static_assert(_Np != 0 && (_Np & (_Np - 1)) == 0, "std::assume_aligned(p) requires N to be a power of two"); if (__libcpp_is_constant_evaluated()) { diff --git a/lib/libcxx/include/__memory/builtin_new_allocator.h b/lib/libcxx/include/__memory/builtin_new_allocator.h deleted file mode 100644 index c6f7f3c5ff52..000000000000 --- a/lib/libcxx/include/__memory/builtin_new_allocator.h +++ /dev/null @@ -1,67 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___MEMORY_BUILTIN_NEW_ALLOCATOR_H -#define _LIBCPP___MEMORY_BUILTIN_NEW_ALLOCATOR_H - -#include <__config> -#include <__memory/unique_ptr.h> -#include -#include - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -_LIBCPP_BEGIN_NAMESPACE_STD - -// __builtin_new_allocator -- A non-templated helper for allocating and -// deallocating memory using __builtin_operator_new and -// __builtin_operator_delete. It should be used in preference to -// `std::allocator` to avoid additional instantiations. -struct __builtin_new_allocator { - struct __builtin_new_deleter { - typedef void* pointer_type; - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit __builtin_new_deleter(size_t __size, size_t __align) - : __size_(__size), __align_(__align) {} - - _LIBCPP_HIDE_FROM_ABI void operator()(void* __p) const _NOEXCEPT { - std::__libcpp_deallocate(__p, __size_, __align_); - } - - private: - size_t __size_; - size_t __align_; - }; - - typedef unique_ptr __holder_t; - - _LIBCPP_HIDE_FROM_ABI static __holder_t __allocate_bytes(size_t __s, size_t __align) { - return __holder_t(std::__libcpp_allocate(__s, __align), __builtin_new_deleter(__s, __align)); - } - - _LIBCPP_HIDE_FROM_ABI static void __deallocate_bytes(void* __p, size_t __s, size_t __align) _NOEXCEPT { - std::__libcpp_deallocate(__p, __s, __align); - } - - template - _LIBCPP_NODEBUG _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI static __holder_t __allocate_type(size_t __n) { - return __allocate_bytes(__n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp)); - } - - template - _LIBCPP_NODEBUG _LIBCPP_ALWAYS_INLINE _LIBCPP_HIDE_FROM_ABI static void - __deallocate_type(void* __p, size_t __n) _NOEXCEPT { - __deallocate_bytes(__p, __n * sizeof(_Tp), _LIBCPP_ALIGNOF(_Tp)); - } -}; - -_LIBCPP_END_NAMESPACE_STD - -#endif // 
_LIBCPP___MEMORY_BUILTIN_NEW_ALLOCATOR_H diff --git a/lib/libcxx/include/__memory/compressed_pair.h b/lib/libcxx/include/__memory/compressed_pair.h index 40e5cfc35fb0..38798a21fa3c 100644 --- a/lib/libcxx/include/__memory/compressed_pair.h +++ b/lib/libcxx/include/__memory/compressed_pair.h @@ -11,161 +11,95 @@ #define _LIBCPP___MEMORY_COMPRESSED_PAIR_H #include <__config> -#include <__fwd/tuple.h> -#include <__tuple/tuple_indices.h> -#include <__type_traits/decay.h> -#include <__type_traits/dependent_type.h> -#include <__type_traits/enable_if.h> -#include <__type_traits/is_constructible.h> +#include <__cstddef/size_t.h> +#include <__type_traits/datasizeof.h> #include <__type_traits/is_empty.h> #include <__type_traits/is_final.h> -#include <__type_traits/is_same.h> -#include <__type_traits/is_swappable.h> -#include <__utility/forward.h> -#include <__utility/move.h> -#include <__utility/piecewise_construct.h> -#include +#include <__type_traits/is_reference.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif -_LIBCPP_PUSH_MACROS -#include <__undef_macros> - _LIBCPP_BEGIN_NAMESPACE_STD -// Tag used to default initialize one or both of the pair's elements. 
-struct __default_init_tag {}; -struct __value_init_tag {}; - -template ::value && !__libcpp_is_final<_Tp>::value> -struct __compressed_pair_elem { - using _ParamT = _Tp; - using reference = _Tp&; - using const_reference = const _Tp&; - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit __compressed_pair_elem(__default_init_tag) {} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit __compressed_pair_elem(__value_init_tag) : __value_() {} - - template >::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit __compressed_pair_elem(_Up&& __u) - : __value_(std::forward<_Up>(__u)) {} +// ================================================================================================================== // +// The utilites here are for staying ABI compatible with the legacy `__compressed_pair`. They should not be used // +// for new data structures. Use `_LIBCPP_NO_UNIQUE_ADDRESS` for new data structures instead (but make sure you // +// understand how it works). // +// ================================================================================================================== // -#ifndef _LIBCPP_CXX03_LANG - template - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 explicit __compressed_pair_elem( - piecewise_construct_t, tuple<_Args...> __args, __tuple_indices<_Indices...>) - : __value_(std::forward<_Args>(std::get<_Indices>(__args))...) {} -#endif - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 reference __get() _NOEXCEPT { return __value_; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR const_reference __get() const _NOEXCEPT { return __value_; } - -private: - _Tp __value_; -}; +// The first member is aligned to the alignment of the second member to force padding in front of the compressed pair +// in case there are members before it. 
+// +// For example: +// (assuming x86-64 linux) +// class SomeClass { +// uint32_t member1; +// _LIBCPP_COMPRESSED_PAIR(uint32_t, member2, uint64_t, member3); +// } +// +// The layout with __compressed_pair is: +// member1 - offset: 0, size: 4 +// padding - offset: 4, size: 4 +// member2 - offset: 8, size: 4 +// padding - offset: 12, size: 4 +// member3 - offset: 16, size: 8 +// +// If the [[gnu::aligned]] wasn't there, the layout would instead be: +// member1 - offset: 0, size: 4 +// member2 - offset: 4, size: 4 +// member3 - offset: 8, size: 8 +// +// Furthermore, that alignment must be the same as what was used in the old __compressed_pair layout, so we must +// handle reference types specially since alignof(T&) == alignof(T). +// See https://github.com/llvm/llvm-project/issues/118559. -template -struct __compressed_pair_elem<_Tp, _Idx, true> : private _Tp { - using _ParamT = _Tp; - using reference = _Tp&; - using const_reference = const _Tp&; - using __value_type = _Tp; - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit __compressed_pair_elem() = default; - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit __compressed_pair_elem(__default_init_tag) {} - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit __compressed_pair_elem(__value_init_tag) : __value_type() {} - - template >::value, int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit __compressed_pair_elem(_Up&& __u) - : __value_type(std::forward<_Up>(__u)) {} - -#ifndef _LIBCPP_CXX03_LANG - template - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 - __compressed_pair_elem(piecewise_construct_t, tuple<_Args...> __args, __tuple_indices<_Indices...>) - : __value_type(std::forward<_Args>(std::get<_Indices>(__args))...) 
{} -#endif +#ifndef _LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 reference __get() _NOEXCEPT { return *this; } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR const_reference __get() const _NOEXCEPT { return *this; } -}; +template +inline const size_t __compressed_pair_alignment = _LIBCPP_ALIGNOF(_Tp); -template -class __compressed_pair : private __compressed_pair_elem<_T1, 0>, private __compressed_pair_elem<_T2, 1> { -public: - // NOTE: This static assert should never fire because __compressed_pair - // is *almost never* used in a scenario where it's possible for T1 == T2. - // (The exception is std::function where it is possible that the function - // object and the allocator have the same type). - static_assert( - (!is_same<_T1, _T2>::value), - "__compressed_pair cannot be instantiated when T1 and T2 are the same type; " - "The current implementation is NOT ABI-compatible with the previous implementation for this configuration"); - - using _Base1 _LIBCPP_NODEBUG = __compressed_pair_elem<_T1, 0>; - using _Base2 _LIBCPP_NODEBUG = __compressed_pair_elem<_T2, 1>; - - template , _Dummy>::value && - __dependent_type, _Dummy>::value, - int> = 0> - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit __compressed_pair() - : _Base1(__value_init_tag()), _Base2(__value_init_tag()) {} - - template - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR explicit __compressed_pair(_U1&& __t1, _U2&& __t2) - : _Base1(std::forward<_U1>(__t1)), _Base2(std::forward<_U2>(__t2)) {} - -#ifndef _LIBCPP_CXX03_LANG - template - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 explicit __compressed_pair( - piecewise_construct_t __pc, tuple<_Args1...> __first_args, tuple<_Args2...> __second_args) - : _Base1(__pc, std::move(__first_args), typename __make_tuple_indices::type()), - _Base2(__pc, std::move(__second_args), typename __make_tuple_indices::type()) {} -#endif +template +inline const size_t __compressed_pair_alignment<_Tp&> = _LIBCPP_ALIGNOF(void*); - 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename _Base1::reference first() _NOEXCEPT { - return static_cast<_Base1&>(*this).__get(); - } - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename _Base1::const_reference first() const _NOEXCEPT { - return static_cast<_Base1 const&>(*this).__get(); - } - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 typename _Base2::reference second() _NOEXCEPT { - return static_cast<_Base2&>(*this).__get(); - } - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR typename _Base2::const_reference second() const _NOEXCEPT { - return static_cast<_Base2 const&>(*this).__get(); - } - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR static _Base1* __get_first_base(__compressed_pair* __pair) _NOEXCEPT { - return static_cast<_Base1*>(__pair); - } - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR static _Base2* __get_second_base(__compressed_pair* __pair) _NOEXCEPT { - return static_cast<_Base2*>(__pair); - } - - _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void swap(__compressed_pair& __x) - _NOEXCEPT_(__is_nothrow_swappable_v<_T1>&& __is_nothrow_swappable_v<_T2>) { - using std::swap; - swap(first(), __x.first()); - swap(second(), __x.second()); - } +template ::value && !__libcpp_is_final<_ToPad>::value) || + is_reference<_ToPad>::value || sizeof(_ToPad) == __datasizeof_v<_ToPad>)> +class __compressed_pair_padding { + char __padding_[sizeof(_ToPad) - __datasizeof_v<_ToPad>] = {}; }; -template -inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 void -swap(__compressed_pair<_T1, _T2>& __x, __compressed_pair<_T1, _T2>& __y) - _NOEXCEPT_(__is_nothrow_swappable_v<_T1>&& __is_nothrow_swappable_v<_T2>) { - __x.swap(__y); -} +template +class __compressed_pair_padding<_ToPad, true> {}; + +# define _LIBCPP_COMPRESSED_PAIR(T1, Initializer1, T2, Initializer2) \ + _LIBCPP_NO_UNIQUE_ADDRESS __attribute__((__aligned__(::std::__compressed_pair_alignment))) T1 Initializer1; \ + _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding 
_LIBCPP_CONCAT3(__padding1_, __LINE__, _); \ + _LIBCPP_NO_UNIQUE_ADDRESS T2 Initializer2; \ + _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding _LIBCPP_CONCAT3(__padding2_, __LINE__, _) + +# define _LIBCPP_COMPRESSED_TRIPLE(T1, Initializer1, T2, Initializer2, T3, Initializer3) \ + _LIBCPP_NO_UNIQUE_ADDRESS \ + __attribute__((__aligned__(::std::__compressed_pair_alignment), \ + __aligned__(::std::__compressed_pair_alignment))) T1 Initializer1; \ + _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding _LIBCPP_CONCAT3(__padding1_, __LINE__, _); \ + _LIBCPP_NO_UNIQUE_ADDRESS T2 Initializer2; \ + _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding _LIBCPP_CONCAT3(__padding2_, __LINE__, _); \ + _LIBCPP_NO_UNIQUE_ADDRESS T3 Initializer3; \ + _LIBCPP_NO_UNIQUE_ADDRESS ::std::__compressed_pair_padding _LIBCPP_CONCAT3(__padding3_, __LINE__, _) + +#else +# define _LIBCPP_COMPRESSED_PAIR(T1, Name1, T2, Name2) \ + _LIBCPP_NO_UNIQUE_ADDRESS T1 Name1; \ + _LIBCPP_NO_UNIQUE_ADDRESS T2 Name2 + +# define _LIBCPP_COMPRESSED_TRIPLE(T1, Name1, T2, Name2, T3, Name3) \ + _LIBCPP_NO_UNIQUE_ADDRESS T1 Name1; \ + _LIBCPP_NO_UNIQUE_ADDRESS T2 Name2; \ + _LIBCPP_NO_UNIQUE_ADDRESS T3 Name3 +#endif // _LIBCPP_ABI_NO_COMPRESSED_PAIR_PADDING _LIBCPP_END_NAMESPACE_STD -_LIBCPP_POP_MACROS - #endif // _LIBCPP___MEMORY_COMPRESSED_PAIR_H diff --git a/lib/libcxx/include/__memory/construct_at.h b/lib/libcxx/include/__memory/construct_at.h index eb0213248006..1f129d17970b 100644 --- a/lib/libcxx/include/__memory/construct_at.h +++ b/lib/libcxx/include/__memory/construct_at.h @@ -14,13 +14,12 @@ #include <__config> #include <__iterator/access.h> #include <__memory/addressof.h> -#include <__memory/voidify.h> +#include <__new/placement_new_delete.h> #include <__type_traits/enable_if.h> #include <__type_traits/is_array.h> #include <__utility/declval.h> #include <__utility/forward.h> #include <__utility/move.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC 
system_header @@ -38,7 +37,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template ()) _Tp(std::declval<_Args>()...))> _LIBCPP_HIDE_FROM_ABI constexpr _Tp* construct_at(_Tp* __location, _Args&&... __args) { _LIBCPP_ASSERT_NON_NULL(__location != nullptr, "null pointer given to construct_at"); - return ::new (std::__voidify(*__location)) _Tp(std::forward<_Args>(__args)...); + return ::new (static_cast(__location)) _Tp(std::forward<_Args>(__args)...); } #endif @@ -49,7 +48,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Tp* __construct_at(_Tp* __l return std::construct_at(__location, std::forward<_Args>(__args)...); #else return _LIBCPP_ASSERT_NON_NULL(__location != nullptr, "null pointer given to construct_at"), - ::new (std::__voidify(*__location)) _Tp(std::forward<_Args>(__args)...); + ::new (static_cast(__location)) _Tp(std::forward<_Args>(__args)...); #endif } diff --git a/lib/libcxx/include/__memory/destruct_n.h b/lib/libcxx/include/__memory/destruct_n.h index 78635ad0af04..db227a4ea1dc 100644 --- a/lib/libcxx/include/__memory/destruct_n.h +++ b/lib/libcxx/include/__memory/destruct_n.h @@ -10,9 +10,9 @@ #define _LIBCPP___MEMORY_DESTRUCT_N_H #include <__config> +#include <__cstddef/size_t.h> #include <__type_traits/integral_constant.h> #include <__type_traits/is_trivially_destructible.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -25,35 +25,35 @@ struct __destruct_n { size_t __size_; template - _LIBCPP_HIDE_FROM_ABI void __process(_Tp* __p, false_type) _NOEXCEPT { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __process(_Tp* __p, false_type) _NOEXCEPT { for (size_t __i = 0; __i < __size_; ++__i, ++__p) __p->~_Tp(); } template - _LIBCPP_HIDE_FROM_ABI void __process(_Tp*, true_type) _NOEXCEPT {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __process(_Tp*, true_type) _NOEXCEPT {} - _LIBCPP_HIDE_FROM_ABI void __incr(false_type) _NOEXCEPT { ++__size_; } - _LIBCPP_HIDE_FROM_ABI void __incr(true_type) 
_NOEXCEPT {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __incr(false_type) _NOEXCEPT { ++__size_; } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __incr(true_type) _NOEXCEPT {} - _LIBCPP_HIDE_FROM_ABI void __set(size_t __s, false_type) _NOEXCEPT { __size_ = __s; } - _LIBCPP_HIDE_FROM_ABI void __set(size_t, true_type) _NOEXCEPT {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __set(size_t __s, false_type) _NOEXCEPT { __size_ = __s; } + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __set(size_t, true_type) _NOEXCEPT {} public: - _LIBCPP_HIDE_FROM_ABI explicit __destruct_n(size_t __s) _NOEXCEPT : __size_(__s) {} + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 explicit __destruct_n(size_t __s) _NOEXCEPT : __size_(__s) {} template - _LIBCPP_HIDE_FROM_ABI void __incr() _NOEXCEPT { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __incr() _NOEXCEPT { __incr(integral_constant::value>()); } template - _LIBCPP_HIDE_FROM_ABI void __set(size_t __s, _Tp*) _NOEXCEPT { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void __set(size_t __s, _Tp*) _NOEXCEPT { __set(__s, integral_constant::value>()); } template - _LIBCPP_HIDE_FROM_ABI void operator()(_Tp* __p) _NOEXCEPT { + _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 void operator()(_Tp* __p) _NOEXCEPT { __process(__p, integral_constant::value>()); } }; diff --git a/lib/libcxx/include/__memory/inout_ptr.h b/lib/libcxx/include/__memory/inout_ptr.h index e5f3ac5d027e..b0e75937927c 100644 --- a/lib/libcxx/include/__memory/inout_ptr.h +++ b/lib/libcxx/include/__memory/inout_ptr.h @@ -15,6 +15,7 @@ #include <__memory/pointer_traits.h> #include <__memory/shared_ptr.h> #include <__memory/unique_ptr.h> +#include <__type_traits/is_pointer.h> #include <__type_traits/is_same.h> #include <__type_traits/is_specialization.h> #include <__type_traits/is_void.h> diff --git a/lib/libcxx/include/__type_traits/noexcept_move_assign_container.h 
b/lib/libcxx/include/__memory/noexcept_move_assign_container.h similarity index 85% rename from lib/libcxx/include/__type_traits/noexcept_move_assign_container.h rename to lib/libcxx/include/__memory/noexcept_move_assign_container.h index baaf36d9980e..b0063516aaaf 100644 --- a/lib/libcxx/include/__type_traits/noexcept_move_assign_container.h +++ b/lib/libcxx/include/__memory/noexcept_move_assign_container.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef _LIBCPP___TYPE_TRAITS_NOEXCEPT_MOVE_ASSIGN_CONTAINER_H -#define _LIBCPP___TYPE_TRAITS_NOEXCEPT_MOVE_ASSIGN_CONTAINER_H +#ifndef _LIBCPP___MEMORY_NOEXCEPT_MOVE_ASSIGN_CONTAINER_H +#define _LIBCPP___MEMORY_NOEXCEPT_MOVE_ASSIGN_CONTAINER_H #include <__config> #include <__memory/allocator_traits.h> @@ -34,4 +34,4 @@ struct __noexcept_move_assign_container _LIBCPP_END_NAMESPACE_STD -#endif // _LIBCPP___TYPE_TRAITS_NOEXCEPT_MOVE_ASSIGN_CONTAINER_H +#endif // _LIBCPP___MEMORY_NOEXCEPT_MOVE_ASSIGN_CONTAINER_H diff --git a/lib/libcxx/include/__memory/out_ptr.h b/lib/libcxx/include/__memory/out_ptr.h index fd99110790cc..030a4c3b0ed0 100644 --- a/lib/libcxx/include/__memory/out_ptr.h +++ b/lib/libcxx/include/__memory/out_ptr.h @@ -15,6 +15,7 @@ #include <__memory/pointer_traits.h> #include <__memory/shared_ptr.h> #include <__memory/unique_ptr.h> +#include <__type_traits/is_pointer.h> #include <__type_traits/is_specialization.h> #include <__type_traits/is_void.h> #include <__utility/forward.h> diff --git a/lib/libcxx/include/__memory/pointer_traits.h b/lib/libcxx/include/__memory/pointer_traits.h index 0914aceb318b..afe3d1bf8a2d 100644 --- a/lib/libcxx/include/__memory/pointer_traits.h +++ b/lib/libcxx/include/__memory/pointer_traits.h @@ -11,17 +11,19 @@ #define _LIBCPP___MEMORY_POINTER_TRAITS_H #include <__config> +#include <__cstddef/ptrdiff_t.h> #include <__memory/addressof.h> #include <__type_traits/conditional.h> #include <__type_traits/conjunction.h> #include 
<__type_traits/decay.h> +#include <__type_traits/enable_if.h> +#include <__type_traits/integral_constant.h> #include <__type_traits/is_class.h> #include <__type_traits/is_function.h> #include <__type_traits/is_void.h> #include <__type_traits/void_t.h> #include <__utility/declval.h> #include <__utility/forward.h> -#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -48,17 +50,17 @@ struct __pointer_traits_element_type {}; template struct __pointer_traits_element_type<_Ptr, true> { - typedef _LIBCPP_NODEBUG typename _Ptr::element_type type; + using type _LIBCPP_NODEBUG = typename _Ptr::element_type; }; template