From 5b7b0090c80f0ef1b25f7814f3682ad7099ab556 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 9 Apr 2021 11:21:49 -0700 Subject: [PATCH 1/7] [WebAssembly] Add shuffles as an option for lowering BUILD_VECTOR When lowering a BUILD_VECTOR SDNode, we choose among various possible vector creation instructions in an attempt to minimize the total number of instructions used. We previously considered using swizzles, consts, and splats, and this patch adds shuffles as well. A common pattern that now lowers to shuffles is when two 64-bit vectors are concatenated. Previously, concatenations generally lowered to sequences of extract_lane and replace_lane instructions when they could have been a single shuffle. Differential Revision: https://reviews.llvm.org/D100018 --- .../WebAssembly/WebAssemblyISelLowering.cpp | 95 +++++++++++++++++-- .../CodeGen/WebAssembly/simd-build-vector.ll | 16 ++++ llvm/test/CodeGen/WebAssembly/simd-concat.ll | 79 +++++++++++++++ 3 files changed, 184 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/WebAssembly/simd-concat.ll diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index c519c7d76c54c..322020638d9fe 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1600,8 +1600,8 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, // TODO: Tune this. For example, lanewise swizzling is very expensive, so // swizzled lanes should be given greater weight. - // TODO: Investigate building vectors by shuffling together vectors built by - // separately specialized means. + // TODO: Investigate looping rather than always extracting/replacing specific + // lanes to fill gaps. auto IsConstant = [](const SDValue &V) { return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP; @@ -1632,12 +1632,30 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op, return std::make_pair(SwizzleSrc, SwizzleIndices); }; + // If the lane is extracted from another vector at a constant index, return + // that vector. The source vector must not have more lanes than the dest + // because the shufflevector indices are in terms of the destination lanes and + // would not be able to address the smaller individual source lanes. 
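+  // For example, when building a v4i32, a lane extracted from a v8i16
+  // source cannot be used, because each v4i32 shuffle index selects a whole
+  // 32-bit destination lane and cannot address an individual 16-bit source
+  // lane.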
+  auto GetShuffleSrc = [&](const SDValue &Lane) {
+    if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+      return SDValue();
+    if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
+      return SDValue();
+    if (Lane->getOperand(0).getValueType().getVectorNumElements() >
+        VecT.getVectorNumElements())
+      return SDValue();
+    return Lane->getOperand(0);
+  };
+
   using ValueEntry = std::pair<SDValue, size_t>;
   SmallVector<ValueEntry, 16> SplatValueCounts;

   using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
   SmallVector<SwizzleEntry, 16> SwizzleCounts;

+  using ShuffleEntry = std::pair<SDValue, size_t>;
+  SmallVector<ShuffleEntry, 16> ShuffleCounts;
+
   auto AddCount = [](auto &Counts, const auto &Val) {
     auto CountIt =
         llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
@@ -1666,9 +1684,11 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,

     AddCount(SplatValueCounts, Lane);

-    if (IsConstant(Lane)) {
+    if (IsConstant(Lane))
       NumConstantLanes++;
-    } else if (CanSwizzle) {
+    if (auto ShuffleSrc = GetShuffleSrc(Lane))
+      AddCount(ShuffleCounts, ShuffleSrc);
+    if (CanSwizzle) {
       auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
       if (SwizzleSrcs.first)
         AddCount(SwizzleCounts, SwizzleSrcs);
@@ -1686,18 +1706,81 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
   std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices), NumSwizzleLanes) =
       GetMostCommon(SwizzleCounts);

+  // Shuffles can draw from up to two vectors, so find the two most common
+  // sources.
+  SDValue ShuffleSrc1, ShuffleSrc2;
+  size_t NumShuffleLanes = 0;
+  if (ShuffleCounts.size()) {
+    std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
+    ShuffleCounts.erase(std::remove_if(ShuffleCounts.begin(),
+                                       ShuffleCounts.end(),
+                                       [&](const auto &Pair) {
+                                         return Pair.first == ShuffleSrc1;
+                                       }),
+                        ShuffleCounts.end());
+  }
+  if (ShuffleCounts.size()) {
+    size_t AdditionalShuffleLanes;
+    std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
+        GetMostCommon(ShuffleCounts);
+    NumShuffleLanes += AdditionalShuffleLanes;
+  }
+
   // Predicate returning true if the lane is properly initialized by the
   // original instruction
   std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
   SDValue Result;
-  // Prefer swizzles over vector consts over splats
-  if (NumSwizzleLanes >= NumSplatLanes && NumSwizzleLanes >= NumConstantLanes) {
+  // Prefer swizzles over shuffles over vector consts over splats
+  if (NumSwizzleLanes >= NumShuffleLanes &&
+      NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
     Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
                          SwizzleIndices);
     auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
     IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
       return Swizzled == GetSwizzleSrcs(I, Lane);
     };
+  } else if (NumShuffleLanes >= NumConstantLanes &&
+             NumShuffleLanes >= NumSplatLanes) {
+    size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
+    size_t DestLaneCount = VecT.getVectorNumElements();
+    size_t Scale1 = 1;
+    size_t Scale2 = 1;
+    SDValue Src1 = ShuffleSrc1;
+    SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
+    if (Src1.getValueType() != VecT) {
+      size_t LaneSize =
+          Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
+      assert(LaneSize > DestLaneSize);
+      Scale1 = LaneSize / DestLaneSize;
+      Src1 = DAG.getBitcast(VecT, Src1);
+    }
+    if (Src2.getValueType() != VecT) {
+      size_t LaneSize =
+          Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
+      assert(LaneSize > DestLaneSize);
+      Scale2 = LaneSize / DestLaneSize;
+      Src2 = DAG.getBitcast(VecT, Src2);
+    }
+
+    int Mask[16];
+    assert(DestLaneCount <= 16);
+    for (size_t I = 0; I < DestLaneCount; ++I) {
+      const SDValue &Lane = Op->getOperand(I);
+      SDValue Src = GetShuffleSrc(Lane);
+      if (Src == ShuffleSrc1) {
+        Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
+      } else if (Src && Src == ShuffleSrc2) {
+        Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
+      } else {
+        Mask[I] = -1;
+      }
+    }
+    ArrayRef<int> MaskRef(Mask, DestLaneCount);
+    Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
+    IsLaneConstructed = [&](size_t, const SDValue &Lane) {
+      auto Src = GetShuffleSrc(Lane);
+      return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
+    };
   } else if (NumConstantLanes >= NumSplatLanes) {
     SmallVector<SDValue, 16> ConstLanes;
     for (const SDValue &Lane : Op->op_values()) {
diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
index c1060ea1101fe..7003714098f25 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll
@@ -165,6 +165,22 @@ define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
   ret <8 x i16> %v0
 }

+; CHECK-LABEL: half_shuffle_i32x4:
+; CHECK-NEXT:  .functype half_shuffle_i32x4 (v128) -> (v128)
+; CHECK:       i8x16.shuffle $push[[L0:[0-9]+]]=, $0, $0, 0, 0, 0, 0, 8, 9, 10, 11, 0, 1, 2, 3, 0, 0, 0, 0
+; CHECK:       i32x4.replace_lane
+; CHECK:       i32x4.replace_lane
+; CHECK:       return
+define <4 x i32> @half_shuffle_i32x4(<4 x i32> %src) {
+  %s0 = extractelement <4 x i32> %src, i32 0
+  %s2 = extractelement <4 x i32> %src, i32 2
+  %v0 = insertelement <4 x i32> undef, i32 0, i32 0
+  %v1 = insertelement <4 x i32> %v0, i32 %s2, i32 1
+  %v2 = insertelement <4 x i32> %v1, i32 %s0, i32 2
+  %v3 = insertelement <4 x i32> %v2, i32 3, i32 3
+  ret <4 x i32> %v3
+}
+
 ; CHECK-LABEL: mashup_swizzle_i8x16:
 ; CHECK-NEXT:  .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
 ; CHECK-NEXT:  i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
diff --git a/llvm/test/CodeGen/WebAssembly/simd-concat.ll b/llvm/test/CodeGen/WebAssembly/simd-concat.ll
new file mode 100644
index 0000000000000..21fe627f125c1
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-concat.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
+
+; Check that all varieties of vector concatenations get lowered to shuffles.
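+;
+; Note on the expected masks: inputs with illegal types (e.g. <8 x i8>) are
+; legalized by promoting their elements to wider integer lanes, so on this
+; little-endian target the payload bytes land at even offsets. That is why
+; the i8x16.shuffle masks below select bytes 0, 2, 4, ... for <8 x i8>
+; inputs rather than consecutive bytes.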
+
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
+target triple = "wasm32-unknown--wasm"
+
+define <16 x i8> @concat_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: concat_v8i8:
+; CHECK:         .functype concat_v8i8 (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i8x16.shuffle 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK-NEXT:    # fallthrough-return
+  %v = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+  ret <16 x i8> %v
+}
+
+define <8 x i8> @concat_v4i8(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: concat_v4i8:
+; CHECK:         .functype concat_v4i8 (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+; CHECK-NEXT:    # fallthrough-return
+  %v = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i8> %v
+}
+
+define <8 x i16> @concat_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: concat_v4i16:
+; CHECK:         .functype concat_v4i16 (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29
+; CHECK-NEXT:    # fallthrough-return
+  %v = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x i16> %v
+}
+
+define <4 x i8> @concat_v2i8(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: concat_v2i8:
+; CHECK:         .functype concat_v2i8 (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT:    # fallthrough-return
+  %v = shufflevector <2 x i8> %a, <2 x i8> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i8> %v
+}
+
+define <4 x i16> @concat_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: concat_v2i16:
+; CHECK:         .functype concat_v2i16 (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT:    # fallthrough-return
+  %v = shufflevector <2 x i16> %a, <2 x i16> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i16> %v
+}
+
+define <4 x i32> @concat_v2i32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: concat_v2i32:
+; CHECK:         .functype concat_v2i32 (v128, v128) -> (v128)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    local.get 0
+; CHECK-NEXT:    local.get 1
+; CHECK-NEXT:    i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT:    # fallthrough-return
+  %v = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i32> %v
+}

From e07c0f553c2333562173c0ffc57fa87d7d70e4d3 Mon Sep 17 00:00:00 2001
From: Thomas Lively
Date: Sun, 11 Apr 2021 11:13:16 -0700
Subject: [PATCH 2/7] [WebAssembly] Update v128.any_true

In the final SIMD spec, there is only a single v128.any_true instruction,
rather than one for each lane interpretation because the semantics do not
depend on the lane interpretation.
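For example, the v16i8 and v8i16 any_true reductions now lower to the same
instruction (an illustrative pair of IR snippets mirroring the updated
tests):

  %a = call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> %x)
  %b = call i32 @llvm.wasm.anytrue.v8i16(<8 x i16> %y)

Both calls select to v128.any_true, encoded as 0xfd 0x53.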
Differential Revision: https://reviews.llvm.org/D100241
---
 .../WebAssembly/WebAssemblyInstrSIMD.td       | 36 +++++++++++++++----
 .../CodeGen/WebAssembly/simd-intrinsics.ll    |  8 ++---
 .../CodeGen/WebAssembly/simd-reductions.ll    | 24 ++++++-------
 .../test/MC/Disassembler/WebAssembly/wasm.txt |  3 +-
 llvm/test/MC/WebAssembly/simd-encodings.s     |  3 +-
 5 files changed, 48 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index f8df4d35de92e..9bdeab5f4db96 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -822,22 +822,44 @@ defm ABS : SIMDUnaryInt<abs, "abs", 0x60>;
 // Integer negation: neg
 defm NEG : SIMDUnaryInt<ivneg, "neg", 0x61>;

+// Population count: popcnt
+defm POPCNT : SIMDUnary<I8x16, int_wasm_popcnt, "popcnt", 0x62>;
+
 // Any lane true: any_true
-defm ANYTRUE : SIMDReduce<int_wasm_anytrue, "any_true", 0x62>;
+defm ANYTRUE : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins), [],
+                      "v128.any_true\t$dst, $vec", "v128.any_true", 0x53>;
+
+foreach vec = IntVecs in
+def : Pat<(int_wasm_anytrue (vec.vt V128:$vec)), (ANYTRUE V128:$vec)>;

 // All lanes true: all_true
-defm ALLTRUE : SIMDReduce<int_wasm_alltrue, "all_true", 0x63>;
+multiclass SIMDAllTrue<Vec vec, bits<32> simdop> {
+  defm ALLTRUE_#vec : SIMD_I<(outs I32:$dst), (ins V128:$vec), (outs), (ins),
+                             [(set I32:$dst,
+                                   (i32 (int_wasm_alltrue (vec.vt V128:$vec))))],
+                             vec.prefix#".all_true\t$dst, $vec",
+                             vec.prefix#".all_true", simdop>;
+}

-// Population count: popcnt
-defm POPCNT : SIMDUnary<I8x16, int_wasm_popcnt, "popcnt", 0x62>;
+defm "" : SIMDAllTrue<I8x16, 0x63>;
+defm "" : SIMDAllTrue<I16x8, 0x83>;
+defm "" : SIMDAllTrue<I32x4, 0xa3>;
+defm "" : SIMDAllTrue<I64x2, 0xc3>;

 // Reductions already return 0 or 1, so and 1, setne 0, and seteq 1
 // can be folded out
 foreach reduction =
-  [["int_wasm_anytrue", "ANYTRUE"], ["int_wasm_alltrue", "ALLTRUE"]] in
-foreach vec = IntVecs in {
+  [["int_wasm_anytrue", "ANYTRUE", "I8x16"],
+   ["int_wasm_anytrue", "ANYTRUE", "I16x8"],
+   ["int_wasm_anytrue", "ANYTRUE", "I32x4"],
+   ["int_wasm_anytrue", "ANYTRUE", "I64x2"],
+   ["int_wasm_alltrue", "ALLTRUE_I8x16", "I8x16"],
+   ["int_wasm_alltrue", "ALLTRUE_I16x8", "I16x8"],
+   ["int_wasm_alltrue", "ALLTRUE_I32x4", "I32x4"],
+   ["int_wasm_alltrue", "ALLTRUE_I64x2", "I64x2"]] in {
 defvar intrinsic = !cast<Intrinsic>(reduction[0]);
-defvar inst = !cast<NI>(reduction[1]#"_"#vec);
+defvar inst = !cast<NI>(reduction[1]);
+defvar vec = !cast<Vec>(reduction[2]);
 def : Pat<(i32 (and (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
 def : Pat<(i32 (setne (i32 (intrinsic (vec.vt V128:$x))), (i32 0))), (inst $x)>;
 def : Pat<(i32 (seteq (i32 (intrinsic (vec.vt V128:$x))), (i32 1))), (inst $x)>;
diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
index e78b167ed0ed6..5d98f2b563783 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -87,7 +87,7 @@ define <16 x i8> @popcnt_v16i8(<16 x i8> %x) {

 ; CHECK-LABEL: any_v16i8:
 ; CHECK-NEXT: .functype any_v16i8 (v128) -> (i32){{$}}
-; CHECK-NEXT: i8x16.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 declare i32 @llvm.wasm.anytrue.v16i8(<16 x i8>)
 define i32 @any_v16i8(<16 x i8> %x) {
@@ -319,7 +319,7 @@ define <8 x i16> @extadd_pairwise_u_v8i16(<16 x i8> %x) {

 ; CHECK-LABEL: any_v8i16:
 ; CHECK-NEXT: .functype any_v8i16 (v128) -> (i32){{$}}
-; CHECK-NEXT: i16x8.any_true $push[[R:[0-9]+]]=, $0{{$}}
+; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
 declare i32
@llvm.wasm.anytrue.v8i16(<8 x i16>) define i32 @any_v8i16(<8 x i16> %x) { @@ -468,7 +468,7 @@ define <4 x i32> @extadd_pairwise_u_v4i32(<8 x i16> %x) { ; CHECK-LABEL: any_v4i32: ; CHECK-NEXT: .functype any_v4i32 (v128) -> (i32){{$}} -; CHECK-NEXT: i32x4.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} declare i32 @llvm.wasm.anytrue.v4i32(<4 x i32>) define i32 @any_v4i32(<4 x i32> %x) { @@ -643,7 +643,7 @@ define <2 x i64> @extmul_high_u_v2i64(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: any_v2i64: ; CHECK-NEXT: .functype any_v2i64 (v128) -> (i32){{$}} -; CHECK-NEXT: i64x2.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} declare i32 @llvm.wasm.anytrue.v2i64(<2 x i64>) define i32 @any_v2i64(<2 x i64> %x) { diff --git a/llvm/test/CodeGen/WebAssembly/simd-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-reductions.ll index 259ef3b3a81fe..500a4495028f0 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-reductions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-reductions.ll @@ -14,7 +14,7 @@ declare i32 @llvm.wasm.alltrue.v16i8(<16 x i8>) ; CHECK-LABEL: any_v16i8_trunc: ; CHECK-NEXT: .functype any_v16i8_trunc (v128) -> (i32){{$}} -; CHECK-NEXT: i8x16.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define i32 @any_v16i8_trunc(<16 x i8> %x) { %a = call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> %x) @@ -25,7 +25,7 @@ define i32 @any_v16i8_trunc(<16 x i8> %x) { ; CHECK-LABEL: any_v16i8_ne: ; CHECK-NEXT: .functype any_v16i8_ne (v128) -> (i32){{$}} -; CHECK-NEXT: i8x16.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define i32 @any_v16i8_ne(<16 x i8> %x) { %a = call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> %x) @@ -36,7 +36,7 @@ define i32 @any_v16i8_ne(<16 x i8> %x) { ; CHECK-LABEL: any_v16i8_eq: ; CHECK-NEXT: .functype any_v16i8_eq (v128) -> (i32){{$}} -; CHECK-NEXT: i8x16.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define i32 @any_v16i8_eq(<16 x i8> %x) { %a = call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> %x) @@ -86,7 +86,7 @@ declare i32 @llvm.wasm.alltrue.v8i16(<8 x i16>) ; CHECK-LABEL: any_v8i16_trunc: ; CHECK-NEXT: .functype any_v8i16_trunc (v128) -> (i32){{$}} -; CHECK-NEXT: i16x8.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define i32 @any_v8i16_trunc(<8 x i16> %x) { %a = call i32 @llvm.wasm.anytrue.v8i16(<8 x i16> %x) @@ -97,7 +97,7 @@ define i32 @any_v8i16_trunc(<8 x i16> %x) { ; CHECK-LABEL: any_v8i16_ne: ; CHECK-NEXT: .functype any_v8i16_ne (v128) -> (i32){{$}} -; CHECK-NEXT: i16x8.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define i32 @any_v8i16_ne(<8 x i16> %x) { %a = call i32 @llvm.wasm.anytrue.v8i16(<8 x i16> %x) @@ -108,7 +108,7 @@ define i32 @any_v8i16_ne(<8 x i16> %x) { ; CHECK-LABEL: any_v8i16_eq: ; CHECK-NEXT: .functype any_v8i16_eq (v128) -> (i32){{$}} -; CHECK-NEXT: i16x8.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define i32 @any_v8i16_eq(<8 x i16> %x) { %a = call i32 @llvm.wasm.anytrue.v8i16(<8 x i16> %x) @@ -158,7 +158,7 @@ declare 
i32 @llvm.wasm.alltrue.v4i32(<4 x i32>) ; CHECK-LABEL: any_v4i32_trunc: ; CHECK-NEXT: .functype any_v4i32_trunc (v128) -> (i32){{$}} -; CHECK-NEXT: i32x4.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define i32 @any_v4i32_trunc(<4 x i32> %x) { %a = call i32 @llvm.wasm.anytrue.v4i32(<4 x i32> %x) @@ -169,7 +169,7 @@ define i32 @any_v4i32_trunc(<4 x i32> %x) { ; CHECK-LABEL: any_v4i32_ne: ; CHECK-NEXT: .functype any_v4i32_ne (v128) -> (i32){{$}} -; CHECK-NEXT: i32x4.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define i32 @any_v4i32_ne(<4 x i32> %x) { %a = call i32 @llvm.wasm.anytrue.v4i32(<4 x i32> %x) @@ -180,7 +180,7 @@ define i32 @any_v4i32_ne(<4 x i32> %x) { ; CHECK-LABEL: any_v4i32_eq: ; CHECK-NEXT: .functype any_v4i32_eq (v128) -> (i32){{$}} -; CHECK-NEXT: i32x4.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define i32 @any_v4i32_eq(<4 x i32> %x) { %a = call i32 @llvm.wasm.anytrue.v4i32(<4 x i32> %x) @@ -230,7 +230,7 @@ declare i32 @llvm.wasm.alltrue.v2i64(<2 x i64>) ; CHECK-LABEL: any_v2i64_trunc: ; CHECK-NEXT: .functype any_v2i64_trunc (v128) -> (i32){{$}} -; CHECK-NEXT: i64x2.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define i32 @any_v2i64_trunc(<2 x i64> %x) { %a = call i32 @llvm.wasm.anytrue.v2i64(<2 x i64> %x) @@ -241,7 +241,7 @@ define i32 @any_v2i64_trunc(<2 x i64> %x) { ; CHECK-LABEL: any_v2i64_ne: ; CHECK-NEXT: .functype any_v2i64_ne (v128) -> (i32){{$}} -; CHECK-NEXT: i64x2.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define i32 @any_v2i64_ne(<2 x i64> %x) { %a = call i32 @llvm.wasm.anytrue.v2i64(<2 x i64> %x) @@ -252,7 +252,7 @@ define i32 @any_v2i64_ne(<2 x i64> %x) { ; CHECK-LABEL: any_v2i64_eq: ; CHECK-NEXT: .functype any_v2i64_eq (v128) -> (i32){{$}} -; CHECK-NEXT: i64x2.any_true $push[[R:[0-9]+]]=, $0{{$}} +; CHECK-NEXT: v128.any_true $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} define i32 @any_v2i64_eq(<2 x i64> %x) { %a = call i32 @llvm.wasm.anytrue.v2i64(<2 x i64> %x) diff --git a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt index 783d59416f9de..0cbf584d9688e 100644 --- a/llvm/test/MC/Disassembler/WebAssembly/wasm.txt +++ b/llvm/test/MC/Disassembler/WebAssembly/wasm.txt @@ -43,8 +43,7 @@ 0xFD 0x83 0x01 # Including non-canonical LEB128 encodings -# CHECK: i16x8.any_true -# CHECK-NOT: i16x8.neg +# CHECK: i16x8.q15mulr_sat_s 0xFD 0x82 0x81 0x80 0x80 0x80 0x80 0x00 # Check br_table, which has its own operand type. 
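# (The i16x8.q15mulr_sat_s check above decodes a non-canonical, over-long
# LEB128 encoding of the opcode: 0x82 0x81 0x80 0x80 0x80 0x80 0x00
# encodes 2 + (1 << 7) = 130 = 0x82, which is i16x8.q15mulr_sat_s in the
# final spec rather than the removed i16x8.any_true.)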
diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index 2ce4eb622906d..c1047add02b48 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -280,7 +280,8 @@ main: # CHECK: v128.bitselect # encoding: [0xfd,0x52] v128.bitselect - # TODO: v128.any_true # encoding: [0xfd,0x53] + # CHECK: v128.any_true # encoding: [0xfd,0x53] + v128.any_true # CHECK: v128.load8_lane 32, 1 # encoding: [0xfd,0x54,0x00,0x20,0x01] v128.load8_lane 32, 1 From 9890f5e00119278aa8a91a2bc46b98ee4cc1f4cc Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Tue, 13 Apr 2021 00:06:25 -0700 Subject: [PATCH 3/7] [WebAssembly] Test i64x2.abs encoding This test was disabled despite the instruction having been implemented for a long time. This commit just enables the test. Differential Revision: https://reviews.llvm.org/D100345 --- llvm/test/MC/WebAssembly/simd-encodings.s | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s index c1047add02b48..3093fb2963444 100644 --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -597,7 +597,8 @@ main: # CHECK: i32x4.extmul_high_i16x8_u # encoding: [0xfd,0xbf,0x01] i32x4.extmul_high_i16x8_u - # TODO: i64x2.abs # encoding: [0xfd,0xc0,0x01] + # CHECK: i64x2.abs # encoding: [0xfd,0xc0,0x01] + i64x2.abs # CHECK: i64x2.neg # encoding: [0xfd,0xc1,0x01] i64x2.neg From 78fe0296bb1b07e54549043c4851c759990e90a8 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 14 Apr 2021 09:19:26 -0700 Subject: [PATCH 4/7] [WebAssembly] Use standard intrinsics for f32x4 and f64x2 ops Now that these instructions are no longer prototypes, we do not need to be careful about keeping them opt-in and can use the standard LLVM infrastructure for them. This commit removes the bespoke intrinsics we were using to represent these operations in favor of the corresponding target-independent intrinsics. The clang builtins are preserved because there is no standard way to easily represent these operations in C/C++. For consistency with the scalar codegen in the Wasm backend, the intrinsic used to represent {f32x4,f64x2}.nearest is @llvm.nearbyint even though @llvm.roundeven better captures the semantics of the underlying Wasm instruction. Replacing our use of @llvm.nearbyint with use of @llvm.roundeven is left to a potential future patch. 
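As a rough illustration of why @llvm.nearbyint is an acceptable stand-in:
in the default floating-point environment both it and @llvm.roundeven round
halfway cases to even, matching the Wasm instruction, e.g.

  f32x4.nearest([-0.5, 0.5, 1.5, 2.5]) == [-0.0, 0.0, 2.0, 2.0]

They differ in that nearbyint follows the dynamic rounding mode, while
roundeven (and the Wasm instruction) always rounds ties to even.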
Differential Revision: https://reviews.llvm.org/D100411 --- clang/lib/CodeGen/CGBuiltin.cpp | 8 +-- clang/test/CodeGen/builtins-wasm.c | 16 ++--- llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 20 ------ .../WebAssembly/WebAssemblyISelLowering.cpp | 3 +- .../WebAssembly/WebAssemblyInstrSIMD.td | 16 ++--- .../CodeGen/WebAssembly/simd-intrinsics.ll | 32 +++++----- .../CodeGen/WebAssembly/simd-unsupported.ll | 64 ------------------- 7 files changed, 37 insertions(+), 122 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 1fd79306596e6..ea6d039fd2038 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -16819,19 +16819,19 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_ceil_f32x4: case WebAssembly::BI__builtin_wasm_ceil_f64x2: - IntNo = Intrinsic::wasm_ceil; + IntNo = Intrinsic::ceil; break; case WebAssembly::BI__builtin_wasm_floor_f32x4: case WebAssembly::BI__builtin_wasm_floor_f64x2: - IntNo = Intrinsic::wasm_floor; + IntNo = Intrinsic::floor; break; case WebAssembly::BI__builtin_wasm_trunc_f32x4: case WebAssembly::BI__builtin_wasm_trunc_f64x2: - IntNo = Intrinsic::wasm_trunc; + IntNo = Intrinsic::trunc; break; case WebAssembly::BI__builtin_wasm_nearest_f32x4: case WebAssembly::BI__builtin_wasm_nearest_f64x2: - IntNo = Intrinsic::wasm_nearest; + IntNo = Intrinsic::nearbyint; break; default: llvm_unreachable("unexpected builtin ID"); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index c27be6d909c08..7b7965c026e1a 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -792,49 +792,49 @@ f64x2 pmax_f64x2(f64x2 x, f64x2 y) { f32x4 ceil_f32x4(f32x4 x) { return __builtin_wasm_ceil_f32x4(x); - // WEBASSEMBLY: call <4 x float> @llvm.wasm.ceil.v4f32(<4 x float> %x) + // WEBASSEMBLY: call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) // WEBASSEMBLY: ret } f32x4 floor_f32x4(f32x4 x) { return __builtin_wasm_floor_f32x4(x); - // WEBASSEMBLY: call <4 x float> @llvm.wasm.floor.v4f32(<4 x float> %x) + // WEBASSEMBLY: call <4 x float> @llvm.floor.v4f32(<4 x float> %x) // WEBASSEMBLY: ret } f32x4 trunc_f32x4(f32x4 x) { return __builtin_wasm_trunc_f32x4(x); - // WEBASSEMBLY: call <4 x float> @llvm.wasm.trunc.v4f32(<4 x float> %x) + // WEBASSEMBLY: call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) // WEBASSEMBLY: ret } f32x4 nearest_f32x4(f32x4 x) { return __builtin_wasm_nearest_f32x4(x); - // WEBASSEMBLY: call <4 x float> @llvm.wasm.nearest.v4f32(<4 x float> %x) + // WEBASSEMBLY: call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) // WEBASSEMBLY: ret } f64x2 ceil_f64x2(f64x2 x) { return __builtin_wasm_ceil_f64x2(x); - // WEBASSEMBLY: call <2 x double> @llvm.wasm.ceil.v2f64(<2 x double> %x) + // WEBASSEMBLY: call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) // WEBASSEMBLY: ret } f64x2 floor_f64x2(f64x2 x) { return __builtin_wasm_floor_f64x2(x); - // WEBASSEMBLY: call <2 x double> @llvm.wasm.floor.v2f64(<2 x double> %x) + // WEBASSEMBLY: call <2 x double> @llvm.floor.v2f64(<2 x double> %x) // WEBASSEMBLY: ret } f64x2 trunc_f64x2(f64x2 x) { return __builtin_wasm_trunc_f64x2(x); - // WEBASSEMBLY: call <2 x double> @llvm.wasm.trunc.v2f64(<2 x double> %x) + // WEBASSEMBLY: call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) // WEBASSEMBLY: ret } f64x2 nearest_f64x2(f64x2 x) { return __builtin_wasm_nearest_f64x2(x); - // WEBASSEMBLY: call <2 x double> 
@llvm.wasm.nearest.v2f64(<2 x double> %x)
+  // WEBASSEMBLY: call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %x)
   // WEBASSEMBLY: ret
 }

diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index a57080d1d95b4..f4bdd07b81082 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -183,26 +183,6 @@ def int_wasm_pmax :
             [LLVMMatchType<0>, LLVMMatchType<0>],
             [IntrNoMem, IntrSpeculatable]>;

-// TODO: Replace these instrinsics with normal ISel patterns once the
-// rounding instructions are merged to the proposal
-// (https://github.com/WebAssembly/simd/pull/232).
-def int_wasm_ceil :
-  Intrinsic<[llvm_anyvector_ty],
-            [LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_floor :
-  Intrinsic<[llvm_anyvector_ty],
-            [LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_trunc :
-  Intrinsic<[llvm_anyvector_ty],
-            [LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_nearest :
-  Intrinsic<[llvm_anyvector_ty],
-            [LLVMMatchType<0>],
-            [IntrNoMem, IntrSpeculatable]>;
-
 // TODO: Replace these intrinsic with normal ISel patterns once the
 // load_zero instructions are merged to the proposal.
 def int_wasm_load32_zero :
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 322020638d9fe..bce8f8ef2105d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -180,8 +180,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
       setOperationAction(Op, T, Legal);

     // Expand float operations supported for scalars but not SIMD
-    for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
-                    ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
+    for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
                     ISD::FEXP, ISD::FEXP2, ISD::FRINT})
       for (auto T : {MVT::v4f32, MVT::v2f64})
         setOperationAction(Op, T, Expand);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 9bdeab5f4db96..1588f6aac8610 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1044,14 +1044,14 @@ defm NEG : SIMDUnaryFP<fneg, "neg", 0xe1>;
 defm SQRT : SIMDUnaryFP<fsqrt, "sqrt", 0xe3>;

 // Rounding: ceil, floor, trunc, nearest
-defm CEIL : SIMDUnary<F32x4, int_wasm_ceil, "ceil", 0x67>;
-defm FLOOR : SIMDUnary<F32x4, int_wasm_floor, "floor", 0x68>;
-defm TRUNC: SIMDUnary<F32x4, int_wasm_trunc, "trunc", 0x69>;
-defm NEAREST: SIMDUnary<F32x4, int_wasm_nearest, "nearest", 0x6a>;
-defm CEIL : SIMDUnary<F64x2, int_wasm_ceil, "ceil", 0x74>;
-defm FLOOR : SIMDUnary<F64x2, int_wasm_floor, "floor", 0x75>;
-defm TRUNC: SIMDUnary<F64x2, int_wasm_trunc, "trunc", 0x7a>;
-defm NEAREST: SIMDUnary<F64x2, int_wasm_nearest, "nearest", 0x94>;
+defm CEIL : SIMDUnary<F32x4, fceil, "ceil", 0x67>;
+defm FLOOR : SIMDUnary<F32x4, ffloor, "floor", 0x68>;
+defm TRUNC: SIMDUnary<F32x4, ftrunc, "trunc", 0x69>;
+defm NEAREST: SIMDUnary<F32x4, fnearbyint, "nearest", 0x6a>;
+defm CEIL : SIMDUnary<F64x2, fceil, "ceil", 0x74>;
+defm FLOOR : SIMDUnary<F64x2, ffloor, "floor", 0x75>;
+defm TRUNC: SIMDUnary<F64x2, ftrunc, "trunc", 0x7a>;
+defm NEAREST: SIMDUnary<F64x2, fnearbyint, "nearest", 0x94>;

 //===----------------------------------------------------------------------===//
 // Floating-point binary arithmetic
diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
index 5d98f2b563783..f28eb0b242a52 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -722,9 +722,9 @@ define <4 x float> @pmax_v4f32(<4 x float> %a, <4 x float> %b) {
 ; CHECK-NEXT: .functype ceil_v4f32 (v128) -> (v128){{$}}
 ; CHECK-NEXT: f32x4.ceil $push[[R:[0-9]+]]=, $0{{$}}
 ; CHECK-NEXT: return $pop[[R]]{{$}}
-declare <4 x float> @llvm.wasm.ceil.v4f32(<4 x float>)
+declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
define <4 x float> @ceil_v4f32(<4 x float> %a) { - %v = call <4 x float> @llvm.wasm.ceil.v4f32(<4 x float> %a) + %v = call <4 x float> @llvm.ceil.v4f32(<4 x float> %a) ret <4 x float> %v } @@ -732,9 +732,9 @@ define <4 x float> @ceil_v4f32(<4 x float> %a) { ; CHECK-NEXT: .functype floor_v4f32 (v128) -> (v128){{$}} ; CHECK-NEXT: f32x4.floor $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -declare <4 x float> @llvm.wasm.floor.v4f32(<4 x float>) +declare <4 x float> @llvm.floor.v4f32(<4 x float>) define <4 x float> @floor_v4f32(<4 x float> %a) { - %v = call <4 x float> @llvm.wasm.floor.v4f32(<4 x float> %a) + %v = call <4 x float> @llvm.floor.v4f32(<4 x float> %a) ret <4 x float> %v } @@ -742,9 +742,9 @@ define <4 x float> @floor_v4f32(<4 x float> %a) { ; CHECK-NEXT: .functype trunc_v4f32 (v128) -> (v128){{$}} ; CHECK-NEXT: f32x4.trunc $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -declare <4 x float> @llvm.wasm.trunc.v4f32(<4 x float>) +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) define <4 x float> @trunc_v4f32(<4 x float> %a) { - %v = call <4 x float> @llvm.wasm.trunc.v4f32(<4 x float> %a) + %v = call <4 x float> @llvm.trunc.v4f32(<4 x float> %a) ret <4 x float> %v } @@ -752,9 +752,9 @@ define <4 x float> @trunc_v4f32(<4 x float> %a) { ; CHECK-NEXT: .functype nearest_v4f32 (v128) -> (v128){{$}} ; CHECK-NEXT: f32x4.nearest $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -declare <4 x float> @llvm.wasm.nearest.v4f32(<4 x float>) +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) define <4 x float> @nearest_v4f32(<4 x float> %a) { - %v = call <4 x float> @llvm.wasm.nearest.v4f32(<4 x float> %a) + %v = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %a) ret <4 x float> %v } @@ -807,9 +807,9 @@ define <2 x double> @pmax_v2f64(<2 x double> %a, <2 x double> %b) { ; CHECK-NEXT: .functype ceil_v2f64 (v128) -> (v128){{$}} ; CHECK-NEXT: f64x2.ceil $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x double> @llvm.wasm.ceil.v2f64(<2 x double>) +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) define <2 x double> @ceil_v2f64(<2 x double> %a) { - %v = call <2 x double> @llvm.wasm.ceil.v2f64(<2 x double> %a) + %v = call <2 x double> @llvm.ceil.v2f64(<2 x double> %a) ret <2 x double> %v } @@ -817,9 +817,9 @@ define <2 x double> @ceil_v2f64(<2 x double> %a) { ; CHECK-NEXT: .functype floor_v2f64 (v128) -> (v128){{$}} ; CHECK-NEXT: f64x2.floor $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x double> @llvm.wasm.floor.v2f64(<2 x double>) +declare <2 x double> @llvm.floor.v2f64(<2 x double>) define <2 x double> @floor_v2f64(<2 x double> %a) { - %v = call <2 x double> @llvm.wasm.floor.v2f64(<2 x double> %a) + %v = call <2 x double> @llvm.floor.v2f64(<2 x double> %a) ret <2 x double> %v } @@ -827,9 +827,9 @@ define <2 x double> @floor_v2f64(<2 x double> %a) { ; CHECK-NEXT: .functype trunc_v2f64 (v128) -> (v128){{$}} ; CHECK-NEXT: f64x2.trunc $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x double> @llvm.wasm.trunc.v2f64(<2 x double>) +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) define <2 x double> @trunc_v2f64(<2 x double> %a) { - %v = call <2 x double> @llvm.wasm.trunc.v2f64(<2 x double> %a) + %v = call <2 x double> @llvm.trunc.v2f64(<2 x double> %a) ret <2 x double> %v } @@ -837,9 +837,9 @@ define <2 x double> @trunc_v2f64(<2 x double> %a) { ; CHECK-NEXT: .functype nearest_v2f64 (v128) -> (v128){{$}} ; CHECK-NEXT: f64x2.nearest $push[[R:[0-9]+]]=, $0{{$}} 
; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x double> @llvm.wasm.nearest.v2f64(<2 x double>) +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) define <2 x double> @nearest_v2f64(<2 x double> %a) { - %v = call <2 x double> @llvm.wasm.nearest.v2f64(<2 x double> %a) + %v = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) ret <2 x double> %v } diff --git a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll index 1fc0a92b9032d..9332f51c7a911 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-unsupported.ll @@ -366,38 +366,6 @@ define <2 x i64> @rotr_v2i64(<2 x i64> %x, <2 x i64> %y) { ; 4 x f32 ; ============================================================================== -; CHECK-LABEL: ceil_v4f32: -; CHECK: f32.ceil -declare <4 x float> @llvm.ceil.v4f32(<4 x float>) -define <4 x float> @ceil_v4f32(<4 x float> %x) { - %v = call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) - ret <4 x float> %v -} - -; CHECK-LABEL: floor_v4f32: -; CHECK: f32.floor -declare <4 x float> @llvm.floor.v4f32(<4 x float>) -define <4 x float> @floor_v4f32(<4 x float> %x) { - %v = call <4 x float> @llvm.floor.v4f32(<4 x float> %x) - ret <4 x float> %v -} - -; CHECK-LABEL: trunc_v4f32: -; CHECK: f32.trunc -declare <4 x float> @llvm.trunc.v4f32(<4 x float>) -define <4 x float> @trunc_v4f32(<4 x float> %x) { - %v = call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) - ret <4 x float> %v -} - -; CHECK-LABEL: nearbyint_v4f32: -; CHECK: f32.nearest -declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) -define <4 x float> @nearbyint_v4f32(<4 x float> %x) { - %v = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) - ret <4 x float> %v -} - ; CHECK-LABEL: copysign_v4f32: ; CHECK: f32.copysign declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>) @@ -498,38 +466,6 @@ define <4 x float> @round_v4f32(<4 x float> %x) { ; 2 x f64 ; ============================================================================== -; CHECK-LABEL: ceil_v2f64: -; CHECK: f64.ceil -declare <2 x double> @llvm.ceil.v2f64(<2 x double>) -define <2 x double> @ceil_v2f64(<2 x double> %x) { - %v = call <2 x double> @llvm.ceil.v2f64(<2 x double> %x) - ret <2 x double> %v -} - -; CHECK-LABEL: floor_v2f64: -; CHECK: f64.floor -declare <2 x double> @llvm.floor.v2f64(<2 x double>) -define <2 x double> @floor_v2f64(<2 x double> %x) { - %v = call <2 x double> @llvm.floor.v2f64(<2 x double> %x) - ret <2 x double> %v -} - -; CHECK-LABEL: trunc_v2f64: -; CHECK: f64.trunc -declare <2 x double> @llvm.trunc.v2f64(<2 x double>) -define <2 x double> @trunc_v2f64(<2 x double> %x) { - %v = call <2 x double> @llvm.trunc.v2f64(<2 x double> %x) - ret <2 x double> %v -} - -; CHECK-LABEL: nearbyint_v2f64: -; CHECK: f64.nearest -declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) -define <2 x double> @nearbyint_v2f64(<2 x double> %x) { - %v = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %x) - ret <2 x double> %v -} - ; CHECK-LABEL: copysign_v2f64: ; CHECK: f64.copysign declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) From fd7d65ac3c8ff426d4b88ecdcdbf821a929ae006 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 14 Apr 2021 10:42:44 -0700 Subject: [PATCH 5/7] [WebAssembly] Codegen for f64x2.convert_low_i32x4_{s,u} Add a custom DAG combine and ISD opcode for detecting patterns like (uint_to_fp (extract_subvector ...)) before the extract_subvector is expanded to ensure that they will ultimately lower to 
f64x2.convert_low_i32x4_{s,u} instructions. Since these instructions are no longer prototypes and can now be produced via standard IR, this commit also removes the target intrinsics and builtins that had been used to prototype the instructions. Differential Revision: https://reviews.llvm.org/D100425 --- .../clang/Basic/BuiltinsWebAssembly.def | 2 - clang/lib/CodeGen/CGBuiltin.cpp | 15 ------- clang/test/CodeGen/builtins-wasm.c | 12 ------ llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 6 --- .../lib/Target/WebAssembly/WebAssemblyISD.def | 2 + .../WebAssembly/WebAssemblyISelLowering.cpp | 41 +++++++++++++++++++ .../WebAssembly/WebAssemblyInstrSIMD.td | 17 ++++---- .../CodeGen/WebAssembly/simd-conversions.ll | 22 ++++++++++ .../CodeGen/WebAssembly/simd-intrinsics.ll | 20 --------- 9 files changed, 74 insertions(+), 63 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index 3f8b050aabfd1..db8ec8ebeb302 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -196,8 +196,6 @@ TARGET_BUILTIN(__builtin_wasm_extend_high_s_i32x4_i64x2, "V2LLiV4i", "nc", "simd TARGET_BUILTIN(__builtin_wasm_extend_low_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_extend_high_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_convert_low_s_i32x4_f64x2, "V2dV4i", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_convert_low_u_i32x4_f64x2, "V2dV4Ui", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_demote_zero_f64x2_f32x4, "V4fV2d", "nc", "simd128") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ea6d039fd2038..0dd0dc07d7693 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17181,21 +17181,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, Function *Callee = CGM.getIntrinsic(IntNo); return Builder.CreateCall(Callee, Vec); } - case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2: - case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2: { - Value *Vec = EmitScalarExpr(E->getArg(0)); - unsigned IntNo; - switch (BuiltinID) { - case WebAssembly::BI__builtin_wasm_convert_low_s_i32x4_f64x2: - IntNo = Intrinsic::wasm_convert_low_signed; - break; - case WebAssembly::BI__builtin_wasm_convert_low_u_i32x4_f64x2: - IntNo = Intrinsic::wasm_convert_low_unsigned; - break; - } - Function *Callee = CGM.getIntrinsic(IntNo); - return Builder.CreateCall(Callee, Vec); - } case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4: case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4: { Value *Vec = EmitScalarExpr(E->getArg(0)); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index 7b7965c026e1a..a5c6f4423c3b4 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -914,18 +914,6 @@ u64x2 extend_high_u_i32x4_i64x2(u32x4 x) { // WEBASSEMBLY: ret } -f64x2 convert_low_s_i32x4_f64x2(i32x4 x) { - return __builtin_wasm_convert_low_s_i32x4_f64x2(x); - // WEBASSEMBLY: call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %x) - // WEBASSEMBLY: ret -} - -f64x2 convert_low_u_i32x4_f64x2(u32x4 x) { - return __builtin_wasm_convert_low_u_i32x4_f64x2(x); - // WEBASSEMBLY: call <2 x double> 
@llvm.wasm.convert.low.unsigned(<4 x i32> %x)
-  // WEBASSEMBLY: ret
-}
-
 i32x4 trunc_sat_zero_s_f64x2_i32x4(f64x2 x) {
   return __builtin_wasm_trunc_sat_zero_s_f64x2_i32x4(x);
   // WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.sat.zero.signed(<2 x double> %x)
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
index f4bdd07b81082..977647db92adf 100644
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -275,12 +275,6 @@ def int_wasm_extadd_pairwise_unsigned :
             [IntrNoMem, IntrSpeculatable]>;

 // TODO: Remove these if possible if they are merged to the spec.
-def int_wasm_convert_low_signed :
-  Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty],
-            [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_convert_low_unsigned :
-  Intrinsic<[llvm_v2f64_ty], [llvm_v4i32_ty],
-            [IntrNoMem, IntrSpeculatable]>;
 def int_wasm_trunc_sat_zero_signed :
   Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty],
             [IntrNoMem, IntrSpeculatable]>;
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
index 3a82dd45a5f65..c73ce43057f85 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -33,6 +33,8 @@ HANDLE_NODETYPE(EXTEND_LOW_S)
 HANDLE_NODETYPE(EXTEND_LOW_U)
 HANDLE_NODETYPE(EXTEND_HIGH_S)
 HANDLE_NODETYPE(EXTEND_HIGH_U)
+HANDLE_NODETYPE(CONVERT_LOW_S)
+HANDLE_NODETYPE(CONVERT_LOW_U)
 HANDLE_NODETYPE(THROW)
 HANDLE_NODETYPE(CATCH)
 HANDLE_NODETYPE(MEMORY_COPY)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index bce8f8ef2105d..5fa5cf22a090f 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -130,6 +130,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     setTargetDAGCombine(ISD::SIGN_EXTEND);
     setTargetDAGCombine(ISD::ZERO_EXTEND);

+    // Combine {s,u}int_to_fp of extract_vectors into conversion ops
+    setTargetDAGCombine(ISD::SINT_TO_FP);
+    setTargetDAGCombine(ISD::UINT_TO_FP);
+
     // Support saturating add for i8x16 and i16x8
     for (auto Op : {ISD::SADDSAT, ISD::UADDSAT})
       for (auto T : {MVT::v16i8, MVT::v8i16})
@@ -2016,6 +2020,40 @@ performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
   return DAG.getNode(Op, SDLoc(N), ResVT, Source);
 }

+static SDValue
+performVectorConvertLowCombine(SDNode *N,
+                               TargetLowering::DAGCombinerInfo &DCI) {
+  auto &DAG = DCI.DAG;
+  assert(N->getOpcode() == ISD::SINT_TO_FP ||
+         N->getOpcode() == ISD::UINT_TO_FP);
+
+  // Combine ({s,u}int_to_fp (extract_subvector ... 0)) to an
+  // f64x2.convert_low_i32x4_{s,u} SDNode.
+  auto Extract = N->getOperand(0);
+  if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+    return SDValue();
+  auto Source = Extract.getOperand(0);
+  auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
+  if (IndexNode == nullptr)
+    return SDValue();
+  auto Index = IndexNode->getZExtValue();
+
+  // The types must be correct.
+  EVT ResVT = N->getValueType(0);
+  if (ResVT != MVT::v2f64 || Extract.getValueType() != MVT::v2i32)
+    return SDValue();
+
+  // The extracted vector must be the low half.
+  if (Index != 0)
+    return SDValue();
+
+  unsigned Op = N->getOpcode() == ISD::SINT_TO_FP
+                    ? WebAssemblyISD::CONVERT_LOW_S
+                    : WebAssemblyISD::CONVERT_LOW_U;
+
+  return DAG.getNode(Op, SDLoc(N), ResVT, Source);
+}
+
 SDValue
 WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
@@ -2027,5 +2065,8 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
     return performVectorExtendCombine(N, DCI);
+  case ISD::SINT_TO_FP:
+  case ISD::UINT_TO_FP:
+    return performVectorConvertLowCombine(N, DCI);
   }
 }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 1588f6aac8610..cfbb3ffec5393 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1104,16 +1104,21 @@ multiclass SIMDConvert<Vec vec, Vec arg, SDNode op, string name,
 defm "" : SIMDConvert<I32x4, F32x4, fp_to_sint, "trunc_sat_f32x4_s", 0xf8>;
 defm "" : SIMDConvert<I32x4, F32x4, fp_to_uint, "trunc_sat_f32x4_u", 0xf9>;

-// Integer to floating point: convert
-defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 0xfa>;
-defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 0xfb>;
-
 // Lower llvm.wasm.trunc.sat.* to saturating instructions
 def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))),
           (fp_to_sint_I32x4 $src)>;
 def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))),
           (fp_to_uint_I32x4 $src)>;

+// Integer to floating point: convert
+def convert_low_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
+def convert_low_s : SDNode<"WebAssemblyISD::CONVERT_LOW_S", convert_low_t>;
+def convert_low_u : SDNode<"WebAssemblyISD::CONVERT_LOW_U", convert_low_t>;
+defm "" : SIMDConvert<F32x4, I32x4, sint_to_fp, "convert_i32x4_s", 0xfa>;
+defm "" : SIMDConvert<F32x4, I32x4, uint_to_fp, "convert_i32x4_u", 0xfb>;
+defm "" : SIMDConvert<F64x2, I32x4, convert_low_s, "convert_low_i32x4_s", 0xfe>;
+defm "" : SIMDConvert<F64x2, I32x4, convert_low_u, "convert_low_i32x4_u", 0xff>;
+
 // Extending operations
 def extend_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
 def extend_low_s : SDNode<"WebAssemblyISD::EXTEND_LOW_S", extend_t>;
@@ -1268,10 +1273,6 @@ defm "" : SIMDConvert<I32x4, F64x2, int_wasm_trunc_sat_zero_signed,
 defm "" : SIMDConvert<I32x4, F64x2, int_wasm_trunc_sat_zero_unsigned,
                       "trunc_sat_zero_f64x2_u", 0xfd>;

-defm "" : SIMDConvert<F64x2, I32x4, int_wasm_convert_low_signed,
-                      "convert_low_i32x4_s", 0xfe>;
-defm "" : SIMDConvert<F64x2, I32x4, int_wasm_convert_low_unsigned,
-                      "convert_low_i32x4_u", 0xff>;

 //===----------------------------------------------------------------------===//
 // Saturating Rounding Q-Format Multiplication
diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
index 36856336e65e5..431d559220409 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll
@@ -81,3 +81,25 @@ define <2 x i64> @trunc_sat_u_v2i64(<2 x double> %x) {
   %a = fptoui <2 x double> %x to <2 x i64>
   ret <2 x i64> %a
 }
+
+; CHECK-LABEL: convert_low_s_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype convert_low_s_v2f64 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.convert_low_i32x4_s $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <2 x double> @convert_low_s_v2f64(<4 x i32> %x) {
+  %v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %a = sitofp <2 x i32> %v to <2 x double>
+  ret <2 x double> %a
+}
+
+; CHECK-LABEL: convert_low_u_v2f64:
+; NO-SIMD128-NOT: f64x2
+; SIMD128-NEXT: .functype convert_low_u_v2f64 (v128) -> (v128){{$}}
+; SIMD128-NEXT: f64x2.convert_low_i32x4_u $push[[R:[0-9]+]]=, $0
+; SIMD128-NEXT: return $pop[[R]]
+define <2 x double> @convert_low_u_v2f64(<4 x i32> %x) {
+  %v = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
+  %a = uitofp <2 x i32> %v to <2 x double>
+  ret <2 x double> %a
+}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
index f28eb0b242a52..5df5ae9a21bde 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
@@ -843,26 +843,6 @@ define <2 x double> @nearest_v2f64(<2 x double> %a) {
   ret <2 x double> %v
 }

-;
CHECK-LABEL: convert_low_signed_v2f64: -; CHECK-NEXT: .functype convert_low_signed_v2f64 (v128) -> (v128){{$}} -; CHECK-NEXT: f64x2.convert_low_i32x4_s $push[[R:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x double> @llvm.wasm.convert.low.signed(<4 x i32>) -define <2 x double> @convert_low_signed_v2f64(<4 x i32> %a) { - %v = call <2 x double> @llvm.wasm.convert.low.signed(<4 x i32> %a) - ret <2 x double> %v -} - -; CHECK-LABEL: convert_low_unsigned_v2f64: -; CHECK-NEXT: .functype convert_low_unsigned_v2f64 (v128) -> (v128){{$}} -; CHECK-NEXT: f64x2.convert_low_i32x4_u $push[[R:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32>) -define <2 x double> @convert_low_unsigned_v2f64(<4 x i32> %a) { - %v = call <2 x double> @llvm.wasm.convert.low.unsigned(<4 x i32> %a) - ret <2 x double> %v -} - ; CHECK-LABEL: promote_low_v2f64: ; CHECK-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}} ; CHECK-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0{{$}} From 6ffbea3802fe554dfeb3d253b932c1a52f8b6fcd Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Wed, 14 Apr 2021 13:43:09 -0700 Subject: [PATCH 6/7] [WebAssembly] Codegen for i64x2.extend_{low,high}_i32x4_{s,u} Removes the builtins and intrinsics used to opt in to using these instructions and replaces them with normal ISel patterns now that they are no longer prototypes. Differential Revision: https://reviews.llvm.org/D100402 --- .../clang/Basic/BuiltinsWebAssembly.def | 5 -- clang/lib/CodeGen/CGBuiltin.cpp | 23 -------- clang/test/CodeGen/builtins-wasm.c | 24 --------- llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 11 ---- .../WebAssembly/WebAssemblyISelLowering.cpp | 8 ++- .../WebAssembly/WebAssemblyInstrSIMD.td | 14 ++--- .../CodeGen/WebAssembly/simd-extending.ll | 52 +++++++++++++++++++ .../CodeGen/WebAssembly/simd-intrinsics.ll | 40 -------------- 8 files changed, 61 insertions(+), 116 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def index db8ec8ebeb302..bc0c37a11207f 100644 --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -191,11 +191,6 @@ TARGET_BUILTIN(__builtin_wasm_narrow_u_i8x16_i16x8, "V16UcV8UsV8Us", "nc", "simd TARGET_BUILTIN(__builtin_wasm_narrow_s_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8UsV4UiV4Ui", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extend_low_s_i32x4_i64x2, "V2LLiV4i", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extend_high_s_i32x4_i64x2, "V2LLiV4i", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extend_low_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128") -TARGET_BUILTIN(__builtin_wasm_extend_high_u_i32x4_i64x2, "V2LLUiV4Ui", "nc", "simd128") - TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_demote_zero_f64x2_f32x4, "V4fV2d", "nc", "simd128") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0dd0dc07d7693..9322f04250fcd 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -17158,29 +17158,6 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()}); return Builder.CreateCall(Callee, {Low, High}); } - case 
WebAssembly::BI__builtin_wasm_extend_low_s_i32x4_i64x2: - case WebAssembly::BI__builtin_wasm_extend_high_s_i32x4_i64x2: - case WebAssembly::BI__builtin_wasm_extend_low_u_i32x4_i64x2: - case WebAssembly::BI__builtin_wasm_extend_high_u_i32x4_i64x2: { - Value *Vec = EmitScalarExpr(E->getArg(0)); - unsigned IntNo; - switch (BuiltinID) { - case WebAssembly::BI__builtin_wasm_extend_low_s_i32x4_i64x2: - IntNo = Intrinsic::wasm_extend_low_signed; - break; - case WebAssembly::BI__builtin_wasm_extend_high_s_i32x4_i64x2: - IntNo = Intrinsic::wasm_extend_high_signed; - break; - case WebAssembly::BI__builtin_wasm_extend_low_u_i32x4_i64x2: - IntNo = Intrinsic::wasm_extend_low_unsigned; - break; - case WebAssembly::BI__builtin_wasm_extend_high_u_i32x4_i64x2: - IntNo = Intrinsic::wasm_extend_high_unsigned; - break; - } - Function *Callee = CGM.getIntrinsic(IntNo); - return Builder.CreateCall(Callee, Vec); - } case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4: case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4: { Value *Vec = EmitScalarExpr(E->getArg(0)); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index a5c6f4423c3b4..1a986f03dc498 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -890,30 +890,6 @@ u16x8 narrow_u_i16x8_i32x4(u32x4 low, u32x4 high) { // WEBASSEMBLY: ret } -i64x2 extend_low_s_i32x4_i64x2(i32x4 x) { - return __builtin_wasm_extend_low_s_i32x4_i64x2(x); - // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extend.low.signed(<4 x i32> %x) - // WEBASSEMBLY: ret -} - -i64x2 extend_high_s_i32x4_i64x2(i32x4 x) { - return __builtin_wasm_extend_high_s_i32x4_i64x2(x); - // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extend.high.signed(<4 x i32> %x) - // WEBASSEMBLY: ret -} - -u64x2 extend_low_u_i32x4_i64x2(u32x4 x) { - return __builtin_wasm_extend_low_u_i32x4_i64x2(x); - // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extend.low.unsigned(<4 x i32> %x) - // WEBASSEMBLY: ret -} - -u64x2 extend_high_u_i32x4_i64x2(u32x4 x) { - return __builtin_wasm_extend_high_u_i32x4_i64x2(x); - // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extend.high.unsigned(<4 x i32> %x) - // WEBASSEMBLY: ret -} - i32x4 trunc_sat_zero_s_f64x2_i32x4(f64x2 x) { return __builtin_wasm_trunc_sat_zero_s_f64x2_i32x4(x); // WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.sat.zero.signed(<2 x double> %x) diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index 977647db92adf..4e2d557f1f083 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -157,17 +157,6 @@ def int_wasm_narrow_unsigned : [llvm_anyvector_ty, LLVMMatchType<1>], [IntrNoMem, IntrSpeculatable]>; -// TODO: Replace these intrinsics with normal ISel patterns once i32x4 to i64x2 -// extending is merged to the proposal. 
-def int_wasm_extend_low_signed : - Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>; -def int_wasm_extend_high_signed : - Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>; -def int_wasm_extend_low_unsigned : - Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>; -def int_wasm_extend_high_unsigned : - Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty], [IntrNoMem, IntrSpeculatable]>; - def int_wasm_q15mulr_sat_signed : Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty], diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 5fa5cf22a090f..a9cbe75271226 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1994,8 +1994,8 @@ performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { return SDValue(); auto Index = IndexNode->getZExtValue(); - // Only v8i8 and v4i16 extracts can be widened, and only if the extracted - // subvector is the low or high half of its source. + // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the + // extracted subvector is the low or high half of its source. EVT ResVT = N->getValueType(0); if (ResVT == MVT::v8i16) { if (Extract.getValueType() != MVT::v8i8 || @@ -2005,6 +2005,10 @@ performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { if (Extract.getValueType() != MVT::v4i16 || Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4)) return SDValue(); + } else if (ResVT == MVT::v2i64) { + if (Extract.getValueType() != MVT::v2i32 || + Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2)) + return SDValue(); } else { return SDValue(); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index cfbb3ffec5393..7cf3cb1854fb1 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1138,17 +1138,9 @@ multiclass SIMDExtend baseInst> { "extend_high_"#vec.split.prefix#"_u", !add(baseInst, 3)>; } -defm "" : SIMDExtend; -defm "" : SIMDExtend; - -defm "" : SIMDConvert; -defm "" : SIMDConvert; -defm "" : SIMDConvert; -defm "" : SIMDConvert; +defm "" : SIMDExtend; +defm "" : SIMDExtend; +defm "" : SIMDExtend; // Narrowing operations multiclass SIMDNarrow baseInst> { diff --git a/llvm/test/CodeGen/WebAssembly/simd-extending.ll b/llvm/test/CodeGen/WebAssembly/simd-extending.ll index 3f512cd2678e0..9ecee61424e40 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-extending.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-extending.ll @@ -110,6 +110,58 @@ define <4 x i32> @extend_high_i16x8_u(<8 x i16> %v) { ret <4 x i32> %extended } +define <2 x i64> @extend_low_i32x4_s(<4 x i32> %v) { +; CHECK-LABEL: extend_low_i32x4_s: +; CHECK: .functype extend_low_i32x4_s (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.extend_low_i32x4_s +; CHECK-NEXT: # fallthrough-return + %low = shufflevector <4 x i32> %v, <4 x i32> undef, + <2 x i32> + %extended = sext <2 x i32> %low to <2 x i64> + ret <2 x i64> %extended +} + +define <2 x i64> @extend_low_i32x4_u(<4 x i32> %v) { +; CHECK-LABEL: extend_low_i32x4_u: +; CHECK: .functype extend_low_i32x4_u (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.extend_low_i32x4_u +; CHECK-NEXT: # fallthrough-return + %low = shufflevector <4 x i32> %v, 
<4 x i32> undef, + <2 x i32> <i32 0, i32 1> + %extended = zext <2 x i32> %low to <2 x i64> + ret <2 x i64> %extended +} + +define <2 x i64> @extend_high_i32x4_s(<4 x i32> %v) { +; CHECK-LABEL: extend_high_i32x4_s: +; CHECK: .functype extend_high_i32x4_s (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.extend_high_i32x4_s +; CHECK-NEXT: # fallthrough-return + %low = shufflevector <4 x i32> %v, <4 x i32> undef, + <2 x i32> <i32 2, i32 3> + %extended = sext <2 x i32> %low to <2 x i64> + ret <2 x i64> %extended +} + +define <2 x i64> @extend_high_i32x4_u(<4 x i32> %v) { +; CHECK-LABEL: extend_high_i32x4_u: +; CHECK: .functype extend_high_i32x4_u (v128) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i64x2.extend_high_i32x4_u +; CHECK-NEXT: # fallthrough-return + %low = shufflevector <4 x i32> %v, <4 x i32> undef, + <2 x i32> <i32 2, i32 3> + %extended = zext <2 x i32> %low to <2 x i64> + ret <2 x i64> %extended +} + ;; Also test that similar patterns with offsets not corresponding to ;; the low or high half are correctly expanded. diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll index 5df5ae9a21bde..6e8e5a2fed71b 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -553,46 +553,6 @@ define <4 x i32> @trunc_sat_zero_unsigned_v4i32(<2 x double> %a) { ; ============================================================================== ; 2 x i64 ; ============================================================================== -; CHECK-LABEL: extend_low_s_v2i64: -; CHECK-NEXT: .functype extend_low_s_v2i64 (v128) -> (v128){{$}} -; CHECK-NEXT: i64x2.extend_low_i32x4_s $push[[R:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x i64> @llvm.wasm.extend.low.signed(<4 x i32>) -define <2 x i64> @extend_low_s_v2i64(<4 x i32> %x) { - %a = call <2 x i64> @llvm.wasm.extend.low.signed(<4 x i32> %x) - ret <2 x i64> %a -} - -; CHECK-LABEL: extend_high_s_v2i64: -; CHECK-NEXT: .functype extend_high_s_v2i64 (v128) -> (v128){{$}} -; CHECK-NEXT: i64x2.extend_high_i32x4_s $push[[R:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x i64> @llvm.wasm.extend.high.signed(<4 x i32>) -define <2 x i64> @extend_high_s_v2i64(<4 x i32> %x) { - %a = call <2 x i64> @llvm.wasm.extend.high.signed(<4 x i32> %x) - ret <2 x i64> %a -} - -; CHECK-LABEL: extend_low_u_v2i64: -; CHECK-NEXT: .functype extend_low_u_v2i64 (v128) -> (v128){{$}} -; CHECK-NEXT: i64x2.extend_low_i32x4_u $push[[R:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x i64> @llvm.wasm.extend.low.unsigned(<4 x i32>) -define <2 x i64> @extend_low_u_v2i64(<4 x i32> %x) { - %a = call <2 x i64> @llvm.wasm.extend.low.unsigned(<4 x i32> %x) - ret <2 x i64> %a -} - -; CHECK-LABEL: extend_high_u_v2i64: -; CHECK-NEXT: .functype extend_high_u_v2i64 (v128) -> (v128){{$}} -; CHECK-NEXT: i64x2.extend_high_i32x4_u $push[[R:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x i64> @llvm.wasm.extend.high.unsigned(<4 x i32>) -define <2 x i64> @extend_high_u_v2i64(<4 x i32> %x) { - %a = call <2 x i64> @llvm.wasm.extend.high.unsigned(<4 x i32> %x) - ret <2 x i64> %a -} - ; CHECK-LABEL: extmul_low_s_v2i64: ; CHECK-NEXT: .functype extmul_low_s_v2i64 (v128, v128) -> (v128){{$}} ; CHECK-NEXT: i64x2.extmul_low_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}}
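For reference, the i32x4 tests added above mirror the narrower cases this combine already handled; the existing i8x16-to-i16x8 test in simd-extending.ll has the same shape (sketched here from memory, so details may differ slightly from the checked-in file):

define <8 x i16> @extend_low_i8x16_s(<16 x i8> %v) {
  ; extracting the low half and sign-extending selects to a single
  ; i16x8.extend_low_i8x16_s instead of per-lane extracts and replaces
  %low = shufflevector <16 x i8> %v, <16 x i8> undef,
                       <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %extended = sext <8 x i8> %low to <8 x i16>
  ret <8 x i16> %extended
}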
From 05751c82ecb45c069fbeee6887903d020cff4b87 Mon Sep 17 00:00:00 2001 From: Thomas Lively Date: Fri, 16 Apr 2021 12:11:20 -0700 Subject: [PATCH 7/7] [WebAssembly] Remove saturating fp-to-int target intrinsics Use the target-independent @llvm.fptosi.sat and @llvm.fptoui.sat intrinsics instead. This includes removing the intrinsics for i32x4.trunc_sat_zero_f64x2_{s,u}, which are now represented in IR as a saturating truncation to a v2i32 followed by a concatenation with a zero vector. Differential Revision: https://reviews.llvm.org/D100596 --- clang/lib/CodeGen/CGBuiltin.cpp | 24 +- clang/test/CodeGen/builtins-wasm.c | 30 +- llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 6 - llvm/lib/Analysis/ConstantFolding.cpp | 24 +- .../lib/Target/WebAssembly/WebAssemblyISD.def | 2 + .../WebAssembly/WebAssemblyISelLowering.cpp | 77 +++ .../WebAssembly/WebAssemblyISelLowering.h | 1 + .../WebAssembly/WebAssemblyInstrConv.td | 26 +- .../WebAssembly/WebAssemblyInstrSIMD.td | 24 +- llvm/test/CodeGen/WebAssembly/conv.ll | 77 ++- .../CodeGen/WebAssembly/simd-intrinsics.ll | 36 +- .../ConstProp/WebAssembly/trunc_saturate.ll | 610 ------------------ 12 files changed, 220 insertions(+), 717 deletions(-) delete mode 100644 llvm/test/Transforms/InstSimplify/ConstProp/WebAssembly/trunc_saturate.ll diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9322f04250fcd..6a197ed1cbecd 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -16756,8 +16756,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); - Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed, - {ResT, Src->getType()}); + Function *Callee = + CGM.getIntrinsic(Intrinsic::fptosi_sat, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32: @@ -16767,8 +16767,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4: { Value *Src = EmitScalarExpr(E->getArg(0)); llvm::Type *ResT = ConvertType(E->getType()); - Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned, - {ResT, Src->getType()}); + Function *Callee = + CGM.getIntrinsic(Intrinsic::fptoui_sat, {ResT, Src->getType()}); return Builder.CreateCall(Callee, {Src}); } case WebAssembly::BI__builtin_wasm_min_f32: @@ -17164,14 +17164,22 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, unsigned IntNo; switch (BuiltinID) { case WebAssembly::BI__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4: - IntNo = Intrinsic::wasm_trunc_sat_zero_signed; + IntNo = Intrinsic::fptosi_sat; break; case WebAssembly::BI__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4: - IntNo = Intrinsic::wasm_trunc_sat_zero_unsigned; + IntNo = Intrinsic::fptoui_sat; break; } - Function *Callee = CGM.getIntrinsic(IntNo); - return Builder.CreateCall(Callee, Vec); + llvm::Type *SrcT = Vec->getType(); + llvm::Type *TruncT = + SrcT->getWithNewType(llvm::IntegerType::get(getLLVMContext(), 32)); + Function *Callee = CGM.getIntrinsic(IntNo, {TruncT, SrcT}); + Value *Trunc = Builder.CreateCall(Callee, Vec); + Value *Splat = Builder.CreateVectorSplat(2, Builder.getInt32(0)); + Value *ConcatMask = + llvm::ConstantVector::get({Builder.getInt32(0), Builder.getInt32(1), + Builder.getInt32(2), Builder.getInt32(3)}); + return Builder.CreateShuffleVector(Trunc, Splat, ConcatMask); } case WebAssembly::BI__builtin_wasm_demote_zero_f64x2_f32x4: { Value
*Vec = EmitScalarExpr(E->getArg(0)); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index 1a986f03dc498..d20b6a739f94a 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -123,49 +123,49 @@ long long trunc_u_i64_f64(double f) { int trunc_saturate_s_i32_f32(float f) { return __builtin_wasm_trunc_saturate_s_i32_f32(f); - // WEBASSEMBLY: call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float %f) + // WEBASSEMBLY: call i32 @llvm.fptosi.sat.i32.f32(float %f) // WEBASSEMBLY-NEXT: ret } int trunc_saturate_u_i32_f32(float f) { return __builtin_wasm_trunc_saturate_u_i32_f32(f); - // WEBASSEMBLY: call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float %f) + // WEBASSEMBLY: call i32 @llvm.fptoui.sat.i32.f32(float %f) // WEBASSEMBLY-NEXT: ret } int trunc_saturate_s_i32_f64(double f) { return __builtin_wasm_trunc_saturate_s_i32_f64(f); - // WEBASSEMBLY: call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double %f) + // WEBASSEMBLY: call i32 @llvm.fptosi.sat.i32.f64(double %f) // WEBASSEMBLY-NEXT: ret } int trunc_saturate_u_i32_f64(double f) { return __builtin_wasm_trunc_saturate_u_i32_f64(f); - // WEBASSEMBLY: call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double %f) + // WEBASSEMBLY: call i32 @llvm.fptoui.sat.i32.f64(double %f) // WEBASSEMBLY-NEXT: ret } long long trunc_saturate_s_i64_f32(float f) { return __builtin_wasm_trunc_saturate_s_i64_f32(f); - // WEBASSEMBLY: call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float %f) + // WEBASSEMBLY: call i64 @llvm.fptosi.sat.i64.f32(float %f) // WEBASSEMBLY-NEXT: ret } long long trunc_saturate_u_i64_f32(float f) { return __builtin_wasm_trunc_saturate_u_i64_f32(f); - // WEBASSEMBLY: call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float %f) + // WEBASSEMBLY: call i64 @llvm.fptoui.sat.i64.f32(float %f) // WEBASSEMBLY-NEXT: ret } long long trunc_saturate_s_i64_f64(double f) { return __builtin_wasm_trunc_saturate_s_i64_f64(f); - // WEBASSEMBLY: call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double %f) + // WEBASSEMBLY: call i64 @llvm.fptosi.sat.i64.f64(double %f) // WEBASSEMBLY-NEXT: ret } long long trunc_saturate_u_i64_f64(double f) { return __builtin_wasm_trunc_saturate_u_i64_f64(f); - // WEBASSEMBLY: call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double %f) + // WEBASSEMBLY: call i64 @llvm.fptoui.sat.i64.f64(double %f) // WEBASSEMBLY-NEXT: ret } @@ -852,13 +852,13 @@ f64x2 sqrt_f64x2(f64x2 x) { i32x4 trunc_saturate_s_i32x4_f32x4(f32x4 f) { return __builtin_wasm_trunc_saturate_s_i32x4_f32x4(f); - // WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.saturate.signed.v4i32.v4f32(<4 x float> %f) + // WEBASSEMBLY: call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %f) // WEBASSEMBLY-NEXT: ret } i32x4 trunc_saturate_u_i32x4_f32x4(f32x4 f) { return __builtin_wasm_trunc_saturate_u_i32x4_f32x4(f); - // WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.saturate.unsigned.v4i32.v4f32(<4 x float> %f) + // WEBASSEMBLY: call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %f) // WEBASSEMBLY-NEXT: ret } @@ -892,14 +892,16 @@ u16x8 narrow_u_i16x8_i32x4(u32x4 low, u32x4 high) { i32x4 trunc_sat_zero_s_f64x2_i32x4(f64x2 x) { return __builtin_wasm_trunc_sat_zero_s_f64x2_i32x4(x); - // WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.sat.zero.signed(<2 x double> %x) - // WEBASSEMBLY: ret + // WEBASSEMBLY: %0 = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %x) + // WEBASSEMBLY: %1 = shufflevector <2 x i32> %0, <2 x i32> zeroinitializer, <4 x i32> + // WEBASSEMBLY: ret <4 x i32> 
%1 } u32x4 trunc_sat_zero_u_f64x2_i32x4(f64x2 x) { return __builtin_wasm_trunc_sat_zero_u_f64x2_i32x4(x); - // WEBASSEMBLY: call <4 x i32> @llvm.wasm.trunc.sat.zero.unsigned(<2 x double> %x) - // WEBASSEMBLY: ret + // WEBASSEMBLY: %0 = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %x) + // WEBASSEMBLY: %1 = shufflevector <2 x i32> %0, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + // WEBASSEMBLY: ret <4 x i32> %1 } f32x4 wasm_demote_zero_f64x2_f32x4(f64x2 x) { diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index 4e2d557f1f083..abeb4c0a19f34 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -264,12 +264,6 @@ def int_wasm_extadd_pairwise_unsigned : [IntrNoMem, IntrSpeculatable]>; // TODO: Remove these if possible if they are merged to the spec. -def int_wasm_trunc_sat_zero_signed : - Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], - [IntrNoMem, IntrSpeculatable]>; -def int_wasm_trunc_sat_zero_unsigned : - Intrinsic<[llvm_v4i32_ty], [llvm_v2f64_ty], - [IntrNoMem, IntrSpeculatable]>; def int_wasm_demote_zero : Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty], [IntrNoMem, IntrSpeculatable]>; diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index f73890d548f09..5b592c17e8de1 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -1493,8 +1493,6 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) { // WebAssembly float semantics are always known case Intrinsic::wasm_trunc_signed: case Intrinsic::wasm_trunc_unsigned: - case Intrinsic::wasm_trunc_saturate_signed: - case Intrinsic::wasm_trunc_saturate_unsigned: return true; // Floating point operations cannot be folded in strictfp functions in @@ -1896,17 +1894,11 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, APFloat U = Op->getValueAPF(); if (IntrinsicID == Intrinsic::wasm_trunc_signed || - IntrinsicID == Intrinsic::wasm_trunc_unsigned || - IntrinsicID == Intrinsic::wasm_trunc_saturate_signed || - IntrinsicID == Intrinsic::wasm_trunc_saturate_unsigned) { - - bool Saturating = IntrinsicID == Intrinsic::wasm_trunc_saturate_signed || - IntrinsicID == Intrinsic::wasm_trunc_saturate_unsigned; - bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed || - IntrinsicID == Intrinsic::wasm_trunc_saturate_signed; + IntrinsicID == Intrinsic::wasm_trunc_unsigned) { + bool Signed = IntrinsicID == Intrinsic::wasm_trunc_signed; if (U.isNaN()) - return Saturating ? ConstantInt::get(Ty, 0) : nullptr; + return nullptr; unsigned Width = Ty->getIntegerBitWidth(); APSInt Int(Width, !Signed); @@ -1917,15 +1909,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, if (Status == APFloat::opOK || Status == APFloat::opInexact) return ConstantInt::get(Ty, Int); - if (!Saturating) - return nullptr; - - if (U.isNegative()) - return Signed ? ConstantInt::get(Ty, APInt::getSignedMinValue(Width)) - : ConstantInt::get(Ty, APInt::getMinValue(Width)); - else - return Signed ? ConstantInt::get(Ty, APInt::getSignedMaxValue(Width)) - : ConstantInt::get(Ty, APInt::getMaxValue(Width)); + return nullptr; } if (IntrinsicID == Intrinsic::fptoui_sat ||
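Note that dropping the WebAssembly-specific saturating folds loses no coverage: ConstantFolding already handles the generic @llvm.fptosi.sat/@llvm.fptoui.sat intrinsics (see the fptoui_sat case kept just above), including the NaN-goes-to-zero behavior the deleted trunc_saturate.ll tests below exercise. A minimal sketch of the equivalent fold through the generic intrinsic (function name is ours):

declare i32 @llvm.fptosi.sat.i32.f32(float)
define i32 @fold_nan_to_zero() {
  ; instsimplify constant-folds this call to 'ret i32 0';
  ; fptosi.sat is defined to return 0 for a NaN input
  %r = call i32 @llvm.fptosi.sat.i32.f32(float 0x7ff8000000000000)
  ret i32 %r
}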
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def index c73ce43057f85..33fd2ae11154f 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -35,6 +35,8 @@ HANDLE_NODETYPE(EXTEND_HIGH_S) HANDLE_NODETYPE(EXTEND_HIGH_U) HANDLE_NODETYPE(CONVERT_LOW_S) HANDLE_NODETYPE(CONVERT_LOW_U) +HANDLE_NODETYPE(TRUNC_SAT_ZERO_S) +HANDLE_NODETYPE(TRUNC_SAT_ZERO_U) HANDLE_NODETYPE(THROW) HANDLE_NODETYPE(CATCH) HANDLE_NODETYPE(MEMORY_COPY) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index a9cbe75271226..29742177626fd 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -121,6 +121,11 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(Op, T, Expand); } + if (Subtarget->hasNontrappingFPToInt()) + for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) + for (auto T : {MVT::i32, MVT::i64}) + setOperationAction(Op, T, Custom); + // SIMD-specific configuration if (Subtarget->hasSIMD128()) { // Hoist bitcasts out of shuffles @@ -134,6 +139,9 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); + // Combine concat of fp_to_{s,u}int_sat results into i32x4.trunc_sat_zero_f64x2_{s,u} + setTargetDAGCombine(ISD::CONCAT_VECTORS); + // Support saturating add for i8x16 and i16x8 for (auto Op : {ISD::SADDSAT, ISD::UADDSAT}) for (auto T : {MVT::v16i8, MVT::v8i16}) setOperationAction(Op, T, Legal); @@ -198,6 +206,10 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}) for (auto T : {MVT::v2i64, MVT::v2f64}) setOperationAction(Op, T, Expand); + + // But saturating fp_to_int conversions are + for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) + setOperationAction(Op, MVT::v4i32, Custom); } // As a special case, these operators use the type to mean the type to @@ -1233,6 +1245,9 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, case ISD::SRA: case ISD::SRL: return LowerShift(Op, DAG); + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + return LowerFP_TO_INT_SAT(Op, DAG); } } @@ -1949,6 +1964,21 @@ SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op, return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal); } +SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT ResT = Op.getValueType(); + uint64_t Width = Op.getConstantOperandVal(1); + + if ((ResT == MVT::i32 || ResT == MVT::i64) && (Width == 32 || Width == 64)) + return Op; + + if (ResT == MVT::v4i32 && Width == 32) + return Op; + + return SDValue(); +} + //===----------------------------------------------------------------------===// // Custom DAG combine hooks //===----------------------------------------------------------------------===// @@ -2037,6 +2067,8 @@ performVectorConvertLowCombine(SDNode *N, if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) return SDValue(); auto Source = Extract.getOperand(0); + if (Source.getValueType() != MVT::v4i32) + return SDValue(); auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1)); if (IndexNode == nullptr) return SDValue(); @@ -2058,6 +2090,49 @@ performVectorConvertLowCombine, return DAG.getNode(Op, SDLoc(N), ResVT, Source); }
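At the IR level, performVectorTruncSatLowCombine (added below) targets exactly the pattern the updated builtin lowering emits: a saturating v2f64-to-v2i32 truncation concatenated with zeros. A minimal sketch (function and value names are ours):

declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>)
define <4 x i32> @trunc_sat_zero_example(<2 x double> %x) {
  ; saturating truncation of both f64 lanes to i32
  %trunc = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %x)
  ; concatenate with a zero vector to widen back to v4i32
  %zeroed = shufflevector <2 x i32> %trunc, <2 x i32> zeroinitializer,
                          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %zeroed
}

In the DAG this becomes (concat_vectors (fp_to_sint_sat $x, 32), (splat 0)), which the combine rewrites to a single TRUNC_SAT_ZERO_S node, i.e. one i32x4.trunc_sat_zero_f64x2_s instruction.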
+static SDValue +performVectorTruncSatLowCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + auto &DAG = DCI.DAG; + assert(N->getOpcode() == ISD::CONCAT_VECTORS); + + // Combine this: + // + // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0))) + // + // into (i32x4.trunc_sat_zero_f64x2_{s,u} $x). + EVT ResVT = N->getValueType(0); + if (ResVT != MVT::v4i32) + return SDValue(); + + auto FPToInt = N->getOperand(0); + auto FPToIntOp = FPToInt.getOpcode(); + if (FPToIntOp != ISD::FP_TO_SINT_SAT && FPToIntOp != ISD::FP_TO_UINT_SAT) + return SDValue(); + if (FPToInt.getConstantOperandVal(1) != 32) + return SDValue(); + + auto Source = FPToInt.getOperand(0); + if (Source.getValueType() != MVT::v2f64) + return SDValue(); + + auto *Splat = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (!Splat || !Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs)) + return SDValue(); + if (SplatValue != 0) + return SDValue(); + + unsigned Op = FPToIntOp == ISD::FP_TO_SINT_SAT + ? WebAssemblyISD::TRUNC_SAT_ZERO_S + : WebAssemblyISD::TRUNC_SAT_ZERO_U; + + return DAG.getNode(Op, SDLoc(N), ResVT, Source); +} + SDValue WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { @@ -2072,5 +2147,7 @@ WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return performVectorConvertLowCombine(N, DCI); + case ISD::CONCAT_VECTORS: + return performVectorTruncSatLowCombine(N, DCI); } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index c8a052d011997..10aca7708c239 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -119,6 +119,7 @@ class WebAssemblyTargetLowering final : public TargetLowering { SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerAccessVectorElement(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const; // Custom DAG combine hooks SDValue diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td index f3d9c5d5032cb..68ef43f6af36d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td @@ -96,23 +96,15 @@ defm I64_TRUNC_U_SAT_F64 : I<(outs I64:$dst), (ins F64:$src), (outs), (ins), "i64.trunc_sat_f64_u", 0xfc07>, Requires<[HasNontrappingFPToInt]>; -// Lower llvm.wasm.trunc.saturate.* to saturating instructions -def : Pat<(int_wasm_trunc_saturate_signed F32:$src), - (I32_TRUNC_S_SAT_F32 F32:$src)>; -def : Pat<(int_wasm_trunc_saturate_unsigned F32:$src), - (I32_TRUNC_U_SAT_F32 F32:$src)>; -def : Pat<(int_wasm_trunc_saturate_signed F64:$src), - (I32_TRUNC_S_SAT_F64 F64:$src)>; -def : Pat<(int_wasm_trunc_saturate_unsigned F64:$src), - (I32_TRUNC_U_SAT_F64 F64:$src)>; -def : Pat<(int_wasm_trunc_saturate_signed F32:$src), - (I64_TRUNC_S_SAT_F32 F32:$src)>; -def : Pat<(int_wasm_trunc_saturate_unsigned F32:$src), - (I64_TRUNC_U_SAT_F32 F32:$src)>; -def : Pat<(int_wasm_trunc_saturate_signed F64:$src), - (I64_TRUNC_S_SAT_F64 F64:$src)>; -def : Pat<(int_wasm_trunc_saturate_unsigned F64:$src), - (I64_TRUNC_U_SAT_F64 F64:$src)>;
+// Support the explicitly saturating operations as well. +def : Pat<(fp_to_sint_sat F32:$src, (i32 32)), (I32_TRUNC_S_SAT_F32 F32:$src)>; +def : Pat<(fp_to_uint_sat F32:$src, (i32 32)), (I32_TRUNC_U_SAT_F32 F32:$src)>; +def : Pat<(fp_to_sint_sat F64:$src, (i32 32)), (I32_TRUNC_S_SAT_F64 F64:$src)>; +def : Pat<(fp_to_uint_sat F64:$src, (i32 32)), (I32_TRUNC_U_SAT_F64 F64:$src)>; +def : Pat<(fp_to_sint_sat F32:$src, (i32 64)), (I64_TRUNC_S_SAT_F32 F32:$src)>; +def : Pat<(fp_to_uint_sat F32:$src, (i32 64)), (I64_TRUNC_U_SAT_F32 F32:$src)>; +def : Pat<(fp_to_sint_sat F64:$src, (i32 64)), (I64_TRUNC_S_SAT_F64 F64:$src)>; +def : Pat<(fp_to_uint_sat F64:$src, (i32 64)), (I64_TRUNC_U_SAT_F64 F64:$src)>; // Conversion from floating point to integer pseudo-instructions which don't // trap on overflow or invalid. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 7cf3cb1854fb1..efcdf0368488b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1104,11 +1104,21 @@ multiclass SIMDConvert; defm "" : SIMDConvert; -// Lower llvm.wasm.trunc.sat.* to saturating instructions -def : Pat<(v4i32 (int_wasm_trunc_saturate_signed (v4f32 V128:$src))), - (fp_to_sint_I32x4 $src)>; -def : Pat<(v4i32 (int_wasm_trunc_saturate_unsigned (v4f32 V128:$src))), - (fp_to_uint_I32x4 $src)>; +// Support the saturating variety as well. +def trunc_s_sat32 : PatFrag<(ops node:$x), (fp_to_sint_sat $x, (i32 32))>; +def trunc_u_sat32 : PatFrag<(ops node:$x), (fp_to_uint_sat $x, (i32 32))>; +def : Pat<(v4i32 (trunc_s_sat32 (v4f32 V128:$src))), (fp_to_sint_I32x4 $src)>; +def : Pat<(v4i32 (trunc_u_sat32 (v4f32 V128:$src))), (fp_to_uint_I32x4 $src)>; + +def trunc_sat_zero_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; +def trunc_sat_zero_s : + SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_S", trunc_sat_zero_t>; +def trunc_sat_zero_u : + SDNode<"WebAssemblyISD::TRUNC_SAT_ZERO_U", trunc_sat_zero_t>; +defm "" : SIMDConvert; +defm "" : SIMDConvert; // Integer to floating point: convert def convert_low_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; @@ -1261,10 +1271,6 @@ defm "" : SIMDConvert; defm "" : SIMDConvert; -defm "" : SIMDConvert; -defm "" : SIMDConvert; //===----------------------------------------------------------------------===// // Saturating Rounding Q-Format Multiplication diff --git a/llvm/test/CodeGen/WebAssembly/conv.ll b/llvm/test/CodeGen/WebAssembly/conv.ll index 68f941546ce10..5699c7b9adc5b 100644 --- a/llvm/test/CodeGen/WebAssembly/conv.ll +++ b/llvm/test/CodeGen/WebAssembly/conv.ll @@ -45,9 +45,9 @@ define i32 @i32_trunc_s_f32(float %x) { ; CHECK-NEXT: .functype i32_trunc_sat_s_f32 (f32) -> (i32){{$}} ; CHECK-NEXT: i32.trunc_sat_f32_s $push[[NUM:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[NUM]]{{$}} -declare i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float) +declare i32 @llvm.fptosi.sat.i32.f32(float) define i32 @i32_trunc_sat_s_f32(float %x) { - %a = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float %x) + %a = call i32 @llvm.fptosi.sat.i32.f32(float %x) ret i32 %a } @@ -64,9 +64,9 @@ define i32 @i32_trunc_u_f32(float %x) { ; CHECK-NEXT: .functype i32_trunc_sat_u_f32 (f32) -> (i32){{$}} ; CHECK-NEXT: i32.trunc_sat_f32_u $push[[NUM:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[NUM]]{{$}} -declare i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float) +declare i32 @llvm.fptoui.sat.i32.f32(float) define i32 @i32_trunc_sat_u_f32(float %x) { - %a = call i32 
@llvm.wasm.trunc.saturate.unsigned.i32.f32(float %x) + %a = call i32 @llvm.fptoui.sat.i32.f32(float %x) ret i32 %a } @@ -83,9 +83,9 @@ define i32 @i32_trunc_s_f64(double %x) { ; CHECK-NEXT: .functype i32_trunc_sat_s_f64 (f64) -> (i32){{$}} ; CHECK-NEXT: i32.trunc_sat_f64_s $push[[NUM:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[NUM]]{{$}} -declare i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double) +declare i32 @llvm.fptosi.sat.i32.f64(double) define i32 @i32_trunc_sat_s_f64(double %x) { - %a = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double %x) + %a = call i32 @llvm.fptosi.sat.i32.f64(double %x) ret i32 %a } @@ -102,9 +102,9 @@ define i32 @i32_trunc_u_f64(double %x) { ; CHECK-NEXT: .functype i32_trunc_sat_u_f64 (f64) -> (i32){{$}} ; CHECK-NEXT: i32.trunc_sat_f64_u $push[[NUM:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[NUM]]{{$}} -declare i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double) +declare i32 @llvm.fptoui.sat.i32.f64(double) define i32 @i32_trunc_sat_u_f64(double %x) { - %a = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double %x) + %a = call i32 @llvm.fptoui.sat.i32.f64(double %x) ret i32 %a } @@ -121,9 +121,9 @@ define i64 @i64_trunc_s_f32(float %x) { ; CHECK-NEXT: .functype i64_trunc_sat_s_f32 (f32) -> (i64){{$}} ; CHECK-NEXT: i64.trunc_sat_f32_s $push[[NUM:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[NUM]]{{$}} -declare i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float) +declare i64 @llvm.fptosi.sat.i64.f32(float) define i64 @i64_trunc_sat_s_f32(float %x) { - %a = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float %x) + %a = call i64 @llvm.fptosi.sat.i64.f32(float %x) ret i64 %a } @@ -140,9 +140,9 @@ define i64 @i64_trunc_u_f32(float %x) { ; CHECK-NEXT: .functype i64_trunc_sat_u_f32 (f32) -> (i64){{$}} ; CHECK-NEXT: i64.trunc_sat_f32_u $push[[NUM:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[NUM]]{{$}} -declare i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float) +declare i64 @llvm.fptoui.sat.i64.f32(float) define i64 @i64_trunc_sat_u_f32(float %x) { - %a = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float %x) + %a = call i64 @llvm.fptoui.sat.i64.f32(float %x) ret i64 %a } @@ -159,9 +159,9 @@ define i64 @i64_trunc_s_f64(double %x) { ; CHECK-NEXT: .functype i64_trunc_sat_s_f64 (f64) -> (i64){{$}} ; CHECK-NEXT: i64.trunc_sat_f64_s $push[[NUM:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[NUM]]{{$}} -declare i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double) +declare i64 @llvm.fptosi.sat.i64.f64(double) define i64 @i64_trunc_sat_s_f64(double %x) { - %a = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double %x) + %a = call i64 @llvm.fptosi.sat.i64.f64(double %x) ret i64 %a } @@ -178,9 +178,9 @@ define i64 @i64_trunc_u_f64(double %x) { ; CHECK-NEXT: .functype i64_trunc_sat_u_f64 (f64) -> (i64){{$}} ; CHECK-NEXT: i64.trunc_sat_f64_u $push[[NUM:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[NUM]]{{$}} -declare i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double) +declare i64 @llvm.fptoui.sat.i64.f64(double) define i64 @i64_trunc_sat_u_f64(double %x) { - %a = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double %x) + %a = call i64 @llvm.fptoui.sat.i64.f64(double %x) ret i64 %a } @@ -274,7 +274,7 @@ define float @f32_demote_f64(double %x) { ret float %a } -; If the high its are unused, LLVM will optimize sext/zext into anyext, which +; If the high bits are unused, LLVM will optimize sext/zext into anyext, which ; we need to patterm-match back to a specific instruction. 
; CHECK-LABEL: anyext: @@ -312,3 +312,46 @@ define i64 @bitcast_double_to_i64(double %a) { %t = bitcast double %a to i64 ret i64 %t } + +; Check that saturating fptoint with unsupported target bit widths is lowered +; correctly. + +; CHECK-LABEL: i16_trunc_sat_s_f32: +; CHECK-NEXT: .functype i16_trunc_sat_s_f32 (f32) -> (i32){{$}} +; CHECK: i32.select +; CHECK: return +declare i16 @llvm.fptosi.sat.i16.f32(float) +define i16 @i16_trunc_sat_s_f32(float %x) { + %a = call i16 @llvm.fptosi.sat.i16.f32(float %x) + ret i16 %a +} + +; CHECK-LABEL: i16_trunc_sat_u_f32: +; CHECK-NEXT: .functype i16_trunc_sat_u_f32 (f32) -> (i32){{$}} +; CHECK: i32.select +; CHECK: return +declare i16 @llvm.fptoui.sat.i16.f32(float) +define i16 @i16_trunc_sat_u_f32(float %x) { + %a = call i16 @llvm.fptoui.sat.i16.f32(float %x) + ret i16 %a +} + +; CHECK-LABEL: i16_trunc_sat_s_f64: +; CHECK-NEXT: .functype i16_trunc_sat_s_f64 (f64) -> (i32){{$}} +; CHECK: i32.select +; CHECK: return +declare i16 @llvm.fptosi.sat.i16.f64(double) +define i16 @i16_trunc_sat_s_f64(double %x) { + %a = call i16 @llvm.fptosi.sat.i16.f64(double %x) + ret i16 %a +} + +; CHECK-LABEL: i16_trunc_sat_u_f64: +; CHECK-NEXT: .functype i16_trunc_sat_u_f64 (f64) -> (i32){{$}} +; CHECK: i32.select +; CHECK: return +declare i16 @llvm.fptoui.sat.i16.f64(double) +define i16 @i16_trunc_sat_u_f64(double %x) { + %a = call i16 @llvm.fptoui.sat.i16.f64(double %x) + ret i16 %a +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll index 6e8e5a2fed71b..0fc008d3ef9a3 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -513,9 +513,9 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %c) { ; CHECK-NEXT: .functype trunc_sat_s_v4i32 (v128) -> (v128){{$}} ; CHECK-NEXT: i32x4.trunc_sat_f32x4_s $push[[R:[0-9]+]]=, $0 ; CHECK-NEXT: return $pop[[R]] -declare <4 x i32> @llvm.wasm.trunc.saturate.signed.v4i32.v4f32(<4 x float>) +declare <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float>) define <4 x i32> @trunc_sat_s_v4i32(<4 x float> %x) { - %a = call <4 x i32> @llvm.wasm.trunc.saturate.signed.v4i32.v4f32(<4 x float> %x) + %a = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> %x) ret <4 x i32> %a } @@ -524,30 +524,34 @@ define <4 x i32> @trunc_sat_s_v4i32(<4 x float> %x) { ; CHECK-NEXT: .functype trunc_sat_u_v4i32 (v128) -> (v128){{$}} ; CHECK-NEXT: i32x4.trunc_sat_f32x4_u $push[[R:[0-9]+]]=, $0 ; CHECK-NEXT: return $pop[[R]] -declare <4 x i32> @llvm.wasm.trunc.saturate.unsigned.v4i32.v4f32(<4 x float>) +declare <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float>) define <4 x i32> @trunc_sat_u_v4i32(<4 x float> %x) { - %a = call <4 x i32> @llvm.wasm.trunc.saturate.unsigned.v4i32.v4f32(<4 x float> %x) + %a = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> %x) ret <4 x i32> %a } -; CHECK-LABEL: trunc_sat_zero_signed_v4i32: -; CHECK-NEXT: .functype trunc_sat_zero_signed_v4i32 (v128) -> (v128){{$}} +; CHECK-LABEL: trunc_sat_zero_s_v4i32: +; CHECK-NEXT: .functype trunc_sat_zero_s_v4i32 (v128) -> (v128){{$}} ; CHECK-NEXT: i32x4.trunc_sat_zero_f64x2_s $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -declare <4 x i32> @llvm.wasm.trunc.sat.zero.signed(<2 x double>) -define <4 x i32> @trunc_sat_zero_signed_v4i32(<2 x double> %a) { - %v = call <4 x i32> @llvm.wasm.trunc.sat.zero.signed(<2 x double> %a) - ret <4 x i32> %v +declare <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double>) +define <4 x i32> 
@trunc_sat_zero_s_v4i32(<2 x double> %x) { + %v = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> %x) + %a = shufflevector <2 x i32> %v, <2 x i32> <i32 0, i32 0>, + <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i32> %a } -; CHECK-LABEL: trunc_sat_zero_unsigned_v4i32: -; CHECK-NEXT: .functype trunc_sat_zero_unsigned_v4i32 (v128) -> (v128){{$}} +; CHECK-LABEL: trunc_sat_zero_u_v4i32: +; CHECK-NEXT: .functype trunc_sat_zero_u_v4i32 (v128) -> (v128){{$}} ; CHECK-NEXT: i32x4.trunc_sat_zero_f64x2_u $push[[R:[0-9]+]]=, $0{{$}} ; CHECK-NEXT: return $pop[[R]]{{$}} -declare <4 x i32> @llvm.wasm.trunc.sat.zero.unsigned(<2 x double>) -define <4 x i32> @trunc_sat_zero_unsigned_v4i32(<2 x double> %a) { - %v = call <4 x i32> @llvm.wasm.trunc.sat.zero.unsigned(<2 x double> %a) - ret <4 x i32> %v +declare <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double>) +define <4 x i32> @trunc_sat_zero_u_v4i32(<2 x double> %x) { + %v = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> %x) + %a = shufflevector <2 x i32> %v, <2 x i32> <i32 0, i32 0>, + <4 x i32> <i32 0, i32 1, i32 2, i32 3> + ret <4 x i32> %a } ; ============================================================================== diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/WebAssembly/trunc_saturate.ll b/llvm/test/Transforms/InstSimplify/ConstProp/WebAssembly/trunc_saturate.ll deleted file mode 100644 index a7cd6066785aa..0000000000000 --- a/llvm/test/Transforms/InstSimplify/ConstProp/WebAssembly/trunc_saturate.ll +++ /dev/null @@ -1,610 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -instsimplify -S | FileCheck %s - -target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" -target triple = "wasm32-unknown-unknown" - -declare i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float) -declare i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float) -declare i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double) -declare i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double) -declare i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float) -declare i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float) -declare i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double) -declare i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double) - -define void @test_i32_trunc_sat_f32_s(i32* %p) { -; CHECK-LABEL: @test_i32_trunc_sat_f32_s( -; CHECK-NEXT: store volatile i32 0, i32* [[P:%.*]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -2, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 2147483520, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -2147483648, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 2147483647, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -2147483648, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 2147483647, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -2147483648, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store
volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: ret void -; - %t0 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float +0.0) - store volatile i32 %t0, i32* %p - %t1 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float -0.0) - store volatile i32 %t1, i32* %p - %t2 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0x36a0000000000000); 0x1p-149 - store volatile i32 %t2, i32* %p - %t3 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0xb6a0000000000000); -0x1p-149 - store volatile i32 %t3, i32* %p - %t4 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 1.0) - store volatile i32 %t4, i32* %p - %t5 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0x3ff19999a0000000); 0x1.19999ap+0 - store volatile i32 %t5, i32* %p - %t6 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 1.5) - store volatile i32 %t6, i32* %p - %t7 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float -1.0) - store volatile i32 %t7, i32* %p - %t8 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0xbff19999a0000000); -0x1.19999ap+0 - store volatile i32 %t8, i32* %p - %t9 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float -1.5) - store volatile i32 %t9, i32* %p - %t10 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0xbffe666660000000); -1.9 - store volatile i32 %t10, i32* %p - %t11 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float -2.0) - store volatile i32 %t11, i32* %p - %t12 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 2147483520.0) - store volatile i32 %t12, i32* %p - %t13 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float -2147483648.0) - store volatile i32 %t13, i32* %p - %t14 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 2147483648.0) - store volatile i32 %t14, i32* %p - %t15 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float -2147483904.0) - store volatile i32 %t15, i32* %p - %t16 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0x7ff0000000000000); inf - store volatile i32 %t16, i32* %p - %t17 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0xfff0000000000000); -inf - store volatile i32 %t17, i32* %p - %t18 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0x7ff8000000000000); nan - store volatile i32 %t18, i32* %p - %t19 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0x7ffa000000000000); nan:0x200000 - store volatile i32 %t19, i32* %p - %t20 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0xfff8000000000000); -nan - store volatile i32 %t20, i32* %p - %t21 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0xfffa000000000000); -nan:0x200000 - store volatile i32 %t21, i32* %p - ret void -} - -define void @test_i32_trunc_sat_f32_u(i32* %p) { -; CHECK-LABEL: @test_i32_trunc_sat_f32_u( -; CHECK-NEXT: store volatile i32 0, i32* [[P:%.*]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 2, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -2147483648, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -256, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: 
store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: ret void -; - %t0 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float +0.0) - store volatile i32 %t0, i32* %p - %t1 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float -0.0) - store volatile i32 %t1, i32* %p - %t2 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 0x36a0000000000000); 0x1p-149 - store volatile i32 %t2, i32* %p - %t3 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 0xb6a0000000000000); -0x1p-149 - store volatile i32 %t3, i32* %p - %t4 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 1.0) - store volatile i32 %t4, i32* %p - %t5 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 0x3ff19999a0000000); 0x1.19999ap+0 - store volatile i32 %t5, i32* %p - %t6 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 1.5) - store volatile i32 %t6, i32* %p - %t7 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 0x3ffe666660000000); 1.9 - store volatile i32 %t7, i32* %p - %t8 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 2.0) - store volatile i32 %t8, i32* %p - %t9 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 2147483648.0) - store volatile i32 %t9, i32* %p - %t10 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 4294967040.0) - store volatile i32 %t10, i32* %p - %t11 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 0xbfecccccc0000000); -0x1.ccccccp-1 - store volatile i32 %t11, i32* %p - %t12 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 0xbfefffffe0000000); -0x1.fffffep-1 - store volatile i32 %t12, i32* %p - %t13 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 4294967296.0) - store volatile i32 %t13, i32* %p - %t14 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float -1.0) - store volatile i32 %t14, i32* %p - %t15 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 0x7ff0000000000000); inf - store volatile i32 %t15, i32* %p - %t16 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 0xfff0000000000000); -inf - store volatile i32 %t16, i32* %p - %t17 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 0x7ff8000000000000); nan - store volatile i32 %t17, i32* %p - %t18 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 0x7ffa000000000000); nan:0x200000 - store volatile i32 %t18, i32* %p - %t19 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 0xfff8000000000000); -nan - store volatile i32 %t19, i32* %p - %t20 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float 0xfffa000000000000); -nan:0x200000 - store volatile i32 %t20, i32* %p - ret void -} - -define void @test_i32_trunc_sat_f64_s(i32* %p) { -; CHECK-LABEL: @test_i32_trunc_sat_f64_s( -; CHECK-NEXT: store volatile i32 0, i32* [[P:%.*]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; 
CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -2, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 2147483647, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -2147483648, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 2147483647, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -2147483648, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 2147483647, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -2147483648, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: ret void -; - %t0 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double +0.0) - store volatile i32 %t0, i32* %p - %t1 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double -0.0) - store volatile i32 %t1, i32* %p - %t2 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 0x0010000000000001); 0x0.0000000000001p-1022 - store volatile i32 %t2, i32* %p - %t3 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 0x8010000000000001); -0x1.0000000000001p-1022 - store volatile i32 %t3, i32* %p - %t4 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 1.0) - store volatile i32 %t4, i32* %p - %t5 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 0x3ff199999999999a); 0x1.199999999999ap+0 - store volatile i32 %t5, i32* %p - %t6 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 1.5) - store volatile i32 %t6, i32* %p - %t7 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double -1.0) - store volatile i32 %t7, i32* %p - %t8 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 0xbff199999999999a); -0x1.199999999999ap+0 - store volatile i32 %t8, i32* %p - %t9 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double -1.5) - store volatile i32 %t9, i32* %p - %t10 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 0xbffe666666666666); -1.9 - store volatile i32 %t10, i32* %p - %t11 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double -2.0) - store volatile i32 %t11, i32* %p - %t12 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 2147483647.0) - store volatile i32 %t12, i32* %p - %t13 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double -2147483648.0) - store volatile i32 %t13, i32* %p - %t14 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 2147483648.0) - store volatile i32 %t14, i32* %p - %t15 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double -2147483649.0) - store volatile i32 %t15, i32* %p - %t16 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 0x7ff0000000000000); inf - store volatile i32 %t16, i32* %p - %t17 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 0xfff0000000000000); -inf - store volatile i32 %t17, i32* %p - %t18 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 0x7ff8000000000000); nan - store volatile i32 %t18, i32* %p - %t19 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 0x7ff4000000000000); nan:0x4000000000000 - store volatile i32 %t19, i32* %p - %t20 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 0xfff8000000000000); -nan - store volatile i32 %t20, i32* %p - %t21 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double 
0x7ff4000000000000); -nan:0x4000000000000 - store volatile i32 %t21, i32* %p - ret void -} - -define void @test_i32_trunc_sat_f64_u(i32* %p) { -; CHECK-LABEL: @test_i32_trunc_sat_f64_u( -; CHECK-NEXT: store volatile i32 0, i32* [[P:%.*]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 2, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -2147483648, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 100000000, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 -1, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: store volatile i32 0, i32* [[P]], align 4 -; CHECK-NEXT: ret void -; - %t0 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double +0.0) - store volatile i32 %t0, i32* %p - %t1 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double -0.0) - store volatile i32 %t1, i32* %p - %t2 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 0x0010000000000001); 0x0.0000000000001p-1022 - store volatile i32 %t2, i32* %p - %t3 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 0x8010000000000001); -0x0.0000000000001p-1022 - store volatile i32 %t3, i32* %p - %t4 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 1.0) - store volatile i32 %t4, i32* %p - %t5 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 0x3ff199999999999a); 0x1.199999999999ap+0 - store volatile i32 %t5, i32* %p - %t6 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 1.5) - store volatile i32 %t6, i32* %p - %t7 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 0x3ffe666666666666); 1.9 - store volatile i32 %t7, i32* %p - %t8 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 2.0) - store volatile i32 %t8, i32* %p - %t9 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 2147483648.0) - store volatile i32 %t9, i32* %p - %t10 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 4294967295.0) - store volatile i32 %t10, i32* %p - %t11 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 0xbfeccccccccccccd); -0x1.ccccccccccccdp-1 - store volatile i32 %t11, i32* %p - %t12 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 0xbfefffffffffffff); -0x1.fffffffffffffp-1 - store volatile i32 %t12, i32* %p - %t13 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 100000000.0); 1e8 - store volatile i32 %t13, i32* %p - %t14 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 4294967296.0) - store volatile i32 %t14, i32* %p - %t15 = call i32 
@llvm.wasm.trunc.saturate.unsigned.i32.f64(double -1.0) - store volatile i32 %t15, i32* %p - %t16 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 10000000000000000.0); 1e16 - store volatile i32 %t16, i32* %p - %t17 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 1000000000000000000000000000000.0); 1e30 - store volatile i32 %t17, i32* %p - %t18 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 9223372036854775808.0) - store volatile i32 %t18, i32* %p - %t19 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 0x7ff0000000000000); inf - store volatile i32 %t19, i32* %p - %t20 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 0xfff0000000000000); -inf - store volatile i32 %t20, i32* %p - %t21 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 0x7ff8000000000000); nan - store volatile i32 %t21, i32* %p - %t22 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 0x7ff4000000000000); nan:0x4000000000000 - store volatile i32 %t22, i32* %p - %t23 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 0xfff8000000000000); -nan - store volatile i32 %t23, i32* %p - %t24 = call i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double 0xfff4000000000000); -nan:0x4000000000000 - store volatile i32 %t24, i32* %p - ret void -} - -define void @test_i64_trunc_sat_f32_s(i64* %p) { -; CHECK-LABEL: @test_i64_trunc_sat_f32_s( -; CHECK-NEXT: store volatile i64 0, i64* [[P:%.*]], align 8 -; CHECK-NEXT: store volatile i64 0, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 0, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 0, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 1, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 1, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 1, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 -1, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 -1, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 -1, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 -1, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 -2, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 4294967296, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 -4294967296, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 9223371487098961920, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 -9223372036854775808, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 9223372036854775807, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 -9223372036854775808, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 9223372036854775807, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 -9223372036854775808, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 0, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 0, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 0, i64* [[P]], align 8 -; CHECK-NEXT: store volatile i64 0, i64* [[P]], align 8 -; CHECK-NEXT: ret void -; - %t0 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float +0.0) - store volatile i64 %t0, i64* %p - %t1 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float -0.0) - store volatile i64 %t1, i64* %p - %t2 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 0x36a0000000000000); 0x1p-149 - store volatile i64 %t2, i64* %p - %t3 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 0xb6a0000000000000); -0x1p-149 - store volatile i64 %t3, i64* %p - %t4 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 1.0) - store volatile i64 %t4, i64* %p - %t5 = call i64 
@llvm.wasm.trunc.saturate.signed.i64.f32(float 0x3ff19999a0000000); 0x1.19999ap+0
-  store volatile i64 %t5, i64* %p
-  %t6 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 1.5)
-  store volatile i64 %t6, i64* %p
-  %t7 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float -1.0)
-  store volatile i64 %t7, i64* %p
-  %t8 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 0xbff19999a0000000); -0x1.19999ap+0
-  store volatile i64 %t8, i64* %p
-  %t9 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float -1.5)
-  store volatile i64 %t9, i64* %p
-  %t10 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 0xbffe666660000000); -1.9
-  store volatile i64 %t10, i64* %p
-  %t11 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float -2.0)
-  store volatile i64 %t11, i64* %p
-  %t12 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 4294967296.0)
-  store volatile i64 %t12, i64* %p
-  %t13 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float -4294967296.0)
-  store volatile i64 %t13, i64* %p
-  %t14 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 9223371487098961920.0)
-  store volatile i64 %t14, i64* %p
-  %t15 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float -9223372036854775808.0)
-  store volatile i64 %t15, i64* %p
-  %t16 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 9223372036854775808.0)
-  store volatile i64 %t16, i64* %p
-  %t17 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float -9223373136366403584.0)
-  store volatile i64 %t17, i64* %p
-  %t18 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 0x7ff0000000000000); inf
-  store volatile i64 %t18, i64* %p
-  %t19 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 0xfff0000000000000); -inf
-  store volatile i64 %t19, i64* %p
-  %t20 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 0x7ff8000000000000); nan
-  store volatile i64 %t20, i64* %p
-  %t21 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 0x7ffa000000000000); nan:0x200000
-  store volatile i64 %t21, i64* %p
-  %t22 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 0xfff8000000000000); -nan
-  store volatile i64 %t22, i64* %p
-  %t23 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float 0xfffa000000000000); -nan:0x200000
-  store volatile i64 %t23, i64* %p
-  ret void
-}
-
-define void @test_i64_trunc_sat_f32_u(i64* %p) {
-; CHECK-LABEL: @test_i64_trunc_sat_f32_u(
-; CHECK-NEXT:    store volatile i64 0, i64* [[P:%.*]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 4294967296, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -1099511627776, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    ret void
-;
-  %t0 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float +0.0)
-  store volatile i64 %t0, i64* %p
-  %t1 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float -0.0)
-  store volatile i64 %t1, i64* %p
-  %t2 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 0x36a0000000000000); 0x1p-149
-  store volatile i64 %t2, i64* %p
-  %t3 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 0xb6a0000000000000); -0x1p-149
-  store volatile i64 %t3, i64* %p
-  %t4 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 1.0)
-  store volatile i64 %t4, i64* %p
-  %t5 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 0x3ff19999a0000000); 0x1.19999ap+0
-  store volatile i64 %t5, i64* %p
-  %t6 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 1.5)
-  store volatile i64 %t6, i64* %p
-  %t7 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 4294967296.0)
-  store volatile i64 %t7, i64* %p
-  %t8 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 18446742974197923840.0)
-  store volatile i64 %t8, i64* %p
-  %t9 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 0xbfecccccc0000000); -0x1.ccccccp-1
-  store volatile i64 %t9, i64* %p
-  %t10 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 0xbfefffffe0000000); -0x1.fffffep-1
-  store volatile i64 %t10, i64* %p
-  %t11 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 18446744073709551616.0)
-  store volatile i64 %t11, i64* %p
-  %t12 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float -1.0)
-  store volatile i64 %t12, i64* %p
-  %t13 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 0x7ff0000000000000); inf
-  store volatile i64 %t13, i64* %p
-  %t14 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 0xfff0000000000000); -inf
-  store volatile i64 %t14, i64* %p
-  %t15 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 0x7ff8000000000000); nan
-  store volatile i64 %t15, i64* %p
-  %t16 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 0x7ffa000000000000); nan:0x200000
-  store volatile i64 %t16, i64* %p
-  %t17 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 0xfff8000000000000); -nan
-  store volatile i64 %t17, i64* %p
-  %t18 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float 0xfffa000000000000); -nan:0x200000
-  store volatile i64 %t18, i64* %p
-  ret void
-}
-
-define void @test_i64_trunc_sat_f64_s(i64* %p) {
-; CHECK-LABEL: @test_i64_trunc_sat_f64_s(
-; CHECK-NEXT:    store volatile i64 0, i64* [[P:%.*]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -2, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 4294967296, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -4294967296, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 9223372036854774784, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -9223372036854775808, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 9223372036854775807, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -9223372036854775808, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 9223372036854775807, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -9223372036854775808, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    ret void
-;
-  %t0 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double +0.0)
-  store volatile i64 %t0, i64* %p
-  %t1 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double -0.0)
-  store volatile i64 %t1, i64* %p
-  %t2 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 0x0010000000000001); 0x0.0000000000001p-1022
-  store volatile i64 %t2, i64* %p
-  %t3 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 0x8010000000000001); -0x1.0000000000001p-1022
-  store volatile i64 %t3, i64* %p
-  %t4 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 1.0)
-  store volatile i64 %t4, i64* %p
-  %t5 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 0x3ff199999999999a); 0x1.199999999999ap+0
-  store volatile i64 %t5, i64* %p
-  %t6 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 1.5)
-  store volatile i64 %t6, i64* %p
-  %t7 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double -1.0)
-  store volatile i64 %t7, i64* %p
-  %t8 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 0xbff199999999999a); -0x1.199999999999ap+0
-  store volatile i64 %t8, i64* %p
-  %t9 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double -1.5)
-  store volatile i64 %t9, i64* %p
-  %t10 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 0xbffe666666666666); -1.9
-  store volatile i64 %t10, i64* %p
-  %t11 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double -2.0)
-  store volatile i64 %t11, i64* %p
-  %t12 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 4294967296.0)
-  store volatile i64 %t12, i64* %p
-  %t13 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double -4294967296.0)
-  store volatile i64 %t13, i64* %p
-  %t14 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 9223372036854774784.0)
-  store volatile i64 %t14, i64* %p
-  %t15 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double -9223372036854775808.0)
-  store volatile i64 %t15, i64* %p
-  %t16 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 9223372036854775808.0)
-  store volatile i64 %t16, i64* %p
-  %t17 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double -9223372036854777856.0)
-  store volatile i64 %t17, i64* %p
-  %t18 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 0x7ff0000000000000); inf
-  store volatile i64 %t18, i64* %p
-  %t19 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 0xfff0000000000000); -inf
-  store volatile i64 %t19, i64* %p
-  %t20 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 0x7ff8000000000000); nan
-  store volatile i64 %t20, i64* %p
-  %t21 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 0x7ff4000000000000); nan:0x4000000000000
-  store volatile i64 %t21, i64* %p
-  %t22 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 0xfff8000000000000); -nan
-  store volatile i64 %t22, i64* %p
-  %t23 = call i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double 0x7ff4000000000000); -nan:0x4000000000000
-  store volatile i64 %t23, i64* %p
-  ret void
-}
-
-define void @test_i64_trunc_sat_f64_u(i64* %p) {
-; CHECK-LABEL: @test_i64_trunc_sat_f64_u(
-; CHECK-NEXT:    store volatile i64 0, i64* [[P:%.*]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 4294967295, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 4294967296, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -2048, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 100000000, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 10000000000000000, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -9223372036854775808, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 -1, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    store volatile i64 0, i64* [[P]], align 8
-; CHECK-NEXT:    ret void
-;
-  %t0 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double +0.0)
-  store volatile i64 %t0, i64* %p
-  %t1 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double -0.0)
-  store volatile i64 %t1, i64* %p
-  %t2 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 0x0010000000000001); 0x0.0000000000001p-1022
-  store volatile i64 %t2, i64* %p
-  %t3 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 0x8010000000000001); -0x0.0000000000001p-1022
-  store volatile i64 %t3, i64* %p
-  %t4 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 1.0)
-  store volatile i64 %t4, i64* %p
-  %t5 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 0x3ff199999999999a); 0x1.199999999999ap+0
-  store volatile i64 %t5, i64* %p
-  %t6 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 1.5)
-  store volatile i64 %t6, i64* %p
-  %t7 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 4294967295.0)
-  store volatile i64 %t7, i64* %p
-  %t8 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 4294967296.0)
-  store volatile i64 %t8, i64* %p
-  %t9 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 18446744073709549568.0)
-  store volatile i64 %t9, i64* %p
-  %t10 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 0xbfeccccccccccccd); -0x1.ccccccccccccdp-1
-  store volatile i64 %t10, i64* %p
-  %t11 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 0xbfefffffffffffff); -0x1.fffffffffffffp-1
-  store volatile i64 %t11, i64* %p
-  %t12 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 100000000.0); 1e8
-  store volatile i64 %t12, i64* %p
-  %t13 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 10000000000000000.0); 1e16
-  store volatile i64 %t13, i64* %p
-  %t14 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 9223372036854775808.0);
-  store volatile i64 %t14, i64* %p
-  %t15 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 18446744073709551616.0)
-  store volatile i64 %t15, i64* %p
-  %t16 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double -1.0)
-  store volatile i64 %t16, i64* %p
-  %t17 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 0x7ff0000000000000); inf
-  store volatile i64 %t17, i64* %p
-  %t18 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 0xfff0000000000000); -inf
-  store volatile i64 %t18, i64* %p
-  %t19 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 0x7ff8000000000000); nan
-  store volatile i64 %t19, i64* %p
-  %t20 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 0x7ff4000000000000); nan:0x4000000000000
-  store volatile i64 %t20, i64* %p
-  %t21 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 0xfff8000000000000); -nan
-  store volatile i64 %t21, i64* %p
-  %t22 = call i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double 0xfff4000000000000); -nan:0x4000000000000
-  store volatile i64 %t22, i64* %p
-  ret void
-}