From 31f369229bf6f2f3051530ec09cd5e50e9462442 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Mon, 16 Oct 2023 18:21:57 -0400
Subject: [PATCH 1/3] [RISCV] Adjust LMUL if not used to avoid VL toggle

A common pattern with vmv.s.x is that we need to set VL afterwards
because the SEW/LMUL ratio has changed, and thus VLMAX has changed:

    vsetvli zero, a1, e64, m1, ta, ma
    vmv.s.x v16, a0
    vsetvli zero, a1, e32, m2, ta, ma

However, since LMUL and the SEW/LMUL ratio are ignored by vmv.s.x, we
can avoid a VL toggle in the second vsetvli instruction by adjusting
LMUL so that the SEW/LMUL ratio remains the same between the two
instructions:

    vsetvli zero, a1, e64, m4, ta, ma
    vmv.s.x v16, a0
    vsetvli zero, zero, e32, m2, ta, ma

Avoiding a VL toggle may be more performant on some architectures, and
in some cases allows a vsetvli to be deleted.
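
To see why m4 is the right adjustment above (illustrative arithmetic
only; VLMAX = VLEN * LMUL / SEW, so VLMAX is fixed by the SEW/LMUL
ratio):

    e64, m1 -> SEW/LMUL = 64/1 = 64
    e32, m2 -> SEW/LMUL = 32/2 = 16   (ratio changed, VL must be re-set from a1)
    e64, m4 -> SEW/LMUL = 64/4 = 16   (same ratio, so vsetvli zero, zero keeps VL)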
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp  | 58 +++++++++++++
 .../CodeGen/RISCV/rvv/fixed-vectors-insert.ll | 10 +--
 .../RISCV/rvv/fixed-vectors-masked-scatter.ll | 72 ++++++++--------
 .../RISCV/rvv/fixed-vectors-reduction-fp.ll   | 59 +++++++------
 .../RISCV/rvv/fixed-vectors-reduction-int.ll  | 84 +++++++++----------
 .../RISCV/rvv/fixed-vectors-unaligned.ll      |  4 +-
 .../RISCV/rvv/vreductions-fp-sdnode.ll        | 24 +++---
 .../test/CodeGen/RISCV/rvv/vreductions-int.ll | 24 +++---
 8 files changed, 193 insertions(+), 142 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 5584fa8d503db..6724acab277b5 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1463,6 +1463,55 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
   return areCompatibleVTYPEs(PriorVType, VType, Used);
 }
 
+// If LMUL or the SEW/LMUL ratio aren't demanded and MI and NextMI have the same
+// AVL, then we can try to change MI's LMUL so that we can avoid setting VL in
+// NextMI, e.g.:
+//
+// vsetivli zero, 4, e32, m1, ta, ma
+// vsetivli zero, 4, e16, mf4, ta, ma
+// becomes:
+// vsetivli zero, 4, e32, mf2, ta, ma
+// vsetvli zero, zero, e16, mf4, ta, ma
+//
+// If possible, returns the new VTYPE that should be used for MI.
+static std::optional<unsigned>
+canAdjustSEWLMULRatio(const MachineInstr &MI, const MachineInstr &NextMI,
+                      const DemandedFields &Used) {
+  if (Used.LMUL || Used.SEWLMULRatio)
+    return std::nullopt;
+  if (!NextMI.getOperand(0).isDead())
+    return std::nullopt;
+  // If we end up increasing the SEW/LMUL ratio, then we will decrease VLMAX,
+  // which means we might end up changing VL in the case that AVL > VLMAX. So
+  // bail if the exact VL value is needed.
+  //
+  // TODO: We could potentially relax this when we know we're increasing VLMAX.
+  if (Used.VLAny)
+    return std::nullopt;
+
+  // If NextMI is already vsetvli zero, zero then bail. If MI is vsetvli zero,
+  // zero then we can't tell if it has the same AVL as NextMI, so also bail.
+  if (isVLPreservingConfig(MI) || isVLPreservingConfig(NextMI))
+    return std::nullopt;
+
+  VSETVLIInfo NextMIInfo = getInfoForVSETVLI(NextMI);
+  VSETVLIInfo MIInfo = getInfoForVSETVLI(MI);
+  if (!MIInfo.hasSameAVL(NextMIInfo))
+    return std::nullopt;
+
+  unsigned SEW = MIInfo.getSEW() * 8;
+  // Fixed point value with 3 fractional bits.
+  unsigned NewRatio = SEW / NextMIInfo.getSEWLMULRatio();
+  bool Fractional = NewRatio < 8;
+  RISCVII::VLMUL NewVLMul = RISCVVType::encodeLMUL(
+      Fractional ? 8 / NewRatio : NewRatio / 8, Fractional);
+
+  unsigned VType = MIInfo.encodeVTYPE();
+  return RISCVVType::encodeVTYPE(NewVLMul, SEW / 8,
+                                 RISCVVType::isTailAgnostic(VType),
+                                 RISCVVType::isMaskAgnostic(VType));
+}
+
 void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
   MachineInstr *NextMI = nullptr;
   // We can have arbitrary code in successors, so VL and VTYPE
@@ -1484,6 +1533,15 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
       Used.demandVL();
 
     if (NextMI) {
+      if (auto NewVType = canAdjustSEWLMULRatio(MI, *NextMI, Used)) {
+        MI.getOperand(2).setImm(*NewVType);
+        // Convert NextMI to vsetvli zero, zero
+        NextMI->setDesc(TII->get(RISCV::PseudoVSETVLIX0));
+        NextMI->getOperand(0).setReg(RISCV::X0);
+        NextMI->getOperand(0).setIsDead(true);
+        NextMI->getOperand(1).ChangeToRegister(RISCV::X0, false, false, true);
+      }
+
       if (!Used.usedVL() && !Used.usedVTYPE()) {
         ToDelete.push_back(&MI);
         // Leave NextMI unchanged
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index cbcca9d2696f4..4a39bbb519cc0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -65,9 +65,8 @@ define <32 x i32> @insertelt_v32i32_31(<32 x i32> %a, i32 %y) {
 ; CHECK-LABEL: insertelt_v32i32_31:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v16, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v16, a0
 ; CHECK-NEXT:    vslideup.vi v8, v16, 31
 ; CHECK-NEXT:    ret
   %b = insertelement <32 x i32> %a, i32 %y, i32 31
@@ -103,9 +102,8 @@ define <64 x i32> @insertelt_v64i32_63(<64 x i32> %a, i32 %y) {
 ; CHECK-LABEL: insertelt_v64i32_63:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v24, a0
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v24, a0
 ; CHECK-NEXT:    vslideup.vi v16, v24, 31
 ; CHECK-NEXT:    ret
   %b = insertelement <64 x i32> %a, i32 %y, i32 63
@@ -550,9 +548,9 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
 ; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v8, (a0)
 ; CHECK-NEXT:    li a2, 6
-; CHECK-NEXT:    vsetivli zero, 8, e64, m1, tu, ma
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, tu, ma
 ; CHECK-NEXT:    vmv.s.x v8, a2
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vle64.v v12, (a1)
 ; CHECK-NEXT:    vadd.vv v8, v8, v12
 ; CHECK-NEXT:    vse64.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 60b61e889315c..06fb0a8eba433 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -1138,11 +1138,11 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB18_7
 ; RV64ZVE32F-NEXT:  .LBB18_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -1271,11 +1271,11 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB19_7
 ; RV64ZVE32F-NEXT:  .LBB19_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -1408,12 +1408,12 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB20_7
 ; RV64ZVE32F-NEXT:  .LBB20_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT:    andi a2, a2, 255
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -2043,11 +2043,11 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB29_7
 ; RV64ZVE32F-NEXT:  .LBB29_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT:    vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -2175,11 +2175,11 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB30_7
 ; RV64ZVE32F-NEXT:  .LBB30_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT:    vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -2314,12 +2314,12 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB31_7
 ; RV64ZVE32F-NEXT:  .LBB31_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT:    andi a2, a2, 255
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT:    vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -2451,11 +2451,11 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB32_7
 ; RV64ZVE32F-NEXT:  .LBB32_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT:    vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -2584,11 +2584,11 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB33_7
 ; RV64ZVE32F-NEXT:  .LBB33_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT:    vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -2724,12 +2724,12 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT:    andi a3, a2, 16
 ; RV64ZVE32F-NEXT:    beqz a3, .LBB34_7
 ; RV64ZVE32F-NEXT:  .LBB34_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a3, v11
 ; RV64ZVE32F-NEXT:    and a3, a3, a1
 ; RV64ZVE32F-NEXT:    slli a3, a3, 2
 ; RV64ZVE32F-NEXT:    add a3, a0, a3
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT:    vse32.v v12, (a3)
 ; RV64ZVE32F-NEXT:    andi a3, a2, 32
@@ -6393,11 +6393,11 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB58_7
 ; RV64ZVE32F-NEXT:  .LBB58_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -6526,11 +6526,11 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB59_7
 ; RV64ZVE32F-NEXT:  .LBB59_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -6663,12 +6663,12 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB60_7
 ; RV64ZVE32F-NEXT:  .LBB60_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT:    andi a2, a2, 255
 ; RV64ZVE32F-NEXT:    slli a2, a2, 1
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT:    vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT:    andi a2, a1, 32
@@ -7249,11 +7249,11 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB68_7
 ; RV64ZVE32F-NEXT:  .LBB68_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vse32.v v12, (a2)
@@ -7385,11 +7385,11 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB69_7
 ; RV64ZVE32F-NEXT:  .LBB69_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vse32.v v12, (a2)
@@ -7528,12 +7528,12 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB70_7
 ; RV64ZVE32F-NEXT:  .LBB70_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT:    andi a2, a2, 255
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vse32.v v12, (a2)
@@ -7669,11 +7669,11 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB71_7
 ; RV64ZVE32F-NEXT:  .LBB71_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vse32.v v12, (a2)
@@ -7806,11 +7806,11 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
 ; RV64ZVE32F-NEXT:    andi a2, a1, 16
 ; RV64ZVE32F-NEXT:    beqz a2, .LBB72_7
 ; RV64ZVE32F-NEXT:  .LBB72_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT:    slli a2, a2, 2
 ; RV64ZVE32F-NEXT:    add a2, a0, a2
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vse32.v v12, (a2)
@@ -7950,12 +7950,12 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
 ; RV64ZVE32F-NEXT:    andi a3, a2, 16
 ; RV64ZVE32F-NEXT:    beqz a3, .LBB73_7
 ; RV64ZVE32F-NEXT:  .LBB73_14: # %cond.store7
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vmv.x.s a3, v11
 ; RV64ZVE32F-NEXT:    and a3, a3, a1
 ; RV64ZVE32F-NEXT:    slli a3, a3, 2
 ; RV64ZVE32F-NEXT:    add a3, a0, a3
-; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT:    vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT:    vse32.v v12, (a3)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index 4766b3727a462..514470227af78 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -478,9 +478,9 @@ define float @vreduce_fwadd_v8f32(ptr %x, float %s) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -496,9 +496,9 @@ define float @vreduce_ord_fwadd_v8f32(ptr %x, float %s) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -544,9 +544,9 @@ define float @vreduce_fwadd_v16f32(ptr %x, float %s) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwredusum.vs v8, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -562,9 +562,9 @@ define float @vreduce_ord_fwadd_v16f32(ptr %x, float %s) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -613,9 +613,9 @@ define float @vreduce_fwadd_v32f32(ptr %x, float %s) {
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v12, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwredusum.vs v8, v8, v12
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -632,9 +632,9 @@ define float @vreduce_ord_fwadd_v32f32(ptr %x, float %s) {
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v12, fa0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v12
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -694,9 +694,8 @@ define float @vreduce_fwadd_v64f32(ptr %x, float %s) {
 ; CHECK-NEXT:    vslidedown.vx v16, v8, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwadd.vv v24, v8, v16
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    vfredusum.vs v8, v24, v8
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
 ; CHECK-NEXT:    ret
@@ -715,9 +714,9 @@ define float @vreduce_ord_fwadd_v64f32(ptr %x, float %s) {
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
 ; CHECK-NEXT:    vslidedown.vx v16, v8, a0
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v12, fa0
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v12
 ; CHECK-NEXT:    vfwredosum.vs v8, v16, v8
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
@@ -890,9 +889,9 @@ define double @vreduce_fwadd_v4f64(ptr %x, double %s) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -908,9 +907,9 @@ define double @vreduce_ord_fwadd_v4f64(ptr %x, double %s) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -956,9 +955,9 @@ define double @vreduce_fwadd_v8f64(ptr %x, double %s) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwredusum.vs v8, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -974,9 +973,9 @@ define double @vreduce_ord_fwadd_v8f64(ptr %x, double %s) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -1022,9 +1021,9 @@ define double @vreduce_fwadd_v16f64(ptr %x, double %s) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 16, e64, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v12, fa0
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfwredusum.vs v8, v8, v12
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -1040,9 +1039,9 @@ define double @vreduce_ord_fwadd_v16f64(ptr %x, double %s) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 16, e64, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v12, fa0
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v12
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -1118,9 +1117,9 @@ define double @vreduce_ord_fwadd_v32f64(ptr %x, double %s) {
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v16, v8, 16
-; CHECK-NEXT:    vsetivli zero, 16, e64, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v12, fa0
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v12
 ; CHECK-NEXT:    vfwredosum.vs v8, v16, v8
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
index f2a1f2752cda0..5942e9b0533e9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -364,9 +364,9 @@ define i16 @vwreduce_add_v16i16(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v9, zero
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vwredsum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -382,9 +382,9 @@ define i16 @vwreduce_uadd_v16i16(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v9, zero
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vwredsumu.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -418,9 +418,9 @@ define i16 @vwreduce_add_v32i16(ptr %x) {
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT:    vmv.s.x v10, zero
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vwredsum.vs v8, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -437,9 +437,9 @@ define i16 @vwreduce_uadd_v32i16(ptr %x) {
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT:    vmv.s.x v10, zero
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, ma
 ; CHECK-NEXT:    vwredsumu.vs v8, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -473,9 +473,9 @@ define i16 @vwreduce_add_v64i16(ptr %x) {
 ; CHECK-NEXT:    li a1, 64
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT:    vmv.s.x v12, zero
-; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vwredsum.vs v8, v8, v12
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -492,9 +492,9 @@ define i16 @vwreduce_uadd_v64i16(ptr %x) {
 ; CHECK-NEXT:    li a1, 64
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT:    vmv.s.x v12, zero
-; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vwredsumu.vs v8, v8, v12
 ; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -536,9 +536,8 @@ define i16 @vwreduce_add_v128i16(ptr %x) {
 ; CHECK-NEXT:    vslidedown.vx v16, v8, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 ; CHECK-NEXT:    vwadd.vv v24, v8, v16
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v8, zero
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, zero
 ; CHECK-NEXT:    vredsum.vs v8, v24, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -559,9 +558,8 @@ define i16 @vwreduce_uadd_v128i16(ptr %x) {
 ; CHECK-NEXT:    vslidedown.vx v16, v8, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v24, v8, v16
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v8, zero
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, zero
 ; CHECK-NEXT:    vredsum.vs v8, v24, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -734,9 +732,9 @@ define i32 @vwreduce_add_v8i32(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v9, zero
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vwredsum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -752,9 +750,9 @@ define i32 @vwreduce_uadd_v8i32(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v9, zero
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vwredsumu.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -786,9 +784,9 @@ define i32 @vwreduce_add_v16i32(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.s.x v10, zero
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vwredsum.vs v8, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -804,9 +802,9 @@ define i32 @vwreduce_uadd_v16i32(ptr %x) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; CHECK-NEXT:    vmv.s.x v10, zero
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vwredsumu.vs v8, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -840,9 +838,9 @@ define i32 @vwreduce_add_v32i32(ptr %x) {
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv.s.x v12, zero
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vwredsum.vs v8, v8, v12
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -859,9 +857,9 @@ define i32 @vwreduce_uadd_v32i32(ptr %x) {
 ; CHECK-NEXT:    li a1, 32
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv.s.x v12, zero
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m4, ta, ma
 ; CHECK-NEXT:    vwredsumu.vs v8, v8, v12
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -903,9 +901,8 @@ define i32 @vwreduce_add_v64i32(ptr %x) {
 ; CHECK-NEXT:    vslidedown.vx v16, v8, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vwadd.vv v24, v8, v16
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v8, zero
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, zero
 ; CHECK-NEXT:    vredsum.vs v8, v24, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -926,9 +923,8 @@ define i32 @vwreduce_uadd_v64i32(ptr %x) {
 ; CHECK-NEXT:    vslidedown.vx v16, v8, a0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
 ; CHECK-NEXT:    vwaddu.vv v24, v8, v16
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v8, zero
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vmv.s.x v8, zero
 ; CHECK-NEXT:    vredsum.vs v8, v24, v8
 ; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    ret
@@ -1152,9 +1148,9 @@ define i64 @vwreduce_add_v4i64(ptr %x) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    vmv.s.x v9, zero
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV64-NEXT:    vwredsum.vs v8, v8, v9
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.x.s a0, v8
@@ -1185,9 +1181,9 @@ define i64 @vwreduce_uadd_v4i64(ptr %x) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    vmv.s.x v9, zero
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV64-NEXT:    vwredsumu.vs v8, v8, v9
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.x.s a0, v8
@@ -1247,9 +1243,9 @@ define i64 @vwreduce_add_v8i64(ptr %x) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vmv.s.x v10, zero
-; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vwredsum.vs v8, v8, v10
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.x.s a0, v8
@@ -1280,9 +1276,9 @@ define i64 @vwreduce_uadd_v8i64(ptr %x) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
 ; RV64-NEXT:    vmv.s.x v10, zero
-; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vwredsumu.vs v8, v8, v10
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.x.s a0, v8
@@ -1342,9 +1338,9 @@ define i64 @vwreduce_add_v16i64(ptr %x) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 16, e64, m1, ta, ma
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT:    vmv.s.x v12, zero
-; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vwredsum.vs v8, v8, v12
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.x.s a0, v8
@@ -1375,9 +1371,9 @@ define i64 @vwreduce_uadd_v16i64(ptr %x) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    vsetivli zero, 16, e64, m1, ta, ma
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT:    vmv.s.x v12, zero
-; RV64-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m4, ta, ma
 ; RV64-NEXT:    vwredsumu.vs v8, v8, v12
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.x.s a0, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index 1cc09b7f5eeb5..a883ca5396ff3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -341,9 +341,9 @@ define void @mscatter_v4i16_align1(<4 x i16> %val, <4 x ptr> %ptrs, <4 x i1> %m)
 ; RV64-SLOW-NEXT:  .LBB6_4: # %else6
 ; RV64-SLOW-NEXT:    ret
 ; RV64-SLOW-NEXT:  .LBB6_5: # %cond.store
-; RV64-SLOW-NEXT:    vsetivli zero, 1, e16, mf2, ta, ma
+; RV64-SLOW-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
 ; RV64-SLOW-NEXT:    vmv.x.s a1, v8
-; RV64-SLOW-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
+; RV64-SLOW-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
 ; RV64-SLOW-NEXT:    vmv.x.s a2, v10
 ; RV64-SLOW-NEXT:    srli a3, a1, 8
 ; RV64-SLOW-NEXT:    sb a3, 1(a2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index c5245451dc440..4319eb4e290b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -223,9 +223,9 @@ define float @vreduce_ord_fadd_nxv4f32(<vscale x 4 x float> %v, float %s) {
 define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: vreduce_fwadd_nxv4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -238,9 +238,9 @@ define float @vreduce_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 define float @vreduce_ord_fwadd_nxv4f32(<vscale x 4 x half> %v, float %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_nxv4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -335,9 +335,9 @@ define double @vreduce_ord_fadd_nxv2f64(<vscale x 2 x double> %v, double %s) {
 define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: vreduce_fwadd_nxv2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vfwredusum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -350,9 +350,9 @@ define double @vreduce_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 define double @vreduce_ord_fwadd_nxv2f64(<vscale x 2 x float> %v, double %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_nxv2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v9, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -391,9 +391,9 @@ define double @vreduce_ord_fadd_nxv4f64(<vscale x 4 x double> %v, double %s) {
 define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: vreduce_fwadd_nxv4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwredusum.vs v8, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
@@ -406,9 +406,9 @@ define double @vreduce_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 define double @vreduce_ord_fwadd_nxv4f64(<vscale x 4 x float> %v, double %s) {
 ; CHECK-LABEL: vreduce_ord_fwadd_nxv4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; CHECK-NEXT:    vfmv.s.f v10, fa0
-; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vfwredosum.vs v8, v8, v10
 ; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; CHECK-NEXT:    vfmv.f.s fa0, v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
index 3f1892ede5678..8398fe4c32ffc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int.ll
@@ -1019,9 +1019,9 @@ define signext i32 @vreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: vwreduce_add_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v9, zero
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vwredsum.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -1034,9 +1034,9 @@ define signext i32 @vwreduce_add_nxv4i16(<vscale x 4 x i16> %v) {
 define signext i32 @vwreduce_uadd_nxv4i16(<vscale x 4 x i16> %v) {
 ; CHECK-LABEL: vwreduce_uadd_nxv4i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v9, zero
-; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-NEXT:    vwredsumu.vs v8, v8, v9
 ; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
 ; CHECK-NEXT:    vmv.x.s a0, v8
@@ -1432,9 +1432,9 @@ define i64 @vwreduce_add_nxv2i32(<vscale x 2 x i32> %v) {
 ;
 ; RV64-LABEL: vwreduce_add_nxv2i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; RV64-NEXT:    vmv.s.x v9, zero
-; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV64-NEXT:    vwredsum.vs v8, v8, v9
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.x.s a0, v8
@@ -1460,9 +1460,9 @@ define i64 @vwreduce_uadd_nxv2i32(<vscale x 2 x i32> %v) {
 ;
 ; RV64-LABEL: vwreduce_uadd_nxv2i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; RV64-NEXT:    vmv.s.x v9, zero
-; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV64-NEXT:    vwredsumu.vs v8, v8, v9
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.x.s a0, v8
@@ -1684,9 +1684,9 @@ define i64 @vwreduce_add_nxv4i32(<vscale x 4 x i32> %v) {
 ;
 ; RV64-LABEL: vwreduce_add_nxv4i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vmv.s.x v10, zero
-; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vwredsum.vs v8, v8, v10
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.x.s a0, v8
@@ -1712,9 +1712,9 @@ define i64 @vwreduce_uadd_nxv4i32(<vscale x 4 x i32> %v) {
 ;
 ; RV64-LABEL: vwreduce_uadd_nxv4i32:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV64-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
 ; RV64-NEXT:    vmv.s.x v10, zero
-; RV64-NEXT:    vsetvli a0, zero, e32, m2, ta, ma
+; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; RV64-NEXT:    vwredsumu.vs v8, v8, v10
 ; RV64-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64-NEXT:    vmv.x.s a0, v8

From c131942c8a93cf66707bac76f8023b745ebb89e0 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Mon, 16 Oct 2023 23:31:46 -0400
Subject: [PATCH 2/3] Check that new ratio is valid
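
The adjusted LMUL must still be encodable, i.e. the fixed-point
NewRatio (LMUL scaled by 8) must lie within [1, 64] (mf8 through m8).
A sketch of the arithmetic for the new vmv_s_x_too_large_ratio_diff
test below, using the quantities computed in the code:

    MI:     e64, m8 -> SEW * 8 = 512
    NextMI: e8, m8  -> SEW/LMUL ratio = 1
    NewRatio = 512 / 1 = 512 -> out of range, so keep both vsetvlis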
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp  |  2 +
 .../test/CodeGen/RISCV/rvv/vsetvli-insert.mir | 54 +++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 6724acab277b5..e00c171d9b273 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1502,6 +1502,8 @@ canAdjustSEWLMULRatio(const MachineInstr &MI, const MachineInstr &NextMI,
   unsigned SEW = MIInfo.getSEW() * 8;
   // Fixed point value with 3 fractional bits.
   unsigned NewRatio = SEW / NextMIInfo.getSEWLMULRatio();
+  if (NewRatio < 1 || NewRatio > 64)
+    return std::nullopt;
   bool Fractional = NewRatio < 8;
   RISCVII::VLMUL NewVLMul = RISCVVType::encodeLMUL(
       Fractional ? 8 / NewRatio : NewRatio / 8, Fractional);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
index 7bda7a387c68f..9318e7e695fd5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
@@ -72,6 +72,14 @@
     ret void
   }
 
+  define void @vmv_s_x() {
+    ret void
+  }
+
+  define void @vmv_s_x_too_large_ratio_diff() {
+    ret void
+  }
+
   declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
 
   declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>* nocapture, i64) #4
@@ -446,3 +454,49 @@ body: |
     %4:vr = PseudoVMV_V_I_MF4 undef %4, 0, 4, 3, 0
     PseudoRET
 ...
+
+...
+---
+name: vmv_s_x
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x1
+    ; CHECK-LABEL: name: vmv_s_x
+    ; CHECK: liveins: $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x1
+    ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF
+    ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 201 /* e16, m2, ta, ma */, implicit-def $vl, implicit-def $vtype
+    ; CHECK-NEXT: [[PseudoVMV_S_X_M1_:%[0-9]+]]:vr = PseudoVMV_S_X_M1 %pt, [[COPY]], 1, 4 /* e16 */, implicit $vl, implicit $vtype
+    ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 192 /* e8, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+    ; CHECK-NEXT: [[PseudoVADD_VI_M1_:%[0-9]+]]:vr = PseudoVADD_VI_M1 %pt, [[PseudoVMV_S_X_M1_]], 1, 1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+    ; CHECK-NEXT: PseudoRET
+    %0:gpr = COPY $x1
+    %pt:vr = IMPLICIT_DEF
+    %1:vr = PseudoVMV_S_X_M1 %pt, %0, 1, 4
+    %2:vr = PseudoVADD_VI_M1 %pt, %1, 1, 1, 3, 0
+    PseudoRET
+
+...
+---
+name: vmv_s_x_too_large_ratio_diff
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x1
+    ; CHECK-LABEL: name: vmv_s_x_too_large_ratio_diff
+    ; CHECK: liveins: $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x1
+    ; CHECK-NEXT: %pt:vrm8 = IMPLICIT_DEF
+    ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 219 /* e64, m8, ta, ma */, implicit-def $vl, implicit-def $vtype
+    ; CHECK-NEXT: [[PseudoVMV_S_X_M8_:%[0-9]+]]:vrm8 = PseudoVMV_S_X_M8 %pt, [[COPY]], 1, 6 /* e64 */, implicit $vl, implicit $vtype
+    ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 195 /* e8, m8, ta, ma */, implicit-def $vl, implicit-def $vtype
+    ; CHECK-NEXT: [[PseudoVADD_VI_M8_:%[0-9]+]]:vrm8 = PseudoVADD_VI_M8 %pt, [[PseudoVMV_S_X_M8_]], 1, 1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+    ; CHECK-NEXT: PseudoRET
+    %0:gpr = COPY $x1
+    %pt:vrm8 = IMPLICIT_DEF
+    %1:vrm8 = PseudoVMV_S_X_M8 %pt, %0, 1, 6
+    %2:vrm8 = PseudoVADD_VI_M8 %pt, %1, 1, 1, 3, 0
+    PseudoRET

From 0ddba020b1b2a9a9a8219eaefd7dda4352537c20 Mon Sep 17 00:00:00 2001
From: Luke Lau
Date: Tue, 17 Oct 2023 17:10:48 -0400
Subject: [PATCH 3/3] Update test outside of rvv tree

---
 llvm/test/CodeGen/RISCV/double_reduct.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/double_reduct.ll b/llvm/test/CodeGen/RISCV/double_reduct.ll
index cecaa9d24f8bc..aa8a60fab1841 100644
--- a/llvm/test/CodeGen/RISCV/double_reduct.ll
+++ b/llvm/test/CodeGen/RISCV/double_reduct.ll
@@ -106,9 +106,9 @@ define i16 @add_ext_i16(<16 x i8> %a, <16 x i8> %b) {
 define i16 @add_ext_v32i16(<32 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: add_ext_v32i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
+; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v11, zero
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
 ; CHECK-NEXT:    vwredsumu.vs v10, v10, v11
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma