[RISCV] Adjust LMUL if not used to avoid VL toggle #69259
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1463,6 +1463,57 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
   return areCompatibleVTYPEs(PriorVType, VType, Used);
 }

+// If LMUL or the SEW/LMUL ratio aren't demanded and MI and NextMI have the same
+// AVL, then we can try and change MI's LMUL so that we can avoid setting VL in
+// NextMI, e.g.:
+//
+// vsetivli zero, 4, e32, m1, ta, ma
+// vsetivli zero, 4, e16, mf4, ta, ma
+//
+// vsetivli zero, 4, e32, mf2, ta, ma
+// vsetvli zero, zero, e16, mf4, ta, ma
+//
+// If possible, returns the new VTYPE that should be used for MI.
+static std::optional<unsigned>
+canAdjustSEWLMULRatio(const MachineInstr &MI, const MachineInstr &NextMI,
+                      const DemandedFields &Used) {
+  if (Used.LMUL || Used.SEWLMULRatio)
+    return std::nullopt;
+  if (!NextMI.getOperand(0).isDead())
+    return std::nullopt;
+  // If we end up increasing the SEW/LMUL ratio, then we will decrease VLMAX,
+  // which means we might end up changing VL in the case that AVL > VLMAX. So
+  // bail if the exact VL value is needed.
+  //
+  // TODO: We could potentially relax this when we know we're increasing VLMAX.
+  if (Used.VLAny)
+    return std::nullopt;
+
+  // If NextMI is already zero, zero then bail. If MI is zero, zero then we
+  // won't be able to tell if it has the same AVL as NextMI, so also bail.
+  if (isVLPreservingConfig(MI) || isVLPreservingConfig(NextMI))
+    return std::nullopt;
+
+  VSETVLIInfo NextMIInfo = getInfoForVSETVLI(NextMI);

Review comment: I don't remember if it's safe to call getInfoForVSETVLI here on the post-rewrite MI. However, the existence of isNonZeroAVL seems to indicate I thought it wasn't previously? We should either write this in terms of the raw MI, or insert assertions sufficient to prove this is safe during insertion, and rewrite the existing post pass logic in terms of the VSETVLIInfo object.

Reply: Yeah, in my local branch I've rewritten this to just operate on the MI; it's not too bad, and we don't need getInfoForVSETVLI here.

+  VSETVLIInfo MIInfo = getInfoForVSETVLI(MI);
+  if (!MIInfo.hasSameAVL(NextMIInfo))
+    return std::nullopt;
+
+  unsigned SEW = MIInfo.getSEW() * 8;
+  // Fixed point value with 3 fractional bits.
+  unsigned NewRatio = SEW / NextMIInfo.getSEWLMULRatio();
+  if (NewRatio < 1 || NewRatio > 64)
+    return std::nullopt;
+  bool Fractional = NewRatio < 8;
+  RISCVII::VLMUL NewVLMul = RISCVVType::encodeLMUL(
+      Fractional ? 8 / NewRatio : NewRatio / 8, Fractional);
+
+  unsigned VType = MIInfo.encodeVTYPE();
+  return RISCVVType::encodeVTYPE(NewVLMul, SEW / 8,
+                                 RISCVVType::isTailAgnostic(VType),
+                                 RISCVVType::isMaskAgnostic(VType));
+}

Review comment: SEW / 8 appears to just be MIInfo.getSEW().

 void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
   MachineInstr *NextMI = nullptr;
   // We can have arbitrary code in successors, so VL and VTYPE
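To make the fixed-point arithmetic in canAdjustSEWLMULRatio concrete, here is a minimal standalone sketch of the same computation using plain integers (variable names are illustrative; the real code goes through the RISCVVType helpers). It works through the example from the header comment, where MI is "vsetivli zero, 4, e32, m1" and NextMI is "vsetivli zero, 4, e16, mf4":

#include <cassert>
#include <cstdio>

int main() {
  // LMUL as a fixed point value with 3 fractional bits:
  // mf8 = 1, mf4 = 2, mf2 = 4, m1 = 8, m2 = 16, m4 = 32, m8 = 64.
  unsigned MISew = 32;     // MI demands SEW e32.
  unsigned NextSew = 16;   // NextMI uses e16...
  unsigned NextLMul8 = 2;  // ...with LMUL mf4.

  // NextMI's demanded SEW/LMUL ratio: 16 / (1/4) = 64.
  unsigned NextRatio = NextSew * 8 / NextLMul8;

  // New fixed point LMUL for MI: keep MI's SEW, match NextMI's ratio.
  unsigned NewRatio = MISew * 8 / NextRatio; // 256 / 64 = 4, i.e. mf2.
  assert(NewRatio >= 1 && NewRatio <= 64 && "outside mf8..m8; would bail");

  bool Fractional = NewRatio < 8;
  unsigned LMul = Fractional ? 8 / NewRatio : NewRatio / 8;
  // Prints "MI becomes e32, mf2": NextMI can then be rewritten to
  // "vsetvli zero, zero, e16, mf4, ta, ma" with no VL toggle.
  std::printf("MI becomes e%u, %s%u\n", MISew, Fractional ? "mf" : "m", LMul);
  return 0;
}

Note how NewRatio here is MI's new LMUL in fixed point, which is why the patch bounds it to [1, 64] (mf8 through m8) before encoding.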
@@ -1484,6 +1535,15 @@ void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
       Used.demandVL();

     if (NextMI) {
+      if (auto NewVType = canAdjustSEWLMULRatio(MI, *NextMI, Used)) {
+        MI.getOperand(2).setImm(*NewVType);

Review comment: Have we properly guarded for the save/restore form which has a vreg as operand 2?

+        // Convert NextMI to vsetvli zero, zero
+        NextMI->setDesc(TII->get(RISCV::PseudoVSETVLIX0));
+        NextMI->getOperand(0).setReg(RISCV::X0);
+        NextMI->getOperand(0).setIsDead(true);
+        NextMI->getOperand(1).ChangeToRegister(RISCV::X0, false, false, true);
+      }
+
       if (!Used.usedVL() && !Used.usedVTYPE()) {
         ToDelete.push_back(&MI);
         // Leave NextMI unchanged
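On the reviewer's question about the save/restore form: a minimal sketch of the kind of guard that could sit at the top of canAdjustSEWLMULRatio, assuming the plain vsetvli/vsetivli pseudos carry their VTYPE as an immediate in operand 2 (illustrative only, not part of this patch):

  // Hypothetical early-out: the save/restore form carries a virtual register
  // rather than a VTYPE immediate in operand 2, so only proceed when both
  // instructions really have an immediate there.
  if (!MI.getOperand(2).isImm() || !NextMI.getOperand(2).isImm())
    return std::nullopt;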
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -65,9 +65,8 @@ define <32 x i32> @insertelt_v32i32_31(<32 x i32> %a, i32 %y) {
 ; CHECK-LABEL: insertelt_v32i32_31:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v16, a0
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.s.x v16, a0
 ; CHECK-NEXT: vslideup.vi v8, v16, 31
 ; CHECK-NEXT: ret
 %b = insertelement <32 x i32> %a, i32 %y, i32 31

Review comment: This is an interesting sub-case, as we shouldn't need to be reasoning about ratios here. Why didn't the existing logic get this case, given only LMUL is changing? I think this might just be the lack of LI handling in isNonZeroAVL?

Reply: Good point, I'm not quite sure why this part wasn't already caught, since it should just be an entire VTYPE substitution.

@@ -103,9 +102,8 @@ define <64 x i32> @insertelt_v64i32_63(<64 x i32> %a, i32 %y) {
 ; CHECK-LABEL: insertelt_v64i32_63:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vmv.s.x v24, a0
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT: vmv.s.x v24, a0
 ; CHECK-NEXT: vslideup.vi v16, v24, 31
 ; CHECK-NEXT: ret
 %b = insertelement <64 x i32> %a, i32 %y, i32 63

@@ -550,9 +548,9 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
 ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
 ; CHECK-NEXT: vle64.v v8, (a0)
 ; CHECK-NEXT: li a2, 6
-; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma
 ; CHECK-NEXT: vmv.s.x v8, a2
-; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
 ; CHECK-NEXT: vle64.v v12, (a1)
 ; CHECK-NEXT: vadd.vv v8, v8, v12
 ; CHECK-NEXT: vse64.v v8, (a0)

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -1138,11 +1138,11 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_7
 ; RV64ZVE32F-NEXT: .LBB18_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: slli a2, a2, 1
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32

@@ -1271,11 +1271,11 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_7
 ; RV64ZVE32F-NEXT: .LBB19_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: slli a2, a2, 1
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma

Review comment: Why isn't this already a VTYPE toggle from the forward pass? Having the same literal VL and knowing that VL is less than VLMAX for both should be sufficient?

 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32

@@ -1408,12 +1408,12 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB20_7
 ; RV64ZVE32F-NEXT: .LBB20_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: andi a2, a2, 255
 ; RV64ZVE32F-NEXT: slli a2, a2, 1
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32

@@ -2043,11 +2043,11 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_7
 ; RV64ZVE32F-NEXT: .LBB29_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32

@@ -2175,11 +2175,11 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_7
 ; RV64ZVE32F-NEXT: .LBB30_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32

@@ -2314,12 +2314,12 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_7
 ; RV64ZVE32F-NEXT: .LBB31_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: andi a2, a2, 255
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32

@@ -2451,11 +2451,11 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> %
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB32_7
 ; RV64ZVE32F-NEXT: .LBB32_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32

@@ -2584,11 +2584,11 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB33_7
 ; RV64ZVE32F-NEXT: .LBB33_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32

@@ -2724,12 +2724,12 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT: andi a3, a2, 16
 ; RV64ZVE32F-NEXT: beqz a3, .LBB34_7
 ; RV64ZVE32F-NEXT: .LBB34_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
 ; RV64ZVE32F-NEXT: and a3, a3, a1
 ; RV64ZVE32F-NEXT: slli a3, a3, 2
 ; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)
 ; RV64ZVE32F-NEXT: andi a3, a2, 32

@@ -6393,11 +6393,11 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB58_7
 ; RV64ZVE32F-NEXT: .LBB58_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: slli a2, a2, 1
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32

@@ -6526,11 +6526,11 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_7
 ; RV64ZVE32F-NEXT: .LBB59_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: slli a2, a2, 1
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32

@@ -6663,12 +6663,12 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_7
 ; RV64ZVE32F-NEXT: .LBB60_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10
 ; RV64ZVE32F-NEXT: andi a2, a2, 255
 ; RV64ZVE32F-NEXT: slli a2, a2, 1
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4
 ; RV64ZVE32F-NEXT: vse16.v v9, (a2)
 ; RV64ZVE32F-NEXT: andi a2, a1, 32

@@ -7249,11 +7249,11 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> %
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB68_7
 ; RV64ZVE32F-NEXT: .LBB68_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)

@@ -7385,11 +7385,11 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_7
 ; RV64ZVE32F-NEXT: .LBB69_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)

@@ -7528,12 +7528,12 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_7
 ; RV64ZVE32F-NEXT: .LBB70_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: andi a2, a2, 255
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)

@@ -7669,11 +7669,11 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16>
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB71_7
 ; RV64ZVE32F-NEXT: .LBB71_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)

@@ -7806,11 +7806,11 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
 ; RV64ZVE32F-NEXT: andi a2, a1, 16
 ; RV64ZVE32F-NEXT: beqz a2, .LBB72_7
 ; RV64ZVE32F-NEXT: .LBB72_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a2, v11
 ; RV64ZVE32F-NEXT: slli a2, a2, 2
 ; RV64ZVE32F-NEXT: add a2, a0, a2
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vse32.v v12, (a2)

@@ -7950,12 +7950,12 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x
 ; RV64ZVE32F-NEXT: andi a3, a2, 16
 ; RV64ZVE32F-NEXT: beqz a3, .LBB73_7
 ; RV64ZVE32F-NEXT: .LBB73_14: # %cond.store7
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, m1, ta, ma
 ; RV64ZVE32F-NEXT: vmv.x.s a3, v11
 ; RV64ZVE32F-NEXT: and a3, a3, a1
 ; RV64ZVE32F-NEXT: slli a3, a3, 2
 ; RV64ZVE32F-NEXT: add a3, a0, a3
-; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma
 ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4
 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma
 ; RV64ZVE32F-NEXT: vse32.v v12, (a3)

Review comment: The function returns the VTYPE, not a boolean, so a better name might be getAdjustedVTYPE.