[VPlan] Use VPWidenIntrinsicRecipe to support binary and unary operations with EVL-vectorization #114205

Status: Closed (wanted to merge 2 commits)
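In short: the dedicated VPWidenEVLRecipe is dropped, and widened unary/binary operations are instead converted into a VPWidenIntrinsicRecipe for the matching VP intrinsic when folding the tail with EVL. A minimal sketch of the resulting IR (value names such as %p, %remaining, %evl are illustrative, not from the PR; the pattern mirrors the updated tests below):

  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %remaining, i32 4, i1 true)
  %x = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 %p, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  ; The widened fadd is emitted as a VP intrinsic: all-true mask, %evl bounds the active lanes.
  %y = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> %x, <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 %evl)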
69 changes: 3 additions & 66 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -877,7 +877,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenIntrinsicSC:
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenEVLSC:
case VPRecipeBase::VPWidenSelectSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
@@ -1063,7 +1062,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
@@ -1431,16 +1429,11 @@ class VPIRInstruction : public VPRecipeBase {
class VPWidenRecipe : public VPRecipeWithIRFlags {
unsigned Opcode;

protected:
template <typename IterT>
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
iterator_range<IterT> Operands)
: VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}

public:
template <typename IterT>
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
: VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
Opcode(I.getOpcode()) {}

~VPWidenRecipe() override = default;

@@ -1450,15 +1443,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
return R;
}

static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
}

static inline bool classof(const VPUser *U) {
auto *R = dyn_cast<VPRecipeBase>(U);
return R && classof(R);
}
VP_CLASSOF_IMPL(VPDef::VPWidenSC)

/// Produce a widened instruction using the opcode and operands of the recipe,
/// processing State.VF elements.
@@ -1477,54 +1462,6 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
#endif
};

/// A recipe for widening operations with vector-predication intrinsics with
/// explicit vector length (EVL).
class VPWidenEVLRecipe : public VPWidenRecipe {
using VPRecipeWithIRFlags::transferFlags;

public:
template <typename IterT>
VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
: VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
addOperand(&EVL);
}
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
: VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
transferFlags(W);
}

~VPWidenEVLRecipe() override = default;

VPWidenRecipe *clone() override final {
llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
return nullptr;
}

VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);

VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }

/// Produce a vp-intrinsic using the opcode and operands of the recipe,
/// processing EVL elements.
void execute(VPTransformState &State) override final;

/// Returns true if the recipe only uses the first lane of operand \p Op.
bool onlyFirstLaneUsed(const VPValue *Op) const override {
assert(is_contained(operands(), Op) &&
"Op must be an operand of the recipe");
// EVL in that recipe is always the last operand, thus any use before means
// the VPValue should be vectorized.
return getEVL() == Op;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override final;
#endif
};

/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Cast instruction opcode.
5 changes: 2 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -235,9 +235,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
[this](const VPRecipeBase *R) {
return inferScalarType(R->getOperand(0));
})
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
VPWidenSelectRecipe>(
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
.Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
return R->getResultType();
48 changes: 0 additions & 48 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -99,7 +99,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenLoadSC:
case VPWidenPHISC:
case VPWidenSC:
case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -143,7 +142,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
case VPWidenSC:
case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -184,7 +182,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPWidenPHISC:
case VPWidenPointerInductionSC:
case VPWidenSC:
case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -1452,42 +1449,6 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
}
}

void VPWidenEVLRecipe::execute(VPTransformState &State) {
unsigned Opcode = getOpcode();
// TODO: Support other opcodes
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");

State.setDebugLocFrom(getDebugLoc());

assert(State.get(getOperand(0))->getType()->isVectorTy() &&
"VPWidenEVLRecipe should not be used for scalars");

VPValue *EVL = getEVL();
Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
IRBuilderBase &BuilderIR = State.Builder;
VectorBuilder Builder(BuilderIR);
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());

SmallVector<Value *, 4> Ops;
for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
VPValue *VPOp = getOperand(I);
Ops.push_back(State.get(VPOp));
}

Builder.setMask(Mask).setEVL(EVLArg);
Value *VPInst =
Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops, "vp.op");
// Currently vp-intrinsics only accept FMF flags.
// TODO: Enable other flags when support is added.
if (isa<FPMathOperator>(VPInst))
setFlags(cast<Instruction>(VPInst));

State.set(this, VPInst);
State.addMetadata(VPInst,
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -1497,15 +1458,6 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
printFlags(O);
printOperands(O, SlotTracker);
}

void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN ";
printAsOperand(O, SlotTracker);
O << " = vp." << Instruction::getOpcodeName(getOpcode());
printFlags(O);
printOperands(O, SlotTracker);
}
#endif

void VPWidenCastRecipe::execute(VPTransformState &State) {
26 changes: 14 additions & 12 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1476,34 +1476,36 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
VPValue *NewMask = GetNewMask(S->getMask());
return new VPWidenStoreEVLRecipe(*S, EVL, NewMask);
})
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
unsigned Opcode = W->getOpcode();
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
return nullptr;
return new VPWidenEVLRecipe(*W, EVL);
})
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
VPValue *NewMask = GetNewMask(Red->getCondOp());
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
})
.Case<VPWidenIntrinsicRecipe, VPWidenCastRecipe>(
[&](auto *CR) -> VPRecipeBase * {
.Case<VPWidenIntrinsicRecipe, VPWidenCastRecipe, VPWidenRecipe>(
[&](auto *R) -> VPRecipeBase * {
Intrinsic::ID VPID = Intrinsic::not_intrinsic;
if (auto *CallR = dyn_cast<VPWidenIntrinsicRecipe>(CR))
if (auto *CallR = dyn_cast<VPWidenIntrinsicRecipe>(R)) {
VPID =
VPIntrinsic::getForIntrinsic(CallR->getVectorIntrinsicID());
else if (auto *CastR = dyn_cast<VPWidenCastRecipe>(CR))
} else if (auto *CastR = dyn_cast<VPWidenCastRecipe>(R)) {
VPID = VPIntrinsic::getForOpcode(CastR->getOpcode());
} else if (auto *W = dyn_cast<VPWidenRecipe>(R)) {
unsigned Opcode = W->getOpcode();
// TODO: Support other opcodes
if (!Instruction::isBinaryOp(Opcode) &&
!Instruction::isUnaryOp(Opcode))
return nullptr;
VPID = VPIntrinsic::getForOpcode(Opcode);
}
assert(VPID != Intrinsic::not_intrinsic && "Expected VP intrinsic");
assert(VPIntrinsic::getMaskParamPos(VPID) &&
VPIntrinsic::getVectorLengthParamPos(VPID) &&
"Expected VP intrinsic");

SmallVector<VPValue *> Ops(CR->operands());
SmallVector<VPValue *> Ops(R->operands());
Ops.push_back(&AllOneMask);
Ops.push_back(&EVL);
return new VPWidenIntrinsicRecipe(
VPID, Ops, TypeInfo.inferScalarType(CR), CR->getDebugLoc());
VPID, Ops, TypeInfo.inferScalarType(R), R->getDebugLoc());
})
.Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
SmallVector<VPValue *> Ops(Sel->operands());
1 change: 0 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -344,7 +344,6 @@ class VPDef {
VPWidenStoreEVLSC,
VPWidenStoreSC,
VPWidenSC,
VPWidenEVLSC,
VPWidenSelectSC,
VPBlendSC,
VPHistogramSC,
4 changes: 0 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
@@ -143,10 +143,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
[&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); })
.Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe>(
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
.Case<VPWidenEVLRecipe>([&](const VPWidenEVLRecipe *W) {
return VerifyEVLUse(*W,
Instruction::isUnaryOp(W->getOpcode()) ? 1 : 2);
})
.Case<VPScalarCastRecipe>(
[&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); })
.Case<VPInstruction>([&](const VPInstruction *I) {
@@ -1267,7 +1267,7 @@ define void @test_fadd(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
Member commented:
What about the (removed) fast attribute here?

LiqinWeng (Contributor, Author) commented on Dec 25, 2024:
I will set the flags from the recipe instead of from the underlying instruction. Please review #119847 and #121023.
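For reference, the difference under discussion, sketched with made-up value names (%a, %b, %evl): the first form carries the fast-math flags of the underlying instruction, the second is what this patch currently produces:

  %r.old = call fast <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  %r.new = call <vscale x 4 x float> @llvm.vp.fadd.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x i1> splat (i1 true), i32 %evl)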

; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -1362,7 +1362,7 @@ define void @test_fsub(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x float> @llvm.vp.fsub.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -1457,7 +1457,7 @@ define void @test_fmul(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x float> @llvm.vp.fmul.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -1552,7 +1552,7 @@ define void @test_fdiv(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x float> @llvm.vp.fdiv.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> splat (float 3.000000e+00), <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -1700,7 +1700,7 @@ define void @test_fneg(ptr nocapture %a, ptr nocapture readonly %b) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call fast <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
LiqinWeng (Contributor, Author) commented on Dec 24, 2024:
This will be enabled once #119847 is merged.

; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x float> @llvm.vp.fneg.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP13]]
; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds float, ptr [[TMP17]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4f32.p0(<vscale x 4 x float> [[VP_OP]], ptr align 4 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP12]])
@@ -1761,3 +1761,43 @@ loop:
finish.loopexit:
ret void
}
;.
; IF-EVL: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; IF-EVL: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
; IF-EVL: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
; IF-EVL: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
; IF-EVL: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
; IF-EVL: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]]}
; IF-EVL: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP9]] = distinct !{[[LOOP9]], [[META1]]}
; IF-EVL: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]]}
; IF-EVL: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP13]] = distinct !{[[LOOP13]], [[META1]]}
; IF-EVL: [[LOOP14]] = distinct !{[[LOOP14]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP15]] = distinct !{[[LOOP15]], [[META1]]}
; IF-EVL: [[LOOP16]] = distinct !{[[LOOP16]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]]}
; IF-EVL: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]]}
; IF-EVL: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP21]] = distinct !{[[LOOP21]], [[META1]]}
; IF-EVL: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP23]] = distinct !{[[LOOP23]], [[META1]]}
; IF-EVL: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP25]] = distinct !{[[LOOP25]], [[META1]]}
; IF-EVL: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP27]] = distinct !{[[LOOP27]], [[META1]]}
; IF-EVL: [[LOOP28]] = distinct !{[[LOOP28]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP29]] = distinct !{[[LOOP29]], [[META1]]}
; IF-EVL: [[LOOP30]] = distinct !{[[LOOP30]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP31]] = distinct !{[[LOOP31]], [[META1]]}
; IF-EVL: [[LOOP32]] = distinct !{[[LOOP32]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP33]] = distinct !{[[LOOP33]], [[META1]]}
; IF-EVL: [[LOOP34]] = distinct !{[[LOOP34]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP35]] = distinct !{[[LOOP35]], [[META1]]}
; IF-EVL: [[LOOP36]] = distinct !{[[LOOP36]], [[META1]], [[META2]]}
; IF-EVL: [[LOOP37]] = distinct !{[[LOOP37]], [[META1]]}
;.
@@ -4,7 +4,7 @@
; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
; RUN: -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck %s

; CHECK: Cost of 2 for VF vscale x 4: WIDEN-INTRINSIC vp<%{{.+}}> = call llvm.vp.merge(ir<true>, ir<%add>, ir<%rdx>, vp<%{{.+}}>)
; CHECK: Cost of 2 for VF vscale x 4: WIDEN-INTRINSIC vp<%{{.+}}> = call llvm.vp.merge(ir<true>, vp<%{{.+}}>, ir<%rdx>, vp<%{{.+}}>)
; CHECK: LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %rdx = phi i32 [ %start, %entry ], [ %add, %loop ]

define i32 @add(ptr %a, i64 %n, i32 %start) {
Expand Down