[LV][EVL] Support icmp/fcmp instruction with EVL-vectorization #108533
Conversation
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-llvm-transforms
Author: LiqinWeng (LiqinWeng)
Changes
Patch is 28.51 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108533.diff
7 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 00d9f2909d71e2..8bcf5f3553cd86 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -10546,13 +10546,13 @@ InstructionCost BoUpSLP::getSpillCost() const {
if (II->isAssumeLikeIntrinsic())
return true;
FastMathFlags FMF;
- SmallVector<Type *, 4> Tys;
+ SmallVector<Type *, 8> Tys;
for (auto &ArgOp : II->args())
Tys.push_back(ArgOp->getType());
if (auto *FPMO = dyn_cast<FPMathOperator>(II))
FMF = FPMO->getFastMathFlags();
IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), Tys,
- FMF);
+ FMF, II);
InstructionCost IntrCost =
TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);
InstructionCost CallCost = TTI->getCallInstrCost(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 351f909ac0279d..6e0ea5673e5a4e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -1269,6 +1269,33 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
void VPWidenEVLRecipe::execute(VPTransformState &State) {
unsigned Opcode = getOpcode();
// TODO: Support other opcodes
+ if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
+ Value *Op1 = State.get(getOperand(0), 0);
+ Value *Op2 = State.get(getOperand(1), 0);
+ auto &Ctx = State.Builder.getContext();
+ Value *Pred = MetadataAsValue::get(
+ Ctx, MDString::get(Ctx, CmpInst::getPredicateName(getPredicate())));
+
+ IRBuilderBase &BuilderIR = State.Builder;
+ VectorBuilder Builder(BuilderIR);
+ Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
+ Builder.setMask(Mask).setEVL(State.get(getEVL(), 0, true));
+
+ VectorType *DataType = VectorType::get(Type::getInt1Ty(Ctx), State.VF);
+
+ Value *VPInst = Builder.createVectorInstruction(Opcode, DataType,
+ {Op1, Op2, Pred}, "vp.op");
+ // if (isa<FPMathOperator>(VPInst))
+ // setFlags(cast<Instruction>(VPInst));
+ if (VPInst) {
+ if (auto *VecOp = dyn_cast<CastInst>(VPInst))
+ VecOp->copyIRFlags(getUnderlyingInstr());
+ }
+ State.set(this, VPInst, 0);
+ State.addMetadata(VPInst,
+ dyn_cast_or_null<Instruction>(getUnderlyingValue()));
+ return;
+ }
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b722ec34ee6fb6..0b2c126210162f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1345,7 +1345,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
unsigned Opcode = W->getOpcode();
if (!Instruction::isBinaryOp(Opcode) &&
- !Instruction::isUnaryOp(Opcode))
+ !Instruction::isUnaryOp(Opcode) &&
+ Opcode != Instruction::ICmp && Opcode != Instruction::FCmp)
return nullptr;
return new VPWidenEVLRecipe(*W, EVL);
})
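
The widened opcode filter now admits both compare opcodes, so the recipe above also emits the fcmp flavor of the intrinsic. A hedged sketch of that form — no fcmp test survives in the truncated diff below, and the names and nxv4f32 type are assumed for illustration:

  %head = insertelement <vscale x 4 x i1> poison, i1 true, i64 0
  %alltrue = shufflevector <vscale x 4 x i1> %head, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
  ; vp.fcmp takes fcmp condition codes as the metadata string, e.g. "ogt".
  %cmp = call <vscale x 4 x i1> @llvm.vp.fcmp.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, metadata !"ogt", <vscale x 4 x i1> %alltrue, i32 %evl)
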
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
index 41796e848632e4..a86f6b1d23ae7a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-cond-reduction.ll
@@ -69,7 +69,7 @@ define i32 @cond_add(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
-; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), metadata !"sgt", <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer
; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[TMP20]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
@@ -282,7 +282,7 @@ define i32 @cond_add_pred(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP13]]
; IF-EVL-INLOOP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
-; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = icmp sgt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
+; IF-EVL-INLOOP-NEXT: [[TMP19:%.*]] = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), metadata !"sgt", <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP20:%.*]] = select <vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> [[TMP19]], <vscale x 4 x i1> zeroinitializer
; IF-EVL-INLOOP-NEXT: [[TMP21:%.*]] = call i32 @llvm.vp.reduce.add.nxv4i32(i32 0, <vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i1> [[TMP20]], i32 [[TMP12]])
; IF-EVL-INLOOP-NEXT: [[TMP22]] = add i32 [[TMP21]], [[VEC_PHI]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll
index 99da5058fbf922..c915ffd9a80d91 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll
@@ -45,18 +45,18 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) {
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP15]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
-; IF-EVL-NEXT: [[TMP17:%.*]] = icmp ne <vscale x 4 x i32> [[VP_OP_LOAD]], zeroinitializer
-; IF-EVL-NEXT: [[TMP18:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[TMP17]], <vscale x 4 x i1> zeroinitializer
-; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP11]]
-; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[TMP19]], i32 0
-; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP18]], i32 [[TMP10]])
-; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD3]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
-; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP]], ptr align 4 [[TMP20]], <vscale x 4 x i1> [[TMP18]], i32 [[TMP10]])
-; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP10]] to i64
-; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> zeroinitializer, metadata !"ne", <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
+; IF-EVL-NEXT: [[TMP17:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i1> [[VP_OP]], <vscale x 4 x i1> zeroinitializer
+; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP11]]
+; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[TMP18]], i32 0
+; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP19]], <vscale x 4 x i1> [[TMP17]], i32 [[TMP10]])
+; IF-EVL-NEXT: [[VP_OP4:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> [[VP_OP_LOAD3]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP10]])
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_OP4]], ptr align 4 [[TMP19]], <vscale x 4 x i1> [[TMP17]], i32 [[TMP10]])
+; IF-EVL-NEXT: [[TMP20:%.*]] = zext i32 [[TMP10]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP8]]
-; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; IF-EVL: scalar.ph:
@@ -65,13 +65,13 @@ define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) {
; IF-EVL: for.body:
; IF-EVL-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_011]]
-; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
-; IF-EVL-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP23]], 0
+; IF-EVL-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; IF-EVL-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP22]], 0
; IF-EVL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; IF-EVL: if.then:
; IF-EVL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_011]]
-; IF-EVL-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
-; IF-EVL-NEXT: [[ADD:%.*]] = add i32 [[TMP23]], [[TMP24]]
+; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4
+; IF-EVL-NEXT: [[ADD:%.*]] = add i32 [[TMP22]], [[TMP23]]
; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX3]], align 4
; IF-EVL-NEXT: br label [[FOR_INC]]
; IF-EVL: for.inc:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
index c1cf8b0fc541e7..def9d8d4c19124 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll
@@ -147,33 +147,33 @@ define void @reverse_load_store_masked(i64 %startval, ptr noalias %ptr, ptr noal
; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i32 [[TMP8]]
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP13]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP14]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT: [[TMP15:%.*]] = icmp slt <vscale x 4 x i32> [[VP_OP_LOAD]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 100, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; IF-EVL-NEXT: [[TMP16:%.*]] = select <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> zeroinitializer
-; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP12]]
-; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4
-; IF-EVL-NEXT: [[TMP20:%.*]] = mul i64 0, [[TMP19]]
-; IF-EVL-NEXT: [[TMP21:%.*]] = sub i64 1, [[TMP19]]
-; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP17]], i64 [[TMP20]]
-; IF-EVL-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i64 [[TMP21]]
-; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP23]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP6]])
+; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD]], <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 100, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), metadata !"slt", <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-NEXT: [[TMP15:%.*]] = select <vscale x 4 x i1> [[TMP11]], <vscale x 4 x i1> [[VP_OP]], <vscale x 4 x i1> zeroinitializer
+; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[PTR1:%.*]], i64 [[TMP12]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP18:%.*]] = mul i64 [[TMP17]], 4
+; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 0, [[TMP18]]
+; IF-EVL-NEXT: [[TMP20:%.*]] = sub i64 1, [[TMP18]]
+; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[TMP16]], i64 [[TMP19]]
+; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP21]], i64 [[TMP20]]
+; IF-EVL-NEXT: [[VP_REVERSE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-NEXT: [[VP_OP_LOAD4:%.*]] = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP22]], <vscale x 4 x i1> [[VP_REVERSE_MASK]], i32 [[TMP6]])
; IF-EVL-NEXT: [[VP_REVERSE:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_OP_LOAD4]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP12]]
-; IF-EVL-NEXT: [[TMP25:%.*]] = call i64 @llvm.vscale.i64()
-; IF-EVL-NEXT: [[TMP26:%.*]] = mul i64 [[TMP25]], 4
-; IF-EVL-NEXT: [[TMP27:%.*]] = mul i64 0, [[TMP26]]
-; IF-EVL-NEXT: [[TMP28:%.*]] = sub i64 1, [[TMP26]]
-; IF-EVL-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP24]], i64 [[TMP27]]
-; IF-EVL-NEXT: [[TMP30:%.*]] = getelementptr i32, ptr [[TMP29]], i64 [[TMP28]]
+; IF-EVL-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[PTR2:%.*]], i64 [[TMP12]]
+; IF-EVL-NEXT: [[TMP24:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP25:%.*]] = mul i64 [[TMP24]], 4
+; IF-EVL-NEXT: [[TMP26:%.*]] = mul i64 0, [[TMP25]]
+; IF-EVL-NEXT: [[TMP27:%.*]] = sub i64 1, [[TMP25]]
+; IF-EVL-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP23]], i64 [[TMP26]]
+; IF-EVL-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[TMP28]], i64 [[TMP27]]
; IF-EVL-NEXT: [[VP_REVERSE5:%.*]] = call <vscale x 4 x i32> @llvm.experimental.vp.reverse.nxv4i32(<vscale x 4 x i32> [[VP_REVERSE]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP16]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP30]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP6]])
-; IF-EVL-NEXT: [[TMP31:%.*]] = zext i32 [[TMP6]] to i64
-; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP31]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[VP_REVERSE_MASK6:%.*]] = call <vscale x 4 x i1> @llvm.experimental.vp.reverse.nxv4i1(<vscale x 4 x i1> [[TMP15]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[VP_REVERSE5]], ptr align 4 [[TMP29]], <vscale x 4 x i1> [[VP_REVERSE_MASK6]], i32 [[TMP6]])
+; IF-EVL-NEXT: [[TMP30:%.*]] = zext i32 [[TMP6]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP30]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP4]]
-; IF-EVL-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; IF-EVL-NEXT: [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; IF-EVL: middle.block:
; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]]
; IF-EVL: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cmp-intrinsics.ll b/llvm/test/Transforms/LoopVe...
[truncated]
Force-pushed from 2d4782f to 2e5a44d.
Force-pushed from 2e5a44d to 4b50426.
Two outdated review threads on llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cmp-intrinsics.ll were marked resolved.
Force-pushed from 4b50426 to f788c9c.
ping
VectorType *RetType = VectorType::get(Type::getInt1Ty(Ctx), State.VF);
Value *VPInst = Builder.createVectorInstruction(Opcode, RetType,
                                                {Op1, Op2, Pred}, "vp.op");
if (auto *VecOp = dyn_cast<CastInst>(VPInst))
Why would createVectorInstruction return a CastInst if Opcode is ICmp/FCmp?
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
  llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
Still need this check?
fixed
Force-pushed from 63f1ab9 to d50dd93.
// TODO: Support other opcodes
if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
  llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
Shall we instead represent it as a VPWidenIntrinsic?
If possible, that would be great, given that there are quite a few changes to the existing code here.
Commit: "…ions with EVL-vectorization" (title truncated)
Force-pushed from d50dd93 to 74a4321.
Force-pushed from 6c89f9d to f219d2c.
JFYI, we've now landed a change that moves away from representing most non-trapping instructions as VP intrinsics, and instead relies on the VLOptimizer to reduce the VL of normal IR nodes. Skimming this review's title, I suspect this patch is no longer needed and can be closed.
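
For context, the two representations at issue side by side, grounded in the test updates above — the plain compare that the left-hand (-) test lines used before this patch, and the VP form the patch introduces. The direction described in the comment keeps the first and lets a later pass narrow the effective VL (names illustrative; %alltrue is an all-true mask splat as in the earlier sketches):

  ; Pre-patch form, and the one the later direction keeps: an ordinary compare.
  %c1 = icmp sgt <vscale x 4 x i32> %x, %y
  ; This patch's form: the predicated equivalent carrying mask and EVL explicitly.
  %c2 = call <vscale x 4 x i1> @llvm.vp.icmp.nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y, metadata !"sgt", <vscale x 4 x i1> %alltrue, i32 %evl)
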