-
Notifications
You must be signed in to change notification settings - Fork 13.3k
[LV][EVL] Support cast instruction with EVL-vectorization #108351
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-transforms Author: LiqinWeng (LiqinWeng) ChangesPatch is 25.63 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/108351.diff 7 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b821da03c16e94..8450ed1765cafa 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4480,6 +4480,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPWidenCallSC:
case VPDef::VPWidenCanonicalIVSC:
case VPDef::VPWidenCastSC:
+ case VPDef::VPWidenCastEVLSC:
case VPDef::VPWidenGEPSC:
case VPDef::VPWidenSC:
case VPDef::VPWidenSelectSC:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 64242e43c56bc8..7e45a1c0b36dbd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -921,6 +921,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenCallSC:
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
+ case VPRecipeBase::VPWidenCastEVLSC:
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenEVLSC:
@@ -1111,6 +1112,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
R->getVPDefID() == VPRecipeBase::VPVectorPointerSC;
}
@@ -1514,19 +1516,28 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Result type for the cast.
Type *ResultTy;
-public:
- VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
- CastInst &UI)
+protected:
+ VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
+ VPValue *Op, Type *ResultTy, CastInst &UI)
: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op, UI), Opcode(Opcode),
ResultTy(ResultTy) {
assert(UI.getOpcode() == Opcode &&
"opcode of underlying cast doesn't match");
}
- VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+ VPWidenCastRecipe(unsigned VPDefOpcode, Instruction::CastOps Opcode,
+ VPValue *Op, Type *ResultTy)
: VPRecipeWithIRFlags(VPDef::VPWidenCastSC, Op), Opcode(Opcode),
ResultTy(ResultTy) {}
+public:
+ VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+ CastInst &UI)
+ : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy, UI) {}
+
+ VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy)
+ : VPWidenCastRecipe(VPDef::VPWidenCastSC, Opcode, Op, ResultTy) {}
+
~VPWidenCastRecipe() override = default;
VPWidenCastRecipe *clone() override {
@@ -1537,7 +1548,15 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
return new VPWidenCastRecipe(Opcode, getOperand(0), ResultTy);
}
- VP_CLASSOF_IMPL(VPDef::VPWidenCastSC)
+ static inline bool classof(const VPRecipeBase *R) {
+ return R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenCastEVLSC;
+ }
+
+ static inline bool classof(const VPUser *U) {
+ auto *R = dyn_cast<VPRecipeBase>(U);
+ return R && classof(R);
+ }
/// Produce widened copies of the cast.
void execute(VPTransformState &State) override;
@@ -1554,6 +1573,54 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags {
Type *getResultType() const { return ResultTy; }
};
+// A recipe for widening cast operation with vector-predication intrinsics with
+/// explicit vector length (EVL).
+class VPWidenCastEVLRecipe : public VPWidenCastRecipe {
+ using VPRecipeWithIRFlags::transferFlags;
+
+public:
+ VPWidenCastEVLRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy,
+ VPValue &EVL)
+ : VPWidenCastRecipe(VPDef::VPWidenCastEVLSC, Opcode, Op, ResultTy) {
+ addOperand(&EVL);
+ }
+ VPWidenCastEVLRecipe(VPWidenCastRecipe &W, VPValue &EVL)
+ : VPWidenCastEVLRecipe(W.getOpcode(), W.getOperand(0), W.getResultType(),
+ EVL) {
+ transferFlags(W);
+ }
+
+ ~VPWidenCastEVLRecipe() override = default;
+
+ VPWidenCastEVLRecipe *clone() final {
+ llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
+ return nullptr;
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenCastEVLSC)
+
+ VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
+ const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
+
+ /// Produce a vp-intrinsic copies of the cast.
+ void execute(VPTransformState &State) final;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ // EVL in that recipe is always the last operand, thus any use before means
+ // the VPValue should be vectorized.
+ return getEVL() == Op;
+ }
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const final;
+#endif
+};
+
/// VPScalarCastRecipe is a recipe to create scalar cast instructions.
class VPScalarCastRecipe : public VPSingleDefRecipe {
Instruction::CastOps Opcode;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 351f909ac0279d..62bc032f0c0563 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -69,6 +69,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPReductionSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
+ case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenLoadEVLSC:
@@ -112,6 +113,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPReductionSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
+ case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
@@ -162,6 +164,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPScalarIVStepsSC:
case VPWidenCanonicalIVSC:
case VPWidenCastSC:
+ case VPWidenCastEVLSC:
case VPWidenGEPSC:
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
@@ -1344,6 +1347,40 @@ void VPWidenCastRecipe::execute(VPTransformState &State) {
}
}
+void VPWidenCastEVLRecipe::execute(VPTransformState &State) {
+ unsigned Opcode = getOpcode();
+ State.setDebugLocFrom(getDebugLoc());
+ assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+ "explicit vector length.");
+
+ assert(State.get(getOperand(0), 0)->getType()->isVectorTy() &&
+ "VPWidenEVLRecipe should not be used for scalars");
+
+ // TODO: add more cast instruction, eg: fptoint/inttofp/inttoptr/fptofp
+ if (Opcode == Instruction::SExt || Opcode == Instruction::ZExt ||
+ Opcode == Instruction::Trunc) {
+ Value *SrcVal = State.get(getOperand(0), 0);
+ VectorType *SrcTy = cast<VectorType>(SrcVal->getType());
+ VectorType *DsType =
+ VectorType::get(getResultType(), SrcTy->getElementCount());
+
+ IRBuilderBase &BuilderIR = State.Builder;
+ VectorBuilder Builder(BuilderIR);
+ Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
+
+ Builder.setMask(Mask).setEVL(State.get(getEVL(), 0, /*NeedsScalar=*/true));
+ Value *VPInst =
+ Builder.createVectorInstruction(Opcode, DsType, {SrcVal}, "vp.cast");
+ if (VPInst) {
+ if (auto *VecOp = dyn_cast<CastInst>(VPInst))
+ VecOp->copyIRFlags(getUnderlyingInstr());
+ }
+ State.set(this, VPInst, 0);
+ State.addMetadata(VPInst,
+ dyn_cast_or_null<Instruction>(getUnderlyingValue()));
+ }
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -1354,6 +1391,16 @@ void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
printOperands(O, SlotTracker);
O << " to " << *getResultType();
}
+
+void VPWidenCastEVLRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN-VP ";
+ printAsOperand(O, SlotTracker);
+ O << " = vp." << Instruction::getOpcodeName(getOpcode()) << " ";
+ printFlags(O);
+ printOperands(O, SlotTracker);
+ O << " to " << *getResultType();
+}
#endif
/// This function adds
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index b722ec34ee6fb6..c4ac5a13a1473e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1349,6 +1349,15 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
return nullptr;
return new VPWidenEVLRecipe(*W, EVL);
})
+ .Case<VPWidenCastRecipe>(
+ [&](VPWidenCastRecipe *W) -> VPRecipeBase * {
+ unsigned Opcode = W->getOpcode();
+ if (Opcode != Instruction::SExt &&
+ Opcode != Instruction::ZExt &&
+ Opcode != Instruction::Trunc)
+ return nullptr;
+ return new VPWidenCastEVLRecipe(*W, EVL);
+ })
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
VPValue *NewMask = GetNewMask(Red->getCondOp());
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index b8b2c0bd4d5ff1..17fc0a526e58d0 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -350,6 +350,7 @@ class VPDef {
VPWidenCallSC,
VPWidenCanonicalIVSC,
VPWidenCastSC,
+ VPWidenCastEVLSC,
VPWidenGEPSC,
VPWidenLoadEVLSC,
VPWidenLoadSC,
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
index 0381f6dae9811f..9c4ebf3d7ff849 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -159,38 +159,38 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP: vector.body:
; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
-; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
+; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP5]], i32 8, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = add i32 [[EVL_BASED_IV]], 0
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]]
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP9]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32>
-; IF-EVL-INLOOP-NEXT: [[TMP11:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP10]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
-; IF-EVL-INLOOP-NEXT: [[TMP12]] = add i32 [[TMP11]], [[VEC_PHI]]
+; IF-EVL-INLOOP-NEXT: [[VP_CAST:%.*]] = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> [[VP_OP_LOAD]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[VP_CAST]], <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer), i32 [[TMP6]])
+; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP6]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP4]]
-; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; IF-EVL-INLOOP-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; IF-EVL-INLOOP: middle.block:
; IF-EVL-INLOOP-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; IF-EVL-INLOOP: scalar.ph:
; IF-EVL-INLOOP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
-; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
+; IF-EVL-INLOOP-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP11]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; IF-EVL-INLOOP-NEXT: br label [[FOR_BODY:%.*]]
; IF-EVL-INLOOP: for.body:
; IF-EVL-INLOOP-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; IF-EVL-INLOOP-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; IF-EVL-INLOOP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]]
-; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-; IF-EVL-INLOOP-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32
+; IF-EVL-INLOOP-NEXT: [[TMP13:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
+; IF-EVL-INLOOP-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32
; IF-EVL-INLOOP-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]]
; IF-EVL-INLOOP-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1
; IF-EVL-INLOOP-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]]
; IF-EVL-INLOOP-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IF-EVL-INLOOP: for.cond.cleanup.loopexit:
-; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
+; IF-EVL-INLOOP-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; IF-EVL-INLOOP-NEXT: br label [[FOR_COND_CLEANUP]]
; IF-EVL-INLOOP: for.cond.cleanup:
; IF-EVL-INLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
new file mode 100644
index 00000000000000..f1f3d2c88301e1
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
@@ -0,0 +1,227 @@
+; REQUIRES: asserts
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s
+
+; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \
+; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s
+
+define void @vp_sext(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<%4> = phi ir<0>, vp<%11>
+; IF-EVL-NEXT: EMIT vp<%5> = EXPLICIT-VECTOR-LENGTH vp<%4>, ir<%N>
+; IF-EVL-NEXT: vp<%6> = SCALAR-STEPS vp<%4>, ir<1>
+; IF-EVL-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6>
+; IF-EVL-NEXT: vp<%7> = vector-pointer ir<%arrayidx>
+; IF-EVL-NEXT: WIDEN ir<%0> = vp.load vp<%7>, vp<%5>
+; IF-EVL-NEXT: WIDEN-VP vp<%8> = vp.sext ir<%0>, vp<%5> to i64
+; IF-EVL-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6>
+; IF-EVL-NEXT: vp<%9> = vector-pointer ir<%arrayidx4>
+; IF-EVL-NEXT: WIDEN vp.store vp<%9>, vp<%8>, vp<%5>
+; IF-EVL-NEXT: SCALAR-CAST vp<%10> = zext vp<%5> to i64
+; IF-EVL-NEXT: EMIT vp<%11> = add vp<%10>, vp<%4>
+; IF-EVL-NEXT: EMIT vp<%12> = add vp<%3>, vp<%0>
+; IF-EVL-NEXT: EMIT branch-on-count vp<%12>, vp<%1>
+; IF-EVL-NEXT: No successors
+; IF-EVL-NEXT: }
+
+; NO-VP: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF>=1' {
+; NO-VP-NEXT: Live-in vp<%0> = VF * UF
+; NO-VP-NEXT: Live-in vp<%1> = vector-trip-count
+; NO-VP-NEXT: Live-in ir<%N> = original trip-count
+
+; NO-VP: vector.ph:
+; NO-VP-NEXT: Successor(s): vector loop
+
+; NO-VP: <x1> vector loop: {
+; NO-VP-NEXT: vector.body:
+; NO-VP-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%6>
+; NO-VP-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1>
+; NO-VP-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%3>
+; NO-VP-NEXT: vp<%4> = vector-pointer ir<%arrayidx>
+; NO-VP-NEXT: WIDEN ir<%0> = load vp<%4>
+; NO-VP-NEXT: WIDEN-CAST ir<%conv2> = sext ir<%0> to i64
+; NO-VP-NEXT: CLONE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%3>
+; NO-VP-NEXT: vp<%5> = vector-pointer ir<%arrayidx4>
+; NO-VP-NEXT: WIDEN store vp<%5>, ir<%conv2>
+; NO-VP-NEXT: EMIT vp<%6> = add nuw vp<%2>, vp<%0>
+; NO-VP-NEXT: EMIT branch-on-count vp<%6>, vp<%1>
+; NO-VP-NEXT: No successors
+; NO-VP-NEXT: }
+
+entry:
+ %cmp8 = icmp sgt i64 %N, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, ptr %b, i64 %indvars.iv
+ %0 = load i32, ptr %arrayidx, align 4
+ %conv2 = sext i32 %0 to i64
+ %arrayidx4 = getelementptr inbounds i64, ptr %a, i64 %indvars.iv
+ store i64 %conv2, ptr %arrayidx4, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define void @vp_zext(ptr noalias %a, ptr noalias %b, i64 %N) {
+; IF-EVL: VPlan 'Final VPlan for VF={vscale x 1,vscale x 2},UF={1}' {
+; IF-EVL-NEXT: Live-in vp<%0> = VF * UF
+; IF-EVL-NEXT: Live-in vp<%1> = vector-trip-count
+; IF-EVL-NEXT: Live-in ir<%N> = original trip-count
+
+; IF-EVL: vector.ph:
+; IF-EVL-NEXT: Successor(s): vector loop
+
+; IF-EVL: <x1> vector loop: {
+; IF-EVL-NEXT: vector.body:
+; IF-EVL-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%12>
+; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-...
[truncated]
|
636eb6f
to
5078445
Compare
b251550
to
0b102ba
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Apologies for the late reply since I just finished a long vacation.
I'm happy to see that someone else is also contributing to EVL vectorization. :)
0b102ba
to
0b3836c
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
a2544b8
to
18a6072
Compare
llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
Outdated
Show resolved
Hide resolved
llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
Outdated
Show resolved
Hide resolved
llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
Outdated
Show resolved
Hide resolved
llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
Outdated
Show resolved
Hide resolved
llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-cast-intrinsics.ll
Outdated
Show resolved
Hide resolved
18a6072
to
ee4b5f4
Compare
68501a4
to
0ec4476
Compare
Pls give me 'LGTM' :)~~~ |
Same question as for #109614, would using a generic intrinsic recipe work, or is there a need to have dedicated recipes for those? |
|
Generally speaking, it should work. The only high level question here - distinguish between different opcodes in VPlan-based cost model. We'll need to implement something like switch-like approach in a recipe |
I think as long as we are using them only for cases where we directly CodeGen the intrinsic and nothing else, it should be fine to rely on TTI to return the correct costs. If there are any inaccuracies, the cost model should likely be improved to fix those |
8079515
to
68b2d92
Compare
e355974
to
af0e5a3
Compare
af0e5a3
to
5bd40b9
Compare
if (auto *CI = cast_or_null<CallInst>(getUnderlyingValue())) | ||
CI->getOperandBundlesAsDefs(OpBundles); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add a comment on why this is isn't supported for VPINstrinsics.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
|
||
CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles); | ||
Instruction *V = State.Builder.CreateCall(VectorF, Args, OpBundles); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// Currently vp-intrinsics only accept FMF flags. llvm.vp.uitofp will get | ||
// Flags of OperationType::NonNegOp && OperationType::FPMathOp. | ||
if (isa<FPMathOperator>(V) && VectorIntrinsicID != Intrinsic::vp_uitofp) | ||
setFlags(V); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we instead drop the flags on construction for widen intrinsics recipes wit EVL?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- If we want to get the IR Flags, we need get the Instruction, which will get the correct OpType.
void setFlags(Instruction *I)
. But I didnt pass the Instruction on VPlanTransform - uitofp has the nneg IR flags, but llvm.vp.uitofp seems no this flag. I'm not sure if I understood something wrong.
- How to handle vp instruction IR flags may need to be handled by a separate patch? Currently, it seems that VP Reductions does not handle IR Flags
@@ -1446,7 +1446,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { | |||
VPTypeAnalysis TypeInfo(CanonicalIVType); | |||
LLVMContext &Ctx = CanonicalIVType->getContext(); | |||
SmallVector<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan); | |||
|
|||
VPValue *AllOneMask = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also use for select VPInstruction below?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A separate NFC patch would be better :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I don't understand what you mean, the vp.select does not have a mask parameter. declare <16 x i32> @llvm.vp.select.v16i32 (<16 x i1> <condition>, <16 x i32> <on_true>, <16 x i32> <on_false>, i32 <evl>)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think Florian means vp.merge.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will create a new patch to deal with it.
Function *VectorF = | ||
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl); | ||
IsVPIntrinsic | ||
? VPIntrinsic::getOrInsertDeclarationForParams(M, VectorIntrinsicID, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@LiqinWeng Yes, that is a problem.
But perhaps we don't have to create exceptions specifically for VP intrinsics. Could you try to extend isVectorIntrinsicWithOverloadTypeAtArg
to handle it?
// Currently vp-intrinsics only accept FMF flags. llvm.vp.uitofp will get | ||
// Flags of OperationType::NonNegOp && OperationType::FPMathOp. | ||
if (isa<FPMathOperator>(V) && VectorIntrinsicID != Intrinsic::vp_uitofp) | ||
setFlags(V); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have a question here.
Will setFlags really function as we expect if we don't pass the underlying instruction into the recipe? This PR also seems to lack corresponding test cases to verify whether the FMF has been correctly set.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have a question here. Will setFlags really function as we expect if we don't pass the underlying instruction into the recipe? This PR also seems to lack corresponding test cases to verify whether the FMF has been correctly set.
In the previous commit I pass the underlying instruction. At present, it seems that the underlying instruction information needs to be passed when setting metedata and flag. I am not sure about the flags of VP intrinsics, and they do not seem to correspond to non-vp intrinsics. For example, uitofp has the nneg IR flags, but llvm.vp.uitofp seems no this flag. I'm not sure if I understood something wrong. :) . And can we temporarily not set flag and resubmit a new ptach to deal with it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
vp.cast does not seem to require setting flags. So I dont pass the underlying instruction for cast transform to vp.cast . I am not sure if I understand it correctly. If there is any mistake, please point it out.
@@ -1446,7 +1446,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { | |||
VPTypeAnalysis TypeInfo(CanonicalIVType); | |||
LLVMContext &Ctx = CanonicalIVType->getContext(); | |||
SmallVector<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan); | |||
|
|||
VPValue *AllOneMask = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A separate NFC patch would be better :)
Any other problems? @fhahn @alexey-bataev @Mel-Chen |
llvm/lib/Analysis/VectorUtils.cpp
Outdated
if (VPCastIntrinsic::isVPCast(ID)) { | ||
return OpdIdx == -1 || OpdIdx == 0; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (VPCastIntrinsic::isVPCast(ID)) { | |
return OpdIdx == -1 || OpdIdx == 0; | |
} | |
if (VPCastIntrinsic::isVPCast(ID)) | |
return OpdIdx == -1 || OpdIdx == 0; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
@@ -973,11 +974,13 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) { | |||
|
|||
CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles); | |||
|
|||
// FIXME: vp.cast and vp.select dont pass the underlying instruction into the |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comment is confusing here I think, it should be at the place where it is not passed, as this needs to be fixed where the recipe is constructed, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed. I will try to add Flags during the recipe construction with new patch
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks!
@@ -977,7 +978,7 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) { | |||
|
|||
if (!V->getType()->isVoidTy()) | |||
State.set(this, V); | |||
State.addMetadata(V, CI); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this change still need?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The following patch will be used, I can remove it first. Pls give LGTM :)
@@ -964,7 +964,8 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) { | |||
Module *M = State.Builder.GetInsertBlock()->getModule(); | |||
Function *VectorF = | |||
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl); | |||
assert(VectorF && "Can't retrieve vector intrinsic."); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this change still need?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Probably sufficient to keep the existing message
@@ -1495,11 +1495,32 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { | |||
VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::getTrue( | |||
IntegerType::getInt1Ty(CI->getContext()))); | |||
Ops.push_back(Mask); | |||
; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
redundant
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed
if (VPID == Intrinsic::not_intrinsic) | ||
return nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good change, but unrelated.
Could you separate a new patch for this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah probably worth splitting off
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done
@@ -1485,8 +1485,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) { | |||
auto *CI = cast<CallInst>(CInst->getUnderlyingInstr()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The TypeSwitch here has grown quite big, it would be good to outline it to a separate function to reduce the nesting level and make things slightly easier to read
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(would be good as. a follow up)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK~
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/185/builds/9982 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/137/builds/10105 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/175/builds/9982 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/33/builds/8057 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/16/builds/10466 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/56/builds/14221 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/60/builds/14900 Here is the relevant piece of the build log for the reference
|
No description provided.