Skip to content

Commit c968452

Browse files
committed
[LV][EVL] Support call instruction with EVL-vectorization
1 parent 629673d commit c968452

File tree

7 files changed

+400
-95
lines changed

7 files changed

+400
-95
lines changed

llvm/include/llvm/IR/VectorBuilder.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,11 @@ class VectorBuilder {
9999
const Twine &Name = Twine());
100100

101101
/// Emit a VP reduction intrinsic call for recurrence kind.
102-
/// \param RdxID The intrinsic ID of llvm.vector.reduce.*
102+
/// \param ID The ID of the intrinsic for which a VP intrinsic call is emitted.
103103
/// \param ValTy The type of operand which the reduction operation is
104104
/// performed.
105105
/// \param VecOpArray The operand list.
106-
Value *createSimpleReduction(Intrinsic::ID RdxID, Type *ValTy,
106+
Value *createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
107107
ArrayRef<Value *> VecOpArray,
108108
const Twine &Name = Twine());
109109
};

llvm/lib/Analysis/VectorUtils.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,13 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
116116
unsigned ScalarOpdIdx) {
117117
switch (ID) {
118118
case Intrinsic::abs:
119+
case Intrinsic::vp_abs:
119120
case Intrinsic::ctlz:
121+
case Intrinsic::vp_ctlz:
120122
case Intrinsic::cttz:
123+
case Intrinsic::vp_cttz:
121124
case Intrinsic::is_fpclass:
125+
case Intrinsic::vp_is_fpclass:
122126
case Intrinsic::powi:
123127
return (ScalarOpdIdx == 1);
124128
case Intrinsic::smul_fix:
@@ -140,10 +144,13 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
140144
case Intrinsic::fptoui_sat:
141145
case Intrinsic::lrint:
142146
case Intrinsic::llrint:
147+
case Intrinsic::vp_lrint:
148+
case Intrinsic::vp_llrint:
143149
case Intrinsic::ucmp:
144150
case Intrinsic::scmp:
145151
return OpdIdx == -1 || OpdIdx == 0;
146152
case Intrinsic::is_fpclass:
153+
case Intrinsic::vp_is_fpclass:
147154
return OpdIdx == 0;
148155
case Intrinsic::powi:
149156
return OpdIdx == -1 || OpdIdx == 1;

llvm/lib/IR/VectorBuilder.cpp

+4-5
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,12 @@ Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy,
6060
return createVectorInstructionImpl(VPID, ReturnTy, InstOpArray, Name);
6161
}
6262

63-
Value *VectorBuilder::createSimpleReduction(Intrinsic::ID RdxID,
64-
Type *ValTy,
63+
Value *VectorBuilder::createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
6564
ArrayRef<Value *> InstOpArray,
6665
const Twine &Name) {
67-
auto VPID = VPIntrinsic::getForIntrinsic(RdxID);
68-
assert(VPReductionIntrinsic::isVPReduction(VPID) &&
69-
"No VPIntrinsic for this reduction");
66+
auto VPID = VPIntrinsic::getForIntrinsic(ID);
67+
assert(VPIntrinsic::isVPIntrinsic(VPID) &&
68+
"No VPIntrinsic for this Intrinsic");
7069
return createVectorInstructionImpl(VPID, ValTy, InstOpArray, Name);
7170
}
7271

llvm/lib/Transforms/Utils/LoopUtils.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1300,7 +1300,7 @@ Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
13001300
Type *SrcEltTy = SrcTy->getElementType();
13011301
Value *Iden = getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
13021302
Value *Ops[] = {Iden, Src};
1303-
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
1303+
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
13041304
}
13051305

13061306
Value *llvm::createReduction(IRBuilderBase &B,
@@ -1343,7 +1343,7 @@ Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,
13431343
Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);
13441344
auto *SrcTy = cast<VectorType>(Src->getType());
13451345
Value *Ops[] = {Start, Src};
1346-
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
1346+
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
13471347
}
13481348

13491349
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

+44-17
Original file line numberDiff line numberDiff line change
@@ -993,24 +993,39 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
993993
Args.push_back(Arg);
994994
}
995995

996-
// Use vector version of the intrinsic.
997-
Module *M = State.Builder.GetInsertBlock()->getModule();
998-
Function *VectorF =
999-
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1000-
assert(VectorF && "Can't retrieve vector intrinsic.");
1001-
1002-
auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
1003-
SmallVector<OperandBundleDef, 1> OpBundles;
1004-
if (CI)
1005-
CI->getOperandBundlesAsDefs(OpBundles);
996+
if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
997+
// Use the vector-predicated (VP) version of the intrinsic.
998+
IRBuilderBase &BuilderIR = State.Builder;
999+
VectorBuilder VBuilder(BuilderIR);
1000+
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1001+
VBuilder.setMask(Mask).setEVL(Args.back());
1002+
// Remove the EVL from Args
1003+
Args.pop_back();
1004+
Value *VPInst = VBuilder.createSimpleIntrinsic(
1005+
VectorIntrinsicID, TysForDecl[0], Args, "vp.call");
1006+
if (!VPInst->getType()->isVoidTy())
1007+
State.set(this, VPInst);
1008+
State.addMetadata(VPInst,
1009+
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1010+
} else {
1011+
// Use vector version of the intrinsic.
1012+
Module *M = State.Builder.GetInsertBlock()->getModule();
1013+
Function *VectorF =
1014+
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1015+
assert(VectorF && "Can't retrieve vector intrinsic.");
10061016

1007-
CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1017+
auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
1018+
SmallVector<OperandBundleDef, 1> OpBundles;
1019+
if (CI)
1020+
CI->getOperandBundlesAsDefs(OpBundles);
10081021

1009-
setFlags(V);
1022+
CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1023+
setFlags(V);
10101024

1011-
if (!V->getType()->isVoidTy())
1012-
State.set(this, V);
1013-
State.addMetadata(V, CI);
1025+
if (!V->getType()->isVoidTy())
1026+
State.set(this, V);
1027+
State.addMetadata(V, CI);
1028+
}
10141029
}
10151030

10161031
InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
@@ -1023,6 +1038,18 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
10231038
// clear Arguments.
10241039
// TODO: Rework TTI interface to be independent of concrete IR values.
10251040
SmallVector<const Value *> Arguments;
1041+
1042+
Intrinsic::ID FID = VectorIntrinsicID;
1043+
unsigned NumOperands = getNumOperands();
1044+
if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID)) {
1045+
std::optional<Intrinsic::ID> ID =
1046+
VPIntrinsic::getFunctionalIntrinsicIDForVP(VectorIntrinsicID);
1047+
if (ID) {
1048+
FID = ID.value();
1049+
NumOperands = getNumOperands() - 1;
1050+
}
1051+
}
1052+
10261053
for (const auto &[Idx, Op] : enumerate(operands())) {
10271054
auto *V = Op->getUnderlyingValue();
10281055
if (!V) {
@@ -1038,14 +1065,14 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
10381065

10391066
Type *RetTy = ToVectorTy(Ctx.Types.inferScalarType(this), VF);
10401067
SmallVector<Type *> ParamTys;
1041-
for (unsigned I = 0; I != getNumOperands(); ++I)
1068+
for (unsigned I = 0; I != NumOperands; ++I)
10421069
ParamTys.push_back(
10431070
ToVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));
10441071

10451072
// TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
10461073
FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
10471074
IntrinsicCostAttributes CostAttrs(
1048-
VectorIntrinsicID, RetTy, Arguments, ParamTys, FMF,
1075+
FID, RetTy, Arguments, ParamTys, FMF,
10491076
dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue()));
10501077
return Ctx.TTI.getIntrinsicInstrCost(CostAttrs, CostKind);
10511078
}

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

+18
Original file line numberDiff line numberDiff line change
@@ -1385,6 +1385,24 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
13851385
VPValue *NewMask = GetNewMask(Red->getCondOp());
13861386
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
13871387
})
1388+
.Case<VPWidenIntrinsicRecipe>(
1389+
[&](VPWidenIntrinsicRecipe *CInst) -> VPRecipeBase * {
1390+
auto *CI = cast<CallInst>(CInst->getUnderlyingInstr());
1391+
SmallVector<VPValue *> Ops(CInst->operands());
1392+
Ops.push_back(&EVL);
1393+
Intrinsic::ID VPID = VPIntrinsic::getForIntrinsic(
1394+
CI->getCalledFunction()->getIntrinsicID());
1395+
if (VPID == Intrinsic::not_intrinsic)
1396+
return nullptr;
1397+
// FIXME: Can we really avoid passing the underlying
1398+
// instruction here? If so, how do we set the flags and
1399+
// add metadata in execute?
1400+
return new VPWidenIntrinsicRecipe(
1401+
VPID, Ops, TypeInfo.inferScalarType(CInst), false,
1402+
false, false);
1403+
// return new VPWidenIntrinsicRecipe(
1404+
// *CI, VPID, Ops, CI->getType(), CI->getDebugLoc());
1405+
})
13881406
.Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
13891407
SmallVector<VPValue *> Ops(Sel->operands());
13901408
Ops.push_back(&EVL);

0 commit comments

Comments
 (0)