-
Notifications
You must be signed in to change notification settings - Fork 13.4k
[VPlan] Introduce VPWidenIntrinsicRecipe to separate from libcall. #110486
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
8ae59c7
74fc8bb
ea64bc7
ebbfa49
ba896db
2ccbfb8
4243f26
03424f9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -4373,7 +4373,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks( | |||||||||
[](const auto *R) { return Instruction::Store; }) | ||||||||||
.Case<VPWidenLoadRecipe>( | ||||||||||
[](const auto *R) { return Instruction::Load; }) | ||||||||||
.Case<VPWidenCallRecipe>( | ||||||||||
.Case<VPWidenCallRecipe, VPWidenIntrinsicRecipe>( | ||||||||||
[](const auto *R) { return Instruction::Call; }) | ||||||||||
.Case<VPInstruction, VPWidenRecipe, VPReplicateRecipe, | ||||||||||
VPWidenCastRecipe>( | ||||||||||
|
@@ -4397,12 +4397,18 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks( | |||||||||
OS << (Pair.second == Subset.front().second ? "" : ", ") << Pair.second; | ||||||||||
OS << "):"; | ||||||||||
if (Opcode == Instruction::Call) { | ||||||||||
auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R); | ||||||||||
Function *CalledFn = | ||||||||||
WidenCall ? WidenCall->getCalledScalarFunction() | ||||||||||
: cast<Function>(R->getOperand(R->getNumOperands() - 1) | ||||||||||
->getLiveInIRValue()); | ||||||||||
OS << " call to " << CalledFn->getName(); | ||||||||||
StringRef Name = ""; | ||||||||||
if (auto *Int = dyn_cast<VPWidenIntrinsicRecipe>(R)) { | ||||||||||
Name = Int->getIntrinsicName(); | ||||||||||
} else { | ||||||||||
auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R); | ||||||||||
Function *CalledFn = | ||||||||||
WidenCall ? WidenCall->getCalledScalarFunction() | ||||||||||
: cast<Function>(R->getOperand(R->getNumOperands() - 1) | ||||||||||
->getLiveInIRValue()); | ||||||||||
Name = CalledFn->getName(); | ||||||||||
} | ||||||||||
OS << " call to " << Name; | ||||||||||
} else | ||||||||||
OS << " " << Instruction::getOpcodeName(Opcode); | ||||||||||
reportVectorizationInfo(OutString, "InvalidCost", ORE, OrigLoop, nullptr, | ||||||||||
|
@@ -4453,6 +4459,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, | |||||||||
case VPDef::VPWidenCanonicalIVSC: | ||||||||||
case VPDef::VPWidenCastSC: | ||||||||||
case VPDef::VPWidenGEPSC: | ||||||||||
case VPDef::VPWidenIntrinsicSC: | ||||||||||
case VPDef::VPWidenSC: | ||||||||||
case VPDef::VPWidenSelectSC: | ||||||||||
case VPDef::VPBlendSC: | ||||||||||
|
@@ -8275,7 +8282,7 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi, | |||||||||
return new VPBlendRecipe(Phi, OperandsWithMask); | ||||||||||
} | ||||||||||
|
||||||||||
VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, | ||||||||||
VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, | ||||||||||
ArrayRef<VPValue *> Operands, | ||||||||||
VFRange &Range) { | ||||||||||
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange( | ||||||||||
|
@@ -8306,8 +8313,9 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, | |||||||||
}, | ||||||||||
Range); | ||||||||||
if (ShouldUseVectorIntrinsic) | ||||||||||
return new VPWidenCallRecipe(CI, make_range(Ops.begin(), Ops.end()), ID, | ||||||||||
CI->getDebugLoc()); | ||||||||||
return new VPWidenIntrinsicRecipe(*CI, ID, | ||||||||||
make_range(Ops.begin(), Ops.end() - 1), | ||||||||||
CI->getType(), CI->getDebugLoc()); | ||||||||||
|
||||||||||
Function *Variant = nullptr; | ||||||||||
std::optional<unsigned> MaskPos; | ||||||||||
|
@@ -8359,9 +8367,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI, | |||||||||
Ops.insert(Ops.begin() + *MaskPos, Mask); | ||||||||||
} | ||||||||||
|
||||||||||
return new VPWidenCallRecipe(CI, make_range(Ops.begin(), Ops.end()), | ||||||||||
Intrinsic::not_intrinsic, CI->getDebugLoc(), | ||||||||||
Variant); | ||||||||||
return new VPWidenCallRecipe( | ||||||||||
CI, Variant, make_range(Ops.begin(), Ops.end()), CI->getDebugLoc()); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done, thanks! |
||||||||||
} | ||||||||||
|
||||||||||
return nullptr; | ||||||||||
|
@@ -9234,7 +9241,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions( | |||||||||
RecurrenceDescriptor::isFMulAddIntrinsic(CurrentLinkI) && | ||||||||||
"Expected instruction to be a call to the llvm.fmuladd intrinsic"); | ||||||||||
assert(((MinVF.isScalar() && isa<VPReplicateRecipe>(CurrentLink)) || | ||||||||||
isa<VPWidenCallRecipe>(CurrentLink)) && | ||||||||||
isa<VPWidenIntrinsicRecipe>(CurrentLink)) && | ||||||||||
CurrentLink->getOperand(2) == PreviousLink && | ||||||||||
"expected a call where the previous link is the added operand"); | ||||||||||
|
||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -886,6 +886,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue { | |
case VPRecipeBase::VPWidenCanonicalIVSC: | ||
case VPRecipeBase::VPWidenCastSC: | ||
case VPRecipeBase::VPWidenGEPSC: | ||
case VPRecipeBase::VPWidenIntrinsicSC: | ||
case VPRecipeBase::VPWidenSC: | ||
case VPRecipeBase::VPWidenEVLSC: | ||
case VPRecipeBase::VPWidenSelectSC: | ||
|
@@ -1608,25 +1609,77 @@ class VPScalarCastRecipe : public VPSingleDefRecipe { | |
} | ||
}; | ||
|
||
/// A recipe for widening Call instructions. | ||
class VPWidenCallRecipe : public VPRecipeWithIRFlags { | ||
/// ID of the vector intrinsic to call when widening the call. If set the | ||
/// Intrinsic::not_intrinsic, a library call will be used instead. | ||
/// A recipe for widening vector intrinsics. | ||
class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags { | ||
/// ID of the vector intrinsic to widen. | ||
Intrinsic::ID VectorIntrinsicID; | ||
/// If this recipe represents a library call, Variant stores a pointer to | ||
/// the chosen function. There is a 1:1 mapping between a given VF and the | ||
/// chosen vectorized variant, so there will be a different vplan for each | ||
/// VF with a valid variant. | ||
|
||
/// Scalar type of the result produced by the intrinsic. | ||
Type *ResultTy; | ||
|
||
bool MayWriteToMemory; | ||
bool MayReadFromMemory; | ||
bool MayHaveSideEffects; | ||
|
||
public: | ||
template <typename IterT> | ||
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID, | ||
iterator_range<IterT> CallArguments, Type *Ty, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What about ArrayRef here instead of iterator_range? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated, thanks! |
||
DebugLoc DL = {}) | ||
: VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI), | ||
VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty), | ||
MayWriteToMemory(CI.mayWriteToMemory()), | ||
MayReadFromMemory(CI.mayReadFromMemory()), | ||
MayHaveSideEffects(CI.mayHaveSideEffects()) {} | ||
|
||
~VPWidenIntrinsicRecipe() override = default; | ||
|
||
VPWidenIntrinsicRecipe *clone() override { | ||
return new VPWidenIntrinsicRecipe(*cast<CallInst>(getUnderlyingValue()), | ||
VectorIntrinsicID, operands(), ResultTy, | ||
getDebugLoc()); | ||
} | ||
|
||
VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC) | ||
|
||
/// Produce a widened version of the vector intrinsic. | ||
void execute(VPTransformState &State) override; | ||
|
||
/// Return the cost of this vector intrinsic. | ||
InstructionCost computeCost(ElementCount VF, | ||
VPCostContext &Ctx) const override; | ||
|
||
Type *getResultTy() const { return ResultTy; } | ||
|
||
/// Return the name of the intrinsic as a string. | ||
StringRef getIntrinsicName() const; | ||
|
||
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||
/// Print the recipe. | ||
void print(raw_ostream &O, const Twine &Indent, | ||
VPSlotTracker &SlotTracker) const override; | ||
#endif | ||
bool mayWriteToMemory() const { return MayWriteToMemory; } | ||
|
||
bool mayReadFromMemory() const { return MayReadFromMemory; } | ||
|
||
bool mayHaveSideEffects() const { return MayHaveSideEffects; } | ||
}; | ||
|
||
/// A recipe for widening Call instructions using library calls. | ||
class VPWidenCallRecipe : public VPRecipeWithIRFlags { | ||
/// Variant stores a pointer to the chosen function. There is a 1:1 mapping | ||
/// between a given VF and the chosen vectorized variant, so there will be a | ||
/// different VPlan for each VF with a valid variant. | ||
Function *Variant; | ||
|
||
public: | ||
template <typename IterT> | ||
VPWidenCallRecipe(Value *UV, iterator_range<IterT> CallArguments, | ||
Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {}, | ||
Function *Variant = nullptr) | ||
VPWidenCallRecipe(Value *UV, Function *Variant, | ||
iterator_range<IterT> CallArguments, DebugLoc DL = {}) | ||
: VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments, | ||
*cast<Instruction>(UV)), | ||
VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) { | ||
Variant(Variant) { | ||
assert( | ||
isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) && | ||
"last operand must be the called function"); | ||
|
@@ -1635,8 +1688,8 @@ class VPWidenCallRecipe : public VPRecipeWithIRFlags { | |
~VPWidenCallRecipe() override = default; | ||
|
||
VPWidenCallRecipe *clone() override { | ||
return new VPWidenCallRecipe(getUnderlyingValue(), operands(), | ||
VectorIntrinsicID, getDebugLoc(), Variant); | ||
return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(), | ||
getDebugLoc()); | ||
} | ||
|
||
VP_CLASSOF_IMPL(VPDef::VPWidenCallSC) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe move Ops.push_back(Operands.back()); from line 8305 to line 8319 to avoid -1 here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks!