[VPlan] Introduce VPWidenIntrinsicRecipe to separate from libcall. #110486
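This patch splits what was previously a single VPWidenCallRecipe, which modeled both vector-intrinsic calls (via a VectorIntrinsicID) and vectorized library calls (via a Variant function), into two recipes: the new VPWidenIntrinsicRecipe for intrinsics, and a slimmed-down VPWidenCallRecipe for library calls only. A minimal before/after sketch of the construction in tryToWidenCall, using the names that appear in the diff below:

// Before: one recipe, with the intrinsic ID distinguishing the two cases.
// return new VPWidenCallRecipe(CI, make_range(Ops.begin(), Ops.end()), ID,
//                              CI->getDebugLoc(), Variant);

// After (sketch, mirroring the diff below): a dedicated recipe per case.
if (ShouldUseVectorIntrinsic)
  return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(),
                                    CI->getDebugLoc());
// Library-call path only: the callee is appended as the last operand.
Ops.push_back(Operands.back());
return new VPWidenCallRecipe(CI, Variant, Ops, CI->getDebugLoc());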

Merged 8 commits on Oct 8, 2024
35 changes: 20 additions & 15 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4363,7 +4363,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
[](const auto *R) { return Instruction::Store; })
.Case<VPWidenLoadRecipe>(
[](const auto *R) { return Instruction::Load; })
.Case<VPWidenCallRecipe>(
.Case<VPWidenCallRecipe, VPWidenIntrinsicRecipe>(
[](const auto *R) { return Instruction::Call; })
.Case<VPInstruction, VPWidenRecipe, VPReplicateRecipe,
VPWidenCastRecipe>(
@@ -4387,12 +4387,18 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
OS << (Pair.second == Subset.front().second ? "" : ", ") << Pair.second;
OS << "):";
if (Opcode == Instruction::Call) {
auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R);
Function *CalledFn =
WidenCall ? WidenCall->getCalledScalarFunction()
: cast<Function>(R->getOperand(R->getNumOperands() - 1)
->getLiveInIRValue());
OS << " call to " << CalledFn->getName();
StringRef Name = "";
if (auto *Int = dyn_cast<VPWidenIntrinsicRecipe>(R)) {
Name = Int->getIntrinsicName();
} else {
auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R);
Function *CalledFn =
WidenCall ? WidenCall->getCalledScalarFunction()
: cast<Function>(R->getOperand(R->getNumOperands() - 1)
->getLiveInIRValue());
Name = CalledFn->getName();
}
OS << " call to " << Name;
} else
OS << " " << Instruction::getOpcodeName(Opcode);
reportVectorizationInfo(OutString, "InvalidCost", ORE, OrigLoop, nullptr,
@@ -4443,6 +4449,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPWidenCanonicalIVSC:
case VPDef::VPWidenCastSC:
case VPDef::VPWidenGEPSC:
case VPDef::VPWidenIntrinsicSC:
case VPDef::VPWidenSC:
case VPDef::VPWidenSelectSC:
case VPDef::VPBlendSC:
@@ -8266,7 +8273,7 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
return new VPBlendRecipe(Phi, OperandsWithMask);
}

VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
ArrayRef<VPValue *> Operands,
VFRange &Range) {
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
@@ -8286,7 +8293,6 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
return nullptr;

SmallVector<VPValue *, 4> Ops(Operands.take_front(CI->arg_size()));
Ops.push_back(Operands.back());

// Is it beneficial to perform intrinsic call compared to lib call?
bool ShouldUseVectorIntrinsic =
@@ -8297,8 +8303,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
},
Range);
if (ShouldUseVectorIntrinsic)
return new VPWidenCallRecipe(CI, make_range(Ops.begin(), Ops.end()), ID,
CI->getDebugLoc());
return new VPWidenIntrinsicRecipe(*CI, ID, Ops, CI->getType(),
CI->getDebugLoc());

Function *Variant = nullptr;
std::optional<unsigned> MaskPos;
@@ -8350,9 +8356,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
Ops.insert(Ops.begin() + *MaskPos, Mask);
}

return new VPWidenCallRecipe(CI, make_range(Ops.begin(), Ops.end()),
Intrinsic::not_intrinsic, CI->getDebugLoc(),
Variant);
Ops.push_back(Operands.back());
return new VPWidenCallRecipe(CI, Variant, Ops, CI->getDebugLoc());
}

return nullptr;
@@ -9225,7 +9230,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
RecurrenceDescriptor::isFMulAddIntrinsic(CurrentLinkI) &&
"Expected instruction to be a call to the llvm.fmuladd intrinsic");
assert(((MinVF.isScalar() && isa<VPReplicateRecipe>(CurrentLink)) ||
isa<VPWidenCallRecipe>(CurrentLink)) &&
isa<VPWidenIntrinsicRecipe>(CurrentLink)) &&
CurrentLink->getOperand(2) == PreviousLink &&
"expected a call where the previous link is the added operand");

6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
@@ -93,9 +93,9 @@ class VPRecipeBuilder {
VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands);

/// Handle call instructions. If \p CI can be widened for \p Range.Start,
/// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same
/// decision from \p Range.Start to \p Range.End.
VPWidenCallRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
/// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be
/// decreased to ensure same decision from \p Range.Start to \p Range.End.
VPSingleDefRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
VFRange &Range);

/// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
81 changes: 66 additions & 15 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -886,6 +886,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenIntrinsicSC:
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenEVLSC:
case VPRecipeBase::VPWidenSelectSC:
@@ -1613,25 +1614,75 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
}
};

/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPRecipeWithIRFlags {
/// ID of the vector intrinsic to call when widening the call. If set to
/// Intrinsic::not_intrinsic, a library call will be used instead.
/// A recipe for widening vector intrinsics.
class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
/// ID of the vector intrinsic to widen.
Intrinsic::ID VectorIntrinsicID;
/// If this recipe represents a library call, Variant stores a pointer to
/// the chosen function. There is a 1:1 mapping between a given VF and the
/// chosen vectorized variant, so there will be a different vplan for each
/// VF with a valid variant.

/// Scalar type of the result produced by the intrinsic.
Type *ResultTy;

bool MayWriteToMemory;
bool MayReadFromMemory;
bool MayHaveSideEffects;

public:
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
ArrayRef<VPValue *> CallArguments, Type *Ty,
DebugLoc DL = {})
: VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
MayWriteToMemory(CI.mayWriteToMemory()),
MayReadFromMemory(CI.mayReadFromMemory()),
MayHaveSideEffects(CI.mayHaveSideEffects()) {}

~VPWidenIntrinsicRecipe() override = default;

VPWidenIntrinsicRecipe *clone() override {
return new VPWidenIntrinsicRecipe(*cast<CallInst>(getUnderlyingValue()),
VectorIntrinsicID, {op_begin(), op_end()},
ResultTy, getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)

/// Produce a widened version of the vector intrinsic.
void execute(VPTransformState &State) override;

/// Return the cost of this vector intrinsic.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

Type *getResultTy() const { return ResultTy; }

/// Return the name of the intrinsic as a string.
StringRef getIntrinsicName() const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
bool mayWriteToMemory() const { return MayWriteToMemory; }

bool mayReadFromMemory() const { return MayReadFromMemory; }

bool mayHaveSideEffects() const { return MayHaveSideEffects; }
};

/// A recipe for widening Call instructions using library calls.
class VPWidenCallRecipe : public VPRecipeWithIRFlags {
/// Variant stores a pointer to the chosen function. There is a 1:1 mapping
/// between a given VF and the chosen vectorized variant, so there will be a
/// different VPlan for each VF with a valid variant.
Function *Variant;

public:
template <typename IterT>
VPWidenCallRecipe(Value *UV, iterator_range<IterT> CallArguments,
Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
Function *Variant = nullptr)
VPWidenCallRecipe(Value *UV, Function *Variant,
ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
: VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
*cast<Instruction>(UV)),
VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {
Variant(Variant) {
assert(
isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
"last operand must be the called function");
@@ -1640,8 +1691,8 @@ class VPWidenCallRecipe : public VPRecipeWithIRFlags {
~VPWidenCallRecipe() override = default;

VPWidenCallRecipe *clone() override {
return new VPWidenCallRecipe(getUnderlyingValue(), operands(),
VectorIntrinsicID, getDebugLoc(), Variant);
return new VPWidenCallRecipe(getUnderlyingValue(), Variant,
{op_begin(), op_end()}, getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
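For illustration, a hedged sketch of how the two constructors above might be used; CI, A, B, C, CalleeVPValue and VecVariant are placeholder names, not identifiers from this patch:

// Intrinsic path: the recipe stores the scalar result type and snapshots the
// CallInst's memory effects, so later queries need not touch the IR call.
auto *WI = new VPWidenIntrinsicRecipe(*CI, Intrinsic::fmuladd, {A, B, C},
                                      CI->getType(), CI->getDebugLoc());
bool Clobbers = WI->mayWriteToMemory(); // copied from CI at construction

// Library-call path: the vectorized variant is passed explicitly, and the
// callee must also be the last operand (enforced by the assert above).
SmallVector<VPValue *> Ops = {A, B, C, CalleeVPValue};
auto *WC = new VPWidenCallRecipe(CI, VecVariant, Ops, CI->getDebugLoc());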
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -268,6 +268,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
VPWidenSelectRecipe>(
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
.Case<VPWidenIntrinsicRecipe>(
[](const VPWidenIntrinsicRecipe *R) { return R->getResultTy(); })
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
// TODO: Use info from interleave group.
return V->getUnderlyingValue()->getType();
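One consequence of storing the scalar result type on the recipe: type inference no longer needs to inspect the underlying call. A small sketch, where TypeInfo is a placeholder VPTypeAnalysis instance and R points at a VPWidenIntrinsicRecipe built from a CallInst CI:

// The added TypeSwitch case above makes this hold by construction:
Type *ScalarTy = TypeInfo.inferScalarType(R); // == R->getResultTy() == CI->getType()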