Skip to content

Commit 502a67d

Browse files
committed
[CostModel] Remove VF from IntrinsicCostAttributes
getIntrinsicInstrCost takes an IntrinsicCostAttributes holding various parameters of the intrinsic being costed. It can either be called with a scalar intrinsic (RetTy==Scalar, VF==1), with a vector instruction (RetTy==Vector, VF==1) or from the vectorizer with a scalar type and vector width (RetTy==Scalar, VF>1). A RetTy==Vector, VF>1 is considered an error. Both of the vector modes are expected to be treated the same, but because this is confusing, many backends end up getting it wrong. Instead of trying to work with those two values separately, this removes the VF parameter, widening the RetTy/ArgTys by VF when called from the vectorizer. This keeps things simpler, but does require some other modifications to keep things consistent. For most backends this looks like it will be an improvement (or they were not using getIntrinsicInstrCost). AMDGPU needed the most changes to keep the code from c230965 working. ARM removed the fix in dfac521, WebAssembly happens to get a fixup for an SLP cost issue and both X86 and AArch64 seem to now be using better costs from the vectorizer. Differential Revision: https://reviews.llvm.org/D95291
1 parent 6e1afd8 commit 502a67d

File tree

10 files changed

+191
-236
lines changed

10 files changed

+191
-236
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

+13-25
Original file line numberDiff line numberDiff line change
@@ -118,44 +118,32 @@ class IntrinsicCostAttributes {
118118
SmallVector<Type *, 4> ParamTys;
119119
SmallVector<const Value *, 4> Arguments;
120120
FastMathFlags FMF;
121-
ElementCount VF = ElementCount::getFixed(1);
122121
// If ScalarizationCost is UINT_MAX, the cost of scalarizing the
123122
// arguments and the return value will be computed based on types.
124123
unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
125124

126125
public:
127-
IntrinsicCostAttributes(const IntrinsicInst &I);
126+
IntrinsicCostAttributes(
127+
Intrinsic::ID Id, const CallBase &CI,
128+
unsigned ScalarizationCost = std::numeric_limits<unsigned>::max());
128129

129-
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI);
130-
131-
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
132-
ElementCount Factor);
133-
134-
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
135-
ElementCount Factor, unsigned ScalarCost);
136-
137-
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
138-
ArrayRef<Type *> Tys, FastMathFlags Flags);
139-
140-
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
141-
ArrayRef<Type *> Tys, FastMathFlags Flags,
142-
unsigned ScalarCost);
143-
144-
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
145-
ArrayRef<Type *> Tys, FastMathFlags Flags,
146-
unsigned ScalarCost,
147-
const IntrinsicInst *I);
148-
149-
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
150-
ArrayRef<Type *> Tys);
130+
IntrinsicCostAttributes(
131+
Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
132+
FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
133+
unsigned ScalarCost = std::numeric_limits<unsigned>::max());
151134

152135
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
153136
ArrayRef<const Value *> Args);
154137

138+
IntrinsicCostAttributes(
139+
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
140+
ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
141+
const IntrinsicInst *I = nullptr,
142+
unsigned ScalarCost = std::numeric_limits<unsigned>::max());
143+
155144
Intrinsic::ID getID() const { return IID; }
156145
const IntrinsicInst *getInst() const { return II; }
157146
Type *getReturnType() const { return RetTy; }
158-
ElementCount getVectorFactor() const { return VF; }
159147
FastMathFlags getFlags() const { return FMF; }
160148
unsigned getScalarizationCost() const { return ScalarizationCost; }
161149
const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }

llvm/include/llvm/CodeGen/BasicTTIImpl.h

+12-31
Original file line numberDiff line numberDiff line change
@@ -1211,12 +1211,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
12111211

12121212
Type *RetTy = ICA.getReturnType();
12131213

1214-
ElementCount VF = ICA.getVectorFactor();
12151214
ElementCount RetVF =
12161215
(RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
12171216
: ElementCount::getFixed(1));
1218-
assert((RetVF.isScalar() || VF.isScalar()) &&
1219-
"VF > 1 and RetVF is a vector type");
12201217
const IntrinsicInst *I = ICA.getInst();
12211218
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
12221219
FastMathFlags FMF = ICA.getFlags();
@@ -1226,32 +1223,28 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
12261223

12271224
case Intrinsic::cttz:
12281225
// FIXME: If necessary, this should go in target-specific overrides.
1229-
if (VF.isScalar() && RetVF.isScalar() &&
1230-
getTLI()->isCheapToSpeculateCttz())
1226+
if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz())
12311227
return TargetTransformInfo::TCC_Basic;
12321228
break;
12331229

12341230
case Intrinsic::ctlz:
12351231
// FIXME: If necessary, this should go in target-specific overrides.
1236-
if (VF.isScalar() && RetVF.isScalar() &&
1237-
getTLI()->isCheapToSpeculateCtlz())
1232+
if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz())
12381233
return TargetTransformInfo::TCC_Basic;
12391234
break;
12401235

12411236
case Intrinsic::memcpy:
12421237
return thisT()->getMemcpyCost(ICA.getInst());
12431238

12441239
case Intrinsic::masked_scatter: {
1245-
assert(VF.isScalar() && "Can't vectorize types here.");
12461240
const Value *Mask = Args[3];
12471241
bool VarMask = !isa<Constant>(Mask);
12481242
Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
12491243
return thisT()->getGatherScatterOpCost(Instruction::Store,
1250-
Args[0]->getType(), Args[1],
1244+
ICA.getArgTypes()[0], Args[1],
12511245
VarMask, Alignment, CostKind, I);
12521246
}
12531247
case Intrinsic::masked_gather: {
1254-
assert(VF.isScalar() && "Can't vectorize types here.");
12551248
const Value *Mask = Args[2];
12561249
bool VarMask = !isa<Constant>(Mask);
12571250
Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
@@ -1289,13 +1282,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
12891282
case Intrinsic::vector_reduce_fmin:
12901283
case Intrinsic::vector_reduce_umax:
12911284
case Intrinsic::vector_reduce_umin: {
1292-
IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
1285+
IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
12931286
return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
12941287
}
12951288
case Intrinsic::vector_reduce_fadd:
12961289
case Intrinsic::vector_reduce_fmul: {
12971290
IntrinsicCostAttributes Attrs(
1298-
IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I);
1291+
IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, I, 1);
12991292
return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
13001293
}
13011294
case Intrinsic::fshl:
@@ -1347,32 +1340,20 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
13471340
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
13481341

13491342
// Assume that we need to scalarize this intrinsic.
1350-
SmallVector<Type *, 4> Types;
1351-
for (const Value *Op : Args) {
1352-
Type *OpTy = Op->getType();
1353-
assert(VF.isScalar() || !OpTy->isVectorTy());
1354-
Types.push_back(VF.isScalar()
1355-
? OpTy
1356-
: FixedVectorType::get(OpTy, VF.getKnownMinValue()));
1357-
}
1358-
1359-
if (VF.isVector() && !RetTy->isVoidTy())
1360-
RetTy = FixedVectorType::get(RetTy, VF.getKnownMinValue());
1361-
13621343
// Compute the scalarization overhead based on Args for a vector
1363-
// intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1364-
// CostModel will pass a vector RetTy and VF is 1.
1344+
// intrinsic.
13651345
unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1366-
if (RetVF.isVector() || VF.isVector()) {
1346+
if (RetVF.isVector()) {
13671347
ScalarizationCost = 0;
13681348
if (!RetTy->isVoidTy())
13691349
ScalarizationCost +=
13701350
getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
13711351
ScalarizationCost +=
1372-
getOperandsScalarizationOverhead(Args, VF.getKnownMinValue());
1352+
getOperandsScalarizationOverhead(Args, RetVF.getKnownMinValue());
13731353
}
13741354

1375-
IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF, ScalarizationCost, I);
1355+
IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
1356+
ScalarizationCost);
13761357
return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
13771358
}
13781359

@@ -1615,7 +1596,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
16151596
// SatMin -> Overflow && SumDiff >= 0
16161597
unsigned Cost = 0;
16171598
IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1618-
ScalarizationCostPassed);
1599+
nullptr, ScalarizationCostPassed);
16191600
Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
16201601
Cost +=
16211602
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
@@ -1636,7 +1617,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
16361617

16371618
unsigned Cost = 0;
16381619
IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1639-
ScalarizationCostPassed);
1620+
nullptr, ScalarizationCostPassed);
16401621
Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
16411622
Cost +=
16421623
thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,

llvm/lib/Analysis/TargetTransformInfo.cpp

+18-67
Original file line numberDiff line numberDiff line change
@@ -54,86 +54,26 @@ bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
5454
return true;
5555
}
5656

57-
IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I) :
58-
II(&I), RetTy(I.getType()), IID(I.getIntrinsicID()) {
59-
60-
FunctionType *FTy = I.getCalledFunction()->getFunctionType();
61-
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
62-
Arguments.insert(Arguments.begin(), I.arg_begin(), I.arg_end());
63-
if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
64-
FMF = FPMO->getFastMathFlags();
65-
}
66-
67-
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
68-
const CallBase &CI) :
69-
II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id) {
70-
71-
if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
72-
FMF = FPMO->getFastMathFlags();
73-
74-
Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
75-
FunctionType *FTy =
76-
CI.getCalledFunction()->getFunctionType();
77-
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
78-
}
79-
8057
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
8158
const CallBase &CI,
82-
ElementCount Factor)
83-
: RetTy(CI.getType()), IID(Id), VF(Factor) {
84-
85-
assert(!Factor.isScalable() && "Scalable vectors are not yet supported");
86-
if (auto *FPMO = dyn_cast<FPMathOperator>(&CI))
87-
FMF = FPMO->getFastMathFlags();
88-
89-
Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
90-
FunctionType *FTy =
91-
CI.getCalledFunction()->getFunctionType();
92-
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
93-
}
94-
95-
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
96-
const CallBase &CI,
97-
ElementCount Factor,
98-
unsigned ScalarCost)
99-
: RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) {
59+
unsigned ScalarizationCost)
60+
: II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id),
61+
ScalarizationCost(ScalarizationCost) {
10062

10163
if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
10264
FMF = FPMO->getFastMathFlags();
10365

10466
Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
105-
FunctionType *FTy =
106-
CI.getCalledFunction()->getFunctionType();
67+
FunctionType *FTy = CI.getCalledFunction()->getFunctionType();
10768
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
10869
}
10970

110-
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
111-
ArrayRef<Type *> Tys,
112-
FastMathFlags Flags) :
113-
RetTy(RTy), IID(Id), FMF(Flags) {
114-
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
115-
}
116-
117-
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
118-
ArrayRef<Type *> Tys,
119-
FastMathFlags Flags,
120-
unsigned ScalarCost) :
121-
RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
122-
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
123-
}
124-
12571
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
12672
ArrayRef<Type *> Tys,
12773
FastMathFlags Flags,
128-
unsigned ScalarCost,
129-
const IntrinsicInst *I) :
130-
II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
131-
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
132-
}
133-
134-
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
135-
ArrayRef<Type *> Tys) :
136-
RetTy(RTy), IID(Id) {
74+
const IntrinsicInst *I,
75+
unsigned ScalarCost)
76+
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
13777
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
13878
}
13979

@@ -147,6 +87,17 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
14787
ParamTys.push_back(Arguments[Idx]->getType());
14888
}
14989

90+
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
91+
ArrayRef<const Value *> Args,
92+
ArrayRef<Type *> Tys,
93+
FastMathFlags Flags,
94+
const IntrinsicInst *I,
95+
unsigned ScalarCost)
96+
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
97+
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
98+
Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
99+
}
100+
150101
bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
151102
LoopInfo &LI, DominatorTree &DT,
152103
bool ForceNestedLoop,

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

+16-17
Original file line numberDiff line numberDiff line change
@@ -731,40 +731,28 @@ int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
731731
if (ICA.isTypeBasedOnly())
732732
return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
733733

734-
Type *RetTy = ICA.getReturnType();
735-
unsigned VF = ICA.getVectorFactor().getFixedValue();
736734
unsigned RetVF =
737735
(RetTy->isVectorTy() ? cast<FixedVectorType>(RetTy)->getNumElements()
738736
: 1);
739-
assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
740737
const IntrinsicInst *I = ICA.getInst();
741738
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
742739
FastMathFlags FMF = ICA.getFlags();
743740
// Assume that we need to scalarize this intrinsic.
744-
SmallVector<Type *, 4> Types;
745-
for (const Value *Op : Args) {
746-
Type *OpTy = Op->getType();
747-
assert(VF == 1 || !OpTy->isVectorTy());
748-
Types.push_back(VF == 1 ? OpTy : FixedVectorType::get(OpTy, VF));
749-
}
750-
751-
if (VF > 1 && !RetTy->isVoidTy())
752-
RetTy = FixedVectorType::get(RetTy, VF);
753741

754742
// Compute the scalarization overhead based on Args for a vector
755743
// intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
756744
// CostModel will pass a vector RetTy and VF is 1.
757745
unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
758-
if (RetVF > 1 || VF > 1) {
746+
if (RetVF > 1) {
759747
ScalarizationCost = 0;
760748
if (!RetTy->isVoidTy())
761749
ScalarizationCost +=
762750
getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
763-
ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
751+
ScalarizationCost += getOperandsScalarizationOverhead(Args, RetVF);
764752
}
765753

766-
IntrinsicCostAttributes Attrs(ICA.getID(), RetTy, Types, FMF,
767-
ScalarizationCost, I);
754+
IntrinsicCostAttributes Attrs(ICA.getID(), RetTy, ICA.getArgTypes(), FMF, I,
755+
ScalarizationCost);
768756
return getIntrinsicInstrCost(Attrs, CostKind);
769757
}
770758

@@ -784,9 +772,20 @@ int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
784772

785773
// TODO: Get more refined intrinsic costs?
786774
unsigned InstRate = getQuarterRateInstrCost(CostKind);
787-
if (ICA.getID() == Intrinsic::fma) {
775+
776+
switch (ICA.getID()) {
777+
case Intrinsic::fma:
788778
InstRate = ST->hasFastFMAF32() ? getHalfRateInstrCost(CostKind)
789779
: getQuarterRateInstrCost(CostKind);
780+
break;
781+
case Intrinsic::uadd_sat:
782+
case Intrinsic::usub_sat:
783+
case Intrinsic::sadd_sat:
784+
case Intrinsic::ssub_sat:
785+
static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
786+
if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
787+
NElts = 1;
788+
break;
790789
}
791790

792791
return LT.first * NElts * InstRate;

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

+3-8
Original file line numberDiff line numberDiff line change
@@ -1550,21 +1550,16 @@ int ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
15501550
case Intrinsic::usub_sat: {
15511551
if (!ST->hasMVEIntegerOps())
15521552
break;
1553-
// Get the Return type, either directly of from ICA.ReturnType and ICA.VF.
15541553
Type *VT = ICA.getReturnType();
1555-
if (!VT->isVectorTy() && !ICA.getVectorFactor().isScalar())
1556-
VT = VectorType::get(VT, ICA.getVectorFactor());
15571554

15581555
std::pair<int, MVT> LT =
15591556
TLI->getTypeLegalizationCost(DL, VT);
15601557
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
15611558
LT.second == MVT::v16i8) {
1562-
// This is a base cost of 1 for the vadd, plus 3 extract shifts if we
1559+
// This is a base cost of 1 for the vqadd, plus 3 extract shifts if we
15631560
// need to extend the type, as it uses shr(qadd(shl, shl)).
1564-
unsigned Instrs = LT.second.getScalarSizeInBits() ==
1565-
ICA.getReturnType()->getScalarSizeInBits()
1566-
? 1
1567-
: 4;
1561+
unsigned Instrs =
1562+
LT.second.getScalarSizeInBits() == VT->getScalarSizeInBits() ? 1 : 4;
15681563
return LT.first * ST->getMVEVectorCostFactor() * Instrs;
15691564
}
15701565
break;

0 commit comments

Comments
 (0)