Skip to content

Commit 9202806

Browse files
author
Jinsong Ji
committed
Revert "[CostModel] Remove VF from IntrinsicCostAttributes"
This reverts commit 502a67d. It exposes a failure in the test-suite build on PowerPC; reverting to unblock the buildbot first. Dave will re-commit in https://reviews.llvm.org/D96287. Thanks Dave.
1 parent 21e8bb8 commit 9202806

File tree

10 files changed

+236
-191
lines changed

10 files changed

+236
-191
lines changed

llvm/include/llvm/Analysis/TargetTransformInfo.h

+25-13
Original file line numberDiff line numberDiff line change
@@ -118,32 +118,44 @@ class IntrinsicCostAttributes {
118118
SmallVector<Type *, 4> ParamTys;
119119
SmallVector<const Value *, 4> Arguments;
120120
FastMathFlags FMF;
121+
ElementCount VF = ElementCount::getFixed(1);
121122
// If ScalarizationCost is UINT_MAX, the cost of scalarizing the
122123
// arguments and the return value will be computed based on types.
123124
unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
124125

125126
public:
126-
IntrinsicCostAttributes(
127-
Intrinsic::ID Id, const CallBase &CI,
128-
unsigned ScalarizationCost = std::numeric_limits<unsigned>::max());
127+
IntrinsicCostAttributes(const IntrinsicInst &I);
129128

130-
IntrinsicCostAttributes(
131-
Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
132-
FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
133-
unsigned ScalarCost = std::numeric_limits<unsigned>::max());
129+
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI);
130+
131+
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
132+
ElementCount Factor);
133+
134+
IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
135+
ElementCount Factor, unsigned ScalarCost);
134136

135137
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
136-
ArrayRef<const Value *> Args);
138+
ArrayRef<Type *> Tys, FastMathFlags Flags);
137139

138-
IntrinsicCostAttributes(
139-
Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args,
140-
ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(),
141-
const IntrinsicInst *I = nullptr,
142-
unsigned ScalarCost = std::numeric_limits<unsigned>::max());
140+
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
141+
ArrayRef<Type *> Tys, FastMathFlags Flags,
142+
unsigned ScalarCost);
143+
144+
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
145+
ArrayRef<Type *> Tys, FastMathFlags Flags,
146+
unsigned ScalarCost,
147+
const IntrinsicInst *I);
148+
149+
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
150+
ArrayRef<Type *> Tys);
151+
152+
IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
153+
ArrayRef<const Value *> Args);
143154

144155
Intrinsic::ID getID() const { return IID; }
145156
const IntrinsicInst *getInst() const { return II; }
146157
Type *getReturnType() const { return RetTy; }
158+
ElementCount getVectorFactor() const { return VF; }
147159
FastMathFlags getFlags() const { return FMF; }
148160
unsigned getScalarizationCost() const { return ScalarizationCost; }
149161
const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }

llvm/include/llvm/CodeGen/BasicTTIImpl.h

+31-12
Original file line numberDiff line numberDiff line change
@@ -1211,9 +1211,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
12111211

12121212
Type *RetTy = ICA.getReturnType();
12131213

1214+
ElementCount VF = ICA.getVectorFactor();
12141215
ElementCount RetVF =
12151216
(RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
12161217
: ElementCount::getFixed(1));
1218+
assert((RetVF.isScalar() || VF.isScalar()) &&
1219+
"VF > 1 and RetVF is a vector type");
12171220
const IntrinsicInst *I = ICA.getInst();
12181221
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
12191222
FastMathFlags FMF = ICA.getFlags();
@@ -1223,28 +1226,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
12231226

12241227
case Intrinsic::cttz:
12251228
// FIXME: If necessary, this should go in target-specific overrides.
1226-
if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz())
1229+
if (VF.isScalar() && RetVF.isScalar() &&
1230+
getTLI()->isCheapToSpeculateCttz())
12271231
return TargetTransformInfo::TCC_Basic;
12281232
break;
12291233

12301234
case Intrinsic::ctlz:
12311235
// FIXME: If necessary, this should go in target-specific overrides.
1232-
if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz())
1236+
if (VF.isScalar() && RetVF.isScalar() &&
1237+
getTLI()->isCheapToSpeculateCtlz())
12331238
return TargetTransformInfo::TCC_Basic;
12341239
break;
12351240

12361241
case Intrinsic::memcpy:
12371242
return thisT()->getMemcpyCost(ICA.getInst());
12381243

12391244
case Intrinsic::masked_scatter: {
1245+
assert(VF.isScalar() && "Can't vectorize types here.");
12401246
const Value *Mask = Args[3];
12411247
bool VarMask = !isa<Constant>(Mask);
12421248
Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
12431249
return thisT()->getGatherScatterOpCost(Instruction::Store,
1244-
ICA.getArgTypes()[0], Args[1],
1250+
Args[0]->getType(), Args[1],
12451251
VarMask, Alignment, CostKind, I);
12461252
}
12471253
case Intrinsic::masked_gather: {
1254+
assert(VF.isScalar() && "Can't vectorize types here.");
12481255
const Value *Mask = Args[2];
12491256
bool VarMask = !isa<Constant>(Mask);
12501257
Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
@@ -1282,13 +1289,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
12821289
case Intrinsic::vector_reduce_fmin:
12831290
case Intrinsic::vector_reduce_umax:
12841291
case Intrinsic::vector_reduce_umin: {
1285-
IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
1292+
IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
12861293
return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
12871294
}
12881295
case Intrinsic::vector_reduce_fadd:
12891296
case Intrinsic::vector_reduce_fmul: {
12901297
IntrinsicCostAttributes Attrs(
1291-
IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, I, 1);
1298+
IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I);
12921299
return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
12931300
}
12941301
case Intrinsic::fshl:
@@ -1340,20 +1347,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
13401347
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
13411348

13421349
// Assume that we need to scalarize this intrinsic.
1350+
SmallVector<Type *, 4> Types;
1351+
for (const Value *Op : Args) {
1352+
Type *OpTy = Op->getType();
1353+
assert(VF.isScalar() || !OpTy->isVectorTy());
1354+
Types.push_back(VF.isScalar()
1355+
? OpTy
1356+
: FixedVectorType::get(OpTy, VF.getKnownMinValue()));
1357+
}
1358+
1359+
if (VF.isVector() && !RetTy->isVoidTy())
1360+
RetTy = FixedVectorType::get(RetTy, VF.getKnownMinValue());
1361+
13431362
// Compute the scalarization overhead based on Args for a vector
1344-
// intrinsic.
1363+
// intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1364+
// CostModel will pass a vector RetTy and VF is 1.
13451365
unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1346-
if (RetVF.isVector()) {
1366+
if (RetVF.isVector() || VF.isVector()) {
13471367
ScalarizationCost = 0;
13481368
if (!RetTy->isVoidTy())
13491369
ScalarizationCost +=
13501370
getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
13511371
ScalarizationCost +=
1352-
getOperandsScalarizationOverhead(Args, RetVF.getKnownMinValue());
1372+
getOperandsScalarizationOverhead(Args, VF.getKnownMinValue());
13531373
}
13541374

1355-
IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
1356-
ScalarizationCost);
1375+
IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF, ScalarizationCost, I);
13571376
return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
13581377
}
13591378

@@ -1596,7 +1615,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
15961615
// SatMin -> Overflow && SumDiff >= 0
15971616
unsigned Cost = 0;
15981617
IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1599-
nullptr, ScalarizationCostPassed);
1618+
ScalarizationCostPassed);
16001619
Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
16011620
Cost +=
16021621
thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
@@ -1617,7 +1636,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
16171636

16181637
unsigned Cost = 0;
16191638
IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1620-
nullptr, ScalarizationCostPassed);
1639+
ScalarizationCostPassed);
16211640
Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
16221641
Cost +=
16231642
thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,

llvm/lib/Analysis/TargetTransformInfo.cpp

+67-18
Original file line numberDiff line numberDiff line change
@@ -54,26 +54,86 @@ bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
5454
return true;
5555
}
5656

57+
IntrinsicCostAttributes::IntrinsicCostAttributes(const IntrinsicInst &I) :
58+
II(&I), RetTy(I.getType()), IID(I.getIntrinsicID()) {
59+
60+
FunctionType *FTy = I.getCalledFunction()->getFunctionType();
61+
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
62+
Arguments.insert(Arguments.begin(), I.arg_begin(), I.arg_end());
63+
if (auto *FPMO = dyn_cast<FPMathOperator>(&I))
64+
FMF = FPMO->getFastMathFlags();
65+
}
66+
67+
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
68+
const CallBase &CI) :
69+
II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id) {
70+
71+
if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
72+
FMF = FPMO->getFastMathFlags();
73+
74+
Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
75+
FunctionType *FTy =
76+
CI.getCalledFunction()->getFunctionType();
77+
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
78+
}
79+
5780
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
5881
const CallBase &CI,
59-
unsigned ScalarizationCost)
60-
: II(dyn_cast<IntrinsicInst>(&CI)), RetTy(CI.getType()), IID(Id),
61-
ScalarizationCost(ScalarizationCost) {
82+
ElementCount Factor)
83+
: RetTy(CI.getType()), IID(Id), VF(Factor) {
84+
85+
assert(!Factor.isScalable() && "Scalable vectors are not yet supported");
86+
if (auto *FPMO = dyn_cast<FPMathOperator>(&CI))
87+
FMF = FPMO->getFastMathFlags();
88+
89+
Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
90+
FunctionType *FTy =
91+
CI.getCalledFunction()->getFunctionType();
92+
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
93+
}
94+
95+
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id,
96+
const CallBase &CI,
97+
ElementCount Factor,
98+
unsigned ScalarCost)
99+
: RetTy(CI.getType()), IID(Id), VF(Factor), ScalarizationCost(ScalarCost) {
62100

63101
if (const auto *FPMO = dyn_cast<FPMathOperator>(&CI))
64102
FMF = FPMO->getFastMathFlags();
65103

66104
Arguments.insert(Arguments.begin(), CI.arg_begin(), CI.arg_end());
67-
FunctionType *FTy = CI.getCalledFunction()->getFunctionType();
105+
FunctionType *FTy =
106+
CI.getCalledFunction()->getFunctionType();
68107
ParamTys.insert(ParamTys.begin(), FTy->param_begin(), FTy->param_end());
69108
}
70109

110+
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
111+
ArrayRef<Type *> Tys,
112+
FastMathFlags Flags) :
113+
RetTy(RTy), IID(Id), FMF(Flags) {
114+
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
115+
}
116+
71117
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
72118
ArrayRef<Type *> Tys,
73119
FastMathFlags Flags,
74-
const IntrinsicInst *I,
75-
unsigned ScalarCost)
76-
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
120+
unsigned ScalarCost) :
121+
RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
122+
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
123+
}
124+
125+
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
126+
ArrayRef<Type *> Tys,
127+
FastMathFlags Flags,
128+
unsigned ScalarCost,
129+
const IntrinsicInst *I) :
130+
II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
131+
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
132+
}
133+
134+
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
135+
ArrayRef<Type *> Tys) :
136+
RetTy(RTy), IID(Id) {
77137
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
78138
}
79139

@@ -87,17 +147,6 @@ IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *Ty,
87147
ParamTys.push_back(Arguments[Idx]->getType());
88148
}
89149

90-
IntrinsicCostAttributes::IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
91-
ArrayRef<const Value *> Args,
92-
ArrayRef<Type *> Tys,
93-
FastMathFlags Flags,
94-
const IntrinsicInst *I,
95-
unsigned ScalarCost)
96-
: II(I), RetTy(RTy), IID(Id), FMF(Flags), ScalarizationCost(ScalarCost) {
97-
ParamTys.insert(ParamTys.begin(), Tys.begin(), Tys.end());
98-
Arguments.insert(Arguments.begin(), Args.begin(), Args.end());
99-
}
100-
101150
bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
102151
LoopInfo &LI, DominatorTree &DT,
103152
bool ForceNestedLoop,

llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp

+17-16
Original file line numberDiff line numberDiff line change
@@ -731,28 +731,40 @@ int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
731731
if (ICA.isTypeBasedOnly())
732732
return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
733733

734+
Type *RetTy = ICA.getReturnType();
735+
unsigned VF = ICA.getVectorFactor().getFixedValue();
734736
unsigned RetVF =
735737
(RetTy->isVectorTy() ? cast<FixedVectorType>(RetTy)->getNumElements()
736738
: 1);
739+
assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
737740
const IntrinsicInst *I = ICA.getInst();
738741
const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
739742
FastMathFlags FMF = ICA.getFlags();
740743
// Assume that we need to scalarize this intrinsic.
744+
SmallVector<Type *, 4> Types;
745+
for (const Value *Op : Args) {
746+
Type *OpTy = Op->getType();
747+
assert(VF == 1 || !OpTy->isVectorTy());
748+
Types.push_back(VF == 1 ? OpTy : FixedVectorType::get(OpTy, VF));
749+
}
750+
751+
if (VF > 1 && !RetTy->isVoidTy())
752+
RetTy = FixedVectorType::get(RetTy, VF);
741753

742754
// Compute the scalarization overhead based on Args for a vector
743755
// intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
744756
// CostModel will pass a vector RetTy and VF is 1.
745757
unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
746-
if (RetVF > 1) {
758+
if (RetVF > 1 || VF > 1) {
747759
ScalarizationCost = 0;
748760
if (!RetTy->isVoidTy())
749761
ScalarizationCost +=
750762
getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
751-
ScalarizationCost += getOperandsScalarizationOverhead(Args, RetVF);
763+
ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
752764
}
753765

754-
IntrinsicCostAttributes Attrs(ICA.getID(), RetTy, ICA.getArgTypes(), FMF, I,
755-
ScalarizationCost);
766+
IntrinsicCostAttributes Attrs(ICA.getID(), RetTy, Types, FMF,
767+
ScalarizationCost, I);
756768
return getIntrinsicInstrCost(Attrs, CostKind);
757769
}
758770

@@ -772,20 +784,9 @@ int GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
772784

773785
// TODO: Get more refined intrinsic costs?
774786
unsigned InstRate = getQuarterRateInstrCost(CostKind);
775-
776-
switch (ICA.getID()) {
777-
case Intrinsic::fma:
787+
if (ICA.getID() == Intrinsic::fma) {
778788
InstRate = ST->hasFastFMAF32() ? getHalfRateInstrCost(CostKind)
779789
: getQuarterRateInstrCost(CostKind);
780-
break;
781-
case Intrinsic::uadd_sat:
782-
case Intrinsic::usub_sat:
783-
case Intrinsic::sadd_sat:
784-
case Intrinsic::ssub_sat:
785-
static const auto ValidSatTys = {MVT::v2i16, MVT::v4i16};
786-
if (any_of(ValidSatTys, [&LT](MVT M) { return M == LT.second; }))
787-
NElts = 1;
788-
break;
789790
}
790791

791792
return LT.first * NElts * InstRate;

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

+8-3
Original file line numberDiff line numberDiff line change
@@ -1550,16 +1550,21 @@ int ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
15501550
case Intrinsic::usub_sat: {
15511551
if (!ST->hasMVEIntegerOps())
15521552
break;
1553+
// Get the Return type, either directly or from ICA.ReturnType and ICA.VF.
15531554
Type *VT = ICA.getReturnType();
1555+
if (!VT->isVectorTy() && !ICA.getVectorFactor().isScalar())
1556+
VT = VectorType::get(VT, ICA.getVectorFactor());
15541557

15551558
std::pair<int, MVT> LT =
15561559
TLI->getTypeLegalizationCost(DL, VT);
15571560
if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 ||
15581561
LT.second == MVT::v16i8) {
1559-
// This is a base cost of 1 for the vqadd, plus 3 extract shifts if we
1562+
// This is a base cost of 1 for the vadd, plus 3 extract shifts if we
15601563
// need to extend the type, as it uses shr(qadd(shl, shl)).
1561-
unsigned Instrs =
1562-
LT.second.getScalarSizeInBits() == VT->getScalarSizeInBits() ? 1 : 4;
1564+
unsigned Instrs = LT.second.getScalarSizeInBits() ==
1565+
ICA.getReturnType()->getScalarSizeInBits()
1566+
? 1
1567+
: 4;
15631568
return LT.first * ST->getMVEVectorCostFactor() * Instrs;
15641569
}
15651570
break;

0 commit comments

Comments
 (0)