@@ -1211,9 +1211,12 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1211
1211
1212
1212
Type *RetTy = ICA.getReturnType ();
1213
1213
1214
+ ElementCount VF = ICA.getVectorFactor ();
1214
1215
ElementCount RetVF =
1215
1216
(RetTy->isVectorTy () ? cast<VectorType>(RetTy)->getElementCount ()
1216
1217
: ElementCount::getFixed (1 ));
1218
+ assert ((RetVF.isScalar () || VF.isScalar ()) &&
1219
+ " VF > 1 and RetVF is a vector type" );
1217
1220
const IntrinsicInst *I = ICA.getInst ();
1218
1221
const SmallVectorImpl<const Value *> &Args = ICA.getArgs ();
1219
1222
FastMathFlags FMF = ICA.getFlags ();
@@ -1223,28 +1226,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1223
1226
1224
1227
case Intrinsic::cttz:
1225
1228
// FIXME: If necessary, this should go in target-specific overrides.
1226
- if (RetVF.isScalar () && getTLI ()->isCheapToSpeculateCttz ())
1229
+ if (VF.isScalar () && RetVF.isScalar () &&
1230
+ getTLI ()->isCheapToSpeculateCttz ())
1227
1231
return TargetTransformInfo::TCC_Basic;
1228
1232
break ;
1229
1233
1230
1234
case Intrinsic::ctlz:
1231
1235
// FIXME: If necessary, this should go in target-specific overrides.
1232
- if (RetVF.isScalar () && getTLI ()->isCheapToSpeculateCtlz ())
1236
+ if (VF.isScalar () && RetVF.isScalar () &&
1237
+ getTLI ()->isCheapToSpeculateCtlz ())
1233
1238
return TargetTransformInfo::TCC_Basic;
1234
1239
break ;
1235
1240
1236
1241
case Intrinsic::memcpy :
1237
1242
return thisT ()->getMemcpyCost (ICA.getInst ());
1238
1243
1239
1244
case Intrinsic::masked_scatter: {
1245
+ assert (VF.isScalar () && " Can't vectorize types here." );
1240
1246
const Value *Mask = Args[3 ];
1241
1247
bool VarMask = !isa<Constant>(Mask);
1242
1248
Align Alignment = cast<ConstantInt>(Args[2 ])->getAlignValue ();
1243
1249
return thisT ()->getGatherScatterOpCost (Instruction::Store,
1244
- ICA. getArgTypes () [0 ], Args[1 ],
1250
+ Args [0 ]-> getType () , Args[1 ],
1245
1251
VarMask, Alignment, CostKind, I);
1246
1252
}
1247
1253
case Intrinsic::masked_gather: {
1254
+ assert (VF.isScalar () && " Can't vectorize types here." );
1248
1255
const Value *Mask = Args[2 ];
1249
1256
bool VarMask = !isa<Constant>(Mask);
1250
1257
Align Alignment = cast<ConstantInt>(Args[1 ])->getAlignValue ();
@@ -1282,13 +1289,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1282
1289
case Intrinsic::vector_reduce_fmin:
1283
1290
case Intrinsic::vector_reduce_umax:
1284
1291
case Intrinsic::vector_reduce_umin: {
1285
- IntrinsicCostAttributes Attrs (IID, RetTy, Args[0 ]->getType (), FMF, I, 1 );
1292
+ IntrinsicCostAttributes Attrs (IID, RetTy, Args[0 ]->getType (), FMF, 1 , I );
1286
1293
return getTypeBasedIntrinsicInstrCost (Attrs, CostKind);
1287
1294
}
1288
1295
case Intrinsic::vector_reduce_fadd:
1289
1296
case Intrinsic::vector_reduce_fmul: {
1290
1297
IntrinsicCostAttributes Attrs (
1291
- IID, RetTy, {Args[0 ]->getType (), Args[1 ]->getType ()}, FMF, I, 1 );
1298
+ IID, RetTy, {Args[0 ]->getType (), Args[1 ]->getType ()}, FMF, 1 , I );
1292
1299
return getTypeBasedIntrinsicInstrCost (Attrs, CostKind);
1293
1300
}
1294
1301
case Intrinsic::fshl:
@@ -1340,20 +1347,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1340
1347
return BaseT::getIntrinsicInstrCost (ICA, CostKind);
1341
1348
1342
1349
// Assume that we need to scalarize this intrinsic.
1350
+ SmallVector<Type *, 4 > Types;
1351
+ for (const Value *Op : Args) {
1352
+ Type *OpTy = Op->getType ();
1353
+ assert (VF.isScalar () || !OpTy->isVectorTy ());
1354
+ Types.push_back (VF.isScalar ()
1355
+ ? OpTy
1356
+ : FixedVectorType::get (OpTy, VF.getKnownMinValue ()));
1357
+ }
1358
+
1359
+ if (VF.isVector () && !RetTy->isVoidTy ())
1360
+ RetTy = FixedVectorType::get (RetTy, VF.getKnownMinValue ());
1361
+
1343
1362
// Compute the scalarization overhead based on Args for a vector
1344
- // intrinsic.
1363
+ // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1364
+ // CostModel will pass a vector RetTy and VF == 1.
1345
1365
unsigned ScalarizationCost = std::numeric_limits<unsigned >::max ();
1346
- if (RetVF.isVector ()) {
1366
+ if (RetVF.isVector () || VF. isVector () ) {
1347
1367
ScalarizationCost = 0 ;
1348
1368
if (!RetTy->isVoidTy ())
1349
1369
ScalarizationCost +=
1350
1370
getScalarizationOverhead (cast<VectorType>(RetTy), true , false );
1351
1371
ScalarizationCost +=
1352
- getOperandsScalarizationOverhead (Args, RetVF .getKnownMinValue ());
1372
+ getOperandsScalarizationOverhead (Args, VF .getKnownMinValue ());
1353
1373
}
1354
1374
1355
- IntrinsicCostAttributes Attrs (IID, RetTy, ICA.getArgTypes (), FMF, I,
1356
- ScalarizationCost);
1375
+ IntrinsicCostAttributes Attrs (IID, RetTy, Types, FMF, ScalarizationCost, I);
1357
1376
return thisT ()->getTypeBasedIntrinsicInstrCost (Attrs, CostKind);
1358
1377
}
1359
1378
@@ -1596,7 +1615,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1596
1615
// SatMin -> Overflow && SumDiff >= 0
1597
1616
unsigned Cost = 0 ;
1598
1617
IntrinsicCostAttributes Attrs (OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1599
- nullptr , ScalarizationCostPassed);
1618
+ ScalarizationCostPassed);
1600
1619
Cost += thisT ()->getIntrinsicInstrCost (Attrs, CostKind);
1601
1620
Cost +=
1602
1621
thisT ()->getCmpSelInstrCost (BinaryOperator::ICmp, RetTy, CondTy,
@@ -1617,7 +1636,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1617
1636
1618
1637
unsigned Cost = 0 ;
1619
1638
IntrinsicCostAttributes Attrs (OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1620
- nullptr , ScalarizationCostPassed);
1639
+ ScalarizationCostPassed);
1621
1640
Cost += thisT ()->getIntrinsicInstrCost (Attrs, CostKind);
1622
1641
Cost +=
1623
1642
thisT ()->getCmpSelInstrCost (BinaryOperator::Select, RetTy, CondTy,
0 commit comments