@@ -993,24 +993,39 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
993
993
Args.push_back (Arg);
994
994
}
995
995
996
- // Use vector version of the intrinsic.
997
- Module *M = State.Builder .GetInsertBlock ()->getModule ();
998
- Function *VectorF =
999
- Intrinsic::getOrInsertDeclaration (M, VectorIntrinsicID, TysForDecl);
1000
- assert (VectorF && " Can't retrieve vector intrinsic." );
1001
-
1002
- auto *CI = cast_or_null<CallInst>(getUnderlyingValue ());
1003
- SmallVector<OperandBundleDef, 1 > OpBundles;
1004
- if (CI)
1005
- CI->getOperandBundlesAsDefs (OpBundles);
996
+ if (VPIntrinsic::isVPIntrinsic (VectorIntrinsicID)) {
997
+ // Use the vector-predication (VP) variant of the intrinsic.
998
+ IRBuilderBase &BuilderIR = State.Builder ;
999
+ VectorBuilder VBuilder (BuilderIR);
1000
+ Value *Mask = BuilderIR.CreateVectorSplat (State.VF , BuilderIR.getTrue ());
1001
+ VBuilder.setMask (Mask).setEVL (Args.back ());
1002
+ // Remove the EVL from Args
1003
+ Args.pop_back ();
1004
+ Value *VPInst = VBuilder.createSimpleIntrinsic (
1005
+ VectorIntrinsicID, TysForDecl[0 ], Args, " vp.call" );
1006
+ if (!VPInst->getType ()->isVoidTy ())
1007
+ State.set (this , VPInst);
1008
+ State.addMetadata (VPInst,
1009
+ dyn_cast_or_null<Instruction>(getUnderlyingValue ()));
1010
+ } else {
1011
+ // Use vector version of the intrinsic.
1012
+ Module *M = State.Builder .GetInsertBlock ()->getModule ();
1013
+ Function *VectorF =
1014
+ Intrinsic::getOrInsertDeclaration (M, VectorIntrinsicID, TysForDecl);
1015
+ assert (VectorF && " Can't retrieve vector intrinsic." );
1006
1016
1007
- CallInst *V = State.Builder .CreateCall (VectorF, Args, OpBundles);
1017
+ auto *CI = cast_or_null<CallInst>(getUnderlyingValue ());
1018
+ SmallVector<OperandBundleDef, 1 > OpBundles;
1019
+ if (CI)
1020
+ CI->getOperandBundlesAsDefs (OpBundles);
1008
1021
1009
- setFlags (V);
1022
+ CallInst *V = State.Builder .CreateCall (VectorF, Args, OpBundles);
1023
+ setFlags (V);
1010
1024
1011
- if (!V->getType ()->isVoidTy ())
1012
- State.set (this , V);
1013
- State.addMetadata (V, CI);
1025
+ if (!V->getType ()->isVoidTy ())
1026
+ State.set (this , V);
1027
+ State.addMetadata (V, CI);
1028
+ }
1014
1029
}
1015
1030
1016
1031
InstructionCost VPWidenIntrinsicRecipe::computeCost (ElementCount VF,
@@ -1023,6 +1038,18 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
1023
1038
// clear Arguments.
1024
1039
// TODO: Rework TTI interface to be independent of concrete IR values.
1025
1040
SmallVector<const Value *> Arguments;
1041
+
1042
+ Intrinsic::ID FID = VectorIntrinsicID;
1043
+ unsigned NumOperands = getNumOperands ();
1044
+ if (VPIntrinsic::isVPIntrinsic (VectorIntrinsicID)) {
1045
+ std::optional<Intrinsic::ID> ID =
1046
+ VPIntrinsic::getFunctionalIntrinsicIDForVP (VectorIntrinsicID);
1047
+ if (ID) {
1048
+ FID = ID.value ();
1049
+ NumOperands = getNumOperands () - 1 ;
1050
+ }
1051
+ }
1052
+
1026
1053
for (const auto &[Idx, Op] : enumerate(operands ())) {
1027
1054
auto *V = Op->getUnderlyingValue ();
1028
1055
if (!V) {
@@ -1038,14 +1065,14 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
1038
1065
1039
1066
Type *RetTy = ToVectorTy (Ctx.Types .inferScalarType (this ), VF);
1040
1067
SmallVector<Type *> ParamTys;
1041
- for (unsigned I = 0 ; I != getNumOperands () ; ++I)
1068
+ for (unsigned I = 0 ; I != NumOperands ; ++I)
1042
1069
ParamTys.push_back (
1043
1070
ToVectorTy (Ctx.Types .inferScalarType (getOperand (I)), VF));
1044
1071
1045
1072
// TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
1046
1073
FastMathFlags FMF = hasFastMathFlags () ? getFastMathFlags () : FastMathFlags ();
1047
1074
IntrinsicCostAttributes CostAttrs (
1048
- VectorIntrinsicID , RetTy, Arguments, ParamTys, FMF,
1075
+ FID , RetTy, Arguments, ParamTys, FMF,
1049
1076
dyn_cast_or_null<IntrinsicInst>(getUnderlyingValue ()));
1050
1077
return Ctx.TTI .getIntrinsicInstrCost (CostAttrs, CostKind);
1051
1078
}
0 commit comments