@@ -6894,6 +6894,31 @@ class BaseShuffleAnalysis {
6894
6894
};
6895
6895
} // namespace
6896
6896
6897
+ /// Returns the cost of the shuffle instructions with the given \p Kind, vector
6898
+ /// type \p Tp and optional \p Mask. Adds SLP-specifc cost estimation for insert
6899
+ /// subvector pattern.
6900
+ static InstructionCost
6901
+ getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind,
6902
+ VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
6903
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
6904
+ int Index = 0, VectorType *SubTp = nullptr,
6905
+ ArrayRef<const Value *> Args = std::nullopt) {
6906
+ if (Kind != TTI::SK_PermuteTwoSrc)
6907
+ return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
6908
+ int NumSrcElts = Tp->getElementCount().getKnownMinValue();
6909
+ int NumSubElts;
6910
+ if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask(
6911
+ Mask, NumSrcElts, NumSubElts, Index)) {
6912
+ if (Index + NumSubElts > NumSrcElts &&
6913
+ Index + NumSrcElts <= static_cast<int>(Mask.size()))
6914
+ return TTI.getShuffleCost(
6915
+ TTI::SK_InsertSubvector,
6916
+ FixedVectorType::get(Tp->getElementType(), Mask.size()), std::nullopt,
6917
+ TTI::TCK_RecipThroughput, Index, Tp);
6918
+ }
6919
+ return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
6920
+ }
6921
+
6897
6922
/// Merges shuffle masks and emits final shuffle instruction, if required. It
6898
6923
/// supports shuffling of 2 input vectors. It implements lazy shuffles emission,
6899
6924
/// when the actual shuffle instruction is generated only if this is actually
@@ -7141,15 +7166,15 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7141
7166
std::optional<TTI::ShuffleKind> RegShuffleKind =
7142
7167
CheckPerRegistersShuffle(SubMask);
7143
7168
if (!RegShuffleKind) {
7144
- Cost += TTI. getShuffleCost(
7145
- *ShuffleKinds[Part],
7169
+ Cost += :: getShuffleCost(
7170
+ TTI, *ShuffleKinds[Part],
7146
7171
FixedVectorType::get(VL.front()->getType(), NumElts), MaskSlice);
7147
7172
continue;
7148
7173
}
7149
7174
if (*RegShuffleKind != TTI::SK_PermuteSingleSrc ||
7150
7175
!ShuffleVectorInst::isIdentityMask(SubMask, EltsPerVector)) {
7151
- Cost += TTI. getShuffleCost(
7152
- *RegShuffleKind,
7176
+ Cost += :: getShuffleCost(
7177
+ TTI, *RegShuffleKind,
7153
7178
FixedVectorType::get(VL.front()->getType(), EltsPerVector),
7154
7179
SubMask);
7155
7180
}
@@ -7222,8 +7247,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7222
7247
cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
7223
7248
if (isEmptyOrIdentity(Mask, VF))
7224
7249
return TTI::TCC_Free;
7225
- return TTI. getShuffleCost(TTI::SK_PermuteTwoSrc,
7226
- cast<VectorType>(V1->getType()), Mask);
7250
+ return :: getShuffleCost(TTI, TTI::SK_PermuteTwoSrc,
7251
+ cast<VectorType>(V1->getType()), Mask);
7227
7252
}
7228
7253
InstructionCost createShuffleVector(Value *V1, ArrayRef<int> Mask) const {
7229
7254
// Empty mask or identity mask are free.
@@ -8101,7 +8126,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
8101
8126
for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I)
8102
8127
Mask[I] =
8103
8128
((I >= InMask.size()) || InMask.test(I)) ? PoisonMaskElem : I;
8104
- Cost += TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, InsertVecTy, Mask);
8129
+ Cost +=
8130
+ ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, InsertVecTy, Mask);
8105
8131
}
8106
8132
}
8107
8133
return Cost;
@@ -8428,8 +8454,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
8428
8454
return I->getOpcode() == E->getAltOpcode();
8429
8455
},
8430
8456
Mask);
8431
- VecCost += TTIRef. getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
8432
- FinalVecTy, Mask);
8457
+ VecCost += :: getShuffleCost(TTIRef, TargetTransformInfo::SK_PermuteTwoSrc,
8458
+ FinalVecTy, Mask);
8433
8459
// Patterns like [fadd,fsub] can be combined into a single instruction
8434
8460
// in x86. Reordering them into [fsub,fadd] blocks this pattern. So we
8435
8461
// need to take into account their order when looking for the most used
@@ -9133,7 +9159,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
9133
9159
auto *FTy =
9134
9160
FixedVectorType::get(TEs.back()->Scalars.front()->getType(), VF);
9135
9161
InstructionCost C =
9136
- TTI-> getShuffleCost(TTI::SK_PermuteTwoSrc, FTy, Mask);
9162
+ :: getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, FTy, Mask);
9137
9163
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
9138
9164
<< " for final shuffle of vector node and external "
9139
9165
"insertelement users.\n";
0 commit comments