Skip to content

Commit 5dcdf47

Browse files
committed
[SLP][NFC] Unify code for cost estimation/codegen for buildvector, NFC.
This just moves towards reusing the same function for both cost estimation and codegen of buildvector nodes.
1 parent a9e3d23 commit 5dcdf47

File tree

1 file changed

+142
-13
lines changed

1 file changed

+142
-13
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 142 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7344,6 +7344,32 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
73447344
V2 = getAllOnesValue(
73457345
*R.DL,
73467346
FixedVectorType::get(E2->Scalars.front()->getType(), CommonVF));
7347+
} else if (!V1 && V2) {
7348+
// Shuffle vector and tree node.
7349+
unsigned VF = cast<FixedVectorType>(V2->getType())->getNumElements();
7350+
const TreeEntry *E1 = P1.get<const TreeEntry *>();
7351+
CommonVF = std::max(VF, E1->getVectorFactor());
7352+
assert(all_of(Mask,
7353+
[=](int Idx) {
7354+
return Idx < 2 * static_cast<int>(CommonVF);
7355+
}) &&
7356+
"All elements in mask must be less than 2 * CommonVF.");
7357+
if (E1->Scalars.size() == VF && VF != CommonVF) {
7358+
SmallVector<int> E1Mask = E1->getCommonMask();
7359+
assert(!E1Mask.empty() && "Expected non-empty common mask.");
7360+
for (int &Idx : CommonMask) {
7361+
if (Idx == PoisonMaskElem)
7362+
continue;
7363+
if (Idx >= static_cast<int>(CommonVF))
7364+
Idx = E1Mask[Idx - CommonVF] + VF;
7365+
}
7366+
CommonVF = VF;
7367+
}
7368+
V1 = Constant::getNullValue(
7369+
FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
7370+
V2 = getAllOnesValue(
7371+
*R.DL,
7372+
FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
73477373
} else {
73487374
assert(V1 && V2 && "Expected both vectors.");
73497375
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
@@ -7380,7 +7406,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
73807406
R(R), CheckedExtracts(CheckedExtracts) {}
73817407
Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
73827408
ArrayRef<std::optional<TTI::ShuffleKind>> ShuffleKinds,
7383-
unsigned NumParts) {
7409+
unsigned NumParts, bool &UseVecBaseAsInput) {
7410+
UseVecBaseAsInput = false;
73847411
if (Mask.empty())
73857412
return nullptr;
73867413
Value *VecBase = nullptr;
@@ -7403,6 +7430,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
74037430
Data.value() == VL[Data.index()]);
74047431
});
74057432
});
7433+
SmallPtrSet<Value *, 4> UniqueBases;
74067434
unsigned SliceSize = VL.size() / NumParts;
74077435
for (unsigned Part = 0; Part < NumParts; ++Part) {
74087436
ArrayRef<int> SubMask = Mask.slice(Part * SliceSize, SliceSize);
@@ -7417,13 +7445,14 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
74177445
// vectorized tree.
74187446
// Also, avoid adjusting the cost for extractelements with multiple uses
74197447
// in different graph entries.
7448+
auto *EE = cast<ExtractElementInst>(V);
7449+
VecBase = EE->getVectorOperand();
7450+
UniqueBases.insert(VecBase);
74207451
const TreeEntry *VE = R.getTreeEntry(V);
74217452
if (!CheckedExtracts.insert(V).second ||
74227453
!R.areAllUsersVectorized(cast<Instruction>(V), &VectorizedVals) ||
74237454
(VE && VE != E))
74247455
continue;
7425-
auto *EE = cast<ExtractElementInst>(V);
7426-
VecBase = EE->getVectorOperand();
74277456
std::optional<unsigned> EEIdx = getExtractIndex(EE);
74287457
if (!EEIdx)
74297458
continue;
@@ -7462,6 +7491,11 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
74627491
CommonMask.assign(Mask.begin(), Mask.end());
74637492
transformMaskAfterShuffle(CommonMask, CommonMask);
74647493
SameNodesEstimated = false;
7494+
if (NumParts != 1 && UniqueBases.size() != 1) {
7495+
UseVecBaseAsInput = true;
7496+
VecBase = Constant::getNullValue(
7497+
FixedVectorType::get(VL.front()->getType(), CommonMask.size()));
7498+
}
74657499
return VecBase;
74667500
}
74677501
void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef<int> Mask) {
@@ -7511,19 +7545,70 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
75117545
if (!SameNodesEstimated && InVectors.size() == 1)
75127546
InVectors.emplace_back(&E1);
75137547
}
7548+
/// Adds 2 input vectors and the mask for their shuffling. The body is a single assert over the already-recorded InVectors/CommonMask; nothing is modified here.
7549+
void add(Value *V1, Value *V2, ArrayRef<int> Mask) {
7550+
// Expected only when shuffling 2 vectors that feed extractelements; that case is already
7551+
// handled in adjustExtracts, so this only verifies that each used scalar extracts from V1 or V2.
7552+
assert(InVectors.size() == 1 &&
7553+
all_of(enumerate(CommonMask),
7554+
[&](auto P) {
7555+
if (P.value() == PoisonMaskElem)
7556+
return Mask[P.index()] == PoisonMaskElem;
7557+
auto *EI =
7558+
cast<ExtractElementInst>(InVectors.front()
7559+
.get<const TreeEntry *>()
7560+
->Scalars[P.index()]);
7561+
return EI->getVectorOperand() == V1 ||
7562+
EI->getVectorOperand() == V2;
7563+
}) &&
7564+
"Expected extractelement vectors.");
7565+
}
75147566
/// Adds another one input vector and the mask for the shuffling.
7515-
void add(Value *V1, ArrayRef<int> Mask) {
7567+
void add(Value *V1, ArrayRef<int> Mask, bool ForExtracts = false) {
75167568
if (InVectors.empty()) {
7517-
assert(CommonMask.empty() && "Expected empty input mask/vectors.");
7569+
assert(CommonMask.empty() && !ForExtracts &&
7570+
"Expected empty input mask/vectors.");
75187571
CommonMask.assign(Mask.begin(), Mask.end());
75197572
InVectors.assign(1, V1);
75207573
return;
75217574
}
7522-
assert(InVectors.size() == 1 && InVectors.front().is<const TreeEntry *>() &&
7523-
!CommonMask.empty() && "Expected only single entry from extracts.");
7575+
if (ForExtracts) {
7576+
// No need to add vectors here, already handled them in adjustExtracts.
7577+
assert(InVectors.size() == 1 &&
7578+
InVectors.front().is<const TreeEntry *>() && !CommonMask.empty() &&
7579+
all_of(enumerate(CommonMask),
7580+
[&](auto P) {
7581+
Value *Scalar = InVectors.front()
7582+
.get<const TreeEntry *>()
7583+
->Scalars[P.index()];
7584+
if (P.value() == PoisonMaskElem)
7585+
return P.value() == Mask[P.index()] ||
7586+
isa<UndefValue>(Scalar);
7587+
if (isa<Constant>(V1))
7588+
return true;
7589+
auto *EI = cast<ExtractElementInst>(Scalar);
7590+
return EI->getVectorOperand() == V1;
7591+
}) &&
7592+
"Expected only tree entry for extractelement vectors.");
7593+
return;
7594+
}
7595+
assert(!InVectors.empty() && !CommonMask.empty() &&
7596+
"Expected only tree entries from extracts/reused buildvectors.");
7597+
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
7598+
if (InVectors.size() == 2) {
7599+
Cost += createShuffle(InVectors.front(), InVectors.back(), CommonMask);
7600+
transformMaskAfterShuffle(CommonMask, CommonMask);
7601+
VF = std::max<unsigned>(VF, CommonMask.size());
7602+
} else if (const auto *InTE =
7603+
InVectors.front().dyn_cast<const TreeEntry *>()) {
7604+
VF = std::max(VF, InTE->getVectorFactor());
7605+
} else {
7606+
VF = std::max(
7607+
VF, cast<FixedVectorType>(InVectors.front().get<Value *>()->getType())
7608+
->getNumElements());
7609+
}
75247610
InVectors.push_back(V1);
7525-
unsigned VF = CommonMask.size();
7526-
for (unsigned Idx = 0; Idx < VF; ++Idx)
7611+
for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
75277612
if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
75287613
CommonMask[Idx] = Mask[Idx] + VF;
75297614
}
@@ -7640,6 +7725,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
76407725
reorderScalars(GatheredScalars, ReorderMask);
76417726
SmallVector<int> Mask;
76427727
SmallVector<int> ExtractMask;
7728+
Value *ExtractVecBase = nullptr;
7729+
bool UseVecBaseAsInput = false;
76437730
SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> GatherShuffles;
76447731
SmallVector<SmallVector<const TreeEntry *>> Entries;
76457732
SmallVector<std::optional<TTI::ShuffleKind>> ExtractShuffles;
@@ -7653,7 +7740,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
76537740
tryToGatherExtractElements(GatheredScalars, ExtractMask, NumParts);
76547741
if (!ExtractShuffles.empty()) {
76557742
if (Value *VecBase = Estimator.adjustExtracts(
7656-
E, ExtractMask, ExtractShuffles, NumParts)) {
7743+
E, ExtractMask, ExtractShuffles, NumParts, UseVecBaseAsInput)) {
76577744
if (auto *VecBaseTy = dyn_cast<FixedVectorType>(VecBase->getType()))
76587745
if (VF == VecBaseTy->getNumElements() &&
76597746
GatheredScalars.size() != VF) {
@@ -7748,6 +7835,48 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
77487835
ScalarTy, GatheredScalars.size())));
77497836
});
77507837
}
7838+
if (!ExtractShuffles.empty()) {
7839+
Value *Vec1 = nullptr;
7840+
// Gather of extractelements can be represented as just a shuffle of
7841+
// a single/two vectors the scalars are extracted from.
7842+
// Find input vectors.
7843+
Value *Vec2 = nullptr;
7844+
for (unsigned I = 0, Sz = ExtractMask.size(); I < Sz; ++I) {
7845+
if (!Mask.empty() && Mask[I] != PoisonMaskElem)
7846+
ExtractMask[I] = PoisonMaskElem;
7847+
}
7848+
if (UseVecBaseAsInput) {
7849+
Vec1 = ExtractVecBase;
7850+
} else {
7851+
for (unsigned I = 0, Sz = ExtractMask.size(); I < Sz; ++I) {
7852+
if (ExtractMask[I] == PoisonMaskElem)
7853+
continue;
7854+
if (isa<UndefValue>(E->Scalars[I]))
7855+
continue;
7856+
auto *EI = cast<ExtractElementInst>(E->Scalars[I]);
7857+
Value *VecOp = EI->getVectorOperand();
7858+
if (const auto *TE = getTreeEntry(VecOp))
7859+
if (TE->VectorizedValue)
7860+
VecOp = TE->VectorizedValue;
7861+
if (!Vec1) {
7862+
Vec1 = VecOp;
7863+
} else if (Vec1 != EI->getVectorOperand()) {
7864+
assert((!Vec2 || Vec2 == EI->getVectorOperand()) &&
7865+
"Expected only 1 or 2 vectors shuffle.");
7866+
Vec2 = VecOp;
7867+
}
7868+
}
7869+
}
7870+
if (Vec2) {
7871+
Estimator.add(Vec1, Vec2, ExtractMask);
7872+
} else if (Vec1) {
7873+
Estimator.add(Vec1, ExtractMask, /*ForExtracts=*/true);
7874+
} else {
7875+
Estimator.add(PoisonValue::get(FixedVectorType::get(
7876+
ScalarTy, GatheredScalars.size())),
7877+
ExtractMask, /*ForExtracts=*/true);
7878+
}
7879+
}
77517880
if (!all_of(GatheredScalars, PoisonValue::classof)) {
77527881
auto Gathers = ArrayRef(GatheredScalars).take_front(VL.size());
77537882
bool SameGathers = VL.equals(Gathers);
@@ -10341,7 +10470,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
1034110470
InVectors.push_back(V1);
1034210471
}
1034310472
/// Adds another one input vector and the mask for the shuffling.
10344-
void add(Value *V1, ArrayRef<int> Mask) {
10473+
void add(Value *V1, ArrayRef<int> Mask, bool = false) {
1034510474
if (InVectors.empty()) {
1034610475
if (!isa<FixedVectorType>(V1->getType())) {
1034710476
V1 = createShuffle(V1, nullptr, CommonMask);
@@ -10880,13 +11009,13 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
1088011009
IsUsedInExpr &= FindReusedSplat(
1088111010
ExtractMask,
1088211011
cast<FixedVectorType>(Vec1->getType())->getNumElements());
10883-
ShuffleBuilder.add(Vec1, ExtractMask);
11012+
ShuffleBuilder.add(Vec1, ExtractMask, /*ForExtracts=*/true);
1088411013
IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
1088511014
} else {
1088611015
IsUsedInExpr = false;
1088711016
ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get(
1088811017
ScalarTy, GatheredScalars.size())),
10889-
ExtractMask);
11018+
ExtractMask, /*ForExtracts=*/true);
1089011019
}
1089111020
}
1089211021
if (!GatherShuffles.empty()) {

0 commit comments

Comments
 (0)