@@ -7344,6 +7344,32 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7344
7344
V2 = getAllOnesValue(
7345
7345
*R.DL,
7346
7346
FixedVectorType::get(E2->Scalars.front()->getType(), CommonVF));
7347
+ } else if (!V1 && V2) {
7348
+ // Shuffle vector and tree node.
7349
+ unsigned VF = cast<FixedVectorType>(V2->getType())->getNumElements();
7350
+ const TreeEntry *E1 = P1.get<const TreeEntry *>();
7351
+ CommonVF = std::max(VF, E1->getVectorFactor());
7352
+ assert(all_of(Mask,
7353
+ [=](int Idx) {
7354
+ return Idx < 2 * static_cast<int>(CommonVF);
7355
+ }) &&
7356
+ "All elements in mask must be less than 2 * CommonVF.");
7357
+ if (E1->Scalars.size() == VF && VF != CommonVF) {
7358
+ SmallVector<int> E1Mask = E1->getCommonMask();
7359
+ assert(!E1Mask.empty() && "Expected non-empty common mask.");
7360
+ for (int &Idx : CommonMask) {
7361
+ if (Idx == PoisonMaskElem)
7362
+ continue;
7363
+ if (Idx >= static_cast<int>(CommonVF))
7364
+ Idx = E1Mask[Idx - CommonVF] + VF;
7365
+ }
7366
+ CommonVF = VF;
7367
+ }
7368
+ V1 = Constant::getNullValue(
7369
+ FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
7370
+ V2 = getAllOnesValue(
7371
+ *R.DL,
7372
+ FixedVectorType::get(E1->Scalars.front()->getType(), CommonVF));
7347
7373
} else {
7348
7374
assert(V1 && V2 && "Expected both vectors.");
7349
7375
unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
@@ -7380,7 +7406,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7380
7406
R(R), CheckedExtracts(CheckedExtracts) {}
7381
7407
Value *adjustExtracts(const TreeEntry *E, MutableArrayRef<int> Mask,
7382
7408
ArrayRef<std::optional<TTI::ShuffleKind>> ShuffleKinds,
7383
- unsigned NumParts) {
7409
+ unsigned NumParts, bool &UseVecBaseAsInput) {
7410
+ UseVecBaseAsInput = false;
7384
7411
if (Mask.empty())
7385
7412
return nullptr;
7386
7413
Value *VecBase = nullptr;
@@ -7403,6 +7430,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7403
7430
Data.value() == VL[Data.index()]);
7404
7431
});
7405
7432
});
7433
+ SmallPtrSet<Value *, 4> UniqueBases;
7406
7434
unsigned SliceSize = VL.size() / NumParts;
7407
7435
for (unsigned Part = 0; Part < NumParts; ++Part) {
7408
7436
ArrayRef<int> SubMask = Mask.slice(Part * SliceSize, SliceSize);
@@ -7417,13 +7445,14 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7417
7445
// vectorized tree.
7418
7446
// Also, avoid adjusting the cost for extractelements with multiple uses
7419
7447
// in different graph entries.
7448
+ auto *EE = cast<ExtractElementInst>(V);
7449
+ VecBase = EE->getVectorOperand();
7450
+ UniqueBases.insert(VecBase);
7420
7451
const TreeEntry *VE = R.getTreeEntry(V);
7421
7452
if (!CheckedExtracts.insert(V).second ||
7422
7453
!R.areAllUsersVectorized(cast<Instruction>(V), &VectorizedVals) ||
7423
7454
(VE && VE != E))
7424
7455
continue;
7425
- auto *EE = cast<ExtractElementInst>(V);
7426
- VecBase = EE->getVectorOperand();
7427
7456
std::optional<unsigned> EEIdx = getExtractIndex(EE);
7428
7457
if (!EEIdx)
7429
7458
continue;
@@ -7462,6 +7491,11 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7462
7491
CommonMask.assign(Mask.begin(), Mask.end());
7463
7492
transformMaskAfterShuffle(CommonMask, CommonMask);
7464
7493
SameNodesEstimated = false;
7494
+ if (NumParts != 1 && UniqueBases.size() != 1) {
7495
+ UseVecBaseAsInput = true;
7496
+ VecBase = Constant::getNullValue(
7497
+ FixedVectorType::get(VL.front()->getType(), CommonMask.size()));
7498
+ }
7465
7499
return VecBase;
7466
7500
}
7467
7501
void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef<int> Mask) {
@@ -7511,19 +7545,70 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
7511
7545
if (!SameNodesEstimated && InVectors.size() == 1)
7512
7546
InVectors.emplace_back(&E1);
7513
7547
}
7548
+ /// Adds 2 input vectors and the mask for their shuffling. Nothing is recorded here: the body is a single assertion on the already collected state.
7549
+ void add(Value *V1, Value *V2, ArrayRef<int> Mask) {
7550
+ // Reached only when shuffling 2 vectors with extractelements, which were already
7551
+ // handled in adjustExtracts; every non-poison CommonMask lane must extract from V1 or V2.
7552
+ assert(InVectors.size() == 1 &&
7553
+ all_of(enumerate(CommonMask),
7554
+ [&](auto P) {
7555
+ if (P.value() == PoisonMaskElem)
7556
+ return Mask[P.index()] == PoisonMaskElem;
7557
+ auto *EI =
7558
+ cast<ExtractElementInst>(InVectors.front()
7559
+ .get<const TreeEntry *>()
7560
+ ->Scalars[P.index()]);
7561
+ return EI->getVectorOperand() == V1 ||
7562
+ EI->getVectorOperand() == V2;
7563
+ }) &&
7564
+ "Expected extractelement vectors.");
7565
+ }
7514
7566
/// Adds one more input vector and the mask for the shuffling. With \p ForExtracts set and inputs already present, the call only asserts and returns.
7515
- void add(Value *V1, ArrayRef<int> Mask) {
7567
+ void add(Value *V1, ArrayRef<int> Mask, bool ForExtracts = false ) {
7516
7568
if (InVectors.empty()) {
7517
- assert(CommonMask.empty() && "Expected empty input mask/vectors.");
7569
+ assert(CommonMask.empty() && !ForExtracts &&
7570
+ "Expected empty input mask/vectors.");
7518
7571
CommonMask.assign(Mask.begin(), Mask.end());
7519
7572
InVectors.assign(1, V1);
7520
7573
return;
7521
7574
}
7522
- assert(InVectors.size() == 1 && InVectors.front().is<const TreeEntry *>() &&
7523
- !CommonMask.empty() && "Expected only single entry from extracts.");
7575
+ if (ForExtracts) {
7576
+ // Nothing is added here: these vectors were already handled in adjustExtracts, so only the recorded state is verified.
7577
+ assert(InVectors.size() == 1 &&
7578
+ InVectors.front().is<const TreeEntry *>() && !CommonMask.empty() &&
7579
+ all_of(enumerate(CommonMask),
7580
+ [&](auto P) {
7581
+ Value *Scalar = InVectors.front()
7582
+ .get<const TreeEntry *>()
7583
+ ->Scalars[P.index()];
7584
+ if (P.value() == PoisonMaskElem)
7585
+ return P.value() == Mask[P.index()] ||
7586
+ isa<UndefValue>(Scalar);
7587
+ if (isa<Constant>(V1))
7588
+ return true;
7589
+ auto *EI = cast<ExtractElementInst>(Scalar);
7590
+ return EI->getVectorOperand() == V1;
7591
+ }) &&
7592
+ "Expected only tree entry for extractelement vectors.");
7593
+ return;
7594
+ }
7595
+ assert(!InVectors.empty() && !CommonMask.empty() &&
7596
+ "Expected only tree entries from extracts/reused buildvectors.");
7597
+ unsigned VF = cast<FixedVectorType>(V1->getType())->getNumElements();
7598
+ if (InVectors.size() == 2) {
7599
+ Cost += createShuffle(InVectors.front(), InVectors.back(), CommonMask);
7600
+ transformMaskAfterShuffle(CommonMask, CommonMask);
7601
+ VF = std::max<unsigned>(VF, CommonMask.size());
7602
+ } else if (const auto *InTE =
7603
+ InVectors.front().dyn_cast<const TreeEntry *>()) {
7604
+ VF = std::max(VF, InTE->getVectorFactor());
7605
+ } else {
7606
+ VF = std::max(
7607
+ VF, cast<FixedVectorType>(InVectors.front().get<Value *>()->getType())
7608
+ ->getNumElements());
7609
+ }
7524
7610
InVectors.push_back(V1);
7525
- unsigned VF = CommonMask.size();
7526
- for (unsigned Idx = 0; Idx < VF; ++Idx)
7611
+ for (unsigned Idx = 0, Sz = CommonMask.size(); Idx < Sz; ++Idx)
7527
7612
if (Mask[Idx] != PoisonMaskElem && CommonMask[Idx] == PoisonMaskElem)
7528
7613
CommonMask[Idx] = Mask[Idx] + VF;
7529
7614
}
@@ -7640,6 +7725,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
7640
7725
reorderScalars(GatheredScalars, ReorderMask);
7641
7726
SmallVector<int> Mask;
7642
7727
SmallVector<int> ExtractMask;
7728
+ Value *ExtractVecBase = nullptr;
7729
+ bool UseVecBaseAsInput = false;
7643
7730
SmallVector<std::optional<TargetTransformInfo::ShuffleKind>> GatherShuffles;
7644
7731
SmallVector<SmallVector<const TreeEntry *>> Entries;
7645
7732
SmallVector<std::optional<TTI::ShuffleKind>> ExtractShuffles;
@@ -7653,7 +7740,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
7653
7740
tryToGatherExtractElements(GatheredScalars, ExtractMask, NumParts);
7654
7741
if (!ExtractShuffles.empty()) {
7655
7742
if (Value *VecBase = Estimator.adjustExtracts(
7656
- E, ExtractMask, ExtractShuffles, NumParts)) {
7743
+ E, ExtractMask, ExtractShuffles, NumParts, UseVecBaseAsInput )) {
7657
7744
if (auto *VecBaseTy = dyn_cast<FixedVectorType>(VecBase->getType()))
7658
7745
if (VF == VecBaseTy->getNumElements() &&
7659
7746
GatheredScalars.size() != VF) {
@@ -7748,6 +7835,48 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
7748
7835
ScalarTy, GatheredScalars.size())));
7749
7836
});
7750
7837
}
7838
+ if (!ExtractShuffles.empty()) {
7839
+ Value *Vec1 = nullptr;
7840
+ // Gather of extractelements can be represented as just a shuffle of
7841
+ // a single/two vectors the scalars are extracted from.
7842
+ // Find input vectors.
7843
+ Value *Vec2 = nullptr;
7844
+ for (unsigned I = 0, Sz = ExtractMask.size(); I < Sz; ++I) {
7845
+ if (!Mask.empty() && Mask[I] != PoisonMaskElem)
7846
+ ExtractMask[I] = PoisonMaskElem;
7847
+ }
7848
+ if (UseVecBaseAsInput) {
7849
+ Vec1 = ExtractVecBase;
7850
+ } else {
7851
+ for (unsigned I = 0, Sz = ExtractMask.size(); I < Sz; ++I) {
7852
+ if (ExtractMask[I] == PoisonMaskElem)
7853
+ continue;
7854
+ if (isa<UndefValue>(E->Scalars[I]))
7855
+ continue;
7856
+ auto *EI = cast<ExtractElementInst>(E->Scalars[I]);
7857
+ Value *VecOp = EI->getVectorOperand();
7858
+ if (const auto *TE = getTreeEntry(VecOp))
7859
+ if (TE->VectorizedValue)
7860
+ VecOp = TE->VectorizedValue;
7861
+ if (!Vec1) {
7862
+ Vec1 = VecOp;
7863
+ } else if (Vec1 != EI->getVectorOperand()) {
7864
+ assert((!Vec2 || Vec2 == EI->getVectorOperand()) &&
7865
+ "Expected only 1 or 2 vectors shuffle.");
7866
+ Vec2 = VecOp;
7867
+ }
7868
+ }
7869
+ }
7870
+ if (Vec2) {
7871
+ Estimator.add(Vec1, Vec2, ExtractMask);
7872
+ } else if (Vec1) {
7873
+ Estimator.add(Vec1, ExtractMask, /*ForExtracts=*/true);
7874
+ } else {
7875
+ Estimator.add(PoisonValue::get(FixedVectorType::get(
7876
+ ScalarTy, GatheredScalars.size())),
7877
+ ExtractMask, /*ForExtracts=*/true);
7878
+ }
7879
+ }
7751
7880
if (!all_of(GatheredScalars, PoisonValue::classof)) {
7752
7881
auto Gathers = ArrayRef(GatheredScalars).take_front(VL.size());
7753
7882
bool SameGathers = VL.equals(Gathers);
@@ -10341,7 +10470,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
10341
10470
InVectors.push_back(V1);
10342
10471
}
10343
10472
/// Adds another one input vector and the mask for the shuffling.
10344
- void add(Value *V1, ArrayRef<int> Mask) {
10473
+ void add(Value *V1, ArrayRef<int> Mask, bool = false ) {
10345
10474
if (InVectors.empty()) {
10346
10475
if (!isa<FixedVectorType>(V1->getType())) {
10347
10476
V1 = createShuffle(V1, nullptr, CommonMask);
@@ -10880,13 +11009,13 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
10880
11009
IsUsedInExpr &= FindReusedSplat(
10881
11010
ExtractMask,
10882
11011
cast<FixedVectorType>(Vec1->getType())->getNumElements());
10883
- ShuffleBuilder.add(Vec1, ExtractMask);
11012
+ ShuffleBuilder.add(Vec1, ExtractMask, /*ForExtracts=*/true );
10884
11013
IsNonPoisoned &= isGuaranteedNotToBePoison(Vec1);
10885
11014
} else {
10886
11015
IsUsedInExpr = false;
10887
11016
ShuffleBuilder.add(PoisonValue::get(FixedVectorType::get(
10888
11017
ScalarTy, GatheredScalars.size())),
10889
- ExtractMask);
11018
+ ExtractMask, /*ForExtracts=*/true );
10890
11019
}
10891
11020
}
10892
11021
if (!GatherShuffles.empty()) {
0 commit comments