@@ -207,6 +207,32 @@ VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
207
207
return Parent->getEnclosingBlockWithPredecessors ();
208
208
}
209
209
210
+ bool VPBlockUtils::isHeader (const VPBlockBase *VPB,
211
+ const VPDominatorTree &VPDT) {
212
+ auto *VPBB = dyn_cast<VPBasicBlock>(VPB);
213
+ if (!VPBB)
214
+ return false ;
215
+
216
+ // If VPBB is in a region R, VPBB is a loop header if R is a loop region with
217
+ // VPBB as its entry, i.e., free of predecessors.
218
+ if (auto *R = VPBB->getParent ())
219
+ return !R->isReplicator () && VPBB->getNumPredecessors () == 0 ;
220
+
221
+ // A header dominates its second predecessor (the latch), with the other
222
+ // predecessor being the preheader
223
+ return VPB->getPredecessors ().size () == 2 &&
224
+ VPDT.dominates (VPB, VPB->getPredecessors ()[1 ]);
225
+ }
226
+
227
+ bool VPBlockUtils::isLatch (const VPBlockBase *VPB,
228
+ const VPDominatorTree &VPDT) {
229
+ // A latch has a header as its second successor, with its other successor
230
+ // leaving the loop. A preheader OTOH has a header as its first (and only)
231
+ // successor.
232
+ return VPB->getNumSuccessors () == 2 &&
233
+ VPBlockUtils::isHeader (VPB->getSuccessors ()[1 ], VPDT);
234
+ }
235
+
210
236
VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi () {
211
237
iterator It = begin ();
212
238
while (It != end () && It->isPhi ())
@@ -424,13 +450,21 @@ void VPBasicBlock::connectToPredecessors(VPTransformState &State) {
424
450
if (ParentLoop && !State.LI ->getLoopFor (NewBB))
425
451
ParentLoop->addBasicBlockToLoop (NewBB, *State.LI );
426
452
453
+ SmallVector<VPBlockBase *> Preds;
454
+ if (VPBlockUtils::isHeader (this , State.VPDT )) {
455
+ // There's no block for the latch yet, connect to the preheader only.
456
+ Preds = {getPredecessors ()[0 ]};
457
+ } else {
458
+ Preds = to_vector (getPredecessors ());
459
+ }
460
+
427
461
// Hook up the new basic block to its predecessors.
428
- for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors () ) {
462
+ for (VPBlockBase *PredVPBlock : Preds ) {
429
463
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock ();
430
464
auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors ();
465
+ assert (CFG.VPBB2IRBB .contains (PredVPBB) &&
466
+ " Predecessor basic-block not found building successor." );
431
467
BasicBlock *PredBB = CFG.VPBB2IRBB [PredVPBB];
432
-
433
- assert (PredBB && " Predecessor basic-block not found building successor." );
434
468
auto *PredBBTerminator = PredBB->getTerminator ();
435
469
LLVM_DEBUG (dbgs () << " LV: draw edge from" << PredBB->getName () << ' \n ' );
436
470
@@ -491,11 +525,25 @@ void VPBasicBlock::execute(VPTransformState *State) {
491
525
bool Replica = bool (State->Lane );
492
526
BasicBlock *NewBB = State->CFG .PrevBB ; // Reuse it if possible.
493
527
528
+ if (VPBlockUtils::isHeader (this , State->VPDT )) {
529
+ // Create and register the new vector loop.
530
+ Loop *PrevParentLoop = State->CurrentParentLoop ;
531
+ State->CurrentParentLoop = State->LI ->AllocateLoop ();
532
+
533
+ // Insert the new loop into the loop nest and register the new basic blocks
534
+ // before calling any utilities such as SCEV that require valid LoopInfo.
535
+ if (PrevParentLoop)
536
+ PrevParentLoop->addChildLoop (State->CurrentParentLoop );
537
+ else
538
+ State->LI ->addTopLevelLoop (State->CurrentParentLoop );
539
+ }
540
+
494
541
auto IsReplicateRegion = [](VPBlockBase *BB) {
495
542
auto *R = dyn_cast_or_null<VPRegionBlock>(BB);
496
- return R && R->isReplicator ();
543
+ assert ((!R || R->isReplicator ()) &&
544
+ " only replicate region blocks should remain" );
545
+ return R;
497
546
};
498
-
499
547
// 1. Create an IR basic block.
500
548
if ((Replica && this == getParent ()->getEntry ()) ||
501
549
IsReplicateRegion (getSingleHierarchicalPredecessor ())) {
@@ -518,6 +566,10 @@ void VPBasicBlock::execute(VPTransformState *State) {
518
566
519
567
// 2. Fill the IR basic block with IR instructions.
520
568
executeRecipes (State, NewBB);
569
+
570
+ // If this block is a latch, update CurrentParentLoop.
571
+ if (VPBlockUtils::isLatch (this , State->VPDT ))
572
+ State->CurrentParentLoop = State->CurrentParentLoop ->getParentLoop ();
521
573
}
522
574
523
575
VPBasicBlock *VPBasicBlock::clone () {
@@ -729,35 +781,13 @@ VPRegionBlock *VPRegionBlock::clone() {
729
781
}
730
782
731
783
void VPRegionBlock::execute (VPTransformState *State) {
732
- ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>>
733
- RPOT (Entry);
734
-
735
- if (!isReplicator ()) {
736
- // Create and register the new vector loop.
737
- Loop *PrevParentLoop = State->CurrentParentLoop ;
738
- State->CurrentParentLoop = State->LI ->AllocateLoop ();
739
-
740
- // Insert the new loop into the loop nest and register the new basic blocks
741
- // before calling any utilities such as SCEV that require valid LoopInfo.
742
- if (PrevParentLoop)
743
- PrevParentLoop->addChildLoop (State->CurrentParentLoop );
744
- else
745
- State->LI ->addTopLevelLoop (State->CurrentParentLoop );
746
-
747
- // Visit the VPBlocks connected to "this", starting from it.
748
- for (VPBlockBase *Block : RPOT) {
749
- LLVM_DEBUG (dbgs () << " LV: VPBlock in RPO " << Block->getName () << ' \n ' );
750
- Block->execute (State);
751
- }
752
-
753
- State->CurrentParentLoop = PrevParentLoop;
754
- return ;
755
- }
756
-
784
+ assert (isReplicator () &&
785
+ " Loop regions should have been lowered to plain CFG" );
757
786
assert (!State->Lane && " Replicating a Region with non-null instance." );
758
-
759
- // Enter replicating mode.
760
787
assert (!State->VF .isScalable () && " VF is assumed to be non scalable." );
788
+
789
+ ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT (
790
+ Entry);
761
791
State->Lane = VPLane (0 );
762
792
for (unsigned Lane = 0 , VF = State->VF .getKnownMinValue (); Lane < VF;
763
793
++Lane) {
@@ -851,6 +881,22 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
851
881
}
852
882
#endif
853
883
884
+ void VPRegionBlock::dissolveToCFGLoop () {
885
+ auto *Header = cast<VPBasicBlock>(getEntry ());
886
+ VPBlockBase *Preheader = getSinglePredecessor ();
887
+ auto *ExitingLatch = cast<VPBasicBlock>(getExiting ());
888
+ VPBlockBase *Middle = getSingleSuccessor ();
889
+ VPBlockUtils::disconnectBlocks (Preheader, this );
890
+ VPBlockUtils::disconnectBlocks (this , Middle);
891
+
892
+ for (VPBlockBase *VPB : vp_depth_first_shallow (Entry))
893
+ VPB->setParent (getParent ());
894
+
895
+ VPBlockUtils::connectBlocks (Preheader, Header);
896
+ VPBlockUtils::connectBlocks (ExitingLatch, Middle);
897
+ VPBlockUtils::connectBlocks (ExitingLatch, Header);
898
+ }
899
+
854
900
VPlan::VPlan (Loop *L) {
855
901
setEntry (createVPIRBasicBlock (L->getLoopPreheader ()));
856
902
ScalarHeader = createVPIRBasicBlock (L->getHeader ());
@@ -962,16 +1008,15 @@ void VPlan::execute(VPTransformState *State) {
962
1008
963
1009
State->CFG .DTU .flush ();
964
1010
965
- auto *LoopRegion = getVectorLoopRegion ( );
966
- if (!LoopRegion )
1011
+ VPBasicBlock *Header = vputils::getFirstLoopHeader (* this , State-> VPDT );
1012
+ if (!Header )
967
1013
return ;
968
1014
969
- VPBasicBlock *LatchVPBB = LoopRegion-> getExitingBasicBlock ( );
1015
+ auto *LatchVPBB = cast<VPBasicBlock>(Header-> getPredecessors ()[ 1 ] );
970
1016
BasicBlock *VectorLatchBB = State->CFG .VPBB2IRBB [LatchVPBB];
971
1017
972
1018
// Fix the latch value of canonical, reduction and first-order recurrences
973
1019
// phis in the vector loop.
974
- VPBasicBlock *Header = LoopRegion->getEntryBasicBlock ();
975
1020
for (VPRecipeBase &R : Header->phis ()) {
976
1021
// Skip phi-like recipes that generate their backedge values themselves.
977
1022
if (isa<VPWidenPHIRecipe>(&R))
@@ -1007,8 +1052,10 @@ void VPlan::execute(VPTransformState *State) {
1007
1052
bool NeedsScalar = isa<VPInstruction>(PhiR) ||
1008
1053
(isa<VPReductionPHIRecipe>(PhiR) &&
1009
1054
cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
1055
+
1010
1056
Value *Phi = State->get (PhiR, NeedsScalar);
1011
- // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does not.
1057
+ // VPHeaderPHIRecipe supports getBackedgeValue() but VPInstruction does
1058
+ // not.
1012
1059
Value *Val = State->get (PhiR->getOperand (1 ), NeedsScalar);
1013
1060
cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
1014
1061
}
0 commit comments