Skip to content

Commit e5f5517

Browse files
committed
[VPlan] Create IR basic block for middle.block in VPlan.
Create an IR BB directly for the middle.block, instead of creating the IR BB during skeleton creation and then replacing the middle VPBB with a VPIRBB. This moves another part of skeleton creation to VPlan and simplifies the code slightly by removing the code to disconnect the middle block and vector preheader, as well as the corresponding DT update. NFC modulo IR block naming and block creation order, which changes the IR names for the blocks.
1 parent e5f4019 commit e5f5517

15 files changed

+204
-215
lines changed

Diff for: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

+3-6
Original file line numberDiff line numberDiff line change
@@ -2681,13 +2681,9 @@ void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
26812681
Cost->requiresScalarEpilogue(VF.isVector())) &&
26822682
"loops not exiting via the latch without required epilogue?");
26832683

2684-
LoopMiddleBlock =
2685-
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
2686-
LI, nullptr, Twine(Prefix) + "middle.block");
2687-
replaceVPBBWithIRVPBB(Plan.getMiddleBlock(), LoopMiddleBlock);
26882684
LoopScalarPreHeader =
2689-
SplitBlock(LoopMiddleBlock, LoopMiddleBlock->getTerminator(), DT, LI,
2690-
nullptr, Twine(Prefix) + "scalar.ph");
2685+
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
2686+
LI, nullptr, Twine(Prefix) + "scalar.ph");
26912687
replaceVPBBWithIRVPBB(Plan.getScalarPreheader(), LoopScalarPreHeader);
26922688
}
26932689

@@ -10761,6 +10757,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1076110757
// to be vectorized by executing the plan (potentially with a different
1076210758
// factor) again shortly afterwards.
1076310759
VPlan &BestEpiPlan = LVP.getPlanFor(EpilogueVF.Width);
10760+
BestEpiPlan.getMiddleBlock()->setName("vec.epilog.middle.block");
1076410761
preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan);
1076510762
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1,
1076610763
BestEpiPlan);

Diff for: llvm/lib/Transforms/Vectorize/VPlan.cpp

+1-9
Original file line numberDiff line numberDiff line change
@@ -987,18 +987,10 @@ void VPlan::execute(VPTransformState *State) {
987987
setName("Final VPlan");
988988
LLVM_DEBUG(dump());
989989

990-
// Disconnect the middle block from its single successor (the scalar loop
991-
// header) in both the CFG and DT. The branch will be recreated during VPlan
992-
// execution.
993-
BasicBlock *MiddleBB = State->CFG.ExitBB;
994-
BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor();
995-
auto *BrInst = new UnreachableInst(MiddleBB->getContext());
996-
BrInst->insertBefore(MiddleBB->getTerminator()->getIterator());
997-
MiddleBB->getTerminator()->eraseFromParent();
998-
State->CFG.DTU.applyUpdates({{DominatorTree::Delete, MiddleBB, ScalarPh}});
999990
// Disconnect scalar preheader and scalar header, as the dominator tree edge
1000991
// will be updated as part of VPlan execution. This allows keeping the DTU
1001992
// logic generic during VPlan execution.
993+
BasicBlock *ScalarPh = State->CFG.ExitBB;
1002994
State->CFG.DTU.applyUpdates(
1003995
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});
1004996

Diff for: llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll

+19-19
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,13 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
5353
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
5454
; CHECK: middle.split:
5555
; CHECK-NEXT: br i1 [[TMP17]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
56+
; CHECK: middle.block:
57+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
58+
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]]
5659
; CHECK: vector.early.exit:
5760
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP16]], i1 true)
5861
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <vscale x 16 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
59-
; CHECK-NEXT: br label [[LOOP_END:%.*]]
60-
; CHECK: middle.block:
61-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[N_VEC]]
62-
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END]], label [[SCALAR_PH]]
62+
; CHECK-NEXT: br label [[LOOP_END]]
6363
; CHECK: scalar.ph:
6464
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
6565
; CHECK-NEXT: br label [[LOOP:%.*]]
@@ -136,12 +136,12 @@ define i64 @same_exit_block_pre_inc_use4() {
136136
; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
137137
; CHECK: middle.split:
138138
; CHECK-NEXT: br i1 [[TMP5]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
139+
; CHECK: middle.block:
140+
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
139141
; CHECK: vector.early.exit:
140142
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v2i1(<2 x i1> [[TMP4]], i1 true)
141143
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <2 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
142-
; CHECK-NEXT: br label [[LOOP_END:%.*]]
143-
; CHECK: middle.block:
144-
; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
144+
; CHECK-NEXT: br label [[LOOP_END]]
145145
; CHECK: scalar.ph:
146146
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
147147
; CHECK-NEXT: br label [[LOOP:%.*]]
@@ -214,12 +214,12 @@ define i64 @loop_contains_safe_call() #1 {
214214
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
215215
; CHECK: middle.split:
216216
; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
217+
; CHECK: middle.block:
218+
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
217219
; CHECK: vector.early.exit:
218220
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 true)
219221
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
220-
; CHECK-NEXT: br label [[LOOP_END:%.*]]
221-
; CHECK: middle.block:
222-
; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
222+
; CHECK-NEXT: br label [[LOOP_END]]
223223
; CHECK: scalar.ph:
224224
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
225225
; CHECK-NEXT: br label [[LOOP:%.*]]
@@ -309,13 +309,13 @@ define i64 @loop_contains_safe_div() #1 {
309309
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
310310
; CHECK: middle.split:
311311
; CHECK-NEXT: br i1 [[TMP6]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
312+
; CHECK: middle.block:
313+
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[INDEX1]]
314+
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]]
312315
; CHECK: vector.early.exit:
313316
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1(<vscale x 4 x i1> [[TMP15]], i1 true)
314317
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <vscale x 4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
315-
; CHECK-NEXT: br label [[LOOP_END:%.*]]
316-
; CHECK: middle.block:
317-
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 64, [[INDEX1]]
318-
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END]], label [[SCALAR_PH]]
318+
; CHECK-NEXT: br label [[LOOP_END]]
319319
; CHECK: scalar.ph:
320320
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[OFFSET_IDX]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
321321
; CHECK-NEXT: br label [[LOOP:%.*]]
@@ -395,12 +395,12 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
395395
; CHECK: middle.split:
396396
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_LOAD2]], i32 3
397397
; CHECK-NEXT: br i1 [[TMP7]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
398+
; CHECK: middle.block:
399+
; CHECK-NEXT: br i1 true, label [[LOOP_END:%.*]], label [[SCALAR_PH]]
398400
; CHECK: vector.early.exit:
399401
; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP6]], i1 true)
400402
; CHECK-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <4 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
401-
; CHECK-NEXT: br label [[LOOP_END:%.*]]
402-
; CHECK: middle.block:
403-
; CHECK-NEXT: br i1 true, label [[LOOP_END]], label [[SCALAR_PH]]
403+
; CHECK-NEXT: br label [[LOOP_END]]
404404
; CHECK: scalar.ph:
405405
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 67, [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
406406
; CHECK-NEXT: br label [[LOOP:%.*]]
@@ -495,11 +495,11 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) {
495495
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
496496
; CHECK: middle.split:
497497
; CHECK-NEXT: br i1 [[TMP15]], label [[VECTOR_EARLY_EXIT:%.*]], label [[MIDDLE_BLOCK:%.*]]
498-
; CHECK: vector.early.exit:
499-
; CHECK-NEXT: br label [[FOUND:%.*]]
500498
; CHECK: middle.block:
501499
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX1]], [[N_VEC]]
502500
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
501+
; CHECK: vector.early.exit:
502+
; CHECK-NEXT: br label [[FOUND:%.*]]
503503
; CHECK: scalar.ph:
504504
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
505505
; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]

Diff for: llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll

+3-3
Original file line numberDiff line numberDiff line change
@@ -103,17 +103,17 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
103103
; CHECK-NEXT: EMIT branch-on-count vp<[[EP_IV_NEXT]]>, ir<1024>
104104
; CHECK-NEXT: No successors
105105
; CHECK-NEXT: }
106-
; CHECK-NEXT: Successor(s): ir-bb<middle.block>
106+
; CHECK-NEXT: Successor(s): middle.block
107107
; CHECK-EMPTY:
108-
; CHECK-NEXT: ir-bb<middle.block>:
108+
; CHECK-NEXT: middle.block:
109109
; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<%accum>, ir<%add>
110110
; CHECK-NEXT: EMIT vp<[[EXTRACT:%.+]]> = extract-from-end vp<[[RED_RESULT]]>, ir<1>
111111
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, ir<1024>
112112
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
113113
; CHECK-NEXT: Successor(s): ir-bb<exit>, ir-bb<scalar.ph>
114114
; CHECK-EMPTY:
115115
; CHECK-NEXT: ir-bb<exit>:
116-
; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[EXTRACT]]> from ir-bb<middle.block>)
116+
; CHECK-NEXT: IR %add.lcssa = phi i32 [ %add, %for.body ] (extra operand: vp<[[EXTRACT]]> from middle.block)
117117
; CHECK-NEXT: No successors
118118
; CHECK-EMPTY:
119119
; CHECK-NEXT: ir-bb<scalar.ph>:

Diff for: llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

+4-4
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,9 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
209209
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VEC_TC]]>
210210
; CHECK-NEXT: No successors
211211
; CHECK-NEXT: }
212-
; CHECK-NEXT: Successor(s): ir-bb<middle.block>
212+
; CHECK-NEXT: Successor(s): middle.block
213213
; CHECK-EMPTY:
214-
; CHECK-NEXT: ir-bb<middle.block>:
214+
; CHECK-NEXT: middle.block:
215215
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VEC_TC]]>
216216
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
217217
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, ir-bb<scalar.ph>
@@ -458,9 +458,9 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
458458
; CHECK-NEXT: EMIT branch-on-count vp<[[CAN_IV_NEXT]]>, ir<[[VEC_TC]]>
459459
; CHECK-NEXT: No successors
460460
; CHECK-NEXT: }
461-
; CHECK-NEXT: Successor(s): ir-bb<middle.block>
461+
; CHECK-NEXT: Successor(s): middle.block
462462
; CHECK-EMPTY:
463-
; CHECK-NEXT: ir-bb<middle.block>:
463+
; CHECK-NEXT: middle.block:
464464
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq vp<[[TC]]>, ir<[[VEC_TC]]>
465465
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
466466
; CHECK-NEXT: Successor(s): ir-bb<for.cond.cleanup.loopexit>, ir-bb<scalar.ph>

0 commit comments

Comments
 (0)