Skip to content

Commit 1f63caa

Browse files
committed
[LICM][MustExec] Make must-exec logic for IV condition commutative
MustExec has special logic to determine whether the first loop iteration will always be executed, by simplifying the IV comparison with the start value. Currently, this code assumes that the IV is on the LHS of the comparison, but this is not guaranteed. Make sure it handles the commuted variant as well. The changed PhaseOrdering test previously performed peeling to make the loads dereferenceable -- as a side effect, this also reduced the exit count by one, avoiding the awkward <= MAX case. Now we know up-front that the loads are dereferenceable and can simply be hoisted. As such, we retain the original exit count and now have to handle it by widening the exit count calculation to i128. This is a regression, but at least it preserves the vectorization, which was the original goal. I'm not sure what else can be done about that test.
1 parent f647321 commit 1f63caa

File tree

3 files changed

+147
-160
lines changed

3 files changed

+147
-160
lines changed

llvm/lib/Analysis/MustExecute.cpp

+11-6
Original file line numberDiff line numberDiff line change
@@ -135,16 +135,21 @@ static bool CanProveNotTakenFirstIteration(const BasicBlock *ExitBlock,
135135
// todo: this would be a lot more powerful if we used scev, but all the
136136
// plumbing is currently missing to pass a pointer in from the pass
137137
// Check for cmp (phi [x, preheader] ...), y where (pred x, y is known
138+
ICmpInst::Predicate Pred = Cond->getPredicate();
138139
auto *LHS = dyn_cast<PHINode>(Cond->getOperand(0));
139140
auto *RHS = Cond->getOperand(1);
140-
if (!LHS || LHS->getParent() != CurLoop->getHeader())
141-
return false;
141+
if (!LHS || LHS->getParent() != CurLoop->getHeader()) {
142+
Pred = Cond->getSwappedPredicate();
143+
LHS = dyn_cast<PHINode>(Cond->getOperand(1));
144+
RHS = Cond->getOperand(0);
145+
if (!LHS || LHS->getParent() != CurLoop->getHeader())
146+
return false;
147+
}
148+
142149
auto DL = ExitBlock->getModule()->getDataLayout();
143150
auto *IVStart = LHS->getIncomingValueForBlock(CurLoop->getLoopPreheader());
144-
auto *SimpleValOrNull = simplifyCmpInst(Cond->getPredicate(),
145-
IVStart, RHS,
146-
{DL, /*TLI*/ nullptr,
147-
DT, /*AC*/ nullptr, BI});
151+
auto *SimpleValOrNull = simplifyCmpInst(
152+
Pred, IVStart, RHS, {DL, /*TLI*/ nullptr, DT, /*AC*/ nullptr, BI});
148153
auto *SimpleCst = dyn_cast_or_null<Constant>(SimpleValOrNull);
149154
if (!SimpleCst)
150155
return false;

llvm/test/Transforms/LICM/hoist-mustexec.ll

+1-2
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,6 @@ fail:
218218
}
219219

220220
; Same as previous case, with commuted icmp.
221-
; FIXME: The load should get hoisted here as well.
222221
define i32 @test3_commuted(ptr noalias nocapture readonly %a) nounwind uwtable {
223222
; CHECK-LABEL: define i32 @test3_commuted(
224223
; CHECK-SAME: ptr noalias nocapture readonly [[A:%.*]]) #[[ATTR1]] {
@@ -227,14 +226,14 @@ define i32 @test3_commuted(ptr noalias nocapture readonly %a) nounwind uwtable {
227226
; CHECK-NEXT: [[IS_ZERO:%.*]] = icmp eq i32 [[LEN]], 0
228227
; CHECK-NEXT: br i1 [[IS_ZERO]], label [[FAIL:%.*]], label [[PREHEADER:%.*]]
229228
; CHECK: preheader:
229+
; CHECK-NEXT: [[I1:%.*]] = load i32, ptr [[A]], align 4
230230
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
231231
; CHECK: for.body:
232232
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[INC:%.*]], [[CONTINUE:%.*]] ]
233233
; CHECK-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[ADD:%.*]], [[CONTINUE]] ]
234234
; CHECK-NEXT: [[R_CHK:%.*]] = icmp uge i32 [[LEN]], [[IV]]
235235
; CHECK-NEXT: br i1 [[R_CHK]], label [[CONTINUE]], label [[FAIL_LOOPEXIT:%.*]]
236236
; CHECK: continue:
237-
; CHECK-NEXT: [[I1:%.*]] = load i32, ptr [[A]], align 4
238237
; CHECK-NEXT: [[ADD]] = add nsw i32 [[I1]], [[ACC]]
239238
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1
240239
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000

0 commit comments

Comments
 (0)