Skip to content

Commit a6f4c1b

Browse files
tmandry, nikic, kuhar, cwabbott0
authored and committed
[InstCombine] Fix infinite loop due to bitcast <-> phi transforms (#34)
* [InstCombine] Add test for iterator invalidation bug; NFC

* [InstCombine] Fix user iterator invalidation in bitcast of phi transform
  This fixes the issue encountered in D71164. Instead of using a range-based for loop, manually iterate over the users and advance the iterator beforehand, so that we do not skip any users due to iterator invalidation.
  Differential Revision: https://reviews.llvm.org/D72657

* [InstCombine] Make combineLoadToNewType a method; NFC
  So it can be reused as part of other combines, in particular for D71164.
  Conflicts: llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

* [InstCombine] Fix infinite loop due to bitcast <-> phi transforms
  Fix for https://bugs.llvm.org/show_bug.cgi?id=44245. The optimizeBitCastFromPhi() and FoldPHIArgOpIntoPHI() transforms end up fighting against each other, because optimizeBitCastFromPhi() assumes that bitcasts of loads will get folded. This doesn't happen here, because a dangling phi node prevents the one-use fold in https://github.com/llvm/llvm-project/blob/master/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp#L620-L628 from triggering. This patch fixes the issue by explicitly performing the load combine as part of the bitcast of phi transform. Other attempts to force the load to be combined first were ultimately too unreliable.
  Differential Revision: https://reviews.llvm.org/D71164

* [InstCombine] Improve infinite loop detection
  Summary: This patch limits the default number of iterations performed by InstCombine. It also exposes a new option that specifies how many iterations are treated as being stuck in an infinite loop. Based on experiments performed on real-world C++ programs, InstCombine seems to perform at most ~8-20 iterations, so treating 1000 iterations as an infinite loop seems like a safe choice. See D71145 for details. The two limits can be specified via command line options.
  Reviewers: spatel, lebedev.ri, nikic, xbolva00, grosser
  Reviewed By: spatel
  Subscribers: hiraditya, llvm-commits
  Tags: #llvm
  Differential Revision: https://reviews.llvm.org/D71673

* [InstCombine] Add tests for PR44242
  Differential Revision: https://reviews.llvm.org/D71260

* [InstCombine] Don't rewrite phi-of-bitcast when the phi has other users
  Judging by the existing comments, this was the intention, but the transform never actually checked whether the existing phis would be removed. See https://bugs.llvm.org/show_bug.cgi?id=44242 for an example where this causes much worse code generation on AMDGPU.
  Differential Revision: https://reviews.llvm.org/D71209

Co-authored-by: Nikita Popov <[email protected]>
Co-authored-by: Jakub Kuderski <[email protected]>
Co-authored-by: Connor Abbott <[email protected]>
1 parent 6071408 commit a6f4c1b

File tree

7 files changed

+503
-33
lines changed

7 files changed

+503
-33
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

+55-18
Original file line number | Diff line number | Diff line change
@@ -2217,6 +2217,31 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
22172217
}
22182218
}
22192219

2220+
// Check that each user of each old PHI node is something that we can
2221+
// rewrite, so that all of the old PHI nodes can be cleaned up afterwards.
2222+
for (auto *OldPN : OldPhiNodes) {
2223+
for (User *V : OldPN->users()) {
2224+
if (auto *SI = dyn_cast<StoreInst>(V)) {
2225+
if (!SI->isSimple() || SI->getOperand(0) != OldPN)
2226+
return nullptr;
2227+
} else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2228+
// Verify it's a B->A cast.
2229+
Type *TyB = BCI->getOperand(0)->getType();
2230+
Type *TyA = BCI->getType();
2231+
if (TyA != DestTy || TyB != SrcTy)
2232+
return nullptr;
2233+
} else if (auto *PHI = dyn_cast<PHINode>(V)) {
2234+
// As long as the user is another old PHI node, then even if we don't
2235+
// rewrite it, the PHI web we're considering won't have any users
2236+
// outside itself, so it'll be dead.
2237+
if (OldPhiNodes.count(PHI) == 0)
2238+
return nullptr;
2239+
} else {
2240+
return nullptr;
2241+
}
2242+
}
2243+
}
2244+
22202245
// For each old PHI node, create a corresponding new PHI node with a type A.
22212246
SmallDenseMap<PHINode *, PHINode *> NewPNodes;
22222247
for (auto *OldPN : OldPhiNodes) {
@@ -2234,9 +2259,14 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
22342259
if (auto *C = dyn_cast<Constant>(V)) {
22352260
NewV = ConstantExpr::getBitCast(C, DestTy);
22362261
} else if (auto *LI = dyn_cast<LoadInst>(V)) {
2237-
Builder.SetInsertPoint(LI->getNextNode());
2238-
NewV = Builder.CreateBitCast(LI, DestTy);
2239-
Worklist.Add(LI);
2262+
// Explicitly perform load combine to make sure no opposing transform
2263+
// can remove the bitcast in the meantime and trigger an infinite loop.
2264+
Builder.SetInsertPoint(LI);
2265+
NewV = combineLoadToNewType(*LI, DestTy);
2266+
// Remove the old load and its use in the old phi, which itself becomes
2267+
// dead once the whole transform finishes.
2268+
replaceInstUsesWith(*LI, UndefValue::get(LI->getType()));
2269+
eraseInstFromFunction(*LI);
22402270
} else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
22412271
NewV = BCI->getOperand(0);
22422272
} else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
@@ -2259,26 +2289,33 @@ Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
22592289
Instruction *RetVal = nullptr;
22602290
for (auto *OldPN : OldPhiNodes) {
22612291
PHINode *NewPN = NewPNodes[OldPN];
2262-
for (User *V : OldPN->users()) {
2292+
for (auto It = OldPN->user_begin(), End = OldPN->user_end(); It != End; ) {
2293+
User *V = *It;
2294+
// We may remove this user, advance to avoid iterator invalidation.
2295+
++It;
22632296
if (auto *SI = dyn_cast<StoreInst>(V)) {
2264-
if (SI->isSimple() && SI->getOperand(0) == OldPN) {
2265-
Builder.SetInsertPoint(SI);
2266-
auto *NewBC =
2267-
cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2268-
SI->setOperand(0, NewBC);
2269-
Worklist.Add(SI);
2270-
assert(hasStoreUsersOnly(*NewBC));
2271-
}
2297+
assert(SI->isSimple() && SI->getOperand(0) == OldPN);
2298+
Builder.SetInsertPoint(SI);
2299+
auto *NewBC =
2300+
cast<BitCastInst>(Builder.CreateBitCast(NewPN, SrcTy));
2301+
SI->setOperand(0, NewBC);
2302+
Worklist.Add(SI);
2303+
assert(hasStoreUsersOnly(*NewBC));
22722304
}
22732305
else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
2274-
// Verify it's a B->A cast.
22752306
Type *TyB = BCI->getOperand(0)->getType();
22762307
Type *TyA = BCI->getType();
2277-
if (TyA == DestTy && TyB == SrcTy) {
2278-
Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2279-
if (BCI == &CI)
2280-
RetVal = I;
2281-
}
2308+
assert(TyA == DestTy && TyB == SrcTy);
2309+
(void) TyA;
2310+
(void) TyB;
2311+
Instruction *I = replaceInstUsesWith(*BCI, NewPN);
2312+
if (BCI == &CI)
2313+
RetVal = I;
2314+
} else if (auto *PHI = dyn_cast<PHINode>(V)) {
2315+
assert(OldPhiNodes.count(PHI) > 0);
2316+
(void) PHI;
2317+
} else {
2318+
llvm_unreachable("all uses should be handled");
22822319
}
22832320
}
22842321
}

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

+3
Original file line number | Diff line number | Diff line change
@@ -405,6 +405,9 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner
405405
/// \return true if successful.
406406
bool replacePointer(Instruction &I, Value *V);
407407

408+
LoadInst *combineLoadToNewType(LoadInst &LI, Type *NewTy,
409+
const Twine &Suffix = "");
410+
408411
private:
409412
bool shouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const;
410413
bool shouldChangeType(Type *From, Type *To) const;

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

+13-14
Original file line number | Diff line number | Diff line change
@@ -448,8 +448,8 @@ static bool isSupportedAtomicType(Type *Ty) {
448448
///
449449
/// Note that this will create all of the instructions with whatever insert
450450
/// point the \c InstCombiner currently is using.
451-
static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
452-
const Twine &Suffix = "") {
451+
LoadInst *InstCombiner::combineLoadToNewType(LoadInst &LI, Type *NewTy,
452+
const Twine &Suffix) {
453453
assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
454454
"can't fold an atomic load to requested type");
455455

@@ -462,9 +462,9 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
462462
if (!(match(Ptr, m_BitCast(m_Value(NewPtr))) &&
463463
NewPtr->getType()->getPointerElementType() == NewTy &&
464464
NewPtr->getType()->getPointerAddressSpace() == AS))
465-
NewPtr = IC.Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
465+
NewPtr = Builder.CreateBitCast(Ptr, NewTy->getPointerTo(AS));
466466

467-
LoadInst *NewLoad = IC.Builder.CreateAlignedLoad(
467+
LoadInst *NewLoad = Builder.CreateAlignedLoad(
468468
NewTy, NewPtr, LI.getAlignment(), LI.isVolatile(), LI.getName() + Suffix);
469469
NewLoad->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
470470
MDBuilder MDB(NewLoad->getContext());
@@ -505,7 +505,7 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
505505
NewLoad->setMetadata(ID, N);
506506
break;
507507
case LLVMContext::MD_range:
508-
copyRangeMetadata(IC.getDataLayout(), LI, N, *NewLoad);
508+
copyRangeMetadata(getDataLayout(), LI, N, *NewLoad);
509509
break;
510510
}
511511
}
@@ -639,9 +639,8 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
639639
return SI && SI->getPointerOperand() != &LI &&
640640
!SI->getPointerOperand()->isSwiftError();
641641
})) {
642-
LoadInst *NewLoad = combineLoadToNewType(
643-
IC, LI,
644-
Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
642+
LoadInst *NewLoad = IC.combineLoadToNewType(
643+
LI, Type::getIntNTy(LI.getContext(), DL.getTypeStoreSizeInBits(Ty)));
645644
// Replace all the stores with stores of the newly loaded value.
646645
for (auto UI = LI.user_begin(), UE = LI.user_end(); UI != UE;) {
647646
auto *SI = cast<StoreInst>(*UI++);
@@ -663,7 +662,7 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
663662
if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
664663
if (CI->isNoopCast(DL))
665664
if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
666-
LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
665+
LoadInst *NewLoad = IC.combineLoadToNewType(LI, CI->getDestTy());
667666
CI->replaceAllUsesWith(NewLoad);
668667
IC.eraseInstFromFunction(*CI);
669668
return &LI;
@@ -691,8 +690,8 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
691690
// If the struct only have one element, we unpack.
692691
auto NumElements = ST->getNumElements();
693692
if (NumElements == 1) {
694-
LoadInst *NewLoad = combineLoadToNewType(IC, LI, ST->getTypeAtIndex(0U),
695-
".unpack");
693+
LoadInst *NewLoad = IC.combineLoadToNewType(LI, ST->getTypeAtIndex(0U),
694+
".unpack");
696695
AAMDNodes AAMD;
697696
LI.getAAMetadata(AAMD);
698697
NewLoad->setAAMetadata(AAMD);
@@ -741,7 +740,7 @@ static Instruction *unpackLoadToAggregate(InstCombiner &IC, LoadInst &LI) {
741740
auto *ET = AT->getElementType();
742741
auto NumElements = AT->getNumElements();
743742
if (NumElements == 1) {
744-
LoadInst *NewLoad = combineLoadToNewType(IC, LI, ET, ".unpack");
743+
LoadInst *NewLoad = IC.combineLoadToNewType(LI, ET, ".unpack");
745744
AAMDNodes AAMD;
746745
LI.getAAMetadata(AAMD);
747746
NewLoad->setAAMetadata(AAMD);
@@ -1377,8 +1376,8 @@ static bool removeBitcastsFromLoadStoreOnMinMax(InstCombiner &IC,
13771376
return false;
13781377

13791378
IC.Builder.SetInsertPoint(LI);
1380-
LoadInst *NewLI = combineLoadToNewType(
1381-
IC, *LI, LoadAddr->getType()->getPointerElementType());
1379+
LoadInst *NewLI = IC.combineLoadToNewType(
1380+
*LI, LoadAddr->getType()->getPointerElementType());
13821381
// Replace all the stores with stores of the newly loaded value.
13831382
for (auto *UI : LI->users()) {
13841383
auto *USI = cast<StoreInst>(UI);

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

+17-1
Original file line number | Diff line number | Diff line change
@@ -121,6 +121,9 @@ STATISTIC(NumReassoc , "Number of reassociations");
121121
DEBUG_COUNTER(VisitCounter, "instcombine-visit",
122122
"Controls which instructions are visited");
123123

124+
static constexpr unsigned InstCombineDefaultMaxIterations = 1000;
125+
static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000;
126+
124127
static cl::opt<bool>
125128
EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"),
126129
cl::init(true));
@@ -129,6 +132,12 @@ static cl::opt<bool>
129132
EnableExpensiveCombines("expensive-combines",
130133
cl::desc("Enable expensive instruction combines"));
131134

135+
static cl::opt<unsigned> InfiniteLoopDetectionThreshold(
136+
"instcombine-infinite-loop-threshold",
137+
cl::desc("Number of instruction combining iterations considered an "
138+
"infinite loop"),
139+
cl::init(InstCombineDefaultInfiniteLoopThreshold), cl::Hidden);
140+
132141
static cl::opt<unsigned>
133142
MaxArraySize("instcombine-maxarray-size", cl::init(1024),
134143
cl::desc("Maximum array size considered when doing a combine"));
@@ -3508,9 +3517,16 @@ static bool combineInstructionsOverFunction(
35083517
MadeIRChange = LowerDbgDeclare(F);
35093518

35103519
// Iterate while there is work to do.
3511-
int Iteration = 0;
3520+
unsigned Iteration = 0;
35123521
while (true) {
35133522
++Iteration;
3523+
3524+
if (Iteration > InfiniteLoopDetectionThreshold) {
3525+
report_fatal_error(
3526+
"Instruction Combining seems stuck in an infinite loop after " +
3527+
Twine(InfiniteLoopDetectionThreshold) + " iterations.");
3528+
}
3529+
35143530
LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
35153531
<< F.getName() << "\n");
35163532

Original file line number | Diff line number | Diff line change
@@ -0,0 +1,33 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt -S -instcombine < %s | FileCheck %s
3+
4+
@Q = internal unnamed_addr global double 1.000000e+00, align 8
5+
6+
define double @test(i1 %c, i64* %p) {
7+
; CHECK-LABEL: @test(
8+
; CHECK-NEXT: entry:
9+
; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[END:%.*]]
10+
; CHECK: if:
11+
; CHECK-NEXT: [[LOAD1:%.*]] = load double, double* @Q, align 8
12+
; CHECK-NEXT: br label [[END]]
13+
; CHECK: end:
14+
; CHECK-NEXT: [[TMP0:%.*]] = phi double [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[LOAD1]], [[IF]] ]
15+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64* [[P:%.*]] to double*
16+
; CHECK-NEXT: store double [[TMP0]], double* [[TMP1]], align 8
17+
; CHECK-NEXT: ret double [[TMP0]]
18+
;
19+
entry:
20+
br i1 %c, label %if, label %end
21+
22+
if:
23+
%load = load i64, i64* bitcast (double* @Q to i64*), align 8
24+
br label %end
25+
26+
end:
27+
%phi = phi i64 [ 0, %entry ], [ %load, %if ]
28+
store i64 %phi, i64* %p, align 8
29+
%cast = bitcast i64 %phi to double
30+
ret double %cast
31+
32+
uselistorder i64 %phi, { 1, 0 }
33+
}

0 commit comments

Comments
 (0)