Skip to content

Commit 184936c

Browse files
committed
[coro][pgp] Do not insert counters in the suspend block
If we do, we can't lower the suspend call to a tail call. If this happened in a loop, it can lead to stack overflow (this was encountered in a benchmark, as an extreme case) We can instrument the other 2 edges instead, as long as they also don't point to the same basic block.
1 parent 5192e29 commit 184936c

13 files changed

+83
-22
lines changed

llvm/include/llvm/Transforms/Instrumentation/CFGMST.h

Lines changed: 53 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
#include "llvm/Analysis/BlockFrequencyInfo.h"
2020
#include "llvm/Analysis/BranchProbabilityInfo.h"
2121
#include "llvm/Analysis/CFG.h"
22+
#include "llvm/IR/Instructions.h"
23+
#include "llvm/IR/IntrinsicInst.h"
2224
#include "llvm/Support/BranchProbability.h"
2325
#include "llvm/Support/Debug.h"
2426
#include "llvm/Support/raw_ostream.h"
@@ -121,31 +123,70 @@ template <class Edge, class BBInfo> class CFGMST {
121123

122124
static const uint32_t CriticalEdgeMultiplier = 1000;
123125

126+
auto GetCoroSuspendSwitch =
127+
[&](const Instruction *TI) -> const SwitchInst * {
128+
if (!F.isPresplitCoroutine())
129+
return nullptr;
130+
if (auto *SWInst = dyn_cast<SwitchInst>(TI))
131+
if (auto *Intrinsic = dyn_cast<IntrinsicInst>(SWInst->getCondition()))
132+
if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend)
133+
return SWInst;
134+
return nullptr;
135+
};
136+
124137
for (BasicBlock &BB : F) {
125138
Instruction *TI = BB.getTerminator();
139+
const SwitchInst *CoroSuspendSwitch = GetCoroSuspendSwitch(TI);
126140
uint64_t BBWeight =
127141
(BFI != nullptr ? BFI->getBlockFreq(&BB).getFrequency() : 2);
128142
uint64_t Weight = 2;
129143
if (int successors = TI->getNumSuccessors()) {
130144
for (int i = 0; i != successors; ++i) {
131145
BasicBlock *TargetBB = TI->getSuccessor(i);
132-
bool Critical = isCriticalEdge(TI, i);
133-
uint64_t scaleFactor = BBWeight;
134-
if (Critical) {
135-
if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
136-
scaleFactor *= CriticalEdgeMultiplier;
137-
else
138-
scaleFactor = UINT64_MAX;
146+
const bool Critical = isCriticalEdge(TI, i);
147+
const bool IsCoroSuspendTarget =
148+
CoroSuspendSwitch &&
149+
CoroSuspendSwitch->getDefaultDest() == TargetBB;
150+
// We must not add instrumentation to the BB representing the
151+
// "suspend" path, else CoroSplit won't be able to lower
152+
// llvm.coro.suspend to a tail call. We do want profiling info for
153+
// the other branches (resume/destroy). So we do 2 things:
154+
// 1. we prefer instrumenting those other edges by setting the weight
155+
// of the "suspend" edge to max, and
156+
// 2. we mark the edge as "Removed" to guarantee it is not considered
157+
// for instrumentation. That could technically happen:
158+
// (from test/Transforms/Coroutines/coro-split-musttail.ll)
159+
//
160+
// %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
161+
// switch i8 %suspend, label %exit [
162+
// i8 0, label %await.ready
163+
// i8 1, label %exit
164+
// ]
165+
if (IsCoroSuspendTarget) {
166+
Weight = UINT64_MAX;
167+
} else {
168+
bool Critical = isCriticalEdge(TI, i);
169+
uint64_t scaleFactor = BBWeight;
170+
if (Critical) {
171+
if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
172+
scaleFactor *= CriticalEdgeMultiplier;
173+
else
174+
scaleFactor = UINT64_MAX;
175+
}
176+
if (BPI != nullptr)
177+
Weight =
178+
BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
179+
if (Weight == 0)
180+
Weight++;
139181
}
140-
if (BPI != nullptr)
141-
Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
142-
if (Weight == 0)
143-
Weight++;
144182
auto *E = &addEdge(&BB, TargetBB, Weight);
145183
E->IsCritical = Critical;
184+
// See comment above - we must guarantee the coro suspend BB isn't
185+
// instrumented.
186+
if (IsCoroSuspendTarget)
187+
E->Removed = true;
146188
LLVM_DEBUG(dbgs() << " Edge: from " << BB.getName() << " to "
147189
<< TargetBB->getName() << " w=" << Weight << "\n");
148-
149190
// Keep track of entry/exit edges:
150191
if (&BB == Entry) {
151192
if (Weight > MaxEntryOutWeight) {

llvm/test/Transforms/Coroutines/coro-split-musttail.ll

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; Tests that coro-split will convert coro.resume followed by a suspend to a
22
; musttail call.
3-
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
3+
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
4+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
45

56
define void @f() #0 {
67
entry:
@@ -40,7 +41,9 @@ exit:
4041
; Verify that in the resume part resume call is marked with musttail.
4142
; CHECK-LABEL: @f.resume(
4243
; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
43-
; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr null)
44+
; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
45+
; PGO: call void @llvm.instrprof
46+
; PGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
4447
; CHECK-NEXT: ret void
4548

4649
declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1

llvm/test/Transforms/Coroutines/coro-split-musttail1.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; Tests that coro-split will convert coro.resume followed by a suspend to a
22
; musttail call.
3-
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
3+
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
4+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
45

56
define void @f() #0 {
67
entry:
@@ -63,14 +64,17 @@ unreach:
6364
; CHECK-LABEL: @f.resume(
6465
; CHECK: %[[hdl:.+]] = call ptr @g()
6566
; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
66-
; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
67+
; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
68+
; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
6769
; CHECK-NEXT: ret void
6870
; CHECK: %[[hdl2:.+]] = call ptr @h()
6971
; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
70-
; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
72+
; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
73+
; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
7174
; CHECK-NEXT: ret void
7275
; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
73-
; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null)
76+
; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
77+
; PGO: musttail call fastcc void %[[addr4]](ptr null)
7478
; CHECK-NEXT: ret void
7579

7680

llvm/test/Transforms/Coroutines/coro-split-musttail10.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; Tests that we would convert coro.resume to a musttail call if the target is
22
; Wasm64 with tail-call support.
33
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
4+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
45

56
target triple = "wasm64-unknown-unknown"
67

llvm/test/Transforms/Coroutines/coro-split-musttail11.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; Tests that we would convert coro.resume to a musttail call if the target is
22
; Wasm32 with tail-call support.
33
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
4+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
45

56
target triple = "wasm32-unknown-unknown"
67

llvm/test/Transforms/Coroutines/coro-split-musttail12.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; Tests that coro-split won't convert the cmp instruction prematurely.
22
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
3+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
34

45
declare void @fakeresume1(ptr)
56
declare void @print()

llvm/test/Transforms/Coroutines/coro-split-musttail13.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
; Tests that coro-split won't fall in infinite loop when simplify the terminators leading to ret.
22
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
3+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
34

45
declare void @fakeresume1(ptr)
56
declare void @may_throw(ptr)

llvm/test/Transforms/Coroutines/coro-split-musttail2.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; Tests that coro-split will convert coro.resume followed by a suspend to a
22
; musttail call.
33
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
4+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
45

56
define void @fakeresume1(ptr) {
67
entry:

llvm/test/Transforms/Coroutines/coro-split-musttail3.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; Tests that coro-split will convert coro.resume followed by a suspend to a
22
; musttail call.
3-
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
3+
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
4+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s
45

56
define void @f() #0 {
67
entry:
@@ -59,14 +60,17 @@ unreach:
5960
; CHECK-LABEL: @f.resume(
6061
; CHECK: %[[hdl:.+]] = call ptr @g()
6162
; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
62-
; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
63+
; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
64+
; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
6365
; CHECK-NEXT: ret void
6466
; CHECK: %[[hdl2:.+]] = call ptr @h()
6567
; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
66-
; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
68+
; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
69+
; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
6770
; CHECK-NEXT: ret void
6871
; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
69-
; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null)
72+
; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
73+
; PGO: musttail call fastcc void %[[addr4]](ptr null)
7074
; CHECK-NEXT: ret void
7175

7276

llvm/test/Transforms/Coroutines/coro-split-musttail4.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; Tests that coro-split will convert a call before coro.suspend to a musttail call
22
; while the user of the coro.suspend is a icmpinst.
33
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
4+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
45

56
define void @fakeresume1(ptr) {
67
entry:

llvm/test/Transforms/Coroutines/coro-split-musttail5.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
; Tests that sinked lifetime markers wouldn't provent optimization
22
; to convert a resuming call to a musttail call.
33
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
4+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
45

56
declare void @fakeresume1(ptr align 8)
67

llvm/test/Transforms/Coroutines/coro-split-musttail6.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
; an extra bitcast instruction in the path, which makes it harder to
55
; optimize.
66
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
7+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
78

89
declare void @fakeresume1(ptr align 8)
910

llvm/test/Transforms/Coroutines/coro-split-musttail7.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
; is that this contains dead instruction generated during the transformation,
55
; which makes the optimization harder.
66
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
7+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
78

89
declare void @fakeresume1(ptr align 8)
910

0 commit comments

Comments
 (0)