Skip to content

Commit ffd337b

Browse files
authored
[coro][pgo] Do not insert counters in the suspend block (llvm#71262)
If we did, we couldn't lower symmetric-transfer resumes to tail calls. We can instrument the other two edges (resume and destroy) instead, as long as they don't also point to the same basic block as the suspend edge.
1 parent 8ea8dd9 commit ffd337b

25 files changed

+1013
-10
lines changed

llvm/include/llvm/Transforms/Instrumentation/CFGMST.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
#include "llvm/Analysis/BlockFrequencyInfo.h"
2020
#include "llvm/Analysis/BranchProbabilityInfo.h"
2121
#include "llvm/Analysis/CFG.h"
22+
#include "llvm/IR/Instructions.h"
23+
#include "llvm/IR/IntrinsicInst.h"
2224
#include "llvm/Support/BranchProbability.h"
2325
#include "llvm/Support/Debug.h"
2426
#include "llvm/Support/raw_ostream.h"
@@ -82,6 +84,38 @@ template <class Edge, class BBInfo> class CFGMST {
8284
return true;
8385
}
8486

87+
/// Marks \p E as "Removed" when it is the edge from a switch on
/// llvm.coro.suspend to that switch's default destination, inside a
/// presplit coroutine. Every other edge is left untouched.
void handleCoroSuspendEdge(Edge *E) {
88+
// We must not add instrumentation to the BB representing the
89+
// "suspend" path, else CoroSplit won't be able to lower
90+
// llvm.coro.suspend to a tail call. We do want profiling info for
91+
// the other branches (resume/destroy), so the "suspend" edge is
92+
// excluded here instead.
93+
// NOTE(review): this comment previously also listed "setting the weight
// of the "suspend" edge to max" as a first step; this function does not
// modify the edge weight -- confirm whether that is done elsewhere.
94+
// We mark the edge as "Removed" to guarantee it is not considered
95+
// for instrumentation. Without this it could technically be picked, e.g.
96+
// (from test/Transforms/Coroutines/coro-split-musttail.ll)
97+
// where the default destination is also the target of case i8 1:
98+
// %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
99+
// switch i8 %suspend, label %exit [
100+
// i8 0, label %await.ready
101+
// i8 1, label %exit
102+
// ]
103+
// The enclosing function is looked up through the destination block, so
// an edge without one cannot be inspected and is left alone.
const BasicBlock *EdgeTarget = E->DestBB;
104+
if (!EdgeTarget)
105+
return;
106+
// The source block is dereferenced below to fetch its terminator.
assert(E->SrcBB);
107+
const Function *F = EdgeTarget->getParent();
108+
// Only functions still marked as presplit coroutines are affected.
if (!F->isPresplitCoroutine())
109+
return;
110+
111+
// The edge is removed only if the source block ends in a switch whose
// condition is directly the llvm.coro.suspend call and whose default
// destination (treated here as the "suspend" path) is this edge's target.
const Instruction *TI = E->SrcBB->getTerminator();
112+
if (auto *SWInst = dyn_cast<SwitchInst>(TI))
113+
if (auto *Intrinsic = dyn_cast<IntrinsicInst>(SWInst->getCondition()))
114+
if (Intrinsic->getIntrinsicID() == Intrinsic::coro_suspend &&
115+
SWInst->getDefaultDest() == EdgeTarget)
116+
E->Removed = true;
117+
}
118+
85119
// Traverse the CFG using a stack. Find all the edges and assign the weight.
86120
// Edges with large weight will be put into MST first so they are less likely
87121
// to be instrumented.
@@ -133,6 +167,7 @@ template <class Edge, class BBInfo> class CFGMST {
133167
Weight++;
134168
auto *E = &addEdge(&BB, TargetBB, Weight);
135169
E->IsCritical = Critical;
170+
handleCoroSuspendEdge(E);
136171
LLVM_DEBUG(dbgs() << " Edge: from " << BB.getName() << " to "
137172
<< TargetBB->getName() << " w=" << Weight << "\n");
138173

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; Tests that instrumentation doesn't interfere with lowering (coro-split).
2+
; It should convert coro.resume followed by a suspend to a musttail call.
3+
4+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
5+
6+
; Coroutine under test (attribute group #0). It has two suspend points, each
; preceded by an indirect fastcc call through an address obtained from
; llvm.coro.subfn.addr.
define void @f() #0 {
7+
entry:
8+
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
9+
%alloc = call ptr @malloc(i64 16) #3
10+
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
11+
12+
; First indirect call, immediately followed by the first suspend point.
%save = call token @llvm.coro.save(ptr null)
13+
%addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
14+
call fastcc void %addr1(ptr null)
15+
16+
; The default destination (%exit) is also the target of case i8 1, so two
; switch successors share one block.
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
17+
switch i8 %suspend, label %exit [
18+
i8 0, label %await.ready
19+
i8 1, label %exit
20+
]
21+
await.ready:
22+
; Second indirect call, immediately followed by the second suspend point.
%save2 = call token @llvm.coro.save(ptr null)
23+
%addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
24+
call fastcc void %addr2(ptr null)
25+
26+
; Every successor of this switch, default included, is %exit.
%suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
27+
switch i8 %suspend2, label %exit [
28+
i8 0, label %exit
29+
i8 1, label %exit
30+
]
31+
exit:
32+
call i1 @llvm.coro.end(ptr null, i1 false, token none)
33+
ret void
34+
}
35+
36+
; Verify that in the initial function the resume call is not marked with musttail.
37+
; CHECK-LABEL: @f(
38+
; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
39+
; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null)
40+
41+
; Verify that in the resume part the resume call is marked with musttail.
42+
; CHECK-LABEL: @f.resume(
43+
; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
44+
; CHECK: call void @llvm.instrprof
45+
; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr null)
46+
; CHECK-NEXT: ret void
47+
48+
declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
49+
declare i1 @llvm.coro.alloc(token) #2
50+
declare i64 @llvm.coro.size.i64() #3
51+
declare ptr @llvm.coro.begin(token, ptr writeonly) #2
52+
declare token @llvm.coro.save(ptr) #2
53+
declare ptr @llvm.coro.frame() #3
54+
declare i8 @llvm.coro.suspend(token, i1) #2
55+
declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
56+
declare i1 @llvm.coro.end(ptr, i1, token) #2
57+
declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
58+
declare ptr @malloc(i64)
59+
60+
attributes #0 = { presplitcoroutine }
61+
attributes #1 = { argmemonly nounwind readonly }
62+
attributes #2 = { nounwind }
63+
attributes #3 = { nounwind readnone }
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
; Tests that instrumentation doesn't interfere with lowering (coro-split).
2+
; It should convert coro.resume followed by a suspend to a musttail call.
3+
4+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
5+
6+
; Coroutine under test (attribute group #0). After the first suspend point a
; three-way switch selects one of three blocks, each of which makes an
; indirect fastcc call and then branches to a shared final suspend point.
define void @f() #0 {
7+
entry:
8+
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
9+
%alloc = call ptr @malloc(i64 16) #3
10+
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
11+
12+
; First indirect call, immediately followed by the first suspend point.
%save = call token @llvm.coro.save(ptr null)
13+
%addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
14+
call fastcc void %addr1(ptr null)
15+
16+
; The default destination (%exit) is also the target of case i8 1.
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
17+
switch i8 %suspend, label %exit [
18+
i8 0, label %await.suspend
19+
i8 1, label %exit
20+
]
21+
await.suspend:
22+
; The save for the final suspend is taken here, before the multi-way branch.
%save2 = call token @llvm.coro.save(ptr null)
23+
; Ordinary (non-coroutine) switch on an opaque external call's result.
%br0 = call i8 @switch_result()
24+
switch i8 %br0, label %unreach [
25+
i8 0, label %await.resume3
26+
i8 1, label %await.resume1
27+
i8 2, label %await.resume2
28+
]
29+
await.resume1:
30+
; Indirect call through the handle returned by @g.
%hdl = call ptr @g()
31+
%addr2 = call ptr @llvm.coro.subfn.addr(ptr %hdl, i8 0)
32+
call fastcc void %addr2(ptr %hdl)
33+
br label %final.suspend
34+
await.resume2:
35+
; Indirect call through the handle returned by @h.
%hdl2 = call ptr @h()
36+
%addr3 = call ptr @llvm.coro.subfn.addr(ptr %hdl2, i8 0)
37+
call fastcc void %addr3(ptr %hdl2)
38+
br label %final.suspend
39+
await.resume3:
40+
; Indirect call with a null handle.
%addr4 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
41+
call fastcc void %addr4(ptr null)
42+
br label %final.suspend
43+
final.suspend:
44+
; Shared suspend point reached from all three await.resume* blocks.
%suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
45+
switch i8 %suspend2, label %exit [
46+
i8 0, label %pre.exit
47+
i8 1, label %exit
48+
]
49+
pre.exit:
50+
br label %exit
51+
exit:
52+
call i1 @llvm.coro.end(ptr null, i1 false, token none)
53+
ret void
54+
unreach:
55+
unreachable
56+
}
57+
58+
; Verify that in the initial function the resume call is not marked with musttail.
59+
; CHECK-LABEL: @f(
60+
; CHECK: %[[addr1:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
61+
; CHECK-NOT: musttail call fastcc void %[[addr1]](ptr null)
62+
63+
; Verify that in the resume part the resume call is marked with musttail.
64+
; CHECK-LABEL: @f.resume(
65+
; CHECK: %[[hdl:.+]] = call ptr @g()
66+
; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
67+
; CHECK: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
68+
; CHECK-NEXT: ret void
69+
; CHECK: %[[hdl2:.+]] = call ptr @h()
70+
; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
71+
; CHECK: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
72+
; CHECK-NEXT: ret void
73+
; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
74+
; CHECK: musttail call fastcc void %[[addr4]](ptr null)
75+
; CHECK-NEXT: ret void
76+
77+
78+
79+
declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
80+
declare i1 @llvm.coro.alloc(token) #2
81+
declare i64 @llvm.coro.size.i64() #3
82+
declare ptr @llvm.coro.begin(token, ptr writeonly) #2
83+
declare token @llvm.coro.save(ptr) #2
84+
declare ptr @llvm.coro.frame() #3
85+
declare i8 @llvm.coro.suspend(token, i1) #2
86+
declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
87+
declare i1 @llvm.coro.end(ptr, i1, token) #2
88+
declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
89+
declare ptr @malloc(i64)
90+
declare i8 @switch_result()
91+
declare ptr @g()
92+
declare ptr @h()
93+
94+
attributes #0 = { presplitcoroutine }
95+
attributes #1 = { argmemonly nounwind readonly }
96+
attributes #2 = { nounwind }
97+
attributes #3 = { nounwind readnone }
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; Tests that instrumentation doesn't interfere with lowering (coro-split).
2+
; It should convert coro.resume followed by a suspend to a musttail call.
3+
4+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
5+
6+
target triple = "wasm64-unknown-unknown"
7+
8+
; Coroutine under test for the wasm64 triple (attribute group #0). It has two
; suspend points, each preceded by an indirect fastcc call through an address
; obtained from llvm.coro.subfn.addr.
define void @f() #0 {
9+
entry:
10+
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
11+
%alloc = call ptr @malloc(i64 16) #3
12+
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
13+
14+
; First indirect call, immediately followed by the first suspend point.
%save = call token @llvm.coro.save(ptr null)
15+
%addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
16+
call fastcc void %addr1(ptr null)
17+
18+
; The default destination (%exit) is also the target of case i8 1.
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
19+
switch i8 %suspend, label %exit [
20+
i8 0, label %await.ready
21+
i8 1, label %exit
22+
]
23+
await.ready:
24+
; Second indirect call, immediately followed by the second suspend point.
%save2 = call token @llvm.coro.save(ptr null)
25+
%addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
26+
call fastcc void %addr2(ptr null)
27+
28+
; Every successor of this switch, default included, is %exit.
%suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
29+
switch i8 %suspend2, label %exit [
30+
i8 0, label %exit
31+
i8 1, label %exit
32+
]
33+
exit:
34+
call i1 @llvm.coro.end(ptr null, i1 false, token none)
35+
ret void
36+
}
37+
38+
; CHECK: musttail call
39+
40+
declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
41+
declare i1 @llvm.coro.alloc(token) #2
42+
declare i64 @llvm.coro.size.i64() #3
43+
declare ptr @llvm.coro.begin(token, ptr writeonly) #2
44+
declare token @llvm.coro.save(ptr) #2
45+
declare ptr @llvm.coro.frame() #3
46+
declare i8 @llvm.coro.suspend(token, i1) #2
47+
declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
48+
declare i1 @llvm.coro.end(ptr, i1, token) #2
49+
declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
50+
declare ptr @malloc(i64)
51+
52+
attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
53+
attributes #1 = { argmemonly nounwind readonly }
54+
attributes #2 = { nounwind }
55+
attributes #3 = { nounwind readnone }
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; Tests that instrumentation doesn't interfere with lowering (coro-split).
2+
; It should convert coro.resume followed by a suspend to a musttail call.
3+
4+
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
5+
6+
target triple = "wasm32-unknown-unknown"
7+
8+
; Coroutine under test for the wasm32 triple (attribute group #0). It has two
; suspend points, each preceded by an indirect fastcc call through an address
; obtained from llvm.coro.subfn.addr.
define void @f() #0 {
9+
entry:
10+
%id = call token @llvm.coro.id(i32 0, ptr null, ptr null, ptr null)
11+
%alloc = call ptr @malloc(i64 16) #3
12+
%vFrame = call noalias nonnull ptr @llvm.coro.begin(token %id, ptr %alloc)
13+
14+
; First indirect call, immediately followed by the first suspend point.
%save = call token @llvm.coro.save(ptr null)
15+
%addr1 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
16+
call fastcc void %addr1(ptr null)
17+
18+
; The default destination (%exit) is also the target of case i8 1.
%suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
19+
switch i8 %suspend, label %exit [
20+
i8 0, label %await.ready
21+
i8 1, label %exit
22+
]
23+
await.ready:
24+
; Second indirect call, immediately followed by the second suspend point.
%save2 = call token @llvm.coro.save(ptr null)
25+
%addr2 = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
26+
call fastcc void %addr2(ptr null)
27+
28+
; Every successor of this switch, default included, is %exit.
%suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
29+
switch i8 %suspend2, label %exit [
30+
i8 0, label %exit
31+
i8 1, label %exit
32+
]
33+
exit:
34+
call i1 @llvm.coro.end(ptr null, i1 false, token none)
35+
ret void
36+
}
37+
38+
; CHECK: musttail call
39+
40+
declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
41+
declare i1 @llvm.coro.alloc(token) #2
42+
declare i64 @llvm.coro.size.i64() #3
43+
declare ptr @llvm.coro.begin(token, ptr writeonly) #2
44+
declare token @llvm.coro.save(ptr) #2
45+
declare ptr @llvm.coro.frame() #3
46+
declare i8 @llvm.coro.suspend(token, i1) #2
47+
declare ptr @llvm.coro.free(token, ptr nocapture readonly) #1
48+
declare i1 @llvm.coro.end(ptr, i1, token) #2
49+
declare ptr @llvm.coro.subfn.addr(ptr nocapture readonly, i8) #1
50+
declare ptr @malloc(i64)
51+
52+
attributes #0 = { presplitcoroutine "target-features"="+tail-call" }
53+
attributes #1 = { argmemonly nounwind readonly }
54+
attributes #2 = { nounwind }
55+
attributes #3 = { nounwind readnone }

0 commit comments

Comments
 (0)