Skip to content

[coro][pgo] Do not insert counters in the suspend block #71262

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 53 additions & 12 deletions llvm/include/llvm/Transforms/Instrumentation/CFGMST.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
Expand Down Expand Up @@ -121,31 +123,70 @@ template <class Edge, class BBInfo> class CFGMST {

static const uint32_t CriticalEdgeMultiplier = 1000;

// Returns TI as a SwitchInst when F is a pre-split coroutine and TI is a
// switch whose condition is a call to the llvm.coro.suspend intrinsic.
// Returns nullptr in every other case.
auto GetCoroSuspendSwitch =
    [&](const Instruction *TI) -> const SwitchInst * {
  // Only functions that are still pre-split coroutines are of interest.
  if (!F.isPresplitCoroutine())
    return nullptr;

  const auto *Switch = dyn_cast<SwitchInst>(TI);
  if (!Switch)
    return nullptr;

  // The switch must be driven directly by the coro.suspend intrinsic result.
  const auto *Cond = dyn_cast<IntrinsicInst>(Switch->getCondition());
  if (!Cond || Cond->getIntrinsicID() != Intrinsic::coro_suspend)
    return nullptr;

  return Switch;
};

for (BasicBlock &BB : F) {
Instruction *TI = BB.getTerminator();
const SwitchInst *CoroSuspendSwitch = GetCoroSuspendSwitch(TI);
uint64_t BBWeight =
(BFI != nullptr ? BFI->getBlockFreq(&BB).getFrequency() : 2);
uint64_t Weight = 2;
if (int successors = TI->getNumSuccessors()) {
for (int i = 0; i != successors; ++i) {
BasicBlock *TargetBB = TI->getSuccessor(i);
bool Critical = isCriticalEdge(TI, i);
uint64_t scaleFactor = BBWeight;
if (Critical) {
if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
scaleFactor *= CriticalEdgeMultiplier;
else
scaleFactor = UINT64_MAX;
const bool Critical = isCriticalEdge(TI, i);
const bool IsCoroSuspendTarget =
CoroSuspendSwitch &&
CoroSuspendSwitch->getDefaultDest() == TargetBB;
// We must not add instrumentation to the BB representing the
// "suspend" path, else CoroSplit won't be able to lower
// llvm.coro.suspend to a tail call. We do want profiling info for
// the other branches (resume/destroy). So we do 2 things:
// 1. we prefer instrumenting those other edges by setting the weight
// of the "suspend" edge to max, and
// 2. we mark the edge as "Removed" to guarantee it is not considered
// for instrumentation. That could technically happen:
// (from test/Transforms/Coroutines/coro-split-musttail.ll)
//
// %suspend = call i8 @llvm.coro.suspend(token %save, i1 false)
// switch i8 %suspend, label %exit [
// i8 0, label %await.ready
// i8 1, label %exit
// ]
if (IsCoroSuspendTarget) {
Weight = UINT64_MAX;
} else {
bool Critical = isCriticalEdge(TI, i);
uint64_t scaleFactor = BBWeight;
if (Critical) {
if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier)
scaleFactor *= CriticalEdgeMultiplier;
else
scaleFactor = UINT64_MAX;
}
if (BPI != nullptr)
Weight =
BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
if (Weight == 0)
Weight++;
}
if (BPI != nullptr)
Weight = BPI->getEdgeProbability(&BB, TargetBB).scale(scaleFactor);
if (Weight == 0)
Weight++;
auto *E = &addEdge(&BB, TargetBB, Weight);
E->IsCritical = Critical;
// See comment above - we must guarantee the coro suspend BB isn't
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suggest moving all coroutine-handling code into one helper function and calling it after edge creation:

void HandleCoroutine(Edge *E, ....) {
if (!F.notPresplitCoroutine)
return;
if (!NotSwitch(SI))
return;
...
E->Weight = UINT64_MAX;
E->Removed = true;
}

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

// instrumented.
if (IsCoroSuspendTarget)
E->Removed = true;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is setting the weight to MAX needed if the Removed flag is set?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC yes (as per @xur-llvm) because it helps pick the other edges for instrumentation. If there's a more explicit way to do that, I'd prefer that instead.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I take that back. Fixing.

LLVM_DEBUG(dbgs() << " Edge: from " << BB.getName() << " to "
<< TargetBB->getName() << " w=" << Weight << "\n");

// Keep track of entry/exit edges:
if (&BB == Entry) {
if (Weight > MaxEntryOutWeight) {
Expand Down
7 changes: 5 additions & 2 deletions llvm/test/Transforms/Coroutines/coro-split-musttail.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s

define void @f() #0 {
entry:
Expand Down Expand Up @@ -40,7 +41,9 @@ exit:
; Verify that in the resume part resume call is marked with musttail.
; CHECK-LABEL: @f.resume(
; CHECK: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr null)
; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
; PGO: call void @llvm.instrprof
; PGO-NEXT: musttail call fastcc void %[[addr2]](ptr null)
; CHECK-NEXT: ret void

declare token @llvm.coro.id(i32, ptr readnone, ptr nocapture readonly, ptr) #1
Expand Down
12 changes: 8 additions & 4 deletions llvm/test/Transforms/Coroutines/coro-split-musttail1.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s

define void @f() #0 {
entry:
Expand Down Expand Up @@ -63,14 +64,17 @@ unreach:
; CHECK-LABEL: @f.resume(
; CHECK: %[[hdl:.+]] = call ptr @g()
; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
; CHECK-NEXT: ret void
; CHECK: %[[hdl2:.+]] = call ptr @h()
; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
; CHECK-NEXT: ret void
; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null)
; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
; PGO: musttail call fastcc void %[[addr4]](ptr null)
; CHECK-NEXT: ret void


Expand Down
1 change: 1 addition & 0 deletions llvm/test/Transforms/Coroutines/coro-split-musttail10.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; Tests that we would convert coro.resume to a musttail call if the target is
; Wasm64 with tail-call support.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

target triple = "wasm64-unknown-unknown"

Expand Down
1 change: 1 addition & 0 deletions llvm/test/Transforms/Coroutines/coro-split-musttail11.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; Tests that we would convert coro.resume to a musttail call if the target is
; Wasm32 with tail-call support.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

target triple = "wasm32-unknown-unknown"

Expand Down
1 change: 1 addition & 0 deletions llvm/test/Transforms/Coroutines/coro-split-musttail12.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; Tests that coro-split won't convert the cmp instruction prematurely.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

declare void @fakeresume1(ptr)
declare void @print()
Expand Down
1 change: 1 addition & 0 deletions llvm/test/Transforms/Coroutines/coro-split-musttail13.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
; Tests that coro-split won't fall into an infinite loop when simplifying the terminators leading to ret.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

declare void @fakeresume1(ptr)
declare void @may_throw(ptr)
Expand Down
1 change: 1 addition & 0 deletions llvm/test/Transforms/Coroutines/coro-split-musttail2.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

define void @fakeresume1(ptr) {
entry:
Expand Down
12 changes: 8 additions & 4 deletions llvm/test/Transforms/Coroutines/coro-split-musttail3.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; Tests that coro-split will convert coro.resume followed by a suspend to a
; musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,NOPGO %s
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck --check-prefixes=CHECK,PGO %s

define void @f() #0 {
entry:
Expand Down Expand Up @@ -59,14 +60,17 @@ unreach:
; CHECK-LABEL: @f.resume(
; CHECK: %[[hdl:.+]] = call ptr @g()
; CHECK-NEXT: %[[addr2:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl]], i8 0)
; CHECK-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
; NOPGO-NEXT: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
; PGO: musttail call fastcc void %[[addr2]](ptr %[[hdl]])
; CHECK-NEXT: ret void
; CHECK: %[[hdl2:.+]] = call ptr @h()
; CHECK-NEXT: %[[addr3:.+]] = call ptr @llvm.coro.subfn.addr(ptr %[[hdl2]], i8 0)
; CHECK-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
; NOPGO-NEXT: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
; PGO: musttail call fastcc void %[[addr3]](ptr %[[hdl2]])
; CHECK-NEXT: ret void
; CHECK: %[[addr4:.+]] = call ptr @llvm.coro.subfn.addr(ptr null, i8 0)
; CHECK-NEXT: musttail call fastcc void %[[addr4]](ptr null)
; NOPGO-NEXT: musttail call fastcc void %[[addr4]](ptr null)
; PGO: musttail call fastcc void %[[addr4]](ptr null)
; CHECK-NEXT: ret void


Expand Down
1 change: 1 addition & 0 deletions llvm/test/Transforms/Coroutines/coro-split-musttail4.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; Tests that coro-split will convert a call before coro.suspend to a musttail call
; while the user of the coro.suspend is an icmpinst.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

define void @fakeresume1(ptr) {
entry:
Expand Down
1 change: 1 addition & 0 deletions llvm/test/Transforms/Coroutines/coro-split-musttail5.ll
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
; Tests that sunk lifetime markers wouldn't prevent the optimization
; to convert a resuming call to a musttail call.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

declare void @fakeresume1(ptr align 8)

Expand Down
1 change: 1 addition & 0 deletions llvm/test/Transforms/Coroutines/coro-split-musttail6.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
; an extra bitcast instruction in the path, which makes it harder to
; optimize.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

declare void @fakeresume1(ptr align 8)

Expand Down
1 change: 1 addition & 0 deletions llvm/test/Transforms/Coroutines/coro-split-musttail7.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
; is that this contains dead instruction generated during the transformation,
; which makes the optimization harder.
; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
; RUN: opt < %s -passes='pgo-instr-gen,cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s

declare void @fakeresume1(ptr align 8)

Expand Down