Skip to content

Commit 4247806

Browse files
committed
[NFC] [Coroutines] Add a fastpath when computing the cross suspend point information
Mitigates #62348. The root cause of that issue is that we use a textbook dataflow analysis to compute the cross-suspend-point information. The analysis is powerful but does not scale. It is not easy to improve the current algorithm, so this patch tries to prune some branches to mitigate the problem.

Before the patch:
```
n: 20000 real 0m11.081s user 0m10.597s sys 0m0.320s
n: 40000 real 0m32.927s user 0m31.403s sys 0m1.043s
n: 60000 real 1m2.145s user 0m58.903s sys 0m2.268s
n: 80000 real 1m47.143s user 1m41.630s sys 0m3.857s
n: 100000 real 2m34.758s user 2m26.587s sys 0m5.922s
```

After the patch:
```
n: 20000 real 0m10.418s user 0m9.945s sys 0m0.311s
n: 40000 real 0m27.884s user 0m26.430s sys 0m1.036s
n: 60000 real 0m52.420s user 0m49.321s sys 0m2.267s
n: 80000 real 1m25.389s user 1m20.247s sys 0m3.856s
n: 100000 real 2m4.275s user 1m56.405s sys 0m5.975s
```

This patch is intended to be an NFC (no functional change) patch.
1 parent de24d08 commit 4247806

File tree

1 file changed

+81
-66
lines changed

1 file changed

+81
-66
lines changed

llvm/lib/Transforms/Coroutines/CoroFrame.cpp

Lines changed: 81 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ class BlockToIndexMapping {
8989
// crosses a suspend point.
9090
//
9191
namespace {
92-
struct SuspendCrossingInfo {
92+
class SuspendCrossingInfo {
9393
BlockToIndexMapping Mapping;
9494

9595
struct BlockData {
@@ -98,18 +98,26 @@ struct SuspendCrossingInfo {
9898
bool Suspend = false;
9999
bool End = false;
100100
bool KillLoop = false;
101+
bool Changed = false;
101102
};
102103
SmallVector<BlockData, SmallVectorThreshold> Block;
103104

104-
iterator_range<succ_iterator> successors(BlockData const &BD) const {
105+
iterator_range<pred_iterator> predecessors(BlockData const &BD) const {
105106
BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
106-
return llvm::successors(BB);
107+
return llvm::predecessors(BB);
107108
}
108109

109110
BlockData &getBlockData(BasicBlock *BB) {
110111
return Block[Mapping.blockToIndex(BB)];
111112
}
112113

114+
/// Compute the BlockData for the current function in one iteration.
115+
/// Returns whether the BlockData changes in this iteration.
116+
/// Initialize - Whether this is the first iteration. If so, we can optimize
117+
/// the initial case a little by manually unswitching the loop.
118+
template <bool Initialize = false> bool computeBlockData();
119+
120+
public:
113121
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
114122
void dump() const;
115123
void dump(StringRef Label, BitVector const &BV) const;
@@ -215,6 +223,72 @@ LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
215223
}
216224
#endif
217225

226+
/// Runs one propagation pass over every block, merging the Consumes and Kills
/// sets of each block's predecessors into the block itself.
/// Returns true if any block's Consumes or Kills set changed during the pass;
/// the Initialize=true instantiation unconditionally returns true.
template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
227+
const size_t N = Mapping.size();
228+
bool Changed = false;
229+
230+
for (size_t I = 0; I < N; ++I) {
231+
auto &B = Block[I];
232+
233+
// During initialization we skip the predecessor check below and always
234+
if constexpr (!Initialize)
235+
// If none of the predecessors of the current block changed,
236+
// the BlockData for the current block cannot change either.
237+
if (all_of(predecessors(B), [this](BasicBlock *BB) {
238+
return !Block[Mapping.blockToIndex(BB)].Changed;
239+
})) {
240+
B.Changed = false;
241+
continue;
242+
}
243+
244+
// Save the Consumes and Kills bitsets so that it is easy to see
245+
// if anything changed after propagation.
246+
auto SavedConsumes = B.Consumes;
247+
auto SavedKills = B.Kills;
248+
249+
for (BasicBlock *PI : predecessors(B)) {
250+
auto PrevNo = Mapping.blockToIndex(PI);
251+
auto &P = Block[PrevNo];
252+
253+
// Propagate Kills and Consumes from predecessors into B.
254+
B.Consumes |= P.Consumes;
255+
B.Kills |= P.Kills;
256+
257+
// If block P is a suspend block, it should propagate kills into block
258+
// B for every block P consumes.
259+
if (P.Suspend)
260+
B.Kills |= P.Consumes;
261+
}
262+
263+
if (B.Suspend) {
264+
// If block B is a suspend block, it should kill all of the blocks it
265+
// consumes.
266+
B.Kills |= B.Consumes;
267+
} else if (B.End) {
268+
// If block B is an end block, it should not propagate kills as the
269+
// blocks following coro.end() are reached during initial invocation
270+
// of the coroutine while all the data are still available on the
271+
// stack or in the registers.
272+
B.Kills.reset();
273+
} else {
274+
// This is reached when block B is neither a suspend block nor a coro.end
275+
// block; record in KillLoop whether B was in its own kill set, then remove it.
276+
B.KillLoop |= B.Kills[I];
277+
B.Kills.reset(I);
278+
}
279+
280+
// Record whether this block changed; the predecessor check at the top of
// the loop reads this flag to decide whether a block can be skipped.
if constexpr (!Initialize) {
281+
B.Changed = (B.Kills != SavedKills) || (B.Consumes != SavedConsumes);
282+
Changed |= B.Changed;
283+
}
284+
}
285+
286+
if constexpr (Initialize)
287+
return true;
288+
289+
return Changed;
290+
}
291+
218292
SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
219293
: Mapping(F) {
220294
const size_t N = Mapping.size();
@@ -226,6 +300,7 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
226300
B.Consumes.resize(N);
227301
B.Kills.resize(N);
228302
B.Consumes.set(I);
303+
B.Changed = true;
229304
}
230305

231306
// Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
@@ -250,71 +325,11 @@ SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
250325
markSuspendBlock(Save);
251326
}
252327

253-
// Iterate propagating consumes and kills until they stop changing.
254-
int Iteration = 0;
255-
(void)Iteration;
328+
computeBlockData</*Initialize=*/true>();
256329

257-
bool Changed;
258-
do {
259-
LLVM_DEBUG(dbgs() << "iteration " << ++Iteration);
260-
LLVM_DEBUG(dbgs() << "==============\n");
261-
262-
Changed = false;
263-
for (size_t I = 0; I < N; ++I) {
264-
auto &B = Block[I];
265-
for (BasicBlock *SI : successors(B)) {
266-
267-
auto SuccNo = Mapping.blockToIndex(SI);
268-
269-
// Saved Consumes and Kills bitsets so that it is easy to see
270-
// if anything changed after propagation.
271-
auto &S = Block[SuccNo];
272-
auto SavedConsumes = S.Consumes;
273-
auto SavedKills = S.Kills;
274-
275-
// Propagate Kills and Consumes from block B into its successor S.
276-
S.Consumes |= B.Consumes;
277-
S.Kills |= B.Kills;
278-
279-
// If block B is a suspend block, it should propagate kills into the
280-
// its successor for every block B consumes.
281-
if (B.Suspend) {
282-
S.Kills |= B.Consumes;
283-
}
284-
if (S.Suspend) {
285-
// If block S is a suspend block, it should kill all of the blocks it
286-
// consumes.
287-
S.Kills |= S.Consumes;
288-
} else if (S.End) {
289-
// If block S is an end block, it should not propagate kills as the
290-
// blocks following coro.end() are reached during initial invocation
291-
// of the coroutine while all the data are still available on the
292-
// stack or in the registers.
293-
S.Kills.reset();
294-
} else {
295-
// This is reached when S block it not Suspend nor coro.end and it
296-
// need to make sure that it is not in the kill set.
297-
S.KillLoop |= S.Kills[SuccNo];
298-
S.Kills.reset(SuccNo);
299-
}
300-
301-
// See if anything changed.
302-
Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);
330+
while (computeBlockData())
331+
;
303332

304-
if (S.Kills != SavedKills) {
305-
LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
306-
<< "\n");
307-
LLVM_DEBUG(dump("S.Kills", S.Kills));
308-
LLVM_DEBUG(dump("SavedKills", SavedKills));
309-
}
310-
if (S.Consumes != SavedConsumes) {
311-
LLVM_DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n");
312-
LLVM_DEBUG(dump("S.Consume", S.Consumes));
313-
LLVM_DEBUG(dump("SavedCons", SavedConsumes));
314-
}
315-
}
316-
}
317-
} while (Changed);
318333
LLVM_DEBUG(dump());
319334
}
320335

0 commit comments

Comments
 (0)