Skip to content

Commit 98dd2f9

Browse files
spupyrevhtyu
spupyrev
authored and committed
profi - a flow-based profile inference algorithm: Part II (out of 3)
This is a continuation of D109860. Traditional flow-based algorithms cannot guarantee that the resulting edge frequencies correspond to a *connected* flow in the control-flow graph. For example, for the instance in the attached figure, a flow-based (or any other) inference algorithm may produce an output in which the hot loop is disconnected from the entry block (refer to the rightmost graph in the figure). Furthermore, computing a connected minimum-cost maximum flow is an NP-hard problem. Hence, we apply post-processing adjustments to the computed flow by connecting all isolated flow components ("islands"). This feature helps to keep all blocks with sample counts connected and results in significant performance wins for some binaries. {F19077343} Reviewed By: hoy Differential Revision: https://reviews.llvm.org/D109903
1 parent 5439242 commit 98dd2f9

File tree

3 files changed

+447
-0
lines changed

3 files changed

+447
-0
lines changed

llvm/lib/Transforms/Utils/SampleProfileInference.cpp

+208
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,177 @@ class MinCostMaxFlow {
271271
uint64_t Target;
272272
};
273273

274+
/// Post-processing adjustment of the control flow.
275+
class FlowAdjuster {
276+
public:
277+
FlowAdjuster(FlowFunction &Func) : Func(Func) {
278+
assert(Func.Blocks[Func.Entry].isEntry() &&
279+
"incorrect index of the entry block");
280+
}
281+
282+
// Run the post-processing
283+
void run() {
284+
/// We adjust the control flow in a function so as to remove all
285+
/// "isolated" components with positive flow that are unreachable
286+
/// from the entry block. For every such component, we find the shortest
287+
/// path from the entry to an exit passing through the component, and
288+
/// increase the flow by one unit along the path.
289+
joinIsolatedComponents();
290+
}
291+
292+
private:
293+
void joinIsolatedComponents() {
294+
// Find blocks that are reachable from the source
295+
auto Visited = std::vector<bool>(NumBlocks(), false);
296+
findReachable(Func.Entry, Visited);
297+
298+
// Iterate over all non-reachable blocks and adjust their weights
299+
for (uint64_t I = 0; I < NumBlocks(); I++) {
300+
auto &Block = Func.Blocks[I];
301+
if (Block.Flow > 0 && !Visited[I]) {
302+
// Find a path from the entry to an exit passing through the block I
303+
auto Path = findShortestPath(I);
304+
// Increase the flow along the path
305+
assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
306+
"incorrectly computed path adjusting control flow");
307+
Func.Blocks[Func.Entry].Flow += 1;
308+
for (auto &Jump : Path) {
309+
Jump->Flow += 1;
310+
Func.Blocks[Jump->Target].Flow += 1;
311+
// Update reachability
312+
findReachable(Jump->Target, Visited);
313+
}
314+
}
315+
}
316+
}
317+
318+
/// Run bfs from a given block along the jumps with a positive flow and mark
319+
/// all reachable blocks.
320+
void findReachable(uint64_t Src, std::vector<bool> &Visited) {
321+
if (Visited[Src])
322+
return;
323+
std::queue<uint64_t> Queue;
324+
Queue.push(Src);
325+
Visited[Src] = true;
326+
while (!Queue.empty()) {
327+
Src = Queue.front();
328+
Queue.pop();
329+
for (auto Jump : Func.Blocks[Src].SuccJumps) {
330+
uint64_t Dst = Jump->Target;
331+
if (Jump->Flow > 0 && !Visited[Dst]) {
332+
Queue.push(Dst);
333+
Visited[Dst] = true;
334+
}
335+
}
336+
}
337+
}
338+
339+
/// Find the shortest path from the entry block to an exit block passing
340+
/// through a given block.
341+
std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
342+
// A path from the entry block to BlockIdx
343+
auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
344+
// A path from BlockIdx to an exit block
345+
auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
346+
347+
// Concatenate the two paths
348+
std::vector<FlowJump *> Result;
349+
Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
350+
Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
351+
return Result;
352+
}
353+
354+
/// Apply the Dijkstra algorithm to find the shortest path from a given
355+
/// Source to a given Target block.
356+
/// If Target == -1, then the path ends at an exit block.
357+
std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
358+
// Quit early, if possible
359+
if (Source == Target)
360+
return std::vector<FlowJump *>();
361+
if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
362+
return std::vector<FlowJump *>();
363+
364+
// Initialize data structures
365+
auto Distance = std::vector<int64_t>(NumBlocks(), INF);
366+
auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
367+
Distance[Source] = 0;
368+
std::set<std::pair<uint64_t, uint64_t>> Queue;
369+
Queue.insert(std::make_pair(Distance[Source], Source));
370+
371+
// Run the Dijkstra algorithm
372+
while (!Queue.empty()) {
373+
uint64_t Src = Queue.begin()->second;
374+
Queue.erase(Queue.begin());
375+
// If we found a solution, quit early
376+
if (Src == Target ||
377+
(Func.Blocks[Src].isExit() && Target == AnyExitBlock))
378+
break;
379+
380+
for (auto Jump : Func.Blocks[Src].SuccJumps) {
381+
uint64_t Dst = Jump->Target;
382+
int64_t JumpDist = jumpDistance(Jump);
383+
if (Distance[Dst] > Distance[Src] + JumpDist) {
384+
Queue.erase(std::make_pair(Distance[Dst], Dst));
385+
386+
Distance[Dst] = Distance[Src] + JumpDist;
387+
Parent[Dst] = Jump;
388+
389+
Queue.insert(std::make_pair(Distance[Dst], Dst));
390+
}
391+
}
392+
}
393+
// If Target is not provided, find the closest exit block
394+
if (Target == AnyExitBlock) {
395+
for (uint64_t I = 0; I < NumBlocks(); I++) {
396+
if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
397+
if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
398+
Target = I;
399+
}
400+
}
401+
}
402+
}
403+
assert(Parent[Target] != nullptr && "a path does not exist");
404+
405+
// Extract the constructed path
406+
std::vector<FlowJump *> Result;
407+
uint64_t Now = Target;
408+
while (Now != Source) {
409+
assert(Now == Parent[Now]->Target && "incorrect parent jump");
410+
Result.push_back(Parent[Now]);
411+
Now = Parent[Now]->Source;
412+
}
413+
// Reverse the path, since it is extracted from Target to Source
414+
std::reverse(Result.begin(), Result.end());
415+
return Result;
416+
}
417+
418+
/// A distance of a path for a given jump.
419+
/// In order to incite the path to use blocks/jumps with large positive flow,
420+
/// and avoid changing branch probability of outgoing edges drastically,
421+
/// set the distance as follows:
422+
/// if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0)
423+
/// if Block.Weight > 0, then distance = 1
424+
/// otherwise distance >> 1
425+
int64_t jumpDistance(FlowJump *Jump) const {
426+
int64_t BaseDistance = 100;
427+
if (Jump->IsUnlikely)
428+
return MinCostMaxFlow::AuxCostUnlikely;
429+
if (Jump->Flow > 0)
430+
return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
431+
if (Func.Blocks[Jump->Target].Weight > 0)
432+
return BaseDistance;
433+
return BaseDistance * (NumBlocks() + 1);
434+
};
435+
436+
uint64_t NumBlocks() const { return Func.Blocks.size(); }
437+
438+
/// A constant indicating an arbitrary exit block of a function.
439+
static constexpr uint64_t AnyExitBlock = uint64_t(-1);
440+
441+
/// The function.
442+
FlowFunction &Func;
443+
};
444+
274445
/// Initializing flow network for a given function.
275446
///
276447
/// Every block is split into three nodes that are responsible for (i) an
@@ -440,6 +611,39 @@ void verifyWeights(const FlowFunction &Func) {
440611
}
441612
}
442613
assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
614+
615+
// Verify that there are no isolated flow components
616+
// One could modify FlowFunction to hold edges indexed by the sources, which
617+
// will avoid a creation of the object
618+
auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
619+
for (auto &Jump : Func.Jumps) {
620+
if (Jump.Flow > 0) {
621+
PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
622+
}
623+
}
624+
625+
// Run bfs from the source along edges with positive flow
626+
std::queue<uint64_t> Queue;
627+
auto Visited = std::vector<bool>(NumBlocks, false);
628+
Queue.push(Func.Entry);
629+
Visited[Func.Entry] = true;
630+
while (!Queue.empty()) {
631+
uint64_t Src = Queue.front();
632+
Queue.pop();
633+
for (uint64_t Dst : PositiveFlowEdges[Src]) {
634+
if (!Visited[Dst]) {
635+
Queue.push(Dst);
636+
Visited[Dst] = true;
637+
}
638+
}
639+
}
640+
641+
// Verify that every block that has a positive flow is reached from the source
642+
// along edges with a positive flow
643+
for (uint64_t I = 0; I < NumBlocks; I++) {
644+
auto &Block = Func.Blocks[I];
645+
assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
646+
}
443647
}
444648
#endif
445649

@@ -455,6 +659,10 @@ void llvm::applyFlowInference(FlowFunction &Func) {
455659
// Extract flow values for every block and every edge
456660
extractWeights(InferenceNetwork, Func);
457661

662+
// Post-processing adjustments to the flow
663+
auto Adjuster = FlowAdjuster(Func);
664+
Adjuster.run();
665+
458666
#ifndef NDEBUG
459667
// Verify the result
460668
verifyWeights(Func);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
islands_1:10822:0
2+
1: 1
3+
2: 100
4+
3: 100
5+
4: 1
6+
5: 0
7+
6: 1
8+
7: 0
9+
!CFGChecksum: 120879332589
10+
11+
islands_2:108:0
12+
1: 0
13+
2: 10000
14+
3: 10000
15+
4: 0
16+
!CFGChecksum: 69495280403
17+
18+
islands_3:108:0
19+
1: 10
20+
2: 0
21+
3: 10
22+
4: 0
23+
5: 1000
24+
6: 1000
25+
7: 0
26+
8: 10
27+
!CFGChecksum: 156608410269

0 commit comments

Comments
 (0)