Skip to content

Commit 2ec50bd

Browse files
JIT: Switch optOptimizeLayout to pre-layout optimization phase (#113224)
Part of #107749. This is part of my broader goal of phasing out fgReorderBlocks entirely, but there are a few transformations we may want to do right before block layout. In particular, I noticed in #113108 (comment) that some regressed benchmarks have loops with unconditional backedges; in the absence of better loop inversion, we might want to run our unconditional-to-conditional branch duplication optimization right before layout to improve branch behavior in loops. We can put such optimizations in this pre-layout phase.
1 parent 3f40427 commit 2ec50bd

File tree

5 files changed

+32
-32
lines changed

5 files changed

+32
-32
lines changed

src/coreclr/jit/compiler.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -4936,13 +4936,13 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
49364936
//
49374937
DoPhase(this, PHASE_IF_CONVERSION, &Compiler::optIfConversion);
49384938

4939-
// Optimize block order
4939+
// Conditional to switch conversion, and switch peeling
49404940
//
4941-
DoPhase(this, PHASE_OPTIMIZE_LAYOUT, &Compiler::optOptimizeLayout);
4941+
DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optRecognizeAndOptimizeSwitchJumps);
49424942

4943-
// Conditional to Switch conversion
4943+
// Run flow optimizations before reordering blocks
49444944
//
4945-
DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optSwitchRecognition);
4945+
DoPhase(this, PHASE_OPTIMIZE_PRE_LAYOUT, &Compiler::optOptimizePreLayout);
49464946

49474947
// Run profile repair
49484948
//

src/coreclr/jit/compiler.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -7006,13 +7006,13 @@ class Compiler
70067006

70077007
public:
70087008
PhaseStatus optOptimizeBools();
7009-
PhaseStatus optSwitchRecognition();
7009+
PhaseStatus optRecognizeAndOptimizeSwitchJumps();
70107010
bool optSwitchConvert(BasicBlock* firstBlock, int testsCount, ssize_t* testValues, weight_t falseLikelihood, GenTree* nodeToTest);
70117011
bool optSwitchDetectAndConvert(BasicBlock* firstBlock);
70127012

70137013
PhaseStatus optInvertLoops(); // Invert loops so they're entered at top and tested at bottom.
70147014
PhaseStatus optOptimizeFlow(); // Simplify flow graph and do tail duplication
7015-
PhaseStatus optOptimizeLayout(); // Optimize the BasicBlock layout of the method
7015+
PhaseStatus optOptimizePreLayout(); // Optimize flow before running block layout
70167016
PhaseStatus optOptimizePostLayout(); // Run optimizations after block layout is finalized
70177017
PhaseStatus optSetBlockWeights();
70187018
PhaseStatus optFindLoopsPhase(); // Finds loops and records them in the loop table

src/coreclr/jit/compphases.h

+1
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ CompPhaseNameMacro(PHASE_MERGE_THROWS, "Merge throw blocks",
6565
CompPhaseNameMacro(PHASE_INVERT_LOOPS, "Invert loops", false, -1, false)
6666
CompPhaseNameMacro(PHASE_HEAD_TAIL_MERGE2, "Post-morph head and tail merge", false, -1, false)
6767
CompPhaseNameMacro(PHASE_OPTIMIZE_FLOW, "Optimize control flow", false, -1, false)
68+
CompPhaseNameMacro(PHASE_OPTIMIZE_PRE_LAYOUT, "Optimize pre-layout", false, -1, false)
6869
CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT, "Optimize layout", false, -1, false)
6970
CompPhaseNameMacro(PHASE_OPTIMIZE_POST_LAYOUT, "Optimize post-layout", false, -1, false)
7071
CompPhaseNameMacro(PHASE_COMPUTE_DOMINATORS, "Compute dominators", false, -1, false)

src/coreclr/jit/optimizer.cpp

+11-17
Original file line numberDiff line numberDiff line change
@@ -2395,30 +2395,24 @@ PhaseStatus Compiler::optOptimizeFlow()
23952395
}
23962396

23972397
//-----------------------------------------------------------------------------
2398-
// optOptimizeLayout: reorder blocks to reduce cost of control flow
2398+
// optOptimizePreLayout: Optimizes flow before reordering blocks.
23992399
//
24002400
// Returns:
24012401
// suitable phase status
24022402
//
2403-
// Notes:
2404-
// Reorders using profile data, if available.
2405-
//
2406-
PhaseStatus Compiler::optOptimizeLayout()
2403+
PhaseStatus Compiler::optOptimizePreLayout()
24072404
{
2408-
noway_assert(opts.OptimizationEnabled());
2405+
assert(opts.OptimizationEnabled());
24092406

2410-
fgUpdateFlowGraph(/* doTailDuplication */ false);
2411-
fgReorderBlocks(/* useProfile */ true);
2412-
fgUpdateFlowGraph(/* doTailDuplication */ false, /* isPhase */ false);
2407+
bool modified = fgUpdateFlowGraph();
24132408

2414-
// fgReorderBlocks can cause IR changes even if it does not modify
2415-
// the flow graph. It calls gtPrepareCost which can cause operand swapping.
2416-
// Work around this for now.
2417-
//
2418-
// Note phase status only impacts dumping and checking done post-phase,
2419-
// it has no impact on a release build.
2420-
//
2421-
return PhaseStatus::MODIFIED_EVERYTHING;
2409+
// TODO: Always rely on profile synthesis to identify cold blocks.
2410+
if (!fgIsUsingProfileWeights())
2411+
{
2412+
modified |= fgExpandRarelyRunBlocks();
2413+
}
2414+
2415+
return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
24222416
}
24232417

24242418
//-----------------------------------------------------------------------------

src/coreclr/jit/switchrecognition.cpp

+14-9
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,22 @@
1212
#define SWITCH_MIN_TESTS 3
1313

1414
//-----------------------------------------------------------------------------
15-
// optSwitchRecognition: Optimize range check for `x == cns1 || x == cns2 || x == cns3 ...`
16-
// pattern and convert it to Switch block (jump table) which is then *might* be converted
15+
// optRecognizeAndOptimizeSwitchJumps: Optimize range check for `x == cns1 || x == cns2 || x == cns3 ...`
16+
// pattern and convert it to a BBJ_SWITCH block (jump table), which then *might* be converted
1717
// to a bitmap test via TryLowerSwitchToBitTest.
18+
// If we have PGO data, try peeling switches with dominant cases.
1819
// TODO: recognize general jump table patterns.
1920
//
2021
// Return Value:
21-
// MODIFIED_EVERYTHING if the optimization was applied.
22+
// MODIFIED_EVERYTHING if any switches were newly identified and/or optimized, false otherwise
2223
//
23-
PhaseStatus Compiler::optSwitchRecognition()
24+
PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps()
2425
{
26+
bool modified = false;
27+
2528
// Limit to XARCH, ARM is already doing a great job with such comparisons using
2629
// a series of ccmp instruction (see ifConvert phase).
2730
#ifdef TARGET_XARCH
28-
bool modified = false;
2931
for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next())
3032
{
3133
// block->KindIs(BBJ_COND) check is for better throughput.
@@ -35,13 +37,16 @@ PhaseStatus Compiler::optSwitchRecognition()
3537
modified = true;
3638
}
3739
}
40+
#endif
3841

39-
if (modified)
42+
// When we have profile data, we can identify a switch's dominant case.
43+
// Try peeling the dominant case by checking for it up front.
44+
if (fgIsUsingProfileWeights())
4045
{
41-
return PhaseStatus::MODIFIED_EVERYTHING;
46+
modified |= fgOptimizeSwitchJumps();
4247
}
43-
#endif
44-
return PhaseStatus::MODIFIED_NOTHING;
48+
49+
return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
4550
}
4651

4752
//------------------------------------------------------------------------------

0 commit comments

Comments
 (0)