Skip to content

[CodeGen][NPM] Port MachineBlockPlacement to NPM #129828

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 14, 2025
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions llvm/include/llvm/CodeGen/MachineBlockPlacement.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//===- llvm/CodeGen/MachineBlockPlacement.h ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H
#define LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H

#include "llvm/CodeGen/MachinePassManager.h"

namespace llvm {

/// New pass manager entry point for machine basic block placement.
///
/// The pass carries a single option, \c AllowTailMerge, which is forwarded to
/// the placement implementation and reflected in the textual pipeline as the
/// \c no-tail-merge parameter.
class MachineBlockPlacementPass
    : public PassInfoMixin<MachineBlockPlacementPass> {

  /// Whether the pass is allowed to tail-merge blocks after placement.
  bool AllowTailMerge = true;

public:
  /// \param AllowTailMerge permit tail merging; defaults to true, matching the
  /// in-class default and the behaviour of an empty pass parameter list.
  ///
  /// The constructor is explicit so that a stray \c bool is never silently
  /// converted into a pass object.
  explicit MachineBlockPlacementPass(bool AllowTailMerge = true)
      : AllowTailMerge(AllowTailMerge) {}

  /// Run block placement on \p MF using analyses obtained from \p MFAM.
  PreservedAnalyses run(MachineFunction &MF,
                        MachineFunctionAnalysisManager &MFAM);

  /// Marks the pass as required.
  // NOTE(review): the legacy wrapper calls skipFunction(); confirm that being
  // unconditionally required is the intended behaviour for this port.
  static bool isRequired() { return true; }

  /// Print the pass name, followed by \c <no-tail-merge> when tail merging is
  /// disabled.
  void
  printPipeline(raw_ostream &OS,
                function_ref<StringRef(StringRef)> MapClassName2PassName) const;
};

} // namespace llvm

#endif // LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H
2 changes: 1 addition & 1 deletion llvm/include/llvm/InitializePasses.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ void initializeMIRCanonicalizerPass(PassRegistry &);
void initializeMIRNamerPass(PassRegistry &);
void initializeMIRPrintingPassPass(PassRegistry &);
void initializeMachineBlockFrequencyInfoWrapperPassPass(PassRegistry &);
void initializeMachineBlockPlacementPass(PassRegistry &);
void initializeMachineBlockPlacementLegacyPass(PassRegistry &);
void initializeMachineBlockPlacementStatsPass(PassRegistry &);
void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &);
void initializeMachineCFGPrinterPass(PassRegistry &);
Expand Down
3 changes: 2 additions & 1 deletion llvm/include/llvm/Passes/CodeGenPassBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "llvm/CodeGen/LocalStackSlotAllocation.h"
#include "llvm/CodeGen/LowerEmuTLS.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineBlockPlacement.h"
#include "llvm/CodeGen/MachineCSE.h"
#include "llvm/CodeGen/MachineCopyPropagation.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
Expand Down Expand Up @@ -1223,7 +1224,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineLateOptimization(
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addBlockPlacement(
AddMachinePass &addPass) const {
addPass(MachineBlockPlacementPass());
addPass(MachineBlockPlacementPass(Opt.EnableTailMerge));
// Run a separate pass to collect block placement statistics.
if (Opt.EnableBlockPlacementStats)
addPass(MachineBlockPlacementStatsPass());
Expand Down
10 changes: 9 additions & 1 deletion llvm/include/llvm/Passes/MachinePassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,15 @@ MACHINE_FUNCTION_PASS("verify<machine-trace-metrics>", MachineTraceMetricsVerifi
#define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, \
PARAMS)
#endif

// Machine basic block placement. Accepts the optional parameter
// "tail-merge" or "no-tail-merge"; the parsed boolean is handed to the pass
// constructor as AllowTailMerge.
MACHINE_FUNCTION_PASS_WITH_PARAMS(
"block-placement", "MachineBlockPlacementPass",
[](bool AllowTailMerge) {
// AllowTailMerge is true unless "no-tail-merge" was given; an empty
// parameter list also parses to true.
return MachineBlockPlacementPass(AllowTailMerge);
},
parseMachineBlockPlacementPassOptions, "no-tail-merge;tail-merge")

MACHINE_FUNCTION_PASS_WITH_PARAMS(
"machine-sink", "MachineSinkingPass",
[](bool EnableSinkAndFold) {
Expand Down Expand Up @@ -242,7 +251,6 @@ DUMMY_MACHINE_MODULE_PASS("mir-strip-debug", StripDebugMachineModulePass)
#endif
DUMMY_MACHINE_FUNCTION_PASS("bbsections-prepare", BasicBlockSectionsPass)
DUMMY_MACHINE_FUNCTION_PASS("bbsections-profile-reader", BasicBlockSectionsProfileReaderPass)
DUMMY_MACHINE_FUNCTION_PASS("block-placement", MachineBlockPlacementPass)
DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass)
DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass)
DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass)
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Target/CGPassBuilderOption.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ struct CGPassBuilderOption {
bool EnableGlobalMergeFunc = false;
bool EnableMachineFunctionSplitter = false;
bool EnableSinkAndFold = false;
bool EnableTailMerge = true;
bool MISchedPostRA = false;
bool EarlyLiveIntervals = false;
bool GCEmptyBlocks = false;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/CodeGen/CodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMIRNamerPass(Registry);
initializeMIRProfileLoaderPassPass(Registry);
initializeMachineBlockFrequencyInfoWrapperPassPass(Registry);
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementLegacyPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
initializeMachineCFGPrinterPass(Registry);
initializeMachineCSELegacyPass(Registry);
Expand Down
127 changes: 94 additions & 33 deletions llvm/lib/CodeGen/MachineBlockPlacement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/MachineBlockPlacement.h"
#include "BranchFolding.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
Expand Down Expand Up @@ -357,7 +358,7 @@ class BlockChain {
unsigned UnscheduledPredecessors = 0;
};

class MachineBlockPlacement : public MachineFunctionPass {
class MachineBlockPlacement {
/// A type for a block filter set.
using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;

Expand Down Expand Up @@ -409,7 +410,11 @@ class MachineBlockPlacement : public MachineFunctionPass {

ProfileSummaryInfo *PSI = nullptr;

TargetPassConfig *PassConfig = nullptr;
// Tail merging is also determined based on
// whether structured CFG is required.
bool AllowTailMerge;

CodeGenOptLevel OptLevel;

/// Duplicator used to duplicate tails during placement.
///
Expand Down Expand Up @@ -608,18 +613,48 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// Create a single CFG chain from the current block order.
void createCFGChainExtTsp();

public:
/// Build the placement implementation from analyses gathered by the caller
/// (either the legacy wrapper pass or the new pass manager pass).
///
/// \param MBPI branch probabilities for the function being laid out.
/// \param MLI machine loop information.
/// \param PSI profile summary information.
/// \param MBFI block frequency wrapper; ownership is transferred to this
///        object.
/// \param MPDT post-dominator tree; both callers pass nullptr when
///        allowTailDupPlacement() is false for the function.
/// \param AllowTailMerge whether tail merging may be performed by run().
///
/// OptLevel is not set here; run() assigns it from the target machine.
MachineBlockPlacement(const MachineBranchProbabilityInfo *MBPI,
                      MachineLoopInfo *MLI, ProfileSummaryInfo *PSI,
                      std::unique_ptr<MBFIWrapper> MBFI,
                      MachinePostDominatorTree *MPDT, bool AllowTailMerge)
    : MBPI(MBPI), MBFI(std::move(MBFI)), MLI(MLI), MPDT(MPDT), PSI(PSI),
      AllowTailMerge(AllowTailMerge) {}

bool run(MachineFunction &F);

/// Whether tail duplication during placement is permitted for \p MF: the
/// TailDupPlacement option must be enabled and the target must not require a
/// structured CFG.
static bool allowTailDupPlacement(MachineFunction &MF) {
  // The option is checked first; the target is only queried when it is on.
  if (!TailDupPlacement)
    return false;
  return !MF.getTarget().requiresStructuredCFG();
}
};

class MachineBlockPlacementLegacy : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid

MachineBlockPlacement() : MachineFunctionPass(ID) {
initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
MachineBlockPlacementLegacy() : MachineFunctionPass(ID) {
initializeMachineBlockPlacementLegacyPass(*PassRegistry::getPassRegistry());
}

bool runOnMachineFunction(MachineFunction &F) override;
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(MF.getFunction()))
return false;

bool allowTailDupPlacement() const {
assert(F);
return TailDupPlacement && !F->getTarget().requiresStructuredCFG();
auto *MBPI =
&getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
auto MBFI = std::make_unique<MBFIWrapper>(
getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
? &getAnalysis<MachinePostDominatorTreeWrapperPass>()
.getPostDomTree()
: nullptr;
auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
auto *PassConfig = &getAnalysis<TargetPassConfig>();
bool AllowTailMerge = PassConfig->getEnableTailMerge();
return MachineBlockPlacement(MBPI, MLI, PSI, std::move(MBFI), MPDT,
AllowTailMerge)
.run(MF);
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
Expand All @@ -636,18 +671,18 @@ class MachineBlockPlacement : public MachineFunctionPass {

} // end anonymous namespace

char MachineBlockPlacement::ID = 0;
char MachineBlockPlacementLegacy::ID = 0;

char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
char &llvm::MachineBlockPlacementID = MachineBlockPlacementLegacy::ID;

INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
INITIALIZE_PASS_BEGIN(MachineBlockPlacementLegacy, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
INITIALIZE_PASS_END(MachineBlockPlacementLegacy, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)

#ifndef NDEBUG
Expand Down Expand Up @@ -1130,7 +1165,7 @@ MachineBlockPlacement::getBestTrellisSuccessor(
MachineBasicBlock *Succ1 = BestA.Dest;
MachineBasicBlock *Succ2 = BestB.Dest;
// Check to see if tail-duplication would be profitable.
if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ2) &&
canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
Chain, BlockFilter)) {
Expand Down Expand Up @@ -1655,7 +1690,7 @@ MachineBlockPlacement::selectBestSuccessor(const MachineBasicBlock *BB,
if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
Chain, BlockFilter)) {
// If tail duplication would make Succ profitable, place it.
if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ))
DupCandidates.emplace_back(SuccProb, Succ);
continue;
}
Expand Down Expand Up @@ -1883,7 +1918,7 @@ void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB,
auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
MachineBasicBlock *BestSucc = Result.BB;
bool ShouldTailDup = Result.ShouldTailDup;
if (allowTailDupPlacement())
if (allowTailDupPlacement(*F))
ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
BB, BestSucc, Chain, BlockFilter));

Expand All @@ -1910,7 +1945,7 @@ void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB,

// Placement may have changed tail duplication opportunities.
// Check for that now.
if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
if (allowTailDupPlacement(*F) && BestSucc && ShouldTailDup) {
repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
BlockFilter, PrevUnplacedBlockIt,
PrevUnplacedBlockInFilterIt);
Expand Down Expand Up @@ -3466,7 +3501,7 @@ void MachineBlockPlacement::initTailDupThreshold() {

// For aggressive optimization, we can adjust some thresholds to be less
// conservative.
if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive) {
if (OptLevel >= CodeGenOptLevel::Aggressive) {
// At O3 we should be more willing to copy blocks for tail duplication. This
// increases size pressure, so we only do it at O3
// Do this unless only the regular threshold is explicitly set.
Expand All @@ -3478,29 +3513,56 @@ void MachineBlockPlacement::initTailDupThreshold() {
// If there's no threshold provided through options, query the target
// information for a threshold instead.
if (TailDupPlacementThreshold.getNumOccurrences() == 0 &&
(PassConfig->getOptLevel() < CodeGenOptLevel::Aggressive ||
(OptLevel < CodeGenOptLevel::Aggressive ||
TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0))
TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel());
TailDupSize = TII->getTailDuplicateSize(OptLevel);
}

bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
/// New pass manager driver: collect the analyses the placement implementation
/// needs, run it on \p MF, and report which analyses remain valid.
PreservedAnalyses
MachineBlockPlacementPass::run(MachineFunction &MF,
                               MachineFunctionAnalysisManager &MFAM) {
  auto *BranchProbs = &MFAM.getResult<MachineBranchProbabilityAnalysis>(MF);
  auto BlockFreqs = std::make_unique<MBFIWrapper>(
      MFAM.getResult<MachineBlockFrequencyAnalysis>(MF));
  auto *Loops = &MFAM.getResult<MachineLoopAnalysis>(MF);

  // The post-dominator tree is only requested when tail duplication during
  // placement is allowed for this function.
  auto *PostDomTree =
      MachineBlockPlacement::allowTailDupPlacement(MF)
          ? &MFAM.getResult<MachinePostDominatorTreeAnalysis>(MF)
          : nullptr;

  // The profile summary is a module-level analysis, so only a cached result
  // can be fetched through the proxy; it must already have been computed.
  auto &ModuleProxy =
      MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF);
  auto *ProfileSummary = ModuleProxy.getCachedResult<ProfileSummaryAnalysis>(
      *MF.getFunction().getParent());
  if (!ProfileSummary)
    report_fatal_error("MachineBlockPlacement requires ProfileSummaryAnalysis",
                       false);

  MachineBlockPlacement Placement(BranchProbs, Loops, ProfileSummary,
                                  std::move(BlockFreqs), PostDomTree,
                                  AllowTailMerge);

  const bool Changed = Placement.run(MF);
  return Changed ? getMachineFunctionPassPreservedAnalyses()
                 : PreservedAnalyses::all();
}

/// Emit this pass in textual pipeline form: the mapped pass name, followed by
/// "<no-tail-merge>" when tail merging has been disabled.
void MachineBlockPlacementPass::printPipeline(
    raw_ostream &OS,
    function_ref<StringRef(StringRef)> MapClassName2PassName) const {
  OS << MapClassName2PassName(name());
  // With tail merging allowed, nothing is appended to the name.
  if (AllowTailMerge)
    return;
  OS << "<no-tail-merge>";
}

bool MachineBlockPlacement::run(MachineFunction &MF) {

// Check for single-block functions and skip them.
if (std::next(MF.begin()) == MF.end())
return false;

F = &MF;
MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
MBFI = std::make_unique<MBFIWrapper>(
getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
OptLevel = F->getTarget().getOptLevel();

TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
MPDT = nullptr;
PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
PassConfig = &getAnalysis<TargetPassConfig>();

// Initialize PreferredLoopExit to nullptr here since it may never be set if
// there are no MachineLoops.
Expand Down Expand Up @@ -3529,8 +3591,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
}

// Apply tail duplication.
if (allowTailDupPlacement()) {
MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
if (allowTailDupPlacement(*F)) {
if (OptForSize)
TailDupSize = 1;
const bool PreRegAlloc = false;
Expand All @@ -3548,8 +3609,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
// TailMerge can create jump into if branches that make CFG irreducible for
// HW that requires structured CFG.
const bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
PassConfig->getEnableTailMerge() &&
BranchFoldPlacement && MF.size() > 3;
AllowTailMerge && BranchFoldPlacement &&
MF.size() > 3;
// No tail merging opportunities if the block number is less than four.
if (EnableTailMerge) {
const unsigned TailMergeSize = TailDupSize + 1;
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/Passes/PassBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
#include "llvm/CodeGen/LowerEmuTLS.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBlockPlacement.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineCSE.h"
#include "llvm/CodeGen/MachineCopyPropagation.h"
Expand Down Expand Up @@ -1439,6 +1440,19 @@ Expected<bool> parseMachineSinkingPassOptions(StringRef Params) {
"MachineSinkingPass");
}

/// Parse the parameter of the "block-placement" pass.
///
/// Accepted spellings are the empty string and "tail-merge" (both yield
/// true) and "no-tail-merge" (yields false). Anything else produces a
/// StringError.
///
/// \param Params the text between '<' and '>' in the pipeline description.
/// \returns whether tail merging is allowed, or an error for an unknown
/// parameter.
Expected<bool> parseMachineBlockPlacementPassOptions(StringRef Params) {
  // No parameter selects the default: tail merging is allowed.
  if (Params.empty() || Params == "tail-merge")
    return true;
  if (Params == "no-tail-merge")
    return false;

  // Report the parameter exactly as the user wrote it; stripping a leading
  // "no-" before formatting would make the diagnostic misleading.
  return make_error<StringError>(
      formatv("invalid MachineBlockPlacementPass parameter '{0}' ", Params)
          .str(),
      inconvertibleErrorCode());
}

} // namespace

/// Tests whether a pass name starts with a valid prefix for a default pipeline
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple aarch64-none-elf -run-pass=block-placement -O3 -o - %s | FileCheck %s
# RUN: llc -mtriple aarch64-none-elf -passes='require<profile-summary>,function(machine-function(block-placement))' -O3 -o - %s | FileCheck %s

## Check that block-placement does not perform tail duplication on the
## PAUTH_EPILOGUE instruction. If that happened, the two prologues would use
Expand Down
2 changes: 2 additions & 0 deletions llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1010 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX10 %s
# RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s

# RUN: llc -mtriple=amdgcn -o - -passes='require<profile-summary>,function(machine-function(block-placement<tail-merge>))' -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s

# Used to fail with
# Assertion `Out && "Header of loop has no predecessors from outside loop?"

Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/X86/block-placement.mir
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=x86_64-apple-macosx10.12.0 -O3 -run-pass=block-placement -o - %s | FileCheck %s
# RUN: llc -mtriple=x86_64-apple-macosx10.12.0 -O3 -passes='require<profile-summary>,function(machine-function(block-placement))' -o - %s | FileCheck %s

--- |
; ModuleID = 'test.ll'
Expand Down
Loading