diff --git a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h new file mode 100644 index 0000000000000..733d24ab719a8 --- /dev/null +++ b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h @@ -0,0 +1,35 @@ +//===- llvm/CodeGen/MachineBlockPlacement.h ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H +#define LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { + +class MachineBlockPlacementPass + : public PassInfoMixin { + + bool AllowTailMerge = true; + +public: + MachineBlockPlacementPass(bool AllowTailMerge) + : AllowTailMerge(AllowTailMerge) {} + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } + + void + printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName) const; +}; + +} // namespace llvm + +#endif // LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index a05e876806ab5..a27b5630b308e 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -184,7 +184,7 @@ void initializeMIRCanonicalizerPass(PassRegistry &); void initializeMIRNamerPass(PassRegistry &); void initializeMIRPrintingPassPass(PassRegistry &); void initializeMachineBlockFrequencyInfoWrapperPassPass(PassRegistry &); -void initializeMachineBlockPlacementPass(PassRegistry &); +void initializeMachineBlockPlacementLegacyPass(PassRegistry &); void initializeMachineBlockPlacementStatsPass(PassRegistry &); void 
initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &); void initializeMachineCFGPrinterPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 25899d04dc664..1dfbb6c7eec2e 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -46,6 +46,7 @@ #include "llvm/CodeGen/LocalStackSlotAllocation.h" #include "llvm/CodeGen/LowerEmuTLS.h" #include "llvm/CodeGen/MIRPrinter.h" +#include "llvm/CodeGen/MachineBlockPlacement.h" #include "llvm/CodeGen/MachineCSE.h" #include "llvm/CodeGen/MachineCopyPropagation.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" @@ -1223,7 +1224,7 @@ void CodeGenPassBuilder::addMachineLateOptimization( template void CodeGenPassBuilder::addBlockPlacement( AddMachinePass &addPass) const { - addPass(MachineBlockPlacementPass()); + addPass(MachineBlockPlacementPass(Opt.EnableTailMerge)); // Run a separate pass to collect block placement statistics. if (Opt.EnableBlockPlacementStats) addPass(MachineBlockPlacementStatsPass()); diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index f99a5f2c74bf3..517401b3f4ebe 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -193,6 +193,15 @@ MACHINE_FUNCTION_PASS("verify", MachineTraceMetricsVerifi #define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, \ PARAMS) #endif + +MACHINE_FUNCTION_PASS_WITH_PARAMS( + "block-placement", "MachineBlockPlacementPass", + [](bool AllowTailMerge) { + // Default is true. 
+ return MachineBlockPlacementPass(AllowTailMerge); + }, + parseMachineBlockPlacementPassOptions, "no-tail-merge;tail-merge") + MACHINE_FUNCTION_PASS_WITH_PARAMS( "machine-sink", "MachineSinkingPass", [](bool EnableSinkAndFold) { @@ -242,7 +251,6 @@ DUMMY_MACHINE_MODULE_PASS("mir-strip-debug", StripDebugMachineModulePass) #endif DUMMY_MACHINE_FUNCTION_PASS("bbsections-prepare", BasicBlockSectionsPass) DUMMY_MACHINE_FUNCTION_PASS("bbsections-profile-reader", BasicBlockSectionsProfileReaderPass) -DUMMY_MACHINE_FUNCTION_PASS("block-placement", MachineBlockPlacementPass) DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass) DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass) DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass) diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h index f006ef1fcb40b..51f25c1360b87 100644 --- a/llvm/include/llvm/Target/CGPassBuilderOption.h +++ b/llvm/include/llvm/Target/CGPassBuilderOption.h @@ -50,6 +50,7 @@ struct CGPassBuilderOption { bool EnableGlobalMergeFunc = false; bool EnableMachineFunctionSplitter = false; bool EnableSinkAndFold = false; + bool EnableTailMerge = true; bool MISchedPostRA = false; bool EarlyLiveIntervals = false; bool GCEmptyBlocks = false; diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index beb7fb284a376..daa31073e7151 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -72,7 +72,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeMIRNamerPass(Registry); initializeMIRProfileLoaderPassPass(Registry); initializeMachineBlockFrequencyInfoWrapperPassPass(Registry); - initializeMachineBlockPlacementPass(Registry); + initializeMachineBlockPlacementLegacyPass(Registry); initializeMachineBlockPlacementStatsPass(Registry); initializeMachineCFGPrinterPass(Registry); initializeMachineCSELegacyPass(Registry); diff --git 
a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index 9ccfadc318fa4..40edc47f3e6bb 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -24,6 +24,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/MachineBlockPlacement.h" #include "BranchFolding.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" @@ -357,7 +358,7 @@ class BlockChain { unsigned UnscheduledPredecessors = 0; }; -class MachineBlockPlacement : public MachineFunctionPass { +class MachineBlockPlacement { /// A type for a block filter set. using BlockFilterSet = SmallSetVector; @@ -409,7 +410,11 @@ class MachineBlockPlacement : public MachineFunctionPass { ProfileSummaryInfo *PSI = nullptr; - TargetPassConfig *PassConfig = nullptr; + // Tail merging is also determined based on + // whether structured CFG is required. + bool AllowTailMerge; + + CodeGenOptLevel OptLevel; /// Duplicator used to duplicate tails during placement. /// @@ -608,18 +613,48 @@ class MachineBlockPlacement : public MachineFunctionPass { /// Create a single CFG chain from the current block order. 
void createCFGChainExtTsp(); +public: + MachineBlockPlacement(const MachineBranchProbabilityInfo *MBPI, + MachineLoopInfo *MLI, ProfileSummaryInfo *PSI, + std::unique_ptr MBFI, + MachinePostDominatorTree *MPDT, bool AllowTailMerge) + : MBPI(MBPI), MBFI(std::move(MBFI)), MLI(MLI), MPDT(MPDT), PSI(PSI), + AllowTailMerge(AllowTailMerge) {} + + bool run(MachineFunction &F); + + static bool allowTailDupPlacement(const MachineFunction &MF) { + return TailDupPlacement && !MF.getTarget().requiresStructuredCFG(); + } +}; + +class MachineBlockPlacementLegacy : public MachineFunctionPass { public: static char ID; // Pass identification, replacement for typeid - MachineBlockPlacement() : MachineFunctionPass(ID) { - initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); + MachineBlockPlacementLegacy() : MachineFunctionPass(ID) { + initializeMachineBlockPlacementLegacyPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &F) override; + bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(MF.getFunction())) + return false; - bool allowTailDupPlacement() const { - assert(F); - return TailDupPlacement && !F->getTarget().requiresStructuredCFG(); + auto *MBPI = + &getAnalysis().getMBPI(); + auto MBFI = std::make_unique( + getAnalysis().getMBFI()); + auto *MLI = &getAnalysis().getLI(); + auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF) + ? 
&getAnalysis() + .getPostDomTree() + : nullptr; + auto *PSI = &getAnalysis().getPSI(); + auto *PassConfig = &getAnalysis(); + bool AllowTailMerge = PassConfig->getEnableTailMerge(); + return MachineBlockPlacement(MBPI, MLI, PSI, std::move(MBFI), MPDT, + AllowTailMerge) + .run(MF); } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -636,18 +671,18 @@ class MachineBlockPlacement : public MachineFunctionPass { } // end anonymous namespace -char MachineBlockPlacement::ID = 0; +char MachineBlockPlacementLegacy::ID = 0; -char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; +char &llvm::MachineBlockPlacementID = MachineBlockPlacementLegacy::ID; -INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(MachineBlockPlacementLegacy, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE, +INITIALIZE_PASS_END(MachineBlockPlacementLegacy, DEBUG_TYPE, "Branch Probability Basic Block Placement", false, false) #ifndef NDEBUG @@ -1130,7 +1165,7 @@ MachineBlockPlacement::getBestTrellisSuccessor( MachineBasicBlock *Succ1 = BestA.Dest; MachineBasicBlock *Succ2 = BestB.Dest; // Check to see if tail-duplication would be profitable. 
- if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) && + if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ2) && canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) && isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1), Chain, BlockFilter)) { @@ -1655,7 +1690,7 @@ MachineBlockPlacement::selectBestSuccessor(const MachineBasicBlock *BB, if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb, Chain, BlockFilter)) { // If tail duplication would make Succ profitable, place it. - if (allowTailDupPlacement() && shouldTailDuplicate(Succ)) + if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ)) DupCandidates.emplace_back(SuccProb, Succ); continue; } @@ -1883,7 +1918,7 @@ void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB, auto Result = selectBestSuccessor(BB, Chain, BlockFilter); MachineBasicBlock *BestSucc = Result.BB; bool ShouldTailDup = Result.ShouldTailDup; - if (allowTailDupPlacement()) + if (allowTailDupPlacement(*F)) ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds( BB, BestSucc, Chain, BlockFilter)); @@ -1910,7 +1945,7 @@ void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB, // Placement may have changed tail duplication opportunities. // Check for that now. - if (allowTailDupPlacement() && BestSucc && ShouldTailDup) { + if (allowTailDupPlacement(*F) && BestSucc && ShouldTailDup) { repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain, BlockFilter, PrevUnplacedBlockIt, PrevUnplacedBlockInFilterIt); @@ -3466,7 +3501,7 @@ void MachineBlockPlacement::initTailDupThreshold() { // For aggressive optimization, we can adjust some thresholds to be less // conservative. - if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive) { + if (OptLevel >= CodeGenOptLevel::Aggressive) { // At O3 we should be more willing to copy blocks for tail duplication. 
This // increases size pressure, so we only do it at O3 // Do this unless only the regular threshold is explicitly set. @@ -3478,29 +3513,56 @@ void MachineBlockPlacement::initTailDupThreshold() { // If there's no threshold provided through options, query the target // information for a threshold instead. if (TailDupPlacementThreshold.getNumOccurrences() == 0 && - (PassConfig->getOptLevel() < CodeGenOptLevel::Aggressive || + (OptLevel < CodeGenOptLevel::Aggressive || TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0)) - TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel()); + TailDupSize = TII->getTailDuplicateSize(OptLevel); } -bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(MF.getFunction())) - return false; +PreservedAnalyses +MachineBlockPlacementPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto *MBPI = &MFAM.getResult(MF); + auto MBFI = std::make_unique( + MFAM.getResult(MF)); + auto *MLI = &MFAM.getResult(MF); + auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF) + ? &MFAM.getResult(MF) + : nullptr; + auto *PSI = MFAM.getResult(MF) + .getCachedResult( + *MF.getFunction().getParent()); + if (!PSI) + report_fatal_error("MachineBlockPlacement requires ProfileSummaryAnalysis", + false); + + MachineBlockPlacement MBP(MBPI, MLI, PSI, std::move(MBFI), MPDT, + AllowTailMerge); + + if (!MBP.run(MF)) + return PreservedAnalyses::all(); + + return getMachineFunctionPassPreservedAnalyses(); +} + +void MachineBlockPlacementPass::printPipeline( + raw_ostream &OS, + function_ref MapClassName2PassName) const { + OS << MapClassName2PassName(name()); + if (!AllowTailMerge) + OS << "<no-tail-merge>"; // Must match the name the parser accepts. +} + +bool MachineBlockPlacement::run(MachineFunction &MF) { // Check for single-block functions and skip them. 
if (std::next(MF.begin()) == MF.end()) return false; F = &MF; - MBPI = &getAnalysis().getMBPI(); - MBFI = std::make_unique( - getAnalysis().getMBFI()); - MLI = &getAnalysis().getLI(); + OptLevel = F->getTarget().getOptLevel(); + TII = MF.getSubtarget().getInstrInfo(); TLI = MF.getSubtarget().getTargetLowering(); - MPDT = nullptr; - PSI = &getAnalysis().getPSI(); - PassConfig = &getAnalysis(); // Initialize PreferredLoopExit to nullptr here since it may never be set if // there are no MachineLoops. @@ -3529,8 +3591,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { } // Apply tail duplication. - if (allowTailDupPlacement()) { - MPDT = &getAnalysis().getPostDomTree(); + if (allowTailDupPlacement(*F)) { if (OptForSize) TailDupSize = 1; const bool PreRegAlloc = false; @@ -3548,8 +3609,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) { // TailMerge can create jump into if branches that make CFG irreducible for // HW that requires structured CFG. const bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && - PassConfig->getEnableTailMerge() && - BranchFoldPlacement && MF.size() > 3; + AllowTailMerge && BranchFoldPlacement && + MF.size() > 3; // No tail merging opportunities if the block number is less than four. 
if (EnableTailMerge) { const unsigned TailMergeSize = TailDupSize + 1; diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 8080059f0bb03..555349cbe0398 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -110,6 +110,7 @@ #include "llvm/CodeGen/LowerEmuTLS.h" #include "llvm/CodeGen/MIRPrinter.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBlockPlacement.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineCSE.h" #include "llvm/CodeGen/MachineCopyPropagation.h" @@ -1439,6 +1440,19 @@ Expected parseMachineSinkingPassOptions(StringRef Params) { "MachineSinkingPass"); } +Expected parseMachineBlockPlacementPassOptions(StringRef Params) { + bool AllowTailMerge = true; + if (!Params.empty()) { + AllowTailMerge = !Params.starts_with("no-"); // Keep Params for errors. + if (Params != "tail-merge" && Params != "no-tail-merge") + return make_error( + formatv("invalid MachineBlockPlacementPass parameter '{0}' ", Params) + .str(), + inconvertibleErrorCode()); + } + return AllowTailMerge; +} + } // namespace /// Tests whether a pass name starts with a valid prefix for a default pipeline diff --git a/llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir b/llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir index 5e57604263793..7b107f8a24045 100644 --- a/llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir +++ b/llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 # RUN: llc -mtriple aarch64-none-elf -run-pass=block-placement -O3 -o - %s | FileCheck %s +# RUN: llc -mtriple aarch64-none-elf -passes='require<profile-summary>,function(machine-function(block-placement))' -O3 -o - %s | FileCheck %s ## Check that block-placement does not perform tail duplication on the ## PAUTH_EPILOGUE instruction. 
If that happened, the two prologues would use diff --git a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir index efa24a9bee7de..05cfe53224582 100644 --- a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir +++ b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir @@ -2,6 +2,8 @@ # RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1010 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX10 %s # RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s +# RUN: llc -mtriple=amdgcn -o - -passes='require<profile-summary>,function(machine-function(block-placement))' -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s + # Used to fail with # Assertion `Out && "Header of loop has no predecessors from outside loop?" diff --git a/llvm/test/CodeGen/X86/block-placement.mir b/llvm/test/CodeGen/X86/block-placement.mir index 3f69ca0a40ad3..de7a80718a297 100644 --- a/llvm/test/CodeGen/X86/block-placement.mir +++ b/llvm/test/CodeGen/X86/block-placement.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=x86_64-apple-macosx10.12.0 -O3 -run-pass=block-placement -o - %s | FileCheck %s +# RUN: llc -mtriple=x86_64-apple-macosx10.12.0 -O3 -passes='require<profile-summary>,function(machine-function(block-placement))' -o - %s | FileCheck %s --- | ; ModuleID = 'test.ll' diff --git a/llvm/test/tools/llc/new-pm/option-parsing.mir b/llvm/test/tools/llc/new-pm/option-parsing.mir new file mode 100644 index 0000000000000..721b9a8623a48 --- /dev/null +++ b/llvm/test/tools/llc/new-pm/option-parsing.mir @@ -0,0 +1,13 @@ +# RUN: llc -mtriple=x86_64-unknown-linux-gnu -passes='block-placement<tail-merge>,block-placement<no-tail-merge>' -print-pipeline-passes -filetype=null %s 2>&1 | FileCheck %s --check-prefix=CHECK + +# RUN: not llc -mtriple=x86_64-unknown-linux-gnu -passes='block-placement<invalid-opt>' -print-pipeline-passes 
-filetype=null %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT + +# CHECK: block-placement,block-placement +# CHECK-NOT: invalid MachineBlockPlacementPass parameter 'invalid-opt' + +--- +name: f +body: | + bb.0: + RET 0 +...