Skip to content

Commit c92a8c0

Browse files
committed
[LPM] Port CGProfilePass from NPM to LPM
Reviewers: hans, chandlerc!, asbirlea, nikic
Reviewed By: hans, nikic
Subscribers: steven_wu, dexonsmith, nikic, echristo, void, zhizhouy, cfe-commits, aeubanks, MaskRay, jvesely, nhaehnle, hiraditya, kerbowa, llvm-commits
Tags: #llvm, #clang
Differential Revision: https://reviews.llvm.org/D83013
1 parent ff5b9a7 commit c92a8c0

File tree

17 files changed

+130
-50
lines changed

17 files changed

+130
-50
lines changed

clang/include/clang/Basic/CodeGenOptions.def

-1
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,6 @@ CODEGENOPT(UnwindTables , 1, 0) ///< Emit unwind tables.
252252
CODEGENOPT(VectorizeLoop , 1, 0) ///< Run loop vectorizer.
253253
CODEGENOPT(VectorizeSLP , 1, 0) ///< Run SLP vectorizer.
254254
CODEGENOPT(ProfileSampleAccurate, 1, 0) ///< Sample profile is accurate.
255-
CODEGENOPT(CallGraphProfile , 1, 0) ///< Run call graph profile.
256255

257256
/// Attempt to use register sized accesses to bit-fields in structures, when
258257
/// possible.

clang/lib/CodeGen/BackendUtil.cpp

+3-2
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,7 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
620620
PMBuilder.SizeLevel = CodeGenOpts.OptimizeSize;
621621
PMBuilder.SLPVectorize = CodeGenOpts.VectorizeSLP;
622622
PMBuilder.LoopVectorize = CodeGenOpts.VectorizeLoop;
623+
PMBuilder.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
623624

624625
PMBuilder.DisableUnrollLoops = !CodeGenOpts.UnrollLoops;
625626
// Loop interleaving in the loop vectorizer has historically been set to be
@@ -1144,7 +1145,7 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
11441145
PTO.LoopInterleaving = CodeGenOpts.UnrollLoops;
11451146
PTO.LoopVectorization = CodeGenOpts.VectorizeLoop;
11461147
PTO.SLPVectorization = CodeGenOpts.VectorizeSLP;
1147-
PTO.CallGraphProfile = CodeGenOpts.CallGraphProfile;
1148+
PTO.CallGraphProfile = !CodeGenOpts.DisableIntegratedAS;
11481149
PTO.Coroutines = LangOpts.Coroutines;
11491150

11501151
PassInstrumentationCallbacks PIC;
@@ -1562,7 +1563,7 @@ static void runThinLTOBackend(
15621563
Conf.PTO.LoopInterleaving = CGOpts.UnrollLoops;
15631564
Conf.PTO.LoopVectorization = CGOpts.VectorizeLoop;
15641565
Conf.PTO.SLPVectorization = CGOpts.VectorizeSLP;
1565-
Conf.PTO.CallGraphProfile = CGOpts.CallGraphProfile;
1566+
Conf.PTO.CallGraphProfile = !CGOpts.DisableIntegratedAS;
15661567

15671568
// Context sensitive profile.
15681569
if (CGOpts.hasProfileCSIRInstr()) {

clang/lib/Frontend/CompilerInvocation.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -860,7 +860,6 @@ static bool ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, InputKind IK,
860860
Opts.RerollLoops = Args.hasArg(OPT_freroll_loops);
861861

862862
Opts.DisableIntegratedAS = Args.hasArg(OPT_fno_integrated_as);
863-
Opts.CallGraphProfile = !Opts.DisableIntegratedAS;
864863
Opts.Autolink = !Args.hasArg(OPT_fno_autolink);
865864
Opts.SampleProfileFile =
866865
std::string(Args.getLastArgValue(OPT_fprofile_sample_use_EQ));

llvm/include/llvm/InitializePasses.h

+1
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ void initializeCFGViewerLegacyPassPass(PassRegistry&);
103103
void initializeCFIInstrInserterPass(PassRegistry&);
104104
void initializeCFLAndersAAWrapperPassPass(PassRegistry&);
105105
void initializeCFLSteensAAWrapperPassPass(PassRegistry&);
106+
void initializeCGProfileLegacyPassPass(PassRegistry &);
106107
void initializeCallGraphDOTPrinterPass(PassRegistry&);
107108
void initializeCallGraphPrinterLegacyPassPass(PassRegistry&);
108109
void initializeCallGraphViewerPass(PassRegistry&);

llvm/include/llvm/Transforms/IPO.h

+2
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,8 @@ ModulePass *createSampleProfileLoaderPass(StringRef Name);
282282
ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str,
283283
raw_ostream *ThinLinkOS = nullptr);
284284

285+
ModulePass *createCGProfileLegacyPass();
286+
285287
} // End llvm namespace
286288

287289
#endif

llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h

+1
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ class PassManagerBuilder {
156156

157157
bool DisableTailCalls;
158158
bool DisableUnrollLoops;
159+
bool CallGraphProfile;
159160
bool SLPVectorize;
160161
bool LoopVectorize;
161162
bool LoopsInterleaved;

llvm/include/llvm/Transforms/Instrumentation/CGProfile.h

-5
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,6 @@ namespace llvm {
1919
class CGProfilePass : public PassInfoMixin<CGProfilePass> {
2020
public:
2121
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
22-
23-
private:
24-
void addModuleFlags(
25-
Module &M,
26-
MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const;
2722
};
2823
} // end namespace llvm
2924

llvm/lib/Passes/PassBuilder.cpp

+1-5
Original file line numberDiff line numberDiff line change
@@ -248,10 +248,6 @@ static cl::opt<bool>
248248
EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
249249
cl::desc("Enable control height reduction optimization (CHR)"));
250250

251-
static cl::opt<bool> EnableCallGraphProfile(
252-
"enable-npm-call-graph-profile", cl::init(true), cl::Hidden,
253-
cl::desc("Enable call graph profile pass for the new PM (default = on)"));
254-
255251
/// Flag to enable inline deferral during PGO.
256252
static cl::opt<bool>
257253
EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(true),
@@ -267,7 +263,7 @@ PipelineTuningOptions::PipelineTuningOptions() {
267263
Coroutines = false;
268264
LicmMssaOptCap = SetLicmMssaOptCap;
269265
LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
270-
CallGraphProfile = EnableCallGraphProfile;
266+
CallGraphProfile = true;
271267
}
272268

273269
extern cl::opt<bool> EnableHotColdSplit;

llvm/lib/Transforms/IPO/PassManagerBuilder.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ PassManagerBuilder::PassManagerBuilder() {
195195
PrepareForThinLTO = EnablePrepareForThinLTO;
196196
PerformThinLTO = EnablePerformThinLTO;
197197
DivergentTarget = false;
198+
CallGraphProfile = true;
198199
}
199200

200201
PassManagerBuilder::~PassManagerBuilder() {
@@ -834,6 +835,10 @@ void PassManagerBuilder::populateModulePassManager(
834835
if (MergeFunctions)
835836
MPM.add(createMergeFunctionsPass());
836837

838+
// Add Module flag "CG Profile" based on Block Frequency Information.
839+
if (CallGraphProfile)
840+
MPM.add(createCGProfileLegacyPass());
841+
837842
// LoopSink pass sinks instructions hoisted by LICM, which serves as a
838843
// canonicalization pass that enables other optimizations. As a result,
839844
// LoopSink pass needs to be a very late IR pass to avoid undoing LICM

llvm/lib/Transforms/Instrumentation/CGProfile.cpp

+79-25
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,48 @@
1010

1111
#include "llvm/ADT/MapVector.h"
1212
#include "llvm/Analysis/BlockFrequencyInfo.h"
13+
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
1314
#include "llvm/Analysis/TargetTransformInfo.h"
1415
#include "llvm/IR/Constants.h"
1516
#include "llvm/IR/Instructions.h"
1617
#include "llvm/IR/MDBuilder.h"
1718
#include "llvm/IR/PassManager.h"
19+
#include "llvm/InitializePasses.h"
1820
#include "llvm/ProfileData/InstrProf.h"
21+
#include "llvm/Transforms/IPO.h"
1922
#include "llvm/Transforms/Instrumentation.h"
2023

2124
#include <array>
2225

2326
using namespace llvm;
2427

25-
PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
28+
static bool
29+
addModuleFlags(Module &M,
30+
MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) {
31+
if (Counts.empty())
32+
return false;
33+
34+
LLVMContext &Context = M.getContext();
35+
MDBuilder MDB(Context);
36+
std::vector<Metadata *> Nodes;
37+
38+
for (auto E : Counts) {
39+
Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
40+
ValueAsMetadata::get(E.first.second),
41+
MDB.createConstant(ConstantInt::get(
42+
Type::getInt64Ty(Context), E.second))};
43+
Nodes.push_back(MDNode::get(Context, Vals));
44+
}
45+
46+
M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
47+
return true;
48+
}
49+
50+
static bool
51+
runCGProfilePass(Module &M,
52+
function_ref<BlockFrequencyInfo &(Function &)> GetBFI,
53+
function_ref<TargetTransformInfo &(Function &)> GetTTI) {
2654
MapVector<std::pair<Function *, Function *>, uint64_t> Counts;
27-
FunctionAnalysisManager &FAM =
28-
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2955
InstrProfSymtab Symtab;
3056
auto UpdateCounts = [&](TargetTransformInfo &TTI, Function *F,
3157
Function *CalledF, uint64_t NewCount) {
@@ -35,14 +61,14 @@ PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
3561
Count = SaturatingAdd(Count, NewCount);
3662
};
3763
// Ignore error here. Indirect calls are ignored if this fails.
38-
(void)(bool)Symtab.create(M);
64+
(void)(bool) Symtab.create(M);
3965
for (auto &F : M) {
40-
if (F.isDeclaration())
66+
if (F.isDeclaration() || !F.getEntryCount())
4167
continue;
42-
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
68+
auto &BFI = GetBFI(F);
4369
if (BFI.getEntryFreq() == 0)
4470
continue;
45-
TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
71+
TargetTransformInfo &TTI = GetTTI(F);
4672
for (auto &BB : F) {
4773
Optional<uint64_t> BBCount = BFI.getBlockProfileCount(&BB);
4874
if (!BBCount)
@@ -69,28 +95,56 @@ PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
6995
}
7096
}
7197

72-
addModuleFlags(M, Counts);
73-
74-
return PreservedAnalyses::all();
98+
return addModuleFlags(M, Counts);
7599
}
76100

77-
void CGProfilePass::addModuleFlags(
78-
Module &M,
79-
MapVector<std::pair<Function *, Function *>, uint64_t> &Counts) const {
80-
if (Counts.empty())
81-
return;
101+
namespace {
102+
struct CGProfileLegacyPass final : public ModulePass {
103+
static char ID;
104+
CGProfileLegacyPass() : ModulePass(ID) {
105+
initializeCGProfileLegacyPassPass(*PassRegistry::getPassRegistry());
106+
}
82107

83-
LLVMContext &Context = M.getContext();
84-
MDBuilder MDB(Context);
85-
std::vector<Metadata *> Nodes;
108+
void getAnalysisUsage(AnalysisUsage &AU) const override {
109+
AU.setPreservesCFG();
110+
AU.addRequired<LazyBlockFrequencyInfoPass>();
111+
AU.addRequired<TargetTransformInfoWrapperPass>();
112+
}
86113

87-
for (auto E : Counts) {
88-
Metadata *Vals[] = {ValueAsMetadata::get(E.first.first),
89-
ValueAsMetadata::get(E.first.second),
90-
MDB.createConstant(ConstantInt::get(
91-
Type::getInt64Ty(Context), E.second))};
92-
Nodes.push_back(MDNode::get(Context, Vals));
114+
bool runOnModule(Module &M) override {
115+
auto GetBFI = [this](Function &F) -> BlockFrequencyInfo & {
116+
return this->getAnalysis<LazyBlockFrequencyInfoPass>(F).getBFI();
117+
};
118+
auto GetTTI = [this](Function &F) -> TargetTransformInfo & {
119+
return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
120+
};
121+
122+
return runCGProfilePass(M, GetBFI, GetTTI);
93123
}
124+
};
94125

95-
M.addModuleFlag(Module::Append, "CG Profile", MDNode::get(Context, Nodes));
126+
} // namespace
127+
128+
char CGProfileLegacyPass::ID = 0;
129+
130+
INITIALIZE_PASS(CGProfileLegacyPass, "cg-profile", "Call Graph Profile", false,
131+
false)
132+
133+
ModulePass *llvm::createCGProfileLegacyPass() {
134+
return new CGProfileLegacyPass();
135+
}
136+
137+
PreservedAnalyses CGProfilePass::run(Module &M, ModuleAnalysisManager &MAM) {
138+
FunctionAnalysisManager &FAM =
139+
MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
140+
auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
141+
return FAM.getResult<BlockFrequencyAnalysis>(F);
142+
};
143+
auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
144+
return FAM.getResult<TargetIRAnalysis>(F);
145+
};
146+
147+
runCGProfilePass(M, GetBFI, GetTTI);
148+
149+
return PreservedAnalyses::all();
96150
}

llvm/lib/Transforms/Instrumentation/Instrumentation.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) {
112112
initializePGOInstrumentationUseLegacyPassPass(Registry);
113113
initializePGOIndirectCallPromotionLegacyPassPass(Registry);
114114
initializePGOMemOPSizeOptLegacyPassPass(Registry);
115+
initializeCGProfileLegacyPassPass(Registry);
115116
initializeInstrOrderFileLegacyPassPass(Registry);
116117
initializeInstrProfilingLegacyPassPass(Registry);
117118
initializeMemorySanitizerLegacyPassPass(Registry);

llvm/test/CodeGen/AMDGPU/opt-pipeline.ll

+18
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,12 @@
276276
; GCN-O1-NEXT: Warn about non-applied transformations
277277
; GCN-O1-NEXT: Alignment from assumptions
278278
; GCN-O1-NEXT: Strip Unused Function Prototypes
279+
; GCN-O1-NEXT: Call Graph Profile
280+
; GCN-O1-NEXT: FunctionPass Manager
281+
; GCN-O1-NEXT: Dominator Tree Construction
282+
; GCN-O1-NEXT: Natural Loop Information
283+
; GCN-O1-NEXT: Lazy Branch Probability Analysis
284+
; GCN-O1-NEXT: Lazy Block Frequency Analysis
279285
; GCN-O1-NEXT: FunctionPass Manager
280286
; GCN-O1-NEXT: Dominator Tree Construction
281287
; GCN-O1-NEXT: Natural Loop Information
@@ -623,6 +629,12 @@
623629
; GCN-O2-NEXT: Strip Unused Function Prototypes
624630
; GCN-O2-NEXT: Dead Global Elimination
625631
; GCN-O2-NEXT: Merge Duplicate Global Constants
632+
; GCN-O2-NEXT: Call Graph Profile
633+
; GCN-O2-NEXT: FunctionPass Manager
634+
; GCN-O2-NEXT: Dominator Tree Construction
635+
; GCN-O2-NEXT: Natural Loop Information
636+
; GCN-O2-NEXT: Lazy Branch Probability Analysis
637+
; GCN-O2-NEXT: Lazy Block Frequency Analysis
626638
; GCN-O2-NEXT: FunctionPass Manager
627639
; GCN-O2-NEXT: Dominator Tree Construction
628640
; GCN-O2-NEXT: Natural Loop Information
@@ -975,6 +987,12 @@
975987
; GCN-O3-NEXT: Strip Unused Function Prototypes
976988
; GCN-O3-NEXT: Dead Global Elimination
977989
; GCN-O3-NEXT: Merge Duplicate Global Constants
990+
; GCN-O3-NEXT: Call Graph Profile
991+
; GCN-O3-NEXT: FunctionPass Manager
992+
; GCN-O3-NEXT: Dominator Tree Construction
993+
; GCN-O3-NEXT: Natural Loop Information
994+
; GCN-O3-NEXT: Lazy Branch Probability Analysis
995+
; GCN-O3-NEXT: Lazy Block Frequency Analysis
978996
; GCN-O3-NEXT: FunctionPass Manager
979997
; GCN-O3-NEXT: Dominator Tree Construction
980998
; GCN-O3-NEXT: Natural Loop Information

llvm/test/Instrumentation/cgprofile.ll

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: opt < %s -passes cg-profile -S | FileCheck %s
2+
; RUN: opt < %s -cg-profile -S | FileCheck %s
23

34
declare void @b()
45

llvm/test/Other/new-pm-cgprofile.ll

-11
This file was deleted.

llvm/test/Other/opt-O2-pipeline.ll

+6
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,12 @@
280280
; CHECK-NEXT: Strip Unused Function Prototypes
281281
; CHECK-NEXT: Dead Global Elimination
282282
; CHECK-NEXT: Merge Duplicate Global Constants
283+
; CHECK-NEXT: Call Graph Profile
284+
; CHECK-NEXT: FunctionPass Manager
285+
; CHECK-NEXT: Dominator Tree Construction
286+
; CHECK-NEXT: Natural Loop Information
287+
; CHECK-NEXT: Lazy Branch Probability Analysis
288+
; CHECK-NEXT: Lazy Block Frequency Analysis
283289
; CHECK-NEXT: FunctionPass Manager
284290
; CHECK-NEXT: Dominator Tree Construction
285291
; CHECK-NEXT: Natural Loop Information

llvm/test/Other/opt-O3-pipeline.ll

+6
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,12 @@
285285
; CHECK-NEXT: Strip Unused Function Prototypes
286286
; CHECK-NEXT: Dead Global Elimination
287287
; CHECK-NEXT: Merge Duplicate Global Constants
288+
; CHECK-NEXT: Call Graph Profile
289+
; CHECK-NEXT: FunctionPass Manager
290+
; CHECK-NEXT: Dominator Tree Construction
291+
; CHECK-NEXT: Natural Loop Information
292+
; CHECK-NEXT: Lazy Branch Probability Analysis
293+
; CHECK-NEXT: Lazy Block Frequency Analysis
288294
; CHECK-NEXT: FunctionPass Manager
289295
; CHECK-NEXT: Dominator Tree Construction
290296
; CHECK-NEXT: Natural Loop Information

llvm/test/Other/opt-Os-pipeline.ll

+6
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,12 @@
266266
; CHECK-NEXT: Strip Unused Function Prototypes
267267
; CHECK-NEXT: Dead Global Elimination
268268
; CHECK-NEXT: Merge Duplicate Global Constants
269+
; CHECK-NEXT: Call Graph Profile
270+
; CHECK-NEXT: FunctionPass Manager
271+
; CHECK-NEXT: Dominator Tree Construction
272+
; CHECK-NEXT: Natural Loop Information
273+
; CHECK-NEXT: Lazy Branch Probability Analysis
274+
; CHECK-NEXT: Lazy Block Frequency Analysis
269275
; CHECK-NEXT: FunctionPass Manager
270276
; CHECK-NEXT: Dominator Tree Construction
271277
; CHECK-NEXT: Natural Loop Information

0 commit comments

Comments
 (0)