Skip to content

Commit c99bd3c

Browse files
authored
[ctx_prof] Extend WorkloadImportsManager to use the contextual profile (llvm#98682)
The JSON-based input is kept because it is useful for diagnostics and for driving the import by means other than contextual composition. Support for the contextual profile is just another way of constructing the import list (`WorkloadImportsManager::Workloads`). Everything else - i.e. the actual importing logic - is already independent of how that list was obtained.
1 parent bb4aeb6 commit c99bd3c

File tree

2 files changed

+149
-12
lines changed

2 files changed

+149
-12
lines changed

llvm/lib/Transforms/IPO/FunctionImport.cpp

Lines changed: 76 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "llvm/ADT/StringRef.h"
2020
#include "llvm/Bitcode/BitcodeReader.h"
2121
#include "llvm/IR/AutoUpgrade.h"
22-
#include "llvm/IR/Constants.h"
2322
#include "llvm/IR/Function.h"
2423
#include "llvm/IR/GlobalAlias.h"
2524
#include "llvm/IR/GlobalObject.h"
@@ -30,6 +29,7 @@
3029
#include "llvm/IR/ModuleSummaryIndex.h"
3130
#include "llvm/IRReader/IRReader.h"
3231
#include "llvm/Linker/IRMover.h"
32+
#include "llvm/ProfileData/PGOCtxProfReader.h"
3333
#include "llvm/Support/Casting.h"
3434
#include "llvm/Support/CommandLine.h"
3535
#include "llvm/Support/Debug.h"
@@ -185,6 +185,10 @@ static cl::opt<bool> ImportAssumeUniqueLocal(
185185
"user specify the full module path."),
186186
cl::Hidden);
187187

188+
static cl::opt<std::string>
189+
ContextualProfile("thinlto-pgo-ctx-prof",
190+
cl::desc("Path to a contextual profile."), cl::Hidden);
191+
188192
namespace llvm {
189193
extern cl::opt<bool> EnableMemProfContextDisambiguation;
190194
}
@@ -604,13 +608,7 @@ class WorkloadImportsManager : public ModuleImportsManager {
604608
LLVM_DEBUG(dbgs() << "[Workload] Done\n");
605609
}
606610

607-
public:
608-
WorkloadImportsManager(
609-
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
610-
IsPrevailing,
611-
const ModuleSummaryIndex &Index,
612-
DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
613-
: ModuleImportsManager(IsPrevailing, Index, ExportLists) {
611+
void loadFromJson() {
614612
// Since the workload def uses names, we need a quick lookup
615613
// name->ValueInfo.
616614
StringMap<ValueInfo> NameToValueInfo;
@@ -680,15 +678,81 @@ class WorkloadImportsManager : public ModuleImportsManager {
680678
}
681679
Set.insert(ElemIt->second);
682680
}
683-
LLVM_DEBUG({
681+
}
682+
}
683+
684+
void loadFromCtxProf() {
685+
std::error_code EC;
686+
auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(ContextualProfile);
687+
if (std::error_code EC = BufferOrErr.getError()) {
688+
report_fatal_error("Failed to open contextual profile file");
689+
return;
690+
}
691+
auto Buffer = std::move(BufferOrErr.get());
692+
693+
PGOCtxProfileReader Reader(Buffer->getBuffer());
694+
auto Ctx = Reader.loadContexts();
695+
if (!Ctx) {
696+
report_fatal_error("Failed to parse contextual profiles");
697+
return;
698+
}
699+
const auto &CtxMap = *Ctx;
700+
DenseSet<GlobalValue::GUID> ContainedGUIDs;
701+
for (const auto &[RootGuid, Root] : CtxMap) {
702+
// Avoid ContainedGUIDs to get in/out of scope. Reuse its memory for
703+
// subsequent roots, but clear its contents.
704+
ContainedGUIDs.clear();
705+
706+
auto RootVI = Index.getValueInfo(RootGuid);
707+
if (!RootVI) {
708+
LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
709+
<< " not found in this linkage unit.\n");
710+
continue;
711+
}
712+
if (RootVI.getSummaryList().size() != 1) {
713+
LLVM_DEBUG(dbgs() << "[Workload] Root " << RootGuid
714+
<< " should have exactly one summary, but has "
715+
<< RootVI.getSummaryList().size() << ". Skipping.\n");
716+
continue;
717+
}
718+
StringRef RootDefiningModule =
719+
RootVI.getSummaryList().front()->modulePath();
720+
LLVM_DEBUG(dbgs() << "[Workload] Root defining module for " << RootGuid
721+
<< " is : " << RootDefiningModule << "\n");
722+
auto &Set = Workloads[RootDefiningModule];
723+
Root.getContainedGuids(ContainedGUIDs);
724+
for (auto Guid : ContainedGUIDs)
725+
if (auto VI = Index.getValueInfo(Guid))
726+
Set.insert(VI);
727+
}
728+
}
729+
730+
public:
731+
/// Build the workload import lists (`Workloads`) from exactly one of the two
/// supported inputs: the contextual profile (-thinlto-pgo-ctx-prof) or the
/// workload definition (-thinlto-workload-def).
///
/// Terminates the process via report_fatal_error when the two options are
/// either both set or both unset. Under LLVM_DEBUG, the resulting roots and
/// the GUIDs in each root's set are printed.
WorkloadImportsManager(
    function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
        IsPrevailing,
    const ModuleSummaryIndex &Index,
    DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists)
    : ModuleImportsManager(IsPrevailing, Index, ExportLists) {
  // Equal emptiness means both options were given or neither was.
  // report_fatal_error does not return, so no explicit `return` follows it.
  if (ContextualProfile.empty() == WorkloadDefinitions.empty())
    report_fatal_error(
        "Pass only one of: -thinlto-pgo-ctx-prof or -thinlto-workload-def");

  if (!ContextualProfile.empty())
    loadFromCtxProf();
  else
    loadFromJson();

  LLVM_DEBUG({
    for (const auto &[Root, Set] : Workloads) {
      dbgs() << "[Workload] Root: " << Root << " we have " << Set.size()
             << " distinct callees.\n";
      for (const auto &VI : Set) {
        dbgs() << "[Workload] Root: " << Root
               << " Would include: " << VI.getGUID() << "\n";
      }
    }
  });
}
693757
};
694758

@@ -697,7 +761,7 @@ std::unique_ptr<ModuleImportsManager> ModuleImportsManager::create(
697761
IsPrevailing,
698762
const ModuleSummaryIndex &Index,
699763
DenseMap<StringRef, FunctionImporter::ExportSetTy> *ExportLists) {
700-
if (WorkloadDefinitions.empty()) {
764+
if (WorkloadDefinitions.empty() && ContextualProfile.empty()) {
701765
LLVM_DEBUG(dbgs() << "[Workload] Using the regular imports manager.\n");
702766
return std::unique_ptr<ModuleImportsManager>(
703767
new ModuleImportsManager(IsPrevailing, Index, ExportLists));

llvm/test/ThinLTO/X86/ctxprof.ll

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
; Test workload based importing via -thinlto-pgo-ctx-prof
2+
; Use external linkage symbols so we don't depend on module paths which are
3+
; used when computing the GUIDs of internal linkage symbols.
4+
; The functionality is shared with what workload.ll tests, so here we only care
5+
; about testing the ctx profile is loaded and handled correctly.
6+
;
7+
; Set up
8+
; RUN: rm -rf %t
9+
; RUN: mkdir -p %t
10+
; RUN: split-file %s %t
11+
;
12+
; RUN: opt -module-summary %t/m1.ll -o %t/m1.bc
13+
; RUN: opt -module-summary %t/m2.ll -o %t/m2.bc
14+
; RUN: llvm-dis %t/m1.bc -o - | FileCheck %s --check-prefix=GUIDS-1
15+
; RUN: llvm-dis %t/m2.bc -o - | FileCheck %s --check-prefix=GUIDS-2
16+
;
17+
; GUIDS-1: name: "m1_f1"
18+
; GUIDS-1-SAME: guid = 6019442868614718803
19+
; GUIDS-2: name: "m2_f1"
20+
; GUIDS-2-SAME: guid = 15593096274670919754
21+
;
22+
; RUN: rm -rf %t_baseline
23+
; RUN: rm -rf %t_exp
24+
; RUN: mkdir -p %t_baseline
25+
; RUN: mkdir -p %t_exp
26+
;
27+
; Normal run. m1 shouldn't get m2_f1 because it's not referenced from there, and
28+
; m1_f1 shouldn't go to m2.
29+
;
30+
; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc \
31+
; RUN: -o %t_baseline/result.o -save-temps \
32+
; RUN: -r %t/m1.bc,m1_f1,plx \
33+
; RUN: -r %t/m2.bc,m2_f1,plx
34+
; RUN: llvm-dis %t_baseline/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=NOPROF-1
35+
; RUN: llvm-dis %t_baseline/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=NOPROF-2
36+
;
37+
; NOPROF-1-NOT: m2_f1()
38+
; NOPROF-2-NOT: m1_f1()
39+
;
40+
; The run with the contextual profile (which defines the workloads) - same other options.
41+
;
42+
; RUN: echo '[ \
43+
; RUN: {"Guid": 6019442868614718803, "Counters": [1], "Callsites": [[{"Guid": 15593096274670919754, "Counters": [1]}]]}, \
44+
; RUN: {"Guid": 15593096274670919754, "Counters": [1], "Callsites": [[{"Guid": 6019442868614718803, "Counters": [1]}]]} \
45+
; RUN: ]' > %t_exp/ctxprof.json
46+
; RUN: llvm-ctxprof-util fromJSON --input %t_exp/ctxprof.json --output %t_exp/ctxprof.bitstream
47+
; RUN: llvm-lto2 run %t/m1.bc %t/m2.bc \
48+
; RUN: -o %t_exp/result.o -save-temps \
49+
; RUN: -thinlto-pgo-ctx-prof=%t_exp/ctxprof.bitstream \
50+
; RUN: -r %t/m1.bc,m1_f1,plx \
51+
; RUN: -r %t/m2.bc,m2_f1,plx
52+
; RUN: llvm-dis %t_exp/result.o.1.3.import.bc -o - | FileCheck %s --check-prefix=FIRST
53+
; RUN: llvm-dis %t_exp/result.o.2.3.import.bc -o - | FileCheck %s --check-prefix=SECOND
54+
;
55+
;
56+
; FIRST: m2_f1()
57+
; SECOND: m1_f1()
58+
;
59+
;--- m1.ll
60+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
61+
target triple = "x86_64-pc-linux-gnu"
62+
63+
define dso_local void @m1_f1() {
64+
ret void
65+
}
66+
67+
;--- m2.ll
68+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
69+
target triple = "x86_64-pc-linux-gnu"
70+
71+
define dso_local void @m2_f1() {
72+
ret void
73+
}

0 commit comments

Comments
 (0)