-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[AMDGPU][NPM] Port SIMemoryLegalizer to NPM #130060
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][NPM] Port SIMemoryLegalizer to NPM #130060
Conversation
d82b6dd
to
9ba0133
Compare
54641a8
to
be751bb
Compare
9ba0133
to
9d01cd5
Compare
be751bb
to
b1402ed
Compare
@llvm/pr-subscribers-backend-amdgpu Author: Akshat Oke (optimisan) Changes: Full diff: https://github.com/llvm/llvm-project/pull/130060.diff 4 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index f331f741e3993..4197a60e77014 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -364,6 +364,13 @@ class GCNCreateVOPDPass : public PassInfoMixin<GCNCreateVOPDPass> {
MachineFunctionAnalysisManager &AM);
};
+class SIMemoryLegalizerPass : public PassInfoMixin<SIMemoryLegalizerPass> {
+public:
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+ static bool isRequired() { return true; }
+};
+
FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -428,7 +435,7 @@ class SIAnnotateControlFlowPass
void initializeSIAnnotateControlFlowLegacyPass(PassRegistry &);
extern char &SIAnnotateControlFlowLegacyPassID;
-void initializeSIMemoryLegalizerPass(PassRegistry&);
+void initializeSIMemoryLegalizerLegacyPass(PassRegistry &);
extern char &SIMemoryLegalizerID;
void initializeSIModeRegisterLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 0e3dcb4267ede..de959f8a2aa62 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass())
+MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass())
MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
@@ -132,7 +133,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPas
DUMMY_MACHINE_FUNCTION_PASS("si-insert-hard-clauses", SIInsertHardClausesPass())
DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
// already exists.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 73ae9135eb319..dbe212ad0a216 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -541,7 +541,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSILowerControlFlowLegacyPass(*PR);
initializeSIPreEmitPeepholePass(*PR);
initializeSILateBranchLoweringPass(*PR);
- initializeSIMemoryLegalizerPass(*PR);
+ initializeSIMemoryLegalizerLegacyPass(*PR);
initializeSIOptimizeExecMaskingLegacyPass(*PR);
initializeSIPreAllocateWWMRegsLegacyPass(*PR);
initializeSIFormMemoryClausesLegacyPass(*PR);
@@ -2151,7 +2151,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
addPass(GCNCreateVOPDPass());
}
- // TODO: addPass(SIMemoryLegalizerPass());
+
+ addPass(SIMemoryLegalizerPass());
// TODO: addPass(SIInsertWaitcntsPass());
// TODO: addPass(SIModeRegisterPass());
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 34953f9c08db7..1375ba201ec58 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -21,8 +21,10 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/MemoryModelRelaxationAnnotations.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/TargetParser/TargetParser.h"
@@ -625,9 +627,9 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
}
};
-class SIMemoryLegalizer final : public MachineFunctionPass {
+class SIMemoryLegalizer final {
private:
-
+ const MachineModuleInfo &MMI;
/// Cache Control.
std::unique_ptr<SICacheControl> CC = nullptr;
@@ -661,10 +663,16 @@ class SIMemoryLegalizer final : public MachineFunctionPass {
bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
MachineBasicBlock::iterator &MI);
+public:
+ SIMemoryLegalizer(const MachineModuleInfo &MMI) : MMI(MMI) {};
+ bool run(MachineFunction &MF);
+};
+
+class SIMemoryLegalizerLegacy final : public MachineFunctionPass {
public:
static char ID;
- SIMemoryLegalizer() : MachineFunctionPass(ID) {}
+ SIMemoryLegalizerLegacy() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -2767,11 +2775,26 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI,
return Changed;
}
-bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
- bool Changed = false;
-
+bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) {
const MachineModuleInfo &MMI =
getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
+ return SIMemoryLegalizer(MMI).run(MF);
+}
+
+PreservedAnalyses
+SIMemoryLegalizerPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ auto *MMI = MFAM.getResult<ModuleAnalysisManagerFunctionProxy>(MF)
+ .getCachedResult<MachineModuleAnalysis>(
+ *MF.getFunction().getParent());
+ assert(MMI && "MachineModuleAnalysis must be available");
+ if (!SIMemoryLegalizer(MMI->getMMI()).run(MF))
+ return PreservedAnalyses::all();
+ return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>();
+}
+
+bool SIMemoryLegalizer::run(MachineFunction &MF) {
+ bool Changed = false;
SIMemOpAccess MOA(MMI.getObjFileInfo<AMDGPUMachineModuleInfo>());
CC = SICacheControl::create(MF.getSubtarget<GCNSubtarget>());
@@ -2812,11 +2835,11 @@ bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-INITIALIZE_PASS(SIMemoryLegalizer, DEBUG_TYPE, PASS_NAME, false, false)
+INITIALIZE_PASS(SIMemoryLegalizerLegacy, DEBUG_TYPE, PASS_NAME, false, false)
-char SIMemoryLegalizer::ID = 0;
-char &llvm::SIMemoryLegalizerID = SIMemoryLegalizer::ID;
+char SIMemoryLegalizerLegacy::ID = 0;
+char &llvm::SIMemoryLegalizerID = SIMemoryLegalizerLegacy::ID;
FunctionPass *llvm::createSIMemoryLegalizerPass() {
- return new SIMemoryLegalizer();
+ return new SIMemoryLegalizerLegacy();
}
.getCachedResult<MachineModuleAnalysis>(
*MF.getFunction().getParent());
assert(MMI && "MachineModuleAnalysis must be available");
if (!SIMemoryLegalizer(MMI->getMMI()).run(MF))
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This pass shouldn't really depend on MachineModuleInfo (not this patch's fault though)
9d01cd5
to
4ceba75
Compare
861804a
to
f80cce0
Compare
Perhaps best to avoid large stacks like this to reduce upstack rebasing on mid-stack updates.
4d7120d
to
1aeebc2
Compare
Rearranging this stack to make it independent (Graphite is still failing to update the PRs itself). GitHub might ping for every force push, sorry!
f80cce0
to
b61adcc
Compare
No description provided.