Skip to content

Commit fcaefc2

Browse files
authored
[AMDGPU][NPM] Port SIPreEmitPeephole to NPM (llvm#130065)
1 parent 79cb6f0 commit fcaefc2

7 files changed

+39
-14
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ extern char &SIWholeQuadModeID;
210210
void initializeSILowerControlFlowLegacyPass(PassRegistry &);
211211
extern char &SILowerControlFlowLegacyID;
212212

213-
void initializeSIPreEmitPeepholePass(PassRegistry &);
213+
void initializeSIPreEmitPeepholeLegacyPass(PassRegistry &);
214214
extern char &SIPreEmitPeepholeID;
215215

216216
void initializeSILateBranchLoweringLegacyPass(PassRegistry &);
@@ -399,6 +399,13 @@ class SILateBranchLoweringPass
399399
static bool isRequired() { return true; }
400400
};
401401

402+
class SIPreEmitPeepholePass : public PassInfoMixin<SIPreEmitPeepholePass> {
403+
public:
404+
PreservedAnalyses run(MachineFunction &MF,
405+
MachineFunctionAnalysisManager &MFAM);
406+
static bool isRequired() { return true; }
407+
};
408+
402409
class AMDGPUSetWavePriorityPass
403410
: public PassInfoMixin<AMDGPUSetWavePriorityPass> {
404411
public:

llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPr
127127
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
128128
MACHINE_FUNCTION_PASS("si-post-ra-bundler", SIPostRABundlerPass())
129129
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
130+
MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
130131
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
131132
MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
132133
#undef MACHINE_FUNCTION_PASS
@@ -135,7 +136,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
135136
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
136137
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())
137138

138-
DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
139139
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
140140
// already exists.
141141
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", AMDGPUPreloadKernArgPrologPass())

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
542542
initializeSIModeRegisterLegacyPass(*PR);
543543
initializeSIWholeQuadModeLegacyPass(*PR);
544544
initializeSILowerControlFlowLegacyPass(*PR);
545-
initializeSIPreEmitPeepholePass(*PR);
545+
initializeSIPreEmitPeepholeLegacyPass(*PR);
546546
initializeSILateBranchLoweringLegacyPass(*PR);
547547
initializeSIMemoryLegalizerLegacyPass(*PR);
548548
initializeSIOptimizeExecMaskingLegacyPass(*PR);
@@ -2173,9 +2173,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
21732173
if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less))
21742174
addPass(AMDGPUSetWavePriorityPass());
21752175

2176-
if (TM.getOptLevel() > CodeGenOptLevel::None) {
2177-
// TODO: addPass(SIPreEmitPeepholePass());
2178-
}
2176+
if (TM.getOptLevel() > CodeGenOptLevel::None)
2177+
addPass(SIPreEmitPeepholePass());
21792178

21802179
// The hazard recognizer that runs as part of the post-ra scheduler does not
21812180
// guarantee to be able to handle all hazards correctly. This is because if there

llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ using namespace llvm;
2424

2525
namespace {
2626

27-
class SIPreEmitPeephole : public MachineFunctionPass {
27+
class SIPreEmitPeephole {
2828
private:
2929
const SIInstrInfo *TII = nullptr;
3030
const SIRegisterInfo *TRI = nullptr;
@@ -40,24 +40,31 @@ class SIPreEmitPeephole : public MachineFunctionPass {
4040
const MachineBasicBlock &To) const;
4141
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);
4242

43+
public:
44+
bool run(MachineFunction &MF);
45+
};
46+
47+
class SIPreEmitPeepholeLegacy : public MachineFunctionPass {
4348
public:
4449
static char ID;
4550

46-
SIPreEmitPeephole() : MachineFunctionPass(ID) {
47-
initializeSIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
51+
SIPreEmitPeepholeLegacy() : MachineFunctionPass(ID) {
52+
initializeSIPreEmitPeepholeLegacyPass(*PassRegistry::getPassRegistry());
4853
}
4954

50-
bool runOnMachineFunction(MachineFunction &MF) override;
55+
bool runOnMachineFunction(MachineFunction &MF) override {
56+
return SIPreEmitPeephole().run(MF);
57+
}
5158
};
5259

5360
} // End anonymous namespace.
5461

55-
INITIALIZE_PASS(SIPreEmitPeephole, DEBUG_TYPE,
62+
INITIALIZE_PASS(SIPreEmitPeepholeLegacy, DEBUG_TYPE,
5663
"SI peephole optimizations", false, false)
5764

58-
char SIPreEmitPeephole::ID = 0;
65+
char SIPreEmitPeepholeLegacy::ID = 0;
5966

60-
char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID;
67+
char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID;
6168

6269
bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
6370
// Match:
@@ -410,7 +417,16 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
410417
return true;
411418
}
412419

413-
bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
420+
PreservedAnalyses
421+
llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
422+
MachineFunctionAnalysisManager &MFAM) {
423+
if (!SIPreEmitPeephole().run(MF))
424+
return PreservedAnalyses::all();
425+
426+
return getMachineFunctionPassPreservedAnalyses();
427+
}
428+
429+
bool SIPreEmitPeephole::run(MachineFunction &MF) {
414430
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
415431
TII = ST.getInstrInfo();
416432
TRI = &TII->getRegisterInfo();

llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -passes si-pre-emit-peephole %s -o - | FileCheck %s
34

45
---
56

llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole %s -o - | FileCheck %s
34
# Make sure mandatory skips are not removed around mode defs.
45

56
---

llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
34

45
---
56
name: simple

0 commit comments

Comments
 (0)