Skip to content

[AMDGPU][NPM] Port SIPreEmitPeephole to NPM #130065

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ extern char &SIWholeQuadModeID;
void initializeSILowerControlFlowLegacyPass(PassRegistry &);
extern char &SILowerControlFlowLegacyID;

void initializeSIPreEmitPeepholePass(PassRegistry &);
void initializeSIPreEmitPeepholeLegacyPass(PassRegistry &);
extern char &SIPreEmitPeepholeID;

void initializeSILateBranchLoweringLegacyPass(PassRegistry &);
Expand Down Expand Up @@ -399,6 +399,13 @@ class SILateBranchLoweringPass
static bool isRequired() { return true; }
};

class SIPreEmitPeepholePass : public PassInfoMixin<SIPreEmitPeepholePass> {
public:
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
static bool isRequired() { return true; }
};

class AMDGPUSetWavePriorityPass
: public PassInfoMixin<AMDGPUSetWavePriorityPass> {
public:
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPr
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
MACHINE_FUNCTION_PASS("si-post-ra-bundler", SIPostRABundlerPass())
MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
#undef MACHINE_FUNCTION_PASS
Expand All @@ -135,7 +136,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass())
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass())

DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
// already exists.
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", AMDGPUPreloadKernArgPrologPass())
Expand Down
7 changes: 3 additions & 4 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
initializeSIModeRegisterLegacyPass(*PR);
initializeSIWholeQuadModeLegacyPass(*PR);
initializeSILowerControlFlowLegacyPass(*PR);
initializeSIPreEmitPeepholePass(*PR);
initializeSIPreEmitPeepholeLegacyPass(*PR);
initializeSILateBranchLoweringLegacyPass(*PR);
initializeSIMemoryLegalizerLegacyPass(*PR);
initializeSIOptimizeExecMaskingLegacyPass(*PR);
Expand Down Expand Up @@ -2171,9 +2171,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less))
addPass(AMDGPUSetWavePriorityPass());

if (TM.getOptLevel() > CodeGenOptLevel::None) {
// TODO: addPass(SIPreEmitPeepholePass());
}
if (TM.getOptLevel() > CodeGenOptLevel::None)
addPass(SIPreEmitPeepholePass());

// The hazard recognizer that runs as part of the post-ra scheduler does not
// guarantee to be able handle all hazards correctly. This is because if there
Expand Down
32 changes: 24 additions & 8 deletions llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ using namespace llvm;

namespace {

class SIPreEmitPeephole : public MachineFunctionPass {
class SIPreEmitPeephole {
private:
const SIInstrInfo *TII = nullptr;
const SIRegisterInfo *TRI = nullptr;
Expand All @@ -40,24 +40,31 @@ class SIPreEmitPeephole : public MachineFunctionPass {
const MachineBasicBlock &To) const;
bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);

public:
bool run(MachineFunction &MF);
};

class SIPreEmitPeepholeLegacy : public MachineFunctionPass {
public:
static char ID;

SIPreEmitPeephole() : MachineFunctionPass(ID) {
initializeSIPreEmitPeepholePass(*PassRegistry::getPassRegistry());
SIPreEmitPeepholeLegacy() : MachineFunctionPass(ID) {
initializeSIPreEmitPeepholeLegacyPass(*PassRegistry::getPassRegistry());
}

bool runOnMachineFunction(MachineFunction &MF) override;
bool runOnMachineFunction(MachineFunction &MF) override {
return SIPreEmitPeephole().run(MF);
}
};

} // End anonymous namespace.

INITIALIZE_PASS(SIPreEmitPeephole, DEBUG_TYPE,
INITIALIZE_PASS(SIPreEmitPeepholeLegacy, DEBUG_TYPE,
"SI peephole optimizations", false, false)

char SIPreEmitPeephole::ID = 0;
char SIPreEmitPeepholeLegacy::ID = 0;

char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID;
char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID;

bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const {
// Match:
Expand Down Expand Up @@ -410,7 +417,16 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI,
return true;
}

bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) {
PreservedAnalyses
llvm::SIPreEmitPeepholePass::run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM) {
if (!SIPreEmitPeephole().run(MF))
return PreservedAnalyses::all();

return getMachineFunctionPassPreservedAnalyses();
}

bool SIPreEmitPeephole::run(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -passes si-pre-emit-peephole %s -o - | FileCheck %s

---

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole %s -o - | FileCheck %s
# Make sure mandatory skips are not removed around mode defs.

---
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX
# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX

---
name: simple
Expand Down
Loading