diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 278f10a670070..03cd45d7de6f2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -210,7 +210,7 @@ extern char &SIWholeQuadModeID; void initializeSILowerControlFlowLegacyPass(PassRegistry &); extern char &SILowerControlFlowLegacyID; -void initializeSIPreEmitPeepholePass(PassRegistry &); +void initializeSIPreEmitPeepholeLegacyPass(PassRegistry &); extern char &SIPreEmitPeepholeID; void initializeSILateBranchLoweringLegacyPass(PassRegistry &); @@ -399,6 +399,13 @@ class SILateBranchLoweringPass static bool isRequired() { return true; } }; +class SIPreEmitPeepholePass : public PassInfoMixin<SIPreEmitPeepholePass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); + static bool isRequired() { return true; } +}; + class AMDGPUSetWavePriorityPass : public PassInfoMixin<AMDGPUSetWavePriorityPass> { public: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index bebb69d765654..538b1b181f643 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -127,6 +127,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPr MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass()) MACHINE_FUNCTION_PASS("si-post-ra-bundler", SIPostRABundlerPass()) MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass()) +MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass()) MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass()) MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass()) #undef MACHINE_FUNCTION_PASS @@ -135,7 +136,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass()) DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) 
-DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass()) // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it // already exists. DUMMY_MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", AMDGPUPreloadKernArgPrologPass()) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 4b5c70f09155f..d9ea5989a3b1a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -540,7 +540,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIModeRegisterLegacyPass(*PR); initializeSIWholeQuadModeLegacyPass(*PR); initializeSILowerControlFlowLegacyPass(*PR); - initializeSIPreEmitPeepholePass(*PR); + initializeSIPreEmitPeepholeLegacyPass(*PR); initializeSILateBranchLoweringLegacyPass(*PR); initializeSIMemoryLegalizerLegacyPass(*PR); initializeSIOptimizeExecMaskingLegacyPass(*PR); @@ -2171,9 +2171,8 @@ void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const { if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) addPass(AMDGPUSetWavePriorityPass()); - if (TM.getOptLevel() > CodeGenOptLevel::None) { - // TODO: addPass(SIPreEmitPeepholePass()); - } + if (TM.getOptLevel() > CodeGenOptLevel::None) + addPass(SIPreEmitPeepholePass()); // The hazard recognizer that runs as part of the post-ra scheduler does not // guarantee to be able handle all hazards correctly. 
This is because if there diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp index 2bb70c138a50c..2c2ceedf8a2f6 100644 --- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp +++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp @@ -24,7 +24,7 @@ using namespace llvm; namespace { -class SIPreEmitPeephole : public MachineFunctionPass { +class SIPreEmitPeephole { private: const SIInstrInfo *TII = nullptr; const SIRegisterInfo *TRI = nullptr; @@ -40,24 +40,31 @@ class SIPreEmitPeephole : public MachineFunctionPass { const MachineBasicBlock &To) const; bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB); +public: + bool run(MachineFunction &MF); +}; + +class SIPreEmitPeepholeLegacy : public MachineFunctionPass { public: static char ID; - SIPreEmitPeephole() : MachineFunctionPass(ID) { - initializeSIPreEmitPeepholePass(*PassRegistry::getPassRegistry()); + SIPreEmitPeepholeLegacy() : MachineFunctionPass(ID) { + initializeSIPreEmitPeepholeLegacyPass(*PassRegistry::getPassRegistry()); } - bool runOnMachineFunction(MachineFunction &MF) override; + bool runOnMachineFunction(MachineFunction &MF) override { + return SIPreEmitPeephole().run(MF); + } }; } // End anonymous namespace. 
-INITIALIZE_PASS(SIPreEmitPeephole, DEBUG_TYPE, +INITIALIZE_PASS(SIPreEmitPeepholeLegacy, DEBUG_TYPE, "SI peephole optimizations", false, false) -char SIPreEmitPeephole::ID = 0; +char SIPreEmitPeepholeLegacy::ID = 0; -char &llvm::SIPreEmitPeepholeID = SIPreEmitPeephole::ID; +char &llvm::SIPreEmitPeepholeID = SIPreEmitPeepholeLegacy::ID; bool SIPreEmitPeephole::optimizeVccBranch(MachineInstr &MI) const { // Match: @@ -410,7 +417,16 @@ bool SIPreEmitPeephole::removeExeczBranch(MachineInstr &MI, return true; } -bool SIPreEmitPeephole::runOnMachineFunction(MachineFunction &MF) { +PreservedAnalyses +llvm::SIPreEmitPeepholePass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + if (!SIPreEmitPeephole().run(MF)) + return PreservedAnalyses::all(); + + return getMachineFunctionPassPreservedAnalyses(); +} + +bool SIPreEmitPeephole::run(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); TII = ST.getInstrInfo(); TRI = &TII->getRegisterInfo(); diff --git a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir index d89f306c96a36..785f5bed97904 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-handle-flat-vmem-ds.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=polaris10 -run-pass si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=polaris10 -passes si-pre-emit-peephole %s -o - | FileCheck %s --- diff --git a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir index 20de119471ba3..2c8739a87626e 100644 --- a/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir +++ b/llvm/test/CodeGen/AMDGPU/remove-short-exec-branches-special-instructions.mir @@ -1,5 +1,6 @@ # NOTE: Assertions 
have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole %s -o - | FileCheck %s # Make sure mandatory skips are not removed around mode defs. --- diff --git a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir index 796a70cfe8a39..002d43f937837 100644 --- a/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir +++ b/llvm/test/CodeGen/AMDGPU/set-gpr-idx-peephole.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass si-pre-emit-peephole -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -passes si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GCN %s -implicit-check-not=S_SET_GPR_IDX --- name: simple