Skip to content

Commit aaf2d07

Browse files
author
Sumanth Gundapaneni
authored
[Hexagon] Clean up redundant transfer instructions. (#82663)
This patch adds a Hexagon specific backend pass that cleans up redundant transfers after register allocation.
1 parent 5bd0c44 commit aaf2d07

File tree

7 files changed

+366
-7
lines changed

7 files changed

+366
-7
lines changed

llvm/lib/Target/Hexagon/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ add_llvm_target(HexagonCodeGen
6262
HexagonTargetMachine.cpp
6363
HexagonTargetObjectFile.cpp
6464
HexagonTargetTransformInfo.cpp
65+
HexagonTfrCleanup.cpp
6566
HexagonVectorCombine.cpp
6667
HexagonVectorLoopCarriedReuse.cpp
6768
HexagonVectorPrint.cpp

llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ static cl::opt<bool> EnableExpandCondsets("hexagon-expand-condsets",
6565
cl::init(true), cl::Hidden,
6666
cl::desc("Early expansion of MUX"));
6767

68+
static cl::opt<bool> EnableTfrCleanup("hexagon-tfr-cleanup", cl::init(true),
69+
cl::Hidden,
70+
cl::desc("Cleanup of TFRs/COPYs"));
71+
6872
static cl::opt<bool> EnableEarlyIf("hexagon-eif", cl::init(true), cl::Hidden,
6973
cl::desc("Enable early if-conversion"));
7074

@@ -153,6 +157,7 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
153157

154158
namespace llvm {
155159
extern char &HexagonExpandCondsetsID;
160+
extern char &HexagonTfrCleanupID;
156161
void initializeHexagonBitSimplifyPass(PassRegistry&);
157162
void initializeHexagonConstExtendersPass(PassRegistry&);
158163
void initializeHexagonConstPropagationPass(PassRegistry&);
@@ -169,6 +174,7 @@ namespace llvm {
169174
void initializeHexagonPostIncOptPass(PassRegistry &);
170175
void initializeHexagonRDFOptPass(PassRegistry&);
171176
void initializeHexagonSplitDoubleRegsPass(PassRegistry&);
177+
void initializeHexagonTfrCleanupPass(PassRegistry &);
172178
void initializeHexagonVExtractPass(PassRegistry &);
173179
void initializeHexagonVectorCombineLegacyPass(PassRegistry&);
174180
void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &);
@@ -204,6 +210,7 @@ namespace llvm {
204210
FunctionPass *createHexagonSplitConst32AndConst64();
205211
FunctionPass *createHexagonSplitDoubleRegs();
206212
FunctionPass *createHexagonStoreWidening();
213+
FunctionPass *createHexagonTfrCleanup();
207214
FunctionPass *createHexagonVectorCombineLegacyPass();
208215
FunctionPass *createHexagonVectorPrint();
209216
FunctionPass *createHexagonVExtract();
@@ -258,6 +265,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
258265
(HexagonNoOpt ? CodeGenOptLevel::None : OL)),
259266
TLOF(std::make_unique<HexagonTargetObjectFile>()) {
260267
initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
268+
initializeHexagonTfrCleanupPass(*PassRegistry::getPassRegistry());
261269
initializeHexagonPostIncOptPass(*PassRegistry::getPassRegistry());
262270
initAsmInfo();
263271
}
@@ -426,6 +434,8 @@ void HexagonPassConfig::addPreRegAlloc() {
426434
addPass(createHexagonConstExtenders());
427435
if (EnableExpandCondsets)
428436
insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID);
437+
if (EnableTfrCleanup)
438+
insertPass(&VirtRegRewriterID, &HexagonTfrCleanupID);
429439
if (!DisableStoreWidening)
430440
addPass(createHexagonStoreWidening());
431441
if (EnableGenMemAbs)
Lines changed: 324 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
//===------- HexagonTfrCleanup.cpp - Hexagon Transfer Cleanup Pass -------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// This pass is to address a situation that appears after register allocation
9+
// every now and then, namely a register copy from a source that was defined
10+
// as an immediate value in the same block (usually just before the copy).
11+
//
12+
// Here is an example of actual code emitted that shows this problem:
13+
//
14+
// .LBB0_5:
15+
// {
16+
// r5 = zxtb(r8)
17+
// r6 = or(r6, ##12345)
18+
// }
19+
// {
20+
// r3 = xor(r1, r2)
21+
// r1 = #0 <-- r1 set to #0
22+
// }
23+
// {
24+
// r7 = r1 <-- r7 set to r1
25+
// r0 = zxtb(r3)
26+
// }
27+
28+
#define DEBUG_TYPE "tfr-cleanup"
29+
#include "HexagonTargetMachine.h"
30+
31+
#include "llvm/CodeGen/LiveInterval.h"
32+
#include "llvm/CodeGen/LiveIntervals.h"
33+
#include "llvm/CodeGen/MachineFunction.h"
34+
#include "llvm/CodeGen/MachineInstrBuilder.h"
35+
#include "llvm/CodeGen/MachineRegisterInfo.h"
36+
#include "llvm/CodeGen/Passes.h"
37+
#include "llvm/CodeGen/TargetInstrInfo.h"
38+
#include "llvm/CodeGen/TargetRegisterInfo.h"
39+
#include "llvm/Support/CommandLine.h"
40+
#include "llvm/Support/Debug.h"
41+
#include "llvm/Support/raw_ostream.h"
42+
#include "llvm/Target/TargetMachine.h"
43+
44+
using namespace llvm;
45+
46+
// Forward declarations of this pass's factory function and its registry
// initializer. The factory is defined at the end of this file; the
// initializer is called from the pass constructor below.
namespace llvm {
47+
FunctionPass *createHexagonTfrCleanup();
48+
void initializeHexagonTfrCleanupPass(PassRegistry &);
49+
} // namespace llvm
50+
51+
namespace {
52+
// Machine function pass that walks every basic block, erases self-assigning
// transfers, and rewrites register copies whose source holds a known
// constant into immediate-load instructions.
class HexagonTfrCleanup : public MachineFunctionPass {
53+
public:
54+
// Pass identification object; it is handed to the MachineFunctionPass
// constructor below.
static char ID;
55+
// Registers the pass with the global PassRegistry on construction. HII and
// TRI start out null and are filled in by runOnMachineFunction.
HexagonTfrCleanup() : MachineFunctionPass(ID), HII(0), TRI(0) {
56+
PassRegistry &R = *PassRegistry::getPassRegistry();
57+
initializeHexagonTfrCleanupPass(R);
58+
}
59+
StringRef getPassName() const override { return "Hexagon TFR Cleanup"; }
60+
void getAnalysisUsage(AnalysisUsage &AU) const override {
61+
// NOTE(review): the pass inserts and erases instructions yet declares all
// analyses preserved; only SlotIndexes is repaired by hand in this file.
// Confirm no other analysis can go stale.
AU.setPreservesAll();
62+
MachineFunctionPass::getAnalysisUsage(AU);
63+
}
64+
bool runOnMachineFunction(MachineFunction &MF) override;
65+
66+
private:
67+
// Subtarget instruction/register info, set at the start of every
// runOnMachineFunction call.
const HexagonInstrInfo *HII;
68+
const TargetRegisterInfo *TRI;
69+
70+
// Register -> known constant value. Entries are written through setReg,
// which takes a 32-bit value; 64-bit registers are represented by their
// two subregisters.
typedef DenseMap<unsigned, uint64_t> ImmediateMap;
71+
72+
bool isIntReg(unsigned Reg, bool &Is32);
73+
void setReg(unsigned R32, uint32_t V32, ImmediateMap &IMap);
74+
bool getReg(unsigned Reg, uint64_t &Val, ImmediateMap &IMap);
75+
bool updateImmMap(MachineInstr *MI, ImmediateMap &IMap);
76+
bool rewriteIfImm(MachineInstr *MI, ImmediateMap &IMap, SlotIndexes *Indexes);
77+
bool eraseIfRedundant(MachineInstr *MI, SlotIndexes *Indexes);
78+
};
79+
} // namespace
80+
81+
// Storage for the pass ID declared in the class.
char HexagonTfrCleanup::ID = 0;
82+
83+
namespace llvm {
84+
// Externally visible alias of the pass ID. HexagonTargetMachine.cpp declares
// it `extern` and passes its address to insertPass().
char &HexagonTfrCleanupID = HexagonTfrCleanup::ID;
85+
}
86+
87+
// Classify Reg as a scalar integer register.
// Returns true if Reg is a member of Hexagon::IntRegsRegClass or
// Hexagon::DoubleRegsRegClass. Is32 is always written: true when Reg is in
// IntRegsRegClass, false otherwise (including when the function returns
// false).
bool HexagonTfrCleanup::isIntReg(unsigned Reg, bool &Is32) {
88+
Is32 = Hexagon::IntRegsRegClass.contains(Reg);
89+
return Is32 || Hexagon::DoubleRegsRegClass.contains(Reg);
90+
}
91+
92+
// Assign the given value V32 to the specified register R32 in the map,
93+
// overwriting any value already recorded. Only 32-bit registers are valid
// arguments; this is not checked here.
94+
void HexagonTfrCleanup::setReg(unsigned R32, uint32_t V32, ImmediateMap &IMap) {
95+
ImmediateMap::iterator F = IMap.find(R32);
96+
if (F == IMap.end())
97+
// No entry yet: V32 is widened to the map's uint64_t value type.
IMap.insert(std::make_pair(R32, V32));
98+
else
99+
F->second = V32;
100+
}
101+
102+
// Retrieve the known constant value of register Reg and store it into Val.
103+
// Return "true" if a value was found, "false" otherwise. Val is left
// untouched when "false" is returned. Registers that isIntReg rejects never
// have a value.
104+
bool HexagonTfrCleanup::getReg(unsigned Reg, uint64_t &Val,
105+
ImmediateMap &IMap) {
106+
bool Is32;
107+
if (!isIntReg(Reg, Is32))
108+
return false;
109+
110+
if (Is32) {
111+
// 32-bit register: a direct map lookup.
ImmediateMap::iterator F = IMap.find(Reg);
112+
if (F == IMap.end())
113+
return false;
114+
Val = F->second;
115+
return true;
116+
}
117+
118+
// For 64-bit registers, compose the value from the values of its
119+
// subregisters. Both halves must be known.
120+
unsigned SubL = TRI->getSubReg(Reg, Hexagon::isub_lo);
121+
unsigned SubH = TRI->getSubReg(Reg, Hexagon::isub_hi);
122+
ImmediateMap::iterator FL = IMap.find(SubL), FH = IMap.find(SubH);
123+
if (FL == IMap.end() || FH == IMap.end())
124+
return false;
125+
// The OR is only correct if the low entry has no bits above bit 31; entries
// written via setReg come from a uint32_t and satisfy this.
Val = (FH->second << 32) | FL->second;
126+
return true;
127+
}
128+
129+
// Process an instruction and record the relevant information in the
130+
// immediate map.
//  - Calls and instructions carrying a register-mask operand clear the map.
//  - A2_tfrsi/A2_tfrpi with an immediate operand record the constant for the
//    defined register (split into 32-bit halves for a 64-bit register).
//  - Any other instruction invalidates every register it defines, together
//    with the aliases of those registers.
// Returns false only for an A2_tfrsi/A2_tfrpi whose destination is not an
// integer register or whose source operand is not an immediate; returns true
// otherwise. The caller in this file ignores the result.
131+
bool HexagonTfrCleanup::updateImmMap(MachineInstr *MI, ImmediateMap &IMap) {
132+
using namespace Hexagon;
133+
134+
if (MI->isCall()) {
135+
IMap.clear();
136+
return true;
137+
}
138+
139+
// If this is an instruction that loads a constant into a register,
140+
// record this information in IMap.
141+
unsigned Opc = MI->getOpcode();
142+
if (Opc == A2_tfrsi || Opc == A2_tfrpi) {
143+
unsigned DefR = MI->getOperand(0).getReg();
144+
bool Is32;
145+
// A non-integer destination leaves IMap unchanged.
if (!isIntReg(DefR, Is32))
146+
return false;
147+
// The source is not a plain immediate, so the new value is unknown: drop
// whatever was previously recorded for the destination.
if (!MI->getOperand(1).isImm()) {
148+
if (!Is32) {
149+
IMap.erase(TRI->getSubReg(DefR, isub_lo));
150+
IMap.erase(TRI->getSubReg(DefR, isub_hi));
151+
} else {
152+
IMap.erase(DefR);
153+
}
154+
return false;
155+
}
156+
uint64_t Val = MI->getOperand(1).getImm();
157+
// If it's a 64-bit register, break it up into subregisters.
158+
if (!Is32) {
159+
uint32_t VH = (Val >> 32), VL = (Val & 0xFFFFFFFFU);
160+
setReg(TRI->getSubReg(DefR, isub_lo), VL, IMap);
161+
setReg(TRI->getSubReg(DefR, isub_hi), VH, IMap);
162+
} else {
163+
// Val is narrowed to 32 bits by setReg's uint32_t parameter.
setReg(DefR, Val, IMap);
164+
}
165+
return true;
166+
}
167+
168+
// Not an A2_tfr[sp]i. Invalidate all modified registers in IMap.
169+
for (MachineInstr::mop_iterator Mo = MI->operands_begin(),
170+
E = MI->operands_end();
171+
Mo != E; ++Mo) {
172+
// A register-mask operand is handled like a call: forget everything.
if (Mo->isRegMask()) {
173+
IMap.clear();
174+
return true;
175+
}
176+
if (!Mo->isReg() || !Mo->isDef())
177+
continue;
178+
unsigned R = Mo->getReg();
179+
// Erase every alias of R that has an entry. The `true` argument
// presumably makes the iterator include R itself - TODO confirm.
for (MCRegAliasIterator AR(R, TRI, true); AR.isValid(); ++AR) {
180+
ImmediateMap::iterator F = IMap.find(*AR);
181+
if (F != IMap.end())
182+
IMap.erase(F);
183+
}
184+
}
185+
return true;
186+
}
187+
188+
// Rewrite the instruction as an immediate load if it is a copy (A2_tfr,
189+
// A2_tfrp or COPY) of a source that has a known constant value in IMap.
// The replacement is A2_tfrsi for 32-bit destinations; for 64-bit
// destinations it is A2_tfrpi, A2_combineii or CONST64, depending on which
// immediate ranges the value fits. Returns true if MI was replaced (MI is
// erased in that case), false if it was left alone.
190+
bool HexagonTfrCleanup::rewriteIfImm(MachineInstr *MI, ImmediateMap &IMap,
191+
SlotIndexes *Indexes) {
192+
using namespace Hexagon;
193+
unsigned Opc = MI->getOpcode();
194+
switch (Opc) {
195+
case A2_tfr:
196+
case A2_tfrp:
197+
case COPY:
198+
break;
199+
default:
200+
return false;
201+
}
202+
203+
unsigned DstR = MI->getOperand(0).getReg();
204+
unsigned SrcR = MI->getOperand(1).getReg();
205+
bool Tmp, Is32;
206+
// Both ends of the copy must be scalar integer registers.
if (!isIntReg(DstR, Is32) || !isIntReg(SrcR, Tmp))
207+
return false;
208+
assert(Tmp == Is32 && "Register size mismatch");
209+
uint64_t Val;
210+
bool Found = getReg(SrcR, Val, IMap);
211+
if (!Found)
212+
return false;
213+
214+
MachineBasicBlock &B = *MI->getParent();
215+
DebugLoc DL = MI->getDebugLoc();
216+
// Signed view of the constant: 32-bit values are sign-extended from their
// low 32 bits.
int64_t SVal = Is32 ? int32_t(Val) : Val;
217+
auto &HST = B.getParent()->getSubtarget<HexagonSubtarget>();
218+
MachineInstr *NewMI;
219+
// NOTE(review): only DstR and the immediate(s) are placed on the new
// instruction; implicit operands of MI are not carried over - confirm this
// is intended.
if (Is32)
220+
NewMI = BuildMI(B, MI, DL, HII->get(A2_tfrsi), DstR).addImm(SVal);
221+
// 64-bit value that fits a signed 8-bit immediate as a whole.
else if (isInt<8>(SVal))
222+
NewMI = BuildMI(B, MI, DL, HII->get(A2_tfrpi), DstR).addImm(SVal);
223+
// High and low words each fit a signed 8-bit immediate on their own.
else if (isInt<8>(SVal >> 32) && isInt<8>(int32_t(Val & 0xFFFFFFFFLL)))
224+
NewMI = BuildMI(B, MI, DL, HII->get(A2_combineii), DstR)
225+
.addImm(int32_t(SVal >> 32))
226+
.addImm(int32_t(Val & 0xFFFFFFFFLL));
227+
else if (HST.isTinyCore())
228+
// Disable generating CONST64 since it requires load resource.
229+
return false;
230+
else
231+
NewMI = BuildMI(B, MI, DL, HII->get(CONST64), DstR).addImm(Val);
232+
233+
// Replace the MI to reuse the same slot index
234+
if (Indexes)
235+
Indexes->replaceMachineInstrInMaps(*MI, *NewMI);
236+
MI->eraseFromParent();
237+
return true;
238+
}
239+
240+
// Remove the instruction if it is a self-assignment.
// Only A2_tfr and the predicated forms A2_tfrt/A2_tfrf are examined. If the
// source operand is marked undef, an IMPLICIT_DEF of the destination is built
// in MI's place before MI is erased. Returns true if MI was erased.
241+
bool HexagonTfrCleanup::eraseIfRedundant(MachineInstr *MI,
242+
SlotIndexes *Indexes) {
243+
unsigned Opc = MI->getOpcode();
244+
unsigned DefR, SrcR;
245+
bool IsUndef = false;
246+
switch (Opc) {
247+
case Hexagon::A2_tfr:
248+
// Rd = Rd
249+
DefR = MI->getOperand(0).getReg();
250+
SrcR = MI->getOperand(1).getReg();
251+
IsUndef = MI->getOperand(1).isUndef();
252+
break;
253+
case Hexagon::A2_tfrt:
254+
case Hexagon::A2_tfrf:
255+
// if ([!]Pu) Rd = Rd
// Here the source register is read from operand 2, not operand 1.
256+
DefR = MI->getOperand(0).getReg();
257+
SrcR = MI->getOperand(2).getReg();
258+
IsUndef = MI->getOperand(2).isUndef();
259+
break;
260+
default:
261+
return false;
262+
}
263+
if (DefR != SrcR)
264+
return false;
265+
if (IsUndef) {
266+
// The copy read an undef value, so build an IMPLICIT_DEF of the
// destination and copy MI's implicit register defs onto it.
MachineBasicBlock &B = *MI->getParent();
267+
DebugLoc DL = MI->getDebugLoc();
268+
auto DefI = BuildMI(B, MI, DL, HII->get(TargetOpcode::IMPLICIT_DEF), DefR);
269+
for (auto &Op : MI->operands())
270+
if (Op.isReg() && Op.isDef() && Op.isImplicit())
271+
DefI->addOperand(Op);
272+
}
273+
274+
// Drop MI from the slot index maps (when available) before erasing it.
if (Indexes)
275+
Indexes->removeMachineInstrFromMaps(*MI);
276+
MI->eraseFromParent();
277+
return true;
278+
}
279+
280+
// Pass entry point. For each basic block, with a constant map that is reset
// at block entry, visit the instructions in order: erase redundant
// self-transfers, rewrite copies of known constants into immediate loads,
// and keep the constant map up to date. Returns true if any instruction was
// erased or replaced.
bool HexagonTfrCleanup::runOnMachineFunction(MachineFunction &MF) {
281+
bool Changed = false;
282+
// Map: 32-bit register -> immediate value.
283+
// 64-bit registers are stored through their subregisters.
284+
ImmediateMap IMap;
285+
// Optional: null when the SlotIndexes analysis is not available.
SlotIndexes *Indexes = this->getAnalysisIfAvailable<SlotIndexes>();
286+
287+
auto &HST = MF.getSubtarget<HexagonSubtarget>();
288+
HII = HST.getInstrInfo();
289+
TRI = HST.getRegisterInfo();
290+
291+
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
292+
MachineBasicBlock &B = *I;
293+
MachineBasicBlock::iterator J, F, NextJ;
294+
// Known constants are tracked per block only.
IMap.clear();
295+
bool Inserted = false, Erased = false;
296+
for (J = B.begin(), F = B.end(); J != F; J = NextJ) {
297+
// Take the successor first: the current instruction may be erased below.
NextJ = std::next(J);
298+
MachineInstr *MI = &*J;
299+
// Note: this local E shadows the block-end iterator E of the outer loop.
bool E = eraseIfRedundant(MI, Indexes);
300+
Erased |= E;
301+
if (E)
302+
continue;
303+
Inserted |= rewriteIfImm(MI, IMap, Indexes);
304+
// The instruction just before NextJ is either the untouched MI or,
// presumably, the replacement that rewriteIfImm built in its place.
MachineBasicBlock::iterator NewJ = std::prev(NextJ);
305+
updateImmMap(&*NewJ, IMap);
306+
}
307+
bool BlockC = Inserted | Erased;
308+
Changed |= BlockC;
309+
// Re-synchronise slot indexes for the whole block if it was modified.
if (BlockC && Indexes)
310+
Indexes->repairIndexesInRange(&B, B.begin(), B.end());
311+
}
312+
313+
return Changed;
314+
}
315+
316+
//===----------------------------------------------------------------------===//
317+
// Public Constructor Functions
318+
//===----------------------------------------------------------------------===//
319+
// Pass registration: "tfr-cleanup" is the pass argument string and
// "Hexagon TFR Cleanup" the descriptive name. Presumably this macro provides
// the initializeHexagonTfrCleanupPass function declared at the top of the
// file - TODO confirm against the macro definition.
INITIALIZE_PASS(HexagonTfrCleanup, "tfr-cleanup", "Hexagon TFR Cleanup", false,
320+
false)
321+
322+
// Factory: returns a newly allocated HexagonTfrCleanup pass. The caller
// receives a raw pointer; ownership is presumably taken by the pass manager
// the pass is added to - verify against callers.
FunctionPass *llvm::createHexagonTfrCleanup() {
323+
return new HexagonTfrCleanup();
324+
}

llvm/test/CodeGen/Hexagon/atomicrmw-uinc-udec-wrap.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -160,10 +160,8 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) {
160160
; CHECK-NEXT: }
161161
; CHECK-NEXT: {
162162
; CHECK-NEXT: p0 = cmp.gtu(r3:2,r5:4)
163-
; CHECK-NEXT: }
164-
; CHECK-NEXT: {
165-
; CHECK-NEXT: r8 = mux(p0,r8,r1)
166-
; CHECK-NEXT: r9 = mux(p0,r9,r1)
163+
; CHECK-NEXT: if (!p0.new) r8 = add(r1,#0)
164+
; CHECK-NEXT: if (!p0.new) r9 = add(r1,#0)
167165
; CHECK-NEXT: }
168166
; CHECK-NEXT: {
169167
; CHECK-NEXT: memd_locked(r0,p0) = r9:8

0 commit comments

Comments
 (0)