Skip to content

Commit 0e6f64c

Browse files
authored
[LoongArch] Reimplement to prevent Pseudo{CALL, LA*}_LARGE instruction reordering (#100099)
The Pseudo{CALL, LA*}_LARGE instruction patterns specified in psABI v2.30 must not be reordered. This patch marks scheduling boundaries around these instructions to prevent reordering. The expansion of the Pseudo{CALL, LA*}_LARGE instructions is also moved back to the Pre-RA stage, which helps subsequent address-calculation optimizations.
1 parent 3e2631c commit 0e6f64c

11 files changed

+591
-545
lines changed

llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp

Lines changed: 309 additions & 336 deletions
Large diffs are not rendered by default.

llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,83 @@ bool LoongArchInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
347347
}
348348
}
349349

350+
// Decides whether MI must act as a scheduling boundary within MBB.
//
// The generic TargetInstrInfo::isSchedulingBoundary verdict is honoured first.
// On top of that, this returns true for:
//   - PCADDU18I whose operand 1 carries the MO_CALL36 target flag;
//   - PCALAU12I that is directly followed by ADDI_D and then LU32I_D, when the
//     target flags of the three instructions form one of the triples checked
//     in the switch below;
//   - LU52I_D whose operand 2 carries MO_PCREL64_HI, MO_GOT_PC64_HI or
//     MO_IE_PC64_HI.
// Every other instruction yields false.
//
// MBB is dereferenced unconditionally (MBB->end()), so it must not be null.
bool LoongArchInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
351+
const MachineBasicBlock *MBB,
352+
const MachineFunction &MF) const {
353+
if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
354+
return true;
355+
// MII points at MI itself; MIE is the end of MBB and bounds the look-ahead
// performed in the PCALAU12I case.
356+
auto MII = MI.getIterator();
357+
auto MIE = MBB->end();
358+
359+
// According to psABI v2.30:
360+
//
361+
// https://github.com/loongson/la-abi-specs/releases/tag/v2.30
362+
//
363+
// The following instruction patterns are prohibited from being reordered:
364+
//
365+
// * pcaddu18i $ra, %call36(s)
366+
// jirl $ra, $ra, 0
367+
//
368+
// * pcalau12i $a0, %pc_hi20(s)
369+
// addi.d $a1, $zero, %pc_lo12(s)
370+
// lu32i.d $a1, %pc64_lo20(s)
371+
// lu52i.d $a1, $a1, %pc64_hi12(s)
372+
//
373+
// * pcalau12i $a0, %got_pc_hi20(s) | %ld_pc_hi20(s) | %gd_pc_hi20(s)
374+
// addi.d $a1, $zero, %got_pc_lo12(s)
375+
// lu32i.d $a1, %got64_pc_lo20(s)
376+
// lu52i.d $a1, $a1, %got64_pc_hi12(s)
377+
//
378+
// * pcalau12i $a0, %ie_pc_hi20(s)
379+
// addi.d $a1, $zero, %ie_pc_lo12(s)
380+
// lu32i.d $a1, %ie64_pc_lo20(s)
381+
// lu52i.d $a1, $a1, %ie64_pc_hi12(s)
382+
//
383+
// For simplicity, only pcalau12i and lu52i.d are marked as scheduling
384+
// boundaries, and the instructions between them are guaranteed to be
385+
// ordered according to data dependencies.
386+
switch (MI.getOpcode()) {
387+
case LoongArch::PCADDU18I:
388+
if (MI.getOperand(1).getTargetFlags() == LoongArchII::MO_CALL36)
389+
return true;
390+
break;
// Opening instruction of a 64-bit sequence: only a boundary when the next two
// instructions in the block are ADDI_D and LU32I_D with matching target flags.
// The iterator is compared against MIE before each dereference so the
// look-ahead never runs past the end of the block.
391+
case LoongArch::PCALAU12I: {
392+
auto AddI = std::next(MII);
393+
if (AddI == MIE || AddI->getOpcode() != LoongArch::ADDI_D)
394+
break;
395+
auto Lu32I = std::next(AddI);
396+
if (Lu32I == MIE || Lu32I->getOpcode() != LoongArch::LU32I_D)
397+
break;
398+
auto MO0 = MI.getOperand(1).getTargetFlags();
399+
auto MO1 = AddI->getOperand(2).getTargetFlags();
400+
auto MO2 = Lu32I->getOperand(2).getTargetFlags();
401+
if (MO0 == LoongArchII::MO_PCREL_HI && MO1 == LoongArchII::MO_PCREL_LO &&
402+
MO2 == LoongArchII::MO_PCREL64_LO)
403+
return true;
// GOT, LD and GD sequences differ only in the pcalau12i flag; the ADDI_D and
// LU32I_D flags checked here are the GOT ones for all three.
404+
if ((MO0 == LoongArchII::MO_GOT_PC_HI || MO0 == LoongArchII::MO_LD_PC_HI ||
405+
MO0 == LoongArchII::MO_GD_PC_HI) &&
406+
MO1 == LoongArchII::MO_GOT_PC_LO && MO2 == LoongArchII::MO_GOT_PC64_LO)
407+
return true;
408+
if (MO0 == LoongArchII::MO_IE_PC_HI && MO1 == LoongArchII::MO_IE_PC_LO &&
409+
MO2 == LoongArchII::MO_IE_PC64_LO)
410+
return true;
411+
break;
412+
}
// Closing instruction of the 64-bit sequences: matched purely on the target
// flag of its own operand 2, without inspecting neighbouring instructions.
413+
case LoongArch::LU52I_D: {
414+
auto MO = MI.getOperand(2).getTargetFlags();
415+
if (MO == LoongArchII::MO_PCREL64_HI || MO == LoongArchII::MO_GOT_PC64_HI ||
416+
MO == LoongArchII::MO_IE_PC64_HI)
417+
return true;
418+
break;
419+
}
420+
default:
421+
break;
422+
}
423+
424+
return false;
425+
}
426+
350427
unsigned LoongArchInstrInfo::removeBranch(MachineBasicBlock &MBB,
351428
int *BytesRemoved) const {
352429
if (BytesRemoved)

llvm/lib/Target/LoongArch/LoongArchInstrInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ class LoongArchInstrInfo : public LoongArchGenInstrInfo {
6464
bool isBranchOffsetInRange(unsigned BranchOpc,
6565
int64_t BrOffset) const override;
6666

67+
bool isSchedulingBoundary(const MachineInstr &MI,
68+
const MachineBasicBlock *MBB,
69+
const MachineFunction &MF) const override;
70+
6771
unsigned removeBranch(MachineBasicBlock &MBB,
6872
int *BytesRemoved = nullptr) const override;
6973

llvm/lib/Target/LoongArch/LoongArchInstrInfo.td

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1481,7 +1481,7 @@ def : Pat<(loongarch_call tglobaladdr:$func), (PseudoCALL tglobaladdr:$func)>;
14811481
def : Pat<(loongarch_call texternalsym:$func), (PseudoCALL texternalsym:$func)>;
14821482

14831483
// Function call with 'Medium' code model.
1484-
let isCall = 1, Defs = [R1, R20], Size = 8 in
1484+
let isCall = 1, Defs = [R1] in
14851485
def PseudoCALL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$func)>;
14861486

14871487
let Predicates = [IsLA64] in {
@@ -1492,7 +1492,7 @@ def : Pat<(loongarch_call_medium texternalsym:$func),
14921492
} // Predicates = [IsLA64]
14931493

14941494
// Function call with 'Large' code model.
1495-
let isCall = 1, Defs = [R1, R20], Size = 24 in
1495+
let isCall = 1, Defs = [R1] in
14961496
def PseudoCALL_LARGE: Pseudo<(outs), (ins bare_symbol:$func)>;
14971497

14981498
let Predicates = [IsLA64] in {
@@ -1530,8 +1530,7 @@ def : Pat<(loongarch_tail (iPTR texternalsym:$dst)),
15301530
(PseudoTAIL texternalsym:$dst)>;
15311531

15321532
// Tail call with 'Medium' code model.
1533-
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
1534-
Uses = [R3], Defs = [R20], Size = 8 in
1533+
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
15351534
def PseudoTAIL_MEDIUM : Pseudo<(outs), (ins bare_symbol:$dst)>;
15361535

15371536
let Predicates = [IsLA64] in {
@@ -1542,8 +1541,7 @@ def : Pat<(loongarch_tail_medium (iPTR texternalsym:$dst)),
15421541
} // Predicates = [IsLA64]
15431542

15441543
// Tail call with 'Large' code model.
1545-
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
1546-
Uses = [R3], Defs = [R19, R20], Size = 24 in
1544+
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3] in
15471545
def PseudoTAIL_LARGE : Pseudo<(outs), (ins bare_symbol:$dst)>;
15481546

15491547
let Predicates = [IsLA64] in {
@@ -1575,12 +1573,12 @@ def PseudoJIRL_TAIL : Pseudo<(outs), (ins GPR:$rj, simm16_lsl2:$imm16)>,
15751573

15761574
/// call36/tail36 macro instructions
15771575
let isCall = 1, isBarrier = 1, isCodeGenOnly = 0, isAsmParserOnly = 1,
1578-
Defs = [R1], Size = 8, hasSideEffects = 0, mayStore = 0, mayLoad = 0 in
1576+
Defs = [R1], hasSideEffects = 0, mayStore = 0, mayLoad = 0 in
15791577
def PseudoCALL36 : Pseudo<(outs), (ins bare_symbol:$dst), [],
15801578
"call36", "$dst">,
15811579
Requires<[IsLA64]>;
15821580
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [R3],
1583-
isCodeGenOnly = 0, isAsmParserOnly = 1, Size = 8, hasSideEffects = 0,
1581+
isCodeGenOnly = 0, isAsmParserOnly = 1, hasSideEffects = 0,
15841582
mayStore = 0, mayLoad = 0 in
15851583
def PseudoTAIL36 : Pseudo<(outs), (ins GPR:$tmp, bare_symbol:$dst), [],
15861584
"tail36", "$tmp, $dst">,
@@ -1617,7 +1615,6 @@ def PseudoLA_TLS_LD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
16171615
"la.tls.ld", "$dst, $src">;
16181616
def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
16191617
"la.tls.gd", "$dst, $src">;
1620-
let Defs = [R20], Size = 20 in {
16211618
def PseudoLA_PCREL_LARGE : Pseudo<(outs GPR:$dst),
16221619
(ins GPR:$tmp, bare_symbol:$src), [],
16231620
"la.pcrel", "$dst, $tmp, $src">,
@@ -1632,15 +1629,13 @@ def PseudoLA_TLS_GD_LARGE : Pseudo<(outs GPR:$dst),
16321629
(ins GPR:$tmp, bare_symbol:$src), [],
16331630
"la.tls.gd", "$dst, $tmp, $src">,
16341631
Requires<[IsLA64]>;
1635-
} // Defs = [R20], Size = 20
16361632
}
16371633
let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 0,
16381634
isAsmParserOnly = 1 in {
16391635
def PseudoLA_GOT : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
16401636
"la.got", "$dst, $src">;
16411637
def PseudoLA_TLS_IE : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
16421638
"la.tls.ie", "$dst, $src">;
1643-
let Defs = [R20], Size = 20 in {
16441639
def PseudoLA_GOT_LARGE : Pseudo<(outs GPR:$dst),
16451640
(ins GPR:$tmp, bare_symbol:$src), [],
16461641
"la.got", "$dst, $tmp, $src">,
@@ -1649,7 +1644,6 @@ def PseudoLA_TLS_IE_LARGE : Pseudo<(outs GPR:$dst),
16491644
(ins GPR:$tmp, bare_symbol:$src), [],
16501645
"la.tls.ie", "$dst, $tmp, $src">,
16511646
Requires<[IsLA64]>;
1652-
} // Defs = [R20], Size = 20
16531647
}
16541648

16551649
// Used for expand PseudoLA_TLS_DESC_* instructions.
@@ -1674,7 +1668,7 @@ def PseudoLA_TLS_DESC_PC : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [],
16741668
}
16751669

16761670
let isCall = 1, isBarrier = 1, hasSideEffects = 0, mayStore = 0, mayLoad = 0,
1677-
isCodeGenOnly = 0, isAsmParserOnly = 1, Defs = [R1, R4, R20], Size = 32 in
1671+
isCodeGenOnly = 0, isAsmParserOnly = 1, Defs = [R1, R4] in
16781672
def PseudoLA_TLS_DESC_PC_LARGE : Pseudo<(outs GPR:$dst),
16791673
(ins GPR:$tmp, bare_symbol:$src), [],
16801674
"la.tls.desc", "$dst, $tmp, $src">,

llvm/test/CodeGen/LoongArch/code-models.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ define i32 @call_globaladdress(i32 %a) nounwind {
3333
; LARGE: # %bb.0:
3434
; LARGE-NEXT: addi.d $sp, $sp, -16
3535
; LARGE-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
36-
; LARGE-NEXT: pcalau12i $ra, %got_pc_hi20(callee)
37-
; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(callee)
38-
; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(callee)
39-
; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(callee)
40-
; LARGE-NEXT: ldx.d $ra, $t8, $ra
36+
; LARGE-NEXT: pcalau12i $a1, %got_pc_hi20(callee)
37+
; LARGE-NEXT: addi.d $ra, $zero, %got_pc_lo12(callee)
38+
; LARGE-NEXT: lu32i.d $ra, %got64_pc_lo20(callee)
39+
; LARGE-NEXT: lu52i.d $ra, $ra, %got64_pc_hi12(callee)
40+
; LARGE-NEXT: ldx.d $ra, $ra, $a1
4141
; LARGE-NEXT: jirl $ra, $ra, 0
4242
; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
4343
; LARGE-NEXT: addi.d $sp, $sp, 16
@@ -82,11 +82,11 @@ define void @call_external_sym(ptr %dst) {
8282
; LARGE-NEXT: .cfi_offset 1, -8
8383
; LARGE-NEXT: ori $a2, $zero, 1000
8484
; LARGE-NEXT: move $a1, $zero
85-
; LARGE-NEXT: pcalau12i $ra, %pc_hi20(memset)
86-
; LARGE-NEXT: addi.d $t8, $zero, %pc_lo12(memset)
87-
; LARGE-NEXT: lu32i.d $t8, %pc64_lo20(memset)
88-
; LARGE-NEXT: lu52i.d $t8, $t8, %pc64_hi12(memset)
89-
; LARGE-NEXT: add.d $ra, $t8, $ra
85+
; LARGE-NEXT: pcalau12i $a3, %pc_hi20(memset)
86+
; LARGE-NEXT: addi.d $ra, $zero, %pc_lo12(memset)
87+
; LARGE-NEXT: lu32i.d $ra, %pc64_lo20(memset)
88+
; LARGE-NEXT: lu52i.d $ra, $ra, %pc64_hi12(memset)
89+
; LARGE-NEXT: add.d $ra, $ra, $a3
9090
; LARGE-NEXT: jirl $ra, $ra, 0
9191
; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
9292
; LARGE-NEXT: addi.d $sp, $sp, 16
@@ -105,17 +105,17 @@ define i32 @caller_tail(i32 %i) nounwind {
105105
;
106106
; MEDIUM-LABEL: caller_tail:
107107
; MEDIUM: # %bb.0: # %entry
108-
; MEDIUM-NEXT: pcaddu18i $t8, %call36(callee_tail)
109-
; MEDIUM-NEXT: jr $t8
108+
; MEDIUM-NEXT: pcaddu18i $a1, %call36(callee_tail)
109+
; MEDIUM-NEXT: jr $a1
110110
;
111111
; LARGE-LABEL: caller_tail:
112112
; LARGE: # %bb.0: # %entry
113-
; LARGE-NEXT: pcalau12i $t7, %got_pc_hi20(callee_tail)
114-
; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(callee_tail)
115-
; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(callee_tail)
116-
; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(callee_tail)
117-
; LARGE-NEXT: ldx.d $t7, $t8, $t7
118-
; LARGE-NEXT: jr $t7
113+
; LARGE-NEXT: pcalau12i $a1, %got_pc_hi20(callee_tail)
114+
; LARGE-NEXT: addi.d $a2, $zero, %got_pc_lo12(callee_tail)
115+
; LARGE-NEXT: lu32i.d $a2, %got64_pc_lo20(callee_tail)
116+
; LARGE-NEXT: lu52i.d $a2, $a2, %got64_pc_hi12(callee_tail)
117+
; LARGE-NEXT: ldx.d $a1, $a2, $a1
118+
; LARGE-NEXT: jr $a1
119119
entry:
120120
%r = tail call i32 @callee_tail(i32 %i)
121121
ret i32 %r

llvm/test/CodeGen/LoongArch/expand-call.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; RUN: llc --mtriple=loongarch64 -mattr=+d --stop-before loongarch-prera-expand-pseudo \
22
; RUN: --verify-machineinstrs < %s | FileCheck %s --check-prefix=NOEXPAND
3-
; RUN: llc --mtriple=loongarch64 -mattr=+d --stop-before machine-opt-remark-emitter \
3+
; RUN: llc --mtriple=loongarch64 --stop-after loongarch-prera-expand-pseudo \
44
; RUN: --verify-machineinstrs < %s | FileCheck %s --check-prefix=EXPAND
55

66
declare void @callee()

llvm/test/CodeGen/LoongArch/global-address.ll

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -53,32 +53,32 @@ define void @foo() nounwind {
5353
; LA64LARGENOPIC-LABEL: foo:
5454
; LA64LARGENOPIC: # %bb.0:
5555
; LA64LARGENOPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
56-
; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(G)
57-
; LA64LARGENOPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G)
58-
; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G)
59-
; LA64LARGENOPIC-NEXT: ldx.d $a0, $t8, $a0
56+
; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(G)
57+
; LA64LARGENOPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(G)
58+
; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G)
59+
; LA64LARGENOPIC-NEXT: ldx.d $a0, $a1, $a0
6060
; LA64LARGENOPIC-NEXT: ld.w $zero, $a0, 0
6161
; LA64LARGENOPIC-NEXT: pcalau12i $a0, %pc_hi20(g)
62-
; LA64LARGENOPIC-NEXT: addi.d $t8, $zero, %pc_lo12(g)
63-
; LA64LARGENOPIC-NEXT: lu32i.d $t8, %pc64_lo20(g)
64-
; LA64LARGENOPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(g)
65-
; LA64LARGENOPIC-NEXT: add.d $a0, $t8, $a0
62+
; LA64LARGENOPIC-NEXT: addi.d $a1, $zero, %pc_lo12(g)
63+
; LA64LARGENOPIC-NEXT: lu32i.d $a1, %pc64_lo20(g)
64+
; LA64LARGENOPIC-NEXT: lu52i.d $a1, $a1, %pc64_hi12(g)
65+
; LA64LARGENOPIC-NEXT: add.d $a0, $a1, $a0
6666
; LA64LARGENOPIC-NEXT: ld.w $zero, $a0, 0
6767
; LA64LARGENOPIC-NEXT: ret
6868
;
6969
; LA64LARGEPIC-LABEL: foo:
7070
; LA64LARGEPIC: # %bb.0:
7171
; LA64LARGEPIC-NEXT: pcalau12i $a0, %got_pc_hi20(G)
72-
; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(G)
73-
; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(G)
74-
; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(G)
75-
; LA64LARGEPIC-NEXT: ldx.d $a0, $t8, $a0
72+
; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %got_pc_lo12(G)
73+
; LA64LARGEPIC-NEXT: lu32i.d $a1, %got64_pc_lo20(G)
74+
; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %got64_pc_hi12(G)
75+
; LA64LARGEPIC-NEXT: ldx.d $a0, $a1, $a0
7676
; LA64LARGEPIC-NEXT: ld.w $zero, $a0, 0
7777
; LA64LARGEPIC-NEXT: pcalau12i $a0, %pc_hi20(.Lg$local)
78-
; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(.Lg$local)
79-
; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(.Lg$local)
80-
; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(.Lg$local)
81-
; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0
78+
; LA64LARGEPIC-NEXT: addi.d $a1, $zero, %pc_lo12(.Lg$local)
79+
; LA64LARGEPIC-NEXT: lu32i.d $a1, %pc64_lo20(.Lg$local)
80+
; LA64LARGEPIC-NEXT: lu52i.d $a1, $a1, %pc64_hi12(.Lg$local)
81+
; LA64LARGEPIC-NEXT: add.d $a0, $a1, $a0
8282
; LA64LARGEPIC-NEXT: ld.w $zero, $a0, 0
8383
; LA64LARGEPIC-NEXT: ret
8484
%V = load volatile i32, ptr @G

llvm/test/CodeGen/LoongArch/global-variable-code-model.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,10 @@ define dso_local signext i32 @local_large() #0 {
2020
; CHECK-LABEL: local_large:
2121
; CHECK: # %bb.0:
2222
; CHECK-NEXT: pcalau12i $a0, %pc_hi20(b)
23-
; CHECK-NEXT: addi.d $t8, $zero, %pc_lo12(b)
24-
; CHECK-NEXT: lu32i.d $t8, %pc64_lo20(b)
25-
; CHECK-NEXT: lu52i.d $t8, $t8, %pc64_hi12(b)
26-
; CHECK-NEXT: add.d $a0, $t8, $a0
23+
; CHECK-NEXT: addi.d $a1, $zero, %pc_lo12(b)
24+
; CHECK-NEXT: lu32i.d $a1, %pc64_lo20(b)
25+
; CHECK-NEXT: lu52i.d $a1, $a1, %pc64_hi12(b)
26+
; CHECK-NEXT: add.d $a0, $a1, $a0
2727
; CHECK-NEXT: ld.w $a0, $a0, 0
2828
; CHECK-NEXT: ret
2929
%1 = load i32, ptr @b, align 4

0 commit comments

Comments
 (0)