Skip to content

Commit 1fe702f

Browse files
authored
[lld][LoongArch] Relax TLS LE/GD/LD (#123600)
In local-exec form, the code sequence is converted as follows:

```
From:
  lu12i.w $rd, %le_hi20_r(sym)
    R_LARCH_TLS_LE_HI20_R, R_LARCH_RELAX
  add.w/d $rd, $rd, $tp, %le_add_r(sym)
    R_LARCH_TLS_LE_ADD_R, R_LARCH_RELAX
  addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym)
    R_LARCH_TLS_LE_LO12_R, R_LARCH_RELAX
To:
  addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym)
    R_LARCH_TLS_LE_LO12_R
```

In global-dynamic or local-dynamic form, the code sequence is converted as follows:

```
From:
  pcalau12i $a0, %ld_pc_hi20(sym) | %gd_pc_hi20(sym)
    R_LARCH_TLS_GD_PC_HI20 | R_LARCH_TLS_LD_PC_HI20, R_LARCH_RELAX
  addi.w/d $a0, $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
    R_LARCH_GOT_PC_LO12, R_LARCH_RELAX
To:
  pcaddi $a0, %got_pc_lo12(sym) | %got_pc_lo12(sym)
    R_LARCH_TLS_GD_PCREL20_S2 | R_LARCH_TLS_LD_PCREL20_S2
```

Note: For the initial-exec form, since it involves the conversion from IE to LE, we will implement it in a future patch.
1 parent 75f76d4 commit 1fe702f

6 files changed

+392
-14
lines changed

lld/ELF/Arch/LoongArch.cpp

Lines changed: 67 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,10 @@ static uint32_t setJ20(uint32_t insn, uint32_t imm) {
154154
return (insn & 0xfe00001f) | (extractBits(imm, 19, 0) << 5);
155155
}
156156

157+
static uint32_t setJ5(uint32_t insn, uint32_t imm) {
158+
return (insn & 0xfffffc1f) | (extractBits(imm, 4, 0) << 5);
159+
}
160+
157161
static uint32_t setK12(uint32_t insn, uint32_t imm) {
158162
return (insn & 0xffc003ff) | (extractBits(imm, 11, 0) << 10);
159163
}
@@ -761,10 +765,10 @@ static bool isPairRelaxable(ArrayRef<Relocation> relocs, size_t i) {
761765

762766
// Relax code sequence.
763767
// From:
764-
// pcalau12i $a0, %pc_hi20(sym)
765-
// addi.w/d $a0, $a0, %pc_lo12(sym)
768+
// pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym) | %gd_pc_hi20(sym)
769+
// addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
766770
// To:
767-
// pcaddi $a0, %pc_lo12(sym)
771+
// pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
768772
//
769773
// From:
770774
// pcalau12i $a0, %got_pc_hi20(sym_got)
@@ -778,6 +782,10 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
778782
if (!((rHi20.type == R_LARCH_PCALA_HI20 &&
779783
rLo12.type == R_LARCH_PCALA_LO12) ||
780784
(rHi20.type == R_LARCH_GOT_PC_HI20 &&
785+
rLo12.type == R_LARCH_GOT_PC_LO12) ||
786+
(rHi20.type == R_LARCH_TLS_GD_PC_HI20 &&
787+
rLo12.type == R_LARCH_GOT_PC_LO12) ||
788+
(rHi20.type == R_LARCH_TLS_LD_PC_HI20 &&
781789
rLo12.type == R_LARCH_GOT_PC_LO12)))
782790
return;
783791

@@ -798,6 +806,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
798806
else if (rHi20.expr == RE_LOONGARCH_PAGE_PC ||
799807
rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC)
800808
dest = rHi20.sym->getVA(ctx);
809+
else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC)
810+
dest = ctx.in.got->getGlobalDynAddr(*rHi20.sym);
801811
else {
802812
Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr ("
803813
<< rHi20.expr << ") against symbol " << rHi20.sym
@@ -827,7 +837,12 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
827837
return;
828838

829839
sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
830-
sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
840+
if (rHi20.type == R_LARCH_TLS_GD_PC_HI20)
841+
sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2;
842+
else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20)
843+
sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2;
844+
else
845+
sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
831846
sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0));
832847
remove = 4;
833848
}
@@ -863,6 +878,33 @@ static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i,
863878
}
864879
}
865880

881+
// Relax code sequence.
882+
// From:
883+
// lu12i.w $rd, %le_hi20_r(sym)
884+
// add.w/d $rd, $rd, $tp, %le_add_r(sym)
885+
// addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym)
886+
// To:
887+
// addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym)
888+
static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i,
889+
uint64_t loc, Relocation &r, uint32_t &remove) {
890+
uint64_t val = r.sym->getVA(ctx, r.addend);
891+
// Check if the val exceeds the range of addi/ld/st.
892+
if (!isInt<12>(val))
893+
return;
894+
uint32_t currInsn = read32le(sec.content().data() + r.offset);
895+
switch (r.type) {
896+
case R_LARCH_TLS_LE_HI20_R:
897+
case R_LARCH_TLS_LE_ADD_R:
898+
sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
899+
remove = 4;
900+
break;
901+
case R_LARCH_TLS_LE_LO12_R:
902+
sec.relaxAux->writes.push_back(setJ5(currInsn, R_TP));
903+
sec.relaxAux->relocTypes[i] = R_LARCH_TLS_LE_LO12_R;
904+
break;
905+
}
906+
}
907+
866908
static bool relax(Ctx &ctx, InputSection &sec) {
867909
const uint64_t secAddr = sec.getVA();
868910
const MutableArrayRef<Relocation> relocs = sec.relocs();
@@ -903,6 +945,8 @@ static bool relax(Ctx &ctx, InputSection &sec) {
903945
}
904946
case R_LARCH_PCALA_HI20:
905947
case R_LARCH_GOT_PC_HI20:
948+
case R_LARCH_TLS_GD_PC_HI20:
949+
case R_LARCH_TLS_LD_PC_HI20:
906950
// The overflow check for i+2 will be carried out in isPairRelaxable.
907951
if (isPairRelaxable(relocs, i))
908952
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
@@ -911,6 +955,12 @@ static bool relax(Ctx &ctx, InputSection &sec) {
911955
if (relaxable(relocs, i))
912956
relaxCall36(ctx, sec, i, loc, r, remove);
913957
break;
958+
case R_LARCH_TLS_LE_HI20_R:
959+
case R_LARCH_TLS_LE_ADD_R:
960+
case R_LARCH_TLS_LE_LO12_R:
961+
if (relaxable(relocs, i))
962+
relaxTlsLe(ctx, sec, i, loc, r, remove);
963+
break;
914964
}
915965

916966
// For all anchors whose offsets are <= r.offset, they are preceded by
@@ -1015,8 +1065,21 @@ void LoongArch::finalizeRelax(int passes) const {
10151065
r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC;
10161066
break;
10171067
case R_LARCH_B26:
1068+
case R_LARCH_TLS_LE_LO12_R:
1069+
skip = 4;
1070+
write32le(p, aux.writes[writesIdx++]);
1071+
break;
1072+
case R_LARCH_TLS_GD_PCREL20_S2:
1073+
// Note: R_LARCH_TLS_LD_PCREL20_S2 must also use R_TLSGD_PC instead
1074+
// of R_TLSLD_PC due to historical reasons. In fact, right now TLSLD
1075+
// behaves exactly like TLSGD on LoongArch.
1076+
//
1077+
// This reason has also been mentioned in mold commit:
1078+
// https://github.com/rui314/mold/commit/5dfa1cf07c03bd57cb3d493b652ef22441bcd71c
1079+
case R_LARCH_TLS_LD_PCREL20_S2:
10181080
skip = 4;
10191081
write32le(p, aux.writes[writesIdx++]);
1082+
r.expr = R_TLSGD_PC;
10201083
break;
10211084
default:
10221085
llvm_unreachable("unsupported type");

lld/test/ELF/loongarch-relax-emit-relocs.s

Lines changed: 115 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,45 @@
2727
# RELAX-NEXT: R_LARCH_RELAX *ABS*
2828
# RELAX-NEXT: R_LARCH_PCREL20_S2 _start
2929
# RELAX-NEXT: R_LARCH_RELAX *ABS*
30-
# RELAX32-NEXT: nop
31-
# RELAX32-NEXT: R_LARCH_ALIGN *ABS*+0xc
32-
# RELAX32-NEXT: nop
33-
# RELAX32-NEXT: ret
3430

3531
# RELAX64-NEXT: bl -8
3632
# RELAX64-NEXT: R_LARCH_B26 _start
3733
# RELAX64-NEXT: R_LARCH_RELAX *ABS*
3834
# RELAX64-NEXT: b -12
3935
# RELAX64-NEXT: R_LARCH_B26 _start
4036
# RELAX64-NEXT: R_LARCH_RELAX *ABS*
41-
# RELAX64-NEXT: ret
37+
38+
# RELAX-NEXT: lu12i.w $a0, 0
39+
# RELAX-NEXT: R_LARCH_TLS_LE_HI20 a
40+
# RELAX-NEXT: ori $a0, $a0, 0
41+
# RELAX-NEXT: R_LARCH_TLS_LE_LO12 a
42+
# RELAX-NEXT: pcaddi $a0, [[#]]
43+
# RELAX-NEXT: R_LARCH_RELAX a
44+
# RELAX-NEXT: R_LARCH_RELAX *ABS*
45+
# RELAX-NEXT: R_LARCH_TLS_GD_PCREL20_S2 a
46+
# RELAX-NEXT: R_LARCH_RELAX *ABS*
47+
# RELAX-NEXT: pcaddi $a0, [[#]]
48+
# RELAX-NEXT: R_LARCH_RELAX a
49+
# RELAX-NEXT: R_LARCH_RELAX *ABS*
50+
# RELAX-NEXT: R_LARCH_TLS_LD_PCREL20_S2 a
51+
# RELAX-NEXT: R_LARCH_RELAX *ABS*
52+
# RELAX-NEXT: addi.{{[dw]}} $a0, $tp, 0
53+
# RELAX-NEXT: R_LARCH_RELAX a
54+
# RELAX-NEXT: R_LARCH_RELAX *ABS*
55+
# RELAX-NEXT: R_LARCH_RELAX a
56+
# RELAX-NEXT: R_LARCH_RELAX *ABS*
57+
# RELAX-NEXT: R_LARCH_TLS_LE_LO12_R a
58+
# RELAX-NEXT: R_LARCH_RELAX *ABS*
59+
60+
# RELAX32-NEXT: nop
61+
# RELAX32-NEXT: R_LARCH_ALIGN *ABS*+0xc
62+
# RELAX32-NEXT: ret
63+
64+
# RELAX64-NEXT: nop
4265
# RELAX64-NEXT: R_LARCH_ALIGN *ABS*+0xc
66+
# RELAX64-NEXT: nop
67+
# RELAX64-NEXT: nop
68+
# RELAX64-NEXT: ret
4369

4470
# NORELAX: <_start>:
4571
# NORELAX-NEXT: pcalau12i $a0, 0
@@ -62,8 +88,36 @@
6288
# NORELAX-NEXT: R_LARCH_CALL36 _start
6389
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
6490
# NORELAX-NEXT: jirl $zero, $a0, -24
65-
# NORELAX-NEXT: ret
91+
# NORELAX-NEXT: lu12i.w $a0, 0
92+
# NORELAX-NEXT: R_LARCH_TLS_LE_HI20 a
93+
# NORELAX-NEXT: ori $a0, $a0, 0
94+
# NORELAX-NEXT: R_LARCH_TLS_LE_LO12 a
95+
# NORELAX-NEXT: pcalau12i $a0, 16
96+
# NORELAX-NEXT: R_LARCH_TLS_GD_PC_HI20 a
97+
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
98+
# NORELAX-NEXT: addi.d $a0, $a0, 8
99+
# NORELAX-NEXT: R_LARCH_GOT_PC_LO12 a
100+
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
101+
# NORELAX-NEXT: pcalau12i $a0, 16
102+
# NORELAX-NEXT: R_LARCH_TLS_LD_PC_HI20 a
103+
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
104+
# NORELAX-NEXT: addi.d $a0, $a0, 8
105+
# NORELAX-NEXT: R_LARCH_GOT_PC_LO12 a
106+
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
107+
# NORELAX-NEXT: lu12i.w $a0, 0
108+
# NORELAX-NEXT: R_LARCH_TLS_LE_HI20_R a
109+
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
110+
# NORELAX-NEXT: add.d $a0, $a0, $tp
111+
# NORELAX-NEXT: R_LARCH_TLS_LE_ADD_R a
112+
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
113+
# NORELAX-NEXT: addi.d $a0, $a0, 0
114+
# NORELAX-NEXT: R_LARCH_TLS_LE_LO12_R a
115+
# NORELAX-NEXT: R_LARCH_RELAX *ABS*
116+
# NORELAX-NEXT: nop
66117
# NORELAX-NEXT: R_LARCH_ALIGN *ABS*+0xc
118+
# NORELAX-NEXT: nop
119+
# NORELAX-NEXT: nop
120+
# NORELAX-NEXT: ret
67121

68122
# CHECKR: <_start>:
69123
# CHECKR-NEXT: pcalau12i $a0, 0
@@ -86,12 +140,53 @@
86140
# CHECKR-NEXT: R_LARCH_CALL36 _start
87141
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
88142
# CHECKR-NEXT: jr $a0
143+
# CHECKR-NEXT: lu12i.w $a0, 0
144+
# CHECKR-NEXT: R_LARCH_TLS_LE_HI20 a
145+
# CHECKR-NEXT: ori $a0, $a0, 0
146+
# CHECKR-NEXT: R_LARCH_TLS_LE_LO12 a
147+
# CHECKR-NEXT: pcalau12i $a0, 0
148+
# CHECKR-NEXT: R_LARCH_TLS_GD_PC_HI20 a
149+
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
150+
# CHECKR-NEXT: addi.d $a0, $a0, 0
151+
# CHECKR-NEXT: R_LARCH_GOT_PC_LO12 a
152+
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
153+
# CHECKR-NEXT: pcalau12i $a0, 0
154+
# CHECKR-NEXT: R_LARCH_TLS_LD_PC_HI20 a
155+
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
156+
# CHECKR-NEXT: addi.d $a0, $a0, 0
157+
# CHECKR-NEXT: R_LARCH_GOT_PC_LO12 a
158+
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
159+
# CHECKR-NEXT: lu12i.w $a0, 0
160+
# CHECKR-NEXT: R_LARCH_TLS_LE_HI20_R a
161+
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
162+
# CHECKR-NEXT: add.d $a0, $a0, $tp
163+
# CHECKR-NEXT: R_LARCH_TLS_LE_ADD_R a
164+
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
165+
# CHECKR-NEXT: addi.d $a0, $a0, 0
166+
# CHECKR-NEXT: R_LARCH_TLS_LE_LO12_R a
167+
# CHECKR-NEXT: R_LARCH_RELAX *ABS*
89168
# CHECKR-NEXT: nop
90169
# CHECKR-NEXT: R_LARCH_ALIGN *ABS*+0xc
91170
# CHECKR-NEXT: nop
92171
# CHECKR-NEXT: nop
93172
# CHECKR-NEXT: ret
94173

174+
# Width-agnostic `add`: expands to add.d when the ELF64 symbol is defined and
# to add.w otherwise. The fourth operand is passed through unchanged; the test
# body uses it for the %le_add_r(sym) relocation specifier.
.macro add dst, src1, src2, src3
175+
.ifdef ELF64
176+
add.d \dst, \src1, \src2, \src3
177+
.else
178+
add.w \dst, \src1, \src2, \src3
179+
.endif
180+
.endm
181+
182+
# Width-agnostic `addi`: expands to addi.d when the ELF64 symbol is defined
# and to addi.w otherwise. The test body passes %le_lo12_r(sym) as the third
# operand.
.macro addi dst, src1, src2
183+
.ifdef ELF64
184+
addi.d \dst, \src1, \src2
185+
.else
186+
addi.w \dst, \src1, \src2
187+
.endif
188+
.endm
189+
95190
.global _start
96191
_start:
97192
la.pcrel $a0, _start
@@ -101,5 +196,19 @@ _start:
101196
call36 _start
102197
tail36 $a0, _start
103198
.endif
199+
200+
la.tls.le $a0, a # without R_LARCH_RELAX relocation
201+
la.tls.gd $a0, a
202+
la.tls.ld $a0, a
203+
204+
lu12i.w $a0, %le_hi20_r(a)
205+
add $a0, $a0, $tp, %le_add_r(a)
206+
addi $a0, $a0, %le_lo12_r(a)
207+
104208
.p2align 4
105209
ret
210+
211+
.section .tbss,"awT",@nobits
212+
.globl a
213+
a:
214+
.zero 4

0 commit comments

Comments
 (0)