Skip to content

Commit 6611d58

Browse files
committed
[ELF] Relax R_RISCV_ALIGN
Alternative to D125036. Implement R_RISCV_ALIGN relaxation so that we can handle -mrelax object files (i.e. -mno-relax is no longer needed) and create a framework for future relaxation. `relaxAux` is placed in a union with InputSectionBase::jumpInstrMod, storing auxiliary information for relaxation. In the first pass, `relaxAux` is allocated. The main data structure is `relocDeltas`: when referencing `relocations[i]`, the actual offset is `r_offset - (i ? relocDeltas[i-1] : 0)`. `relaxOnce` performs one relaxation pass. It computes `relocDeltas` for all text sections. Then, adjust st_value/st_size for symbols relative to this section based on `SymbolAnchor`. `bytesDropped` is set so that `assignAddresses` knows that the size has changed. Run `relaxOnce` in the `finalizeAddressDependentContent` loop to wait for convergence of text sections and other address dependent sections (e.g. SHT_RELR). Note: extracting `relaxOnce` into a separate loop works for many cases but has issues in some linker script edge cases. After convergence, compute section contents: shrink the NOP sequence of each R_RISCV_ALIGN as appropriate. Instead of deleting bytes, we run a sequence of memcpy on the content delimited by relocation locations. For R_RISCV_ALIGN let the next memcpy skip the desired number of bytes. Section content computation is parallelizable, but let's ensure the implementation is mature before optimizations. Technically we can save a copy if we interleave some code with `OutputSection::writeTo`, but let's not pollute the generic code (we don't have templated relocation resolving, so using conditions can impose overhead to non-RISCV.) Tested: `make ARCH=riscv CROSS_COMPILE=riscv64-linux-gnu- LLVM=1 defconfig all` built Linux kernel using -mrelax is bootable. FreeBSD RISCV64 system using -mrelax is bootable. bash/curl/firefox/libevent/vim/tmux using -mrelax works. Differential Revision: https://reviews.llvm.org/D127581
1 parent ef7aed3 commit 6611d58

10 files changed

+515
-42
lines changed

lld/ELF/Arch/RISCV.cpp

Lines changed: 231 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,11 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "InputFiles.h"
10+
#include "OutputSections.h"
1011
#include "Symbols.h"
1112
#include "SyntheticSections.h"
1213
#include "Target.h"
14+
#include "llvm/Support/TimeProfiler.h"
1315

1416
using namespace llvm;
1517
using namespace llvm::object;
@@ -36,6 +38,7 @@ class RISCV final : public TargetInfo {
3638
const uint8_t *loc) const override;
3739
void relocate(uint8_t *loc, const Relocation &rel,
3840
uint64_t val) const override;
41+
bool relaxOnce(int pass) const override;
3942
};
4043

4144
} // end anonymous namespace
@@ -271,12 +274,7 @@ RelExpr RISCV::getRelExpr(const RelType type, const Symbol &s,
271274
case R_RISCV_TPREL_ADD:
272275
return R_NONE;
273276
case R_RISCV_ALIGN:
274-
// Not just a hint; always padded to the worst-case number of NOPs, so may
275-
// not currently be aligned, and without linker relaxation support we can't
276-
// delete NOPs to realign.
277-
errorOrWarn(getErrorLocation(loc) + "relocation R_RISCV_ALIGN requires "
278-
"unimplemented linker relaxation; recompile with -mno-relax");
279-
return R_NONE;
277+
return R_RELAX_HINT;
280278
default:
281279
error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
282280
") against symbol " + toString(s));
@@ -476,6 +474,233 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
476474
}
477475
}
478476

477+
namespace {
478+
// One endpoint (start or end) of a defined symbol that lives in a text
// section. initSymbolAnchors() records two anchors per symbol, and relax()
// walks them to adjust the symbol's value and size as bytes are removed.
struct SymbolAnchor {
479+
// Section-relative offset of this endpoint as recorded by
// initSymbolAnchors(): d->value for a start anchor, d->value + d->size for
// an end anchor.
uint64_t offset;
480+
// The symbol whose value/size relax() updates through this anchor.
Defined *d;
481+
bool end; // true for the anchor of st_value+st_size
482+
};
483+
} // namespace
484+
485+
struct elf::RISCVRelaxAux {
486+
// This records symbol start and end offsets which will be adjusted according
487+
// to the nearest relocDeltas element.
488+
SmallVector<SymbolAnchor, 0> anchors;
489+
// For relocations[i], the actual offset is r_offset - (i ? relocDeltas[i-1] :
490+
// 0).
491+
std::unique_ptr<uint32_t[]> relocDeltas;
492+
};
493+
494+
static void initSymbolAnchors() {
495+
SmallVector<InputSection *, 0> storage;
496+
for (OutputSection *osec : outputSections) {
497+
if (!(osec->flags & SHF_EXECINSTR))
498+
continue;
499+
for (InputSection *sec : getInputSections(*osec, storage)) {
500+
sec->relaxAux = make<RISCVRelaxAux>();
501+
if (sec->relocations.size())
502+
sec->relaxAux->relocDeltas =
503+
std::make_unique<uint32_t[]>(sec->relocations.size());
504+
}
505+
}
506+
// Store anchors (st_value and st_value+st_size) for symbols relative to text
507+
// sections.
508+
for (InputFile *file : ctx->objectFiles)
509+
for (Symbol *sym : file->getSymbols()) {
510+
auto *d = dyn_cast<Defined>(sym);
511+
if (!d || d->file != file)
512+
continue;
513+
if (auto *sec = dyn_cast_or_null<InputSection>(d->section))
514+
if (sec->flags & SHF_EXECINSTR && sec->relaxAux) {
515+
// If sec is discarded, relaxAux will be nullptr.
516+
sec->relaxAux->anchors.push_back({d->value, d, false});
517+
sec->relaxAux->anchors.push_back({d->value + d->size, d, true});
518+
}
519+
}
520+
// Sort anchors by offset so that we can find the closest relocation
521+
// efficiently. For a zero size symbol, ensure that its start anchor precedes
522+
// its end anchor. For two symbols with anchors at the same offset, their
523+
// order does not matter.
524+
for (OutputSection *osec : outputSections) {
525+
if (!(osec->flags & SHF_EXECINSTR))
526+
continue;
527+
for (InputSection *sec : getInputSections(*osec, storage)) {
528+
llvm::sort(sec->relaxAux->anchors, [](auto &a, auto &b) {
529+
return std::make_pair(a.offset, a.end) <
530+
std::make_pair(b.offset, b.end);
531+
});
532+
}
533+
}
534+
}
535+
536+
static bool relax(InputSection &sec) {
537+
const uint64_t secAddr = sec.getVA();
538+
auto &aux = *sec.relaxAux;
539+
bool changed = false;
540+
541+
// Restore original st_value for symbols relative to this section.
542+
ArrayRef<SymbolAnchor> sa = makeArrayRef(aux.anchors);
543+
uint32_t delta = 0;
544+
for (auto it : llvm::enumerate(sec.relocations)) {
545+
for (; sa.size() && sa[0].offset <= it.value().offset; sa = sa.slice(1))
546+
if (!sa[0].end)
547+
sa[0].d->value += delta;
548+
delta = aux.relocDeltas[it.index()];
549+
}
550+
for (const SymbolAnchor &sa : sa)
551+
if (!sa.end)
552+
sa.d->value += delta;
553+
sa = makeArrayRef(aux.anchors);
554+
delta = 0;
555+
556+
for (auto it : llvm::enumerate(sec.relocations)) {
557+
Relocation &r = it.value();
558+
const size_t i = it.index();
559+
const uint64_t loc = secAddr + r.offset - delta;
560+
uint32_t &cur = aux.relocDeltas[i], remove = 0;
561+
switch (r.type) {
562+
case R_RISCV_ALIGN: {
563+
const uint64_t nextLoc = loc + r.addend;
564+
const uint64_t align = PowerOf2Ceil(r.addend + 2);
565+
// All bytes beyond the alignment boundary should be removed.
566+
remove = nextLoc - ((loc + align - 1) & -align);
567+
assert(static_cast<int32_t>(remove) >= 0 &&
568+
"R_RISCV_ALIGN needs expanding the content");
569+
break;
570+
}
571+
}
572+
573+
// For all anchors whose offsets are <= r.offset, they are preceded by
574+
// the previous relocation whose `relocDeltas` value equals `delta`.
575+
// Decrease their st_value and update their st_size.
576+
if (remove) {
577+
for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) {
578+
if (sa[0].end)
579+
sa[0].d->size = sa[0].offset - delta - sa[0].d->value;
580+
else
581+
sa[0].d->value -= delta;
582+
}
583+
}
584+
delta += remove;
585+
if (delta != cur) {
586+
cur = delta;
587+
changed = true;
588+
}
589+
}
590+
591+
for (const SymbolAnchor &a : sa) {
592+
if (a.end)
593+
a.d->size = a.offset - delta - a.d->value;
594+
else
595+
a.d->value -= delta;
596+
}
597+
// Inform assignAddresses that the size has changed.
598+
if (!isUInt<16>(delta))
599+
fatal("section size decrease is too large");
600+
sec.bytesDropped = delta;
601+
return changed;
602+
}
603+
604+
// When relaxing just R_RISCV_ALIGN, relocDeltas is usually changed only once in
605+
// the absence of a linker script. For call and load/store R_RISCV_RELAX, code
606+
// shrinkage may reduce displacement and make more relocations eligible for
607+
// relaxation. Code shrinkage may increase displacement to a call/load/store
608+
// target at a higher fixed address, invalidating an earlier relaxation. Any
609+
// change in section sizes can have cascading effect and require another
610+
// relaxation pass.
611+
bool RISCV::relaxOnce(int pass) const {
612+
llvm::TimeTraceScope timeScope("RISC-V relaxOnce");
613+
if (config->relocatable)
614+
return false;
615+
616+
if (pass == 0)
617+
initSymbolAnchors();
618+
619+
SmallVector<InputSection *, 0> storage;
620+
bool changed = false;
621+
for (OutputSection *osec : outputSections) {
622+
if (!(osec->flags & SHF_EXECINSTR))
623+
continue;
624+
for (InputSection *sec : getInputSections(*osec, storage))
625+
changed |= relax(*sec);
626+
}
627+
return changed;
628+
}
629+
630+
void elf::riscvFinalizeRelax(int passes) {
631+
llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation");
632+
log("relaxation passes: " + Twine(passes));
633+
SmallVector<InputSection *, 0> storage;
634+
for (OutputSection *osec : outputSections) {
635+
if (!(osec->flags & SHF_EXECINSTR))
636+
continue;
637+
for (InputSection *sec : getInputSections(*osec, storage)) {
638+
RISCVRelaxAux &aux = *sec->relaxAux;
639+
if (!aux.relocDeltas)
640+
continue;
641+
642+
auto &rels = sec->relocations;
643+
ArrayRef<uint8_t> old = sec->rawData;
644+
size_t newSize =
645+
old.size() - aux.relocDeltas[sec->relocations.size() - 1];
646+
uint8_t *p = context().bAlloc.Allocate<uint8_t>(newSize);
647+
uint64_t offset = 0;
648+
int64_t delta = 0;
649+
sec->rawData = makeArrayRef(p, newSize);
650+
sec->bytesDropped = 0;
651+
652+
// Update section content: remove NOPs for R_RISCV_ALIGN and rewrite
653+
// instructions for relaxed relocations.
654+
for (size_t i = 0, e = rels.size(); i != e; ++i) {
655+
uint32_t remove = aux.relocDeltas[i] - delta;
656+
delta = aux.relocDeltas[i];
657+
if (remove == 0)
658+
continue;
659+
660+
// Copy from last location to the current relocated location.
661+
const Relocation &r = rels[i];
662+
uint64_t size = r.offset - offset;
663+
memcpy(p, old.data() + offset, size);
664+
p += size;
665+
666+
// For R_RISCV_ALIGN, we will place `offset` in a location (among NOPs)
667+
// to satisfy the alignment requirement. If `remove` is a multiple of 4,
668+
// it is as if we have skipped some NOPs. Otherwise we are in the middle
669+
// of a 4-byte NOP, and we need to rewrite the NOP sequence.
670+
int64_t skip = 0;
671+
if (r.type == R_RISCV_ALIGN) {
672+
if (remove % 4 != 0) {
673+
skip = r.addend - remove;
674+
int64_t j = 0;
675+
for (; j + 4 <= skip; j += 4)
676+
write32le(p + j, 0x00000013); // nop
677+
if (j != skip) {
678+
assert(j + 2 == skip);
679+
write16le(p + j, 0x0001); // c.nop
680+
}
681+
}
682+
}
683+
684+
p += skip;
685+
offset = r.offset + skip + remove;
686+
}
687+
memcpy(p, old.data() + offset, old.size() - offset);
688+
689+
// Substract the previous relocDeltas value from the relocation offset.
690+
// For a pair of R_RISCV_CALL/R_RISCV_RELAX with the same offset, decrease
691+
// their r_offset by the same delta.
692+
delta = 0;
693+
for (size_t i = 0, e = rels.size(); i != e;) {
694+
uint64_t cur = rels[i].offset;
695+
do {
696+
rels[i].offset -= delta;
697+
} while (++i != e && rels[i].offset == cur);
698+
delta = aux.relocDeltas[i - 1];
699+
}
700+
}
701+
}
702+
}
703+
479704
TargetInfo *elf::getRISCVTargetInfo() {
480705
static RISCV target;
481706
return &target;

lld/ELF/InputSection.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type,
622622
return sym.getVA(a);
623623
case R_ADDEND:
624624
return a;
625+
case R_RELAX_HINT:
626+
return 0;
625627
case R_ARM_SBREL:
626628
return sym.getVA(a) - getARMStaticBase(sym);
627629
case R_GOT:
@@ -987,6 +989,8 @@ void InputSectionBase::relocateAlloc(uint8_t *buf, uint8_t *bufEnd) {
987989
*rel.sym, rel.expr),
988990
bits);
989991
switch (rel.expr) {
992+
case R_RELAX_HINT:
993+
continue;
990994
case R_RELAX_GOT_PC:
991995
case R_RELAX_GOT_PC_NOPIC:
992996
target.relaxGot(bufLoc, rel, targetVA);

lld/ELF/InputSection.h

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
#define LLD_ELF_INPUT_SECTION_H
1111

1212
#include "Relocations.h"
13+
#include "lld/Common/CommonLinkerContext.h"
1314
#include "lld/Common/LLVM.h"
15+
#include "lld/Common/Memory.h"
1416
#include "llvm/ADT/CachedHashString.h"
1517
#include "llvm/ADT/DenseSet.h"
1618
#include "llvm/ADT/TinyPtrVector.h"
@@ -97,6 +99,8 @@ class SectionBase {
9799
link(link), info(info) {}
98100
};
99101

102+
struct RISCVRelaxAux;
103+
100104
// This corresponds to a section of an input file.
101105
class InputSectionBase : public SectionBase {
102106
public:
@@ -129,11 +133,10 @@ class InputSectionBase : public SectionBase {
129133
return cast_or_null<ObjFile<ELFT>>(file);
130134
}
131135

132-
// If basic block sections are enabled, many code sections could end up with
133-
// one or two jump instructions at the end that could be relaxed to a smaller
134-
// instruction. The members below help trimming the trailing jump instruction
135-
// and shrinking a section.
136-
uint8_t bytesDropped = 0;
136+
// Used by --optimize-bb-jumps and RISC-V linker relaxation temporarily to
137+
// indicate the number of bytes which is not counted in the size. This should
138+
// be reset to zero after uses.
139+
uint16_t bytesDropped = 0;
137140

138141
// Whether the section needs to be padded with a NOP filler due to
139142
// deleteFallThruJmpInsn.
@@ -201,11 +204,17 @@ class InputSectionBase : public SectionBase {
201204
// This vector contains such "cooked" relocations.
202205
SmallVector<Relocation, 0> relocations;
203206

204-
// These are modifiers to jump instructions that are necessary when basic
205-
// block sections are enabled. Basic block sections creates opportunities to
206-
// relax jump instructions at basic block boundaries after reordering the
207-
// basic blocks.
208-
JumpInstrMod *jumpInstrMod = nullptr;
207+
union {
208+
// These are modifiers to jump instructions that are necessary when basic
209+
// block sections are enabled. Basic block sections creates opportunities
210+
// to relax jump instructions at basic block boundaries after reordering the
211+
// basic blocks.
212+
JumpInstrMod *jumpInstrMod = nullptr;
213+
214+
// Auxiliary information for RISC-V linker relaxation. RISC-V does not use
215+
// jumpInstrMod.
216+
RISCVRelaxAux *relaxAux;
217+
};
209218

210219
// A function compiled with -fsplit-stack calling a function
211220
// compiled without -fsplit-stack needs its prologue adjusted. Find

lld/ELF/Relocations.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -958,8 +958,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type,
958958
const Symbol &sym,
959959
uint64_t relOff) const {
960960
// These expressions always compute a constant
961-
if (oneof<R_GOTPLT, R_GOT_OFF, R_MIPS_GOT_LOCAL_PAGE, R_MIPS_GOTREL,
962-
R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC,
961+
if (oneof<R_GOTPLT, R_GOT_OFF, R_RELAX_HINT, R_MIPS_GOT_LOCAL_PAGE,
962+
R_MIPS_GOTREL, R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC,
963963
R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
964964
R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT,
965965
R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE>(e))
@@ -2118,7 +2118,9 @@ bool ThunkCreator::normalizeExistingThunk(Relocation &rel, uint64_t src) {
21182118
// made no changes. If the target requires range extension thunks, currently
21192119
// ARM, then any future change in offset between caller and callee risks a
21202120
// relocation out of range error.
2121-
bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) {
2121+
bool ThunkCreator::createThunks(uint32_t pass,
2122+
ArrayRef<OutputSection *> outputSections) {
2123+
this->pass = pass;
21222124
bool addressesChanged = false;
21232125

21242126
if (pass == 0 && target->getThunkSectionSpacing())
@@ -2180,7 +2182,6 @@ bool ThunkCreator::createThunks(ArrayRef<OutputSection *> outputSections) {
21802182

21812183
// Merge all created synthetic ThunkSections back into OutputSection
21822184
mergeThunks(outputSections);
2183-
++pass;
21842185
return addressesChanged;
21852186
}
21862187

0 commit comments

Comments
 (0)