diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InputFiles.h"
+#include "OutputSections.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
@@ -36,6 +37,7 @@
                      const uint8_t *loc) const override;
   void relocate(uint8_t *loc, const Relocation &rel,
                 uint64_t val) const override;
+  void finalizeSections() const override;
 };
 
 } // end anonymous namespace
@@ -271,12 +273,7 @@
   case R_RISCV_TPREL_ADD:
     return R_NONE;
   case R_RISCV_ALIGN:
-    // Not just a hint; always padded to the worst-case number of NOPs, so may
-    // not currently be aligned, and without linker relaxation support we can't
-    // delete NOPs to realign.
-    errorOrWarn(getErrorLocation(loc) + "relocation R_RISCV_ALIGN requires "
-                "unimplemented linker relaxation; recompile with -mno-relax");
-    return R_NONE;
+    return R_RELAX_HINT;
   default:
     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
           ") against symbol " + toString(s));
@@ -476,6 +473,148 @@
   }
 }
 
+using AdjustRange = InputSectionBase::AdjustRange;
+using AdjustRanges = SmallVector<AdjustRange, 0>;
+
+// As input, the addend of R_RISCV_ALIGN holds the number of NOP bytes emitted
+// by the compiler. We derive the desired alignment boundary by rounding this up
+// to the nearest power of two. The multi-pass relaxation algorithm needs two
+// quantities per R_RISCV_ALIGN: current NOP byte count, and alignment
+// boundary. Once we alter the NOP byte count, we lose the capacity to correctly
+// derive the alignment boundary. Therefore, we must derive the boundary from
+// the initial value of NOP byte count and then store it. The 64-bit addend
+// member is more than wide enough to keep both the NOP byte count and alignment
+// boundary.
+struct AlignAddend {
+  uint32_t bytes;    // Current number of NOP bytes at the alignment site.
+  uint32_t boundary; // Alignment boundary in bytes, derived once up front.
+};
+static_assert(sizeof(AlignAddend) <= sizeof(Relocation::addend),
+              "AlignAddend is stored in place of Relocation::addend");
+
+// NOTE: The code structure is more complex than necessary
+// for handling R_RISCV_ALIGN alone. It is designed to accommodate
+// call/jump/load/store/addr-arithmetic relocs in later diffs.
+
+// Derive & store alignment boundaries for all R_RISCV_ALIGN relocs. This
+// overwrites each addend with an AlignAddend, so it must run before the first
+// relaxation pass changes the NOP byte count.
+static void setAlignBoundaries() {
+  for (OutputSection *osec : outputSections)
+    for (InputSection *isec : getInputSections(*osec))
+      if (isec->flags & SHF_EXECINSTR)
+        for (Relocation &r : isec->relocations)
+          if (r.type == R_RISCV_ALIGN) {
+            AlignAddend *aa = reinterpret_cast<AlignAddend *>(&r.addend);
+            uint32_t bytes = r.addend;
+            aa->bytes = bytes;
+            aa->boundary = PowerOf2Ceil(bytes + 2);
+          }
+}
+
+// Adjust the NOP byte count of one R_RISCV_ALIGN to maintain alignment.
+// DELTA is the running byte adjustment made by earlier relocs of ISEC in this
+// pass; a changed count is queued on RANGES and folded into DELTA.
+static void relaxAlign(InputSection *isec, Relocation &r, int64_t &delta,
+                       AdjustRanges &ranges) {
+  const uint64_t pc = isec->getVA(r.offset) + delta;
+  AlignAddend *aa = reinterpret_cast<AlignAddend *>(&r.addend);
+  const uint64_t oldNopBytes = aa->bytes;
+  const uint64_t newNopBytes = alignTo(pc, aa->boundary) - pc;
+  const uint64_t minNopWidth = config->eflags & EF_RISCV_RVC ? 2 : 4;
+  if (newNopBytes % minNopWidth != 0) {
+    errorOrWarn(isec->getObjMsg(r.offset) + ": alignment NOP bytes " +
+                Twine(newNopBytes) + " must be a multiple of " +
+                Twine(minNopWidth));
+    return;
+  }
+
+  aa->bytes = newNopBytes;
+  if (int incr = newNopBytes - oldNopBytes) {
+    ranges.push_back({r.offset, incr});
+    delta += incr;
+  }
+}
+
+// After one or more contractions and/or expansions of the address range that
+// rounds-up to the alignment boundary, the sequence of NOPs emitted by the
+// compiler could be corrupted. Repair by rewriting an optimal sequence of NOPs.
+// When compressed (2-byte) instructions are disabled, the NOP sequence length
+// will be a multiple of 4, and all NOPs can use the 4-byte encoding. When
+// compressed instructions are enabled, the NOP sequence length will be a
+// multiple of 2, and we can mix 4-byte NOPs with 2-byte C.NOPs. In order to
+// reduce instruction count, we emit as many 4-byte NOPs as possible, then
+// if necessary, use a single 2-byte C.NOP to finish.
+void fillAdjustGaps() {
+  for (OutputSection *osec : outputSections)
+    for (InputSection *isec : getInputSections(*osec)) {
+      if (!(isec->flags & SHF_EXECINSTR))
+        continue;
+
+      for (Relocation &r : isec->relocations)
+        if (r.type == R_RISCV_ALIGN) {
+          uint8_t *buf = isec->mutableData().data() + r.offset;
+          AlignAddend *aa = reinterpret_cast<AlignAddend *>(&r.addend);
+          int keepNopBytes = aa->bytes;
+          // Emit as many 4-byte NOPs as fit.
+          for (; keepNopBytes >= 4; keepNopBytes -= 4) {
+            write32le(buf, 0x00000013); // nop
+            buf += 4;
+          }
+          // At most one 2-byte C.NOP finishes the sequence.
+          if (keepNopBytes >= 2) {
+            assert(config->eflags & EF_RISCV_RVC &&
+                   "expected RVC for 2-byte NOP");
+            write16le(buf, 0x0001); // c.nop
+            keepNopBytes -= 2;
+          }
+          // An odd remainder cannot be encoded. Stop rather than loop, so a
+          // malformed byte count cannot hang the link.
+          assert(keepNopBytes == 0 && "NOP byte count must be even");
+        }
+    }
+}
+
+// Execute a single relaxation pass. Return TRUE if we did something. The
+// caller will repeatedly call this until the algorithm converges, as indicated
+// by a pass where nothing happens.
+static bool relaxOnce(SectionSymbolAddrs &sectionSymbolAddrs) {
+  bool changed = false;
+  AdjustRanges ranges;
+  for (OutputSection *osec : outputSections) {
+    for (InputSection *isec : getInputSections(*osec)) {
+      if (!(isec->flags & SHF_EXECINSTR))
+        continue;
+      int64_t delta = 0;
+      for (Relocation &r : isec->relocations) {
+        if (r.type == R_RISCV_ALIGN)
+          relaxAlign(isec, r, delta, ranges);
+        // TODO(gkm): handle call/jump/load/store/addr-arithmetic relaxation
+      }
+      if (!ranges.empty()) {
+        isec->adjustRanges(ranges, sectionSymbolAddrs[isec]);
+        changed = true;
+        ranges.clear();
+      }
+    }
+  }
+  if (changed)
+    script->assignAddresses();
+  return changed;
+}
+
+void RISCV::finalizeSections() const {
+  if (config->relocatable)
+    return;
+  // Record boundaries once, then relax until a pass changes nothing.
+  setAlignBoundaries();
+  SectionSymbolAddrs sectionSymbolAddrs;
+  fillSectionSymbolAddrs(sectionSymbolAddrs);
+  while (relaxOnce(sectionSymbolAddrs))
+    ;
+  fillAdjustGaps();
+}
+
 TargetInfo *elf::getRISCVTargetInfo() {
   static RISCV target;
   return &target;
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -10,7 +10,9 @@
 #define LLD_ELF_INPUT_SECTION_H
 
 #include "Relocations.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/LLVM.h"
+#include "lld/Common/Memory.h"
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/TinyPtrVector.h"
@@ -37,6 +39,20 @@
   bool areRelocsRel() const { return rels.size(); }
 };
 
+// Wrap a defined symbol with two of these, allowing uniform access to its start
+// and end offsets, supporting a unified vector of symbol starts and ends which
+// we can inspect and adjust individually.
+struct SymbolAddr {
+  uint64_t offset;
+  Defined *d;
+  void adjust(int64_t delta);
+};
+
+using SymbolAddrs = llvm::SmallVector<SymbolAddr, 0>;
+using SectionSymbolAddrs = llvm::DenseMap<InputSectionBase *, SymbolAddrs>;
+
+void fillSectionSymbolAddrs(SectionSymbolAddrs &sectionSymbolAddrs);
+
 // This is the base class of all sections that lld handles.
Some are sections in
 // input files, some are sections in the produced output file and some exist
 // just as a convenience for implementing special ways of combining some
@@ -149,6 +165,7 @@
     bytesDropped -= num;
   }
 
+  bool copiedData = false;
   mutable ArrayRef<uint8_t> rawData;
 
   void trim() {
@@ -164,6 +181,40 @@
     return rawData;
   }
 
+  // Return a writable view of this section's contents. The first call copies
+  // the bytes into memory from context().bAlloc and re-points rawData at the
+  // copy; later calls return that same copy.
+  MutableArrayRef<uint8_t> mutableData() {
+    if (!copiedData) {
+      size_t size = rawData.size();
+      uint8_t *mutData = context().bAlloc.Allocate<uint8_t>(size);
+      memcpy(mutData, rawData.data(), size);
+      rawData = llvm::makeArrayRef(mutData, size);
+      copiedData = true;
+    }
+
+    return llvm::makeMutableArrayRef(const_cast<uint8_t *>(rawData.data()),
+                                     rawData.size());
+  }
+
+  // Add or delete DELTA bytes. Positive (expand) DELTA pertains to
+  // R_RISCV_ALIGN during multi-pass relaxation. The first relaxation pass will
+  // always delete excess NOPs generated by the compiler, but subsequent
+  // relaxation passes might require expansion of NOP padding in order to
+  // round-up to the alignment boundary.
+  class AdjustRange {
+  public:
+    uint64_t offset; // Section offset at which bytes are added or deleted.
+    int64_t delta;   // Bytes to add (> 0) or delete (< 0).
+    bool contains(uint64_t a) const { return offset < a && a < offset - delta; }
+  };
+
+  // Adjust (delete or expand) a range of input-section content, symbols, and
+  // relocations. The adjustRanges must be sorted by offset and must not
+  // overlap.
+  bool adjustRanges(ArrayRef<AdjustRange> adjustRanges,
+                    SymbolAddrs &symbolAddrs);
+
   // The next member in the section group if this section is in a group. This is
   // used by --gc-sections.
   InputSectionBase *nextInSectionGroup = nullptr;
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -150,6 +150,110 @@
   return ret;
 }
 
+void SymbolAddr::adjust(int64_t delta) {
+  if (offset == d->value) { // symbol-start
+    // Extend size as much as we reduce value, thereby preserving original
+    // value+size. We will fix size later when we bump symbol-end.
+    d->value += delta;
+    d->size -= delta;
+  } else { // symbol-end
+    d->size += delta;
+  }
+  // Keep our offset up-to-date for possible future adjustments.
+  offset += delta;
+}
+
+void lld::elf::fillSectionSymbolAddrs(SectionSymbolAddrs &sectionSymbolAddrs) {
+  // Collect the input files that contain the code sections.
+  DenseSet<InputFile *> inputFiles;
+  for (OutputSection *osec : outputSections)
+    for (InputSection *isec : getInputSections(*osec))
+      if (isec->file && isec->flags & SHF_EXECINSTR)
+        inputFiles.insert(isec->file);
+  // Store symbol start/end offsets for defined symbols of each code section.
+  for (InputFile *ifile : inputFiles)
+    for (Symbol *sym : ifile->getSymbols())
+      if (auto *d = dyn_cast<Defined>(sym))
+        if (auto *isec = dyn_cast_or_null<InputSection>(d->section)) {
+          // Make a unified vector of start/end offsets for all defined symbols
+          // within this section. Each symbol gets two entries in the vector.
+          SymbolAddrs &symbolAddrs = sectionSymbolAddrs[isec];
+          symbolAddrs.push_back({d->value, d});
+          symbolAddrs.push_back({d->value + d->size, d});
+        }
+  // Sort symbol start/end addresses for each code section.
+  for (auto &kv : sectionSymbolAddrs)
+    llvm::sort(kv.second, [](const SymbolAddr &a, const SymbolAddr &b) {
+      return a.offset < b.offset;
+    });
+}
+
+// The relaxation pass queued a vector of adjustments for this input
+// section. Most often these remove excess bytes from shortened instructions for
+// calls/jumps/loads/stores/address-arithmetic/alignments. Sometimes, these add
+// bytes for alignments and undoing relaxations. We process everything in
+// ascending address order, adjusting section contents, reloc offsets, symbol
+// values, and function-symbol sizes. Function symbol sizes must be adjusted
+// when code size changes within the function.
+bool InputSectionBase::adjustRanges(ArrayRef<AdjustRange> ranges,
+                                    SymbolAddrs &symbolAddrs) {
+  if (ranges.empty())
+    return false;
+
+  // Adjust symbol offsets and sizes.
+  int64_t delta = 0;
+  size_t i = 0;
+  for (SymbolAddr &sa : symbolAddrs) {
+    // The assertions below check that AdjustRanges ...
+    // (a) are all disjoint, i.e., do not overlap; and ...
+    // (b) do not have symbol boundaries within their interior, i.e., an
+    //     AdjustRange is entirely inside or entirely outside a symbol's range.
+    for (; i < ranges.size() && ranges[i].offset < sa.offset; i++) {
+      // An AdjustRange should not span a symbol start/end offset
+      assert(!ranges[i].contains(sa.offset));
+      if (i > 0)
+        // AdjustRanges should increase monotonically and never overlap.
+        assert(!ranges[i - 1].contains(ranges[i].offset));
+      delta += ranges[i].delta;
+    }
+    if (i > 0)
+      // An AdjustRange should not span a symbol start/end offset.
+      assert(!ranges[i - 1].contains(sa.offset));
+    sa.adjust(delta);
+  }
+
+  // Adjust relocation offsets within the section.
+  delta = 0;
+  const AdjustRange *ar = ranges.begin();
+  for (Relocation &r : relocations) {
+    for (; ar != ranges.end() && ar->offset < r.offset; ++ar)
+      delta += ar->delta;
+    r.offset += delta;
+  }
+
+  // Rebuild the contents from a snapshot of the old bytes: moving pieces in
+  // place would overwrite bytes that a later piece has yet to copy whenever
+  // the running delta is positive (e.g. two expansions in a single call).
+  MutableArrayRef<uint8_t> buf = this->mutableData();
+  const SmallVector<uint8_t, 0> old(buf.begin(), buf.end());
+  uint8_t *dest = buf.begin() + ranges.begin()->offset;
+  for (size_t j = 0; j < ranges.size(); j++) {
+    const AdjustRange &ar = ranges[j];
+    uint64_t src0 = ar.offset;
+    if (ar.delta < 0)
+      src0 -= ar.delta; // Skip the deleted bytes.
+    else
+      dest = std::fill_n(dest, ar.delta, 0); // Gap for the target to fill.
+    // The piece kept after this range runs up to the next range, or the end.
+    uint64_t srcN = j + 1 < ranges.size() ? ranges[j + 1].offset : old.size();
+    dest = std::copy(old.begin() + src0, old.begin() + srcN, dest);
+  }
+
+  // Resize the section contents.
+  rawData = makeArrayRef(data().data(), dest);
+  return true;
+}
+
 uint64_t SectionBase::getOffset(uint64_t offset) const {
   switch (kind()) {
   case Output: {
@@ -822,6 +926,8 @@
     return in.got->getTlsIndexOff() + a;
   case R_TLSLD_PC:
     return in.got->getTlsIndexVA() + a - p;
+  case R_RELAX_HINT:
+    return 0;
   default:
     llvm_unreachable("invalid expression");
   }
@@ -1011,6 +1117,8 @@
                                    *rel.sym, rel.expr),
                   bits);
     switch (rel.expr) {
+    case R_RELAX_HINT:
+      continue;
     case R_RELAX_GOT_PC:
     case R_RELAX_GOT_PC_NOPIC:
       target.relaxGot(bufLoc, rel, targetVA);
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -46,6 +46,7 @@
   R_PLT,
   R_PLT_PC,
   R_PLT_GOTPLT,
+  R_RELAX_HINT,
   R_RELAX_GOT_PC,
   R_RELAX_GOT_PC_NOPIC,
   R_RELAX_TLS_GD_TO_IE,
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -960,7 +960,8 @@
             R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC,
             R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
             R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT,
-            R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE>(e))
+            R_PPC64_RELAX_TOC, R_RISCV_ADD, R_RELAX_HINT, R_AARCH64_GOT_PAGE>(
+          e))
     return true;
 
   // These never do, except if the entire file is position dependent or if
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -92,6 +92,10 @@
   virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type,
                                  JumpModType val) const {}
 
+  // Called once by the writer just before its thunk-creation loop. Targets may
+  // override it to rewrite section contents; the default does nothing.
+  virtual void finalizeSections() const {}
+
   virtual ~TargetInfo();
 
   // This deletes a jump insn at the end of the section if it is a fall thru to
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1630,6 +1630,8 @@
   if (config->emachine == EM_HEXAGON)
     hexagonTLSSymbolUpdate(outputSections);
 
+  target->finalizeSections();
+
   int assignPasses = 0;
   for (;;) {
     bool changed = target->needsThunks &&
tc.createThunks(outputSections);
diff --git a/lld/test/ELF/riscv-relax-align-rvc.s b/lld/test/ELF/riscv-relax-align-rvc.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-align-rvc.s
@@ -0,0 +1,59 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && mkdir %t && cd %t
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+c,+relax %s -o rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+c,+relax %s -o rv64.o
+
+# Check that alignment is always handled for compressed instructions,
+# regardless of the --relax option. The five 2-byte c.addi leave 6 bytes of
+# padding before the 16-byte boundary: one 4-byte NOP plus one 2-byte C.NOP.
+# Realigning the code after `.balign 16` requires an overlapped copy.
+# Verify that no instructions are clobbered.
+
+# RUN: ld.lld rv32.o -o relax.rv32
+# RUN: ld.lld rv64.o -o relax.rv64
+# RUN: llvm-objdump -d -M no-aliases relax.rv32 > relax.rv32.dis
+# RUN: llvm-objdump -d -M no-aliases relax.rv64 > relax.rv64.dis
+# RUN: FileCheck %s < relax.rv32.dis
+# RUN: FileCheck %s < relax.rv64.dis
+
+# RUN: ld.lld --no-relax rv32.o -o no-relax.rv32
+# RUN: ld.lld --no-relax rv64.o -o no-relax.rv64
+# RUN: llvm-objdump -d -M no-aliases no-relax.rv32 > no-relax.rv32.dis
+# RUN: llvm-objdump -d -M no-aliases no-relax.rv64 > no-relax.rv64.dis
+# RUN: FileCheck %s < no-relax.rv32.dis
+# RUN: FileCheck %s < no-relax.rv64.dis
+
+# CHECK: c.addi a0, 1
+# CHECK-NEXT: c.addi a1, 2
+# CHECK-NEXT: c.addi a2, 3
+# CHECK-NEXT: c.addi a3, 4
+# CHECK-NEXT: c.addi a4, 5
+# CHECK-NEXT: addi zero, zero, 0
+# CHECK-NEXT: c.nop
+# CHECK-NEXT: c.add a0, a0
+# CHECK-NEXT: c.add a1, a1
+# CHECK-NEXT: c.add a2, a2
+# CHECK-NEXT: c.add a3, a3
+# CHECK-NEXT: c.add a4, a4
+# CHECK-NEXT: c.add a5, a5
+# CHECK-NEXT: c.add a6, a6
+# CHECK-NEXT: c.add a7, a7
+
+.global _start
+_start:
+.balign 4
+  c.addi a0, 1
+  c.addi a1, 2
+  c.addi a2, 3
+  c.addi a3, 4
+  c.addi a4, 5
+.balign 16
+  c.add a0, a0
+  c.add a1, a1
+  c.add a2, a2
+  c.add a3, a3
+  c.add a4, a4
+  c.add a5, a5
+  c.add a6, a6
+  c.add a7, a7
diff --git a/lld/test/ELF/riscv-relax-align.s b/lld/test/ELF/riscv-relax-align.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-align.s
@@ -0,0 +1,46 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && mkdir %t && cd %t
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+relax %s -o rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+relax %s -o rv64.o
+
+# Check that alignment is always handled for uncompressed instructions,
+# regardless of the --relax option. The three 4-byte addi leave 4 bytes of
+# padding before the 16-byte boundary: a single 4-byte NOP.
+# Realigning the code after `.balign 16` requires an overlapped copy.
+# Verify that no instructions are clobbered.
+
+# RUN: ld.lld rv32.o -o relax.rv32
+# RUN: ld.lld rv64.o -o relax.rv64
+# RUN: llvm-objdump -d -M no-aliases relax.rv32 > relax.rv32.dis
+# RUN: llvm-objdump -d -M no-aliases relax.rv64 > relax.rv64.dis
+# RUN: FileCheck %s < relax.rv32.dis
+# RUN: FileCheck %s < relax.rv64.dis
+
+# RUN: ld.lld --no-relax rv32.o -o no-relax.rv32
+# RUN: ld.lld --no-relax rv64.o -o no-relax.rv64
+# RUN: llvm-objdump -d -M no-aliases no-relax.rv32 > no-relax.rv32.dis
+# RUN: llvm-objdump -d -M no-aliases no-relax.rv64 > no-relax.rv64.dis
+# RUN: FileCheck %s < no-relax.rv32.dis
+# RUN: FileCheck %s < no-relax.rv64.dis
+
+# CHECK: addi a0, a0, 1
+# CHECK-NEXT: addi a1, a1, 2
+# CHECK-NEXT: addi a2, a2, 3
+# CHECK-NEXT: addi zero, zero, 0
+# CHECK-NEXT: add a0, a0, a1
+# CHECK-NEXT: add a1, a1, a2
+# CHECK-NEXT: add a2, a2, a3
+# CHECK-NEXT: add a3, a3, a4
+
+.global _start
+_start:
+.balign 4
+  addi a0, a0, 1
+  addi a1, a1, 2
+  addi a2, a2, 3
+.balign 16
+  add a0, a0, a1
+  add a1, a1, a2
+  add a2, a2, a3
+  add a3, a3, a4
diff --git a/lld/test/ELF/riscv-relax-syms.s b/lld/test/ELF/riscv-relax-syms.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-syms.s
@@ -0,0 +1,44 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && mkdir %t && cd %t
+
+## Check that relaxation correctly adjusts symbol addresses and sizes.
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+relax %s -o rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+relax %s -o rv64.o
+# RUN: ld.lld -Ttext=0x100000 rv32.o -o rv32
+# RUN: ld.lld -Ttext=0x100000 rv64.o -o rv64
+
+# RUN: llvm-readelf -s rv32 > rv32.dis
+# RUN: llvm-readelf -s rv64 > rv64.dis
+# RUN: FileCheck %s < rv32.dis
+# RUN: FileCheck %s < rv64.dis
+
+# CHECK-DAG: 100000 4 NOTYPE LOCAL DEFAULT 1 a
+# CHECK-DAG: 100000 12 NOTYPE LOCAL DEFAULT 1 b
+# CHECK-DAG: 100004 8 NOTYPE LOCAL DEFAULT 1 c
+# CHECK-DAG: 100008 12 NOTYPE LOCAL DEFAULT 1 d
+# CHECK-DAG: 10000c 8 NOTYPE LOCAL DEFAULT 1 e
+# CHECK-DAG: 100010 4 NOTYPE LOCAL DEFAULT 1 f
+# CHECK-DAG: 100000 20 NOTYPE GLOBAL DEFAULT 1 _start
+
+.global _start
+.balign 4
+_start: # 0
+a:
+b:
+  add a0, a1, a2 # [0..4) = the semi-open interval 0,1,2,3
+.size a, .-a # 4
+c:
+  add s0, s1, s2 # [4..8)
+d:
+  add t0, t1, t2 # [8..12)
+.size b, .-b # 12
+.size c, .-c
+e:
+.balign 16
+f: # 16
+  add x0, x1, x2 # [16..20)
+.size d, .-d # 20
+.size e, .-e # 20
+.size f, .-f # 20
+.size _start, .-_start
diff --git a/lld/test/ELF/riscv-reloc-align.s b/lld/test/ELF/riscv-reloc-align.s
deleted file mode 100644
--- a/lld/test/ELF/riscv-reloc-align.s
+++ /dev/null
@@ -1,12 +0,0 @@
-# REQUIRES: riscv
-
-# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax %s -o %t.o
-# RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s
-
-# CHECK: relocation R_RISCV_ALIGN requires unimplemented linker relaxation
-
-.global _start
-_start:
-  nop
-  .balign 8
-  nop