diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InputFiles.h"
+#include "OutputSections.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
@@ -36,6 +37,7 @@
                      const uint8_t *loc) const override;
   void relocate(uint8_t *loc, const Relocation &rel,
                 uint64_t val) const override;
+  void finalizeSections() const override;
 };
 
 } // end anonymous namespace
@@ -271,12 +273,7 @@
   case R_RISCV_TPREL_ADD:
     return R_NONE;
   case R_RISCV_ALIGN:
-    // Not just a hint; always padded to the worst-case number of NOPs, so may
-    // not currently be aligned, and without linker relaxation support we can't
-    // delete NOPs to realign.
-    errorOrWarn(getErrorLocation(loc) + "relocation R_RISCV_ALIGN requires "
-                "unimplemented linker relaxation; recompile with -mno-relax");
-    return R_NONE;
+    return R_RELAX_HINT;
   default:
     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
           ") against symbol " + toString(s));
@@ -476,6 +473,148 @@
   }
 }
 
+using AdjustRanges = SmallVector<InputSectionBase::AdjustRange, 0>;
+
+// As input, the addend of R_RISCV_ALIGN holds the number of NOP bytes emitted
+// by the compiler. We derive the desired alignment boundary by rounding this up
+// to a power of two (see setAlignBoundaries). Multi-pass relaxation needs two
+// quantities per R_RISCV_ALIGN: the current NOP byte count and the alignment
+// boundary. Once we alter the NOP byte count, we can no longer derive the
+// boundary from it, so we compute the boundary from the initial NOP byte count
+// and store it. The 64-bit addend is wide enough to hold both: the NOP byte
+// count lives in the low alignNopBytesWidth bits and the boundary in the bits
+// above.
+
+constexpr int alignNopBytesWidth = 16;
+constexpr int alignNopBytesMask = ((1 << alignNopBytesWidth) - 1);
+
+// Store alignment boundary N in the addend bits above the NOP byte count. The
+// NOP byte count already held in the addend must fit in alignNopBytesWidth
+// bits. The shift is performed in 64 bits; shifting the 32-bit N directly
+// would silently truncate boundaries of 1 << (32 - alignNopBytesWidth) or more.
+static void setAlignBoundary(Relocation &r, unsigned n) {
+  assert((r.addend & ~alignNopBytesMask) == 0);
+  r.addend = (static_cast<int64_t>(n) << alignNopBytesWidth) |
+             (r.addend & alignNopBytesMask);
+}
+
+// Return the alignment boundary stored by setAlignBoundary.
+static unsigned getAlignBoundary(Relocation &r) {
+  return r.addend >> alignNopBytesWidth;
+}
+
+// Replace the NOP byte count held in the low bits of the addend, leaving the
+// stored alignment boundary untouched.
+static void setAlignNopBytes(Relocation &r, unsigned n) {
+  assert(n <= static_cast<unsigned>(alignNopBytesMask) &&
+         "NOP byte count would overwrite the stored alignment boundary");
+  r.addend = (r.addend & ~alignNopBytesMask) | n;
+}
+
+// Return the NOP byte count held in the low bits of the addend.
+static unsigned getAlignNopBytes(Relocation &r) {
+  return r.addend & alignNopBytesMask;
+}
+
+// NOTE: The code structure is more complex than necessary
+// for handling R_RISCV_ALIGN alone. It is designed to accommodate
+// call/jump/load/store/addr-arithmetic relocs in later diffs.
+
+// Derive & store alignment boundaries for all R_RISCV_ALIGN relocs in
+// executable input sections. Must run before the first relaxation pass, while
+// the addend still holds the compiler-emitted NOP byte count.
+// NOTE(review): the "+ 2" presumably accounts for the padding being one
+// minimum-size instruction short of the boundary -- confirm for non-RVC input.
+static void setAlignBoundaries() {
+  for (OutputSection *osec : outputSections)
+    for (InputSection *isec : getInputSections(*osec))
+      if (isec->flags & SHF_EXECINSTR)
+        for (Relocation &r : isec->relocations)
+          if (r.type == R_RISCV_ALIGN)
+            setAlignBoundary(r, PowerOf2Ceil(r.addend + 2));
+}
+
+// Adjust the NOP byte count of one R_RISCV_ALIGN to maintain alignment.
+// DELTA is the net number of bytes added (positive) or removed (negative) by
+// the adjustments already queued for ISEC in this pass; it is updated, and the
+// adjustment is appended to ADJUSTRANGES, when the NOP byte count changes.
+// Reports an error and leaves everything untouched if the required padding
+// is not a multiple of the smallest NOP encoding.
+static void relaxAlign(InputSection *isec, Relocation &r, int64_t &delta,
+                       AdjustRanges &adjustRanges) {
+  const uint64_t pc = isec->getVA(r.offset) + delta;
+  const uint64_t oldNopBytes = getAlignNopBytes(r);
+  const uint64_t newNopBytes = alignTo(pc, getAlignBoundary(r)) - pc;
+  const uint64_t minNopWidth = config->eflags & EF_RISCV_RVC ? 2 : 4;
+  if (newNopBytes % minNopWidth != 0) {
+    errorOrWarn(isec->getObjMsg(r.offset) + ": alignment NOP bytes " +
+                Twine(newNopBytes) + " must be a multiple of " +
+                Twine(minNopWidth));
+    return;
+  }
+
+  setAlignNopBytes(r, newNopBytes);
+  int incr = newNopBytes - oldNopBytes;
+  if (incr) {
+    adjustRanges.push_back({r.offset, incr});
+    delta += incr;
+  }
+}
+
+// After one or more contractions and/or expansions of the address range that
+// rounds-up to the alignment boundary, the sequence of NOPs emitted by the
+// compiler could be corrupted. Repair by rewriting an optimal sequence of NOPs.
+// When compressed (2-byte) instructions are disabled, the NOP sequence length
+// will be a multiple of 4, and all NOPs can use the 4-byte encoding. When
+// compressed instructions are enabled, the NOP sequence length will be a
+// multiple of 2, and we can mix 4-byte NOPs with 2-byte C.NOPs. In order to
+// reduce instruction count, we emit as many 4-byte NOPs as possible, then
+// if necessary, use a single 2-byte C.NOP to finish.
+//
+// The loops below always make progress, so an unexpected odd byte count trips
+// the final assertion instead of spinning forever.
+static void fillAlignNops() {
+  for (OutputSection *osec : outputSections)
+    for (InputSection *isec : getInputSections(*osec)) {
+      if (!(isec->flags & SHF_EXECINSTR))
+        continue;
+      for (Relocation &r : isec->relocations) {
+        if (r.type != R_RISCV_ALIGN)
+          continue;
+        unsigned nopBytes = getAlignNopBytes(r);
+        if (nopBytes == 0)
+          continue; // Nothing to write; don't force a copy of the contents.
+        uint8_t *buf = isec->mutableData().data() + r.offset;
+        for (; nopBytes >= 4; nopBytes -= 4, buf += 4)
+          write32le(buf, 0x00000013); // nop
+        if (nopBytes >= 2) {
+          assert(config->eflags & EF_RISCV_RVC &&
+                 "expected RVC for 2-byte NOP");
+          write16le(buf, 0x0001); // c.nop
+          nopBytes -= 2;
+        }
+        assert(nopBytes == 0 && "odd number of alignment NOP bytes");
+      }
+    }
+}
+
+// Execute a single relaxation pass. Return TRUE if we did something. The
+// caller will repeatedly call this until the algorithm converges, as indicated
+// by a pass where nothing happens.
+static bool relaxOnce() {
+  bool changed = false;
+  for (OutputSection *osec : outputSections) {
+    for (InputSection *isec : getInputSections(*osec)) {
+      if (!(isec->flags & SHF_EXECINSTR))
+        continue;
+
+      // Queue this section's adjustments. DELTA accumulates the bytes added
+      // or removed by the relocations already visited in this section.
+      AdjustRanges adjustRanges;
+      int64_t delta = 0;
+      for (Relocation &r : isec->relocations)
+        if (r.type == R_RISCV_ALIGN && r.addend)
+          relaxAlign(isec, r, delta, adjustRanges);
+
+      // TODO(gkm): handle call/jump/load/store/addr-arithmetic relocs here
+
+      // Addresses are re-assigned as soon as a section changes size,
+      // presumably so that the sections visited next see up-to-date addresses.
+      if (isec->adjustRanges(adjustRanges)) {
+        script->assignAddresses();
+        changed = true;
+      }
+    }
+  }
+  return changed;
+}
+
+// Run R_RISCV_ALIGN relaxation: record the alignment boundaries, repeat
+// relaxation passes until one changes nothing, then rewrite the NOP padding.
+void RISCV::finalizeSections() const {
+  // Can't perform relaxation if it is not a final link.
+  if (config->relocatable)
+    return;
+
+  setAlignBoundaries();
+  while (relaxOnce())
+    ;
+  fillAlignNops();
+}
+
 TargetInfo *elf::getRISCVTargetInfo() {
   static RISCV target;
   return &target;
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -10,7 +10,9 @@
 #define LLD_ELF_INPUT_SECTION_H
 
 #include "Relocations.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/LLVM.h"
+#include "lld/Common/Memory.h"
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/TinyPtrVector.h"
@@ -149,6 +151,7 @@
     bytesDropped -= num;
   }
 
+  mutable bool copiedData = false;
   mutable ArrayRef<uint8_t> rawData;
 
   void trim() {
@@ -164,6 +167,36 @@
     return rawData;
   }
 
+  // Return a writable view of the section contents. The first call copies the
+  // contents into a buffer allocated from the linker context's bAlloc and
+  // repoints rawData at that copy; later calls return the same buffer.
+  MutableArrayRef<uint8_t> mutableData() const {
+    if (!copiedData) {
+      size_t size = data().size();
+      uint8_t *mutData = context().bAlloc.Allocate<uint8_t>(size);
+      memcpy(mutData, data().data(), size);
+      rawData = llvm::makeArrayRef(mutData, size);
+      copiedData = true;
+    }
+
+    return llvm::makeMutableArrayRef(const_cast<uint8_t *>(rawData.data()),
+                                     rawData.size());
+  }
+
+  // Add or delete DELTA bytes. Positive (expand) DELTA pertains to
+  // R_RISCV_ALIGN during multi-pass relaxation. The first relaxation pass will
+  // always delete excess NOPs generated by the compiler, but subsequent
+  // relaxation passes might require expansion of NOP padding in order to
+  // round-up to the alignment boundary.
+  class AdjustRange {
+  public:
+    uint64_t offset;
+    int64_t delta;
+    // True if A lies strictly inside the bytes this range deletes. Only a
+    // deletion (negative DELTA) covers existing bytes; an expansion inserts at
+    // OFFSET and spans nothing. Testing the sign first also keeps the unsigned
+    // arithmetic from wrapping around when DELTA is positive.
+    bool contains(uint64_t a) const {
+      return delta < 0 && offset < a &&
+             a < offset + static_cast<uint64_t>(-delta);
+    }
+  };
+
+  // Adjust (delete or expand) a range of input-section content, symbols, and
+  // relocations. The adjustRanges must be sorted by offset and must not
+  // overlap.
+  bool adjustRanges(ArrayRef<AdjustRange> adjustRanges);
+
   // The next member in the section group if this section is in a group. This is
   // used by --gc-sections.
   InputSectionBase *nextInSectionGroup = nullptr;
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -150,6 +150,103 @@
   return ret;
 }
 
+// Wrap a defined symbol with two of these, allowing uniform access to its start
+// and end offsets, supporting a unified vector of symbol starts and ends which
+// we can inspect and adjust individually.
+//
+// NOTE(review): bump() tells a start entry from an end entry by comparing the
+// recorded offset with the symbol's *current* value. By the time the end entry
+// is visited the start entry has already moved the value, so an end entry
+// could be mistaken for a start when the new value happens to equal the old
+// end offset. An explicit start/end flag would remove the ambiguity.
+class SymbolAddr {
+public:
+  uint64_t offset;
+  Defined *d;
+
+  void bump(int64_t delta) {
+    if (offset == d->value) { // symbol-start
+      // Extend size as much as we reduce value, thereby preserving original
+      // value+size. We will fix size later when we bump symbol-end.
+      d->value += delta;
+      d->size -= delta;
+    } else { // symbol-end
+      d->size += delta;
+    }
+  }
+};
+
+// The relaxation pass queued a vector of adjustments for this input
+// section. Most often these remove excess bytes from shortened instructions for
+// calls/jumps/loads/stores/address-arithmetic/alignments. Sometimes, these add
+// bytes for alignments and undoing relaxations. We process everything in
+// ascending address order, adjusting section contents, reloc offsets, symbol
+// values, and function-symbol sizes. Function symbol sizes must be adjusted
+// when code size changes within the function.
+bool InputSectionBase::adjustRanges(ArrayRef<AdjustRange> ranges) {
+  if (ranges.empty())
+    return false;
+
+  // Make a unified vector of start & end offsets for all defined symbols within
+  // this section. Each symbol gets two entries in the vector; SymbolAddr::bump
+  // tells them apart by comparing the entry's offset with the symbol's value.
+  SmallVector<SymbolAddr, 0> symbolAddrs;
+  for (Symbol *sym : file->getSymbols())
+    if (Defined *d = dyn_cast<Defined>(sym))
+      if (d->section == this) {
+        symbolAddrs.push_back({d->value, d});
+        symbolAddrs.push_back({d->value + d->size, d});
+      }
+  llvm::sort(symbolAddrs, [](const SymbolAddr &a, const SymbolAddr &b) {
+    return a.offset < b.offset;
+  });
+
+  // Adjust symbol offsets and sizes. A range only affects symbol boundaries
+  // located strictly after its offset.
+  int64_t delta = 0;
+  size_t i = 0;
+  for (SymbolAddr &sa : symbolAddrs) {
+    // The assertions below check that AdjustRanges ...
+    // (a) are all disjoint, i.e., do not overlap; and ...
+    // (b) do not have symbol boundaries within their interior, i.e., an
+    //     AdjustRange is entirely inside or entirely outside a symbol's range.
+    for (; i < ranges.size() && ranges[i].offset < sa.offset; i++) {
+      // An AdjustRange should not span a symbol start/end offset
+      assert(!ranges[i].contains(sa.offset));
+      if (i > 0)
+        // AdjustRanges should increase monotonically and never overlap.
+        assert(!ranges[i - 1].contains(ranges[i].offset));
+      delta += ranges[i].delta;
+    }
+    if (i > 0)
+      // An AdjustRange should not span a symbol start/end offset.
+      assert(!ranges[i - 1].contains(sa.offset));
+    sa.bump(delta);
+  }
+
+  // Adjust relocation offsets within the section. This single forward sweep
+  // relies on the relocations being visited in ascending offset order.
+  delta = 0;
+  const auto *ar = ranges.begin();
+  for (auto &r : relocations) {
+    for (; ar != ranges.end() && ar->offset < r.offset; ++ar)
+      delta += ar->delta;
+    r.offset += delta;
+  }
+
+  // Remove or add bytes to the section piece-wise. The new contents are
+  // assembled in a scratch buffer rather than shuffled in place: an in-place
+  // expansion would overwrite bytes that later ranges still have to read and
+  // would run past the end of the current buffer.
+  const ArrayRef<uint8_t> oldData = data();
+  SmallVector<uint8_t, 0> newData;
+  newData.reserve(oldData.size());
+  const uint8_t *src = oldData.begin();
+  for (const AdjustRange &range : ranges) {
+    const uint8_t *at = oldData.begin() + range.offset;
+    assert(src <= at && at <= oldData.end() &&
+           "AdjustRanges must be sorted, disjoint and inside the section");
+    newData.append(src, at); // Bytes kept in front of this range.
+    src = at;
+    if (range.delta < 0) {
+      assert(static_cast<uint64_t>(-range.delta) <=
+                 static_cast<uint64_t>(oldData.end() - at) &&
+             "AdjustRange deletes past the end of the section");
+      src += -range.delta; // Skip the deleted bytes.
+    } else {
+      // Insert zero-filled placeholder bytes at the range offset.
+      newData.resize(newData.size() + range.delta);
+    }
+  }
+  newData.append(src, oldData.end());
+
+  // Resize the section contents. Reuse the section's private buffer when one
+  // exists and the result fits in it; otherwise allocate a new one.
+  uint8_t *out = copiedData && newData.size() <= oldData.size()
+                     ? mutableData().data()
+                     : context().bAlloc.Allocate<uint8_t>(newData.size());
+  std::copy(newData.begin(), newData.end(), out);
+  rawData = makeArrayRef(out, newData.size());
+  copiedData = true;
+  return true;
+}
+
 uint64_t SectionBase::getOffset(uint64_t offset) const {
   switch (kind()) {
   case Output: {
@@ -822,6 +919,8 @@
     return in.got->getTlsIndexOff() + a;
   case R_TLSLD_PC:
     return in.got->getTlsIndexVA() + a - p;
+  case R_RELAX_HINT:
+    return 0;
   default:
     llvm_unreachable("invalid expression");
   }
@@ -1011,6 +1110,8 @@
                                       *rel.sym, rel.expr),
                  bits);
     switch (rel.expr) {
+    case R_RELAX_HINT:
+      continue;
     case R_RELAX_GOT_PC:
     case R_RELAX_GOT_PC_NOPIC:
       target.relaxGot(bufLoc, rel, targetVA);
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -46,6 +46,7 @@
   R_PLT,
   R_PLT_PC,
   R_PLT_GOTPLT,
+  R_RELAX_HINT,
   R_RELAX_GOT_PC,
   R_RELAX_GOT_PC_NOPIC,
   R_RELAX_TLS_GD_TO_IE,
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -960,7 +960,8 @@
             R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC,
             R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
             R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT,
-            R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE>(e))
+            R_PPC64_RELAX_TOC, R_RISCV_ADD, R_RELAX_HINT, R_AARCH64_GOT_PAGE>(
+          e))
     return true;
 
   // These never do, except if the entire file is position dependent or if
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -92,6 +92,8 @@
   virtual void applyJumpInstrMod(uint8_t
*loc, JumpModType type, JumpModType val) const {} + virtual void finalizeSections() const {} + virtual ~TargetInfo(); // This deletes a jump insn at the end of the section if it is a fall thru to diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1630,6 +1630,8 @@ if (config->emachine == EM_HEXAGON) hexagonTLSSymbolUpdate(outputSections); + target->finalizeSections(); + int assignPasses = 0; for (;;) { bool changed = target->needsThunks && tc.createThunks(outputSections); diff --git a/lld/test/ELF/riscv-relax-align-rvc.s b/lld/test/ELF/riscv-relax-align-rvc.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/riscv-relax-align-rvc.s @@ -0,0 +1,43 @@ +# REQUIRES: riscv +# RUN: rm -rf %t && mkdir %t && cd %t + +# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+c,+relax %s -o rv32.o +# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+c,+relax %s -o rv64.o + +# Check that alignment is always handled, regardless of --relax option + +# RUN: ld.lld rv32.o -o relax.rv32 +# RUN: ld.lld rv64.o -o relax.rv64 +# RUN: llvm-objdump -d -M no-aliases relax.rv32 > relax.rv32.dis +# RUN: llvm-objdump -d -M no-aliases relax.rv64 > relax.rv64.dis +# RUN: FileCheck %s < relax.rv32.dis +# RUN: FileCheck %s < relax.rv64.dis + +# RUN: ld.lld --no-relax rv32.o -o no-relax.rv32 +# RUN: ld.lld --no-relax rv64.o -o no-relax.rv64 +# RUN: llvm-objdump -d -M no-aliases no-relax.rv32 > no-relax.rv32.dis +# RUN: llvm-objdump -d -M no-aliases no-relax.rv64 > no-relax.rv64.dis +# RUN: FileCheck %s < no-relax.rv32.dis +# RUN: FileCheck %s < no-relax.rv64.dis + +# CHECK: c.add a0, a1 +# CHECK-NEXT: addi zero, zero, 0 +# CHECK-NEXT: addi zero, zero, 0 +# CHECK-NEXT: addi zero, zero, 0 +# CHECK-NEXT: c.nop +# CHECK-NEXT: c.add s0, s1 +# CHECK-NEXT: c.add s2, s3 +# CHECK-NEXT: c.add s4, s5 +# CHECK-NEXT: c.nop +# CHECK-NEXT: c.add t0, t1 + +.global _start +_start: +.balign 4 + c.add a0, a1 +.balign 16 + c.add s0, s1 + 
c.add s2, s3 + c.add s4, s5 +.balign 8 + c.add t0, t1 diff --git a/lld/test/ELF/riscv-relax-align.s b/lld/test/ELF/riscv-relax-align.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/riscv-relax-align.s @@ -0,0 +1,39 @@ +# REQUIRES: riscv +# RUN: rm -rf %t && mkdir %t && cd %t + +# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+relax %s -o rv32.o +# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+relax %s -o rv64.o + +# Check that alignment is always handled regardless of --relax option + +# RUN: ld.lld rv32.o -o relax.rv32 +# RUN: ld.lld rv64.o -o relax.rv64 +# RUN: llvm-objdump -d -M no-aliases relax.rv32 > relax.rv32.dis +# RUN: llvm-objdump -d -M no-aliases relax.rv64 > relax.rv64.dis +# RUN: FileCheck %s < relax.rv32.dis +# RUN: FileCheck %s < relax.rv64.dis + +# RUN: ld.lld --no-relax rv32.o -o no-relax.rv32 +# RUN: ld.lld --no-relax rv64.o -o no-relax.rv64 +# RUN: llvm-objdump -d -M no-aliases no-relax.rv32 > no-relax.rv32.dis +# RUN: llvm-objdump -d -M no-aliases no-relax.rv64 > no-relax.rv64.dis +# RUN: FileCheck %s < no-relax.rv32.dis +# RUN: FileCheck %s < no-relax.rv64.dis + +# CHECK: add a0, a1, a2 +# CHECK-NEXT: add a3, a4, a5 +# CHECK-NEXT: addi zero, zero, 0 +# CHECK-NEXT: addi zero, zero, 0 +# CHECK-NEXT: add s0, s1, s2 +# CHECK-NEXT: add t0, t1, t2 + +.global _start +_start: +.balign 4 + add a0, a1, a2 + add a3, a4, a5 +.balign 16 + add s0, s1, s2 +.balign 4 +.balign 4 + add t0, t1, t2 diff --git a/lld/test/ELF/riscv-relax-syms.s b/lld/test/ELF/riscv-relax-syms.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/riscv-relax-syms.s @@ -0,0 +1,44 @@ +# REQUIRES: riscv +# RUN: rm -rf %t && mkdir %t && cd %t + +## Check that relaxation correctly adjusts symbol addresses and sizes. 
+ +# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf %s -o rv32.o +# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf %s -o rv64.o +# RUN: ld.lld -Ttext=0x100000 rv32.o -o rv32 +# RUN: ld.lld -Ttext=0x100000 rv64.o -o rv64 + +# RUN: llvm-readelf -s rv32 > rv32.dis +# RUN: llvm-readelf -s rv64 > rv64.dis +# RUN: FileCheck %s < rv32.dis +# RUN: FileCheck %s < rv64.dis + +# CHECK-DAG: 100000 4 NOTYPE LOCAL DEFAULT 1 a +# CHECK-DAG: 100000 12 NOTYPE LOCAL DEFAULT 1 b +# CHECK-DAG: 100004 8 NOTYPE LOCAL DEFAULT 1 c +# CHECK-DAG: 100008 12 NOTYPE LOCAL DEFAULT 1 d +# CHECK-DAG: 10000c 8 NOTYPE LOCAL DEFAULT 1 e +# CHECK-DAG: 100010 4 NOTYPE LOCAL DEFAULT 1 f +# CHECK-DAG: 100000 20 NOTYPE GLOBAL DEFAULT 1 _start + +.global _start +.balign 4 +_start: # 0 +a: +b: + add a0, a1, a2 # [0..4) = the semi-open interval 0,1,2,3 +.size a, .-a # 4 +c: + add s0, s1, s2 # [4..8) +d: + add t0, t1, t2 # [8..12) +.size b, .-b # 12 +.size c, .-c +e: +.balign 16 +f: # 16 + add x0, x1, x2 # [16..20) +.size d, .-d # 20 +.size e, .-e # 20 +.size f, .-f # 20 +.size _start, .-_start diff --git a/lld/test/ELF/riscv-reloc-align.s b/lld/test/ELF/riscv-reloc-align.s deleted file mode 100644 --- a/lld/test/ELF/riscv-reloc-align.s +++ /dev/null @@ -1,12 +0,0 @@ -# REQUIRES: riscv - -# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax %s -o %t.o -# RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s - -# CHECK: relocation R_RISCV_ALIGN requires unimplemented linker relaxation - -.global _start -_start: - nop - .balign 8 - nop