diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InputFiles.h"
+#include "OutputSections.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
@@ -36,6 +37,7 @@
                      const uint8_t *loc) const override;
   void relocate(uint8_t *loc, const Relocation &rel,
                 uint64_t val) const override;
+  void finalizeSections() const override;
 };
 
 } // end anonymous namespace
@@ -271,12 +273,7 @@
   case R_RISCV_TPREL_ADD:
     return R_NONE;
   case R_RISCV_ALIGN:
-    // Not just a hint; always padded to the worst-case number of NOPs, so may
-    // not currently be aligned, and without linker relaxation support we can't
-    // delete NOPs to realign.
-    errorOrWarn(getErrorLocation(loc) + "relocation R_RISCV_ALIGN requires "
-                "unimplemented linker relaxation; recompile with -mno-relax");
-    return R_NONE;
+    return R_RELAX_HINT;
   default:
     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
           ") against symbol " + toString(s));
@@ -476,6 +473,96 @@
   }
 }
 
+using DeleteRanges = std::vector<InputSectionBase::DeleteRange>;
+
+// Record [offset, offset + size) as a byte range to remove from a section.
+static void addDeleteRange(DeleteRanges &ranges, uint64_t offset,
+                           uint64_t size) {
+  ranges.push_back({offset, size});
+}
+
+// Handle R_RISCV_ALIGN in every executable input section: keep just enough of
+// the already-emitted NOP padding to reach the requested alignment, rewrite
+// the kept bytes as NOPs, and delete the excess from the section.
+static void relaxAlign() {
+  bool rvc = config->eflags & EF_RISCV_RVC;
+
+  for (OutputSection *os : outputSections) {
+    for (InputSection *is : getInputSections(*os)) {
+      if (!(is->flags & SHF_EXECINSTR))
+        continue;
+
+      uint64_t bytesDeleted = 0;
+      DeleteRanges deleteRanges;
+      for (auto &rel : is->relocations) {
+        if (rel.type != R_RISCV_ALIGN || rel.addend == 0)
+          continue;
+        // pc is where the padding will sit once the bytes already queued for
+        // deletion earlier in this section are removed.
+        uint64_t pc = is->getVA(rel.offset) - bytesDeleted;
+        // NOTE(review): the addend appears to be the padding size, i.e. the
+        // alignment minus the smallest NOP; rounding addend + 2 up to a power
+        // of two then recovers the alignment. Confirm against the psABI.
+        uint64_t boundary = PowerOf2Ceil(rel.addend + 2);
+        uint64_t keepNopBytes = alignTo(pc, boundary) - pc;
+        if (keepNopBytes > (uint64_t)rel.addend) {
+          errorOrWarn(is->getObjMsg(rel.offset) + ": alignment requires " +
+                      Twine(keepNopBytes) + " bytes of nop, but only " +
+                      Twine(rel.addend) + " bytes are available");
+          break;
+        }
+        uint64_t minNopWidth = rvc ? 2 : 4;
+        if (keepNopBytes % minNopWidth != 0) {
+          errorOrWarn(is->getObjMsg(rel.offset) + ": alignment nop bytes " +
+                      Twine(keepNopBytes) + " must be a multiple of " +
+                      Twine(minNopWidth));
+          break;
+        }
+
+        uint64_t bytesToDelete = rel.addend - keepNopBytes;
+        if (bytesToDelete > 0) {
+          // Delete excess NOPs. It doesn't matter whether we delete the head or
+          // the tail of the NOP range--symbols whose address range overlaps the
+          // NOPs range will have their address and size adjusted correctly
+          // either way.
+          addDeleteRange(deleteRanges, rel.offset, bytesToDelete);
+          bytesDeleted += bytesToDelete;
+        }
+
+        // The compiler already emitted a NOP sequence, but depending on how
+        // many bytes we must delete, the compiler's layout might be wrong.
+        // E.g., we might need to truncate a 4-byte NOP. Rather than decode the
+        // compiler-generated NOPs to determine the minimal fixup, just rewrite
+        // the entire pad range with the optimal sequence of NOPs.
+        uint8_t *buf = is->mutableData().data() + rel.offset;
+        while (keepNopBytes > 0) {
+          if (keepNopBytes >= 4) {
+            write32le(buf, 0x00000013); // nop
+            keepNopBytes -= 4;
+            buf += 4;
+          } else if (keepNopBytes == 2) {
+            assert(rvc && "expected RVC for 2-byte NOP");
+            write16le(buf, 0x0001); // c.nop
+            keepNopBytes -= 2;
+            buf += 2;
+          }
+        }
+      }
+
+      is->deleteRanges(deleteRanges);
+      script->assignAddresses();
+    }
+  }
+}
+
+void RISCV::finalizeSections() const {
+  // Can't perform relaxation if it is not a final link.
+  if (config->relocatable)
+    return;
+
+  relaxAlign();
+}
+
 TargetInfo *elf::getRISCVTargetInfo() {
   static RISCV target;
   return &target;
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -10,7 +10,9 @@
 #define LLD_ELF_INPUT_SECTION_H
 
 #include "Relocations.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/LLVM.h"
+#include "lld/Common/Memory.h"
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/TinyPtrVector.h"
@@ -149,6 +151,7 @@
     bytesDropped -= num;
   }
 
+  mutable bool copiedData = false;
   mutable ArrayRef<uint8_t> rawData;
 
   void trim() {
@@ -164,6 +167,32 @@
     return rawData;
   }
 
+  // Returns a writable view of the section contents. On first use the contents
+  // are copied into memory obtained from context().bAlloc and rawData is
+  // redirected to that copy, so later writes land in the copy.
+  MutableArrayRef<uint8_t> mutableData() const {
+    if (!copiedData) {
+      size_t size = data().size();
+      uint8_t *mutData = context().bAlloc.Allocate<uint8_t>(size);
+      memcpy(mutData, data().data(), size);
+      rawData = llvm::makeArrayRef(mutData, size);
+      copiedData = true;
+    }
+
+    return llvm::makeMutableArrayRef(const_cast<uint8_t *>(rawData.data()),
+                                     rawData.size());
+  }
+
+  // A byte range to delete, given as (offset, size).
+  struct DeleteRange {
+    uint64_t offset;
+    uint64_t size;
+  };
+
+  // Delete ranges and adjust section content and symbols; relocation offsets
+  // are not adjusted. Ranges must be sorted by offset and must not overlap.
+  void deleteRanges(ArrayRef<DeleteRange> deleteRanges);
+
   // The next member in the section group if this section is in a group. This is
   // used by --gc-sections.
   InputSectionBase *nextInSectionGroup = nullptr;
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -150,6 +150,76 @@
   return ret;
 }
 
+// Wrap a defined symbol with two of these, allowing uniform access to its start
+// and end addresses. This allows us to maintain a unified vector of symbol
+// start and end addresses which we can query and decrement individually.
+class SymbolAddr {
+public:
+  Defined *d;
+  bool isStart;
+
+  uint64_t address() const { return d->value + (isStart ? 0 : d->size); }
+  void decrement(uint64_t x) {
+    if (isStart) {
+      d->value -= x;
+      d->size += x; // retain original end address; it is decremented later
+    } else {
+      d->size -= x;
+    }
+  }
+};
+
+void InputSectionBase::deleteRanges(ArrayRef<DeleteRange> ranges) {
+  if (ranges.empty())
+    return;
+
+  // Make a unified vector of start & end addresses for all defined
+  // symbols within this section. Each symbol gets two entries in the vector,
+  // distinguished by the `isStart` member.
+  SmallVector<SymbolAddr> symbolAddrs;
+  for (auto &sym : file->getSymbols()) {
+    if (Defined *d = dyn_cast<Defined>(sym))
+      if (d->section == this) {
+        symbolAddrs.push_back({d, /*isStart*/ true});
+        symbolAddrs.push_back({d, /*isStart*/ false});
+      }
+  }
+  llvm::sort(symbolAddrs, [](const SymbolAddr &a, const SymbolAddr &b) {
+    if (a.address() == b.address())
+      return a.isStart && !b.isStart; // strict weak ordering: start < end
+    return a.address() < b.address();
+  });
+  // Note that the ranges vector is already sorted by ascending address
+  uint64_t deletedBytes = 0;
+  size_t i = 0;
+  for (SymbolAddr &sa : symbolAddrs) {
+    for (; i < ranges.size() && ranges[i].offset < sa.address(); i++) {
+      // DeleteRanges should increase monotonically and never overlap
+      if (i > 0)
+        assert(ranges[i].offset >= ranges[i - 1].offset + ranges[i - 1].size);
+      deletedBytes += ranges[i].size;
+    }
+    // The previous DeleteRange should not span a symbol start/end address
+    if (i > 0)
+      assert(sa.address() >= ranges[i - 1].offset + ranges[i - 1].size);
+    sa.decrement(deletedBytes);
+  }
+
+  // Adjust section content piece-wise and resize the section.
+  MutableArrayRef<uint8_t> buf = this->mutableData();
+  auto *dst = buf.begin() + ranges.begin()->offset;
+  for (auto it = ranges.begin(), e = ranges.end(); it != e; ++it) {
+    auto *from = buf.begin() + it->offset + it->size;
+    auto *to = std::next(it) != ranges.end()
+                   ? (buf.begin() + std::next(it)->offset)
+                   : buf.end();
+    dst = std::copy(from, to, dst);
+  }
+
+  // Resize the section
+  rawData = makeArrayRef(data().data(), dst);
+}
+
 uint64_t SectionBase::getOffset(uint64_t offset) const {
   switch (kind()) {
   case Output: {
@@ -997,7 +1067,7 @@
   AArch64Relaxer aarch64relaxer(relocations);
   for (size_t i = 0, size = relocations.size(); i != size; ++i) {
     const Relocation &rel = relocations[i];
-    if (rel.expr == R_NONE)
+    if (rel.expr == R_NONE || rel.expr == R_RELAX_HINT)
       continue;
     uint64_t offset = rel.offset;
     uint8_t *bufLoc = buf + offset;
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -46,6 +46,7 @@
   R_PLT,
   R_PLT_PC,
   R_PLT_GOTPLT,
+  R_RELAX_HINT,
   R_RELAX_GOT_PC,
   R_RELAX_GOT_PC_NOPIC,
   R_RELAX_TLS_GD_TO_IE,
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -960,7 +960,8 @@
             R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC,
             R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
             R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT,
-            R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE>(e))
+            R_PPC64_RELAX_TOC, R_RISCV_ADD, R_RELAX_HINT, R_AARCH64_GOT_PAGE>(
+          e))
     return true;
 
   // These never do, except if the entire file is position dependent or if
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -92,6 +92,8 @@
   virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type,
                                  JumpModType val) const {}
 
+  virtual void finalizeSections() const {}
+
   virtual ~TargetInfo();
 
   // This deletes a jump insn at the end of the section if it is a fall thru to
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1630,6 +1630,8 @@
   if (config->emachine == EM_HEXAGON)
     hexagonTLSSymbolUpdate(outputSections);
 
+  target->finalizeSections();
+
   int assignPasses = 0;
   for (;;) {
     bool changed = target->needsThunks && tc.createThunks(outputSections);
diff --git a/lld/test/ELF/riscv-relax-align-rvc.s b/lld/test/ELF/riscv-relax-align-rvc.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-align-rvc.s
@@ -0,0 +1,43 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && mkdir -p %t
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+c,+relax %s -o %t/rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+c,+relax %s -o %t/rv64.o
+
+# Check that alignment is always handled, regardless of --relax option
+
+# RUN: ld.lld %t/rv32.o -o %t/relax.rv32
+# RUN: ld.lld %t/rv64.o -o %t/relax.rv64
+# RUN: llvm-objdump -d -M no-aliases %t/relax.rv32 > %t/relax.rv32.dis
+# RUN: llvm-objdump -d -M no-aliases %t/relax.rv64 > %t/relax.rv64.dis
+# RUN: FileCheck %s < %t/relax.rv32.dis
+# RUN: FileCheck %s < %t/relax.rv64.dis
+
+# RUN: ld.lld --no-relax %t/rv32.o -o %t/no-relax.rv32
+# RUN: ld.lld --no-relax %t/rv64.o -o %t/no-relax.rv64
+# RUN: llvm-objdump -d -M no-aliases %t/no-relax.rv32 > %t/no-relax.rv32.dis
+# RUN: llvm-objdump -d -M no-aliases %t/no-relax.rv64 > %t/no-relax.rv64.dis
+# RUN: FileCheck %s < %t/no-relax.rv32.dis
+# RUN: FileCheck %s < %t/no-relax.rv64.dis
+
+# CHECK: c.add a0, a1
+# CHECK-NEXT: addi zero, zero, 0
+# CHECK-NEXT: addi zero, zero, 0
+# CHECK-NEXT: addi zero, zero, 0
+# CHECK-NEXT: c.nop
+# CHECK-NEXT: c.add s0, s1
+# CHECK-NEXT: c.add s2, s3
+# CHECK-NEXT: c.add s4, s5
+# CHECK-NEXT: c.nop
+# CHECK-NEXT: c.add t0, t1
+
+.global _start
+_start:
+.balign 4
+  c.add a0, a1
+.balign 16
+  c.add s0, s1
+  c.add s2, s3
+  c.add s4, s5
+.balign 8
+  c.add t0, t1
diff --git a/lld/test/ELF/riscv-relax-align.s b/lld/test/ELF/riscv-relax-align.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-align.s
@@ -0,0 +1,39 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && mkdir -p %t
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+relax %s -o %t/rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+relax %s -o %t/rv64.o
+
+# Check that alignment is always handled regardless of --relax option
+
+# RUN: ld.lld %t/rv32.o -o %t/relax.rv32
+# RUN: ld.lld %t/rv64.o -o %t/relax.rv64
+# RUN: llvm-objdump -d -M no-aliases %t/relax.rv32 > %t/relax.rv32.dis
+# RUN: llvm-objdump -d -M no-aliases %t/relax.rv64 > %t/relax.rv64.dis
+# RUN: FileCheck %s < %t/relax.rv32.dis
+# RUN: FileCheck %s < %t/relax.rv64.dis
+
+# RUN: ld.lld --no-relax %t/rv32.o -o %t/no-relax.rv32
+# RUN: ld.lld --no-relax %t/rv64.o -o %t/no-relax.rv64
+# RUN: llvm-objdump -d -M no-aliases %t/no-relax.rv32 > %t/no-relax.rv32.dis
+# RUN: llvm-objdump -d -M no-aliases %t/no-relax.rv64 > %t/no-relax.rv64.dis
+# RUN: FileCheck %s < %t/no-relax.rv32.dis
+# RUN: FileCheck %s < %t/no-relax.rv64.dis
+
+# CHECK: add a0, a1, a2
+# CHECK-NEXT: add a3, a4, a5
+# CHECK-NEXT: addi zero, zero, 0
+# CHECK-NEXT: addi zero, zero, 0
+# CHECK-NEXT: add s0, s1, s2
+# CHECK-NEXT: add t0, t1, t2
+
+.global _start
+_start:
+.balign 4
+  add a0, a1, a2
+  add a3, a4, a5
+.balign 16
+  add s0, s1, s2
+.balign 4
+.balign 4
+  add t0, t1, t2
diff --git a/lld/test/ELF/riscv-relax-syms.s b/lld/test/ELF/riscv-relax-syms.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-syms.s
@@ -0,0 +1,44 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && mkdir -p %t
+
+# Check that relaxation correctly adjusts symbol addresses and sizes.
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+relax %s -o %t/rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+relax %s -o %t/rv64.o
+# RUN: ld.lld -Ttext=0x100000 %t/rv32.o -o %t/rv32
+# RUN: ld.lld -Ttext=0x100000 %t/rv64.o -o %t/rv64
+
+# RUN: llvm-readelf -s %t/rv32 > %t/rv32.dis
+# RUN: llvm-readelf -s %t/rv64 > %t/rv64.dis
+# RUN: FileCheck %s < %t/rv32.dis
+# RUN: FileCheck %s < %t/rv64.dis
+
+# CHECK: 100000 4 NOTYPE LOCAL DEFAULT 1 a
+# CHECK: 100000 12 NOTYPE LOCAL DEFAULT 1 b
+# CHECK: 100004 8 NOTYPE LOCAL DEFAULT 1 c
+# CHECK: 100008 12 NOTYPE LOCAL DEFAULT 1 d
+# CHECK: 10000c 8 NOTYPE LOCAL DEFAULT 1 e
+# CHECK: 100010 4 NOTYPE LOCAL DEFAULT 1 f
+# CHECK: 100000 20 NOTYPE GLOBAL DEFAULT 1 _start
+
+.global _start
+.balign 4
+_start: # 0
+a:
+b:
+  add a0, a1, a2 # [0..4)
+.size a, .-a # 4
+c:
+  add s0, s1, s2 # [4..8)
+d:
+  add t0, t1, t2 # [8..12)
+.size b, .-b # 12
+.size c, .-c
+e:
+.balign 16
+f: # 16
+  add x0, x1, x2 # [16..20)
+.size d, .-d # 20
+.size e, .-e # 20
+.size f, .-f # 20
+.size _start, .-_start
diff --git a/lld/test/ELF/riscv-reloc-align.s b/lld/test/ELF/riscv-reloc-align.s
deleted file mode 100644
--- a/lld/test/ELF/riscv-reloc-align.s
+++ /dev/null
@@ -1,12 +0,0 @@
-# REQUIRES: riscv
-
-# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax %s -o %t.o
-# RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s
-
-# CHECK: relocation R_RISCV_ALIGN requires unimplemented linker relaxation
-
-.global _start
-_start:
-  nop
-  .balign 8
-  nop