diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -33,8 +33,7 @@
   RelType getDynRel(RelType type) const override;
   RelExpr getRelExpr(RelType type, const Symbol &s,
                      const uint8_t *loc) const override;
-  void relocate(uint8_t *loc, const Relocation &rel,
-                uint64_t val) const override;
+  bool relaxSection(InputSection *isec, int pass) const override;
 };
 
 } // end anonymous namespace
@@ -52,6 +51,7 @@
 };
 
 enum Reg {
+  X_ZERO = 0,
   X_RA = 1,
   X_T0 = 5,
   X_T1 = 6,
@@ -59,6 +59,11 @@
   X_T3 = 28,
 };
 
+enum RelaxPass {
+  PASS_OPT,
+  PASS_ALIGN,
+};
+
 static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; }
 static uint32_t lo12(uint32_t val) { return val & 4095; }
 
@@ -72,6 +77,9 @@
   return op | (rd << 7) | (imm << 12);
 }
 
+const uint32_t nopInstr = itype(ADDI, X_ZERO, X_ZERO, 0);
+const uint16_t rvcNopInstr = 0x1;
+
 RISCV::RISCV() {
   copyRel = R_RISCV_COPY;
   noneRel = R_RISCV_NONE;
@@ -101,6 +109,8 @@
   pltHeaderSize = 32;
   pltEntrySize = 16;
   ipltEntrySize = 16;
+
+  relaxPasses = {PASS_OPT, PASS_ALIGN};
 }
 
 static uint32_t getEFlags(InputFile *f) {
@@ -237,16 +247,12 @@
   case R_RISCV_TPREL_LO12_I:
   case R_RISCV_TPREL_LO12_S:
     return R_TLS;
+  case R_RISCV_ALIGN:
+    return R_RISCV_RELAX_HINT;
+  // TODO: implement linker relaxation optimisation pass for these
   case R_RISCV_RELAX:
   case R_RISCV_TPREL_ADD:
     return R_NONE;
-  case R_RISCV_ALIGN:
-    // Not just a hint; always padded to the worst-case number of NOPs, so may
-    // not currently be aligned, and without linker relaxation support we can't
-    // delete NOPs to realign.
-    errorOrWarn(getErrorLocation(loc) + "relocation R_RISCV_ALIGN requires "
-                "unimplemented linker relaxation; recompile with -mno-relax");
-    return R_NONE;
   default:
     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
           ") against symbol " + toString(s));
@@ -438,14 +444,128 @@
     write64le(loc, val - dtpOffset);
     break;
 
+  case R_RISCV_ALIGN:
+    assert(config->relocatable);
+    return;
   case R_RISCV_RELAX:
-    return; // Ignored (for now)
+    // Either this is a relocatable link, or we have disabled relaxations.
+    return;
 
   default:
     llvm_unreachable("unknown relocation");
   }
 }
 
+// Handle one R_RISCV_ALIGN relocation during the PASS_ALIGN relaxation pass.
+// rel.addend bytes of nop padding are present at rel.offset. Keep only as many
+// as are needed to reach the alignment boundary once the removeTotal bytes
+// already deleted earlier in the section are taken into account, rewrite the
+// kept bytes as nops, and queue the surplus as an (offset, length) pair in
+// remove. The relocation is marked for deletion by setting its expr to R_NONE.
+static void relaxAlign(InputSection *isec, Relocation &rel,
+                       std::vector<std::pair<uint64_t, uint64_t>> &remove,
+                       uint64_t &removeTotal) {
+  // Addend is the number of bytes of nops currently present. The alignment
+  // required is therefore the next power of two bigger than this.
+  uint64_t align = NextPowerOf2(rel.addend);
+  uint64_t offset = rel.offset - removeTotal;
+  uint64_t aligned = alignTo(offset, align);
+  uint64_t requiredBytes = aligned - offset;
+  uint8_t *loc = isec->mutableData().data() + rel.offset;
+  if (requiredBytes > (uint64_t)rel.addend) {
+    errorOrWarn(getErrorLocation(loc) + "need " + Twine(requiredBytes) +
+                " bytes to align to " + Twine(align) +
+                "-byte boundary, but only " + Twine(rel.addend) + " present");
+    return;
+  }
+
+  // Delete relocation
+  rel.expr = R_NONE;
+
+  if (requiredBytes == (uint64_t)rel.addend)
+    return;
+
+  // Fill with nops
+  for (uint8_t *p = loc, *e = loc + requiredBytes; p + 3 < e; p += 4)
+    write32le(p, nopInstr);
+
+  // Write a single compressed nop if required
+  if (requiredBytes % 4 != 0) {
+    if (requiredBytes % 4 != 2) {
+      errorOrWarn(getErrorLocation(loc) + "need " + Twine(requiredBytes) +
+                  " bytes to align to " + Twine(align) +
+                  "-byte boundary, which is not an integral number of " +
+                  "instructions");
+      return;
+    } else if (!(config->eflags & EF_RISCV_RVC)) {
+      errorOrWarn(getErrorLocation(loc) + "need " + Twine(requiredBytes) +
+                  " bytes to align to " + Twine(align) +
+                  "-byte boundary, which requires a compressed nop, but " +
+                  "extension not present");
+      return;
+    }
+    write16le(loc + requiredBytes - 2, rvcNopInstr);
+  }
+
+  remove.emplace_back(rel.offset + requiredBytes, rel.addend - requiredBytes);
+  removeTotal += rel.addend - requiredBytes;
+}
+
+// Run one relaxation pass over isec. Relocations whose expr is R_NONE
+// afterwards are removed from isec->relocations. Returns true if bytes were
+// deleted from the section, false otherwise (always false for -r links).
+bool RISCV::relaxSection(InputSection *isec, int pass) const {
+  if (config->relocatable)
+    return false;
+
+  std::vector<std::pair<uint64_t, uint64_t>> remove;
+  uint64_t removeTotal = 0;
+  for (auto i = isec->relocations.begin(), e = isec->relocations.end(); i != e;
+       ++i) {
+    Relocation &rel = *i;
+    switch (pass) {
+    default:
+      llvm_unreachable("unknown relaxation pass");
+
+    case PASS_OPT: {
+      // Check if this is paired with an R_RISCV_RELAX
+      if (i + 1 == e || (i + 1)->type != R_RISCV_RELAX ||
+          rel.offset != (i + 1)->offset)
+        continue;
+
+      // Skip the R_RISCV_RELAX next time
+      ++i;
+
+      switch (rel.type) {
+      case R_RISCV_CALL:
+      case R_RISCV_CALL_PLT:
+        break;
+      }
+      break;
+    }
+
+    case PASS_ALIGN:
+      if (rel.type == R_RISCV_ALIGN)
+        relaxAlign(isec, rel, remove, removeTotal);
+      break;
+    }
+  }
+
+  auto brel = isec->relocations.begin();
+  auto dest = brel;
+  for (auto src = brel, erel = isec->relocations.end(); src != erel; ++src) {
+    if (src->expr != R_NONE)
+      *dest++ = *src;
+  }
+  isec->relocations.resize(dest - brel);
+
+  if (remove.size() == 0)
+    return false;
+
+  isec->deleteRanges(remove);
+  return true;
+}
+
 TargetInfo *getRISCVTargetInfo() {
   static RISCV target;
   return &target;
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -348,6 +348,20 @@
   // Called by ICF to merge two input sections.
   void replace(InputSection *other);
 
+  // Returns a writable view of the section contents, first replacing rawData
+  // with a private copy if one has not been made yet.
+  MutableArrayRef<uint8_t> mutableData() {
+    if (!copiedData)
+      makeMutableDataCopy();
+    return llvm::makeMutableArrayRef(const_cast<uint8_t *>(rawData.data()),
+                                     rawData.size());
+  }
+
+  // Deletes the given (offset, length) byte ranges from the section contents
+  // and adjusts relocation offsets and symbol values/sizes to match. The
+  // ranges are expected in ascending offset order.
+  void deleteRanges(std::vector<std::pair<uint64_t, uint64_t>> &ranges);
+
   static InputSection discarded;
 
 private:
@@ -355,6 +369,13 @@
   void copyRelocations(uint8_t *buf, llvm::ArrayRef<RelTy> rels);
 
   template <class ELFT> void copyShtGroup(uint8_t *buf);
+
+  void makeMutableDataCopy();
+
+  // This field stores whether we have made a mutable copy of the data, either
+  // because we have uncompressed it or because during relaxation we have had
+  // to rewrite the contents.
+  mutable bool copiedData = false;
 };
 
 inline bool isDebugSection(const InputSectionBase &sec) {
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -20,6 +20,7 @@
 #include "Thunks.h"
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
+#include "llvm/ADT/PriorityQueue.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Endian.h"
@@ -834,6 +835,8 @@
     return in.got->getTlsIndexOff() + a;
   case R_TLSLD_PC:
     return in.got->getTlsIndexVA() + a - p;
+  case R_RISCV_RELAX_HINT:
+    return 0;
   default:
     llvm_unreachable("invalid expression");
   }
@@ -1181,6 +1184,111 @@
   other->markDead();
 }
 
+// Replace rawData with a freshly allocated, writable copy of the section
+// contents and record that in copiedData. Serialized by a mutex.
+void InputSection::makeMutableDataCopy() {
+  static std::mutex mu;
+  std::lock_guard<std::mutex> lock(mu);
+
+  ArrayRef<uint8_t> oldRef = data();
+  // In case the above just uncompressed
+  if (copiedData)
+    return;
+
+  size_t size = oldRef.size();
+  uint8_t *newData = bAlloc.Allocate<uint8_t>(size);
+  memcpy(newData, oldRef.data(), size);
+  rawData = makeArrayRef(newData, size);
+  copiedData = true;
+}
+
+// Remove the (offset, length) byte ranges in `ranges` from this section's
+// contents, then shift relocation offsets and the values and sizes of the
+// symbols defined in this section to account for the deleted bytes.
+// ranges is expected in ascending offset order.
+void InputSection::deleteRanges(
+    std::vector<std::pair<uint64_t, uint64_t>> &ranges) {
+  // Delete bytes from data.
+
+  uint64_t removed = 0;
+  MutableArrayRef<uint8_t> mutRef = mutableData();
+  uint8_t *buf = mutRef.data();
+  size_t size = mutRef.size();
+  for (auto i = ranges.begin(), e = ranges.end(); i != e; ++i) {
+    uint8_t *moveTo = buf + (i->first - removed);
+    uint8_t *moveFrom = buf + i->first + i->second;
+    uint64_t nextOffset = i + 1 == e ? size : (i + 1)->first;
+    memmove(moveTo, moveFrom, nextOffset - i->first - i->second);
+    removed += i->second;
+  }
+  rawData = makeArrayRef(buf, size - removed);
+
+  // Update relocations; assumes already sorted.
+
+  auto irange = ranges.begin();
+  auto erange = ranges.end();
+  removed = 0;
+  for (Relocation &rel : relocations) {
+    while (irange != erange && irange->first < rel.offset) {
+      removed += irange->second;
+      ++irange;
+    }
+    rel.offset -= removed;
+  }
+
+  // Update symbols.
+
+  std::vector<Defined *> symbols;
+  for (Symbol *s : file->getSymbols())
+    if (auto *dr = dyn_cast<Defined>(s))
+      if (!dr->isSection() && dr->section == this)
+        symbols.push_back(dr);
+
+  llvm::sort(symbols,
+             [](Defined *a, Defined *b) { return a->value < b->value; });
+
+  using DefinedEndPair = std::pair<Defined *, uint64_t>;
+  auto compareEnds = [](DefinedEndPair &a, DefinedEndPair &b) {
+    uint64_t aend = a.first->value + a.second + a.first->size;
+    uint64_t bend = b.first->value + b.second + b.first->size;
+    return aend > bend;
+  };
+  PriorityQueue<DefinedEndPair, std::vector<DefinedEndPair>,
+                decltype(compareEnds)>
+      symbolEnds(compareEnds);
+
+  auto isym = symbols.begin();
+  auto esym = symbols.end();
+  irange = ranges.begin();
+  erange = ranges.end();
+  removed = 0;
+  while (isym != esym || irange != erange || !symbolEnds.empty()) {
+    while (irange != erange || !symbolEnds.empty()) {
+      // Adjust the size of any earlier symbols whose ends do not overlap with
+      // the current range.
+      if (isym != esym && irange != erange && irange->first >= (*isym)->value)
+        break;
+      while (!symbolEnds.empty()) {
+        auto top = symbolEnds.top();
+        uint64_t end = top.first->value + top.second + top.first->size;
+        if (irange != erange && end > irange->first)
+          break;
+        top.first->size -= removed - top.second;
+        symbolEnds.pop();
+      }
+      if (irange != erange) {
+        removed += irange->second;
+        ++irange;
+      }
+    }
+    if (isym != esym) {
+      (*isym)->value -= removed;
+      symbolEnds.emplace(*isym, removed);
+      ++isym;
+    }
+  }
+}
+
 template <class ELFT>
 EhInputSection::EhInputSection(ObjFile<ELFT> &f,
                                const typename ELFT::Shdr &header,
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -97,6 +97,7 @@
   R_PPC64_TOCBASE,
   R_RISCV_ADD,
   R_RISCV_PC_INDIRECT,
+  R_RISCV_RELAX_HINT,
 };
 
 // Architecture-neutral representation of relocation.
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -397,8 +397,8 @@
             R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
             R_PLT_PC, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC, R_PPC32_PLTREL,
             R_PPC64_CALL_PLT, R_PPC64_RELAX_TOC, R_RISCV_ADD, R_TLSDESC_CALL,
-            R_TLSDESC_PC, R_AARCH64_TLSDESC_PAGE, R_TLSLD_HINT, R_TLSIE_HINT>(
-          e))
+            R_TLSDESC_PC, R_AARCH64_TLSDESC_PAGE, R_TLSLD_HINT, R_TLSIE_HINT,
+            R_RISCV_RELAX_HINT>(e))
     return true;
 
   // These never do, except if the entire file is position dependent or if
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -144,6 +144,12 @@
   virtual void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,
                               uint64_t val) const;
 
+  // Relaxation passes supported by the target, run in order by the writer.
+  llvm::SmallVector<int, 2> relaxPasses;
+
+  // Runs relaxation pass `pass` on isec. Returns true if the section changed.
+  virtual bool relaxSection(InputSection *isec, int pass) const;
+
 protected:
   // On FreeBSD x86_64 the first page cannot be mmaped.
   // On Linux this is controlled by vm.mmap_min_addr. At least on some x86_64
diff --git a/lld/ELF/Target.cpp b/lld/ELF/Target.cpp
--- a/lld/ELF/Target.cpp
+++ b/lld/ELF/Target.cpp
@@ -180,6 +180,10 @@
   llvm_unreachable("Should not have claimed to be relaxable");
 }
 
+bool TargetInfo::relaxSection(InputSection *isec, int pass) const {
+  llvm_unreachable("Should not have claimed to have relaxation passes");
+}
+
 uint64_t TargetInfo::getImageBase() const {
   // Use -image-base if set. Fall back to the target default if not.
   if (config->imageBase)
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1668,6 +1668,33 @@
       warn("address (0x" + Twine::utohexstr(os->addr) + ") of section " +
            os->name + " is not a multiple of alignment (" +
            Twine(os->alignment) + ")");
+
+  // We cannot relax until after thunk creation has finished, since that causes
+  // code to increase in size and potentially invalidate some relaxations.
+  for (int pass : target->relaxPasses) {
+    assignPasses = 0;
+    for (;;) {
+      bool changed = false;
+      for (OutputSection *osec : outputSections)
+        for (InputSection *isec : getInputSections(osec))
+          changed |= target->relaxSection(isec, pass);
+
+      const Defined *changedSym = script->assignAddresses();
+      if (!changed) {
+        // Some symbols may be dependent on section addresses. When we break the
+        // loop, the symbol values are finalized because a previous
+        // assignAddresses() finalized section addresses.
+        if (!changedSym)
+          break;
+        if (++assignPasses == 5) {
+          errorOrWarn("assignment to symbol " + toString(*changedSym) +
+                      " does not converge after relaxation pass " +
+                      toString(pass));
+          break;
+        }
+      }
+    }
+  }
 }
 
 static void finalizeSynthetic(SyntheticSection *sec) {
@@ -1983,7 +2010,9 @@
   //    address of InputSections. For example, MIPS GOT section content or
   //    android packed relocations sections content.
   //
-  // 3) Assign the final values for the linker script symbols. Linker scripts
+  // 3) Perform any linker relaxations that are address-dependent.
+  //
+  // 4) Assign the final values for the linker script symbols. Linker scripts
   //    sometimes using forward symbol declarations. We want to set the correct
   //    values. They also might change after adding the thunks.
   finalizeAddressDependentContent();