diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -856,11 +856,11 @@ }; const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop - // needsCopy indicates a non-ifunc canonical PLT entry whose address may + // NEEDS_COPY indicates a non-ifunc canonical PLT entry whose address may // escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its // address may escape if referenced by a direct relocation. The condition is // conservative. - bool hasBti = btiHeader && (sym.needsCopy || sym.isInIplt); + bool hasBti = btiHeader && (sym.hasFlag(NEEDS_COPY) || sym.isInIplt); if (hasBti) { memcpy(buf, btiData, sizeof(btiData)); buf += sizeof(btiData); diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp --- a/lld/ELF/MapFile.cpp +++ b/lld/ELF/MapFile.cpp @@ -59,7 +59,7 @@ for (Symbol *b : file->getSymbols()) if (auto *dr = dyn_cast(b)) if (!dr->isSection() && dr->section && dr->section->isLive() && - (dr->file == file || dr->needsCopy || dr->section->bss)) + (dr->file == file || dr->hasFlag(NEEDS_COPY) || dr->section->bss)) v.push_back(dr); return v; } diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -306,7 +306,8 @@ sym.exportDynamic = true; sym.isUsedInRegularObj = true; // A copy relocated alias may need a GOT entry. - sym.needsGot = old.needsGot; + if (old.hasFlag(NEEDS_GOT)) + sym.setFlags(NEEDS_GOT); } // Reserve space in .bss or .bss.rel.ro for copy relocation. @@ -579,6 +580,7 @@ }; std::vector undefs; +std::mutex relocMutex; } // Check whether the definition name def is a mangled function name that matches @@ -821,6 +823,7 @@ // Returns true if the undefined symbol will produce an error message. 
static bool maybeReportUndefined(Undefined &sym, InputSectionBase &sec, uint64_t offset) { + std::lock_guard lock(relocMutex); // If versioned, issue an error (even if the symbol is weak) because we don't // know the defining filename which is required to construct a Verneed entry. if (sym.hasVersionSuffix) { @@ -869,6 +872,7 @@ return type; } +template static void addRelativeReloc(InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, int64_t addend, RelExpr expr, RelType type) { @@ -882,11 +886,14 @@ // address. if (part.relrDyn && isec.alignment >= 2 && offsetInSec % 2 == 0) { isec.relocations.push_back({expr, type, offsetInSec, addend, &sym}); - part.relrDyn->relocs.push_back({&isec, offsetInSec}); + if (shard) + part.relrDyn->relocsVec[parallel::threadIndex].push_back({&isec, offsetInSec}); + else + part.relrDyn->relocs.push_back({&isec, offsetInSec}); return; } - part.relaDyn->addRelativeReloc(target->relativeRel, isec, offsetInSec, sym, - addend, type, expr); + part.relaDyn->addRelativeReloc(target->relativeRel, isec, offsetInSec, + sym, addend, type, expr); } template @@ -1054,11 +1061,12 @@ if (canWrite) { RelType rel = target.getDynRel(type); if (expr == R_GOT || (rel == target.symbolicRel && !sym.isPreemptible)) { - addRelativeReloc(*sec, offset, sym, addend, expr, type); + addRelativeReloc(*sec, offset, sym, addend, expr, type); return; } else if (rel != 0) { if (config->emachine == EM_MIPS && rel == target.symbolicRel) rel = target.relativeRel; + std::lock_guard lock(relocMutex); sec->getPartition().relaDyn->addSymbolReloc(rel, *sec, offset, sym, addend, type); @@ -1100,7 +1108,7 @@ " against symbol '" + toString(*ss) + "'; recompile with -fPIC or remove '-z nocopyreloc'" + getLocation(*sec, sym, offset)); - sym.needsCopy = true; + sym.setFlags(NEEDS_COPY); } sec->relocations.push_back({expr, type, offset, addend, &sym}); return; @@ -1138,8 +1146,7 @@ errorOrWarn("symbol '" + toString(sym) + "' cannot be preempted; recompile with -fPIE" + 
getLocation(*sec, sym, offset)); - sym.needsCopy = true; - sym.needsPlt = true; + sym.setFlags(NEEDS_COPY | NEEDS_PLT); sec->relocations.push_back({expr, type, offset, addend, &sym}); return; } @@ -1193,7 +1200,7 @@ R_TLSDESC_GOTPLT>(expr) && config->shared) { if (expr != R_TLSDESC_CALL) { - sym.needsTlsDesc = true; + sym.setFlags(NEEDS_TLSDESC); c.relocations.push_back({expr, type, offset, addend, &sym}); } return 1; @@ -1247,7 +1254,7 @@ // Local-Dynamic sequence where offset of tls variable relative to dynamic // thread pointer is stored in the got. This cannot be relaxed to Local-Exec. if (expr == R_TLSLD_GOT_OFF) { - sym.needsGotDtprel = true; + sym.setFlags(NEEDS_GOT_DTPREL); c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } @@ -1255,7 +1262,7 @@ if (oneof(expr)) { if (!toExecRelax) { - sym.needsTlsGd = true; + sym.setFlags(NEEDS_TLSGD); c.relocations.push_back({expr, type, offset, addend, &sym}); return 1; } @@ -1263,7 +1270,7 @@ // Global-Dynamic relocs can be relaxed to Initial-Exec or Local-Exec // depending on the symbol being locally defined or not. if (sym.isPreemptible) { - sym.needsTlsGdToIe = true; + sym.setFlags(NEEDS_TLSGD_TO_IE); c.relocations.push_back( {target->adjustTlsExpr(type, R_RELAX_TLS_GD_TO_IE), type, offset, addend, &sym}); @@ -1283,10 +1290,10 @@ c.relocations.push_back( {R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym}); } else if (expr != R_TLSIE_HINT) { - sym.needsTlsIe = true; + sym.setFlags(NEEDS_TLSIE); // R_GOT needs a relative relocation for PIC on i386 and Hexagon. if (expr == R_GOT && config->isPic && !target->usesOnlyLowPageBits(type)) - addRelativeReloc(c, offset, sym, addend, expr, type); + addRelativeReloc(c, offset, sym, addend, expr, type); else c.relocations.push_back({expr, type, offset, addend, &sym}); } @@ -1371,10 +1378,10 @@ // The 5 types that relative GOTPLT are all x86 and x86-64 specific. 
if (oneof(expr)) { - in.gotPlt->hasGotPltOffRel = true; + in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed); } else if (oneof(expr)) { - in.got->hasGotOffRel = true; + in.got->hasGotOffRel.store(true, std::memory_order_relaxed); } // Process TLS relocations, including relaxing TLS relocations. Note that @@ -1422,6 +1429,7 @@ // We were asked not to generate PLT entries for ifuncs. Instead, pass the // direct relocation on through. if (LLVM_UNLIKELY(isIfunc) && config->zIfuncNoplt) { + std::lock_guard lock(relocMutex); sym.exportDynamic = true; mainPart->relaDyn->addSymbolReloc(type, *sec, offset, sym, addend, type); return; @@ -1438,12 +1446,12 @@ // ftp://www.linux-mips.org/pub/linux/mips/doc/ABI/mipsabi.pdf in.mipsGot->addEntry(*sec->file, sym, addend, expr); } else { - sym.needsGot = true; + sym.setFlags(NEEDS_GOT); } } else if (needsPlt(expr)) { - sym.needsPlt = true; + sym.setFlags(NEEDS_PLT); } else if (LLVM_UNLIKELY(isIfunc)) { - sym.hasDirectReloc = true; + sym.setFlags(HAS_DIRECT_RELOC); } processAux(expr, type, offset, sym, addend); @@ -1530,10 +1538,31 @@ // determine if it needs special treatment, such as creating GOT, PLT, // copy relocations, etc. Note that relocations for non-alloc sections are // directly processed by InputSection::relocateNonAlloc. + + // Deterministic parallelism needs sorting relocations which is unsuitable + // for -z nocombreloc. AndroidPackedRelocationSection does not support + // parallelism. MIPS and PPC64 use global states which are not suitable for + // parallelism. 
+ bool serial = !config->zCombreloc || config->emachine == EM_MIPS || + config->emachine == EM_PPC64; + parallel::TaskGroup tg; + for (ELFFileBase *f : ctx->objectFiles) { + auto fn = [f]() { + RelocationScanner scanner; + for (InputSectionBase *s : f->getSections()) { + if (s && s->kind() == SectionBase::Regular && s->isLive() && + (s->flags & SHF_ALLOC) && + !(s->type == SHT_ARM_EXIDX && config->emachine == EM_ARM)) + scanner.template scanSection(*s); + } + }; + if (serial) + fn(); + else + tg.execute(fn); + } + RelocationScanner scanner; - for (InputSectionBase *sec : inputSections) - if (sec->isLive() && (sec->flags & SHF_ALLOC)) - scanner.template scanSection(*sec); for (Partition &part : partitions) { for (EhInputSection *sec : part.ehFrame->sections) scanner.template scanSection(*sec); @@ -1543,7 +1572,7 @@ } } -static bool handleNonPreemptibleIfunc(Symbol &sym) { +static bool handleNonPreemptibleIfunc(Symbol &sym, uint16_t flags) { // Handle a reference to a non-preemptible ifunc. These are special in a // few ways: // @@ -1587,7 +1616,7 @@ if (!sym.isGnuIFunc() || sym.isPreemptible || config->zIfuncNoplt) return false; // Skip unreferenced non-preemptible ifunc. - if (!(sym.needsGot || sym.needsPlt || sym.hasDirectReloc)) + if (!(flags & (NEEDS_GOT | NEEDS_PLT | HAS_DIRECT_RELOC))) return true; sym.isInIplt = true; @@ -1603,7 +1632,7 @@ sym.allocateAux(); symAux.back().pltIdx = symAux[directSym->auxIdx].pltIdx; - if (sym.hasDirectReloc) { + if (flags & HAS_DIRECT_RELOC) { // Change the value to the IPLT and redirect all references to it. auto &d = cast(sym); d.section = in.iplt.get(); @@ -1613,9 +1642,9 @@ // don't try to call the PLT as if it were an ifunc resolver. d.type = STT_FUNC; - if (sym.needsGot) + if (flags & NEEDS_GOT) addGotEntry(sym); - } else if (sym.needsGot) { + } else if (flags & NEEDS_GOT) { // Redirect GOT accesses to point to the Igot. 
sym.gotInIgot = true; } @@ -1624,30 +1653,31 @@ void elf::postScanRelocations() { auto fn = [](Symbol &sym) { - if (handleNonPreemptibleIfunc(sym)) + auto flags = sym.flags.load(std::memory_order_relaxed); + if (handleNonPreemptibleIfunc(sym, flags)) return; if (!sym.needsDynReloc()) return; sym.allocateAux(); - if (sym.needsGot) + if (flags & NEEDS_GOT) addGotEntry(sym); - if (sym.needsPlt) + if (flags & NEEDS_PLT) addPltEntry(*in.plt, *in.gotPlt, *in.relaPlt, target->pltRel, sym); - if (sym.needsCopy) { + if (flags & NEEDS_COPY) { if (sym.isObject()) { invokeELFT(addCopyRelSymbol, cast(sym)); - // needsCopy is cleared for sym and its aliases so that in later - // iterations aliases won't cause redundant copies. - assert(!sym.needsCopy); + // NEEDS_COPY is cleared for sym and its aliases so that in + // later iterations aliases won't cause redundant copies. + assert(!sym.hasFlag(NEEDS_COPY)); } else { - assert(sym.isFunc() && sym.needsPlt); + assert(sym.isFunc() && sym.hasFlag(NEEDS_PLT)); if (!sym.isDefined()) { replaceWithDefined(sym, *in.plt, target->pltHeaderSize + target->pltEntrySize * sym.getPltIdx(), 0); - sym.needsCopy = true; + sym.setFlags(NEEDS_COPY); if (config->emachine == EM_PPC) { // PPC32 canonical PLT entries are at the beginning of .glink cast(sym).value = in.plt->headerSize; @@ -1662,13 +1692,13 @@ return; bool isLocalInExecutable = !sym.isPreemptible && !config->shared; - if (sym.needsTlsDesc) { + if (flags & NEEDS_TLSDESC) { in.got->addTlsDescEntry(sym); mainPart->relaDyn->addAddendOnlyRelocIfNonPreemptible( target->tlsDescRel, *in.got, in.got->getTlsDescOffset(sym), sym, target->tlsDescRel); } - if (sym.needsTlsGd) { + if (flags & NEEDS_TLSGD) { in.got->addDynTlsEntry(sym); uint64_t off = in.got->getGlobalDynOffset(sym); if (isLocalInExecutable) @@ -1689,18 +1719,18 @@ in.got->relocations.push_back( {R_ABS, target->tlsOffsetRel, offsetOff, 0, &sym}); } - if (sym.needsTlsGdToIe) { + if (flags & NEEDS_TLSGD_TO_IE) { in.got->addEntry(sym); 
mainPart->relaDyn->addSymbolReloc(target->tlsGotRel, *in.got, sym.getGotOffset(), sym); } - if (sym.needsGotDtprel) { + if (flags & NEEDS_GOT_DTPREL) { in.got->addEntry(sym); in.got->relocations.push_back( {R_ABS, target->tlsOffsetRel, sym.getGotOffset(), 0, &sym}); } - if (sym.needsTlsIe && !sym.needsTlsGdToIe) + if ((flags & NEEDS_TLSIE) && !(flags & NEEDS_TLSGD_TO_IE)) addTpOffsetGotEntry(sym); }; diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h --- a/lld/ELF/Symbols.h +++ b/lld/ELF/Symbols.h @@ -39,6 +39,20 @@ class LazyObject; class InputFile; +enum { + NEEDS_GOT = 1 << 0, + NEEDS_PLT = 1 << 1, + HAS_DIRECT_RELOC = 1 << 2, + // True if this symbol needs a canonical PLT entry, or (during + // postScanRelocations) a copy relocation. + NEEDS_COPY = 1 << 3, + NEEDS_TLSDESC = 1 << 4, + NEEDS_TLSGD = 1 << 5, + NEEDS_TLSGD_TO_IE = 1 << 6, + NEEDS_GOT_DTPREL = 1 << 7, + NEEDS_TLSIE = 1 << 8, +}; + // Some index properties of a symbol are stored separately in this auxiliary // struct to decrease sizeof(SymbolUnion) in the majority of cases. struct SymbolAux { @@ -67,6 +81,10 @@ // The file from which this symbol was created. InputFile *file; + // The default copy constructor is deleted due to atomic flags. Define one for + // places where no atomic is needed. + Symbol(const Symbol &o) { memcpy(this, &o, sizeof(o)); } + protected: const char *nameData; // 32-bit size saves space. 
@@ -252,10 +270,7 @@ inDynamicList(false), referenced(false), referencedAfterWrap(false), traced(false), hasVersionSuffix(false), isInIplt(false), gotInIgot(false), folded(false), needsTocRestore(false), - scriptDefined(false), dsoProtected(false), needsCopy(false), - needsGot(false), needsPlt(false), needsTlsDesc(false), - needsTlsGd(false), needsTlsGdToIe(false), needsGotDtprel(false), - needsTlsIe(false), hasDirectReloc(false) {} + scriptDefined(false), dsoProtected(false) {} public: // True if this symbol is in the Iplt sub-section of the Plt and the Igot @@ -282,20 +297,9 @@ // True if defined in a DSO as protected visibility. uint8_t dsoProtected : 1; - // True if this symbol needs a canonical PLT entry, or (during - // postScanRelocations) a copy relocation. - uint8_t needsCopy : 1; - // Temporary flags used to communicate which symbol entries need PLT and GOT // entries during postScanRelocations(); - uint8_t needsGot : 1; - uint8_t needsPlt : 1; - uint8_t needsTlsDesc : 1; - uint8_t needsTlsGd : 1; - uint8_t needsTlsGdToIe : 1; - uint8_t needsGotDtprel : 1; - uint8_t needsTlsIe : 1; - uint8_t hasDirectReloc : 1; + std::atomic flags = 0; // A symAux index used to access GOT/PLT entry indexes. This is allocated in // postScanRelocations(). @@ -308,9 +312,17 @@ // Version definition index. 
uint16_t versionId; + void setFlags(uint16_t bits) { + flags.fetch_or(bits, std::memory_order_relaxed); + } + bool hasFlag(uint16_t bit) const { + return flags.load(std::memory_order_relaxed) & bit; + } + bool needsDynReloc() const { - return needsCopy || needsGot || needsPlt || needsTlsDesc || needsTlsGd || - needsTlsGdToIe || needsGotDtprel || needsTlsIe; + return flags.load(std::memory_order_relaxed) & + (NEEDS_COPY | NEEDS_GOT | NEEDS_PLT | NEEDS_TLSDESC | NEEDS_TLSGD | + NEEDS_TLSGD_TO_IE | NEEDS_GOT_DTPREL | NEEDS_TLSIE); } void allocateAux() { assert(auxIdx == uint32_t(-1)); @@ -556,6 +568,13 @@ Defined(std::forward(args)...); } +inline Defined *makeDefined(Defined &o) { + auto *ret = reinterpret_cast( + getSpecificAllocSingleton().Allocate()); + memcpy(ret, &o, sizeof(o)); + return ret; +} + void reportDuplicate(const Symbol &sym, const InputFile *newFile, InputSectionBase *errSec, uint64_t errOffset); void maybeWarnUnorderableSymbol(const Symbol *sym); diff --git a/lld/ELF/Symbols.cpp b/lld/ELF/Symbols.cpp --- a/lld/ELF/Symbols.cpp +++ b/lld/ELF/Symbols.cpp @@ -122,7 +122,7 @@ // field etc) do the same trick as compiler uses to mark microMIPS // for CPU - set the less-significant bit. if (config->emachine == EM_MIPS && isMicroMips() && - ((sym.stOther & STO_MIPS_MICROMIPS) || sym.needsCopy)) + ((sym.stOther & STO_MIPS_MICROMIPS) || sym.hasFlag(NEEDS_COPY))) va |= 1; if (d.isTls() && !config->relocatable) { diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -26,6 +26,7 @@ #include "llvm/ADT/MapVector.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Threading.h" namespace lld::elf { @@ -115,7 +116,7 @@ // Flag to force GOT to be in output if we have relocations // that relies on its address. 
- bool hasGotOffRel = false; + std::atomic hasGotOffRel = false; protected: size_t numEntries = 0; @@ -357,7 +358,7 @@ // Flag to force GotPlt to be in output if we have relocations // that relies on its address. - bool hasGotPltOffRel = false; + std::atomic hasGotPltOffRel = false; private: SmallVector entries; @@ -490,6 +491,7 @@ /// Add a dynamic relocation without writing an addend to the output section. /// This overload can be used if the addends are written directly instead of /// using relocations on the input section (e.g. MipsGotSection::writeTo()). + template void addReloc(const DynamicReloc &reloc) { relocs.push_back(reloc); } /// Add a dynamic relocation against \p sym with an optional addend. void addSymbolReloc(RelType dynType, InputSectionBase &isec, @@ -497,21 +499,41 @@ llvm::Optional addendRelType = llvm::None); /// Add a relative dynamic relocation that uses the target address of \p sym /// (i.e. InputSection::getRelocTargetVA()) + \p addend as the addend. + template void addRelativeReloc(RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, int64_t addend, - RelType addendRelType, RelExpr expr); + RelType addendRelType, RelExpr expr) { + // This function should only be called for non-preemptible symbols or + // RelExpr values that refer to an address inside the output file (e.g. the + // address of the GOT entry for a potentially preemptible symbol). + assert(expr != R_ADDEND && "expected non-addend relocation expression"); + addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, isec, + offsetInSec, sym, addend, expr, addendRelType); + } /// Add a dynamic relocation using the target address of \p sym as the addend /// if \p sym is non-preemptible. Otherwise add a relocation against \p sym. 
void addAddendOnlyRelocIfNonPreemptible(RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, RelType addendRelType); - void addReloc(DynamicReloc::Kind kind, RelType dynType, - InputSectionBase &inputSec, uint64_t offsetInSec, Symbol &sym, - int64_t addend, RelExpr expr, RelType addendRelType); - bool isNeeded() const override { return !relocs.empty(); } + template + void addReloc(DynamicReloc::Kind kind, RelType dynType, InputSectionBase &sec, + uint64_t offsetInSec, Symbol &sym, int64_t addend, RelExpr expr, + RelType addendRelType) { + // Write the addends to the relocated address if required. We skip + // it if the written value would be zero. + if (config->writeAddends && (expr != R_ADDEND || addend != 0)) + sec.relocations.push_back( + {expr, addendRelType, offsetInSec, addend, &sym}); + addReloc({dynType, &sec, offsetInSec, kind, sym, addend, expr}); + } + bool isNeeded() const override { + return !relocs.empty() || + llvm::any_of(relocsVec, [](auto &v) { return !v.empty(); }); + } size_t getSize() const override { return relocs.size() * this->entsize; } size_t getRelativeRelocCount() const { return numRelativeRelocs; } + void mergeRels(); void partitionRels(); void finalizeContents() override; static bool classof(const SectionBase *d) { @@ -521,6 +543,9 @@ } int32_t dynamicTag, sizeDynamicTag; SmallVector relocs; + // Used when parallel relocation scanning adds relocations. The elements + // will be moved into relocs. 
+ SmallVector, 0> relocsVec; protected: void computeRels(); @@ -528,6 +553,11 @@ bool combreloc; }; +template <> +inline void RelocationBaseSection::addReloc(const DynamicReloc &reloc) { + relocsVec[llvm::parallel::threadIndex].push_back(reloc); +} + template class RelocationSection final : public RelocationBaseSection { using Elf_Rel = typename ELFT::Rel; @@ -566,8 +596,13 @@ class RelrBaseSection : public SyntheticSection { public: RelrBaseSection(); - bool isNeeded() const override { return !relocs.empty(); } + void mergeRels(); + bool isNeeded() const override { + return !relocs.empty() || + llvm::any_of(relocsVec, [](auto &v) { return !v.empty(); }); + } SmallVector relocs; + SmallVector, 0> relocsVec; }; // RelrSection is used to encode offsets for relative relocations. diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1586,19 +1586,6 @@ R_ADDEND, addendRelType ? *addendRelType : target->noneRel); } -void RelocationBaseSection::addRelativeReloc( - RelType dynType, InputSectionBase &inputSec, uint64_t offsetInSec, - Symbol &sym, int64_t addend, RelType addendRelType, RelExpr expr) { - // This function should only be called for non-preemptible symbols or - // RelExpr values that refer to an address inside the output file (e.g. the - // address of the GOT entry for a potentially preemptible symbol). 
- assert((!sym.isPreemptible || expr == R_GOT) && - "cannot add relative relocation against preemptible symbol"); - assert(expr != R_ADDEND && "expected non-addend relocation expression"); - addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, inputSec, offsetInSec, - sym, addend, expr, addendRelType); -} - void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible( RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym, RelType addendRelType) { @@ -1611,17 +1598,14 @@ sym, 0, R_ABS, addendRelType); } -void RelocationBaseSection::addReloc(DynamicReloc::Kind kind, RelType dynType, - InputSectionBase &inputSec, - uint64_t offsetInSec, Symbol &sym, - int64_t addend, RelExpr expr, - RelType addendRelType) { - // Write the addends to the relocated address if required. We skip - // it if the written value would be zero. - if (config->writeAddends && (expr != R_ADDEND || addend != 0)) - inputSec.relocations.push_back( - {expr, addendRelType, offsetInSec, addend, &sym}); - addReloc({dynType, &inputSec, offsetInSec, kind, sym, addend, expr}); +void RelocationBaseSection::mergeRels() { + size_t newSize = relocs.size(); + for (auto &v : relocsVec) + newSize += v.size(); + relocs.reserve(newSize); + for (auto &v : relocsVec) + llvm::append_range(relocs, v); + relocsVec.clear(); } void RelocationBaseSection::partitionRels() { @@ -1704,6 +1688,16 @@ config->useAndroidRelrTags ? 
SHT_ANDROID_RELR : SHT_RELR, config->wordsize, ".relr.dyn") {} +void RelrBaseSection::mergeRels() { + size_t newSize = relocs.size(); + for (auto &v : relocsVec) + newSize += v.size(); + relocs.reserve(newSize); + for (auto &v : relocsVec) + llvm::append_range(relocs, v); + relocsVec.clear(); +} + template AndroidPackedRelocationSection::AndroidPackedRelocationSection( StringRef name) @@ -2175,8 +2169,8 @@ } static uint32_t getSymSectionIndex(Symbol *sym) { - assert(!(sym->needsCopy && sym->isObject())); - if (!isa(sym) || sym->needsCopy) + assert(!(sym->hasFlag(NEEDS_COPY) && sym->isObject())); + if (!isa(sym) || sym->hasFlag(NEEDS_COPY)) return SHN_UNDEF; if (const OutputSection *os = sym->getOutputSection()) return os->sectionIndex >= SHN_LORESERVE ? (uint32_t)SHN_XINDEX @@ -2236,7 +2230,7 @@ for (SymbolTableEntry &ent : symbols) { Symbol *sym = ent.sym; - if (sym->isInPlt() && sym->needsCopy) + if (sym->isInPlt() && sym->hasFlag(NEEDS_COPY)) eSym->st_other |= STO_MIPS_PLT; if (isMicroMips()) { // We already set the less-significant bit for symbols @@ -2247,7 +2241,7 @@ // like `objdump` will be able to deal with a correct // symbol position. 
if (sym->isDefined() && - ((sym->stOther & STO_MIPS_MICROMIPS) || sym->needsCopy)) { + ((sym->stOther & STO_MIPS_MICROMIPS) || sym->hasFlag(NEEDS_COPY))) { if (!strTabSec.isDynamic()) eSym->st_value &= ~1; eSym->st_other |= STO_MIPS_MICROMIPS; diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -349,12 +349,14 @@ add(*part.memtagAndroidNote); } + const unsigned threadCount = parallel::strategy.compute_thread_count(); if (config->androidPackDynRelocs) part.relaDyn = std::make_unique>(relaDynName); else part.relaDyn = std::make_unique>( relaDynName, config->zCombreloc); + part.relaDyn->relocsVec.resize(threadCount); if (config->hasDynSymTab) { add(*part.dynSymTab); @@ -387,6 +389,7 @@ if (config->relrPackDynRelocs) { part.relrDyn = std::make_unique>(); + part.relrDyn->relocsVec.resize(threadCount); add(*part.relrDyn); } @@ -2074,16 +2077,20 @@ // symbol table section (dynSymTab) must be the first one. for (Partition &part : partitions) { if (part.relaDyn) { + part.relaDyn->mergeRels(); // Compute DT_RELACOUNT to be used by part.dynamic. 
part.relaDyn->partitionRels(); finalizeSynthetic(part.relaDyn.get()); } + if (part.relrDyn) { + part.relrDyn->mergeRels(); + finalizeSynthetic(part.relrDyn.get()); + } finalizeSynthetic(part.dynSymTab.get()); finalizeSynthetic(part.gnuHashTab.get()); finalizeSynthetic(part.hashTab.get()); finalizeSynthetic(part.verDef.get()); - finalizeSynthetic(part.relrDyn.get()); finalizeSynthetic(part.ehFrameHdr.get()); finalizeSynthetic(part.verSym.get()); finalizeSynthetic(part.verNeed.get()); diff --git a/lld/test/ELF/combreloc.s b/lld/test/ELF/combreloc.s --- a/lld/test/ELF/combreloc.s +++ b/lld/test/ELF/combreloc.s @@ -35,8 +35,8 @@ # NOCOMB-NEXT: 0x3400 R_X86_64_64 ccc 0x0 # NOCOMB-NEXT: 0x3408 R_X86_64_64 bbb 0x0 # NOCOMB-NEXT: 0x3410 R_X86_64_64 aaa 0x0 -# NOCOMB-NEXT: 0x3418 R_X86_64_RELATIVE - 0x3420 # NOCOMB-NEXT: 0x23F0 R_X86_64_GLOB_DAT aaa 0x0 +# NOCOMB-NEXT: 0x3418 R_X86_64_RELATIVE - 0x3420 # NOCOMB-NEXT: } .globl aaa, bbb, ccc diff --git a/lld/test/ELF/comdat-discarded-error.s b/lld/test/ELF/comdat-discarded-error.s --- a/lld/test/ELF/comdat-discarded-error.s +++ b/lld/test/ELF/comdat-discarded-error.s @@ -5,7 +5,7 @@ # RUN: echo '.weak foo; foo: .section .text.foo,"axG",@progbits,foo,comdat; .globl bar; bar:' |\ # RUN: llvm-mc -filetype=obj -triple=x86_64 - -o %t3.o -# RUN: not ld.lld %t2.o %t3.o %t1.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld --threads=1 %t2.o %t3.o %t1.o -o /dev/null 2>&1 | FileCheck %s # CHECK: error: relocation refers to a symbol in a discarded section: bar # CHECK-NEXT: >>> defined in {{.*}}3.o diff --git a/lld/test/ELF/undef-multi.s b/lld/test/ELF/undef-multi.s --- a/lld/test/ELF/undef-multi.s +++ b/lld/test/ELF/undef-multi.s @@ -1,7 +1,7 @@ # REQUIRES: x86 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef.s -o %t2.o -# RUN: not ld.lld %t.o %t2.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld --threads=1 %t.o %t2.o -o /dev/null 2>&1 | 
FileCheck %s # CHECK: error: undefined symbol: zed2 # CHECK-NEXT: >>> referenced by undef-multi.s @@ -24,7 +24,7 @@ # RUN: echo " call zed2" >> %t.moreref.s # RUN: echo " call zed2" >> %t.moreref.s # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t.moreref.s -o %t3.o -# RUN: not ld.lld %t.o %t2.o %t3.o -o /dev/null -error-limit=2 2>&1 | \ +# RUN: not ld.lld --threads=1 %t.o %t2.o %t3.o -o /dev/null -error-limit=2 2>&1 | \ # RUN: FileCheck --check-prefix=LIMIT %s # LIMIT: error: undefined symbol: zed2 diff --git a/lld/test/ELF/undef.s b/lld/test/ELF/undef.s --- a/lld/test/ELF/undef.s +++ b/lld/test/ELF/undef.s @@ -5,9 +5,9 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef-bad-debug.s -o %t4.o # RUN: rm -f %t2.a # RUN: llvm-ar rc %t2.a %t2.o -# RUN: not ld.lld %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \ +# RUN: not ld.lld --threads=1 %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \ # RUN: | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:" -# RUN: not ld.lld -pie %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \ +# RUN: not ld.lld --threads=1 -pie %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \ # RUN: | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:" # CHECK: error: undefined symbol: foo diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h --- a/llvm/include/llvm/Support/Parallel.h +++ b/llvm/include/llvm/Support/Parallel.h @@ -28,6 +28,7 @@ // this file. It defaults to using all hardware threads and should be // initialized before the first use of parallel routines. 
extern ThreadPoolStrategy strategy; +extern thread_local int threadIndex; namespace detail { class Latch { diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -18,6 +18,7 @@ #include llvm::ThreadPoolStrategy llvm::parallel::strategy; +thread_local int llvm::parallel::threadIndex; namespace llvm { namespace parallel { @@ -48,11 +49,15 @@ std::lock_guard Lock(Mutex); Threads[0] = std::thread([this, ThreadCount, S] { for (unsigned I = 1; I < ThreadCount; ++I) { - Threads.emplace_back([=] { work(S, I); }); + Threads.emplace_back([=] { + threadIndex = I; + work(S, I); + }); if (Stop) break; } ThreadsCreated.set_value(); + threadIndex = 0; work(S, 0); }); }