diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -580,6 +580,7 @@
 };
 std::vector<UndefinedDiag> undefs;
+std::mutex relocMutex;
 }

 // Check whether the definition name def is a mangled function name that matches
@@ -822,6 +823,7 @@
 // Returns true if the undefined symbol will produce an error message.
 static bool maybeReportUndefined(Undefined &sym, InputSectionBase &sec,
                                  uint64_t offset) {
+  std::lock_guard lock(relocMutex);
   // If versioned, issue an error (even if the symbol is weak) because we don't
   // know the defining filename which is required to construct a Verneed entry.
   if (sym.hasVersionSuffix) {
@@ -870,6 +872,7 @@
   return type;
 }

+template <bool shard = false>
 static void addRelativeReloc(InputSectionBase &isec, uint64_t offsetInSec,
                              Symbol &sym, int64_t addend, RelExpr expr,
                              RelType type) {
@@ -883,11 +886,14 @@
   // address.
   if (part.relrDyn && isec.alignment >= 2 && offsetInSec % 2 == 0) {
     isec.relocations.push_back({expr, type, offsetInSec, addend, &sym});
-    part.relrDyn->relocs.push_back({&isec, offsetInSec});
+    if (shard)
+      part.relrDyn->relocsVec[parallel::threadIndex].push_back({&isec, offsetInSec});
+    else
+      part.relrDyn->relocs.push_back({&isec, offsetInSec});
     return;
   }
-  part.relaDyn->addRelativeReloc(target->relativeRel, isec, offsetInSec, sym,
-                                 addend, type, expr);
+  part.relaDyn->addRelativeReloc<shard>(target->relativeRel, isec, offsetInSec,
+                                        sym, addend, type, expr);
 }

 template <class PltSection, class GotPltSection>
@@ -1055,11 +1061,12 @@
   if (canWrite) {
     RelType rel = target.getDynRel(type);
     if (expr == R_GOT || (rel == target.symbolicRel && !sym.isPreemptible)) {
-      addRelativeReloc(*sec, offset, sym, addend, expr, type);
+      addRelativeReloc<true>(*sec, offset, sym, addend, expr, type);
       return;
     } else if (rel != 0) {
       if (config->emachine == EM_MIPS && rel == target.symbolicRel)
         rel = target.relativeRel;
+      std::lock_guard lock(relocMutex);
       sec->getPartition().relaDyn->addSymbolReloc(rel, *sec, offset, sym,
                                                   addend, type);
@@ -1286,7 +1293,7 @@
       sym.setFlags(NEEDS_TLSIE);
       // R_GOT needs a relative relocation for PIC on i386 and Hexagon.
       if (expr == R_GOT && config->isPic && !target->usesOnlyLowPageBits(type))
-        addRelativeReloc(c, offset, sym, addend, expr, type);
+        addRelativeReloc<true>(c, offset, sym, addend, expr, type);
       else
         c.relocations.push_back({expr, type, offset, addend, &sym});
     }
@@ -1371,10 +1378,10 @@
   // The 5 types that relative GOTPLT are all x86 and x86-64 specific.
   if (oneof(expr)) {
-    in.gotPlt->hasGotPltOffRel = true;
+    in.gotPlt->hasGotPltOffRel.store(true, std::memory_order_relaxed);
   } else if (oneof(expr)) {
-    in.got->hasGotOffRel = true;
+    in.got->hasGotOffRel.store(true, std::memory_order_relaxed);
   }

   // Process TLS relocations, including relaxing TLS relocations. Note that
@@ -1422,6 +1429,7 @@
   // We were asked not to generate PLT entries for ifuncs. Instead, pass the
   // direct relocation on through.
   if (LLVM_UNLIKELY(isIfunc) && config->zIfuncNoplt) {
+    std::lock_guard lock(relocMutex);
     sym.exportDynamic = true;
     mainPart->relaDyn->addSymbolReloc(type, *sec, offset, sym, addend, type);
     return;
@@ -1530,10 +1538,31 @@
   // Scan all relocations. Each relocation goes through a series of tests to
   // determine if it needs special treatment, such as creating GOT, PLT,
   // copy relocations, etc. Note that relocations for non-alloc sections are
   // directly processed by InputSection::relocateNonAlloc.
+
+  // Deterministic parallelism requires sorting relocations, which is unsuitable
+  // for -z nocombreloc. AndroidPackedRelocationSection does not support
+  // parallelism. MIPS and PPC64 use global states which are not suitable for
+  // parallelism.
+  bool serial = !config->zCombreloc || config->emachine == EM_MIPS ||
+                config->emachine == EM_PPC64;
+  parallel::TaskGroup tg;
+  for (ELFFileBase *f : ctx->objectFiles) {
+    auto fn = [f]() {
+      RelocationScanner scanner;
+      for (InputSectionBase *s : f->getSections()) {
+        if (s && s->kind() == SectionBase::Regular && s->isLive() &&
+            (s->flags & SHF_ALLOC) &&
+            !(s->type == SHT_ARM_EXIDX && config->emachine == EM_ARM))
+          scanner.template scanSection<ELFT>(*s);
+      }
+    };
+    if (serial)
+      fn();
+    else
+      tg.execute(fn);
+  }
+
   RelocationScanner scanner;
-  for (InputSectionBase *sec : inputSections)
-    if (sec->isLive() && (sec->flags & SHF_ALLOC))
-      scanner.template scanSection<ELFT>(*sec);
   for (Partition &part : partitions) {
     for (EhInputSection *sec : part.ehFrame->sections)
       scanner.template scanSection<ELFT>(*sec);
@@ -1624,7 +1653,7 @@

 void elf::postScanRelocations() {
   auto fn = [](Symbol &sym) {
-    auto flags = sym.flags;
+    auto flags = sym.flags.load(std::memory_order_relaxed);
     if (handleNonPreemptibleIfunc(sym, flags))
       return;
     if (!sym.needsDynReloc())
diff --git a/lld/ELF/Symbols.h b/lld/ELF/Symbols.h
--- a/lld/ELF/Symbols.h
+++ b/lld/ELF/Symbols.h
@@ -81,6 +81,10 @@
   // The file from which this symbol was created.
   InputFile *file;

+  // The default copy constructor is deleted due to atomic flags. Define one for
+  // places where no atomic is needed.
+  Symbol(const Symbol &o) { memcpy(this, &o, sizeof(o)); }
+
 protected:
   const char *nameData;
   // 32-bit size saves space.
@@ -295,7 +299,7 @@

   // Temporary flags used to communicate which symbol entries need PLT and GOT
   // entries during postScanRelocations();
-  uint16_t flags = 0;
+  std::atomic<uint16_t> flags = 0;

   // A symAux index used to access GOT/PLT entry indexes. This is allocated in
   // postScanRelocations().
@@ -309,15 +313,15 @@
   uint16_t versionId;

   void setFlags(uint16_t bits) {
-    flags |= bits;
+    flags.fetch_or(bits, std::memory_order_relaxed);
   }
   bool hasFlag(uint16_t bit) const {
     assert(bit && (bit & (bit - 1)) == 0 && "bit must be a power of 2");
-    return flags & bit;
+    return flags.load(std::memory_order_relaxed) & bit;
   }
   bool needsDynReloc() const {
-    return flags &
+    return flags.load(std::memory_order_relaxed) &
            (NEEDS_COPY | NEEDS_GOT | NEEDS_PLT | NEEDS_TLSDESC | NEEDS_TLSGD |
             NEEDS_TLSGD_TO_IE | NEEDS_GOT_DTPREL | NEEDS_TLSIE);
   }
diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h
--- a/lld/ELF/SyntheticSections.h
+++ b/lld/ELF/SyntheticSections.h
@@ -26,6 +26,7 @@
 #include "llvm/ADT/MapVector.h"
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/Threading.h"

 namespace lld::elf {
@@ -115,7 +116,7 @@

   // Flag to force GOT to be in output if we have relocations
   // that relies on its address.
-  bool hasGotOffRel = false;
+  std::atomic<bool> hasGotOffRel = false;

 protected:
   size_t numEntries = 0;
@@ -357,7 +358,7 @@

   // Flag to force GotPlt to be in output if we have relocations
   // that relies on its address.
-  bool hasGotPltOffRel = false;
+  std::atomic<bool> hasGotPltOffRel = false;

 private:
   SmallVector<const Symbol *, 0> entries;
@@ -490,6 +491,7 @@
   /// Add a dynamic relocation without writing an addend to the output section.
   /// This overload can be used if the addends are written directly instead of
   /// using relocations on the input section (e.g. MipsGotSection::writeTo()).
+  template <bool shard = false>
   void addReloc(const DynamicReloc &reloc) { relocs.push_back(reloc); }
   /// Add a dynamic relocation against \p sym with an optional addend.
   void addSymbolReloc(RelType dynType, InputSectionBase &isec,
@@ -497,21 +499,41 @@
                       llvm::Optional<RelType> addendRelType = llvm::None);
   /// Add a relative dynamic relocation that uses the target address of \p sym
   /// (i.e. InputSection::getRelocTargetVA()) + \p addend as the addend.
+  template <bool shard = false>
   void addRelativeReloc(RelType dynType, InputSectionBase &isec,
                         uint64_t offsetInSec, Symbol &sym, int64_t addend,
-                        RelType addendRelType, RelExpr expr);
+                        RelType addendRelType, RelExpr expr) {
+    // This function should only be called for non-preemptible symbols or
+    // RelExpr values that refer to an address inside the output file (e.g. the
+    // address of the GOT entry for a potentially preemptible symbol).
+    assert(expr != R_ADDEND && "expected non-addend relocation expression");
+    addReloc<shard>(DynamicReloc::AddendOnlyWithTargetVA, dynType, isec,
+                    offsetInSec, sym, addend, expr, addendRelType);
+  }
   /// Add a dynamic relocation using the target address of \p sym as the addend
   /// if \p sym is non-preemptible. Otherwise add a relocation against \p sym.
   void addAddendOnlyRelocIfNonPreemptible(RelType dynType,
                                           InputSectionBase &isec,
                                           uint64_t offsetInSec, Symbol &sym,
                                           RelType addendRelType);
-  void addReloc(DynamicReloc::Kind kind, RelType dynType,
-                InputSectionBase &inputSec, uint64_t offsetInSec, Symbol &sym,
-                int64_t addend, RelExpr expr, RelType addendRelType);
-  bool isNeeded() const override { return !relocs.empty(); }
+  template <bool shard = false>
+  void addReloc(DynamicReloc::Kind kind, RelType dynType, InputSectionBase &sec,
+                uint64_t offsetInSec, Symbol &sym, int64_t addend, RelExpr expr,
+                RelType addendRelType) {
+    // Write the addends to the relocated address if required. We skip
+    // it if the written value would be zero.
+    if (config->writeAddends && (expr != R_ADDEND || addend != 0))
+      sec.relocations.push_back(
+          {expr, addendRelType, offsetInSec, addend, &sym});
+    addReloc<shard>({dynType, &sec, offsetInSec, kind, sym, addend, expr});
+  }
+  bool isNeeded() const override {
+    return !relocs.empty() ||
+           llvm::any_of(relocsVec, [](auto &v) { return !v.empty(); });
+  }
   size_t getSize() const override { return relocs.size() * this->entsize; }
   size_t getRelativeRelocCount() const { return numRelativeRelocs; }
+  void mergeRels();
   void partitionRels();
   void finalizeContents() override;
   static bool classof(const SectionBase *d) {
@@ -521,6 +543,9 @@
   }
   int32_t dynamicTag, sizeDynamicTag;
   SmallVector<DynamicReloc, 0> relocs;
+  // Used when parallel relocation scanning adds relocations. The elements
+  // will be moved into relocs.
+  SmallVector<SmallVector<DynamicReloc, 0>, 0> relocsVec;

 protected:
   void computeRels();
@@ -528,6 +553,11 @@
   bool combreloc;
 };

+template <>
+inline void RelocationBaseSection::addReloc<true>(const DynamicReloc &reloc) {
+  relocsVec[llvm::parallel::threadIndex].push_back(reloc);
+}
+
 template <class ELFT>
 class RelocationSection final : public RelocationBaseSection {
   using Elf_Rel = typename ELFT::Rel;
@@ -566,8 +596,13 @@
 class RelrBaseSection : public SyntheticSection {
 public:
   RelrBaseSection();
-  bool isNeeded() const override { return !relocs.empty(); }
+  void mergeRels();
+  bool isNeeded() const override {
+    return !relocs.empty() ||
+           llvm::any_of(relocsVec, [](auto &v) { return !v.empty(); });
+  }
   SmallVector<RelativeReloc, 0> relocs;
+  SmallVector<SmallVector<RelativeReloc, 0>, 0> relocsVec;
 };

 // RelrSection is used to encode offsets for relative relocations.
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -1586,19 +1586,6 @@
            R_ADDEND, addendRelType ? *addendRelType : target->noneRel);
 }

-void RelocationBaseSection::addRelativeReloc(
-    RelType dynType, InputSectionBase &inputSec, uint64_t offsetInSec,
-    Symbol &sym, int64_t addend, RelType addendRelType, RelExpr expr) {
-  // This function should only be called for non-preemptible symbols or
-  // RelExpr values that refer to an address inside the output file (e.g. the
-  // address of the GOT entry for a potentially preemptible symbol).
-  assert((!sym.isPreemptible || expr == R_GOT) &&
-         "cannot add relative relocation against preemptible symbol");
-  assert(expr != R_ADDEND && "expected non-addend relocation expression");
-  addReloc(DynamicReloc::AddendOnlyWithTargetVA, dynType, inputSec, offsetInSec,
-           sym, addend, expr, addendRelType);
-}
-
 void RelocationBaseSection::addAddendOnlyRelocIfNonPreemptible(
     RelType dynType, InputSectionBase &isec, uint64_t offsetInSec, Symbol &sym,
     RelType addendRelType) {
@@ -1611,17 +1598,14 @@
            sym, 0, R_ABS, addendRelType);
 }

-void RelocationBaseSection::addReloc(DynamicReloc::Kind kind, RelType dynType,
-                                     InputSectionBase &inputSec,
-                                     uint64_t offsetInSec, Symbol &sym,
-                                     int64_t addend, RelExpr expr,
-                                     RelType addendRelType) {
-  // Write the addends to the relocated address if required. We skip
-  // it if the written value would be zero.
-  if (config->writeAddends && (expr != R_ADDEND || addend != 0))
-    inputSec.relocations.push_back(
-        {expr, addendRelType, offsetInSec, addend, &sym});
-  addReloc({dynType, &inputSec, offsetInSec, kind, sym, addend, expr});
+void RelocationBaseSection::mergeRels() {
+  size_t newSize = relocs.size();
+  for (const auto &v : relocsVec)
+    newSize += v.size();
+  relocs.reserve(newSize);
+  for (const auto &v : relocsVec)
+    llvm::append_range(relocs, v);
+  relocsVec.clear();
 }

 void RelocationBaseSection::partitionRels() {
@@ -1704,6 +1688,16 @@
       config->useAndroidRelrTags ? SHT_ANDROID_RELR : SHT_RELR,
       config->wordsize, ".relr.dyn") {}

+void RelrBaseSection::mergeRels() {
+  size_t newSize = relocs.size();
+  for (const auto &v : relocsVec)
+    newSize += v.size();
+  relocs.reserve(newSize);
+  for (const auto &v : relocsVec)
+    llvm::append_range(relocs, v);
+  relocsVec.clear();
+}
+
 template <class ELFT>
 AndroidPackedRelocationSection<ELFT>::AndroidPackedRelocationSection(
     StringRef name)
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -349,12 +349,14 @@
       add(*part.memtagAndroidNote);
   }

+  const unsigned threadCount = parallel::strategy.compute_thread_count();
   if (config->androidPackDynRelocs)
     part.relaDyn = std::make_unique<AndroidPackedRelocationSection<ELFT>>(relaDynName);
   else
     part.relaDyn = std::make_unique<RelocationSection<ELFT>>(
         relaDynName, config->zCombreloc);
+  part.relaDyn->relocsVec.resize(threadCount);

   if (config->hasDynSymTab) {
     add(*part.dynSymTab);
@@ -387,6 +389,7 @@

   if (config->relrPackDynRelocs) {
     part.relrDyn = std::make_unique<RelrSection<ELFT>>();
+    part.relrDyn->relocsVec.resize(threadCount);
     add(*part.relrDyn);
   }

@@ -2074,16 +2077,20 @@
   // symbol table section (dynSymTab) must be the first one.
   for (Partition &part : partitions) {
     if (part.relaDyn) {
+      part.relaDyn->mergeRels();
       // Compute DT_RELACOUNT to be used by part.dynamic.
       part.relaDyn->partitionRels();
       finalizeSynthetic(part.relaDyn.get());
     }
+    if (part.relrDyn) {
+      part.relrDyn->mergeRels();
+      finalizeSynthetic(part.relrDyn.get());
+    }

     finalizeSynthetic(part.dynSymTab.get());
     finalizeSynthetic(part.gnuHashTab.get());
     finalizeSynthetic(part.hashTab.get());
     finalizeSynthetic(part.verDef.get());
-    finalizeSynthetic(part.relrDyn.get());
     finalizeSynthetic(part.ehFrameHdr.get());
     finalizeSynthetic(part.verSym.get());
     finalizeSynthetic(part.verNeed.get());
diff --git a/lld/test/ELF/combreloc.s b/lld/test/ELF/combreloc.s
--- a/lld/test/ELF/combreloc.s
+++ b/lld/test/ELF/combreloc.s
@@ -35,8 +35,8 @@
 # NOCOMB-NEXT: 0x3400 R_X86_64_64 ccc 0x0
 # NOCOMB-NEXT: 0x3408 R_X86_64_64 bbb 0x0
 # NOCOMB-NEXT: 0x3410 R_X86_64_64 aaa 0x0
-# NOCOMB-NEXT: 0x3418 R_X86_64_RELATIVE - 0x3420
 # NOCOMB-NEXT: 0x23F0 R_X86_64_GLOB_DAT aaa 0x0
+# NOCOMB-NEXT: 0x3418 R_X86_64_RELATIVE - 0x3420
 # NOCOMB-NEXT: }

 .globl aaa, bbb, ccc
diff --git a/lld/test/ELF/comdat-discarded-error.s b/lld/test/ELF/comdat-discarded-error.s
--- a/lld/test/ELF/comdat-discarded-error.s
+++ b/lld/test/ELF/comdat-discarded-error.s
@@ -5,7 +5,7 @@
 # RUN: echo '.weak foo; foo: .section .text.foo,"axG",@progbits,foo,comdat; .globl bar; bar:' |\
 # RUN:   llvm-mc -filetype=obj -triple=x86_64 - -o %t3.o

-# RUN: not ld.lld %t2.o %t3.o %t1.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: not ld.lld --threads=1 %t2.o %t3.o %t1.o -o /dev/null 2>&1 | FileCheck %s

 # CHECK: error: relocation refers to a symbol in a discarded section: bar
 # CHECK-NEXT: >>> defined in {{.*}}3.o
diff --git a/lld/test/ELF/undef-multi.s b/lld/test/ELF/undef-multi.s
--- a/lld/test/ELF/undef-multi.s
+++ b/lld/test/ELF/undef-multi.s
@@ -1,7 +1,7 @@
 # REQUIRES: x86
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef.s -o %t2.o
-# RUN: not ld.lld %t.o %t2.o -o /dev/null 2>&1 | FileCheck %s
+# RUN: not ld.lld --threads=1 %t.o %t2.o -o /dev/null 2>&1 | FileCheck %s

 # CHECK: error: undefined symbol: zed2
 # CHECK-NEXT: >>> referenced by undef-multi.s
@@ -24,7 +24,7 @@
 # RUN: echo "  call zed2" >> %t.moreref.s
 # RUN: echo "  call zed2" >> %t.moreref.s
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %t.moreref.s -o %t3.o
-# RUN: not ld.lld %t.o %t2.o %t3.o -o /dev/null -error-limit=2 2>&1 | \
+# RUN: not ld.lld --threads=1 %t.o %t2.o %t3.o -o /dev/null -error-limit=2 2>&1 | \
 # RUN:   FileCheck --check-prefix=LIMIT %s

 # LIMIT: error: undefined symbol: zed2
diff --git a/lld/test/ELF/undef.s b/lld/test/ELF/undef.s
--- a/lld/test/ELF/undef.s
+++ b/lld/test/ELF/undef.s
@@ -5,9 +5,9 @@
 # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %p/Inputs/undef-bad-debug.s -o %t4.o
 # RUN: rm -f %t2.a
 # RUN: llvm-ar rc %t2.a %t2.o
-# RUN: not ld.lld %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
+# RUN: not ld.lld --threads=1 %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
 # RUN:   | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:"
-# RUN: not ld.lld -pie %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
+# RUN: not ld.lld --threads=1 -pie %t.o %t2.a %t3.o %t4.o -o /dev/null 2>&1 \
 # RUN:   | FileCheck %s --implicit-check-not="error:" --implicit-check-not="warning:"

 # CHECK: error: undefined symbol: foo
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -28,6 +28,7 @@
 // this file. It defaults to using all hardware threads and should be
 // initialized before the first use of parallel routines.
 extern ThreadPoolStrategy strategy;
+extern thread_local unsigned threadIndex;

 namespace detail {
 class Latch {
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -18,6 +18,7 @@
 #include

 llvm::ThreadPoolStrategy llvm::parallel::strategy;
+thread_local unsigned llvm::parallel::threadIndex;

 namespace llvm {
 namespace parallel {
@@ -95,6 +96,7 @@

 private:
   void work(ThreadPoolStrategy S, unsigned ThreadID) {
+    threadIndex = ThreadID;
     S.apply_thread_strategy(ThreadID);
     while (true) {
       std::unique_lock Lock(Mutex);
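
For readers unfamiliar with the sharding pattern this patch relies on, here is a minimal standalone C++ sketch of the same idea: each worker thread appends to its own per-thread vector selected through a thread_local index (mirroring threadIndex and relocsVec above), so the hot path needs no locking, and the shards are concatenated once in a single-threaded step (mirroring mergeRels()). The names Reloc and ShardedRelocs are invented for illustration and are not LLD's API.

// Illustrative sketch only; not part of the patch.
#include <algorithm>
#include <cstddef>
#include <thread>
#include <vector>

// Set once per worker thread, as work() does for llvm::parallel::threadIndex.
thread_local unsigned threadIndex = 0;

struct Reloc { std::size_t offset; };

struct ShardedRelocs {
  std::vector<std::vector<Reloc>> shards; // one shard per worker thread
  std::vector<Reloc> merged;

  explicit ShardedRelocs(unsigned threads) : shards(threads) {}

  // Lock-free on the hot path: each thread only touches its own shard.
  void add(const Reloc &r) { shards[threadIndex].push_back(r); }

  // Single-threaded merge after all workers have joined (like mergeRels()).
  void merge() {
    std::size_t total = merged.size();
    for (const auto &v : shards)
      total += v.size();
    merged.reserve(total);
    for (auto &v : shards)
      merged.insert(merged.end(), v.begin(), v.end());
    shards.clear();
  }
};

int main() {
  const unsigned n = std::max(1u, std::thread::hardware_concurrency());
  ShardedRelocs relocs(n);
  std::vector<std::thread> workers;
  for (unsigned i = 0; i < n; ++i)
    workers.emplace_back([&relocs, i] {
      threadIndex = i;     // analogous to work() recording its ThreadID
      for (std::size_t off = 0; off < 1000; ++off)
        relocs.add({off}); // no mutex on this path
    });
  for (auto &t : workers)
    t.join();
  relocs.merge();          // deterministic order is restored by a later sort in lld
  return relocs.merged.size() == n * 1000 ? 0 : 1;
}

Rarely-taken paths that touch shared state (undefined-symbol diagnostics, non-relative dynamic relocations) keep a plain mutex in the patch, which is why relocMutex appears only on those branches rather than around every relocation.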