diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h --- a/lld/ELF/OutputSections.h +++ b/lld/ELF/OutputSections.h @@ -101,7 +101,7 @@ bool hasInputSections = false; void finalize(); - template void writeTo(uint8_t *buf); + template void writeTo(uint8_t *buf, bool parallel = true); // Check that the addends for dynamic relocations were written correctly. void checkDynRelAddends(const uint8_t *bufStart); template void maybeCompress(); diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -331,7 +331,24 @@ llvm_unreachable("unsupported Size argument"); } -template void OutputSection::writeTo(uint8_t *buf) { +template +void lld_parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { + // Spawn one task per TaskSize consecutive indices rather than one per index, + // to limit job scheduling overhead on large inputs. + const ptrdiff_t TaskSize = 128; + parallel::detail::TaskGroup TG; + IndexTy I = Begin; + for (; I + TaskSize < End; I += TaskSize) { + TG.spawn([=, &Fn] { + for (IndexTy J = I, E = I + TaskSize; J != E; ++J) + Fn(J); + }); + } + for (IndexTy J = I; J < End; ++J) + Fn(J); +} + +template void OutputSection::writeTo(uint8_t *buf, bool parallel) { llvm::TimeTraceScope timeScope("Write sections", name); if (type == SHT_NOBITS) return; @@ -353,7 +370,7 @@ if (nonZeroFiller) fill(buf, sections.empty() ? size : sections[0]->outSecOff, filler); - parallelForEachN(0, sections.size(), [&](size_t i) { + auto fn = [&](size_t i) { InputSection *isec = sections[i]; isec->writeTo(buf + isec->outSecOff); @@ -371,7 +388,13 @@ } else fill(start, end - start, filler); } - }); + }; + if (parallel) { + lld_parallel_for_each_n(0, (int)sections.size(), [&](size_t i) { fn(i); }); + } else { + for (size_t i = 0, e = sections.size(); i != e; ++i) + fn(i); + } // Linker scripts may have BYTE()-family commands with which you // can write arbitrary bytes to the output. Process them if any. 
@@ -587,10 +610,10 @@ template void OutputSection::writeHeaderTo(ELF64LE::Shdr *Shdr); template void OutputSection::writeHeaderTo(ELF64BE::Shdr *Shdr); -template void OutputSection::writeTo(uint8_t *Buf); -template void OutputSection::writeTo(uint8_t *Buf); -template void OutputSection::writeTo(uint8_t *Buf); -template void OutputSection::writeTo(uint8_t *Buf); +template void OutputSection::writeTo(uint8_t *, bool); +template void OutputSection::writeTo(uint8_t *, bool); +template void OutputSection::writeTo(uint8_t *, bool); +template void OutputSection::writeTo(uint8_t *, bool); template void OutputSection::maybeCompress(); template void OutputSection::maybeCompress(); diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -2870,6 +2870,15 @@ } } +static bool shouldParallel(const OutputSection *osec) { + if (osec->commands.size() != 1) + return false; + if (auto *isd = dyn_cast(osec->commands[0])) + return isd->sections.size() == 1 && + !isa(isd->sections[0]); + return false; +} + // Write section contents to a mmap'ed file. template void Writer::writeSections() { llvm::TimeTraceScope timeScope("Write sections"); @@ -2881,9 +2890,17 @@ if (sec->type == SHT_REL || sec->type == SHT_RELA) sec->writeTo(Out::bufferStart + sec->offset); + SmallVector vec; for (OutputSection *sec : outputSections) - if (sec->type != SHT_REL && sec->type != SHT_RELA) - sec->writeTo(Out::bufferStart + sec->offset); + if (sec->type != SHT_REL && sec->type != SHT_RELA) { + if (shouldParallel(sec)) + vec.push_back(sec); + else + sec->writeTo(Out::bufferStart + sec->offset); + } + parallelForEach(vec, [](OutputSection *sec) { + sec->writeTo(Out::bufferStart + sec->offset, false); + }); // Finally, check that all dynamic relocation addends were written correctly. if (config->checkDynamicRelocs && config->writeAddends) {