diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -21,7 +21,6 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Filesystem.h" #include "lld/Common/Memory.h" -#include "lld/Common/Threads.h" #include "lld/Common/Timer.h" #include "lld/Common/Version.h" #include "llvm/ADT/Optional.h" @@ -39,6 +38,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/TarWriter.h" diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp --- a/lld/COFF/ICF.cpp +++ b/lld/COFF/ICF.cpp @@ -21,7 +21,6 @@ #include "Chunks.h" #include "Symbols.h" #include "lld/Common/ErrorHandler.h" -#include "lld/Common/Threads.h" #include "lld/Common/Timer.h" #include "llvm/ADT/Hashing.h" #include "llvm/Support/Debug.h" @@ -230,10 +229,10 @@ size_t boundaries[numShards + 1]; boundaries[0] = 0; boundaries[numShards] = chunks.size(); - parallelForEachN(1, numShards, [&](size_t i) { + parallel::for_each_n(1, numShards, [&](size_t i) { boundaries[i] = findBoundary((i - 1) * step, chunks.size()); }); - parallelForEachN(1, numShards + 1, [&](size_t i) { + parallel::for_each_n(1, numShards + 1, [&](size_t i) { if (boundaries[i - 1] < boundaries[i]) { forEachClassRange(boundaries[i - 1], boundaries[i], fn); } @@ -266,14 +265,14 @@ sc->eqClass[0] = nextId++; // Initially, we use hash values to partition sections. - parallelForEach(chunks, [&](SectionChunk *sc) { + parallel::for_each(chunks, [&](SectionChunk *sc) { sc->eqClass[0] = xxHash64(sc->getContents()); }); // Combine the hashes of the sections referenced by each section into its // hash. for (unsigned cnt = 0; cnt != 2; ++cnt) { - parallelForEach(chunks, [&](SectionChunk *sc) { + parallel::for_each(chunks, [&](SectionChunk *sc) { uint32_t hash = sc->eqClass[cnt % 2]; for (Symbol *b : sc->symbols()) if (auto *sym = dyn_cast_or_null(b)) diff --git a/lld/COFF/LLDMapFile.cpp b/lld/COFF/LLDMapFile.cpp --- a/lld/COFF/LLDMapFile.cpp +++ b/lld/COFF/LLDMapFile.cpp @@ -23,7 +23,7 @@ #include "Symbols.h" #include "Writer.h" #include "lld/Common/ErrorHandler.h" -#include "lld/Common/Threads.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -74,7 +74,7 @@ static DenseMap getSymbolStrings(ArrayRef syms) { std::vector str(syms.size()); - parallelForEachN((size_t)0, syms.size(), [&](size_t i) { + parallel::for_each_n((size_t)0, syms.size(), [&](size_t i) { raw_string_ostream os(str[i]); writeHeader(os, syms[i]->getRVA(), 0, 0); os << indent16 << toString(*syms[i]); diff --git a/lld/COFF/MapFile.cpp b/lld/COFF/MapFile.cpp --- a/lld/COFF/MapFile.cpp +++ b/lld/COFF/MapFile.cpp @@ -32,8 +32,8 @@ #include "Symbols.h" #include "Writer.h" #include "lld/Common/ErrorHandler.h" -#include "lld/Common/Threads.h" #include "lld/Common/Timer.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -76,7 +76,7 @@ v[i] = SortEntry(syms[i], i); // Remove duplicate symbol pointers - parallelSort(v, std::less()); + parallel::sort(v, std::less()); auto end = std::unique(v.begin(), v.end(), [](const SortEntry &a, const SortEntry &b) { return a.first == b.first; @@ -84,7 +84,7 @@ v.erase(end, v.end()); // Sort by RVA then original order - parallelSort(v, [](const SortEntry &a, const SortEntry &b) { + parallel::sort(v, [](const SortEntry &a, const SortEntry &b) { // Add config->imageBase to avoid comparing "negative" RVAs. // This can happen with symbols of Absolute kind uint64_t rvaa = config->imageBase + a.first->getRVA(); @@ -144,7 +144,7 @@ static DenseMap getSymbolStrings(ArrayRef syms) { std::vector str(syms.size()); - parallelForEachN((size_t)0, syms.size(), [&](size_t i) { + parallel::for_each_n((size_t)0, syms.size(), [&](size_t i) { raw_string_ostream os(str[i]); Defined *sym = syms[i]; diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp --- a/lld/COFF/PDB.cpp +++ b/lld/COFF/PDB.cpp @@ -16,7 +16,6 @@ #include "TypeMerger.h" #include "Writer.h" #include "lld/Common/ErrorHandler.h" -#include "lld/Common/Threads.h" #include "lld/Common/Timer.h" #include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h" #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h" @@ -57,6 +56,7 @@ #include "llvm/Support/Errc.h" #include "llvm/Support/FormatAdapters.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/ScopedPrinter.h" #include @@ -1339,7 +1339,7 @@ if (!publics.empty()) { publicSymbols = publics.size(); // Sort the public symbols and add them to the stream. - parallelSort(publics, [](const PublicSym32 &l, const PublicSym32 &r) { + parallel::sort(publics, [](const PublicSym32 &l, const PublicSym32 &r) { return l.Name < r.Name; }); for (const PublicSym32 &pub : publics) diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -17,7 +17,6 @@ #include "Symbols.h" #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" -#include "lld/Common/Threads.h" #include "lld/Common/Timer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -1785,7 +1784,7 @@ // ADD instructions). if (sec->header.Characteristics & IMAGE_SCN_CNT_CODE) memset(secBuf, 0xCC, sec->getRawSize()); - parallelForEach(sec->chunks, [&](Chunk *c) { + parallel::for_each(sec->chunks, [&](Chunk *c) { c->writeTo(secBuf + c->getRVA() - sec->getRVA()); }); } @@ -1856,14 +1855,14 @@ uint8_t *end = bufAddr(lastPdata) + lastPdata->getSize(); if (config->machine == AMD64) { struct Entry { ulittle32_t begin, end, unwind; }; - parallelSort( + parallel::sort( MutableArrayRef((Entry *)begin, (Entry *)end), [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); return; } if (config->machine == ARMNT || config->machine == ARM64) { struct Entry { ulittle32_t begin, unwind; }; - parallelSort( + parallel::sort( MutableArrayRef((Entry *)begin, (Entry *)end), [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); return; diff --git a/lld/Common/ErrorHandler.cpp b/lld/Common/ErrorHandler.cpp --- a/lld/Common/ErrorHandler.cpp +++ b/lld/Common/ErrorHandler.cpp @@ -8,7 +8,7 @@ #include "lld/Common/ErrorHandler.h" -#include "lld/Common/Threads.h" +#include "llvm/Support/Parallel.h" #include "llvm/ADT/Twine.h" #include "llvm/IR/DiagnosticInfo.h" diff --git a/lld/Common/Filesystem.cpp b/lld/Common/Filesystem.cpp --- a/lld/Common/Filesystem.cpp +++ b/lld/Common/Filesystem.cpp @@ -11,10 +11,10 @@ //===----------------------------------------------------------------------===// #include "lld/Common/Filesystem.h" -#include "lld/Common/Threads.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Parallel.h" #if LLVM_ON_UNIX #include #endif diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -43,7 +43,6 @@ #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" #include "lld/Common/TargetOptionsCommandFlags.h" -#include "lld/Common/Threads.h" #include "lld/Common/Version.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" @@ -53,6 +52,7 @@ #include "llvm/Support/Compression.h" #include "llvm/Support/GlobPattern.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/TarWriter.h" #include "llvm/Support/TargetSelect.h" @@ -1749,7 +1749,7 @@ } // Update pointers in input files. - parallelForEach(objectFiles, [&](InputFile *file) { + parallel::for_each(objectFiles, [&](InputFile *file) { MutableArrayRef syms = file->getMutableSymbols(); for (size_t i = 0, e = syms.size(); i != e; ++i) if (Symbol *s = map.lookup(syms[i])) diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -80,10 +80,10 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Writer.h" -#include "lld/Common/Threads.h" #include "llvm/ADT/StringExtras.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELF.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/TimeProfiler.h" #include "llvm/Support/xxhash.h" #include @@ -419,11 +419,11 @@ boundaries[0] = 0; boundaries[numShards] = sections.size(); - parallelForEachN(1, numShards, [&](size_t i) { + parallel::for_each_n(1, numShards, [&](size_t i) { boundaries[i] = findBoundary((i - 1) * step, sections.size()); }); - parallelForEachN(1, numShards + 1, [&](size_t i) { + parallel::for_each_n(1, numShards + 1, [&](size_t i) { if (boundaries[i - 1] < boundaries[i]) forEachClassRange(boundaries[i - 1], boundaries[i], fn); }); @@ -467,12 +467,11 @@ } // Initially, we use hash values to partition sections. - parallelForEach(sections, [&](InputSection *s) { - s->eqClass[0] = xxHash64(s->data()); - }); + parallel::for_each( + sections, [&](InputSection *s) { s->eqClass[0] = xxHash64(s->data()); }); for (unsigned cnt = 0; cnt != 2; ++cnt) { - parallelForEach(sections, [&](InputSection *s) { + parallel::for_each(sections, [&](InputSection *s) { if (s->areRelocsRela) combineRelocHashes(cnt, s, s->template relas()); else diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp --- a/lld/ELF/LinkerScript.cpp +++ b/lld/ELF/LinkerScript.cpp @@ -21,7 +21,6 @@ #include "Writer.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" -#include "lld/Common/Threads.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" @@ -29,6 +28,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include #include diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp --- a/lld/ELF/MapFile.cpp +++ b/lld/ELF/MapFile.cpp @@ -26,9 +26,9 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "lld/Common/Strings.h" -#include "lld/Common/Threads.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -84,7 +84,7 @@ static DenseMap getSymbolStrings(ArrayRef syms) { std::vector str(syms.size()); - parallelForEachN(0, syms.size(), [&](size_t i) { + parallel::for_each_n(0, syms.size(), [&](size_t i) { raw_string_ostream os(str[i]); OutputSection *osec = syms[i]->getOutputSection(); uint64_t vma = syms[i]->getVA(); diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp --- a/lld/ELF/OutputSections.cpp +++ b/lld/ELF/OutputSections.cpp @@ -14,11 +14,11 @@ #include "Target.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" -#include "lld/Common/Threads.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Compression.h" #include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/SHA1.h" #include @@ -337,7 +337,7 @@ if (nonZeroFiller) fill(buf, sections.empty() ? size : sections[0]->outSecOff, filler); - parallelForEachN(0, sections.size(), [&](size_t i) { + parallel::for_each_n(0, sections.size(), [&](size_t i) { InputSection *isec = sections[i]; isec->writeTo(buf); diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -26,7 +26,6 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" -#include "lld/Common/Threads.h" #include "lld/Common/Version.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/StringExtras.h" @@ -37,6 +36,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/TimeProfiler.h" #include #include @@ -2758,7 +2758,7 @@ // Instantiate GdbSymbols while uniqufying them by name. std::vector> symbols(numShards); - parallelForEachN(0, concurrency, [&](size_t threadId) { + parallel::for_each_n(0, concurrency, [&](size_t threadId) { uint32_t i = 0; for (ArrayRef entries : nameAttrs) { for (const NameAttrEntry &ent : entries) { @@ -2821,7 +2821,7 @@ std::vector chunks(sections.size()); std::vector> nameAttrs(sections.size()); - parallelForEachN(0, sections.size(), [&](size_t i) { + parallel::for_each_n(0, sections.size(), [&](size_t i) { // To keep memory usage low, we don't want to keep cached DWARFContext, so // avoid getDwarf() here. ObjFile *file = sections[i]->getFile(); @@ -2895,7 +2895,7 @@ // Write the string pool. hdr->constantPoolOff = buf - start; - parallelForEach(symbols, [&](GdbSymbol &sym) { + parallel::for_each(symbols, [&](GdbSymbol &sym) { memcpy(buf + sym.nameOff, sym.name.data(), sym.name.size()); }); @@ -3199,7 +3199,7 @@ numShards)); // Add section pieces to the builders. - parallelForEachN(0, concurrency, [&](size_t threadId) { + parallel::for_each_n(0, concurrency, [&](size_t threadId) { for (MergeInputSection *sec : sections) { for (size_t i = 0, e = sec->pieces.size(); i != e; ++i) { if (!sec->pieces[i].live) @@ -3224,7 +3224,7 @@ // So far, section pieces have offsets from beginning of shards, but // we want offsets from beginning of the whole section. Fix them. - parallelForEach(sections, [&](MergeInputSection *sec) { + parallel::for_each(sections, [&](MergeInputSection *sec) { for (size_t i = 0, e = sec->pieces.size(); i != e; ++i) if (sec->pieces[i].live) sec->pieces[i].outputOff += @@ -3245,7 +3245,7 @@ llvm::TimeTraceScope timeScope("Split sections"); // splitIntoPieces needs to be called on each MergeInputSection // before calling finalizeContents(). - parallelForEach(inputSections, [](InputSectionBase *sec) { + parallel::for_each(inputSections, [](InputSectionBase *sec) { if (auto *s = dyn_cast(sec)) s->splitIntoPieces(); else if (auto *eh = dyn_cast(sec)) diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -22,9 +22,9 @@ #include "lld/Common/Filesystem.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" -#include "lld/Common/Threads.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/SHA1.h" #include "llvm/Support/TimeProfiler.h" @@ -1749,7 +1749,7 @@ // the end of the section are relaxed. static void fixSymbolsAfterShrinking() { for (InputFile *File : objectFiles) { - parallelForEach(File->getSymbols(), [&](Symbol *Sym) { + parallel::for_each(File->getSymbols(), [&](Symbol *Sym) { auto *def = dyn_cast(Sym); if (!def) return; @@ -1808,7 +1808,7 @@ // Delete all fall through jump instructions. Also, check if two // consecutive jump instructions can be flipped so that a fall // through jmp instruction can be deleted. - parallelForEachN(0, sections.size(), [&](size_t i) { + parallel::for_each_n(0, sections.size(), [&](size_t i) { InputSection *next = i + 1 < sections.size() ? sections[i + 1] : nullptr; InputSection &is = *sections[i]; result[i] = @@ -2905,7 +2905,7 @@ std::vector hashes(chunks.size() * hashBuf.size()); // Compute hash values. - parallelForEachN(0, chunks.size(), [&](size_t i) { + parallel::for_each_n(0, chunks.size(), [&](size_t i) { hashFn(hashes.data() + i * hashBuf.size(), chunks[i]); }); diff --git a/lld/include/lld/Common/Threads.h b/lld/include/lld/Common/Threads.h deleted file mode 100644 --- a/lld/include/lld/Common/Threads.h +++ /dev/null @@ -1,90 +0,0 @@ -//===- Threads.h ------------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// LLD supports threads to distribute workloads to multiple cores. Using -// multicore is most effective when more than one core are idle. At the -// last step of a build, it is often the case that a linker is the only -// active process on a computer. So, we are naturally interested in using -// threads wisely to reduce latency to deliver results to users. -// -// That said, we don't want to do "too clever" things using threads. -// Complex multi-threaded algorithms are sometimes extremely hard to -// reason about and can easily mess up the entire design. -// -// Fortunately, when a linker links large programs (when the link time is -// most critical), it spends most of the time to work on massive number of -// small pieces of data of the same kind, and there are opportunities for -// large parallelism there. Here are examples: -// -// - We have hundreds of thousands of input sections that need to be -// copied to a result file at the last step of link. Once we fix a file -// layout, each section can be copied to its destination and its -// relocations can be applied independently. -// -// - We have tens of millions of small strings when constructing a -// mergeable string section. -// -// For the cases such as the former, we can just use parallelForEach -// instead of std::for_each (or a plain for loop). Because tasks are -// completely independent from each other, we can run them in parallel -// without any coordination between them. That's very easy to understand -// and reason about. -// -// For the cases such as the latter, we can use parallel algorithms to -// deal with massive data. We have to write code for a tailored algorithm -// for each problem, but the complexity of multi-threading is isolated in -// a single pass and doesn't affect the linker's overall design. -// -// The above approach seems to be working fairly well. As an example, when -// linking Chromium (output size 1.6 GB), using 4 cores reduces latency to -// 75% compared to single core (from 12.66 seconds to 9.55 seconds) on my -// Ivy Bridge Xeon 2.8 GHz machine. Using 40 cores reduces it to 63% (from -// 12.66 seconds to 7.95 seconds). Because of the Amdahl's law, the -// speedup is not linear, but as you add more cores, it gets faster. -// -// On a final note, if you are trying to optimize, keep the axiom "don't -// guess, measure!" in mind. Some important passes of the linker are not -// that slow. For example, resolving all symbols is not a very heavy pass, -// although it would be very hard to parallelize it. You want to first -// identify a slow pass and then optimize it. -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_COMMON_THREADS_H -#define LLD_COMMON_THREADS_H - -#include "llvm/Support/Parallel.h" -#include - -namespace lld { - -template void parallelForEach(R &&range, FuncTy fn) { - if (llvm::parallel::strategy.ThreadsRequested != 1) - for_each(llvm::parallel::par, std::begin(range), std::end(range), fn); - else - for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn); -} - -inline void parallelForEachN(size_t begin, size_t end, - llvm::function_ref fn) { - if (llvm::parallel::strategy.ThreadsRequested != 1) - for_each_n(llvm::parallel::par, begin, end, fn); - else - for_each_n(llvm::parallel::seq, begin, end, fn); -} - -template void parallelSort(R &&range, FuncTy fn) { - if (llvm::parallel::strategy.ThreadsRequested != 1) - sort(llvm::parallel::par, std::begin(range), std::end(range), fn); - else - sort(llvm::parallel::seq, std::begin(range), std::end(range), fn); -} - -} // namespace lld - -#endif diff --git a/lld/lib/ReaderWriter/MachO/LayoutPass.cpp b/lld/lib/ReaderWriter/MachO/LayoutPass.cpp --- a/lld/lib/ReaderWriter/MachO/LayoutPass.cpp +++ b/lld/lib/ReaderWriter/MachO/LayoutPass.cpp @@ -461,10 +461,11 @@ }); std::vector vec = decorate(atomRange); - sort(llvm::parallel::par, vec.begin(), vec.end(), - [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool { - return compareAtoms(l, r, _customSorter); - }); + llvm::parallel::sort( + vec.begin(), vec.end(), + [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool { + return compareAtoms(l, r, _customSorter); + }); LLVM_DEBUG(checkTransitivity(vec, _customSorter)); undecorate(atomRange, vec); diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -19,7 +19,6 @@ #include "lld/Common/Memory.h" #include "lld/Common/Reproduce.h" #include "lld/Common/Strings.h" -#include "lld/Common/Threads.h" #include "lld/Common/Version.h" #include "llvm/ADT/Twine.h" #include "llvm/Object/Wasm.h" @@ -27,6 +26,7 @@ #include "llvm/Option/ArgList.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Host.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/TarWriter.h" @@ -679,7 +679,7 @@ } // Update pointers in input files. - parallelForEach(symtab->objectFiles, [&](InputFile *file) { + parallel::for_each(symtab->objectFiles, [&](InputFile *file) { MutableArrayRef syms = file->getMutableSymbols(); for (size_t i = 0, e = syms.size(); i != e; ++i) if (Symbol *s = map.lookup(syms[i])) diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp --- a/lld/wasm/OutputSections.cpp +++ b/lld/wasm/OutputSections.cpp @@ -12,9 +12,9 @@ #include "OutputSegment.h" #include "WriterUtils.h" #include "lld/Common/ErrorHandler.h" -#include "lld/Common/Threads.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/Parallel.h" #define DEBUG_TYPE "lld" diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -20,7 +20,6 @@ #include "lld/Common/ErrorHandler.h" #include "lld/Common/Memory.h" #include "lld/Common/Strings.h" -#include "lld/Common/Threads.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" @@ -31,6 +30,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/LEB128.h" +#include "llvm/Support/Parallel.h" #include #include @@ -189,7 +189,7 @@ void Writer::writeSections() { uint8_t *buf = buffer->getBufferStart(); - parallelForEach(outputSections, [buf](OutputSection *s) { + parallel::for_each(outputSections, [buf](OutputSection *s) { assert(s->isNeeded()); s->writeTo(buf); }); diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h --- a/llvm/include/llvm/Support/Parallel.h +++ b/llvm/include/llvm/Support/Parallel.h @@ -22,17 +22,6 @@ namespace llvm { namespace parallel { -struct sequential_execution_policy {}; -struct parallel_execution_policy {}; - -template -struct is_execution_policy - : public std::integral_constant< - bool, llvm::is_one_of::value> {}; - -constexpr sequential_execution_policy seq{}; -constexpr parallel_execution_policy par{}; // Strategy for the default executor used by the parallel routines provided by // this file. It defaults to using all hardware threads and should be @@ -169,59 +158,54 @@ #endif -template -using DefComparator = - std::less::value_type>; - } // namespace detail -// sequential algorithm implementations. -template > -void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, +template ::value_type>> +void sort(RandomAccessIterator Start, RandomAccessIterator End, const Comparator &Comp = Comparator()) { - static_assert(is_execution_policy::value, - "Invalid execution policy!"); +#if LLVM_ENABLE_THREADS + if (strategy.ThreadsRequested != 1) { + detail::parallel_sort(Start, End, Comp); + return; + } +#endif llvm::sort(Start, End, Comp); } -template -void for_each(Policy policy, IterTy Begin, IterTy End, FuncTy Fn) { - static_assert(is_execution_policy::value, - "Invalid execution policy!"); +template +void for_each(IterTy Begin, IterTy End, FuncTy Fn) { +#if LLVM_ENABLE_THREADS + if (strategy.ThreadsRequested != 1) { + detail::parallel_for_each(Begin, End, Fn); + return; + } +#endif std::for_each(Begin, End, Fn); } -template -void for_each_n(Policy policy, IndexTy Begin, IndexTy End, FuncTy Fn) { - static_assert(is_execution_policy::value, - "Invalid execution policy!"); - for (IndexTy I = Begin; I != End; ++I) - Fn(I); -} - -// Parallel algorithm implementations, only available when LLVM_ENABLE_THREADS -// is true. +template void for_each_n(size_t Begin, size_t End, FuncTy Fn) { #if LLVM_ENABLE_THREADS -template > -void sort(parallel_execution_policy policy, RandomAccessIterator Start, - RandomAccessIterator End, const Comparator &Comp = Comparator()) { - detail::parallel_sort(Start, End, Comp); + if (strategy.ThreadsRequested != 1) { + detail::parallel_for_each_n(Begin, End, Fn); + return; + } +#endif + for (size_t I = Begin; I != End; ++I) + Fn(I); } -template -void for_each(parallel_execution_policy policy, IterTy Begin, IterTy End, - FuncTy Fn) { - detail::parallel_for_each(Begin, End, Fn); +// Range wrappers. +template > +void sort(RangeTy &&R, const Comparator &Comp = Comparator()) { + llvm::parallel::sort(std::begin(R), std::end(R), Comp); } -template -void for_each_n(parallel_execution_policy policy, IndexTy Begin, IndexTy End, - FuncTy Fn) { - detail::parallel_for_each_n(Begin, End, Fn); +template void for_each(RangeTy &&R, FuncTy Fn) { + llvm::parallel::for_each(std::begin(R), std::end(R), Fn); } -#endif } // namespace parallel } // namespace llvm diff --git a/llvm/unittests/Support/ParallelTest.cpp b/llvm/unittests/Support/ParallelTest.cpp --- a/llvm/unittests/Support/ParallelTest.cpp +++ b/llvm/unittests/Support/ParallelTest.cpp @@ -30,7 +30,7 @@ for (auto &i : array) i = dist(randEngine); - sort(parallel::par, std::begin(array), std::end(array)); + parallel::sort(std::begin(array), std::end(array)); ASSERT_TRUE(llvm::is_sorted(array)); } @@ -40,7 +40,7 @@ // writing. uint32_t range[2050]; std::fill(range, range + 2050, 1); - for_each_n(parallel::par, 0, 2049, [&range](size_t I) { ++range[I]; }); + parallel::for_each_n(0, 2049, [&range](size_t I) { ++range[I]; }); uint32_t expected[2049]; std::fill(expected, expected + 2049, 2); diff --git a/mlir/docs/Diagnostics.md b/mlir/docs/Diagnostics.md --- a/mlir/docs/Diagnostics.md +++ b/mlir/docs/Diagnostics.md @@ -390,8 +390,7 @@ // Process a list of operations in parallel. std::vector opsToProcess = ...; -llvm::for_each_n(llvm::parallel::par, 0, opsToProcess.size(), - [&](size_t i) { +llvm::parallel::for_each_n(0, opsToProcess.size(), [&](size_t i) { // Notify the handler that we are processing the i'th operation. handler.setOrderIDForThread(i); auto *op = opsToProcess[i]; diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -494,7 +494,7 @@ // An atomic failure variable for the async executors. std::atomic passFailed(false); llvm::parallel::for_each( - llvm::parallel::par, asyncExecutors.begin(), + asyncExecutors.begin(), std::next(asyncExecutors.begin(), std::min(asyncExecutors.size(), opAMPairs.size())), [&](MutableArrayRef pms) { diff --git a/mlir/lib/Transforms/Inliner.cpp b/mlir/lib/Transforms/Inliner.cpp --- a/mlir/lib/Transforms/Inliner.cpp +++ b/mlir/lib/Transforms/Inliner.cpp @@ -497,8 +497,7 @@ if (context->isMultithreadingEnabled()) { ParallelDiagnosticHandler canonicalizationHandler(context); llvm::parallel::for_each_n( - llvm::parallel::par, /*Begin=*/size_t(0), - /*End=*/nodesToCanonicalize.size(), [&](size_t index) { + /*Begin=*/0, /*End=*/nodesToCanonicalize.size(), [&](size_t index) { // Set the order for this thread so that diagnostics will be properly // ordered. canonicalizationHandler.setOrderIDForThread(index);