diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -21,7 +21,6 @@
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Filesystem.h"
 #include "lld/Common/Memory.h"
-#include "lld/Common/Threads.h"
 #include "lld/Common/Timer.h"
 #include "lld/Common/Version.h"
 #include "llvm/ADT/Optional.h"
@@ -39,6 +38,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/TarWriter.h"
diff --git a/lld/COFF/ICF.cpp b/lld/COFF/ICF.cpp
--- a/lld/COFF/ICF.cpp
+++ b/lld/COFF/ICF.cpp
@@ -21,7 +21,6 @@
 #include "Chunks.h"
 #include "Symbols.h"
 #include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Threads.h"
 #include "lld/Common/Timer.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/Support/Debug.h"
@@ -230,10 +229,10 @@
   size_t boundaries[numShards + 1];
   boundaries[0] = 0;
   boundaries[numShards] = chunks.size();
-  parallelForEachN(1, numShards, [&](size_t i) {
+  parallel::for_each_n(1, numShards, [&](size_t i) {
     boundaries[i] = findBoundary((i - 1) * step, chunks.size());
   });
-  parallelForEachN(1, numShards + 1, [&](size_t i) {
+  parallel::for_each_n(1, numShards + 1, [&](size_t i) {
     if (boundaries[i - 1] < boundaries[i]) {
       forEachClassRange(boundaries[i - 1], boundaries[i], fn);
     }
@@ -266,14 +265,14 @@
         sc->eqClass[0] = nextId++;
 
   // Initially, we use hash values to partition sections.
-  parallelForEach(chunks, [&](SectionChunk *sc) {
+  parallel::for_each(chunks, [&](SectionChunk *sc) {
     sc->eqClass[0] = xxHash64(sc->getContents());
   });
 
   // Combine the hashes of the sections referenced by each section into its
   // hash.
   for (unsigned cnt = 0; cnt != 2; ++cnt) {
-    parallelForEach(chunks, [&](SectionChunk *sc) {
+    parallel::for_each(chunks, [&](SectionChunk *sc) {
       uint32_t hash = sc->eqClass[cnt % 2];
       for (Symbol *b : sc->symbols())
         if (auto *sym = dyn_cast_or_null<DefinedRegular>(b))
diff --git a/lld/COFF/LLDMapFile.cpp b/lld/COFF/LLDMapFile.cpp
--- a/lld/COFF/LLDMapFile.cpp
+++ b/lld/COFF/LLDMapFile.cpp
@@ -23,7 +23,7 @@
 #include "Symbols.h"
 #include "Writer.h"
 #include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Threads.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -74,7 +74,7 @@
 static DenseMap<DefinedRegular *, std::string>
 getSymbolStrings(ArrayRef<DefinedRegular *> syms) {
   std::vector<std::string> str(syms.size());
-  parallelForEachN((size_t)0, syms.size(), [&](size_t i) {
+  parallel::for_each_n((size_t)0, syms.size(), [&](size_t i) {
     raw_string_ostream os(str[i]);
     writeHeader(os, syms[i]->getRVA(), 0, 0);
     os << indent16 << toString(*syms[i]);
diff --git a/lld/COFF/MapFile.cpp b/lld/COFF/MapFile.cpp
--- a/lld/COFF/MapFile.cpp
+++ b/lld/COFF/MapFile.cpp
@@ -32,8 +32,8 @@
 #include "Symbols.h"
 #include "Writer.h"
 #include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Threads.h"
 #include "lld/Common/Timer.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -76,7 +76,7 @@
     v[i] = SortEntry(syms[i], i);
 
   // Remove duplicate symbol pointers
-  parallelSort(v, std::less<SortEntry>());
+  parallel::sort(v, std::less<SortEntry>());
   auto end = std::unique(v.begin(), v.end(),
                          [](const SortEntry &a, const SortEntry &b) {
                            return a.first == b.first;
@@ -84,7 +84,7 @@
   v.erase(end, v.end());
 
   // Sort by RVA then original order
-  parallelSort(v, [](const SortEntry &a, const SortEntry &b) {
+  parallel::sort(v, [](const SortEntry &a, const SortEntry &b) {
     // Add config->imageBase to avoid comparing "negative" RVAs.
     // This can happen with symbols of Absolute kind
     uint64_t rvaa = config->imageBase + a.first->getRVA();
@@ -144,7 +144,7 @@
 static DenseMap<Defined *, std::string>
 getSymbolStrings(ArrayRef<Defined *> syms) {
   std::vector<std::string> str(syms.size());
-  parallelForEachN((size_t)0, syms.size(), [&](size_t i) {
+  parallel::for_each_n((size_t)0, syms.size(), [&](size_t i) {
     raw_string_ostream os(str[i]);
     Defined *sym = syms[i];
 
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -16,7 +16,6 @@
 #include "TypeMerger.h"
 #include "Writer.h"
 #include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Threads.h"
 #include "lld/Common/Timer.h"
 #include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
@@ -57,6 +56,7 @@
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/FormatAdapters.h"
 #include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include <memory>
@@ -1339,7 +1339,7 @@
   if (!publics.empty()) {
     publicSymbols = publics.size();
     // Sort the public symbols and add them to the stream.
-    parallelSort(publics, [](const PublicSym32 &l, const PublicSym32 &r) {
+    parallel::sort(publics, [](const PublicSym32 &l, const PublicSym32 &r) {
       return l.Name < r.Name;
     });
     for (const PublicSym32 &pub : publics)
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -17,7 +17,6 @@
 #include "Symbols.h"
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
-#include "lld/Common/Threads.h"
 #include "lld/Common/Timer.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
@@ -1785,7 +1784,7 @@
     // ADD instructions).
     if (sec->header.Characteristics & IMAGE_SCN_CNT_CODE)
       memset(secBuf, 0xCC, sec->getRawSize());
-    parallelForEach(sec->chunks, [&](Chunk *c) {
+    parallel::for_each(sec->chunks, [&](Chunk *c) {
       c->writeTo(secBuf + c->getRVA() - sec->getRVA());
     });
   }
@@ -1856,14 +1855,14 @@
   uint8_t *end = bufAddr(lastPdata) + lastPdata->getSize();
   if (config->machine == AMD64) {
     struct Entry { ulittle32_t begin, end, unwind; };
-    parallelSort(
+    parallel::sort(
         MutableArrayRef<Entry>((Entry *)begin, (Entry *)end),
         [](const Entry &a, const Entry &b) { return a.begin < b.begin; });
     return;
   }
   if (config->machine == ARMNT || config->machine == ARM64) {
     struct Entry { ulittle32_t begin, unwind; };
-    parallelSort(
+    parallel::sort(
         MutableArrayRef<Entry>((Entry *)begin, (Entry *)end),
         [](const Entry &a, const Entry &b) { return a.begin < b.begin; });
     return;
diff --git a/lld/Common/ErrorHandler.cpp b/lld/Common/ErrorHandler.cpp
--- a/lld/Common/ErrorHandler.cpp
+++ b/lld/Common/ErrorHandler.cpp
@@ -8,7 +8,7 @@
 
 #include "lld/Common/ErrorHandler.h"
 
-#include "lld/Common/Threads.h"
+#include "llvm/Support/Parallel.h"
 
 #include "llvm/ADT/Twine.h"
 #include "llvm/IR/DiagnosticInfo.h"
diff --git a/lld/Common/Filesystem.cpp b/lld/Common/Filesystem.cpp
--- a/lld/Common/Filesystem.cpp
+++ b/lld/Common/Filesystem.cpp
@@ -11,10 +11,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "lld/Common/Filesystem.h"
-#include "lld/Common/Threads.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Parallel.h"
 #if LLVM_ON_UNIX
 #include <unistd.h>
 #endif
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -43,7 +43,6 @@
 #include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
 #include "lld/Common/TargetOptionsCommandFlags.h"
-#include "lld/Common/Threads.h"
 #include "lld/Common/Version.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -53,6 +52,7 @@
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/GlobPattern.h"
 #include "llvm/Support/LEB128.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/TarWriter.h"
 #include "llvm/Support/TargetSelect.h"
@@ -1749,7 +1749,7 @@
   }
 
   // Update pointers in input files.
-  parallelForEach(objectFiles, [&](InputFile *file) {
+  parallel::for_each(objectFiles, [&](InputFile *file) {
     MutableArrayRef<Symbol *> syms = file->getMutableSymbols();
     for (size_t i = 0, e = syms.size(); i != e; ++i)
       if (Symbol *s = map.lookup(syms[i]))
diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp
--- a/lld/ELF/ICF.cpp
+++ b/lld/ELF/ICF.cpp
@@ -80,10 +80,10 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Writer.h"
-#include "lld/Common/Threads.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Object/ELF.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/xxhash.h"
 #include <algorithm>
@@ -419,11 +419,11 @@
   boundaries[0] = 0;
   boundaries[numShards] = sections.size();
 
-  parallelForEachN(1, numShards, [&](size_t i) {
+  parallel::for_each_n(1, numShards, [&](size_t i) {
     boundaries[i] = findBoundary((i - 1) * step, sections.size());
   });
 
-  parallelForEachN(1, numShards + 1, [&](size_t i) {
+  parallel::for_each_n(1, numShards + 1, [&](size_t i) {
     if (boundaries[i - 1] < boundaries[i])
       forEachClassRange(boundaries[i - 1], boundaries[i], fn);
   });
@@ -467,12 +467,11 @@
   }
 
   // Initially, we use hash values to partition sections.
-  parallelForEach(sections, [&](InputSection *s) {
-    s->eqClass[0] = xxHash64(s->data());
-  });
+  parallel::for_each(
+      sections, [&](InputSection *s) { s->eqClass[0] = xxHash64(s->data()); });
 
   for (unsigned cnt = 0; cnt != 2; ++cnt) {
-    parallelForEach(sections, [&](InputSection *s) {
+    parallel::for_each(sections, [&](InputSection *s) {
       if (s->areRelocsRela)
         combineRelocHashes<ELFT>(cnt, s, s->template relas<ELFT>());
       else
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -21,7 +21,6 @@
 #include "Writer.h"
 #include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
-#include "lld/Common/Threads.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/ELF.h"
@@ -29,6 +28,7 @@
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
 #include <algorithm>
 #include <cassert>
diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp
--- a/lld/ELF/MapFile.cpp
+++ b/lld/ELF/MapFile.cpp
@@ -26,9 +26,9 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "lld/Common/Strings.h"
-#include "lld/Common/Threads.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
@@ -84,7 +84,7 @@
 static DenseMap<Symbol *, std::string>
 getSymbolStrings(ArrayRef<Defined *> syms) {
   std::vector<std::string> str(syms.size());
-  parallelForEachN(0, syms.size(), [&](size_t i) {
+  parallel::for_each_n(0, syms.size(), [&](size_t i) {
     raw_string_ostream os(str[i]);
     OutputSection *osec = syms[i]->getOutputSection();
     uint64_t vma = syms[i]->getVA();
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -14,11 +14,11 @@
 #include "Target.h"
 #include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
-#include "lld/Common/Threads.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/SHA1.h"
 #include <regex>
 
@@ -337,7 +337,7 @@
   if (nonZeroFiller)
     fill(buf, sections.empty() ? size : sections[0]->outSecOff, filler);
 
-  parallelForEachN(0, sections.size(), [&](size_t i) {
+  parallel::for_each_n(0, sections.size(), [&](size_t i) {
     InputSection *isec = sections[i];
     isec->writeTo<ELFT>(buf);
 
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -26,7 +26,6 @@
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
-#include "lld/Common/Threads.h"
 #include "lld/Common/Version.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/StringExtras.h"
@@ -37,6 +36,7 @@
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/MD5.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/TimeProfiler.h"
 #include <cstdlib>
 #include <thread>
@@ -2758,7 +2758,7 @@
 
   // Instantiate GdbSymbols while uniqufying them by name.
   std::vector<std::vector<GdbSymbol>> symbols(numShards);
-  parallelForEachN(0, concurrency, [&](size_t threadId) {
+  parallel::for_each_n(0, concurrency, [&](size_t threadId) {
     uint32_t i = 0;
     for (ArrayRef<NameAttrEntry> entries : nameAttrs) {
       for (const NameAttrEntry &ent : entries) {
@@ -2821,7 +2821,7 @@
   std::vector<GdbChunk> chunks(sections.size());
   std::vector<std::vector<NameAttrEntry>> nameAttrs(sections.size());
 
-  parallelForEachN(0, sections.size(), [&](size_t i) {
+  parallel::for_each_n(0, sections.size(), [&](size_t i) {
     // To keep memory usage low, we don't want to keep cached DWARFContext, so
     // avoid getDwarf() here.
     ObjFile<ELFT> *file = sections[i]->getFile<ELFT>();
@@ -2895,7 +2895,7 @@
 
   // Write the string pool.
   hdr->constantPoolOff = buf - start;
-  parallelForEach(symbols, [&](GdbSymbol &sym) {
+  parallel::for_each(symbols, [&](GdbSymbol &sym) {
     memcpy(buf + sym.nameOff, sym.name.data(), sym.name.size());
   });
 
@@ -3199,7 +3199,7 @@
                        numShards));
 
   // Add section pieces to the builders.
-  parallelForEachN(0, concurrency, [&](size_t threadId) {
+  parallel::for_each_n(0, concurrency, [&](size_t threadId) {
     for (MergeInputSection *sec : sections) {
       for (size_t i = 0, e = sec->pieces.size(); i != e; ++i) {
         if (!sec->pieces[i].live)
@@ -3224,7 +3224,7 @@
 
   // So far, section pieces have offsets from beginning of shards, but
   // we want offsets from beginning of the whole section. Fix them.
-  parallelForEach(sections, [&](MergeInputSection *sec) {
+  parallel::for_each(sections, [&](MergeInputSection *sec) {
     for (size_t i = 0, e = sec->pieces.size(); i != e; ++i)
       if (sec->pieces[i].live)
         sec->pieces[i].outputOff +=
@@ -3245,7 +3245,7 @@
   llvm::TimeTraceScope timeScope("Split sections");
   // splitIntoPieces needs to be called on each MergeInputSection
   // before calling finalizeContents().
-  parallelForEach(inputSections, [](InputSectionBase *sec) {
+  parallel::for_each(inputSections, [](InputSectionBase *sec) {
     if (auto *s = dyn_cast<MergeInputSection>(sec))
       s->splitIntoPieces();
     else if (auto *eh = dyn_cast<EhInputSection>(sec))
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -22,9 +22,9 @@
 #include "lld/Common/Filesystem.h"
 #include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
-#include "lld/Common/Threads.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/RandomNumberGenerator.h"
 #include "llvm/Support/SHA1.h"
 #include "llvm/Support/TimeProfiler.h"
@@ -1749,7 +1749,7 @@
 // the end of the section are relaxed.
 static void fixSymbolsAfterShrinking() {
   for (InputFile *File : objectFiles) {
-    parallelForEach(File->getSymbols(), [&](Symbol *Sym) {
+    parallel::for_each(File->getSymbols(), [&](Symbol *Sym) {
       auto *def = dyn_cast<Defined>(Sym);
       if (!def)
         return;
@@ -1808,7 +1808,7 @@
     // Delete all fall through jump instructions.  Also, check if two
     // consecutive jump instructions can be flipped so that a fall
     // through jmp instruction can be deleted.
-    parallelForEachN(0, sections.size(), [&](size_t i) {
+    parallel::for_each_n(0, sections.size(), [&](size_t i) {
       InputSection *next = i + 1 < sections.size() ? sections[i + 1] : nullptr;
       InputSection &is = *sections[i];
       result[i] =
@@ -2905,7 +2905,7 @@
   std::vector<uint8_t> hashes(chunks.size() * hashBuf.size());
 
   // Compute hash values.
-  parallelForEachN(0, chunks.size(), [&](size_t i) {
+  parallel::for_each_n(0, chunks.size(), [&](size_t i) {
     hashFn(hashes.data() + i * hashBuf.size(), chunks[i]);
   });
 
diff --git a/lld/include/lld/Common/Threads.h b/lld/include/lld/Common/Threads.h
deleted file mode 100644
--- a/lld/include/lld/Common/Threads.h
+++ /dev/null
@@ -1,90 +0,0 @@
-//===- Threads.h ------------------------------------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// LLD supports threads to distribute workloads to multiple cores. Using
-// multicore is most effective when more than one core are idle. At the
-// last step of a build, it is often the case that a linker is the only
-// active process on a computer. So, we are naturally interested in using
-// threads wisely to reduce latency to deliver results to users.
-//
-// That said, we don't want to do "too clever" things using threads.
-// Complex multi-threaded algorithms are sometimes extremely hard to
-// reason about and can easily mess up the entire design.
-//
-// Fortunately, when a linker links large programs (when the link time is
-// most critical), it spends most of the time to work on massive number of
-// small pieces of data of the same kind, and there are opportunities for
-// large parallelism there. Here are examples:
-//
-//  - We have hundreds of thousands of input sections that need to be
-//    copied to a result file at the last step of link. Once we fix a file
-//    layout, each section can be copied to its destination and its
-//    relocations can be applied independently.
-//
-//  - We have tens of millions of small strings when constructing a
-//    mergeable string section.
-//
-// For the cases such as the former, we can just use parallelForEach
-// instead of std::for_each (or a plain for loop). Because tasks are
-// completely independent from each other, we can run them in parallel
-// without any coordination between them. That's very easy to understand
-// and reason about.
-//
-// For the cases such as the latter, we can use parallel algorithms to
-// deal with massive data. We have to write code for a tailored algorithm
-// for each problem, but the complexity of multi-threading is isolated in
-// a single pass and doesn't affect the linker's overall design.
-//
-// The above approach seems to be working fairly well. As an example, when
-// linking Chromium (output size 1.6 GB), using 4 cores reduces latency to
-// 75% compared to single core (from 12.66 seconds to 9.55 seconds) on my
-// Ivy Bridge Xeon 2.8 GHz machine. Using 40 cores reduces it to 63% (from
-// 12.66 seconds to 7.95 seconds). Because of the Amdahl's law, the
-// speedup is not linear, but as you add more cores, it gets faster.
-//
-// On a final note, if you are trying to optimize, keep the axiom "don't
-// guess, measure!" in mind. Some important passes of the linker are not
-// that slow. For example, resolving all symbols is not a very heavy pass,
-// although it would be very hard to parallelize it. You want to first
-// identify a slow pass and then optimize it.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLD_COMMON_THREADS_H
-#define LLD_COMMON_THREADS_H
-
-#include "llvm/Support/Parallel.h"
-#include <functional>
-
-namespace lld {
-
-template <typename R, class FuncTy> void parallelForEach(R &&range, FuncTy fn) {
-  if (llvm::parallel::strategy.ThreadsRequested != 1)
-    for_each(llvm::parallel::par, std::begin(range), std::end(range), fn);
-  else
-    for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn);
-}
-
-inline void parallelForEachN(size_t begin, size_t end,
-                             llvm::function_ref<void(size_t)> fn) {
-  if (llvm::parallel::strategy.ThreadsRequested != 1)
-    for_each_n(llvm::parallel::par, begin, end, fn);
-  else
-    for_each_n(llvm::parallel::seq, begin, end, fn);
-}
-
-template <typename R, class FuncTy> void parallelSort(R &&range, FuncTy fn) {
-  if (llvm::parallel::strategy.ThreadsRequested != 1)
-    sort(llvm::parallel::par, std::begin(range), std::end(range), fn);
-  else
-    sort(llvm::parallel::seq, std::begin(range), std::end(range), fn);
-}
-
-} // namespace lld
-
-#endif
diff --git a/lld/lib/ReaderWriter/MachO/LayoutPass.cpp b/lld/lib/ReaderWriter/MachO/LayoutPass.cpp
--- a/lld/lib/ReaderWriter/MachO/LayoutPass.cpp
+++ b/lld/lib/ReaderWriter/MachO/LayoutPass.cpp
@@ -461,10 +461,11 @@
   });
 
   std::vector<LayoutPass::SortKey> vec = decorate(atomRange);
-  sort(llvm::parallel::par, vec.begin(), vec.end(),
-       [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool {
-         return compareAtoms(l, r, _customSorter);
-       });
+  llvm::parallel::sort(
+      vec.begin(), vec.end(),
+      [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool {
+        return compareAtoms(l, r, _customSorter);
+      });
   LLVM_DEBUG(checkTransitivity(vec, _customSorter));
   undecorate(atomRange, vec);
 
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -19,7 +19,6 @@
 #include "lld/Common/Memory.h"
 #include "lld/Common/Reproduce.h"
 #include "lld/Common/Strings.h"
-#include "lld/Common/Threads.h"
 #include "lld/Common/Version.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Object/Wasm.h"
@@ -27,6 +26,7 @@
 #include "llvm/Option/ArgList.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Host.h"
+#include "llvm/Support/Parallel.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/TarWriter.h"
@@ -679,7 +679,7 @@
   }
 
   // Update pointers in input files.
-  parallelForEach(symtab->objectFiles, [&](InputFile *file) {
+  parallel::for_each(symtab->objectFiles, [&](InputFile *file) {
     MutableArrayRef<Symbol *> syms = file->getMutableSymbols();
     for (size_t i = 0, e = syms.size(); i != e; ++i)
       if (Symbol *s = map.lookup(syms[i]))
diff --git a/lld/wasm/OutputSections.cpp b/lld/wasm/OutputSections.cpp
--- a/lld/wasm/OutputSections.cpp
+++ b/lld/wasm/OutputSections.cpp
@@ -12,9 +12,9 @@
 #include "OutputSegment.h"
 #include "WriterUtils.h"
 #include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Threads.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/LEB128.h"
+#include "llvm/Support/Parallel.h"
 
 #define DEBUG_TYPE "lld"
 
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -20,7 +20,6 @@
 #include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
-#include "lld/Common/Threads.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -31,6 +30,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/LEB128.h"
+#include "llvm/Support/Parallel.h"
 
 #include <cstdarg>
 #include <map>
@@ -189,7 +189,7 @@
 
 void Writer::writeSections() {
   uint8_t *buf = buffer->getBufferStart();
-  parallelForEach(outputSections, [buf](OutputSection *s) {
+  parallel::for_each(outputSections, [buf](OutputSection *s) {
     assert(s->isNeeded());
     s->writeTo(buf);
   });
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -22,17 +22,6 @@
 namespace llvm {
 
 namespace parallel {
-struct sequential_execution_policy {};
-struct parallel_execution_policy {};
-
-template <typename T>
-struct is_execution_policy
-    : public std::integral_constant<
-          bool, llvm::is_one_of<T, sequential_execution_policy,
-                                parallel_execution_policy>::value> {};
-
-constexpr sequential_execution_policy seq{};
-constexpr parallel_execution_policy par{};
 
 // Strategy for the default executor used by the parallel routines provided by
 // this file. It defaults to using all hardware threads and should be
@@ -169,59 +158,54 @@
 
 #endif
 
-template <typename Iter>
-using DefComparator =
-    std::less<typename std::iterator_traits<Iter>::value_type>;
-
 } // namespace detail
 
-// sequential algorithm implementations.
-template <class Policy, class RandomAccessIterator,
-          class Comparator = detail::DefComparator<RandomAccessIterator>>
-void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End,
+// Sorts [Start, End) using Comp. Dispatches to detail::parallel_sort unless
+// threads are compiled out or strategy.ThreadsRequested == 1.
+template <class RandomAccessIterator,
+          class Comparator = std::less<
+              typename std::iterator_traits<RandomAccessIterator>::value_type>>
+void sort(RandomAccessIterator Start, RandomAccessIterator End,
           const Comparator &Comp = Comparator()) {
-  static_assert(is_execution_policy<Policy>::value,
-                "Invalid execution policy!");
+#if LLVM_ENABLE_THREADS
+  if (strategy.ThreadsRequested != 1)
+    return detail::parallel_sort(Start, End, Comp);
+#endif
   llvm::sort(Start, End, Comp);
 }
 
-template <class Policy, class IterTy, class FuncTy>
-void for_each(Policy policy, IterTy Begin, IterTy End, FuncTy Fn) {
-  static_assert(is_execution_policy<Policy>::value,
-                "Invalid execution policy!");
+// Applies Fn to every element of [Begin, End) via detail::parallel_for_each,
+// or std::for_each if threads are compiled out or only one is requested.
+template <class IterTy, class FuncTy>
+void for_each(IterTy Begin, IterTy End, FuncTy Fn) {
+#if LLVM_ENABLE_THREADS
+  if (strategy.ThreadsRequested != 1)
+    return detail::parallel_for_each(Begin, End, Fn);
+#endif
   std::for_each(Begin, End, Fn);
 }
 
-template <class Policy, class IndexTy, class FuncTy>
-void for_each_n(Policy policy, IndexTy Begin, IndexTy End, FuncTy Fn) {
-  static_assert(is_execution_policy<Policy>::value,
-                "Invalid execution policy!");
-  for (IndexTy I = Begin; I != End; ++I)
-    Fn(I);
-}
-
-// Parallel algorithm implementations, only available when LLVM_ENABLE_THREADS
-// is true.
+// Calls Fn(I) for each I in [Begin, End) via detail::parallel_for_each_n,
+// or a plain loop if threads are compiled out or only one is requested.
+template <class FuncTy> void for_each_n(size_t Begin, size_t End, FuncTy Fn) {
 #if LLVM_ENABLE_THREADS
-template <class RandomAccessIterator,
-          class Comparator = detail::DefComparator<RandomAccessIterator>>
-void sort(parallel_execution_policy policy, RandomAccessIterator Start,
-          RandomAccessIterator End, const Comparator &Comp = Comparator()) {
-  detail::parallel_sort(Start, End, Comp);
+  if (strategy.ThreadsRequested != 1)
+    return detail::parallel_for_each_n(Begin, End, Fn);
+#endif
+  for (size_t Idx = Begin; Idx != End; ++Idx)
+    Fn(Idx);
 }
 
-template <class IterTy, class FuncTy>
-void for_each(parallel_execution_policy policy, IterTy Begin, IterTy End,
-              FuncTy Fn) {
-  detail::parallel_for_each(Begin, End, Fn);
+// Range wrappers. The default comparator uses the decayed element type of R.
+template <class RangeTy, class Comparator = std::less<std::decay_t<
+              decltype(*std::begin(std::declval<RangeTy &>()))>>>
+void sort(RangeTy &&R, const Comparator &Comp = Comparator()) {
+  llvm::parallel::sort(std::begin(R), std::end(R), Comp);
 }
 
-template <class IndexTy, class FuncTy>
-void for_each_n(parallel_execution_policy policy, IndexTy Begin, IndexTy End,
-                FuncTy Fn) {
-  detail::parallel_for_each_n(Begin, End, Fn);
+template <class RangeTy, class FuncTy> void for_each(RangeTy &&R, FuncTy Fn) {
+  llvm::parallel::for_each(std::begin(R), std::end(R), Fn);
 }
-#endif
 
 } // namespace parallel
 } // namespace llvm
diff --git a/llvm/unittests/Support/ParallelTest.cpp b/llvm/unittests/Support/ParallelTest.cpp
--- a/llvm/unittests/Support/ParallelTest.cpp
+++ b/llvm/unittests/Support/ParallelTest.cpp
@@ -30,7 +30,7 @@
   for (auto &i : array)
     i = dist(randEngine);
 
-  sort(parallel::par, std::begin(array), std::end(array));
+  parallel::sort(std::begin(array), std::end(array));
   ASSERT_TRUE(llvm::is_sorted(array));
 }
 
@@ -40,7 +40,7 @@
   // writing.
   uint32_t range[2050];
   std::fill(range, range + 2050, 1);
-  for_each_n(parallel::par, 0, 2049, [&range](size_t I) { ++range[I]; });
+  parallel::for_each_n(0, 2049, [&range](size_t I) { ++range[I]; });
 
   uint32_t expected[2049];
   std::fill(expected, expected + 2049, 2);
diff --git a/mlir/docs/Diagnostics.md b/mlir/docs/Diagnostics.md
--- a/mlir/docs/Diagnostics.md
+++ b/mlir/docs/Diagnostics.md
@@ -390,8 +390,7 @@
 
 // Process a list of operations in parallel.
 std::vector<Operation *> opsToProcess = ...;
-llvm::for_each_n(llvm::parallel::par, 0, opsToProcess.size(),
-                 [&](size_t i) {
+llvm::parallel::for_each_n(0, opsToProcess.size(), [&](size_t i) {
   // Notify the handler that we are processing the i'th operation.
   handler.setOrderIDForThread(i);
   auto *op = opsToProcess[i];
diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp
--- a/mlir/lib/Pass/Pass.cpp
+++ b/mlir/lib/Pass/Pass.cpp
@@ -494,7 +494,7 @@
   // An atomic failure variable for the async executors.
   std::atomic<bool> passFailed(false);
   llvm::parallel::for_each(
-      llvm::parallel::par, asyncExecutors.begin(),
+      asyncExecutors.begin(),
       std::next(asyncExecutors.begin(),
                 std::min(asyncExecutors.size(), opAMPairs.size())),
       [&](MutableArrayRef<OpPassManager> pms) {
diff --git a/mlir/lib/Transforms/Inliner.cpp b/mlir/lib/Transforms/Inliner.cpp
--- a/mlir/lib/Transforms/Inliner.cpp
+++ b/mlir/lib/Transforms/Inliner.cpp
@@ -497,8 +497,7 @@
   if (context->isMultithreadingEnabled()) {
     ParallelDiagnosticHandler canonicalizationHandler(context);
     llvm::parallel::for_each_n(
-        llvm::parallel::par, /*Begin=*/size_t(0),
-        /*End=*/nodesToCanonicalize.size(), [&](size_t index) {
+        /*Begin=*/0, /*End=*/nodesToCanonicalize.size(), [&](size_t index) {
           // Set the order for this thread so that diagnostics will be properly
           // ordered.
           canonicalizationHandler.setOrderIDForThread(index);