Index: lld/COFF/ICF.cpp
===================================================================
--- lld/COFF/ICF.cpp
+++ lld/COFF/ICF.cpp
@@ -192,7 +192,7 @@
   // Split sections into 256 shards and call Fn in parallel.
   size_t NumShards = 256;
   size_t Step = Chunks.size() / NumShards;
-  parallel_for(size_t(0), NumShards, [&](size_t I) {
+  for_each_n(parallel::par, size_t(0), NumShards, [&](size_t I) {
     forEachClassRange(I * Step, (I + 1) * Step, Fn);
   });
   forEachClassRange(Step * NumShards, Chunks.size(), Fn);
Index: lld/COFF/MapFile.cpp
===================================================================
--- lld/COFF/MapFile.cpp
+++ lld/COFF/MapFile.cpp
@@ -76,7 +76,7 @@
 static DenseMap<DefinedRegular *, std::string>
 getSymbolStrings(ArrayRef<DefinedRegular *> Syms) {
   std::vector<std::string> Str(Syms.size());
-  parallel_for((size_t)0, Syms.size(), [&](size_t I) {
+  for_each_n(parallel::par, (size_t)0, Syms.size(), [&](size_t I) {
     raw_string_ostream OS(Str[I]);
     writeHeader(OS, Syms[I]->getRVA(), 0, 0);
     OS << indent(2) << toString(*Syms[I]);
Index: lld/COFF/Writer.cpp
===================================================================
--- lld/COFF/Writer.cpp
+++ lld/COFF/Writer.cpp
@@ -745,8 +745,8 @@
     // ADD instructions).
     if (Sec->getPermissions() & IMAGE_SCN_CNT_CODE)
       memset(SecBuf, 0xCC, Sec->getRawSize());
-    parallel_for_each(Sec->getChunks().begin(), Sec->getChunks().end(),
-                      [&](Chunk *C) { C->writeTo(SecBuf); });
+    for_each(parallel::par, Sec->getChunks().begin(), Sec->getChunks().end(),
+             [&](Chunk *C) { C->writeTo(SecBuf); });
   }
 }
 
@@ -760,16 +760,14 @@
   uint8_t *End = Begin + Sec->getVirtualSize();
   if (Config->Machine == AMD64) {
     struct Entry { ulittle32_t Begin, End, Unwind; };
-    parallel_sort(
-        (Entry *)Begin, (Entry *)End,
-        [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
+    sort(parallel::par, (Entry *)Begin, (Entry *)End,
+         [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
     return;
   }
   if (Config->Machine == ARMNT) {
     struct Entry { ulittle32_t Begin, Unwind; };
-    parallel_sort(
-        (Entry *)Begin, (Entry *)End,
-        [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
+    sort(parallel::par, (Entry *)Begin, (Entry *)End,
+         [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; });
     return;
   }
   errs() << "warning: don't know how to handle .pdata.\n";
Index: lld/ELF/Threads.h
===================================================================
--- lld/ELF/Threads.h
+++ lld/ELF/Threads.h
@@ -70,20 +70,18 @@
 template <class IterTy, class FuncTy>
 void parallelForEach(IterTy Begin, IterTy End, FuncTy Fn) {
+  parallel::execution_policy pol = parallel::seq;
   if (Config->Threads)
-    parallel_for_each(Begin, End, Fn);
-  else
-    std::for_each(Begin, End, Fn);
+    pol = parallel::par;
+  parallel::for_each(pol, Begin, End, Fn);
 }
 
 inline void parallelFor(size_t Begin, size_t End,
                         std::function<void(size_t)> Fn) {
-  if (Config->Threads) {
-    parallel_for(Begin, End, Fn);
-  } else {
-    for (size_t I = Begin; I < End; ++I)
-      Fn(I);
-  }
+  parallel::execution_policy pol = parallel::seq;
+  if (Config->Threads)
+    pol = parallel::par;
+  parallel::for_each_n(pol, Begin, End, Fn);
 }
 }
 }
Index: lld/include/lld/Core/Parallel.h
===================================================================
--- lld/include/lld/Core/Parallel.h
+++ lld/include/lld/Core/Parallel.h
@@ -12,8 +12,9 @@
 
 #include "lld/Core/LLVM.h"
 #include "lld/Core/TaskGroup.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/Support/MathExtras.h"
 
 #include <algorithm>
 
 #if defined(_MSC_VER) && LLVM_ENABLE_THREADS
@@ -24,25 +25,126 @@
 
 namespace lld {
 
-#if !LLVM_ENABLE_THREADS
-template <class RandomAccessIterator, class Comparator>
-void parallel_sort(
-    RandomAccessIterator Start, RandomAccessIterator End,
-    const Comparator &Comp = std::less<
-        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
-  std::sort(Start, End, Comp);
+namespace parallel {
+// Compile-time tag types selecting the sequential or parallel overloads.
+struct sequential_execution_policy {};
+struct parallel_execution_policy {};
+struct execution_policy;
+
+template <typename T>
+struct is_execution_policy
+    : public std::integral_constant<
+          bool,
+          llvm::is_one_of<T, sequential_execution_policy,
+                          parallel_execution_policy, execution_policy>::value> {
+};
+
+namespace detail {
+// Numeric ids used by execution_policy's LLVM-style RTTI (classof).
+template <typename T> struct policy_traits;
+template <> struct policy_traits<sequential_execution_policy> {
+  static constexpr unsigned id = 0;
+};
+template <> struct policy_traits<parallel_execution_policy> {
+  static constexpr unsigned id = 1;
+};
+template <> struct policy_traits<execution_policy> {
+  static constexpr unsigned id = 2;
+};
 }
-#elif defined(_MSC_VER)
-// Use ppl parallel_sort on Windows.
+
+// Dynamic execution policy: type-erased wrapper so the policy can be chosen
+// at run time (e.g. from a --threads command line flag).
+struct execution_policy {
+private:
+  struct impl_base {
+    virtual ~impl_base() {}
+    virtual impl_base *clone() const = 0;
+    virtual unsigned id() const = 0;
+  };
+
+  template <typename Policy> struct impl : public impl_base {
+    explicit impl(const Policy &P) : P(P) {}
+
+    static_assert(is_execution_policy<Policy>::value,
+                  "Not an execution policy!");
+
+    static bool classof(const impl_base *Other) {
+      return Other->id() == detail::policy_traits<Policy>::id;
+    }
+
+    unsigned id() const override { return detail::policy_traits<Policy>::id; }
+
+    impl_base *clone() const override { return new impl(P); }
+
+    // Stored by value: policies are empty tag structs, and a reference
+    // member would dangle if constructed from a temporary policy.
+    Policy P;
+  };
+
+public:
+  template <typename Policy>
+  execution_policy(const Policy &exec)
+      : Impl(llvm::make_unique<impl<Policy>>(exec)) {
+    static_assert(is_execution_policy<Policy>::value,
+                  "Not an execution policy!");
+  }
+
+  execution_policy(const execution_policy &other) : Impl(other.Impl->clone()) {}
+
+  template <typename Policy> execution_policy &operator=(const Policy &exec) {
+    static_assert(is_execution_policy<Policy>::value,
+                  "Not an execution policy!");
+    Impl = llvm::make_unique<impl<Policy>>(exec);
+    return *this;
+  }
+
+  execution_policy &operator=(const execution_policy &other) {
+    Impl.reset(other.Impl->clone());
+    return *this;
+  }
+
+  // Returns the wrapped policy if it is exactly Policy, otherwise nullptr.
+  template <typename Policy> Policy *get() {
+    static_assert(is_execution_policy<Policy>::value,
+                  "Not an execution policy!");
+    impl<Policy> *Ptr = llvm::dyn_cast<impl<Policy>>(Impl.get());
+    return Ptr ? &Ptr->P : nullptr;
+  }
+  template <typename Policy> const Policy *get() const {
+    static_assert(is_execution_policy<Policy>::value,
+                  "Not an execution policy!");
+    impl<Policy> *Ptr = llvm::dyn_cast<impl<Policy>>(Impl.get());
+    return Ptr ? &Ptr->P : nullptr;
+  }
+
+private:
+  std::unique_ptr<impl_base> Impl;
+};
+
+constexpr sequential_execution_policy seq{};
+constexpr parallel_execution_policy par{};
+
+#if LLVM_ENABLE_THREADS
+
+namespace detail {
+
+#if defined(_MSC_VER)
+// Use the Microsoft PPL implementations on Windows.
 template <class RandomAccessIterator, class Comparator>
-void parallel_sort(
-    RandomAccessIterator Start, RandomAccessIterator End,
-    const Comparator &Comp = std::less<
-        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
+void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
+                   const Comparator &Comp) {
   concurrency::parallel_sort(Start, End, Comp);
 }
+template <class IterTy, class FuncTy>
+void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
+  concurrency::parallel_for_each(Begin, End, Fn);
+}
+
+template <class IndexTy, class FuncTy>
+void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
+  concurrency::parallel_for(Begin, End, Fn);
+}
+
 #else
-namespace detail {
 const ptrdiff_t MinParallelSize = 1024;
 
 /// \brief Inclusive median.
@@ -83,46 +185,15 @@
   });
   parallel_quick_sort(Pivot + 1, End, Comp, TG, Depth - 1);
 }
-}
 
 template <class RandomAccessIterator, class Comparator>
-void parallel_sort(
-    RandomAccessIterator Start, RandomAccessIterator End,
-    const Comparator &Comp = std::less<
-        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
+void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
+                   const Comparator &Comp) {
   TaskGroup TG;
-  detail::parallel_quick_sort(Start, End, Comp, TG,
-                              llvm::Log2_64(std::distance(Start, End)) + 1);
-}
-#endif
-
-template <class T> void parallel_sort(T *Start, T *End) {
-  parallel_sort(Start, End, std::less<T>());
-}
-
-#if !LLVM_ENABLE_THREADS
-template <class IterTy, class FuncTy>
-void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
-  std::for_each(Begin, End, Fn);
-}
-
-template <class IndexTy, class FuncTy>
-void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
-  for (IndexTy I = Begin; I != End; ++I)
-    Fn(I);
-}
-#elif defined(_MSC_VER)
-// Use ppl parallel_for_each on Windows.
-template <class IterTy, class FuncTy>
-void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
-  concurrency::parallel_for_each(Begin, End, Fn);
+  parallel_quick_sort(Start, End, Comp, TG,
+                      llvm::Log2_64(std::distance(Start, End)) + 1);
 }
 
-template <class IndexTy, class FuncTy>
-void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
-  concurrency::parallel_for(Begin, End, Fn);
-}
-#else
 template <class IterTy, class FuncTy>
 void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
   // TaskGroup has a relatively high overhead, so we want to reduce
@@ -142,7 +213,7 @@
 }
 
 template <class IndexTy, class FuncTy>
-void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
+void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
   ptrdiff_t TaskSize = (End - Begin) / 1024;
   if (TaskSize == 0)
     TaskSize = 1;
@@ -160,7 +231,107 @@
       Fn(J);
   });
 }
+
+#endif
+
+} // namespace detail
 #endif
+
+namespace detail {
+// Default comparator shared by all sort overloads.  Declared outside the
+// LLVM_ENABLE_THREADS guard: the sequential sort below needs it even in
+// single-threaded builds.
+template <class RandomAccessIterator>
+using DefComparator =
+    std::less<typename std::iterator_traits<RandomAccessIterator>::value_type>;
+} // namespace detail
+
+// sequential algorithm implementations.
+template <class Policy, class RandomAccessIterator,
+          class Comparator = detail::DefComparator<RandomAccessIterator>>
+void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End,
+          const Comparator &Comp = Comparator()) {
+  static_assert(is_execution_policy<Policy>::value,
+                "Invalid execution policy!");
+  std::sort(Start, End, Comp);
+}
+
+template <class Policy, class IterTy, class FuncTy>
+void for_each(Policy policy, IterTy Begin, IterTy End, FuncTy Fn) {
+  static_assert(is_execution_policy<Policy>::value,
+                "Invalid execution policy!");
+  std::for_each(Begin, End, Fn);
+}
+
+template <class Policy, class IndexTy, class FuncTy>
+void for_each_n(Policy policy, IndexTy Begin, IndexTy End, FuncTy Fn) {
+  static_assert(is_execution_policy<Policy>::value,
+                "Invalid execution policy!");
+  for (IndexTy I = Begin; I != End; ++I)
+    Fn(I);
+}
+
+// Parallel algorithm implementations, only available when LLVM_ENABLE_THREADS
+// is true.  Note: LLVM_ENABLE_THREADS is always *defined* (to 0 or 1) by
+// llvm-config.h, so this must test its value, not defined().
+#if LLVM_ENABLE_THREADS
+template <class RandomAccessIterator,
+          class Comparator = detail::DefComparator<RandomAccessIterator>>
+void sort(parallel_execution_policy policy, RandomAccessIterator Start,
+          RandomAccessIterator End, const Comparator &Comp = Comparator()) {
+  detail::parallel_sort(Start, End, Comp);
+}
+
+template <class IterTy, class FuncTy>
+void for_each(parallel_execution_policy policy, IterTy Begin, IterTy End,
+              FuncTy Fn) {
+  detail::parallel_for_each(Begin, End, Fn);
+}
+
+template <class IndexTy, class FuncTy>
+void for_each_n(parallel_execution_policy policy, IndexTy Begin, IndexTy End,
+                FuncTy Fn) {
+  detail::parallel_for_each_n(Begin, End, Fn);
+}
+#endif
+
+// Dynamic execution policy algorithms: dispatch to the overload matching the
+// wrapped policy.
+template <class RandomAccessIterator,
+          class Comparator = detail::DefComparator<RandomAccessIterator>>
+void sort(const execution_policy &Policy, RandomAccessIterator Start,
+          RandomAccessIterator End, const Comparator &Comp = Comparator()) {
+  if (auto P = Policy.get<sequential_execution_policy>())
+    sort(*P, Start, End, Comp);
+  else if (auto P = Policy.get<parallel_execution_policy>())
+    sort(*P, Start, End, Comp);
+  else
+    llvm_unreachable("Unknown execution policy!");
+}
+
+template <class IterTy, class FuncTy>
+void for_each(const execution_policy &Policy, IterTy Begin, IterTy End,
+              FuncTy Fn) {
+  if (auto P = Policy.get<sequential_execution_policy>())
+    for_each(*P, Begin, End, Fn);
+  else if (auto P = Policy.get<parallel_execution_policy>())
+    for_each(*P, Begin, End, Fn);
+  else
+    llvm_unreachable("Unknown execution policy!");
+}
+
+template <class IndexTy, class FuncTy>
+void for_each_n(const execution_policy &Policy, IndexTy Begin, IndexTy End,
+                FuncTy Fn) {
+  if (auto P = Policy.get<sequential_execution_policy>())
+    for_each_n(*P, Begin, End, Fn);
+  else if (auto P = Policy.get<parallel_execution_policy>())
+    for_each_n(*P, Begin, End, Fn);
+  else
+    llvm_unreachable("Unknown execution policy!");
+}
+
+} // namespace parallel
 } // End namespace lld
 
 #endif // LLD_CORE_PARALLEL_H
Index: lld/lib/ReaderWriter/MachO/LayoutPass.cpp
===================================================================
--- lld/lib/ReaderWriter/MachO/LayoutPass.cpp
+++ lld/lib/ReaderWriter/MachO/LayoutPass.cpp
@@ -461,10 +461,10 @@
   });
 
   std::vector<LayoutPass::SortKey> vec = decorate(atomRange);
-  parallel_sort(vec.begin(), vec.end(),
-      [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool {
-        return compareAtoms(l, r, _customSorter);
-      });
+  sort(parallel::par, vec.begin(), vec.end(),
+       [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool {
+         return compareAtoms(l, r, _customSorter);
+       });
   DEBUG(checkTransitivity(vec, _customSorter));
   undecorate(atomRange, vec);
Index: lld/unittests/CoreTests/ParallelTest.cpp
===================================================================
--- lld/unittests/CoreTests/ParallelTest.cpp
+++ lld/unittests/CoreTests/ParallelTest.cpp
@@ -26,7 +26,7 @@
   for (auto &i : array)
     i = dist(randEngine);
 
-  lld::parallel_sort(std::begin(array), std::end(array));
+  sort(lld::parallel::par, std::begin(array), std::end(array));
 
   ASSERT_TRUE(std::is_sorted(std::begin(array), std::end(array)));
 }
@@ -36,7 +36,7 @@
   // writing.
   uint32_t range[2050];
   std::fill(range, range + 2050, 1);
-  lld::parallel_for(0, 2049, [&range](size_t I) { ++range[I]; });
+  for_each_n(lld::parallel::par, 0, 2049, [&range](size_t I) { ++range[I]; });
 
   uint32_t expected[2049];
   std::fill(expected, expected + 2049, 2);
@@ -44,3 +44,27 @@
 
   // Check that we don't write past the end of the requested range.
   ASSERT_EQ(range[2049], 1u);
 }
+
+TEST(Parallel, parallel_dynamic_sort) {
+  std::mt19937 randEngine;
+  std::uniform_int_distribution<uint32_t> dist;
+
+  for (auto &i : array)
+    i = dist(randEngine);
+
+  lld::parallel::execution_policy policy = lld::parallel::par;
+  sort(policy, std::begin(array), std::end(array));
+  ASSERT_TRUE(std::is_sorted(std::begin(array), std::end(array)));
+}
+
+TEST(Parallel, sequential_dynamic_sort) {
+  std::mt19937 randEngine;
+  std::uniform_int_distribution<uint32_t> dist;
+
+  for (auto &i : array)
+    i = dist(randEngine);
+
+  lld::parallel::execution_policy policy = lld::parallel::seq;
+  sort(policy, std::begin(array), std::end(array));
+  ASSERT_TRUE(std::is_sorted(std::begin(array), std::end(array)));
+}