Index: lld/COFF/ICF.cpp =================================================================== --- lld/COFF/ICF.cpp +++ lld/COFF/ICF.cpp @@ -21,9 +21,9 @@ #include "Chunks.h" #include "Error.h" #include "Symbols.h" -#include "lld/Core/Parallel.h" #include "llvm/ADT/Hashing.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/raw_ostream.h" #include #include Index: lld/COFF/MapFile.cpp =================================================================== --- lld/COFF/MapFile.cpp +++ lld/COFF/MapFile.cpp @@ -25,7 +25,7 @@ #include "Symbols.h" #include "Writer.h" -#include "lld/Core/Parallel.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -76,7 +76,7 @@ static DenseMap getSymbolStrings(ArrayRef Syms) { std::vector Str(Syms.size()); - for_each_n(parallel::par, (size_t)0, Syms.size(), [&](size_t I) { + for_each_n(llvm::parallel::par, (size_t)0, Syms.size(), [&](size_t I) { raw_string_ostream OS(Str[I]); writeHeader(OS, Syms[I]->getRVA(), 0, 0); OS << indent(2) << toString(*Syms[I]); Index: lld/COFF/Writer.cpp =================================================================== --- lld/COFF/Writer.cpp +++ lld/COFF/Writer.cpp @@ -17,13 +17,13 @@ #include "PDB.h" #include "SymbolTable.h" #include "Symbols.h" -#include "lld/Core/Parallel.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/raw_ostream.h" #include @@ -745,8 +745,8 @@ // ADD instructions). if (Sec->getPermissions() & IMAGE_SCN_CNT_CODE) memset(SecBuf, 0xCC, Sec->getRawSize()); - for_each(parallel::par, Sec->getChunks().begin(), Sec->getChunks().end(), - [&](Chunk *C) { C->writeTo(SecBuf); }); + for_each(llvm::parallel::par, Sec->getChunks().begin(), + Sec->getChunks().end(), [&](Chunk *C) { C->writeTo(SecBuf); }); } } @@ -760,13 +760,13 @@ uint8_t *End = Begin + Sec->getVirtualSize(); if (Config->Machine == AMD64) { struct Entry { ulittle32_t Begin, End, Unwind; }; - sort(parallel::par, (Entry *)Begin, (Entry *)End, + sort(llvm::parallel::par, (Entry *)Begin, (Entry *)End, [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; }); return; } if (Config->Machine == ARMNT) { struct Entry { ulittle32_t Begin, Unwind; }; - sort(parallel::par, (Entry *)Begin, (Entry *)End, + sort(llvm::parallel::par, (Entry *)Begin, (Entry *)End, [](const Entry &A, const Entry &B) { return A.Begin < B.Begin; }); return; } Index: lld/ELF/Threads.h =================================================================== --- lld/ELF/Threads.h +++ lld/ELF/Threads.h @@ -61,7 +61,7 @@ #include "Config.h" -#include "lld/Core/Parallel.h" +#include "llvm/Support/Parallel.h" #include #include @@ -71,17 +71,17 @@ template void parallelForEach(IterTy Begin, IterTy End, FuncTy Fn) { if (Config->Threads) - for_each(parallel::par, Begin, End, Fn); + for_each(llvm::parallel::par, Begin, End, Fn); else - for_each(parallel::seq, Begin, End, Fn); + for_each(llvm::parallel::seq, Begin, End, Fn); } inline void parallelFor(size_t Begin, size_t End, std::function Fn) { if (Config->Threads) - for_each_n(parallel::par, Begin, End, Fn); + for_each_n(llvm::parallel::par, Begin, End, Fn); else - for_each_n(parallel::seq, Begin, End, Fn); + for_each_n(llvm::parallel::seq, Begin, End, Fn); } } } Index: lld/include/lld/Core/Parallel.h =================================================================== --- lld/include/lld/Core/Parallel.h +++ /dev/null @@ -1,210 +0,0 @@ -//===- lld/Core/Parallel.h - Parallel utilities ---------------------------===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_CORE_PARALLEL_H -#define LLD_CORE_PARALLEL_H - -#include "lld/Core/LLVM.h" -#include "lld/Core/TaskGroup.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Config/llvm-config.h" -#include "llvm/Support/MathExtras.h" - -#include - -#if defined(_MSC_VER) && LLVM_ENABLE_THREADS -#include -#include -#endif - -namespace lld { - -namespace parallel { -struct sequential_execution_policy {}; -struct parallel_execution_policy {}; - -template -struct is_execution_policy - : public std::integral_constant< - bool, llvm::is_one_of::value> {}; - -constexpr sequential_execution_policy seq{}; -constexpr parallel_execution_policy par{}; - -namespace detail { - -#if LLVM_ENABLE_THREADS - -#if defined(_MSC_VER) -template -void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End, - const Comparator &Comp) { - concurrency::parallel_sort(Start, End, Comp); -} -template -void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { - concurrency::parallel_for_each(Begin, End, Fn); -} - -template -void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { - concurrency::parallel_for(Begin, End, Fn); -} - -#else -const ptrdiff_t MinParallelSize = 1024; - -/// \brief Inclusive median. -template -RandomAccessIterator medianOf3(RandomAccessIterator Start, - RandomAccessIterator End, - const Comparator &Comp) { - RandomAccessIterator Mid = Start + (std::distance(Start, End) / 2); - return Comp(*Start, *(End - 1)) - ? (Comp(*Mid, *(End - 1)) ? (Comp(*Start, *Mid) ? Mid : Start) - : End - 1) - : (Comp(*Mid, *Start) ? (Comp(*(End - 1), *Mid) ? Mid : End - 1) - : Start); -} - -template -void parallel_quick_sort(RandomAccessIterator Start, RandomAccessIterator End, - const Comparator &Comp, TaskGroup &TG, size_t Depth) { - // Do a sequential sort for small inputs. - if (std::distance(Start, End) < detail::MinParallelSize || Depth == 0) { - std::sort(Start, End, Comp); - return; - } - - // Partition. - auto Pivot = medianOf3(Start, End, Comp); - // Move Pivot to End. - std::swap(*(End - 1), *Pivot); - Pivot = std::partition(Start, End - 1, [&Comp, End](decltype(*Start) V) { - return Comp(V, *(End - 1)); - }); - // Move Pivot to middle of partition. - std::swap(*Pivot, *(End - 1)); - - // Recurse. - TG.spawn([=, &Comp, &TG] { - parallel_quick_sort(Start, Pivot, Comp, TG, Depth - 1); - }); - parallel_quick_sort(Pivot + 1, End, Comp, TG, Depth - 1); -} - -template -void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End, - const Comparator &Comp) { - TaskGroup TG; - parallel_quick_sort(Start, End, Comp, TG, - llvm::Log2_64(std::distance(Start, End)) + 1); -} - -template -void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { - // TaskGroup has a relatively high overhead, so we want to reduce - // the number of spawn() calls. We'll create up to 1024 tasks here. - // (Note that 1024 is an arbitrary number. This code probably needs - // improving to take the number of available cores into account.) - ptrdiff_t TaskSize = std::distance(Begin, End) / 1024; - if (TaskSize == 0) - TaskSize = 1; - - TaskGroup TG; - while (TaskSize <= std::distance(Begin, End)) { - TG.spawn([=, &Fn] { std::for_each(Begin, Begin + TaskSize, Fn); }); - Begin += TaskSize; - } - TG.spawn([=, &Fn] { std::for_each(Begin, End, Fn); }); -} - -template -void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { - ptrdiff_t TaskSize = (End - Begin) / 1024; - if (TaskSize == 0) - TaskSize = 1; - - TaskGroup TG; - IndexTy I = Begin; - for (; I + TaskSize < End; I += TaskSize) { - TG.spawn([=, &Fn] { - for (IndexTy J = I, E = I + TaskSize; J != E; ++J) - Fn(J); - }); - } - TG.spawn([=, &Fn] { - for (IndexTy J = I; J < End; ++J) - Fn(J); - }); -} - -#endif - -#endif - -template -using DefComparator = - std::less::value_type>; - -} // namespace detail - -// sequential algorithm implementations. -template > -void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, - const Comparator &Comp = Comparator()) { - static_assert(is_execution_policy::value, - "Invalid execution policy!"); - std::sort(Start, End, Comp); -} - -template -void for_each(Policy policy, IterTy Begin, IterTy End, FuncTy Fn) { - static_assert(is_execution_policy::value, - "Invalid execution policy!"); - std::for_each(Begin, End, Fn); -} - -template -void for_each_n(Policy policy, IndexTy Begin, IndexTy End, FuncTy Fn) { - static_assert(is_execution_policy::value, - "Invalid execution policy!"); - for (IndexTy I = Begin; I != End; ++I) - Fn(I); -} - -// Parallel algorithm implementations, only available when LLVM_ENABLE_THREADS -// is true. -#if LLVM_ENABLE_THREADS -template > -void sort(parallel_execution_policy policy, RandomAccessIterator Start, - RandomAccessIterator End, const Comparator &Comp = Comparator()) { - detail::parallel_sort(Start, End, Comp); -} - -template -void for_each(parallel_execution_policy policy, IterTy Begin, IterTy End, - FuncTy Fn) { - detail::parallel_for_each(Begin, End, Fn); -} - -template -void for_each_n(parallel_execution_policy policy, IndexTy Begin, IndexTy End, - FuncTy Fn) { - detail::parallel_for_each_n(Begin, End, Fn); -} -#endif - -} // namespace parallel -} // End namespace lld - -#endif // LLD_CORE_PARALLEL_H Index: lld/include/lld/Core/TaskGroup.h =================================================================== --- lld/include/lld/Core/TaskGroup.h +++ /dev/null @@ -1,65 +0,0 @@ -//===- lld/Core/TaskGroup.h - Task Group ----------------------------------===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_CORE_TASKGROUP_H -#define LLD_CORE_TASKGROUP_H - -#include "lld/Core/LLVM.h" - -#include -#include -#include - -namespace lld { -/// \brief Allows one or more threads to wait on a potentially unknown number of -/// events. -/// -/// A latch starts at \p count. inc() increments this, and dec() decrements it. -/// All calls to sync() will block while the count is not 0. -/// -/// Calling dec() on a Latch with a count of 0 has undefined behaivor. -class Latch { - uint32_t Count; - mutable std::mutex Mutex; - mutable std::condition_variable Cond; - -public: - explicit Latch(uint32_t count = 0) : Count(count) {} - ~Latch() { sync(); } - - void inc() { - std::unique_lock lock(Mutex); - ++Count; - } - - void dec() { - std::unique_lock lock(Mutex); - if (--Count == 0) - Cond.notify_all(); - } - - void sync() const { - std::unique_lock lock(Mutex); - Cond.wait(lock, [&] { return Count == 0; }); - } -}; - -/// \brief Allows launching a number of tasks and waiting for them to finish -/// either explicitly via sync() or implicitly on destruction. -class TaskGroup { - Latch L; - -public: - void spawn(std::function F); - - void sync() const { L.sync(); } -}; -} - -#endif Index: lld/lib/Core/CMakeLists.txt =================================================================== --- lld/lib/Core/CMakeLists.txt +++ lld/lib/Core/CMakeLists.txt @@ -12,7 +12,6 @@ Resolver.cpp SymbolTable.cpp TargetOptionsCommandFlags.cpp - TaskGroup.cpp Writer.cpp ADDITIONAL_HEADER_DIRS Index: lld/lib/Core/TaskGroup.cpp =================================================================== --- lld/lib/Core/TaskGroup.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//===- lld/Core/TaskGroup.cpp - Task Group --------------------------------===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "lld/Core/TaskGroup.h" -#include "llvm/Config/llvm-config.h" - -#include -#include -#include - -#if defined(_MSC_VER) && LLVM_ENABLE_THREADS -#include -#include -#endif - -using namespace lld; - -namespace { - -/// \brief An abstract class that takes closures and runs them asynchronously. -class Executor { -public: - virtual ~Executor() = default; - virtual void add(std::function func) = 0; - - static Executor *getDefaultExecutor(); -}; - -#if !LLVM_ENABLE_THREADS -class SyncExecutor : public Executor { -public: - virtual void add(std::function F) { F(); } -}; - -Executor *Executor::getDefaultExecutor() { - static SyncExecutor Exec; - return &Exec; -} - -#elif defined(_MSC_VER) -/// \brief An Executor that runs tasks via ConcRT. -class ConcRTExecutor : public Executor { - struct Taskish { - Taskish(std::function Task) : Task(Task) {} - - std::function Task; - - static void run(void *P) { - Taskish *Self = static_cast(P); - Self->Task(); - concurrency::Free(Self); - } - }; - -public: - virtual void add(std::function F) { - Concurrency::CurrentScheduler::ScheduleTask( - Taskish::run, new (concurrency::Alloc(sizeof(Taskish))) Taskish(F)); - } -}; - -Executor *Executor::getDefaultExecutor() { - static ConcRTExecutor exec; - return &exec; -} - -#else -/// \brief An implementation of an Executor that runs closures on a thread pool -/// in filo order. -class ThreadPoolExecutor : public Executor { -public: - explicit ThreadPoolExecutor( - unsigned ThreadCount = std::thread::hardware_concurrency()) - : Done(ThreadCount) { - // Spawn all but one of the threads in another thread as spawning threads - // can take a while. - std::thread([&, ThreadCount] { - for (size_t i = 1; i < ThreadCount; ++i) { - std::thread([=] { work(); }).detach(); - } - work(); - }).detach(); - } - - ~ThreadPoolExecutor() override { - std::unique_lock Lock(Mutex); - Stop = true; - Lock.unlock(); - Cond.notify_all(); - // Wait for ~Latch. - } - - void add(std::function F) override { - std::unique_lock Lock(Mutex); - WorkStack.push(F); - Lock.unlock(); - Cond.notify_one(); - } - -private: - void work() { - while (true) { - std::unique_lock Lock(Mutex); - Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); - if (Stop) - break; - auto Task = WorkStack.top(); - WorkStack.pop(); - Lock.unlock(); - Task(); - } - Done.dec(); - } - - std::atomic Stop{false}; - std::stack> WorkStack; - std::mutex Mutex; - std::condition_variable Cond; - Latch Done; -}; - -Executor *Executor::getDefaultExecutor() { - static ThreadPoolExecutor exec; - return &exec; -} -#endif -} - -void TaskGroup::spawn(std::function F) { - L.inc(); - Executor::getDefaultExecutor()->add([&, F] { - F(); - L.dec(); - }); -} Index: lld/lib/ReaderWriter/MachO/LayoutPass.cpp =================================================================== --- lld/lib/ReaderWriter/MachO/LayoutPass.cpp +++ lld/lib/ReaderWriter/MachO/LayoutPass.cpp @@ -9,12 +9,12 @@ #include "LayoutPass.h" #include "lld/Core/Instrumentation.h" -#include "lld/Core/Parallel.h" #include "lld/Core/PassManager.h" #include "lld/ReaderWriter/MachOLinkingContext.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Parallel.h" #include #include #include @@ -461,7 +461,7 @@ }); std::vector vec = decorate(atomRange); - sort(parallel::par, vec.begin(), vec.end(), + sort(llvm::parallel::par, vec.begin(), vec.end(), [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool { return compareAtoms(l, r, _customSorter); }); Index: lld/unittests/CMakeLists.txt =================================================================== --- lld/unittests/CMakeLists.txt +++ lld/unittests/CMakeLists.txt @@ -12,6 +12,5 @@ target_link_libraries(${test_dirname} ${LLVM_COMMON_LIBS}) endfunction() -add_subdirectory(CoreTests) add_subdirectory(DriverTests) add_subdirectory(MachOTests) Index: lld/unittests/CoreTests/CMakeLists.txt =================================================================== --- lld/unittests/CoreTests/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_lld_unittest(CoreTests - ParallelTest.cpp - ) - -target_link_libraries(CoreTests - lldCore ${LLVM_PTHREAD_LIB} - ) Index: lld/unittests/CoreTests/ParallelTest.cpp =================================================================== --- lld/unittests/CoreTests/ParallelTest.cpp +++ /dev/null @@ -1,46 +0,0 @@ -//===- lld/unittest/ParallelTest.cpp --------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief Parallel.h unit tests. -/// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" -#include "lld/Core/Parallel.h" -#include -#include - -uint32_t array[1024 * 1024]; - -TEST(Parallel, sort) { - std::mt19937 randEngine; - std::uniform_int_distribution dist; - - for (auto &i : array) - i = dist(randEngine); - - sort(lld::parallel::par, std::begin(array), std::end(array)); - ASSERT_TRUE(std::is_sorted(std::begin(array), std::end(array))); -} - -TEST(Parallel, parallel_for) { - // We need to test the case with a TaskSize > 1. We are white-box testing - // here. The TaskSize is calculated as (End - Begin) / 1024 at the time of - // writing. - uint32_t range[2050]; - std::fill(range, range + 2050, 1); - for_each_n(lld::parallel::par, 0, 2049, [&range](size_t I) { ++range[I]; }); - - uint32_t expected[2049]; - std::fill(expected, expected + 2049, 2); - ASSERT_TRUE(std::equal(range, range + 2049, expected)); - // Check that we don't write past the end of the requested range. - ASSERT_EQ(range[2049], 1u); -} Index: llvm/include/llvm/Support/Parallel.h =================================================================== --- /dev/null +++ llvm/include/llvm/Support/Parallel.h @@ -0,0 +1,251 @@ +//===- llvm/Support/Parallel.h - Parallel algorithms ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_PARALLEL_H +#define LLVM_SUPPORT_PARALLEL_H + +#include "llvm/ADT/STLExtras.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/MathExtras.h" + +#include +#include +#include +#include + +#if defined(_MSC_VER) && LLVM_ENABLE_THREADS +#pragma warning(push) +#pragma warning(disable : 4530) +#include +#include +#pragma warning(pop) +#endif + +namespace llvm { + +namespace detail { +class Latch { + uint32_t Count; + mutable std::mutex Mutex; + mutable std::condition_variable Cond; + +public: + explicit Latch(uint32_t count = 0) : Count(Count) {} + ~Latch() { sync(); } + + void inc() { + std::unique_lock lock(Mutex); + ++Count; + } + + void dec() { + std::unique_lock lock(Mutex); + if (--Count == 0) + Cond.notify_all(); + } + + void sync() const { + std::unique_lock lock(Mutex); + Cond.wait(lock, [&] { return Count == 0; }); + } +}; + +class TaskGroup { + Latch L; + +public: + void spawn(std::function f); + + void sync() const { L.sync(); } +}; +} + +namespace parallel { +struct sequential_execution_policy {}; +struct parallel_execution_policy {}; + +template +struct is_execution_policy + : public std::integral_constant< + bool, llvm::is_one_of::value> {}; + +constexpr sequential_execution_policy seq{}; +constexpr parallel_execution_policy par{}; + +namespace detail { + +#if LLVM_ENABLE_THREADS + +#if defined(_MSC_VER) +template +void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End, + const Comparator &Comp) { + concurrency::parallel_sort(Start, End, Comp); +} +template +void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { + concurrency::parallel_for_each(Begin, End, Fn); +} + +template +void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { + concurrency::parallel_for(Begin, End, Fn); +} + +#else +const ptrdiff_t MinParallelSize = 1024; + +/// \brief Inclusive median. +template +RandomAccessIterator medianOf3(RandomAccessIterator Start, + RandomAccessIterator End, + const Comparator &Comp) { + RandomAccessIterator Mid = Start + (std::distance(Start, End) / 2); + return Comp(*Start, *(End - 1)) + ? (Comp(*Mid, *(End - 1)) ? (Comp(*Start, *Mid) ? Mid : Start) + : End - 1) + : (Comp(*Mid, *Start) ? (Comp(*(End - 1), *Mid) ? Mid : End - 1) + : Start); +} + +template +void parallel_quick_sort(RandomAccessIterator Start, RandomAccessIterator End, + const Comparator &Comp, TaskGroup &TG, size_t Depth) { + // Do a sequential sort for small inputs. + if (std::distance(Start, End) < detail::MinParallelSize || Depth == 0) { + std::sort(Start, End, Comp); + return; + } + + // Partition. + auto Pivot = medianOf3(Start, End, Comp); + // Move Pivot to End. + std::swap(*(End - 1), *Pivot); + Pivot = std::partition(Start, End - 1, [&Comp, End](decltype(*Start) V) { + return Comp(V, *(End - 1)); + }); + // Move Pivot to middle of partition. + std::swap(*Pivot, *(End - 1)); + + // Recurse. + TG.spawn([=, &Comp, &TG] { + parallel_quick_sort(Start, Pivot, Comp, TG, Depth - 1); + }); + parallel_quick_sort(Pivot + 1, End, Comp, TG, Depth - 1); +} + +template +void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End, + const Comparator &Comp) { + TaskGroup TG; + parallel_quick_sort(Start, End, Comp, TG, + llvm::Log2_64(std::distance(Start, End)) + 1); +} + +template +void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { + // TaskGroup has a relatively high overhead, so we want to reduce + // the number of spawn() calls. We'll create up to 1024 tasks here. + // (Note that 1024 is an arbitrary number. This code probably needs + // improving to take the number of available cores into account.) + ptrdiff_t TaskSize = std::distance(Begin, End) / 1024; + if (TaskSize == 0) + TaskSize = 1; + + TaskGroup TG; + while (TaskSize <= std::distance(Begin, End)) { + TG.spawn([=, &Fn] { std::for_each(Begin, Begin + TaskSize, Fn); }); + Begin += TaskSize; + } + TG.spawn([=, &Fn] { std::for_each(Begin, End, Fn); }); +} + +template +void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) { + ptrdiff_t TaskSize = (End - Begin) / 1024; + if (TaskSize == 0) + TaskSize = 1; + + TaskGroup TG; + IndexTy I = Begin; + for (; I + TaskSize < End; I += TaskSize) { + TG.spawn([=, &Fn] { + for (IndexTy J = I, E = I + TaskSize; J != E; ++J) + Fn(J); + }); + } + TG.spawn([=, &Fn] { + for (IndexTy J = I; J < End; ++J) + Fn(J); + }); +} + +#endif + +#endif + +template +using DefComparator = + std::less::value_type>; + +} // namespace detail + +// sequential algorithm implementations. +template > +void sort(Policy policy, RandomAccessIterator Start, RandomAccessIterator End, + const Comparator &Comp = Comparator()) { + static_assert(is_execution_policy::value, + "Invalid execution policy!"); + std::sort(Start, End, Comp); +} + +template +void for_each(Policy policy, IterTy Begin, IterTy End, FuncTy Fn) { + static_assert(is_execution_policy::value, + "Invalid execution policy!"); + std::for_each(Begin, End, Fn); +} + +template +void for_each_n(Policy policy, IndexTy Begin, IndexTy End, FuncTy Fn) { + static_assert(is_execution_policy::value, + "Invalid execution policy!"); + for (IndexTy I = Begin; I != End; ++I) + Fn(I); +} + +// Parallel algorithm implementations, only available when LLVM_ENABLE_THREADS +// is true. +#if LLVM_ENABLE_THREADS +template > +void sort(parallel_execution_policy policy, RandomAccessIterator Start, + RandomAccessIterator End, const Comparator &Comp = Comparator()) { + detail::parallel_sort(Start, End, Comp); +} + +template +void for_each(parallel_execution_policy policy, IterTy Begin, IterTy End, + FuncTy Fn) { + detail::parallel_for_each(Begin, End, Fn); +} + +template +void for_each_n(parallel_execution_policy policy, IndexTy Begin, IndexTy End, + FuncTy Fn) { + detail::parallel_for_each_n(Begin, End, Fn); +} +#endif + +} // namespace parallel +} // namespace llvm + +#endif // LLVM_SUPPORT_PARALLEL_H Index: llvm/lib/Support/CMakeLists.txt =================================================================== --- llvm/lib/Support/CMakeLists.txt +++ llvm/lib/Support/CMakeLists.txt @@ -81,6 +81,7 @@ MD5.cpp NativeFormatting.cpp Options.cpp + Parallel.cpp PluginLoader.cpp PrettyStackTrace.cpp RandomNumberGenerator.cpp Index: llvm/lib/Support/Parallel.cpp =================================================================== --- /dev/null +++ llvm/lib/Support/Parallel.cpp @@ -0,0 +1,136 @@ +//===- llvm/Support/Parallel.cpp - Parallel algorithms --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Parallel.h" +#include "llvm/Config/llvm-config.h" + +#include +#include +#include + +using namespace llvm; + +namespace { + +/// \brief An abstract class that takes closures and runs them asynchronously. +class Executor { +public: + virtual ~Executor() = default; + virtual void add(std::function func) = 0; + + static Executor *getDefaultExecutor(); +}; + +#if !LLVM_ENABLE_THREADS +class SyncExecutor : public Executor { +public: + virtual void add(std::function F) { F(); } +}; + +Executor *Executor::getDefaultExecutor() { + static SyncExecutor Exec; + return &Exec; +} + +#elif defined(_MSC_VER) +/// \brief An Executor that runs tasks via ConcRT. +class ConcRTExecutor : public Executor { + struct Taskish { + Taskish(std::function Task) : Task(Task) {} + + std::function Task; + + static void run(void *P) { + Taskish *Self = static_cast(P); + Self->Task(); + concurrency::Free(Self); + } + }; + +public: + virtual void add(std::function F) { + Concurrency::CurrentScheduler::ScheduleTask( + Taskish::run, new (concurrency::Alloc(sizeof(Taskish))) Taskish(F)); + } +}; + +Executor *Executor::getDefaultExecutor() { + static ConcRTExecutor exec; + return &exec; +} + +#else +/// \brief An implementation of an Executor that runs closures on a thread pool +/// in filo order. +class ThreadPoolExecutor : public Executor { +public: + explicit ThreadPoolExecutor( + unsigned ThreadCount = std::thread::hardware_concurrency()) + : Done(ThreadCount) { + // Spawn all but one of the threads in another thread as spawning threads + // can take a while. + std::thread([&, ThreadCount] { + for (size_t i = 1; i < ThreadCount; ++i) { + std::thread([=] { work(); }).detach(); + } + work(); + }).detach(); + } + + ~ThreadPoolExecutor() override { + std::unique_lock Lock(Mutex); + Stop = true; + Lock.unlock(); + Cond.notify_all(); + // Wait for ~Latch. + } + + void add(std::function F) override { + std::unique_lock Lock(Mutex); + WorkStack.push(F); + Lock.unlock(); + Cond.notify_one(); + } + +private: + void work() { + while (true) { + std::unique_lock Lock(Mutex); + Cond.wait(Lock, [&] { return Stop || !WorkStack.empty(); }); + if (Stop) + break; + auto Task = WorkStack.top(); + WorkStack.pop(); + Lock.unlock(); + Task(); + } + Done.dec(); + } + + std::atomic Stop{false}; + std::stack> WorkStack; + std::mutex Mutex; + std::condition_variable Cond; + Latch Done; +}; + +Executor *Executor::getDefaultExecutor() { + static ThreadPoolExecutor exec; + return &exec; +} +#endif +} + +void TaskGroup::spawn(std::function F) { + L.inc(); + Executor::getDefaultExecutor()->add([&, F] { + F(); + L.dec(); + }); +} Index: llvm/unittests/Support/CMakeLists.txt =================================================================== --- llvm/unittests/Support/CMakeLists.txt +++ llvm/unittests/Support/CMakeLists.txt @@ -36,6 +36,7 @@ MemoryBufferTest.cpp MemoryTest.cpp NativeFormatTests.cpp + ParallelTest.cpp Path.cpp ProcessTest.cpp ProgramTest.cpp Index: llvm/unittests/Support/ParallelTest.cpp =================================================================== --- /dev/null +++ llvm/unittests/Support/ParallelTest.cpp @@ -0,0 +1,48 @@ +//===- llvm/unittest/Support/ParallelTest.cpp -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief Parallel.h unit tests. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Parallel.h" +#include "gtest/gtest.h" +#include +#include + +uint32_t array[1024 * 1024]; + +using namespace llvm; + +TEST(Parallel, sort) { + std::mt19937 randEngine; + std::uniform_int_distribution dist; + + for (auto &i : array) + i = dist(randEngine); + + sort(parallel::par, std::begin(array), std::end(array)); + ASSERT_TRUE(std::is_sorted(std::begin(array), std::end(array))); +} + +TEST(Parallel, parallel_for) { + // We need to test the case with a TaskSize > 1. We are white-box testing + // here. The TaskSize is calculated as (End - Begin) / 1024 at the time of + // writing. + uint32_t range[2050]; + std::fill(range, range + 2050, 1); + for_each_n(parallel::par, 0, 2049, [&range](size_t I) { ++range[I]; }); + + uint32_t expected[2049]; + std::fill(expected, expected + 2049, 2); + ASSERT_TRUE(std::equal(range, range + 2049, expected)); + // Check that we don't write past the end of the requested range. + ASSERT_EQ(range[2049], 1u); +}