Index: lld/CMakeLists.txt =================================================================== --- lld/CMakeLists.txt +++ lld/CMakeLists.txt @@ -192,7 +192,6 @@ "Build the lld tools. If OFF, just generate build targets." ON) if (MSVC) - add_definitions(-wd4530) # Suppress 'warning C4530: C++ exception handler used, but unwind semantics are not enabled.' add_definitions(-wd4062) # Suppress 'warning C4062: enumerator X in switch of enum Y is not handled' from system header. endif() Index: lld/COFF/ICF.cpp =================================================================== --- lld/COFF/ICF.cpp +++ lld/COFF/ICF.cpp @@ -21,9 +21,9 @@ #include "Chunks.h" #include "Error.h" #include "Symbols.h" -#include "lld/Core/Parallel.h" #include "llvm/ADT/Hashing.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/raw_ostream.h" #include #include Index: lld/COFF/MapFile.cpp =================================================================== --- lld/COFF/MapFile.cpp +++ lld/COFF/MapFile.cpp @@ -25,7 +25,7 @@ #include "Symbols.h" #include "Writer.h" -#include "lld/Core/Parallel.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; Index: lld/COFF/Writer.cpp =================================================================== --- lld/COFF/Writer.cpp +++ lld/COFF/Writer.cpp @@ -17,13 +17,13 @@ #include "PDB.h" #include "SymbolTable.h" #include "Symbols.h" -#include "lld/Core/Parallel.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileOutputBuffer.h" +#include "llvm/Support/Parallel.h" #include "llvm/Support/RandomNumberGenerator.h" #include "llvm/Support/raw_ostream.h" #include Index: lld/ELF/Threads.h =================================================================== --- lld/ELF/Threads.h +++ lld/ELF/Threads.h @@ -61,7 +61,7 @@ #include "Config.h" -#include "lld/Core/Parallel.h" +#include "llvm/Support/Parallel.h" #include #include @@ -71,7 +71,7 @@ template void parallelForEach(IterTy Begin, IterTy End, FuncTy Fn) { if (Config->Threads) - parallel_for_each(Begin, End, Fn); + llvm::parallel_for_each(Begin, End, Fn); else std::for_each(Begin, End, Fn); } @@ -79,7 +79,7 @@ inline void parallelFor(size_t Begin, size_t End, std::function Fn) { if (Config->Threads) { - parallel_for(Begin, End, Fn); + llvm::parallel_for(Begin, End, Fn); } else { for (size_t I = Begin; I < End; ++I) Fn(I); Index: lld/include/lld/Core/Parallel.h =================================================================== --- lld/include/lld/Core/Parallel.h +++ /dev/null @@ -1,335 +0,0 @@ -//===- lld/Core/Parallel.h - Parallel utilities ---------------------------===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_CORE_PARALLEL_H -#define LLD_CORE_PARALLEL_H - -#include "lld/Core/Instrumentation.h" -#include "lld/Core/LLVM.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/thread.h" - -#include -#include -#include -#include -#include - -#if defined(_MSC_VER) && LLVM_ENABLE_THREADS -#include -#include -#endif - -namespace lld { -/// \brief Allows one or more threads to wait on a potentially unknown number of -/// events. -/// -/// A latch starts at \p count. inc() increments this, and dec() decrements it. 
-/// All calls to sync() will block while the count is not 0. -/// -/// Calling dec() on a Latch with a count of 0 has undefined behaivor. -class Latch { - uint32_t _count; - mutable std::mutex _condMut; - mutable std::condition_variable _cond; - -public: - explicit Latch(uint32_t count = 0) : _count(count) {} - ~Latch() { sync(); } - - void inc() { - std::unique_lock lock(_condMut); - ++_count; - } - - void dec() { - std::unique_lock lock(_condMut); - if (--_count == 0) - _cond.notify_all(); - } - - void sync() const { - std::unique_lock lock(_condMut); - _cond.wait(lock, [&] { - return _count == 0; - }); - } -}; - -// Classes in this namespace are implementation details of this header. -namespace internal { - -/// \brief An abstract class that takes closures and runs them asynchronously. -class Executor { -public: - virtual ~Executor() = default; - virtual void add(std::function func) = 0; -}; - -#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 -class SyncExecutor : public Executor { -public: - virtual void add(std::function func) { - func(); - } -}; - -inline Executor *getDefaultExecutor() { - static SyncExecutor exec; - return &exec; -} -#elif defined(_MSC_VER) -/// \brief An Executor that runs tasks via ConcRT. -class ConcRTExecutor : public Executor { - struct Taskish { - Taskish(std::function task) : _task(task) {} - - std::function _task; - - static void run(void *p) { - Taskish *self = static_cast(p); - self->_task(); - concurrency::Free(self); - } - }; - -public: - virtual void add(std::function func) { - Concurrency::CurrentScheduler::ScheduleTask(Taskish::run, - new (concurrency::Alloc(sizeof(Taskish))) Taskish(func)); - } -}; - -inline Executor *getDefaultExecutor() { - static ConcRTExecutor exec; - return &exec; -} -#else -/// \brief An implementation of an Executor that runs closures on a thread pool -/// in filo order. -class ThreadPoolExecutor : public Executor { -public: - explicit ThreadPoolExecutor(unsigned threadCount = - std::thread::hardware_concurrency()) - : _stop(false), _done(threadCount) { - // Spawn all but one of the threads in another thread as spawning threads - // can take a while. - std::thread([&, threadCount] { - for (size_t i = 1; i < threadCount; ++i) { - std::thread([=] { - work(); - }).detach(); - } - work(); - }).detach(); - } - - ~ThreadPoolExecutor() override { - std::unique_lock lock(_mutex); - _stop = true; - lock.unlock(); - _cond.notify_all(); - // Wait for ~Latch. - } - - void add(std::function f) override { - std::unique_lock lock(_mutex); - _workStack.push(f); - lock.unlock(); - _cond.notify_one(); - } - -private: - void work() { - while (true) { - std::unique_lock lock(_mutex); - _cond.wait(lock, [&] { - return _stop || !_workStack.empty(); - }); - if (_stop) - break; - auto task = _workStack.top(); - _workStack.pop(); - lock.unlock(); - task(); - } - _done.dec(); - } - - std::atomic _stop; - std::stack> _workStack; - std::mutex _mutex; - std::condition_variable _cond; - Latch _done; -}; - -inline Executor *getDefaultExecutor() { - static ThreadPoolExecutor exec; - return &exec; -} -#endif - -} // namespace internal - -/// \brief Allows launching a number of tasks and waiting for them to finish -/// either explicitly via sync() or implicitly on destruction. 
-class TaskGroup { - Latch _latch; - -public: - void spawn(std::function f) { - _latch.inc(); - internal::getDefaultExecutor()->add([&, f] { - f(); - _latch.dec(); - }); - } - - void sync() const { _latch.sync(); } -}; - -#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 -template -void parallel_sort( - RandomAccessIterator start, RandomAccessIterator end, - const Comp &comp = std::less< - typename std::iterator_traits::value_type>()) { - std::sort(start, end, comp); -} -#elif defined(_MSC_VER) -// Use ppl parallel_sort on Windows. -template -void parallel_sort( - RandomAccessIterator start, RandomAccessIterator end, - const Comp &comp = std::less< - typename std::iterator_traits::value_type>()) { - concurrency::parallel_sort(start, end, comp); -} -#else -namespace detail { -const ptrdiff_t minParallelSize = 1024; - -/// \brief Inclusive median. -template -RandomAccessIterator medianOf3(RandomAccessIterator start, - RandomAccessIterator end, const Comp &comp) { - RandomAccessIterator mid = start + (std::distance(start, end) / 2); - return comp(*start, *(end - 1)) - ? (comp(*mid, *(end - 1)) ? (comp(*start, *mid) ? mid : start) - : end - 1) - : (comp(*mid, *start) ? (comp(*(end - 1), *mid) ? mid : end - 1) - : start); -} - -template -void parallel_quick_sort(RandomAccessIterator start, RandomAccessIterator end, - const Comp &comp, TaskGroup &tg, size_t depth) { - // Do a sequential sort for small inputs. - if (std::distance(start, end) < detail::minParallelSize || depth == 0) { - std::sort(start, end, comp); - return; - } - - // Partition. - auto pivot = medianOf3(start, end, comp); - // Move pivot to end. - std::swap(*(end - 1), *pivot); - pivot = std::partition(start, end - 1, [&comp, end](decltype(*start) v) { - return comp(v, *(end - 1)); - }); - // Move pivot to middle of partition. - std::swap(*pivot, *(end - 1)); - - // Recurse. - tg.spawn([=, &comp, &tg] { - parallel_quick_sort(start, pivot, comp, tg, depth - 1); - }); - parallel_quick_sort(pivot + 1, end, comp, tg, depth - 1); -} -} - -template -void parallel_sort( - RandomAccessIterator start, RandomAccessIterator end, - const Comp &comp = std::less< - typename std::iterator_traits::value_type>()) { - TaskGroup tg; - detail::parallel_quick_sort(start, end, comp, tg, - llvm::Log2_64(std::distance(start, end)) + 1); -} -#endif - -template void parallel_sort(T *start, T *end) { - parallel_sort(start, end, std::less()); -} - -#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0 -template -void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { - std::for_each(Begin, End, Fn); -} - -template -void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) { - for (IndexTy I = Begin; I != End; ++I) - Fn(I); -} -#elif defined(_MSC_VER) -// Use ppl parallel_for_each on Windows. -template -void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { - concurrency::parallel_for_each(Begin, End, Fn); -} - -template -void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) { - concurrency::parallel_for(Begin, End, Fn); -} -#else -template -void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) { - // TaskGroup has a relatively high overhead, so we want to reduce - // the number of spawn() calls. We'll create up to 1024 tasks here. - // (Note that 1024 is an arbitrary number. This code probably needs - // improving to take the number of available cores into account.) 
- ptrdiff_t TaskSize = std::distance(Begin, End) / 1024; - if (TaskSize == 0) - TaskSize = 1; - - TaskGroup Tg; - while (TaskSize <= std::distance(Begin, End)) { - Tg.spawn([=, &Fn] { std::for_each(Begin, Begin + TaskSize, Fn); }); - Begin += TaskSize; - } - Tg.spawn([=, &Fn] { std::for_each(Begin, End, Fn); }); -} - -template -void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) { - ptrdiff_t TaskSize = (End - Begin) / 1024; - if (TaskSize == 0) - TaskSize = 1; - - TaskGroup Tg; - IndexTy I = Begin; - for (; I + TaskSize < End; I += TaskSize) { - Tg.spawn([=, &Fn] { - for (IndexTy J = I, E = I + TaskSize; J != E; ++J) - Fn(J); - }); - } - Tg.spawn([=, &Fn] { - for (IndexTy J = I; J < End; ++J) - Fn(J); - }); -} -#endif -} // end namespace lld - -#endif // LLD_CORE_PARALLEL_H Index: lld/lib/ReaderWriter/MachO/LayoutPass.cpp =================================================================== --- lld/lib/ReaderWriter/MachO/LayoutPass.cpp +++ lld/lib/ReaderWriter/MachO/LayoutPass.cpp @@ -9,12 +9,12 @@ #include "LayoutPass.h" #include "lld/Core/Instrumentation.h" -#include "lld/Core/Parallel.h" #include "lld/Core/PassManager.h" #include "lld/ReaderWriter/MachOLinkingContext.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Parallel.h" #include #include #include @@ -461,7 +461,8 @@ }); std::vector vec = decorate(atomRange); - parallel_sort(vec.begin(), vec.end(), + llvm::parallel_sort( + vec.begin(), vec.end(), [&](const LayoutPass::SortKey &l, const LayoutPass::SortKey &r) -> bool { return compareAtoms(l, r, _customSorter); }); Index: lld/unittests/CMakeLists.txt =================================================================== --- lld/unittests/CMakeLists.txt +++ lld/unittests/CMakeLists.txt @@ -12,6 +12,5 @@ target_link_libraries(${test_dirname} ${LLVM_COMMON_LIBS}) endfunction() -add_subdirectory(CoreTests) add_subdirectory(DriverTests) add_subdirectory(MachOTests) Index: lld/unittests/CoreTests/CMakeLists.txt =================================================================== --- lld/unittests/CoreTests/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_lld_unittest(CoreTests - ParallelTest.cpp - ) - -target_link_libraries(CoreTests - ${LLVM_PTHREAD_LIB} - ) Index: lld/unittests/CoreTests/ParallelTest.cpp =================================================================== --- lld/unittests/CoreTests/ParallelTest.cpp +++ /dev/null @@ -1,46 +0,0 @@ -//===- lld/unittest/ParallelTest.cpp --------------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// \brief Parallel.h unit tests. -/// -//===----------------------------------------------------------------------===// - -#include "gtest/gtest.h" -#include "lld/Core/Parallel.h" -#include -#include - -uint32_t array[1024 * 1024]; - -TEST(Parallel, sort) { - std::mt19937 randEngine; - std::uniform_int_distribution dist; - - for (auto &i : array) - i = dist(randEngine); - - lld::parallel_sort(std::begin(array), std::end(array)); - ASSERT_TRUE(std::is_sorted(std::begin(array), std::end(array))); -} - -TEST(Parallel, parallel_for) { - // We need to test the case with a TaskSize > 1. We are white-box testing - // here. The TaskSize is calculated as (End - Begin) / 1024 at the time of - // writing. 
-  uint32_t range[2050];
-  std::fill(range, range + 2050, 1);
-  lld::parallel_for(0, 2049, [&range](size_t I) { ++range[I]; });
-
-  uint32_t expected[2049];
-  std::fill(expected, expected + 2049, 2);
-  ASSERT_TRUE(std::equal(range, range + 2049, expected));
-  // Check that we don't write past the end of the requested range.
-  ASSERT_EQ(range[2049], 1u);
-}
Index: llvm/include/llvm/Support/Parallel.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/Support/Parallel.h
@@ -0,0 +1,331 @@
+//===- llvm/Support/Parallel.h - Parallel utilities -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PARALLEL_H
+#define LLVM_SUPPORT_PARALLEL_H
+
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/thread.h"
+
+#include <algorithm>
+#include <atomic>
+#include <condition_variable>
+#include <mutex>
+#include <stack>
+
+#if defined(_MSC_VER) && LLVM_ENABLE_THREADS
+// Suppress 'warning C4530: C++ exception handler used, but unwind semantics are
+// not enabled.'
+#pragma warning(push)
+#pragma warning(disable : 4530)
+
+#include <concrt.h>
+#include <ppl.h>
+#pragma warning(pop)
+#endif
+
+namespace llvm {
+/// \brief Allows one or more threads to wait on a potentially unknown number of
+/// events.
+///
+/// A latch starts at \p count. inc() increments this, and dec() decrements it.
+/// All calls to sync() will block while the count is not 0.
+///
+/// Calling dec() on a Latch with a count of 0 has undefined behavior.
+class Latch {
+  uint32_t _count;
+  mutable std::mutex _condMut;
+  mutable std::condition_variable _cond;
+
+public:
+  explicit Latch(uint32_t count = 0) : _count(count) {}
+  ~Latch() { sync(); }
+
+  void inc() {
+    std::unique_lock<std::mutex> lock(_condMut);
+    ++_count;
+  }
+
+  void dec() {
+    std::unique_lock<std::mutex> lock(_condMut);
+    if (--_count == 0)
+      _cond.notify_all();
+  }
+
+  void sync() const {
+    std::unique_lock<std::mutex> lock(_condMut);
+    _cond.wait(lock, [&] { return _count == 0; });
+  }
+};
+
+// Classes in this namespace are implementation details of this header.
+namespace internal {
+
+/// \brief An abstract class that takes closures and runs them asynchronously.
+class Executor {
+public:
+  virtual ~Executor() = default;
+  virtual void add(std::function<void()> func) = 0;
+};
+
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
+class SyncExecutor : public Executor {
+public:
+  virtual void add(std::function<void()> func) { func(); }
+};
+
+inline Executor *getDefaultExecutor() {
+  static SyncExecutor exec;
+  return &exec;
+}
+#elif defined(_MSC_VER)
+/// \brief An Executor that runs tasks via ConcRT.
+class ConcRTExecutor : public Executor {
+  struct Taskish {
+    Taskish(std::function<void()> task) : _task(task) {}
+
+    std::function<void()> _task;
+
+    static void run(void *p) {
+      Taskish *self = static_cast<Taskish *>(p);
+      self->_task();
+      concurrency::Free(self);
+    }
+  };
+
+public:
+  virtual void add(std::function<void()> func) {
+    Concurrency::CurrentScheduler::ScheduleTask(
+        Taskish::run, new (concurrency::Alloc(sizeof(Taskish))) Taskish(func));
+  }
+};
+
+inline Executor *getDefaultExecutor() {
+  static ConcRTExecutor exec;
+  return &exec;
+}
+#else
+/// \brief An implementation of an Executor that runs closures on a thread pool
+/// in FILO order.
+class ThreadPoolExecutor : public Executor {
+public:
+  explicit ThreadPoolExecutor(
+      unsigned threadCount = std::thread::hardware_concurrency())
+      : _stop(false), _done(threadCount) {
+    // Spawn all but one of the threads in another thread as spawning threads
+    // can take a while.
+    std::thread([&, threadCount] {
+      for (size_t i = 1; i < threadCount; ++i) {
+        std::thread([=] { work(); }).detach();
+      }
+      work();
+    }).detach();
+  }
+
+  ~ThreadPoolExecutor() override {
+    std::unique_lock<std::mutex> lock(_mutex);
+    _stop = true;
+    lock.unlock();
+    _cond.notify_all();
+    // Wait for ~Latch.
+  }
+
+  void add(std::function<void()> f) override {
+    std::unique_lock<std::mutex> lock(_mutex);
+    _workStack.push(f);
+    lock.unlock();
+    _cond.notify_one();
+  }
+
+private:
+  void work() {
+    while (true) {
+      std::unique_lock<std::mutex> lock(_mutex);
+      _cond.wait(lock, [&] { return _stop || !_workStack.empty(); });
+      if (_stop)
+        break;
+      auto task = _workStack.top();
+      _workStack.pop();
+      lock.unlock();
+      task();
+    }
+    _done.dec();
+  }
+
+  std::atomic<bool> _stop;
+  std::stack<std::function<void()>> _workStack;
+  std::mutex _mutex;
+  std::condition_variable _cond;
+  Latch _done;
+};
+
+inline Executor *getDefaultExecutor() {
+  static ThreadPoolExecutor exec;
+  return &exec;
+}
+#endif
+
+} // namespace internal
+
+/// \brief Allows launching a number of tasks and waiting for them to finish
+/// either explicitly via sync() or implicitly on destruction.
+class TaskGroup {
+  Latch _latch;
+
+public:
+  void spawn(std::function<void()> f) {
+    _latch.inc();
+    internal::getDefaultExecutor()->add([&, f] {
+      f();
+      _latch.dec();
+    });
+  }
+
+  void sync() const { _latch.sync(); }
+};
+
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
+template <class RandomAccessIterator, class Comp>
+void parallel_sort(
+    RandomAccessIterator start, RandomAccessIterator end,
+    const Comp &comp = std::less<
+        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
+  std::sort(start, end, comp);
+}
+#elif defined(_MSC_VER)
+// Use ppl parallel_sort on Windows.
+template <class RandomAccessIterator, class Comp>
+void parallel_sort(
+    RandomAccessIterator start, RandomAccessIterator end,
+    const Comp &comp = std::less<
+        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
+  concurrency::parallel_sort(start, end, comp);
+}
+#else
+namespace detail {
+const ptrdiff_t minParallelSize = 1024;
+
+/// \brief Inclusive median.
+template <class RandomAccessIterator, class Comp>
+RandomAccessIterator medianOf3(RandomAccessIterator start,
+                               RandomAccessIterator end, const Comp &comp) {
+  RandomAccessIterator mid = start + (std::distance(start, end) / 2);
+  return comp(*start, *(end - 1))
+             ? (comp(*mid, *(end - 1)) ? (comp(*start, *mid) ? mid : start)
+                                       : end - 1)
+             : (comp(*mid, *start) ? (comp(*(end - 1), *mid) ? mid : end - 1)
+                                   : start);
+}
+
+template <class RandomAccessIterator, class Comp>
+void parallel_quick_sort(RandomAccessIterator start, RandomAccessIterator end,
+                         const Comp &comp, TaskGroup &tg, size_t depth) {
+  // Do a sequential sort for small inputs.
+  if (std::distance(start, end) < detail::minParallelSize || depth == 0) {
+    std::sort(start, end, comp);
+    return;
+  }
+
+  // Partition.
+  auto pivot = medianOf3(start, end, comp);
+  // Move pivot to end.
+  std::swap(*(end - 1), *pivot);
+  pivot = std::partition(start, end - 1, [&comp, end](decltype(*start) v) {
+    return comp(v, *(end - 1));
+  });
+  // Move pivot to middle of partition.
+  std::swap(*pivot, *(end - 1));
+
+  // Recurse.
+  tg.spawn([=, &comp, &tg] {
+    parallel_quick_sort(start, pivot, comp, tg, depth - 1);
+  });
+  parallel_quick_sort(pivot + 1, end, comp, tg, depth - 1);
+}
+}
+
+template <class RandomAccessIterator, class Comp>
+void parallel_sort(
+    RandomAccessIterator start, RandomAccessIterator end,
+    const Comp &comp = std::less<
+        typename std::iterator_traits<RandomAccessIterator>::value_type>()) {
+  TaskGroup tg;
+  detail::parallel_quick_sort(start, end, comp, tg,
+                              llvm::Log2_64(std::distance(start, end)) + 1);
+}
+#endif
+
+template <class T> void parallel_sort(T *start, T *end) {
+  parallel_sort(start, end, std::less<T>());
+}
+
+#if !defined(LLVM_ENABLE_THREADS) || LLVM_ENABLE_THREADS == 0
+template <class IterTy, class FuncTy>
+void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
+  std::for_each(Begin, End, Fn);
+}
+
+template <class IndexTy, class FuncTy>
+void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
+  for (IndexTy I = Begin; I != End; ++I)
+    Fn(I);
+}
+#elif defined(_MSC_VER)
+// Use ppl parallel_for_each on Windows.
+template <class IterTy, class FuncTy>
+void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
+  concurrency::parallel_for_each(Begin, End, Fn);
+}
+
+template <class IndexTy, class FuncTy>
+void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
+  concurrency::parallel_for(Begin, End, Fn);
+}
+#else
+template <class IterTy, class FuncTy>
+void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
+  // TaskGroup has a relatively high overhead, so we want to reduce
+  // the number of spawn() calls. We'll create up to 1024 tasks here.
+  // (Note that 1024 is an arbitrary number. This code probably needs
+  // improving to take the number of available cores into account.)
+  ptrdiff_t TaskSize = std::distance(Begin, End) / 1024;
+  if (TaskSize == 0)
+    TaskSize = 1;
+
+  TaskGroup Tg;
+  while (TaskSize <= std::distance(Begin, End)) {
+    Tg.spawn([=, &Fn] { std::for_each(Begin, Begin + TaskSize, Fn); });
+    Begin += TaskSize;
+  }
+  Tg.spawn([=, &Fn] { std::for_each(Begin, End, Fn); });
+}
+
+template <class IndexTy, class FuncTy>
+void parallel_for(IndexTy Begin, IndexTy End, FuncTy Fn) {
+  ptrdiff_t TaskSize = (End - Begin) / 1024;
+  if (TaskSize == 0)
+    TaskSize = 1;
+
+  TaskGroup Tg;
+  IndexTy I = Begin;
+  for (; I + TaskSize < End; I += TaskSize) {
+    Tg.spawn([=, &Fn] {
+      for (IndexTy J = I, E = I + TaskSize; J != E; ++J)
+        Fn(J);
+    });
+  }
+  Tg.spawn([=, &Fn] {
+    for (IndexTy J = I; J < End; ++J)
+      Fn(J);
+  });
+}
#endif
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_PARALLEL_H
Index: llvm/unittests/Support/CMakeLists.txt
===================================================================
--- llvm/unittests/Support/CMakeLists.txt
+++ llvm/unittests/Support/CMakeLists.txt
@@ -36,6 +36,7 @@
   MemoryBufferTest.cpp
   MemoryTest.cpp
   NativeFormatTests.cpp
+  ParallelTest.cpp
   Path.cpp
   ProcessTest.cpp
   ProgramTest.cpp
@@ -65,7 +66,7 @@
   xxhashTest.cpp
   )
 
-# ManagedStatic.cpp uses <pthread>.
+# ManagedStatic and Parallel use <pthread>.
 target_link_libraries(SupportTests ${LLVM_PTHREAD_LIB})
 
 add_subdirectory(DynamicLibrary)
Index: llvm/unittests/Support/ParallelTest.cpp
===================================================================
--- /dev/null
+++ llvm/unittests/Support/ParallelTest.cpp
@@ -0,0 +1,46 @@
+//===- llvm/unittests/Support/ParallelTest.cpp ----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Parallel.h unit tests.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Parallel.h"
+#include "gtest/gtest.h"
+#include <array>
+#include <random>
+
+uint32_t array[1024 * 1024];
+
+TEST(Parallel, sort) {
+  std::mt19937 randEngine;
+  std::uniform_int_distribution<uint32_t> dist;
+
+  for (auto &i : array)
+    i = dist(randEngine);
+
+  llvm::parallel_sort(std::begin(array), std::end(array));
+  ASSERT_TRUE(std::is_sorted(std::begin(array), std::end(array)));
+}
+
+TEST(Parallel, parallel_for) {
+  // We need to test the case with a TaskSize > 1. We are white-box testing
+  // here. The TaskSize is calculated as (End - Begin) / 1024 at the time of
+  // writing.
+  uint32_t range[2050];
+  std::fill(range, range + 2050, 1);
+  llvm::parallel_for(0, 2049, [&range](size_t I) { ++range[I]; });
+
+  uint32_t expected[2049];
+  std::fill(expected, expected + 2049, 2);
+  ASSERT_TRUE(std::equal(range, range + 2049, expected));
+  // Check that we don't write past the end of the requested range.
+  ASSERT_EQ(range[2049], 1u);
+}