diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
--- a/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
+++ b/llvm/include/llvm/ExecutionEngine/Orc/LLJIT.h
@@ -206,7 +206,7 @@
   DataLayout DL;
   Triple TT;
-  std::unique_ptr<ThreadPool> CompileThreads;
+  std::unique_ptr<ThreadPool<void>> CompileThreads;
 
   std::unique_ptr<ObjectLayer> ObjLinkingLayer;
   std::unique_ptr<ObjectTransformLayer> ObjTransformLayer;
diff --git a/llvm/include/llvm/Support/TaskQueue.h b/llvm/include/llvm/Support/TaskQueue.h
--- a/llvm/include/llvm/Support/TaskQueue.h
+++ b/llvm/include/llvm/Support/TaskQueue.h
@@ -66,7 +66,9 @@ public:
   /// Construct a task queue with no work.
-  TaskQueue(ThreadPool &Scheduler) : Scheduler(Scheduler) { (void)Scheduler; }
+  TaskQueue(ThreadPool<void> &Scheduler) : Scheduler(Scheduler) {
+    (void)Scheduler;
+  }
 
   /// Blocking destructor: the queue will wait for all work to complete.
   ~TaskQueue() {
@@ -121,7 +123,7 @@
   }
 
   /// The thread pool on which to run the work.
-  ThreadPool &Scheduler;
+  ThreadPool<void> &Scheduler;
 
   /// State which indicates whether the queue is currently processing
   /// any work.
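For orientation, here is a minimal sketch of how TaskQueue sits on top of the pool after this change. It is hypothetical, not part of the patch: the variable names are invented, and it assumes the `ThreadPool<void>` spelling used at the call sites below.

```cpp
#include "llvm/Support/TaskQueue.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"

using namespace llvm;

int main() {
  ThreadPool<void> Pool(hardware_concurrency(4));
  TaskQueue TQ(Pool);
  // A TaskQueue runs its tasks strictly one at a time, in submission order,
  // even when the underlying pool has several worker threads; its futures
  // carry the task's result type.
  std::future<int> F = TQ.async([] { return 42; });
  return F.get() == 42 ? 0 : 1;
}
```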
diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h
--- a/llvm/include/llvm/Support/ThreadPool.h
+++ b/llvm/include/llvm/Support/ThreadPool.h
@@ -34,24 +34,89 @@
 ///
 /// The pool keeps a vector of threads alive, waiting on a condition variable
 /// for some work to become available.
-class ThreadPool {
+template <typename ResultTy> class ThreadPool {
 public:
-  using TaskTy = std::function<void()>;
-  using PackagedTaskTy = std::packaged_task<void()>;
+  using TaskTy = std::function<ResultTy()>;
+  using PackagedTaskTy = std::packaged_task<ResultTy()>;
 
   /// Construct a pool using the hardware strategy \p S for mapping hardware
   /// execution resources (threads, cores, CPUs)
   /// Defaults to using the maximum execution resources in the system, but
   /// accounting for the affinity mask.
-  ThreadPool(ThreadPoolStrategy S = hardware_concurrency());
+  ThreadPool(ThreadPoolStrategy S = hardware_concurrency())
+      : ThreadCount(S.compute_thread_count()) {
+#if LLVM_ENABLE_THREADS
+    // Create ThreadCount threads that will loop forever, wait on
+    // QueueCondition for tasks to be queued or the Pool to be destroyed.
+    Threads.reserve(ThreadCount);
+    for (unsigned ThreadID = 0; ThreadID < ThreadCount; ++ThreadID) {
+      Threads.emplace_back([S, ThreadID, this] {
+        S.apply_thread_strategy(ThreadID);
+        while (true) {
+          PackagedTaskTy Task;
+          {
+            std::unique_lock<std::mutex> LockGuard(QueueLock);
+            // Wait for tasks to be pushed in the queue
+            QueueCondition.wait(LockGuard,
+                                [&] { return !EnableFlag || !Tasks.empty(); });
+            // Exit condition
+            if (!EnableFlag && Tasks.empty())
+              return;
+            // Yeah, we have a task, grab it and release the lock on the queue
+
+            // We first need to signal that we are active before popping the
+            // queue in order for wait() to properly detect that even if the
+            // queue is empty, there is still a task in flight.
+            ++ActiveThreads;
+            Task = std::move(Tasks.front());
+            Tasks.pop();
+          }
+          // Run the task we just grabbed
+          Task();
+
+          bool Notify;
+          {
+            // Adjust `ActiveThreads`, in case someone waits on
+            // ThreadPool::wait()
+            std::lock_guard<std::mutex> LockGuard(QueueLock);
+            --ActiveThreads;
+            Notify = workCompletedUnlocked();
+          }
+          // Notify task completion if this is the last active thread, in case
+          // someone waits on ThreadPool::wait().
+          if (Notify)
+            CompletionCondition.notify_all();
+        }
+      });
+    }
+#else // LLVM_ENABLE_THREADS Disabled
+    if (ThreadCount != 1) {
+      errs() << "Warning: request a ThreadPool with " << ThreadCount
+             << " threads, but LLVM_ENABLE_THREADS has been turned off\n";
+    }
+#endif
+  }
 
   /// Blocking destructor: the pool will wait for all the threads to complete.
-  ~ThreadPool();
+  ~ThreadPool() {
+#if LLVM_ENABLE_THREADS
+    {
+      std::unique_lock<std::mutex> LockGuard(QueueLock);
+      EnableFlag = false;
+    }
+    QueueCondition.notify_all();
+    for (auto &Worker : Threads)
+      Worker.join();
+#else // LLVM_ENABLE_THREADS Disabled
+    wait();
+#endif
+  }
 
   /// Asynchronous submission of a task to the pool. The returned future can be
   /// used to wait for the task to finish and is *non-blocking* on destruction.
   template <typename Function, typename... Args>
-  inline std::shared_future<void> async(Function &&F, Args &&... ArgList) {
+  inline std::shared_future<ResultTy> async(Function &&F,
+                                            Args &&...ArgList) {
     auto Task =
         std::bind(std::forward<Function>(F), std::forward<Args>(ArgList)...);
     return asyncImpl(std::move(Task));
@@ -60,25 +125,74 @@
   /// Asynchronous submission of a task to the pool. The returned future can be
   /// used to wait for the task to finish and is *non-blocking* on destruction.
   template <typename Function>
-  inline std::shared_future<void> async(Function &&F) {
+  inline std::shared_future<ResultTy> async(Function &&F) {
     return asyncImpl(std::forward<Function>(F));
   }
 
   /// Blocking wait for all the threads to complete and the queue to be empty.
   /// It is an error to try to add new tasks while blocking on this call.
-  void wait();
+  void wait() {
+#if LLVM_ENABLE_THREADS
+    // Wait for all threads to complete and the queue to be empty
+    std::unique_lock<std::mutex> LockGuard(QueueLock);
+    CompletionCondition.wait(LockGuard,
+                             [&] { return workCompletedUnlocked(); });
+#else // LLVM_ENABLE_THREADS Disabled
+    // Sequential implementation running the tasks
+    while (!Tasks.empty()) {
+      auto Task = std::move(Tasks.front());
+      Tasks.pop();
+      Task();
+    }
+#endif
+  }
 
   unsigned getThreadCount() const { return ThreadCount; }
 
   /// Returns true if the current thread is a worker thread of this thread pool.
-  bool isWorkerThread() const;
+  bool isWorkerThread() const {
+    llvm::thread::id CurrentThreadId = llvm::this_thread::get_id();
+    for (const llvm::thread &Thread : Threads)
+      if (CurrentThreadId == Thread.get_id())
+        return true;
+    return false;
+  }
 
 private:
   bool workCompletedUnlocked() { return !ActiveThreads && Tasks.empty(); }
 
   /// Asynchronous submission of a task to the pool. The returned future can be
   /// used to wait for the task to finish and is *non-blocking* on destruction.
-  std::shared_future<void> asyncImpl(TaskTy F);
+  std::shared_future<ResultTy> asyncImpl(TaskTy Task) {
+#if LLVM_ENABLE_THREADS
+    /// Wrap the Task in a packaged_task to return a future object.
+    PackagedTaskTy PackagedTask(std::move(Task));
+    auto Future = PackagedTask.get_future();
+    {
+      // Lock the queue and push the new task
+      std::unique_lock<std::mutex> LockGuard(QueueLock);
+
+      // Don't allow enqueueing after disabling the pool
+      assert(EnableFlag && "Queuing a thread during ThreadPool destruction");
+
+      Tasks.push(std::move(PackagedTask));
+    }
+    QueueCondition.notify_one();
+    return Future.share();
+#else // LLVM_ENABLE_THREADS Disabled
+    // Get a Future with launch::deferred execution using std::async
+    auto Future = std::async(std::launch::deferred, std::move(Task)).share();
+    // Wrap the future so that both ThreadPool::wait() can operate and the
+    // returned future can be sync'ed on.
+    PackagedTaskTy PackagedTask([Future]() { return Future.get(); });
+    Tasks.push(std::move(PackagedTask));
+    return Future;
+#endif
+  }
 
   /// Threads in flight
   std::vector<llvm::thread> Threads;
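With the implementation now entirely inline, a translation unit only needs the header to use the pool. A minimal usage sketch follows; note that the `ResultTy` template parameter name above is a reconstruction of the stripped template argument, and `<void>` matches the call-site updates in the rest of this patch.

```cpp
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/Threading.h"
#include <atomic>

using namespace llvm;

int main() {
  std::atomic<int> Counter{0};
  ThreadPool<void> Pool(hardware_concurrency(4));
  for (int I = 0; I < 8; ++I)
    Pool.async([&Counter] { ++Counter; });
  // Block on a single task through its shared_future...
  std::shared_future<void> F = Pool.async([&Counter] { ++Counter; });
  F.wait();
  // ...or drain the whole queue; the destructor also joins the workers.
  Pool.wait();
  return Counter == 9 ? 0 : 1;
}
```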
diff --git a/llvm/lib/CodeGen/ParallelCG.cpp b/llvm/lib/CodeGen/ParallelCG.cpp
--- a/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/llvm/lib/CodeGen/ParallelCG.cpp
@@ -53,7 +53,7 @@
   // Create ThreadPool in nested scope so that threads will be joined
   // on destruction.
   {
-    ThreadPool CodegenThreadPool(hardware_concurrency(OSs.size()));
+    ThreadPool<void> CodegenThreadPool(hardware_concurrency(OSs.size()));
     int ThreadCount = 0;
 
     SplitModule(
diff --git a/llvm/lib/DWARFLinker/DWARFLinker.cpp b/llvm/lib/DWARFLinker/DWARFLinker.cpp
--- a/llvm/lib/DWARFLinker/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/DWARFLinker.cpp
@@ -2571,7 +2571,7 @@
     }
     EmitLambda();
   } else {
-    ThreadPool Pool(hardware_concurrency(2));
+    ThreadPool<void> Pool(hardware_concurrency(2));
     Pool.async(AnalyzeAll);
     Pool.async(CloneAll);
     Pool.wait();
diff --git a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
--- a/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
+++ b/llvm/lib/DebugInfo/GSYM/DwarfTransformer.cpp
@@ -448,7 +448,7 @@
 
   // Now parse all DIEs in case we have cross compile unit references in a
   // thread pool.
-  ThreadPool pool(hardware_concurrency(NumThreads));
+  ThreadPool<void> pool(hardware_concurrency(NumThreads));
   for (const auto &CU : DICtx.compile_units())
     pool.async([&CU]() { CU->getUnitDIE(false /*CUDieOnly*/); });
   pool.wait();
diff --git a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
--- a/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/LLJIT.cpp
@@ -791,8 +791,8 @@
 
   if (S.NumCompileThreads > 0) {
     InitHelperTransformLayer->setCloneToNewContextOnEmit(true);
-    CompileThreads =
-        std::make_unique<ThreadPool>(hardware_concurrency(S.NumCompileThreads));
+    CompileThreads = std::make_unique<ThreadPool<void>>(
+        hardware_concurrency(S.NumCompileThreads));
     ES->setDispatchTask([this](std::unique_ptr<Task> T) {
       // FIXME: We should be able to use move-capture here, but ThreadPool's
       // AsyncTaskTys are std::functions rather than unique_functions
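The FIXME in this hunk is about ThreadPool storing tasks as copyable std::function objects: a move-only value such as the std::unique_ptr<Task> argument cannot be move-captured into a queued task. Below is a standalone sketch of the usual std::shared_ptr workaround; `Task` and `dispatch` here are illustrative stand-ins, not the Orc types.

```cpp
#include <functional>
#include <memory>
#include <queue>

// Stand-in for a move-only unit of work (e.g. Orc's Task).
struct Task {
  virtual ~Task() = default;
  virtual void run() = 0;
};

void dispatch(std::queue<std::function<void()>> &Queue,
              std::unique_ptr<Task> T) {
  // A lambda capturing a unique_ptr is move-only and cannot be stored in a
  // std::function. Converting to shared_ptr makes the closure copyable while
  // keeping single ownership in practice.
  std::shared_ptr<Task> SharedT = std::move(T);
  Queue.push([SharedT] { SharedT->run(); });
}
```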
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -1181,7 +1181,7 @@
 namespace {
 class InProcessThinBackend : public ThinBackendProc {
-  ThreadPool BackendThreadPool;
+  ThreadPool<void> BackendThreadPool;
   AddStreamFn AddStream;
   FileCache Cache;
   std::set<GlobalValue::GUID> CfiFunctionDefs;
diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -432,7 +432,7 @@
                   AddStreamFn AddStream,
                   unsigned ParallelCodeGenParallelismLevel, Module &Mod,
                   const ModuleSummaryIndex &CombinedIndex) {
-  ThreadPool CodegenThreadPool(
+  ThreadPool<void> CodegenThreadPool(
       heavyweight_hardware_concurrency(ParallelCodeGenParallelismLevel));
   unsigned ThreadCount = 0;
   const Target *T = &TM->getTarget();
diff --git a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
--- a/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -1015,7 +1015,7 @@
   if (CodeGenOnly) {
     // Perform only parallel codegen and return.
-    ThreadPool Pool;
+    ThreadPool<void> Pool;
     int count = 0;
     for (auto &Mod : Modules) {
       Pool.async([&](int count) {
@@ -1154,7 +1154,7 @@
 
   // Parallel optimizer + codegen
   {
-    ThreadPool Pool(heavyweight_hardware_concurrency(ThreadCount));
+    ThreadPool<void> Pool(heavyweight_hardware_concurrency(ThreadCount));
     for (auto IndexCount : ModulesOrdering) {
       auto &Mod = Modules[IndexCount];
       Pool.async([&](int count) {
diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt
--- a/llvm/lib/Support/CMakeLists.txt
+++ b/llvm/lib/Support/CMakeLists.txt
@@ -205,7 +205,6 @@
   SystemUtils.cpp
   TarWriter.cpp
   TargetParser.cpp
-  ThreadPool.cpp
   TimeProfiler.cpp
   Timer.cpp
   ToolOutputFile.cpp
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
deleted file mode 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-//==-- llvm/Support/ThreadPool.cpp - A ThreadPool implementation -*- C++ -*-==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a crude C++11 based thread pool.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/ThreadPool.h"
-
-#include "llvm/Config/llvm-config.h"
-#include "llvm/Support/Threading.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#if LLVM_ENABLE_THREADS
-
-ThreadPool::ThreadPool(ThreadPoolStrategy S)
-    : ThreadCount(S.compute_thread_count()) {
-  // Create ThreadCount threads that will loop forever, wait on QueueCondition
-  // for tasks to be queued or the Pool to be destroyed.
-  Threads.reserve(ThreadCount);
-  for (unsigned ThreadID = 0; ThreadID < ThreadCount; ++ThreadID) {
-    Threads.emplace_back([S, ThreadID, this] {
-      S.apply_thread_strategy(ThreadID);
-      while (true) {
-        PackagedTaskTy Task;
-        {
-          std::unique_lock<std::mutex> LockGuard(QueueLock);
-          // Wait for tasks to be pushed in the queue
-          QueueCondition.wait(LockGuard,
-                              [&] { return !EnableFlag || !Tasks.empty(); });
-          // Exit condition
-          if (!EnableFlag && Tasks.empty())
-            return;
-          // Yeah, we have a task, grab it and release the lock on the queue
-
-          // We first need to signal that we are active before popping the queue
-          // in order for wait() to properly detect that even if the queue is
-          // empty, there is still a task in flight.
-          ++ActiveThreads;
-          Task = std::move(Tasks.front());
-          Tasks.pop();
-        }
-        // Run the task we just grabbed
-        Task();
-
-        bool Notify;
-        {
-          // Adjust `ActiveThreads`, in case someone waits on ThreadPool::wait()
-          std::lock_guard<std::mutex> LockGuard(QueueLock);
-          --ActiveThreads;
-          Notify = workCompletedUnlocked();
-        }
-        // Notify task completion if this is the last active thread, in case
-        // someone waits on ThreadPool::wait().
-        if (Notify)
-          CompletionCondition.notify_all();
-      }
-    });
-  }
-}
-
-void ThreadPool::wait() {
-  // Wait for all threads to complete and the queue to be empty
-  std::unique_lock<std::mutex> LockGuard(QueueLock);
-  CompletionCondition.wait(LockGuard, [&] { return workCompletedUnlocked(); });
-}
-
-bool ThreadPool::isWorkerThread() const {
-  llvm::thread::id CurrentThreadId = llvm::this_thread::get_id();
-  for (const llvm::thread &Thread : Threads)
-    if (CurrentThreadId == Thread.get_id())
-      return true;
-  return false;
-}
-
-std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) {
-  /// Wrap the Task in a packaged_task to return a future object.
-  PackagedTaskTy PackagedTask(std::move(Task));
-  auto Future = PackagedTask.get_future();
-  {
-    // Lock the queue and push the new task
-    std::unique_lock<std::mutex> LockGuard(QueueLock);
-
-    // Don't allow enqueueing after disabling the pool
-    assert(EnableFlag && "Queuing a thread during ThreadPool destruction");
-
-    Tasks.push(std::move(PackagedTask));
-  }
-  QueueCondition.notify_one();
-  return Future.share();
-}
-
-// The destructor joins all threads, waiting for completion.
-ThreadPool::~ThreadPool() {
-  {
-    std::unique_lock<std::mutex> LockGuard(QueueLock);
-    EnableFlag = false;
-  }
-  QueueCondition.notify_all();
-  for (auto &Worker : Threads)
-    Worker.join();
-}
-
-#else // LLVM_ENABLE_THREADS Disabled
-
-// No threads are launched, issue a warning if ThreadCount is not 0
-ThreadPool::ThreadPool(ThreadPoolStrategy S)
-    : ThreadCount(S.compute_thread_count()) {
-  if (ThreadCount != 1) {
-    errs() << "Warning: request a ThreadPool with " << ThreadCount
-           << " threads, but LLVM_ENABLE_THREADS has been turned off\n";
-  }
-}
-
-void ThreadPool::wait() {
-  // Sequential implementation running the tasks
-  while (!Tasks.empty()) {
-    auto Task = std::move(Tasks.front());
-    Tasks.pop();
-    Task();
-  }
-}
-
-std::shared_future<void> ThreadPool::asyncImpl(TaskTy Task) {
-  // Get a Future with launch::deferred execution using std::async
-  auto Future = std::async(std::launch::deferred, std::move(Task)).share();
-  // Wrap the future so that both ThreadPool::wait() can operate and the
-  // returned future can be sync'ed on.
-  PackagedTaskTy PackagedTask([Future]() { Future.get(); });
-  Tasks.push(std::move(PackagedTask));
-  return Future;
-}
-
-ThreadPool::~ThreadPool() { wait(); }
-
-#endif
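The single-threaded fallback above (kept in substance in its new home in the header) relies on a std::async detail worth spelling out: with std::launch::deferred, the callable does not run until the first get() or wait() on the future, which is why wait() must pop and invoke each wrapped task itself. A standalone sketch of that standard-library behavior:

```cpp
#include <future>

int main() {
  bool Ran = false;
  // With std::launch::deferred the callable is stored, not started.
  std::shared_future<int> F = std::async(std::launch::deferred, [&Ran] {
                                Ran = true;
                                return 42;
                              }).share();
  bool RanBeforeGet = Ran; // still false: nothing has run yet
  int V = F.get();         // first get()/wait() runs the task, synchronously
  return (!RanBeforeGet && Ran && V == 42) ? 0 : 1;
}
```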
diff --git a/llvm/tools/dsymutil/dsymutil.cpp b/llvm/tools/dsymutil/dsymutil.cpp
--- a/llvm/tools/dsymutil/dsymutil.cpp
+++ b/llvm/tools/dsymutil/dsymutil.cpp
@@ -605,7 +605,7 @@
     S.ThreadsRequested = DebugMapPtrsOrErr->size();
     S.Limit = true;
   }
-  ThreadPool Threads(S);
+  ThreadPool<void> Threads(S);
 
   // If there is more than one link to execute, we need to generate
   // temporary files.
diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp
--- a/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -1076,7 +1076,7 @@
                            ShowFilenames);
   } else {
     // In -output-dir mode, it's safe to use multiple threads to print files.
-    ThreadPool Pool(S);
+    ThreadPool<void> Pool(S);
     for (const std::string &SourceFile : SourceFiles)
       Pool.async(&CodeCoverageTool::writeSourceFileView, this, SourceFile,
                  Coverage.get(), Printer.get(), ShowFilenames);
diff --git a/llvm/tools/llvm-cov/CoverageExporterJson.cpp b/llvm/tools/llvm-cov/CoverageExporterJson.cpp
--- a/llvm/tools/llvm-cov/CoverageExporterJson.cpp
+++ b/llvm/tools/llvm-cov/CoverageExporterJson.cpp
@@ -237,7 +237,7 @@
     S = heavyweight_hardware_concurrency(SourceFiles.size());
     S.Limit = true;
   }
-  ThreadPool Pool(S);
+  ThreadPool<void> Pool(S);
   json::Array FileArray;
   std::mutex FileArrayMutex;
diff --git a/llvm/tools/llvm-cov/CoverageReport.cpp b/llvm/tools/llvm-cov/CoverageReport.cpp
--- a/llvm/tools/llvm-cov/CoverageReport.cpp
+++ b/llvm/tools/llvm-cov/CoverageReport.cpp
@@ -395,7 +395,7 @@
     S = heavyweight_hardware_concurrency(Files.size());
     S.Limit = true;
   }
-  ThreadPool Pool(S);
+  ThreadPool<void> Pool(S);
 
   std::vector<FileCoverageSummary> FileReports;
   FileReports.reserve(Files.size());
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -353,7 +353,7 @@
     for (const auto &Input : Inputs)
       loadInput(Input, Remapper, Contexts[0].get());
   } else {
-    ThreadPool Pool(hardware_concurrency(NumThreads));
+    ThreadPool<void> Pool(hardware_concurrency(NumThreads));
 
     // Load the inputs in parallel (N/NumThreads serial steps).
     unsigned Ctx = 0;
diff --git a/llvm/unittests/Support/TaskQueueTest.cpp b/llvm/unittests/Support/TaskQueueTest.cpp
--- a/llvm/unittests/Support/TaskQueueTest.cpp
+++ b/llvm/unittests/Support/TaskQueueTest.cpp
@@ -22,7 +22,7 @@
 };
 
 TEST_F(TaskQueueTest, OrderedFutures) {
-  ThreadPool TP(hardware_concurrency(1));
+  ThreadPool<void> TP(hardware_concurrency(1));
   TaskQueue TQ(TP);
   std::atomic<int> X{ 0 };
   std::atomic<int> Y{ 0 };
@@ -66,7 +66,7 @@
 }
 
 TEST_F(TaskQueueTest, UnOrderedFutures) {
-  ThreadPool TP(hardware_concurrency(1));
+  ThreadPool<void> TP(hardware_concurrency(1));
   TaskQueue TQ(TP);
   std::atomic<int> X{ 0 };
   std::atomic<int> Y{ 0 };
@@ -96,7 +96,7 @@
 }
 
 TEST_F(TaskQueueTest, FutureWithReturnValue) {
-  ThreadPool TP(hardware_concurrency(1));
+  ThreadPool<void> TP(hardware_concurrency(1));
   TaskQueue TQ(TP);
   std::future<std::string> F1 = TQ.async([&] { return std::string("Hello"); });
   std::future<int> F2 = TQ.async([&] { return 42; });
diff --git a/llvm/unittests/Support/ThreadPool.cpp b/llvm/unittests/Support/ThreadPool.cpp
--- a/llvm/unittests/Support/ThreadPool.cpp
+++ b/llvm/unittests/Support/ThreadPool.cpp
@@ -92,7 +92,7 @@
 
   std::atomic_int checked_in{0};
 
-  ThreadPool Pool;
+  ThreadPool<void> Pool;
  for (size_t i = 0; i < 5; ++i) {
     Pool.async([this, &checked_in] {
       waitForMainThread();
@@ -112,7 +112,7 @@
   // Test that async works with a function requiring multiple parameters.
   std::atomic_int checked_in{0};
 
-  ThreadPool Pool;
+  ThreadPool<void> Pool;
   for (size_t i = 0; i < 5; ++i) {
     Pool.async(TestFunc, std::ref(checked_in), i);
   }
@@ -122,7 +122,7 @@
 
 TEST_F(ThreadPoolTest, Async) {
   CHECK_UNSUPPORTED();
-  ThreadPool Pool;
+  ThreadPool<void> Pool;
   std::atomic_int i{0};
   Pool.async([this, &i] {
     waitForMainThread();
@@ -137,7 +137,7 @@
 
 TEST_F(ThreadPoolTest, GetFuture) {
   CHECK_UNSUPPORTED();
-  ThreadPool Pool(hardware_concurrency(2));
+  ThreadPool<void> Pool(hardware_concurrency(2));
   std::atomic_int i{0};
   Pool.async([this, &i] {
     waitForMainThread();
@@ -156,7 +156,7 @@
   // Test that we are waiting on destruction
   std::atomic_int checked_in{0};
   {
-    ThreadPool Pool;
+    ThreadPool<void> Pool;
     for (size_t i = 0; i < 5; ++i) {
       Pool.async([this, &checked_in] {
         waitForMainThread();
@@ -185,7 +185,7 @@
   std::mutex AllThreadsLock;
   unsigned Active = 0;
 
-  ThreadPool Pool(S);
+  ThreadPool<void> Pool(S);
   for (size_t I = 0; I < S.compute_thread_count(); ++I) {
     Pool.async([&] {
       {