diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1537,9 +1537,6 @@ tg.spawn(fn, serial); } - // Both the main thread and thread pool index 0 use getThreadIndex()==0. Be - // careful that they don't concurrently run scanSections. When serial is - // true, fn() has finished at this point, so running execute is safe. tg.spawn([] { RelocationScanner scanner; for (Partition &part : partitions) { diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h --- a/llvm/include/llvm/Support/Parallel.h +++ b/llvm/include/llvm/Support/Parallel.h @@ -30,6 +30,14 @@ extern ThreadPoolStrategy strategy; #if LLVM_ENABLE_THREADS +#define GET_THREAD_INDEX_IMPL \ + if (parallel::strategy.ThreadsRequested == 1) \ + return 0; \ + assert((threadIndex != UINT_MAX) && \ + "getThreadIndex() must be called from a thread created by " \ + "ThreadPoolExecutor"); \ + return threadIndex; + #ifdef _WIN32 // Direct access to thread_local variables from a different DLL isn't // possible with Windows Native TLS. @@ -38,7 +46,7 @@ // Don't access this directly, use the getThreadIndex wrapper. extern thread_local unsigned threadIndex; -inline unsigned getThreadIndex() { return threadIndex; } +inline unsigned getThreadIndex() { GET_THREAD_INDEX_IMPL; } #endif #else inline unsigned getThreadIndex() { return 0; } diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -24,11 +24,11 @@ #if LLVM_ENABLE_THREADS #ifdef _WIN32 -static thread_local unsigned threadIndex; +static thread_local unsigned threadIndex = UINT_MAX; -unsigned getThreadIndex() { return threadIndex; } +unsigned getThreadIndex() { GET_THREAD_INDEX_IMPL; } #else -thread_local unsigned threadIndex; +thread_local unsigned threadIndex = UINT_MAX; #endif namespace detail { @@ -99,10 +99,13 @@ void add(std::function F, bool Sequential = false) override { { - bool UseSequentialQueue = - Sequential || parallel::strategy.ThreadsRequested == 1; + if (parallel::strategy.ThreadsRequested == 1) { + F(); + return; + } + std::lock_guard Lock(Mutex); - if (UseSequentialQueue) + if (Sequential) WorkQueueSequential.emplace_front(std::move(F)); else WorkQueue.emplace_back(std::move(F)); @@ -217,13 +220,9 @@ void llvm::parallelFor(size_t Begin, size_t End, llvm::function_ref Fn) { - // If we have zero or one items, then do not incur the overhead of spinning up - // a task group. They are surprisingly expensive, and because they do not - // support nested parallelism, a single entry task group can block parallel - // execution underneath them. #if LLVM_ENABLE_THREADS - auto NumItems = End - Begin; - if (NumItems > 1 && parallel::strategy.ThreadsRequested != 1) { + if (parallel::strategy.ThreadsRequested != 1) { + auto NumItems = End - Begin; // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling // overhead on large inputs. auto TaskSize = NumItems / parallel::detail::MaxTasksPerGroup;