diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1537,9 +1537,6 @@ tg.spawn(fn, serial); } - // Both the main thread and thread pool index 0 use getThreadIndex()==0. Be - // careful that they don't concurrently run scanSections. When serial is - // true, fn() has finished at this point, so running execute is safe. tg.spawn([] { RelocationScanner scanner; for (Partition &part : partitions) { diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h --- a/llvm/include/llvm/Support/Parallel.h +++ b/llvm/include/llvm/Support/Parallel.h @@ -38,7 +38,13 @@ // Don't access this directly, use the getThreadIndex wrapper. extern thread_local unsigned threadIndex; -inline unsigned getThreadIndex() { return threadIndex; } +inline unsigned getThreadIndex() { + assert(((parallel::strategy.ThreadsRequested == 1) || + (threadIndex != UINT_MAX)) && + "getThreadIndex() must be called from the thread created by " + "ThreadPoolExecutor"); + return threadIndex; +} #endif #else inline unsigned getThreadIndex() { return 0; } diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -24,11 +24,17 @@ #if LLVM_ENABLE_THREADS #ifdef _WIN32 -static thread_local unsigned threadIndex; - -unsigned getThreadIndex() { return threadIndex; } +static thread_local unsigned threadIndex = UINT_MAX; + +unsigned getThreadIndex() { + assert(((parallel::strategy.ThreadsRequested == 1) || + (threadIndex != UINT_MAX)) && + "getThreadIndex() must be called from the thread created by " + "ThreadPoolExecutor"); + return threadIndex; +} #else -thread_local unsigned threadIndex; +thread_local unsigned threadIndex = UINT_MAX; #endif namespace detail { @@ -216,13 +222,9 @@ void llvm::parallelFor(size_t Begin, size_t End, llvm::function_ref Fn) { - // If we have zero or one items, then do not incur the overhead of spinning up - // a task group. They are surprisingly expensive, and because they do not - // support nested parallelism, a single entry task group can block parallel - // execution underneath them. #if LLVM_ENABLE_THREADS - auto NumItems = End - Begin; - if (NumItems > 1 && parallel::strategy.ThreadsRequested != 1) { + if (parallel::strategy.ThreadsRequested != 1) { + auto NumItems = End - Begin; // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling // overhead on large inputs. auto TaskSize = NumItems / parallel::detail::MaxTasksPerGroup;