diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -1537,9 +1537,6 @@
     tg.spawn(fn, serial);
   }
 
-  // Both the main thread and thread pool index 0 use getThreadIndex()==0. Be
-  // careful that they don't concurrently run scanSections. When serial is
-  // true, fn() has finished at this point, so running execute is safe.
   tg.spawn([] {
     RelocationScanner scanner;
     for (Partition &part : partitions) {
diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -38,7 +38,13 @@
 // Don't access this directly, use the getThreadIndex wrapper.
 extern thread_local unsigned threadIndex;
 
-inline unsigned getThreadIndex() { return threadIndex; }
+inline unsigned getThreadIndex() {
+  assert((threadIndex != UINT_MAX || // UINT_MAX is the initial value.
+          parallel::strategy.ThreadsRequested == 1) &&
+         "getThreadIndex() must be called from the thread created by "
+         "ThreadPoolExecutor");
+  return threadIndex;
+}
 #endif
 #else
 inline unsigned getThreadIndex() { return 0; }
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -24,11 +24,17 @@
 #if LLVM_ENABLE_THREADS
 
 #ifdef _WIN32
-static thread_local unsigned threadIndex;
-
-unsigned getThreadIndex() { return threadIndex; }
+static thread_local unsigned threadIndex = UINT_MAX;
+
+unsigned getThreadIndex() {
+  assert((threadIndex != UINT_MAX || // UINT_MAX is the initial value.
+          parallel::strategy.ThreadsRequested == 1) &&
+         "getThreadIndex() must be called from the thread created by "
+         "ThreadPoolExecutor");
+  return threadIndex;
+}
 #else
-thread_local unsigned threadIndex;
+thread_local unsigned threadIndex = UINT_MAX;
 #endif
 
 namespace detail {
@@ -216,13 +222,9 @@
 
 void llvm::parallelFor(size_t Begin, size_t End,
                        llvm::function_ref<void(size_t)> Fn) {
-  // If we have zero or one items, then do not incur the overhead of spinning up
-  // a task group.  They are surprisingly expensive, and because they do not
-  // support nested parallelism, a single entry task group can block parallel
-  // execution underneath them.
 #if LLVM_ENABLE_THREADS
-  auto NumItems = End - Begin;
-  if (NumItems > 1 && parallel::strategy.ThreadsRequested != 1) {
+  if (parallel::strategy.ThreadsRequested != 1) {
+    auto NumItems = End - Begin;
     // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
     // overhead on large inputs.
     auto TaskSize = NumItems / parallel::detail::MaxTasksPerGroup;