diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -1144,7 +1144,17 @@ return; } - lld::threadsEnabled = args.hasFlag(OPT_threads, OPT_threads_no, true); + // /threads: takes a positive integer and provides the default value for + // /opt:lldltojobs=. + if (auto *arg = args.getLastArg(OPT_threads)) { + StringRef v(arg->getValue()); + unsigned threads = 0; + if (!llvm::to_integer(v, threads, 0) || threads == 0) + error(arg->getSpelling() + ": expected a positive integer, but got '" + + arg->getValue() + "'"); + parallel::strategy = hardware_concurrency(threads); + config->thinLTOJobs = v.str(); + } if (args.hasArg(OPT_show_timing)) config->showTiming = true; diff --git a/lld/COFF/Options.td b/lld/COFF/Options.td --- a/lld/COFF/Options.td +++ b/lld/COFF/Options.td @@ -219,9 +219,9 @@ "output native object for merged LTO unit to this path">; def dash_dash_version : Flag<["--"], "version">, HelpText<"Print version information">; -defm threads: B<"threads", - "Run the linker multi-threaded (default)", - "Do not run the linker multi-threaded">; +def threads + : P<"threads", "Number of threads. '1' disables multi-threading. 
By " + "default all available hardware threads are used">; // Flags for debugging def lldmap : F<"lldmap">; diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt --- a/lld/Common/CMakeLists.txt +++ b/lld/Common/CMakeLists.txt @@ -36,7 +36,6 @@ Reproduce.cpp Strings.cpp TargetOptionsCommandFlags.cpp - Threads.cpp Timer.cpp VCSVersion.inc Version.cpp diff --git a/lld/Common/Filesystem.cpp b/lld/Common/Filesystem.cpp --- a/lld/Common/Filesystem.cpp +++ b/lld/Common/Filesystem.cpp @@ -43,7 +43,7 @@ #if defined(_WIN32) sys::fs::remove(path); #else - if (!threadsEnabled || !sys::fs::exists(path) || + if (parallel::strategy.ThreadsRequested == 1 || !sys::fs::exists(path) || !sys::fs::is_regular_file(path)) return; diff --git a/lld/Common/Threads.cpp b/lld/Common/Threads.cpp deleted file mode 100644 --- a/lld/Common/Threads.cpp +++ /dev/null @@ -1,11 +0,0 @@ -//===- Threads.cpp --------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "lld/Common/Threads.h" - -bool lld::threadsEnabled = true; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -860,7 +860,6 @@ args.hasFlag(OPT_fatal_warnings, OPT_no_fatal_warnings, false); errorHandler().vsDiagnostics = args.hasArg(OPT_visual_studio_diagnostics_format, false); - threadsEnabled = args.hasFlag(OPT_threads, OPT_no_threads, true); config->allowMultipleDefinition = args.hasFlag(OPT_allow_multiple_definition, @@ -974,7 +973,6 @@ config->thinLTOIndexOnly = args.hasArg(OPT_thinlto_index_only) || args.hasArg(OPT_thinlto_index_only_eq); config->thinLTOIndexOnlyArg = args.getLastArgValue(OPT_thinlto_index_only_eq); - config->thinLTOJobs = args.getLastArgValue(OPT_thinlto_jobs); config->thinLTOObjectSuffixReplace = getOldNewOptions(args, OPT_thinlto_object_suffix_replace_eq); config->thinLTOPrefixReplace = @@ -1036,6 +1034,20 @@ for (auto *arg : args.filtered(OPT_mllvm)) parseClangOption(arg->getValue(), arg->getSpelling()); + // --threads= takes a positive integer and provides the default value for + // --thinlto-jobs=. 
+ if (auto *arg = args.getLastArg(OPT_threads)) { + StringRef v(arg->getValue()); + unsigned threads = 0; + if (!llvm::to_integer(v, threads, 0) || threads == 0) + error(arg->getSpelling() + ": expected a positive integer, but got '" + + arg->getValue() + "'"); + parallel::strategy = hardware_concurrency(threads); + config->thinLTOJobs = v; + } + if (auto *arg = args.getLastArg(OPT_thinlto_jobs)) + config->thinLTOJobs = arg->getValue(); + if (config->ltoo > 3) error("invalid optimization level for LTO: " + Twine(config->ltoo)); if (config->ltoPartitions == 0) diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -400,7 +400,7 @@ void ICF::forEachClass(llvm::function_ref fn) { // If threading is disabled or the number of sections are // too small to use threading, call Fn sequentially. - if (!threadsEnabled || sections.size() < 1024) { + if (parallel::strategy.ThreadsRequested == 1 || sections.size() < 1024) { forEachClassRange(0, sections.size(), fn); ++cnt; return; diff --git a/lld/ELF/Options.td b/lld/ELF/Options.td --- a/lld/ELF/Options.td +++ b/lld/ELF/Options.td @@ -350,9 +350,10 @@ Eq<"target2", "Interpret R_ARM_TARGET2 as , where is one of rel, abs, or got-rel">, MetaVarName<"">; -defm threads: B<"threads", - "Run the linker multi-threaded (default)", - "Do not run the linker multi-threaded">; +defm threads + : Eq<"threads", + "Number of threads. '1' disables multi-threading. By default all " + "available hardware threads are used">; def time_trace: F<"time-trace">, HelpText<"Record time trace">; def time_trace_file_eq: J<"time-trace-file=">, HelpText<"Specify time trace output file">; @@ -509,7 +510,8 @@ def thinlto_emit_imports_files: F<"thinlto-emit-imports-files">; def thinlto_index_only: F<"thinlto-index-only">; def thinlto_index_only_eq: J<"thinlto-index-only=">; -def thinlto_jobs: J<"thinlto-jobs=">, HelpText<"Number of ThinLTO jobs">; +def thinlto_jobs: J<"thinlto-jobs=">, + HelpText<"Number of ThinLTO jobs. 
Default to --threads=">; def thinlto_object_suffix_replace_eq: J<"thinlto-object-suffix-replace=">; def thinlto_prefix_replace_eq: J<"thinlto-prefix-replace=">; diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2746,12 +2746,11 @@ // The number of symbols we will handle in this function is of the order // of millions for very large executables, so we use multi-threading to // speed it up. - size_t numShards = 32; - size_t concurrency = 1; - if (threadsEnabled) - concurrency = std::min( - PowerOf2Floor(hardware_concurrency().compute_thread_count()), - numShards); + constexpr size_t numShards = 32; + size_t concurrency = PowerOf2Floor( + std::min(hardware_concurrency(parallel::strategy.ThreadsRequested) + .compute_thread_count(), + numShards)); // A sharded map to uniquify symbols by name. std::vector> map(numShards); @@ -3194,11 +3193,10 @@ // Concurrency level. Must be a power of 2 to avoid expensive modulo // operations in the following tight loop. - size_t concurrency = 1; - if (threadsEnabled) - concurrency = std::min( - PowerOf2Floor(hardware_concurrency().compute_thread_count()), - numShards); + size_t concurrency = PowerOf2Floor( + std::min(hardware_concurrency(parallel::strategy.ThreadsRequested) + .compute_thread_count(), + numShards)); // Add section pieces to the builders. parallelForEachN(0, concurrency, [&](size_t threadId) { diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -302,8 +302,6 @@ Do not set the text data sections to be writable, page align sections. .It Fl -no-rosegment Do not put read-only non-executable sections in their own segment. -.It Fl -no-threads -Do not run the linker multi-threaded. .It Fl -no-undefined-version Report version scripts that refer undefined symbols. .It Fl -no-undefined @@ -525,9 +523,12 @@ Pruning policy for the ThinLTO cache. 
.It Fl -thinlto-jobs Ns = Ns Ar value Number of ThinLTO jobs. -.It Fl -threads -Run the linker multi-threaded. -This option is enabled by default. +.It Fl -threads Ns = Ns Ar N +Number of threads. +The value must be a positive integer. +.Cm 1 +disables multi-threading. +By default, all available hardware threads are used. .It Fl -trace Print the names of the input files. .It Fl -trace-symbol Ns = Ns Ar symbol , Fl y Ar symbol diff --git a/lld/include/lld/Common/Threads.h b/lld/include/lld/Common/Threads.h --- a/lld/include/lld/Common/Threads.h +++ b/lld/include/lld/Common/Threads.h @@ -63,10 +63,8 @@ namespace lld { -extern bool threadsEnabled; - template void parallelForEach(R &&range, FuncTy fn) { - if (threadsEnabled) + if (llvm::parallel::strategy.ThreadsRequested != 1) for_each(llvm::parallel::par, std::begin(range), std::end(range), fn); else for_each(llvm::parallel::seq, std::begin(range), std::end(range), fn); @@ -74,14 +72,14 @@ inline void parallelForEachN(size_t begin, size_t end, llvm::function_ref fn) { - if (threadsEnabled) + if (llvm::parallel::strategy.ThreadsRequested != 1) for_each_n(llvm::parallel::par, begin, end, fn); else for_each_n(llvm::parallel::seq, begin, end, fn); } template void parallelSort(R &&range, FuncTy fn) { - if (threadsEnabled) + if (llvm::parallel::strategy.ThreadsRequested != 1) sort(llvm::parallel::par, std::begin(range), std::end(range), fn); else sort(llvm::parallel::seq, std::begin(range), std::end(range), fn); diff --git a/lld/test/COFF/pdb-globals.test b/lld/test/COFF/pdb-globals.test --- a/lld/test/COFF/pdb-globals.test +++ b/lld/test/COFF/pdb-globals.test @@ -2,9 +2,9 @@ RUN: lld-link /debug /nodefaultlib /entry:main /out:%t.exe /pdb:%t.pdb %t.obj RUN: llvm-pdbutil dump -symbols -globals %t.pdb | FileCheck %s -RUN: lld-link /debug /nodefaultlib /entry:main /out:%t.exe /pdb:%t.pdb %t.obj /threads +RUN: lld-link /debug /nodefaultlib /entry:main /out:%t.exe /pdb:%t.pdb %t.obj /threads:1 RUN: llvm-pdbutil dump -symbols -globals %t.pdb | FileCheck %s 
-RUN: lld-link /debug /nodefaultlib /entry:main /out:%t.exe /pdb:%t.pdb %t.obj /threads:no +RUN: lld-link /debug /nodefaultlib /entry:main /out:%t.exe /pdb:%t.pdb %t.obj /threads:2 RUN: llvm-pdbutil dump -symbols -globals %t.pdb | FileCheck %s # Test that we correctly distribute symbols between the globals and module diff --git a/lld/test/ELF/build-id.s b/lld/test/ELF/build-id.s --- a/lld/test/ELF/build-id.s +++ b/lld/test/ELF/build-id.s @@ -5,26 +5,26 @@ # RUN: ld.lld --build-id %t -o %t2 # RUN: llvm-readobj -S %t2 | FileCheck -check-prefix=ALIGN %s -# RUN: ld.lld --build-id %t -o %t2 -threads +# RUN: ld.lld --build-id %t -o %t2 # RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s -# RUN: ld.lld --build-id=fast %t -o %t2 -threads +# RUN: ld.lld --build-id=fast %t -o %t2 # RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s -# RUN: ld.lld --build-id %t -o %t2 -no-threads +# RUN: ld.lld --build-id %t -o %t2 --threads=1 # RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=DEFAULT %s -# RUN: ld.lld --build-id=md5 %t -o %t2 -threads +# RUN: ld.lld --build-id=md5 %t -o %t2 # RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=MD5 %s -# RUN: ld.lld --build-id=md5 %t -o %t2 -no-threads +# RUN: ld.lld --build-id=md5 %t -o %t2 --threads=1 # RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=MD5 %s -# RUN: ld.lld --build-id=sha1 %t -o %t2 -threads +# RUN: ld.lld --build-id=sha1 %t -o %t2 # RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s -# RUN: ld.lld --build-id=sha1 %t -o %t2 -no-threads +# RUN: ld.lld --build-id=sha1 %t -o %t2 --threads=1 # RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s -# RUN: ld.lld --build-id=tree %t -o %t2 -threads +# RUN: ld.lld --build-id=tree %t -o %t2 # RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s -# RUN: ld.lld --build-id=tree %t -o %t2 -no-threads +# RUN: ld.lld --build-id=tree %t -o %t2 --threads=1 # RUN: llvm-objdump -s %t2 | FileCheck --check-prefix=SHA1 %s # RUN: ld.lld 
--build-id=uuid %t -o %t2 diff --git a/lld/test/ELF/lto/thinlto.ll b/lld/test/ELF/lto/thinlto.ll --- a/lld/test/ELF/lto/thinlto.ll +++ b/lld/test/ELF/lto/thinlto.ll @@ -22,6 +22,18 @@ ; RUN: llvm-nm %t31.lto.o | FileCheck %s --check-prefix=NM1 ; RUN: llvm-nm %t32.lto.o | FileCheck %s --check-prefix=NM2 +;; --thinlto-jobs= defaults to --threads=. +; RUN: rm -f %t31.lto.o %t32.lto.o +; RUN: ld.lld -save-temps --threads=2 -shared %t1.o %t2.o -o %t3 +; RUN: llvm-nm %t31.lto.o | FileCheck %s --check-prefix=NM1 +; RUN: llvm-nm %t32.lto.o | FileCheck %s --check-prefix=NM2 + +;; --thinlto-jobs= overrides --threads=. +; RUN: rm -f %t31.lto.o %t32.lto.o +; RUN: ld.lld -save-temps --threads=1 --plugin-opt=jobs=2 -shared %t1.o %t2.o -o %t3 +; RUN: llvm-nm %t31.lto.o | FileCheck %s --check-prefix=NM1 +; RUN: llvm-nm %t32.lto.o | FileCheck %s --check-prefix=NM2 + ; Test with all threads, on all cores, on all CPU sockets ; RUN: rm -f %t31.lto.o %t32.lto.o ; RUN: ld.lld -save-temps --thinlto-jobs=all -shared %t1.o %t2.o -o %t3 diff --git a/lld/test/ELF/threads.s b/lld/test/ELF/threads.s new file mode 100644 --- /dev/null +++ b/lld/test/ELF/threads.s @@ -0,0 +1,11 @@ +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o + +## A positive integer is allowed. +# RUN: ld.lld --threads=1 %t.o -o /dev/null +# RUN: ld.lld --threads=2 %t.o -o /dev/null + +# RUN: not ld.lld --threads=all %t.o -o /dev/null 2>&1 | FileCheck %s -DN=all +# RUN: not ld.lld --threads=0 %t.o -o /dev/null 2>&1 | FileCheck %s -DN=0 +# RUN: not ld.lld --threads=-1 %t.o -o /dev/null 2>&1 | FileCheck %s -DN=-1 + +# CHECK: error: --threads: expected a positive integer, but got '[[N]]' diff --git a/lld/test/wasm/lto/thinlto.ll b/lld/test/wasm/lto/thinlto.ll --- a/lld/test/wasm/lto/thinlto.ll +++ b/lld/test/wasm/lto/thinlto.ll @@ -14,6 +14,18 @@ ; RUN: llvm-nm %t31.lto.o | FileCheck %s --check-prefix=NM1 ; RUN: llvm-nm %t32.lto.o | FileCheck %s --check-prefix=NM2 +;; --thinlto-jobs= defaults to --threads=. 
+; RUN: rm -f %t31.lto.o %t32.lto.o +; RUN: wasm-ld -r -save-temps --threads=2 %t1.o %t2.o -o %t3 +; RUN: llvm-nm %t31.lto.o | FileCheck %s --check-prefix=NM1 +; RUN: llvm-nm %t32.lto.o | FileCheck %s --check-prefix=NM2 + +;; --thinlto-jobs= overrides --threads=. +; RUN: rm -f %t31.lto.o %t32.lto.o +; RUN: wasm-ld -r -save-temps --threads=1 --thinlto-jobs=2 %t1.o %t2.o -o %t3 +; RUN: llvm-nm %t31.lto.o | FileCheck %s --check-prefix=NM1 +; RUN: llvm-nm %t32.lto.o | FileCheck %s --check-prefix=NM2 + ; Test with all threads, on all cores, on all CPU sockets ; RUN: rm -f %t31.lto.o %t32.lto.o ; RUN: wasm-ld -r -save-temps --thinlto-jobs=all %t1.o %t2.o -o %t3 diff --git a/lld/test/wasm/threads.s b/lld/test/wasm/threads.s new file mode 100644 --- /dev/null +++ b/lld/test/wasm/threads.s @@ -0,0 +1,12 @@ +# RUN: llvm-mc -filetype=obj -triple=wasm32 %s -o %t.o + +## A positive integer is allowed. +# RUN: wasm-ld --no-entry %t.o -o /dev/null +# RUN: wasm-ld --no-entry --threads=1 %t.o -o /dev/null +# RUN: wasm-ld --no-entry --threads=2 %t.o -o /dev/null + +# RUN: not wasm-ld --threads=all %t.o -o /dev/null 2>&1 | FileCheck %s -DN=all +# RUN: not wasm-ld --threads=0 %t.o -o /dev/null 2>&1 | FileCheck %s -DN=0 +# RUN: not wasm-ld --threads=-1 %t.o -o /dev/null 2>&1 | FileCheck %s -DN=-1 + +# CHECK: error: --threads: expected a positive integer, but got '[[N]]' diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -362,10 +362,8 @@ config->thinLTOCachePolicy = CHECK( parseCachePruningPolicy(args.getLastArgValue(OPT_thinlto_cache_policy)), "--thinlto-cache-policy: invalid cache policy"); - config->thinLTOJobs = args.getLastArgValue(OPT_thinlto_jobs); errorHandler().verbose = args.hasArg(OPT_verbose); LLVM_DEBUG(errorHandler().verbose = true); - threadsEnabled = args.hasFlag(OPT_threads, OPT_no_threads, true); config->initialMemory = args::getInteger(args, OPT_initial_memory, 0); config->globalBase = args::getInteger(args, 
OPT_global_base, 1024); @@ -377,6 +375,20 @@ config->exportDynamic = args.hasFlag(OPT_export_dynamic, OPT_no_export_dynamic, config->shared); + // --threads= takes a positive integer and provides the default value for + // --thinlto-jobs=. + if (auto *arg = args.getLastArg(OPT_threads)) { + StringRef v(arg->getValue()); + unsigned threads = 0; + if (!llvm::to_integer(v, threads, 0) || threads == 0) + error(arg->getSpelling() + ": expected a positive integer, but got '" + + arg->getValue() + "'"); + parallel::strategy = hardware_concurrency(threads); + config->thinLTOJobs = v; + } + if (auto *arg = args.getLastArg(OPT_thinlto_jobs)) + config->thinLTOJobs = arg->getValue(); + if (auto *arg = args.getLastArg(OPT_features)) { config->features = llvm::Optional>(std::vector()); diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -64,9 +64,6 @@ def mllvm: S<"mllvm">, HelpText<"Options to pass to LLVM">; -def no_threads: F<"no-threads">, - HelpText<"Do not run the linker multi-threaded">; - def no_color_diagnostics: F<"no-color-diagnostics">, HelpText<"Do not use colors in diagnostics">; @@ -98,7 +95,9 @@ def strip_debug: F<"strip-debug">, HelpText<"Strip debugging information">; -def threads: F<"threads">, HelpText<"Run the linker multi-threaded">; +defm threads + : Eq<"threads", "Number of threads. '1' disables multi-threading. By " + "default all available hardware threads are used">; def trace: F<"trace">, HelpText<"Print the names of the input files">; @@ -198,4 +197,5 @@ def thinlto_cache_dir: J<"thinlto-cache-dir=">, HelpText<"Path to ThinLTO cached object file directory">; defm thinlto_cache_policy: Eq<"thinlto-cache-policy", "Pruning policy for the ThinLTO cache">; -def thinlto_jobs: J<"thinlto-jobs=">, HelpText<"Number of ThinLTO jobs">; +def thinlto_jobs: J<"thinlto-jobs=">, + HelpText<"Number of ThinLTO jobs. 
Default to --threads=">; diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h --- a/llvm/include/llvm/Support/Parallel.h +++ b/llvm/include/llvm/Support/Parallel.h @@ -12,6 +12,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Threading.h" #include #include @@ -33,6 +34,11 @@ constexpr sequential_execution_policy seq{}; constexpr parallel_execution_policy par{}; +// Strategy for the default executor used by the parallel routines provided by +// this file. It defaults to using all hardware threads and should be +// initialized before the first use of parallel routines. +extern ThreadPoolStrategy strategy; + namespace detail { #if LLVM_ENABLE_THREADS diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp --- a/llvm/lib/Support/Parallel.cpp +++ b/llvm/lib/Support/Parallel.cpp @@ -20,6 +20,8 @@ #include #include +llvm::ThreadPoolStrategy llvm::parallel::strategy; + namespace llvm { namespace parallel { namespace detail { @@ -78,6 +80,9 @@ T.join(); } + struct Creator { + static void *call() { return new ThreadPoolExecutor(strategy); } + }; struct Deleter { static void call(void *Ptr) { ((ThreadPoolExecutor *)Ptr)->stop(); } }; @@ -131,7 +136,8 @@ // are more frequent with the debug static runtime. // // This also prevents intermittent deadlocks on exit with the MinGW runtime. - static ManagedStatic, + + static ManagedStatic ManagedExec; static std::unique_ptr Exec(&(*ManagedExec)); diff --git a/llvm/utils/gn/secondary/lld/Common/BUILD.gn b/llvm/utils/gn/secondary/lld/Common/BUILD.gn --- a/llvm/utils/gn/secondary/lld/Common/BUILD.gn +++ b/llvm/utils/gn/secondary/lld/Common/BUILD.gn @@ -39,7 +39,6 @@ "Reproduce.cpp", "Strings.cpp", "TargetOptionsCommandFlags.cpp", - "Threads.cpp", "Timer.cpp", "Version.cpp", ]