Index: lld/trunk/Common/CMakeLists.txt =================================================================== --- lld/trunk/Common/CMakeLists.txt +++ lld/trunk/Common/CMakeLists.txt @@ -5,6 +5,7 @@ add_lld_library(lldCommon Reproduce.cpp TargetOptionsCommandFlags.cpp + Threads.cpp Version.cpp ADDITIONAL_HEADER_DIRS Index: lld/trunk/Common/Threads.cpp =================================================================== --- lld/trunk/Common/Threads.cpp +++ lld/trunk/Common/Threads.cpp @@ -0,0 +1,31 @@ +//===- Threads.cpp --------------------------------------------------------===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "lld/Common/Threads.h" +#include + +static std::vector Threads; + +bool lld::ThreadsEnabled = true; + +// Runs a given function in a new thread. +void lld::runBackground(std::function Fn) { + Threads.emplace_back(Fn); +} + +// Wait for all threads spawned for runBackground() to finish. +// +// You need to call this function from the main thread before exiting +// because it is not defined what will happen to non-main threads when +// the main thread exits. +void lld::waitForBackgroundThreads() { + for (std::thread &T : Threads) + if (T.joinable()) + T.join(); +} Index: lld/trunk/ELF/CMakeLists.txt =================================================================== --- lld/trunk/ELF/CMakeLists.txt +++ lld/trunk/ELF/CMakeLists.txt @@ -40,7 +40,6 @@ Symbols.cpp SyntheticSections.cpp Target.cpp - Threads.cpp Thunks.cpp Writer.cpp Index: lld/trunk/ELF/Config.h =================================================================== --- lld/trunk/ELF/Config.h +++ lld/trunk/ELF/Config.h @@ -139,7 +139,6 @@ bool Static = false; bool SysvHash = false; bool Target1Rel; - bool Threads; bool Trace; bool Verbose; bool WarnCommon; Index: lld/trunk/ELF/Driver.cpp =================================================================== --- lld/trunk/ELF/Driver.cpp +++ lld/trunk/ELF/Driver.cpp @@ -38,9 +38,9 @@ #include "SymbolTable.h" #include "SyntheticSections.h" #include "Target.h" -#include "Threads.h" #include "Writer.h" #include "lld/Common/Driver.h" +#include "lld/Common/Threads.h" #include "lld/Common/Version.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" @@ -684,7 +684,7 @@ parseCachePruningPolicy(Args.getLastArgValue(OPT_thinlto_cache_policy)), "--thinlto-cache-policy: invalid cache policy"); Config->ThinLTOJobs = getInteger(Args, OPT_thinlto_jobs, -1u); - Config->Threads = getArg(Args, OPT_threads, OPT_no_threads, true); + ThreadsEnabled = getArg(Args, OPT_threads, OPT_no_threads, true); Config->Trace = Args.hasArg(OPT_trace); Config->Undefined = getArgs(Args, OPT_undefined); Config->UnresolvedSymbols = getUnresolvedSymbolPolicy(Args); Index: lld/trunk/ELF/Error.cpp =================================================================== --- lld/trunk/ELF/Error.cpp +++ lld/trunk/ELF/Error.cpp @@ -9,7 +9,8 @@ #include "Error.h" #include "Config.h" -#include "Threads.h" + +#include "lld/Common/Threads.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Error.h" Index: lld/trunk/ELF/Filesystem.cpp =================================================================== --- lld/trunk/ELF/Filesystem.cpp +++ lld/trunk/ELF/Filesystem.cpp @@ -13,7 +13,7 @@ #include "Filesystem.h" #include "Config.h" -#include "Threads.h" +#include "lld/Common/Threads.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/Support/FileSystem.h" @@ -38,7 +38,7 @@ // This function spawns a background thread to call unlink. // The calling thread returns almost immediately. void elf::unlinkAsync(StringRef Path) { - if (!Config->Threads || !sys::fs::exists(Config->OutputFile) || + if (!ThreadsEnabled || !sys::fs::exists(Config->OutputFile) || !sys::fs::is_regular_file(Config->OutputFile)) return; Index: lld/trunk/ELF/ICF.cpp =================================================================== --- lld/trunk/ELF/ICF.cpp +++ lld/trunk/ELF/ICF.cpp @@ -76,7 +76,7 @@ #include "ICF.h" #include "Config.h" #include "SymbolTable.h" -#include "Threads.h" +#include "lld/Common/Threads.h" #include "llvm/ADT/Hashing.h" #include "llvm/BinaryFormat/ELF.h" #include "llvm/Object/ELF.h" @@ -366,7 +366,7 @@ void ICF::forEachClass(std::function Fn) { // If threading is disabled or the number of sections are // too small to use threading, call Fn sequentially. - if (!Config->Threads || Sections.size() < 1024) { + if (!ThreadsEnabled || Sections.size() < 1024) { forEachClassRange(0, Sections.size(), Fn); ++Cnt; return; Index: lld/trunk/ELF/LinkerScript.cpp =================================================================== --- lld/trunk/ELF/LinkerScript.cpp +++ lld/trunk/ELF/LinkerScript.cpp @@ -21,8 +21,8 @@ #include "Symbols.h" #include "SyntheticSections.h" #include "Target.h" -#include "Threads.h" #include "Writer.h" +#include "lld/Common/Threads.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/ELF.h" Index: lld/trunk/ELF/MapFile.cpp =================================================================== --- lld/trunk/ELF/MapFile.cpp +++ lld/trunk/ELF/MapFile.cpp @@ -26,7 +26,8 @@ #include "Strings.h" #include "SymbolTable.h" #include "SyntheticSections.h" -#include "Threads.h" + +#include "lld/Common/Threads.h" #include "llvm/Support/raw_ostream.h" Index: lld/trunk/ELF/OutputSections.cpp =================================================================== --- lld/trunk/ELF/OutputSections.cpp +++ lld/trunk/ELF/OutputSections.cpp @@ -15,7 +15,7 @@ #include "SymbolTable.h" #include "SyntheticSections.h" #include "Target.h" -#include "Threads.h" +#include "lld/Common/Threads.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/Support/Compression.h" #include "llvm/Support/MD5.h" Index: lld/trunk/ELF/SyntheticSections.cpp =================================================================== --- lld/trunk/ELF/SyntheticSections.cpp +++ lld/trunk/ELF/SyntheticSections.cpp @@ -24,8 +24,8 @@ #include "Strings.h" #include "SymbolTable.h" #include "Target.h" -#include "Threads.h" #include "Writer.h" +#include "lld/Common/Threads.h" #include "lld/Common/Version.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h" @@ -2258,7 +2258,7 @@ // Concurrency level. Must be a power of 2 to avoid expensive modulo // operations in the following tight loop. size_t Concurrency = 1; - if (Config->Threads) + if (ThreadsEnabled) Concurrency = std::min(PowerOf2Floor(hardware_concurrency()), NumShards); Index: lld/trunk/ELF/Threads.h =================================================================== --- lld/trunk/ELF/Threads.h +++ lld/trunk/ELF/Threads.h @@ -1,91 +0,0 @@ -//===- Threads.h ------------------------------------------------*- C++ -*-===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// LLD supports threads to distribute workloads to multiple cores. Using -// multicore is most effective when more than one core are idle. At the -// last step of a build, it is often the case that a linker is the only -// active process on a computer. So, we are naturally interested in using -// threads wisely to reduce latency to deliver results to users. -// -// That said, we don't want to do "too clever" things using threads. -// Complex multi-threaded algorithms are sometimes extremely hard to -// reason about and can easily mess up the entire design. -// -// Fortunately, when a linker links large programs (when the link time is -// most critical), it spends most of the time to work on massive number of -// small pieces of data of the same kind, and there are opportunities for -// large parallelism there. Here are examples: -// -// - We have hundreds of thousands of input sections that need to be -// copied to a result file at the last step of link. Once we fix a file -// layout, each section can be copied to its destination and its -// relocations can be applied independently. -// -// - We have tens of millions of small strings when constructing a -// mergeable string section. -// -// For the cases such as the former, we can just use parallelForEach -// instead of std::for_each (or a plain for loop). Because tasks are -// completely independent from each other, we can run them in parallel -// without any coordination between them. That's very easy to understand -// and reason about. -// -// For the cases such as the latter, we can use parallel algorithms to -// deal with massive data. We have to write code for a tailored algorithm -// for each problem, but the complexity of multi-threading is isolated in -// a single pass and doesn't affect the linker's overall design. -// -// The above approach seems to be working fairly well. As an example, when -// linking Chromium (output size 1.6 GB), using 4 cores reduces latency to -// 75% compared to single core (from 12.66 seconds to 9.55 seconds) on my -// Ivy Bridge Xeon 2.8 GHz machine. Using 40 cores reduces it to 63% (from -// 12.66 seconds to 7.95 seconds). Because of the Amdahl's law, the -// speedup is not linear, but as you add more cores, it gets faster. -// -// On a final note, if you are trying to optimize, keep the axiom "don't -// guess, measure!" in mind. Some important passes of the linker are not -// that slow. For example, resolving all symbols is not a very heavy pass, -// although it would be very hard to parallelize it. You want to first -// identify a slow pass and then optimize it. -// -//===----------------------------------------------------------------------===// - -#ifndef LLD_ELF_THREADS_H -#define LLD_ELF_THREADS_H - -#include "Config.h" - -#include "llvm/Support/Parallel.h" -#include - -namespace lld { -namespace elf { - -template void parallelForEach(R &&Range, FuncTy Fn) { - if (Config->Threads) - for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn); - else - for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn); -} - -inline void parallelForEachN(size_t Begin, size_t End, - std::function Fn) { - if (Config->Threads) - for_each_n(llvm::parallel::par, Begin, End, Fn); - else - for_each_n(llvm::parallel::seq, Begin, End, Fn); -} - -void runBackground(std::function Fn); -void waitForBackgroundThreads(); - -} // namespace elf -} // namespace lld - -#endif Index: lld/trunk/ELF/Threads.cpp =================================================================== --- lld/trunk/ELF/Threads.cpp +++ lld/trunk/ELF/Threads.cpp @@ -1,29 +0,0 @@ -//===- Threads.cpp --------------------------------------------------------===// -// -// The LLVM Linker -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "Threads.h" -#include - -static std::vector Threads; - -// Runs a given function in a new thread. -void lld::elf::runBackground(std::function Fn) { - Threads.emplace_back(Fn); -} - -// Wait for all threads spawned for runBackground() to finish. -// -// You need to call this function from the main thread before exiting -// because it is not defined what will happen to non-main threads when -// the main thread exits. -void lld::elf::waitForBackgroundThreads() { - for (std::thread &T : Threads) - if (T.joinable()) - T.join(); -} Index: lld/trunk/ELF/Writer.cpp =================================================================== --- lld/trunk/ELF/Writer.cpp +++ lld/trunk/ELF/Writer.cpp @@ -19,7 +19,7 @@ #include "SymbolTable.h" #include "SyntheticSections.h" #include "Target.h" -#include "Threads.h" +#include "lld/Common/Threads.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Support/FileOutputBuffer.h" Index: lld/trunk/include/lld/Common/Threads.h =================================================================== --- lld/trunk/include/lld/Common/Threads.h +++ lld/trunk/include/lld/Common/Threads.h @@ -0,0 +1,89 @@ +//===- Threads.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Linker +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// LLD supports threads to distribute workloads to multiple cores. Using +// multicore is most effective when more than one core are idle. At the +// last step of a build, it is often the case that a linker is the only +// active process on a computer. So, we are naturally interested in using +// threads wisely to reduce latency to deliver results to users. +// +// That said, we don't want to do "too clever" things using threads. +// Complex multi-threaded algorithms are sometimes extremely hard to +// reason about and can easily mess up the entire design. +// +// Fortunately, when a linker links large programs (when the link time is +// most critical), it spends most of the time to work on massive number of +// small pieces of data of the same kind, and there are opportunities for +// large parallelism there. Here are examples: +// +// - We have hundreds of thousands of input sections that need to be +// copied to a result file at the last step of link. Once we fix a file +// layout, each section can be copied to its destination and its +// relocations can be applied independently. +// +// - We have tens of millions of small strings when constructing a +// mergeable string section. +// +// For the cases such as the former, we can just use parallelForEach +// instead of std::for_each (or a plain for loop). Because tasks are +// completely independent from each other, we can run them in parallel +// without any coordination between them. That's very easy to understand +// and reason about. +// +// For the cases such as the latter, we can use parallel algorithms to +// deal with massive data. We have to write code for a tailored algorithm +// for each problem, but the complexity of multi-threading is isolated in +// a single pass and doesn't affect the linker's overall design. +// +// The above approach seems to be working fairly well. As an example, when +// linking Chromium (output size 1.6 GB), using 4 cores reduces latency to +// 75% compared to single core (from 12.66 seconds to 9.55 seconds) on my +// Ivy Bridge Xeon 2.8 GHz machine. Using 40 cores reduces it to 63% (from +// 12.66 seconds to 7.95 seconds). Because of the Amdahl's law, the +// speedup is not linear, but as you add more cores, it gets faster. +// +// On a final note, if you are trying to optimize, keep the axiom "don't +// guess, measure!" in mind. Some important passes of the linker are not +// that slow. For example, resolving all symbols is not a very heavy pass, +// although it would be very hard to parallelize it. You want to first +// identify a slow pass and then optimize it. +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_COMMON_THREADS_H +#define LLD_COMMON_THREADS_H + +#include "llvm/Support/Parallel.h" +#include + +namespace lld { + +extern bool ThreadsEnabled; + +template void parallelForEach(R &&Range, FuncTy Fn) { + if (ThreadsEnabled) + for_each(llvm::parallel::par, std::begin(Range), std::end(Range), Fn); + else + for_each(llvm::parallel::seq, std::begin(Range), std::end(Range), Fn); +} + +inline void parallelForEachN(size_t Begin, size_t End, + std::function Fn) { + if (ThreadsEnabled) + for_each_n(llvm::parallel::par, Begin, End, Fn); + else + for_each_n(llvm::parallel::seq, Begin, End, Fn); +} + +void runBackground(std::function Fn); +void waitForBackgroundThreads(); + +} // namespace lld + +#endif