Index: include/llvm/Support/thread.h =================================================================== --- include/llvm/Support/thread.h +++ include/llvm/Support/thread.h @@ -57,6 +57,7 @@ thread(const thread &) = delete; void join() {} + static unsigned hardware_concurrency() { return 1; }; }; } Index: test/tools/gold/X86/pr19901_thinlto.ll =================================================================== --- /dev/null +++ test/tools/gold/X86/pr19901_thinlto.ll @@ -0,0 +1,25 @@ +; RUN: llc %s -o %t.o -filetype=obj -relocation-model=pic +; RUN: llvm-as -function-summary %p/Inputs/pr19901-1.ll -o %t2.o +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=thinlto \ +; RUN: -shared -m elf_x86_64 -o %t.so %t2.o %t.o +; RUN: llvm-readobj -t %t.so | FileCheck %s + +; CHECK: Symbol { +; CHECK: Name: f +; CHECK-NEXT: Value: +; CHECK-NEXT: Size: +; CHECK-NEXT: Binding: Local +; CHECK-NEXT: Type: Function +; CHECK-NEXT: Other: {{2|0}} +; CHECK-NEXT: Section: .text +; CHECK-NEXT: } + +target triple = "x86_64-unknown-linux-gnu" +define i32 @g() { + call void @f() + ret i32 0 +} +define linkonce_odr hidden void @f() { + ret void +} Index: test/tools/gold/X86/thinlto.ll =================================================================== --- test/tools/gold/X86/thinlto.ll +++ test/tools/gold/X86/thinlto.ll @@ -4,17 +4,55 @@ ; RUN: llvm-as %p/Inputs/thinlto.ll -o %t2.o ; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ ; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=thinlto-index-only \ ; RUN: -shared %t.o %t2.o -o %t3 +; RUN: not test -e %t3 +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=thinlto \ +; RUN: -shared %t.o %t2.o -o %t4 +; RUN: llvm-nm %t4 | FileCheck %s --check-prefix=NM +; Next generate function summary sections and test gold handling. ; RUN: llvm-as -function-summary %s -o %t.o ; RUN: llvm-as -function-summary %p/Inputs/thinlto.ll -o %t2.o +; Ensure gold generates an index and not a binary if requested. 
; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ ; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=thinlto-index-only \ ; RUN: -shared %t.o %t2.o -o %t3 ; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED ; RUN: not test -e %t3 +; Ensure gold generates an index as well as a binary by default in ThinLTO mode. +; First force single-threaded mode +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=jobs=1 \ +; RUN: -shared %t.o %t2.o -o %t4 +; RUN: llvm-bcanalyzer -dump %t4.thinlto.bc | FileCheck %s --check-prefix=COMBINED +; RUN: llvm-nm %t4 | FileCheck %s --check-prefix=NM + +; Next force multi-threaded mode +; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=jobs=2 \ +; RUN: -shared %t.o %t2.o -o %t4 +; RUN: llvm-bcanalyzer -dump %t4.thinlto.bc | FileCheck %s --check-prefix=COMBINED +; RUN: llvm-nm %t4 | FileCheck %s --check-prefix=NM + +; Test --plugin-opt=obj-path to ensure unique object files generated. 
+; RUN: %gold -plugin %llvmshlibdir/LLVMgold.so \ +; RUN: --plugin-opt=thinlto \ +; RUN: --plugin-opt=jobs=2 \ +; RUN: --plugin-opt=obj-path=%t5.o \ +; RUN: -shared %t.o %t2.o -o %t4 +; RUN: llvm-nm %t5.o1 | FileCheck %s --check-prefix=NM2 +; RUN: llvm-nm %t5.o2 | FileCheck %s --check-prefix=NM2 + +; NM: T f +; NM2: T {{f|g}} + ; COMBINED: File; + bool Valid; - PluginInputFile(void *handle) : handle(handle) { - if (get_input_file(handle, &File) != LDPS_OK) + PluginInputFile(void *Handle) : Handle(Handle), Valid(true) { + File = llvm::make_unique(); + if (get_input_file(Handle, File.get()) != LDPS_OK) message(LDPL_FATAL, "Failed to get file information"); } ~PluginInputFile() { - if (release_input_file(handle) != LDPS_OK) - message(LDPL_FATAL, "Failed to release file information"); + if (Valid) + if (release_input_file(Handle) != LDPS_OK) + message(LDPL_FATAL, "Failed to release file information"); + } + + ld_plugin_input_file &file() { return *File; } + + PluginInputFile(PluginInputFile &&RHS) { + Handle = RHS.Handle; + File = std::move(RHS.File); + Valid = RHS.Valid; + RHS.Valid = false; + } + PluginInputFile &operator=(PluginInputFile &&RHS) { + Handle = RHS.Handle; + File = std::move(RHS.File); + Valid = RHS.Valid; + RHS.Valid = false; + return *this; } - ld_plugin_input_file &file() { return File; } }; struct ResolutionInfo { @@ -99,6 +120,36 @@ unsigned CommonAlign = 0; claimed_file *CommonFile = nullptr; }; + +/// Class to own information used by a task or during its join. +class TaskInfo { + /// The input file holding the module bitcode read by the task. + PluginInputFile InputFile; + + /// The output stream the task will codegen into. + std::unique_ptr OS; + + /// The corresponding gold file pointer, used during the join. + claimed_file *F; + + /// The file name corresponding to the output stream, used during the join. 
+ std::string Filename; + + /// Flag indicating whether the output file is a temp file that must be + /// added to the cleanup list during the join. + bool TempOutFile; + +public: + TaskInfo(PluginInputFile InputFile, claimed_file *F, + std::unique_ptr OS, std::string Filename, + bool TempOutFile) + : InputFile(std::move(InputFile)), OS(std::move(OS)), F(F), + Filename(Filename), TempOutFile(TempOutFile) {} + + /// Performs task related cleanup activities that must be done + /// single-threaded (i.e. call backs to gold). + ~TaskInfo(); +}; } static ld_plugin_add_symbols add_symbols = nullptr; @@ -123,7 +174,11 @@ static bool generate_api_file = false; static OutputType TheOutputType = OT_NORMAL; static unsigned OptLevel = 2; - static unsigned Parallelism = 1; + // Default parallelism of 0 used to indicate that user did not specify. + // Actual parallelism default value depends on implementation. + // Currently, code generation defaults to no parallelism, whereas + // ThinLTO uses the hardware_concurrency as the default. + static unsigned Parallelism = 0; #ifdef NDEBUG static bool DisableVerify = true; #else @@ -137,6 +192,11 @@ // the information from intermediate files and write a combined // global index for the ThinLTO backends. static bool thinlto = false; + // If false, all ThinLTO backend compilations through code gen are performed + // using multiple threads in the gold-plugin, before handing control back to + // gold. If true, exit after creating the combined index, the assumption is + // that the build system will launch the backend processes. + static bool thinlto_index_only = false; // Additional options to pass into the code generator. // Note: This array will contain all plugin options which are not claimed // as plugin exclusive to pass to the code generator. 
@@ -168,6 +228,8 @@ TheOutputType = OT_DISABLE; } else if (opt == "thinlto") { thinlto = true; + } else if (opt == "thinlto-index-only") { + thinlto_index_only = true; } else if (opt.size() == 2 && opt[0] == 'O') { if (opt[1] < '0' || opt[1] > '3') message(LDPL_FATAL, "Optimization level must be between 0 and 3"); @@ -431,7 +493,7 @@ // If we are doing ThinLTO compilation, don't need to process the symbols. // Later we simply build a combined index file after all files are claimed. - if (options::thinlto) + if (options::thinlto && options::thinlto_index_only) return LDPS_OK; for (auto &Sym : Obj->symbols()) { @@ -561,16 +623,22 @@ Sym.comdat_key = nullptr; } -static std::unique_ptr -getFunctionIndexForFile(claimed_file &F, ld_plugin_input_file &Info) { - - if (get_symbols(F.handle, F.syms.size(), &F.syms[0]) != LDPS_OK) +/// Helper to get a file's symbols and a view into it via gold callbacks. +static const void *getSymbolsAndView(claimed_file &F) { + if (get_symbols(F.handle, F.syms.size(), F.syms.data()) != LDPS_OK) message(LDPL_FATAL, "Failed to get symbol information"); const void *View; if (get_view(F.handle, &View) != LDPS_OK) message(LDPL_FATAL, "Failed to get a view of file"); + return View; +} + +static std::unique_ptr +getFunctionIndexForFile(claimed_file &F, ld_plugin_input_file &Info) { + const void *View = getSymbolsAndView(F); + MemoryBufferRef BufferRef(StringRef((const char *)View, Info.filesize), Info.name); @@ -593,18 +661,11 @@ } static std::unique_ptr -getModuleForFile(LLVMContext &Context, claimed_file &F, +getModuleForFile(LLVMContext &Context, claimed_file &F, const void *View, ld_plugin_input_file &Info, raw_fd_ostream *ApiFile, StringSet<> &Internalize, StringSet<> &Maybe, std::vector &Keep) { - if (get_symbols(F.handle, F.syms.size(), F.syms.data()) != LDPS_OK) - message(LDPL_FATAL, "Failed to get symbol information"); - - const void *View; - if (get_view(F.handle, &View) != LDPS_OK) - message(LDPL_FATAL, "Failed to get a view of file"); - 
MemoryBufferRef BufferRef(StringRef((const char *)View, Info.filesize), Info.name); ErrorOr> ObjOrErr = @@ -731,7 +792,8 @@ return Obj.takeModule(); } -static void runLTOPasses(Module &M, TargetMachine &TM) { +static void runLTOPasses(Module &M, TargetMachine &TM, + const FunctionInfoIndex *Index) { M.setDataLayout(TM.createDataLayout()); legacy::PassManager passes; @@ -747,6 +809,7 @@ PMB.LoopVectorize = true; PMB.SLPVectorize = true; PMB.OptLevel = options::OptLevel; + PMB.FunctionIndex = Index; PMB.populateLTOPassManager(passes); passes.run(M); } @@ -759,7 +822,37 @@ WriteBitcodeToFile(&M, OS, /* ShouldPreserveUseListOrder */ true); } -static void codegen(std::unique_ptr M) { +/// Open a file and return the new file descriptor given a base input +/// file name, a flag indicating whether a temp file should be generated, +/// and an optional task id. The new filename generated is +/// returned in \p NewFilename. +static int openOutputFile(SmallString<128> InFilename, bool TempOutFile, + SmallString<128> &NewFilename, int TaskID = -1) { + int FD; + if (TempOutFile) { + std::error_code EC = + sys::fs::createTemporaryFile("lto-llvm", "o", FD, NewFilename); + if (EC) + message(LDPL_FATAL, "Could not create temporary file: %s", + EC.message().c_str()); + } else { + NewFilename = InFilename; + if (TaskID >= 0) + NewFilename += utostr(TaskID); + std::error_code EC = + sys::fs::openFileForWrite(NewFilename, FD, sys::fs::F_None); + if (EC) + message(LDPL_FATAL, "Could not open file: %s", EC.message().c_str()); + } + return FD; +} + +/// Invoke LTO passes and the code generator for the given module, writing into +/// the provided output streams. The split code generator will generate code +/// in parallel if \p OSPtrs has more than one entry. 
+static void codegenImpl(std::unique_ptr M, + std::vector &OSPtrs, + const FunctionInfoIndex *CombinedIndex = nullptr) { const std::string &TripleStr = M->getTargetTriple(); Triple TheTriple(TripleStr); @@ -796,48 +889,45 @@ TripleStr, options::mcpu, Features.getString(), Options, RelocationModel, CodeModel::Default, CGOptLevel)); - runLTOPasses(*M, *TM); + runLTOPasses(*M, *TM, CombinedIndex); if (options::TheOutputType == options::OT_SAVE_TEMPS) saveBCFile(output_name + ".opt.bc", *M); + // Run backend threads. + splitCodeGen(std::move(M), OSPtrs, options::mcpu, Features.getString(), + Options, RelocationModel, CodeModel::Default, CGOptLevel); +} + +/// Sets up output files necessary to perform optional multi-threaded +/// split code generation, and invokes the optimization and code generation +/// implementation. +static void codegen(std::unique_ptr M) { SmallString<128> Filename; if (!options::obj_path.empty()) Filename = options::obj_path; else if (options::TheOutputType == options::OT_SAVE_TEMPS) Filename = output_name + ".o"; - std::vector> Filenames(options::Parallelism); + // TODO: Should this use thread::hardware_concurrency() if + // -jobs option was not specified? Currently preserve behavior of + // default parallelism being 1. + unsigned int MaxThreads = options::Parallelism ? options::Parallelism : 1; + + std::vector> Filenames(MaxThreads); bool TempOutFile = Filename.empty(); { // Open a file descriptor for each backend thread. This is done in a block // so that the output file descriptors are closed before gold opens them. 
std::list OSs; - std::vector OSPtrs(options::Parallelism); - for (unsigned I = 0; I != options::Parallelism; ++I) { - int FD; - if (TempOutFile) { - std::error_code EC = - sys::fs::createTemporaryFile("lto-llvm", "o", FD, Filenames[I]); - if (EC) - message(LDPL_FATAL, "Could not create temporary file: %s", - EC.message().c_str()); - } else { - Filenames[I] = Filename; - if (options::Parallelism != 1) - Filenames[I] += utostr(I); - std::error_code EC = - sys::fs::openFileForWrite(Filenames[I], FD, sys::fs::F_None); - if (EC) - message(LDPL_FATAL, "Could not open file: %s", EC.message().c_str()); - } + std::vector OSPtrs(MaxThreads); + for (unsigned I = 0; I != MaxThreads; ++I) { + int FD = openOutputFile(Filename, TempOutFile, Filenames[I], I); OSs.emplace_back(FD, true); OSPtrs[I] = &OSs.back(); } - // Run backend threads. - splitCodeGen(std::move(M), OSPtrs, options::mcpu, Features.getString(), - Options, RelocationModel, CodeModel::Default, CGOptLevel); + codegenImpl(std::move(M), OSPtrs); } for (auto &Filename : Filenames) { @@ -850,6 +940,113 @@ } } +/// Perform the backend on a single module, invoking the LTO and codegen +/// pipelines. 
+static void thinLTOBackendTask(claimed_file &F, const void *View, + ld_plugin_input_file &File, + raw_fd_ostream *ApiFile, + const FunctionInfoIndex &CombinedIndex, + raw_fd_ostream *OS) { + // Need to use a separate context for each thread + LLVMContext Context; + Context.setDiagnosticHandler(diagnosticHandlerForContext, nullptr, true); + + StringSet<> Dummy; + std::vector Keep; + std::unique_ptr M = + getModuleForFile(Context, F, View, File, ApiFile, Dummy, Dummy, Keep); + if (!options::triple.empty()) + M->setTargetTriple(options::triple.c_str()); + else if (M->getTargetTriple().empty()) { + M->setTargetTriple(sys::getDefaultTargetTriple()); + } + + std::unique_ptr NewModule( + new llvm::Module(M->getModuleIdentifier(), M->getContext())); + IRMover L(*NewModule.get()); + if (L.move(*M, Keep, [](GlobalValue &, IRMover::ValueAdder) {})) + message(LDPL_FATAL, "Failed to rename module for ThinLTO"); + // TODO: Refactor the static promotion/renaming out of the + // ModuleLinker so a subsequent link step and Module copy is not needed. + // We use the IRMover above to ensure that the ModuleLinker doesn't + // override gold's symbol resolution. + std::unique_ptr RenamedModule = + renameModuleForThinLTO(NewModule, &CombinedIndex); + if (!RenamedModule) + message(LDPL_FATAL, "Failed to rename module for ThinLTO"); + + // splitCodeGen will use a single thread since this has one entry. + std::vector OSPtrs(1); + OSPtrs[0] = OS; + + codegenImpl(std::move(RenamedModule), OSPtrs, &CombinedIndex); +} + +TaskInfo::~TaskInfo() { + // Close the output file descriptor before we pass it to gold. + OS->close(); + + if (add_input_file(Filename.c_str()) != LDPS_OK) + message(LDPL_FATAL, + "Unable to add .o file to the link. File left behind in: %s", + Filename.c_str()); + if (TempOutFile) + Cleanup.push_back(Filename.c_str()); +} + +/// Launch each module's backend pipeline in a separate task in a thread pool. 
+static void thinLTOBackends(raw_fd_ostream *ApiFile, + const FunctionInfoIndex &CombinedIndex) { + SmallString<128> Filename; + // Note that openOutputFile will append a unique ID for each task + if (!options::obj_path.empty()) + Filename = options::obj_path; + else if (options::TheOutputType == options::OT_SAVE_TEMPS) + Filename = output_name + ".o"; + bool TempOutFile = Filename.empty(); + + std::vector Worklist; + for (claimed_file &F : Modules) + Worklist.push_back(&F); + + unsigned TaskCount = 0; + std::vector> Tasks; + unsigned int MaxThreads = options::Parallelism + ? options::Parallelism + : thread::hardware_concurrency(); + + ThreadPool ThinLTOThreadPool(MaxThreads); + while (!Worklist.empty()) { + claimed_file *F = Worklist.back(); + Worklist.pop_back(); + + // Do all the gold callbacks in the main thread, since gold is not thread + // safe by default. + PluginInputFile InputFile(F->handle); + const void *View = getSymbolsAndView(*F); + + SmallString<128> NewFilename; + int FD = openOutputFile(Filename, TempOutFile, NewFilename, ++TaskCount); + std::unique_ptr OS = + llvm::make_unique(FD, true); + + // Enqueue the task + ThinLTOThreadPool.async(thinLTOBackendTask, std::ref(*F), View, + std::ref(InputFile.file()), ApiFile, + std::ref(CombinedIndex), OS.get()); + + // Record the information needed by the task or during its cleanup + // to a TaskInfo instance. For information needed by the task + // the unique_ptr ownership is transferred to the TaskInfo. + std::unique_ptr Task = + llvm::make_unique(std::move(InputFile), F, std::move(OS), + NewFilename.c_str(), TempOutFile); + Tasks.emplace_back(std::move(Task)); + } + + ThinLTOThreadPool.wait(); +} + /// gold informs us that all symbols have been read. At this point, we use /// get_symbols to see if any of our definitions have been overridden by a /// native object file. Then, perform optimization and codegen. 
@@ -883,8 +1080,12 @@ WriteFunctionSummaryToFile(CombinedIndex, OS); OS.close(); - cleanup_hook(); - exit(0); + if (options::thinlto_index_only) { + cleanup_hook(); + exit(0); + } + thinLTOBackends(ApiFile, CombinedIndex); + return LDPS_OK; } LLVMContext Context; @@ -899,9 +1100,10 @@ StringSet<> Maybe; for (claimed_file &F : Modules) { PluginInputFile InputFile(F.handle); + const void *View = getSymbolsAndView(F); std::vector Keep; std::unique_ptr M = getModuleForFile( - Context, F, InputFile.file(), ApiFile, Internalize, Maybe, Keep); + Context, F, View, InputFile.file(), ApiFile, Internalize, Maybe, Keep); if (!options::triple.empty()) M->setTargetTriple(options::triple.c_str()); else if (M->getTargetTriple().empty())