Index: include/llvm/CodeGen/ParallelCG.h
===================================================================
--- /dev/null
+++ include/llvm/CodeGen/ParallelCG.h
@@ -0,0 +1,48 @@
+//===-- llvm/CodeGen/ParallelCG.h - Parallel code generation ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header declares functions that can be used for parallel code generation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PARALLELCG_H
+#define LLVM_CODEGEN_PARALLELCG_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/CodeGen.h"
+#include <memory>
+
+namespace llvm {
+
+class Module;
+class TargetOptions;
+class raw_pwrite_stream;
+
+/// Split M into OSs.size() partitions, and generate code for each. Writes
+/// OSs.size() object files to the output streams in OSs. The resulting object
+/// files if linked together are intended to be equivalent to the single object
+/// file that would have been code generated from M.
+///
+/// If OSs.size() == 1, code is generated from M on the calling thread.
+/// Otherwise each partition is code generated on its own thread in a separate
+/// LLVMContext, and all threads are joined before this function returns.
+/// Failure to look up the target or to set up code generation is reported via
+/// report_fatal_error.
+///
+/// \returns M if OSs.size() == 1, otherwise returns std::unique_ptr<Module>().
+std::unique_ptr<Module>
+splitCodeGen(std::unique_ptr<Module> M, ArrayRef<raw_pwrite_stream *> OSs,
+             StringRef CPU, StringRef Features, const TargetOptions &Options,
+             Reloc::Model RM = Reloc::Default,
+             CodeModel::Model CM = CodeModel::Default,
+             CodeGenOpt::Level OL = CodeGenOpt::Default);
+
+} // namespace llvm
+
+#endif
Index: include/llvm/LTO/LTOCodeGenerator.h
===================================================================
--- include/llvm/LTO/LTOCodeGenerator.h
+++ include/llvm/LTO/LTOCodeGenerator.h
@@ -133,6 +133,12 @@
   // if the compilation was not successful.
   std::unique_ptr<MemoryBuffer> compileOptimized(std::string &errMsg);
 
+  // Compile the merged optimized module into out.size() object files each
+  // representing a linkable partition of the module. If out contains more than
+  // one element, code generation is done in parallel with out.size() threads.
+  // Object files will be written to members of out. Returns true on success.
+  bool compileOptimized(ArrayRef<raw_pwrite_stream *> out, std::string &errMsg);
+
   void setDiagnosticHandler(lto_diagnostic_handler_t, void *);
 
   LLVMContext &getContext() { return Context; }
@@ -140,7 +146,6 @@
 private:
   void initializeLTOPasses();
 
-  bool compileOptimized(raw_pwrite_stream &out, std::string &errMsg);
   bool compileOptimizedToFile(const char **name, std::string &errMsg);
   void applyScopeRestrictions();
   void applyRestriction(GlobalValue &GV, ArrayRef<StringRef> Libcalls,
Index: lib/CodeGen/CMakeLists.txt
===================================================================
--- lib/CodeGen/CMakeLists.txt
+++ lib/CodeGen/CMakeLists.txt
@@ -80,6 +80,7 @@
   OptimizePHIs.cpp
   PHIElimination.cpp
   PHIEliminationUtils.cpp
+  ParallelCG.cpp
   Passes.cpp
   PeepholeOptimizer.cpp
   PostRASchedulerList.cpp
Index: lib/CodeGen/ParallelCG.cpp
===================================================================
--- /dev/null
+++ lib/CodeGen/ParallelCG.cpp
@@ -0,0 +1,93 @@
+//===-- ParallelCG.cpp ----------------------------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines functions that can be used for parallel code generation.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ParallelCG.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/thread.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/SplitModule.h" + +using namespace llvm; + +static void codegen(Module *M, llvm::raw_pwrite_stream &OS, + const Target *TheTarget, StringRef CPU, StringRef Features, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL) { + std::unique_ptr TM(TheTarget->createTargetMachine( + M->getTargetTriple(), CPU, Features, Options, RM, CM, OL)); + + legacy::PassManager CodeGenPasses; + if (TM->addPassesToEmitFile(CodeGenPasses, OS, + TargetMachine::CGFT_ObjectFile)) + report_fatal_error("Failed to setup codegen"); + CodeGenPasses.run(*M); +} + +std::unique_ptr +llvm::splitCodeGen(std::unique_ptr M, + ArrayRef OSs, StringRef CPU, + StringRef Features, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { + StringRef TripleStr = M->getTargetTriple(); + std::string ErrMsg; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg); + if (!TheTarget) + report_fatal_error(Twine("Target not found: ") + ErrMsg); + + if (OSs.size() == 1) { + codegen(M.get(), *OSs[0], TheTarget, CPU, Features, Options, RM, CM, + OL); + return M; + } + + std::vector Threads; + SplitModule(std::move(M), OSs.size(), [&](std::unique_ptr MPart) { + // We want to clone the module in a new context to multi-thread the codegen. 
+ // We do it by serializing partition modules to bitcode (while still on the + // main thread, in order to avoid data races) and spinning up new threads + // which deserialize the partitions into separate contexts. + // FIXME: Provide a more direct way to do this in LLVM. + SmallVector BC; + raw_svector_ostream BCOS(BC); + WriteBitcodeToFile(MPart.get(), BCOS); + + llvm::raw_pwrite_stream *ThreadOS = OSs[Threads.size()]; + Threads.emplace_back( + [TheTarget, CPU, Features, Options, RM, CM, OL, + ThreadOS](const SmallVector &BC) { + LLVMContext Ctx; + ErrorOr> MOrErr = + parseBitcodeFile(MemoryBufferRef(StringRef(BC.data(), BC.size()), + ""), + Ctx); + if (!MOrErr) + report_fatal_error("Failed to read bitcode"); + std::unique_ptr MPartInCtx = std::move(MOrErr.get()); + + codegen(MPartInCtx.get(), *ThreadOS, TheTarget, CPU, Features, + Options, RM, CM, OL); + }, + std::move(BC)); + }); + + for (std::thread &T : Threads) + T.join(); + + return {}; +} Index: lib/LTO/LTOCodeGenerator.cpp =================================================================== --- lib/LTO/LTOCodeGenerator.cpp +++ lib/LTO/LTOCodeGenerator.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/CodeGen/ParallelCG.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/Config/config.h" #include "llvm/IR/Constants.h" @@ -218,7 +219,7 @@ // generate object file tool_output_file objFile(Filename.c_str(), FD); - bool genResult = compileOptimized(objFile.os(), errMsg); + bool genResult = compileOptimized(&objFile.os(), errMsg); objFile.os().close(); if (objFile.os().has_error()) { objFile.os().clear_error(); @@ -495,25 +496,26 @@ return true; } -bool LTOCodeGenerator::compileOptimized(raw_pwrite_stream &out, +bool LTOCodeGenerator::compileOptimized(ArrayRef out, std::string &errMsg) { if (!this->determineTarget(errMsg)) return false; - legacy::PassManager codeGenPasses; + 
legacy::PassManager preCodeGenPasses; // If the bitcode files contain ARC code and were compiled with optimization, // the ObjCARCContractPass must be run, so do it unconditionally here. - codeGenPasses.add(createObjCARCContractPass()); - - if (TargetMach->addPassesToEmitFile(codeGenPasses, out, - TargetMachine::CGFT_ObjectFile)) { - errMsg = "target file type not supported"; - return false; - } - - // Run the code generator, and write object file - codeGenPasses.run(*MergedModule); + preCodeGenPasses.add(createObjCARCContractPass()); + preCodeGenPasses.run(*MergedModule); + + // Do code generation. We need to preserve the module in case the client calls + // writeMergedModules() after compilation, but we only need to allow this at + // parallelism level 1. This is achieved by having splitCodeGen return the + // original module at parallelism level 1 which we then assign back to + // MergedModule. + MergedModule = + splitCodeGen(std::move(MergedModule), out, MCpu, FeatureStr, Options, + RelocModel, CodeModel::Default, CGOptLevel); return true; } Index: test/LTO/X86/parallel.ll =================================================================== --- /dev/null +++ test/LTO/X86/parallel.ll @@ -0,0 +1,22 @@ +; RUN: llvm-as -o %t.bc %s +; RUN: llvm-lto -exported-symbol=foo -exported-symbol=bar -j2 -o %t.o %t.bc +; RUN: llvm-nm %t.o.0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-nm %t.o.1 | FileCheck --check-prefix=CHECK1 %s + +target triple = "x86_64-unknown-linux-gnu" + +; CHECK0-NOT: bar +; CHECK0: T foo +; CHECK0-NOT: bar +define void @foo() { + call void @bar() + ret void +} + +; CHECK1-NOT: foo +; CHECK1: T bar +; CHECK1-NOT: foo +define void @bar() { + call void @foo() + ret void +} Index: tools/llvm-lto/llvm-lto.cpp =================================================================== --- tools/llvm-lto/llvm-lto.cpp +++ tools/llvm-lto/llvm-lto.cpp @@ -22,7 +22,9 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" #include 
"llvm/Support/TargetSelect.h" +#include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; @@ -77,6 +79,9 @@ "set-merged-module", cl::init(false), cl::desc("Use the first input module as the merged module")); +static cl::opt Parallelism("j", cl::Prefix, cl::init(1), + cl::desc("Number of backend threads")); + namespace { struct ModuleInfo { std::vector CanBeHidden; @@ -240,24 +245,41 @@ if (!OutputFilename.empty()) { std::string ErrorInfo; - std::unique_ptr Code = CodeGen.compile( - DisableInline, DisableGVNLoadPRE, DisableLTOVectorization, ErrorInfo); - if (!Code) { - errs() << argv[0] - << ": error compiling the code: " << ErrorInfo << "\n"; + if (!CodeGen.optimize(DisableInline, DisableGVNLoadPRE, + DisableLTOVectorization, ErrorInfo)) { + errs() << argv[0] << ": error optimizing the code: " << ErrorInfo << "\n"; return 1; } - std::error_code EC; - raw_fd_ostream FileStream(OutputFilename, EC, sys::fs::F_None); - if (EC) { - errs() << argv[0] << ": error opening the file '" << OutputFilename - << "': " << EC.message() << "\n"; + std::list OSs; + std::vector OSPtrs; + for (unsigned I = 0; I != Parallelism; ++I) { + std::string PartFilename = OutputFilename; + if (Parallelism != 1) + PartFilename += "." 
+ utostr(I); + std::error_code EC; + OSs.emplace_back(PartFilename, EC, sys::fs::F_None); + if (EC) { + errs() << argv[0] << ": error opening the file '" << PartFilename + << "': " << EC.message() << "\n"; + return 1; + } + OSPtrs.push_back(&OSs.back().os()); + } + + if (!CodeGen.compileOptimized(OSPtrs, ErrorInfo)) { + errs() << argv[0] << ": error compiling the code: " << ErrorInfo << "\n"; return 1; } - FileStream.write(Code->getBufferStart(), Code->getBufferSize()); + for (tool_output_file &OS : OSs) + OS.keep(); } else { + if (Parallelism != 1) { + errs() << argv[0] << ": -j must be specified together with -o\n"; + return 1; + } + std::string ErrorInfo; const char *OutputName = nullptr; if (!CodeGen.compile_to_file(&OutputName, DisableInline,