// REVIEW NOTES -- added as patch preamble, above the first "Index:" header,
// so that no hunk body or hunk line count below is modified.
//
// State of this file: it is a unified diff whose original line breaks have
// been collapsed, and whose angle-bracket spans appear to have been stripped
// (bare `#include` with no header name; `std::unique_ptr`, `ArrayRef`,
// `std::vector`, `std::list`, `ErrorOr` and `cl::opt` with no template
// arguments). NOTE(review): restore these from the original review before
// attempting to apply the patch.
//
// What the hunks below show:
//  * New files include/llvm/CodeGen/Parallel.h and lib/CodeGen/Parallel.cpp
//    declare and define llvm::LinkedCodeGen. It looks up the target from the
//    module's triple and calls report_fatal_error if none is found. When
//    OSs.size() == 1 it runs the static codegen() helper on M and returns M.
//    Otherwise it calls SplitModule, writes each partition to an in-memory
//    bitcode string, and starts one std::thread per partition; each thread
//    parses that bitcode into its own LLVMContext and runs codegen() into
//    the stream OSs[index]. All threads are joined, then an empty pointer is
//    returned.
//  * LTOCodeGenerator::compileOptimized moves from the private to the public
//    section and now takes an ArrayRef of output streams. It runs the
//    ObjCARC contract pass, then assigns the result of LinkedCodeGen back to
//    OwnedModule and returns true.
//  * llvm-lto gains a -j option (default 1). With -o it opens one output
//    file per partition, appending the partition index to the file name
//    when -j is not 1; without -o any -j other than 1 is rejected.
//  * lib/CodeGen/CMakeLists.txt lists Parallel.cpp, and
//    test/LTO/X86/parallel.ll checks a two-partition run with llvm-nm.
//
// Points to confirm before landing:
//  * codegen() calls TM->addPassesToEmitFile directly on the result of
//    createTargetMachine with no null check. NOTE(review): verify whether
//    createTargetMachine can return null for the targets reachable here.
//  * Failures inside LinkedCodeGen are reported with report_fatal_error.
//    compileOptimized returns true unconditionally once determineTarget has
//    succeeded, and the visible hunk shows no code setting errMsg on a code
//    generation failure.
//  * llvm-lto does not reject -j0: the file-opening loop runs zero times and
//    compileOptimized receives an empty list, so LinkedCodeGen takes the
//    SplitModule path with OSs.size() == 0. Presumably unintended -- verify
//    how SplitModule treats a partition count of zero.
//  * With more than one partition LinkedCodeGen returns an empty pointer, so
//    OwnedModule is empty after compileOptimized. The comment in the
//    LTOCodeGenerator.cpp hunk states this is intended; callers that use the
//    module afterwards need to respect it.
//
Index: include/llvm/CodeGen/Parallel.h =================================================================== --- /dev/null +++ include/llvm/CodeGen/Parallel.h @@ -0,0 +1,41 @@ +//===-- llvm/CodeGen/Parallel.h - Parallel code generation ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header declares functions that can be used for parallel code generation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_PARALLEL_H +#define LLVM_CODEGEN_PARALLEL_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/CodeGen.h" + +namespace llvm { + +class Module; +class TargetOptions; +class raw_pwrite_stream; + +/// Split M into OSs.size() partitions, and generate code for each. Writes +/// OSs.size() object files to the output streams in OSs. The resulting object +/// files if linked together are intended to be equivalent to the single object +/// file that would have been code generated from M. +/// +/// \returns M if OSs.size() == 1, otherwise returns std::unique_ptr(). +std::unique_ptr +LinkedCodeGen(std::unique_ptr M, ArrayRef OSs, + StringRef CPU, StringRef Features, const TargetOptions &Options, + Reloc::Model RM = Reloc::Default, + CodeModel::Model CM = CodeModel::Default, + CodeGenOpt::Level OL = CodeGenOpt::Default); + +} // namespace llvm + +#endif Index: include/llvm/LTO/LTOCodeGenerator.h =================================================================== --- include/llvm/LTO/LTOCodeGenerator.h +++ include/llvm/LTO/LTOCodeGenerator.h @@ -133,6 +133,12 @@ // if the compilation was not successful. std::unique_ptr compileOptimized(std::string &errMsg); + // Compile the merged optimized module into out.size() object files each + // representing a linkable partition of the module. 
If out contains more than + // one element, code generation is done in parallel with out.size() threads. + // Object files will be written to members of out. Returns true on success. + bool compileOptimized(ArrayRef out, std::string &errMsg); + void setDiagnosticHandler(lto_diagnostic_handler_t, void *); LLVMContext &getContext() { return Context; } @@ -140,7 +146,6 @@ private: void initializeLTOPasses(); - bool compileOptimized(raw_pwrite_stream &out, std::string &errMsg); bool compileOptimizedToFile(const char **name, std::string &errMsg); void applyScopeRestrictions(); void applyRestriction(GlobalValue &GV, ArrayRef Libcalls, Index: lib/CodeGen/CMakeLists.txt =================================================================== --- lib/CodeGen/CMakeLists.txt +++ lib/CodeGen/CMakeLists.txt @@ -80,6 +80,7 @@ OptimizePHIs.cpp PHIElimination.cpp PHIEliminationUtils.cpp + Parallel.cpp Passes.cpp PeepholeOptimizer.cpp PostRASchedulerList.cpp Index: lib/CodeGen/Parallel.cpp =================================================================== --- /dev/null +++ lib/CodeGen/Parallel.cpp @@ -0,0 +1,82 @@ +//===-- Parallel.cpp ------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines functions that can be used for parallel code generation. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Parallel.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/SplitModule.h" +#include + +using namespace llvm; + +static void codegen(Module *M, llvm::raw_pwrite_stream &OS, + const Target *TheTarget, StringRef CPU, StringRef Features, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL) { + std::unique_ptr TM(TheTarget->createTargetMachine( + M->getTargetTriple(), CPU, Features, Options, RM, CM, OL)); + + legacy::PassManager CodeGenPasses; + if (TM->addPassesToEmitFile(CodeGenPasses, OS, + TargetMachine::CGFT_ObjectFile)) + report_fatal_error("Failed to setup codegen"); + CodeGenPasses.run(*M); +} + +std::unique_ptr llvm::LinkedCodeGen( + std::unique_ptr M, ArrayRef OSs, + StringRef CPU, StringRef Features, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) { + StringRef TripleStr = M->getTargetTriple(); + std::string ErrMsg; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg); + if (!TheTarget) + report_fatal_error(Twine("Target not found: ") + ErrMsg); + + if (OSs.size() == 1) { + codegen(M.get(), *OSs[0], TheTarget, CPU, Features, Options, RM, CM, + OL); + return M; + } + + std::vector Threads; + SplitModule(std::move(M), OSs.size(), [&](std::unique_ptr MPart) { + std::string BC; + raw_string_ostream OS(BC); + WriteBitcodeToFile(MPart.get(), OS); + OS.flush(); + + Threads.emplace_back([=](llvm::raw_pwrite_stream *OS, StringRef BC) { + LLVMContext Ctx; + ErrorOr> MOrErr = + parseBitcodeFile(MemoryBufferRef(BC, ""), Ctx); + if (!MOrErr) + report_fatal_error("Failed to 
read bitcode"); + std::unique_ptr MPartInCtx = std::move(MOrErr.get()); + + codegen(MPartInCtx.get(), *OS, TheTarget, CPU, Features, Options, RM, CM, + OL); + }, OSs[Threads.size()], std::move(BC)); + }); + + for (std::thread &T : Threads) + T.join(); + + return {}; +} Index: lib/LTO/LTOCodeGenerator.cpp =================================================================== --- lib/LTO/LTOCodeGenerator.cpp +++ lib/LTO/LTOCodeGenerator.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/CodeGen/Parallel.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/Config/config.h" #include "llvm/IR/Constants.h" @@ -218,7 +219,7 @@ // generate object file tool_output_file objFile(Filename.c_str(), FD); - bool genResult = compileOptimized(objFile.os(), errMsg); + bool genResult = compileOptimized(&objFile.os(), errMsg); objFile.os().close(); if (objFile.os().has_error()) { objFile.os().clear_error(); @@ -495,7 +496,7 @@ return true; } -bool LTOCodeGenerator::compileOptimized(raw_pwrite_stream &out, +bool LTOCodeGenerator::compileOptimized(ArrayRef out, std::string &errMsg) { if (!this->determineTarget(errMsg)) return false; @@ -505,15 +506,15 @@ // If the bitcode files contain ARC code and were compiled with optimization, // the ObjCARCContractPass must be run, so do it unconditionally here. codeGenPasses.add(createObjCARCContractPass()); - - if (TargetMach->addPassesToEmitFile(codeGenPasses, out, - TargetMachine::CGFT_ObjectFile)) { - errMsg = "target file type not supported"; - return false; - } - - // Run the code generator, and write object file - codeGenPasses.run(*OwnedModule); + codeGenPasses.run(*OwnedModule.get()); + + // Do code generation. We need to preserve the module in case the client calls + // writeMergedModules() after compilation, but we only need to allow this at + // parallelism level 1. 
This is achieved by having LinkedCodeGen return the + // original module at parallelism level 1 which we then assign to OwnedModule. + OwnedModule = + LinkedCodeGen(std::move(OwnedModule), out, MCpu, FeatureStr, Options, + RelocModel, CodeModel::Default, CGOptLevel); return true; } Index: test/LTO/X86/parallel.ll =================================================================== --- /dev/null +++ test/LTO/X86/parallel.ll @@ -0,0 +1,22 @@ +; RUN: llvm-as -o %t.bc %s +; RUN: llvm-lto -exported-symbol=foo -exported-symbol=bar -j2 -o %t.o %t.bc +; RUN: llvm-nm %t.o0 | FileCheck --check-prefix=CHECK0 %s +; RUN: llvm-nm %t.o1 | FileCheck --check-prefix=CHECK1 %s + +target triple = "x86_64-unknown-linux-gnu" + +; CHECK0-NOT: bar +; CHECK0: T foo +; CHECK0-NOT: bar +define void @foo() { + call void @bar() + ret void +} + +; CHECK1-NOT: foo +; CHECK1: T bar +; CHECK1-NOT: foo +define void @bar() { + call void @foo() + ret void +} Index: tools/llvm-lto/llvm-lto.cpp =================================================================== --- tools/llvm-lto/llvm-lto.cpp +++ tools/llvm-lto/llvm-lto.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/Signals.h" #include "llvm/Support/TargetSelect.h" #include "llvm/Support/raw_ostream.h" +#include using namespace llvm; @@ -77,6 +78,9 @@ "set-merged-module", cl::init(false), cl::desc("Use the first input module as the merged module")); +static cl::opt Parallelism("j", cl::Prefix, cl::init(1), + cl::desc("Number of backend threads")); + namespace { struct ModuleInfo { std::vector CanBeHidden; @@ -240,24 +244,38 @@ if (!OutputFilename.empty()) { std::string ErrorInfo; - std::unique_ptr Code = CodeGen.compile( - DisableInline, DisableGVNLoadPRE, DisableLTOVectorization, ErrorInfo); - if (!Code) { - errs() << argv[0] - << ": error compiling the code: " << ErrorInfo << "\n"; + if (!CodeGen.optimize(DisableInline, DisableGVNLoadPRE, + DisableLTOVectorization, ErrorInfo)) { + errs() << argv[0] << ": error optimizing the code: " << ErrorInfo << 
"\n"; return 1; } - std::error_code EC; - raw_fd_ostream FileStream(OutputFilename, EC, sys::fs::F_None); - if (EC) { - errs() << argv[0] << ": error opening the file '" << OutputFilename - << "': " << EC.message() << "\n"; - return 1; + std::list OSs; + std::vector OSPtrs; + for (unsigned I = 0; I != Parallelism; ++I) { + std::string PartFilename = OutputFilename; + if (Parallelism != 1) + PartFilename += utostr(I); + std::error_code EC; + OSs.emplace_back(PartFilename, EC, sys::fs::F_None); + if (EC) { + errs() << argv[0] << ": error opening the file '" << PartFilename + << "': " << EC.message() << "\n"; + return 1; + } + OSPtrs.push_back(&OSs.back()); } - FileStream.write(Code->getBufferStart(), Code->getBufferSize()); + if (!CodeGen.compileOptimized(OSPtrs, ErrorInfo)) { + errs() << argv[0] << ": error compiling the code: " << ErrorInfo << "\n"; + return 1; + } } else { + if (Parallelism != 1) { + errs() << argv[0] << ": -j must be specified together with -o\n"; + return 1; + } + std::string ErrorInfo; const char *OutputName = nullptr; if (!CodeGen.compile_to_file(&OutputName, DisableInline,