Index: ELF/Config.h
===================================================================
--- ELF/Config.h
+++ ELF/Config.h
@@ -94,6 +94,7 @@
   ELFKind EKind = ELFNoneKind;
   uint16_t EMachine = llvm::ELF::EM_NONE;
   uint64_t EntryAddr = -1;
+  unsigned LtoJobs;
   unsigned LtoO;
   unsigned Optimize;
 };
Index: ELF/Driver.cpp
===================================================================
--- ELF/Driver.cpp
+++ ELF/Driver.cpp
@@ -321,6 +321,9 @@
   Config->LtoO = getInteger(Args, OPT_lto_O, 2);
   if (Config->LtoO > 3)
     error("invalid optimization level for LTO: " + getString(Args, OPT_lto_O));
+  Config->LtoJobs = getInteger(Args, OPT_lto_jobs, 1);
+  if (Config->LtoJobs == 0)
+    error("number of threads must be > 0");
 
   Config->ZExecStack = hasZOption(Args, "execstack");
   Config->ZNodelete = hasZOption(Args, "nodelete");
Index: ELF/LTO.h
===================================================================
--- ELF/LTO.h
+++ ELF/LTO.h
@@ -37,20 +37,22 @@
 class BitcodeCompiler {
 public:
   void add(BitcodeFile &F);
-  std::unique_ptr<InputFile> compile();
+  std::vector<std::unique_ptr<InputFile>> compile();
 
   BitcodeCompiler()
       : Combined(new llvm::Module("ld-temp.o", Context)), Mover(*Combined) {}
 
 private:
-  llvm::TargetMachine *getTargetMachine();
+  std::vector<std::unique_ptr<InputFile>> runSplitCodegen();
+  std::unique_ptr<llvm::TargetMachine> getTargetMachine();
 
   llvm::LLVMContext Context;
   std::unique_ptr<llvm::Module> Combined;
   llvm::IRMover Mover;
-  SmallString<0> OwningData;
+  std::vector<SmallString<0>> OwningData;
   std::unique_ptr<MemoryBuffer> MB;
   llvm::StringSet<> InternalizedSyms;
+  std::string TheTriple;
 };
 }
 }
Index: ELF/LTO.cpp
===================================================================
--- ELF/LTO.cpp
+++ ELF/LTO.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/CodeGen/ParallelCG.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Linker/IRMover.h"
 #include "llvm/Support/StringSaver.h"
@@ -33,10 +34,13 @@
 using namespace lld::elf;
 
 // This is for use when debugging LTO.
-static void saveLtoObjectFile(StringRef Buffer) {
+static void saveLtoObjectFile(StringRef Buffer, unsigned I, bool Many) {
+  SmallString<128> Filename = Config->OutputFile;
+  if (Many)
+    Filename += utostr(I);
+  Filename += ".lto.o";
   std::error_code EC;
-  raw_fd_ostream OS(Config->OutputFile.str() + ".lto.o", EC,
-                    sys::fs::OpenFlags::F_None);
+  raw_fd_ostream OS(Filename, EC, sys::fs::OpenFlags::F_None);
   check(EC);
   OS << Buffer;
 }
@@ -136,9 +140,36 @@
   GV.setLinkage(GlobalValue::InternalLinkage);
 }
 
+std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::runSplitCodegen() {
+  unsigned NumThreads = Config->LtoJobs;
+  OwningData.resize(NumThreads);
+
+  std::list<raw_svector_ostream> OSs;
+  std::vector<raw_pwrite_stream *> OSPtrs;
+  for (SmallString<0> &Obj : OwningData) {
+    OSs.emplace_back(Obj);
+    OSPtrs.push_back(&OSs.back());
+  }
+
+  splitCodeGen(std::move(Combined), OSPtrs, {},
+               [this]() { return getTargetMachine(); });
+
+  std::vector<std::unique_ptr<InputFile>> ObjFiles;
+  for (SmallString<0> &Obj : OwningData)
+    ObjFiles.push_back(createObjectFile(
+        MemoryBufferRef(Obj, "LLD-INTERNAL-combined-lto-object")));
+
+  if (Config->SaveTemps)
+    for (unsigned I = 0; I < NumThreads; ++I)
+      saveLtoObjectFile(OwningData[I], I, NumThreads > 1);
+
+  return ObjFiles;
+}
+
 // Merge all the bitcode files we have seen, codegen the result
 // and return the resulting ObjectFile.
-std::unique_ptr<InputFile> BitcodeCompiler::compile() {
+std::vector<std::unique_ptr<InputFile>> BitcodeCompiler::compile() {
+  TheTriple = Combined->getTargetTriple();
   for (const auto &Name : InternalizedSyms) {
     GlobalValue *GV = Combined->getNamedValue(Name.first());
     assert(GV);
@@ -151,26 +182,16 @@
   std::unique_ptr<TargetMachine> TM(getTargetMachine());
   runLTOPasses(*Combined, *TM);
 
-  raw_svector_ostream OS(OwningData);
-  legacy::PassManager CodeGenPasses;
-  if (TM->addPassesToEmitFile(CodeGenPasses, OS,
-                              TargetMachine::CGFT_ObjectFile))
-    fatal("failed to setup codegen");
-  CodeGenPasses.run(*Combined);
-  MB = MemoryBuffer::getMemBuffer(OwningData,
-                                  "LLD-INTERNAL-combined-lto-object", false);
-  if (Config->SaveTemps)
-    saveLtoObjectFile(MB->getBuffer());
-  return createObjectFile(*MB);
+  return runSplitCodegen();
 }
 
-TargetMachine *BitcodeCompiler::getTargetMachine() {
-  StringRef TripleStr = Combined->getTargetTriple();
+std::unique_ptr<TargetMachine> BitcodeCompiler::getTargetMachine() {
   std::string Msg;
-  const Target *T = TargetRegistry::lookupTarget(TripleStr, Msg);
+  const Target *T = TargetRegistry::lookupTarget(TheTriple, Msg);
   if (!T)
     fatal("target not found: " + Msg);
   TargetOptions Options = InitTargetOptionsFromCodeGenFlags();
   Reloc::Model R = Config->Pic ? Reloc::PIC_ : Reloc::Static;
-  return T->createTargetMachine(TripleStr, "", "", Options, R);
+  return std::unique_ptr<TargetMachine>(
+      T->createTargetMachine(TheTriple, "", "", Options, R));
 }
Index: ELF/Options.td
===================================================================
--- ELF/Options.td
+++ ELF/Options.td
@@ -237,7 +237,9 @@
 // Aliases for ignored options
 def alias_version_script_version_script : Joined<["--"], "version-script=">, Alias<version_script>;
 
-// Debugging/developer options
+// LTO-related options.
+def lto_jobs : Joined<["--"], "lto-jobs=">,
+  HelpText<"Number of threads to run codegen">;
 def disable_verify : Flag<["-"], "disable-verify">;
 def mllvm : Separate<["-"], "mllvm">;
 def save_temps : Flag<["-"], "save-temps">;
Index: ELF/SymbolTable.cpp
===================================================================
--- ELF/SymbolTable.cpp
+++ ELF/SymbolTable.cpp
@@ -122,24 +122,27 @@
   Lto.reset(new BitcodeCompiler);
   for (const std::unique_ptr<BitcodeFile> &F : BitcodeFiles)
     Lto->add(*F);
-  std::unique_ptr<InputFile> IF = Lto->compile();
-  ObjectFile<ELFT> *Obj = cast<ObjectFile<ELFT>>(IF.release());
+  std::vector<std::unique_ptr<InputFile>> IFs = Lto->compile();
 
   // Replace bitcode symbols.
-  llvm::DenseSet<StringRef> DummyGroups;
-  Obj->parse(DummyGroups);
-  for (SymbolBody *Body : Obj->getNonLocalSymbols()) {
-    Symbol *Sym = insert(Body);
-    Sym->Body->setUsedInRegularObj();
-    if (Sym->Body->isShared())
-      Sym->Body->MustBeInDynSym = true;
-    if (Sym->Body->MustBeInDynSym)
-      Body->MustBeInDynSym = true;
-    if (!Sym->Body->isUndefined() && Body->isUndefined())
-      continue;
-    Sym->Body = Body;
+  for (auto &IF : IFs) {
+    ObjectFile<ELFT> *Obj = cast<ObjectFile<ELFT>>(IF.release());
+
+    llvm::DenseSet<StringRef> DummyGroups;
+    Obj->parse(DummyGroups);
+    for (SymbolBody *Body : Obj->getNonLocalSymbols()) {
+      Symbol *Sym = insert(Body);
+      Sym->Body->setUsedInRegularObj();
+      if (Sym->Body->isShared())
+        Sym->Body->MustBeInDynSym = true;
+      if (Sym->Body->MustBeInDynSym)
+        Body->MustBeInDynSym = true;
+      if (!Sym->Body->isUndefined() && Body->isUndefined())
+        continue;
+      Sym->Body = Body;
+    }
+    ObjectFiles.emplace_back(Obj);
   }
-  ObjectFiles.emplace_back(Obj);
 }
 
 // Add an undefined symbol.
Index: test/ELF/basic.s
===================================================================
--- test/ELF/basic.s
+++ test/ELF/basic.s
@@ -214,3 +214,6 @@
 
 # RUN: not ld.lld %t -o %t -m wrong_emul 2>&1 | FileCheck --check-prefix=UNKNOWN_EMUL %s
 # UNKNOWN_EMUL: unknown emulation: wrong_emul
+
+# RUN: not ld.lld %t --lto-jobs=0 2>&1 | FileCheck --check-prefix=NOTHREADS %s
+# NOTHREADS: number of threads must be > 0
Index: test/ELF/lto/parallel.ll
===================================================================
--- /dev/null
+++ test/ELF/lto/parallel.ll
@@ -0,0 +1,22 @@
+; RUN: llvm-as -o %t.bc %s
+; RUN: ld.lld -m elf_x86_64 --lto-jobs=2 -save-temps -o %t %t.bc -shared
+; RUN: llvm-nm %t0.lto.o | FileCheck --check-prefix=CHECK0 %s
+; RUN: llvm-nm %t1.lto.o | FileCheck --check-prefix=CHECK1 %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK0-NOT: bar
+; CHECK0: T foo
+; CHECK0-NOT: bar
+define void @foo() {
+  call void @bar()
+  ret void
+}
+
+; CHECK1-NOT: foo
+; CHECK1: T bar
+; CHECK1-NOT: foo
+define void @bar() {
+  call void @foo()
+  ret void
+}