diff --git a/llvm/include/llvm/CodeGen/ParallelCG.h b/llvm/include/llvm/CodeGen/ParallelCG.h --- a/llvm/include/llvm/CodeGen/ParallelCG.h +++ b/llvm/include/llvm/CodeGen/ParallelCG.h @@ -32,14 +32,11 @@ /// /// Writes bitcode for individual partitions into output streams in BCOSs, if /// BCOSs is not empty. -/// -/// \returns M if OSs.size() == 1, otherwise returns std::unique_ptr(). -std::unique_ptr -splitCodeGen(std::unique_ptr M, ArrayRef OSs, - ArrayRef BCOSs, - const std::function()> &TMFactory, - CodeGenFileType FileType = CGFT_ObjectFile, - bool PreserveLocals = false); +void splitCodeGen( + Module &M, ArrayRef OSs, + ArrayRef BCOSs, + const std::function()> &TMFactory, + CodeGenFileType FileType = CGFT_ObjectFile, bool PreserveLocals = false); } // namespace llvm diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -42,8 +42,8 @@ /// Runs a regular LTO backend. The regular LTO backend can also act as the /// regular LTO phase of ThinLTO, which may need to access the combined index. Error backend(const Config &C, AddStreamFn AddStream, - unsigned ParallelCodeGenParallelismLevel, - std::unique_ptr M, ModuleSummaryIndex &CombinedIndex); + unsigned ParallelCodeGenParallelismLevel, Module &M, + ModuleSummaryIndex &CombinedIndex); /// Runs a ThinLTO backend. Error thinBackend(const Config &C, unsigned Task, AddStreamFn AddStream, diff --git a/llvm/include/llvm/Transforms/Utils/SplitModule.h b/llvm/include/llvm/Transforms/Utils/SplitModule.h --- a/llvm/include/llvm/Transforms/Utils/SplitModule.h +++ b/llvm/include/llvm/Transforms/Utils/SplitModule.h @@ -33,7 +33,7 @@ /// - Internal symbols defined in module-level inline asm should be visible to /// each partition. void SplitModule( - std::unique_ptr M, unsigned N, + Module &M, unsigned N, function_ref MPart)> ModuleCallback, bool PreserveLocals = false); diff --git a/llvm/lib/CodeGen/ParallelCG.cpp b/llvm/lib/CodeGen/ParallelCG.cpp --- a/llvm/lib/CodeGen/ParallelCG.cpp +++ b/llvm/lib/CodeGen/ParallelCG.cpp @@ -36,8 +36,8 @@ CodeGenPasses.run(*M); } -std::unique_ptr llvm::splitCodeGen( - std::unique_ptr M, ArrayRef OSs, +void llvm::splitCodeGen( + Module &M, ArrayRef OSs, ArrayRef BCOSs, const std::function()> &TMFactory, CodeGenFileType FileType, bool PreserveLocals) { @@ -45,9 +45,9 @@ if (OSs.size() == 1) { if (!BCOSs.empty()) - WriteBitcodeToFile(*M, *BCOSs[0]); - codegen(M.get(), *OSs[0], TMFactory, FileType); - return M; + WriteBitcodeToFile(M, *BCOSs[0]); + codegen(&M, *OSs[0], TMFactory, FileType); + return; } // Create ThreadPool in nested scope so that threads will be joined @@ -57,7 +57,7 @@ int ThreadCount = 0; SplitModule( - std::move(M), OSs.size(), + M, OSs.size(), [&](std::unique_ptr MPart) { // We want to clone the module in a new context to multi-thread the // codegen. We do it by serializing partition modules to bitcode @@ -95,6 +95,4 @@ }, PreserveLocals); } - - return {}; } diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp --- a/llvm/lib/LTO/LTO.cpp +++ b/llvm/lib/LTO/LTO.cpp @@ -1101,9 +1101,9 @@ } if (!RegularLTO.EmptyCombinedModule || Conf.AlwaysEmitRegularLTOObj) { - if (Error Err = backend( - Conf, AddStream, RegularLTO.ParallelCodeGenParallelismLevel, - std::move(RegularLTO.CombinedModule), ThinLTO.CombinedIndex)) + if (Error Err = + backend(Conf, AddStream, RegularLTO.ParallelCodeGenParallelismLevel, + *RegularLTO.CombinedModule, ThinLTO.CombinedIndex)) return Err; } diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -456,8 +456,7 @@ static void splitCodeGen(const Config &C, TargetMachine *TM, AddStreamFn AddStream, - unsigned ParallelCodeGenParallelismLevel, - std::unique_ptr Mod, + unsigned ParallelCodeGenParallelismLevel, Module &Mod, const ModuleSummaryIndex &CombinedIndex) { ThreadPool CodegenThreadPool( heavyweight_hardware_concurrency(ParallelCodeGenParallelismLevel)); @@ -465,7 +464,7 @@ const Target *T = &TM->getTarget(); SplitModule( - std::move(Mod), ParallelCodeGenParallelismLevel, + Mod, ParallelCodeGenParallelismLevel, [&](std::unique_ptr MPart) { // We want to clone the module in a new context to multi-thread the // codegen. We do it by serializing partition modules to bitcode @@ -532,27 +531,26 @@ } Error lto::backend(const Config &C, AddStreamFn AddStream, - unsigned ParallelCodeGenParallelismLevel, - std::unique_ptr Mod, + unsigned ParallelCodeGenParallelismLevel, Module &Mod, ModuleSummaryIndex &CombinedIndex) { - Expected TOrErr = initAndLookupTarget(C, *Mod); + Expected TOrErr = initAndLookupTarget(C, Mod); if (!TOrErr) return TOrErr.takeError(); - std::unique_ptr TM = createTargetMachine(C, *TOrErr, *Mod); + std::unique_ptr TM = createTargetMachine(C, *TOrErr, Mod); if (!C.CodeGenOnly) { - if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false, + if (!opt(C, TM.get(), 0, Mod, /*IsThinLTO=*/false, /*ExportSummary=*/&CombinedIndex, /*ImportSummary=*/nullptr, /*CmdArgs*/ std::vector())) return Error::success(); } if (ParallelCodeGenParallelismLevel == 1) { - codegen(C, TM.get(), AddStream, 0, *Mod, CombinedIndex); + codegen(C, TM.get(), AddStream, 0, Mod, CombinedIndex); } else { - splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel, - std::move(Mod), CombinedIndex); + splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel, Mod, + CombinedIndex); } return Error::success(); } diff --git a/llvm/lib/LTO/LTOCodeGenerator.cpp b/llvm/lib/LTO/LTOCodeGenerator.cpp --- a/llvm/lib/LTO/LTOCodeGenerator.cpp +++ b/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -616,14 +616,9 @@ // for splitting restoreLinkageForExternals(); - // Do code generation. We need to preserve the module in case the client calls - // writeMergedModules() after compilation, but we only need to allow this at - // parallelism level 1. This is achieved by having splitCodeGen return the - // original module at parallelism level 1 which we then assign back to - // MergedModule. - MergedModule = splitCodeGen(std::move(MergedModule), Out, {}, - [&]() { return createTargetMachine(); }, FileType, - ShouldRestoreGlobalsLinkage); + splitCodeGen( + *MergedModule, Out, {}, [&]() { return createTargetMachine(); }, FileType, + ShouldRestoreGlobalsLinkage); // If statistics were requested, save them to the specified file or // print them out after codegen. diff --git a/llvm/lib/Transforms/Utils/SplitModule.cpp b/llvm/lib/Transforms/Utils/SplitModule.cpp --- a/llvm/lib/Transforms/Utils/SplitModule.cpp +++ b/llvm/lib/Transforms/Utils/SplitModule.cpp @@ -95,13 +95,12 @@ // globalized. // Try to balance pack those partitions into N files since this roughly equals // thread balancing for the backend codegen step. -static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap, +static void findPartitions(Module &M, ClusterIDMapType &ClusterIDMap, unsigned N) { // At this point module should have the proper mix of globals and locals. // As we attempt to partition this module, we must not change any // locals to globals. - LLVM_DEBUG(dbgs() << "Partition module with (" << M->size() - << ")functions\n"); + LLVM_DEBUG(dbgs() << "Partition module with (" << M.size() << ")functions\n"); ClusterMapType GVtoClusterMap; ComdatMembersType ComdatMembers; @@ -144,9 +143,9 @@ addAllGlobalValueUsers(GVtoClusterMap, &GV, &GV); }; - llvm::for_each(M->functions(), recordGVSet); - llvm::for_each(M->globals(), recordGVSet); - llvm::for_each(M->aliases(), recordGVSet); + llvm::for_each(M.functions(), recordGVSet); + llvm::for_each(M.globals(), recordGVSet); + llvm::for_each(M.aliases(), recordGVSet); // Assigned all GVs to merged clusters while balancing number of objects in // each. @@ -247,31 +246,32 @@ } void llvm::SplitModule( - std::unique_ptr M, unsigned N, + Module &M, unsigned N, function_ref MPart)> ModuleCallback, bool PreserveLocals) { if (!PreserveLocals) { - for (Function &F : *M) + for (Function &F : M) externalize(&F); - for (GlobalVariable &GV : M->globals()) + for (GlobalVariable &GV : M.globals()) externalize(&GV); - for (GlobalAlias &GA : M->aliases()) + for (GlobalAlias &GA : M.aliases()) externalize(&GA); - for (GlobalIFunc &GIF : M->ifuncs()) + for (GlobalIFunc &GIF : M.ifuncs()) externalize(&GIF); } // This performs splitting without a need for externalization, which might not // always be possible. ClusterIDMapType ClusterIDMap; - findPartitions(M.get(), ClusterIDMap, N); + findPartitions(M, ClusterIDMap, N); // FIXME: We should be able to reuse M as the last partition instead of - // cloning it. + // cloning it. Note that the callers at the moment expect the module to + // be preserved, so will need some adjustments as well. for (unsigned I = 0; I < N; ++I) { ValueToValueMapTy VMap; std::unique_ptr MPart( - CloneModule(*M, VMap, [&](const GlobalValue *GV) { + CloneModule(M, VMap, [&](const GlobalValue *GV) { if (ClusterIDMap.count(GV)) return (ClusterIDMap[GV] == I); else diff --git a/llvm/tools/llvm-split/llvm-split.cpp b/llvm/tools/llvm-split/llvm-split.cpp --- a/llvm/tools/llvm-split/llvm-split.cpp +++ b/llvm/tools/llvm-split/llvm-split.cpp @@ -52,25 +52,28 @@ } unsigned I = 0; - SplitModule(std::move(M), NumOutputs, [&](std::unique_ptr MPart) { - std::error_code EC; - std::unique_ptr Out( - new ToolOutputFile(OutputFilename + utostr(I++), EC, sys::fs::OF_None)); - if (EC) { - errs() << EC.message() << '\n'; - exit(1); - } + SplitModule( + *M, NumOutputs, + [&](std::unique_ptr MPart) { + std::error_code EC; + std::unique_ptr Out(new ToolOutputFile( + OutputFilename + utostr(I++), EC, sys::fs::OF_None)); + if (EC) { + errs() << EC.message() << '\n'; + exit(1); + } - if (verifyModule(*MPart, &errs())) { - errs() << "Broken module!\n"; - exit(1); - } + if (verifyModule(*MPart, &errs())) { + errs() << "Broken module!\n"; + exit(1); + } - WriteBitcodeToFile(*MPart, Out->os()); + WriteBitcodeToFile(*MPart, Out->os()); - // Declare success. - Out->keep(); - }, PreserveLocals); + // Declare success. + Out->keep(); + }, + PreserveLocals); return 0; }