diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1647,9 +1647,10 @@ Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = thinBackend( - Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], ModuleMap)) { + if (Error E = + thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + ModuleMap, &CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/test/LTO/X86/Inputs/start-lib1.ll b/clang/test/CodeGen/Inputs/start-lib1.ll copy from llvm/test/LTO/X86/Inputs/start-lib1.ll copy to clang/test/CodeGen/Inputs/start-lib1.ll --- a/llvm/test/LTO/X86/Inputs/start-lib1.ll +++ b/clang/test/CodeGen/Inputs/start-lib1.ll @@ -4,5 +4,6 @@ declare void @bar() define void @foo() { + call void @bar() ret void } diff --git a/llvm/test/LTO/X86/Inputs/start-lib1.ll b/clang/test/CodeGen/Inputs/start-lib2.ll copy from llvm/test/LTO/X86/Inputs/start-lib1.ll copy to clang/test/CodeGen/Inputs/start-lib2.ll --- a/llvm/test/LTO/X86/Inputs/start-lib1.ll +++ b/clang/test/CodeGen/Inputs/start-lib2.ll @@ -1,8 +1,6 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -declare void @bar() - -define void @foo() { +define void @bar() { ret void } diff --git a/clang/test/CodeGen/thinlto_embed_bitcode.ll b/clang/test/CodeGen/thinlto_embed_bitcode.ll new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/thinlto_embed_bitcode.ll @@ -0,0 +1,30 @@ +; REQUIRES: x86-registered-target + +; Check that -lto-embed-bitcode=post-merge-pre-opt does not perform optimizations: +; we expect 't1' - i.e. start-lib1.ll's products - to have both foo and bar defined, +; but the bar call is still made from foo. 
+; RUN: opt -module-summary %p/Inputs/start-lib1.ll -o %t1.bc +; RUN: opt -module-summary %p/Inputs/start-lib2.ll -o %t2.bc +; RUN: llvm-lto -thinlto -o %t.o %t1.bc %t2.bc + +; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t.o -x ir %t1.bc -c -fthinlto-index=%t.o.thinlto.bc -mllvm -lto-embed-bitcode=post-merge-pre-opt +; RUN: llvm-readelf -S %t.o | FileCheck %s --check-prefixes=CHECK-ELF,CHECK-CMD +; RUN: llvm-objcopy --dump-section=.llvmbc=%t-embedded.bc %t.o /dev/null +; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOOPT + +; For the optimized case, we expect the inlining of bar into foo to happen. +; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t.o -x ir %t1.bc -c -fthinlto-index=%t.o.thinlto.bc -mllvm -lto-embed-bitcode=optimized +; RUN: llvm-readelf -S %t.o | FileCheck %s --check-prefixes=CHECK-ELF,CHECK-NO-CMD +; RUN: llvm-objcopy --dump-section=.llvmbc=%t-embedded.bc %t.o /dev/null +; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefixes=CHECK,CHECK-OPT + +; CHECK-ELF: .text PROGBITS 0000000000000000 [[#%x,OFF:]] [[#%x,SIZE:]] 00 AX 0 +; CHECK-ELF-NEXT: .llvmbc PROGBITS 0000000000000000 [[#%x,OFF:]] [[#%x,SIZE:]] 00 0 +; CHECK-ELF-CMD: .llvmcmd +; CHECK-ELF-NO-CMD-NOT: .llvmcmd + +; CHECK: define void @foo() +; CHECK-OPT-NEXT: ret void +; CHECK-NOOPT-NEXT: call void @bar() +; CHECK-NOOPT: define available_externally void @bar() !thinlto_src_module !0 { +; CHECK-NOOPT-NEXT: ret void diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -44,7 +44,8 @@ Module &M, const ModuleSummaryIndex &CombinedIndex, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, - MapVector &ModuleMap); + MapVector &ModuleMap, + const std::vector *CmdArgs = nullptr); Error finalizeOptimizationRemarks( std::unique_ptr DiagOutputFile); diff --git a/llvm/lib/LTO/LTOBackend.cpp 
b/llvm/lib/LTO/LTOBackend.cpp --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -50,9 +50,12 @@ using namespace llvm; using namespace lto; +#define DEBUG_TYPE "lto-backend" + enum class LTOBitcodeEmbedding { DoNotEmbed = 0, EmbedOptimized = 1, + EmbedPostMergePreOptimized = 2 }; static cl::opt EmbedBitcode( @@ -60,7 +63,10 @@ cl::values(clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none", "Do not embed"), clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized", - "Embed after all optimization passes")), + "Embed after all optimization passes"), + clEnumValN(LTOBitcodeEmbedding::EmbedPostMergePreOptimized, + "post-merge-pre-opt", + "Embed post merge, but before optimizations")), cl::desc("Embed LLVM bitcode in object files produced by LTO")); LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) { @@ -346,7 +352,25 @@ bool opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod, bool IsThinLTO, ModuleSummaryIndex *ExportSummary, - const ModuleSummaryIndex *ImportSummary) { + const ModuleSummaryIndex *ImportSummary, + const std::vector *CmdArgs = nullptr) { + if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) { + // FIXME: the motivation for capturing post-merge bitcode and command line + // is replicating the compilation environment from bitcode, without needing + // to understand the dependencies (the functions to be imported). This + // assumes a clang-based invocation, in which case we have the command + // line. + // It's not very clear how the above motivation would map in the + // linker-based case, so we currently don't plumb the command line args in + // that case. 
+ if (CmdArgs == nullptr) + LLVM_DEBUG( + dbgs() << "Post-(Thin)LTO merge bitcode embedding was requested, but " + "command line arguments are not available"); + llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(), + /*EmbedBitcode*/ true, + /*EmbedMarker*/ false, CmdArgs); + } // FIXME: Plumb the combined index into the new pass manager. if (!Conf.OptPipeline.empty()) runNewPMCustomPasses(Conf, Mod, TM, Conf.OptPipeline, Conf.AAPipeline, @@ -531,7 +555,8 @@ Module &Mod, const ModuleSummaryIndex &CombinedIndex, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, - MapVector &ModuleMap) { + MapVector &ModuleMap, + const std::vector *CmdArgs) { Expected TOrErr = initAndLookupTarget(Conf, Mod); if (!TOrErr) return TOrErr.takeError(); @@ -599,7 +624,8 @@ return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true, - /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex)) + /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex, + CmdArgs)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); codegen(Conf, TM.get(), AddStream, Task, Mod, CombinedIndex); diff --git a/llvm/test/LTO/X86/Inputs/start-lib1.ll b/llvm/test/LTO/X86/Inputs/start-lib1.ll --- a/llvm/test/LTO/X86/Inputs/start-lib1.ll +++ b/llvm/test/LTO/X86/Inputs/start-lib1.ll @@ -4,5 +4,6 @@ declare void @bar() define void @foo() { + call void @bar() ret void } diff --git a/llvm/test/LTO/X86/embed-bitcode.ll b/llvm/test/LTO/X86/embed-bitcode.ll --- a/llvm/test/LTO/X86/embed-bitcode.ll +++ b/llvm/test/LTO/X86/embed-bitcode.ll @@ -11,13 +11,20 @@ ; RUN: llvm-lto2 run -r %t1.o,_start,px -r %t2.o,foo,px -r %t3.o,bar,px -r %t2.o,bar,lx -lto-embed-bitcode=optimized -o %t3 %t1.o %t2.o %t3.o ; RUN: llvm-readelf -S %t3.0 | FileCheck %s --check-prefix=CHECK-ELF ; RUN: llvm-objcopy --dump-section=.llvmbc=%t-embedded.bc %t3.0 /dev/null -; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s 
--check-prefix=CHECK-LL +; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefixes=CHECK-LL,CHECK-OPT + +; RUN: llvm-lto2 run -r %t1.o,_start,px -r %t2.o,foo,px -r %t3.o,bar,px -r %t2.o,bar,lx -lto-embed-bitcode=post-merge-pre-opt -o %t3 %t1.o %t2.o %t3.o +; RUN: llvm-readelf -S %t3.0 | FileCheck %s --check-prefix=CHECK-ELF +; RUN: llvm-objcopy --dump-section=.llvmbc=%t-embedded.bc %t3.0 /dev/null +; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefixes=CHECK-LL,CHECK-NOOPT ; CHECK-ELF: .text PROGBITS 0000000000000000 [[#%x,OFF:]] [[#%x,SIZE:]] 00 AX 0 ; CHECK-ELF-NEXT: .llvmbc PROGBITS 0000000000000000 [[#%x,OFF:]] [[#%x,SIZE:]] 00 0 ; CHECK-LL: @_start ; CHECK-LL: @foo +; CHECK-OPT-NEXT: ret void +; CHECK-NOOPT-NEXT: call void @bar ; CHECK-LL: @bar target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"