diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1646,9 +1646,10 @@ Conf.CGFileType = getCodeGenFileType(Action); break; } - if (Error E = thinBackend( - Conf, -1, AddStream, *M, *CombinedIndex, ImportList, - ModuleToDefinedGVSummaries[M->getModuleIdentifier()], ModuleMap)) { + if (Error E = + thinBackend(Conf, -1, AddStream, *M, *CombinedIndex, ImportList, + ModuleToDefinedGVSummaries[M->getModuleIdentifier()], + ModuleMap, &CGOpts.CmdArgs)) { handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) { errs() << "Error running ThinLTO backend: " << EIB.message() << '\n'; }); diff --git a/llvm/test/LTO/X86/Inputs/start-lib1.ll b/clang/test/CodeGen/Inputs/start-lib1.ll copy from llvm/test/LTO/X86/Inputs/start-lib1.ll copy to clang/test/CodeGen/Inputs/start-lib1.ll --- a/llvm/test/LTO/X86/Inputs/start-lib1.ll +++ b/clang/test/CodeGen/Inputs/start-lib1.ll @@ -4,5 +4,6 @@ declare void @bar() define void @foo() { + call void @bar() ret void } diff --git a/llvm/test/LTO/X86/Inputs/start-lib1.ll b/clang/test/CodeGen/Inputs/start-lib2.ll copy from llvm/test/LTO/X86/Inputs/start-lib1.ll copy to clang/test/CodeGen/Inputs/start-lib2.ll --- a/llvm/test/LTO/X86/Inputs/start-lib1.ll +++ b/clang/test/CodeGen/Inputs/start-lib2.ll @@ -1,8 +1,6 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -declare void @bar() - -define void @foo() { +define void @bar() { ret void } diff --git a/clang/test/CodeGen/thinlto_embed_bitcode.ll b/clang/test/CodeGen/thinlto_embed_bitcode.ll new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/thinlto_embed_bitcode.ll @@ -0,0 +1,30 @@ +; REQUIRES: x86-registered-target + +; Check that -lto-embed-bitcode=post-thinlto-merge does not perform +; optimizations: we expect 't1' (i.e. start-lib1.ll's products) to have both foo +; and bar defined, with the call to bar still made from foo. 
+; RUN: opt -module-summary %p/Inputs/start-lib1.ll -o %t1.bc +; RUN: opt -module-summary %p/Inputs/start-lib2.ll -o %t2.bc +; RUN: llvm-lto -thinlto -o %t.o %t1.bc %t2.bc + +; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t.o -x ir %t1.bc -c -fthinlto-index=%t.o.thinlto.bc -mllvm -lto-embed-bitcode=post-thinlto-merge +; RUN: llvm-readelf -S %t.o | FileCheck %s --check-prefixes=CHECK-ELF,CHECK-ELF-CMD +; RUN: llvm-objcopy --dump-section=.llvmbc=%t-embedded.bc %t.o /dev/null +; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefixes=CHECK,CHECK-NOOPT + +; For the optimized case, we expect the inlining of bar into foo to happen. +; RUN: %clang -target x86_64-unknown-linux-gnu -O2 -o %t.o -x ir %t1.bc -c -fthinlto-index=%t.o.thinlto.bc -mllvm -lto-embed-bitcode=optimized +; RUN: llvm-readelf -S %t.o | FileCheck %s --check-prefixes=CHECK-ELF,CHECK-ELF-NO-CMD +; RUN: llvm-objcopy --dump-section=.llvmbc=%t-embedded.bc %t.o /dev/null +; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefixes=CHECK,CHECK-OPT + +; CHECK-ELF: .text PROGBITS 0000000000000000 [[#%x,OFF:]] [[#%x,SIZE:]] 00 AX 0 +; CHECK-ELF-NEXT: .llvmbc PROGBITS 0000000000000000 [[#%x,OFF:]] [[#%x,SIZE:]] 00 0 +; CHECK-ELF-CMD: .llvmcmd +; CHECK-ELF-NO-CMD-NOT: .llvmcmd + +; CHECK: define void @foo() +; CHECK-OPT-NEXT: ret void +; CHECK-NOOPT-NEXT: call void @bar() +; CHECK-NOOPT: define available_externally void @bar() !thinlto_src_module !0 { +; CHECK-NOOPT-NEXT: ret void diff --git a/llvm/include/llvm/LTO/LTOBackend.h b/llvm/include/llvm/LTO/LTOBackend.h --- a/llvm/include/llvm/LTO/LTOBackend.h +++ b/llvm/include/llvm/LTO/LTOBackend.h @@ -44,7 +44,8 @@ Module &M, const ModuleSummaryIndex &CombinedIndex, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, - MapVector<StringRef, BitcodeModule> &ModuleMap); + MapVector<StringRef, BitcodeModule> &ModuleMap, + const std::vector<uint8_t> *CmdArgs = nullptr); Error finalizeOptimizationRemarks( std::unique_ptr<ToolOutputFile> DiagOutputFile); diff --git a/llvm/lib/LTO/LTOBackend.cpp 
b/llvm/lib/LTO/LTOBackend.cpp --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -50,17 +50,23 @@ using namespace llvm; using namespace lto; +#define DEBUG_TYPE "lto-backend" + enum class LTOBitcodeEmbedding { DoNotEmbed = 0, EmbedOptimized = 1, + EmbedPostThinLTOMerge = 2 }; static cl::opt<LTOBitcodeEmbedding> EmbedBitcode( "lto-embed-bitcode", cl::init(LTOBitcodeEmbedding::DoNotEmbed), - cl::values(clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none", - "Do not embed"), - clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized", - "Embed after all optimization passes")), + cl::values( + clEnumValN(LTOBitcodeEmbedding::DoNotEmbed, "none", "Do not embed"), + clEnumValN(LTOBitcodeEmbedding::EmbedOptimized, "optimized", + "Embed after all optimization passes"), + clEnumValN(LTOBitcodeEmbedding::EmbedPostThinLTOMerge, + "post-thinlto-merge", + "Embed post thinlto merge, but before optimizations")), cl::desc("Embed LLVM bitcode in object files produced by LTO")); LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) { @@ -531,7 +537,8 @@ Module &Mod, const ModuleSummaryIndex &CombinedIndex, const FunctionImporter::ImportMapTy &ImportList, const GVSummaryMapTy &DefinedGlobals, - MapVector<StringRef, BitcodeModule> &ModuleMap) { + MapVector<StringRef, BitcodeModule> &ModuleMap, + const std::vector<uint8_t> *CmdArgs) { Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod); if (!TOrErr) return TOrErr.takeError(); @@ -598,6 +605,23 @@ if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); + if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostThinLTOMerge) { + // FIXME: the motivation for capturing post-merge bitcode and command line + // is replicating the compilation environment from bitcode, without needing + // to understand the dependencies (the functions to be imported). This + // assumes a clang-based invocation, in which case we have the command + // line. 
+ // It's not very clear how the above motivation would map in the + // linker-based case, so the command line args are not plumbed there; the + // command line is embedded only when CmdArgs is provided. + if (CmdArgs == nullptr) + LLVM_DEBUG( + dbgs() << "Post-ThinLTO merge bitcode embedding was requested, but " + "command line arguments are not available\n"); + llvm::EmbedBitcodeInModule(Mod, llvm::MemoryBufferRef(), + /*EmbedBitcode*/ true, + /*EmbedMarker*/ CmdArgs != nullptr, CmdArgs); + } if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true, /*ExportSummary=*/nullptr, /*ImportSummary=*/&CombinedIndex)) return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile)); diff --git a/llvm/test/LTO/X86/Inputs/start-lib1.ll b/llvm/test/LTO/X86/Inputs/start-lib1.ll --- a/llvm/test/LTO/X86/Inputs/start-lib1.ll +++ b/llvm/test/LTO/X86/Inputs/start-lib1.ll @@ -4,5 +4,6 @@ declare void @bar() define void @foo() { + call void @bar() ret void } diff --git a/llvm/test/LTO/X86/embed-bitcode.ll b/llvm/test/LTO/X86/embed-bitcode.ll --- a/llvm/test/LTO/X86/embed-bitcode.ll +++ b/llvm/test/LTO/X86/embed-bitcode.ll @@ -11,13 +11,14 @@ ; RUN: llvm-lto2 run -r %t1.o,_start,px -r %t2.o,foo,px -r %t3.o,bar,px -r %t2.o,bar,lx -lto-embed-bitcode=optimized -o %t3 %t1.o %t2.o %t3.o ; RUN: llvm-readelf -S %t3.0 | FileCheck %s --check-prefix=CHECK-ELF ; RUN: llvm-objcopy --dump-section=.llvmbc=%t-embedded.bc %t3.0 /dev/null -; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefix=CHECK-LL +; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefixes=CHECK-LL,CHECK-OPT ; CHECK-ELF: .text PROGBITS 0000000000000000 [[#%x,OFF:]] [[#%x,SIZE:]] 00 AX 0 ; CHECK-ELF-NEXT: .llvmbc PROGBITS 0000000000000000 [[#%x,OFF:]] [[#%x,SIZE:]] 00 0 ; CHECK-LL: @_start ; CHECK-LL: @foo +; CHECK-OPT-NEXT: ret void ; CHECK-LL: @bar target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"