diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1558,129 +1558,14 @@ } } -static const char* getSectionNameForBitcode(const Triple &T) { - switch (T.getObjectFormat()) { - case Triple::MachO: - return "__LLVM,__bitcode"; - case Triple::COFF: - case Triple::ELF: - case Triple::Wasm: - case Triple::UnknownObjectFormat: - return ".llvmbc"; - case Triple::XCOFF: - llvm_unreachable("XCOFF is not yet implemented"); - break; - } - llvm_unreachable("Unimplemented ObjectFormatType"); -} - -static const char* getSectionNameForCommandline(const Triple &T) { - switch (T.getObjectFormat()) { - case Triple::MachO: - return "__LLVM,__cmdline"; - case Triple::COFF: - case Triple::ELF: - case Triple::Wasm: - case Triple::UnknownObjectFormat: - return ".llvmcmd"; - case Triple::XCOFF: - llvm_unreachable("XCOFF is not yet implemented"); - break; - } - llvm_unreachable("Unimplemented ObjectFormatType"); -} - // With -fembed-bitcode, save a copy of the llvm IR as data in the // __LLVM,__bitcode section. void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts, llvm::MemoryBufferRef Buf) { if (CGOpts.getEmbedBitcode() == CodeGenOptions::Embed_Off) return; - - // Save llvm.compiler.used and remote it. - SmallVector UsedArray; - SmallPtrSet UsedGlobals; - Type *UsedElementType = Type::getInt8Ty(M->getContext())->getPointerTo(0); - GlobalVariable *Used = collectUsedGlobalVariables(*M, UsedGlobals, true); - for (auto *GV : UsedGlobals) { - if (GV->getName() != "llvm.embedded.module" && - GV->getName() != "llvm.cmdline") - UsedArray.push_back( - ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); - } - if (Used) - Used->eraseFromParent(); - - // Embed the bitcode for the llvm module. - std::string Data; - ArrayRef ModuleData; - Triple T(M->getTargetTriple()); - // Create a constant that contains the bitcode. - // In case of embedding a marker, ignore the input Buf and use the empty - // ArrayRef. It is also legal to create a bitcode marker even Buf is empty. - if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker) { - if (!isBitcode((const unsigned char *)Buf.getBufferStart(), - (const unsigned char *)Buf.getBufferEnd())) { - // If the input is LLVM Assembly, bitcode is produced by serializing - // the module. Use-lists order need to be perserved in this case. - llvm::raw_string_ostream OS(Data); - llvm::WriteBitcodeToFile(*M, OS, /* ShouldPreserveUseListOrder */ true); - ModuleData = - ArrayRef((const uint8_t *)OS.str().data(), OS.str().size()); - } else - // If the input is LLVM bitcode, write the input byte stream directly. - ModuleData = ArrayRef((const uint8_t *)Buf.getBufferStart(), - Buf.getBufferSize()); - } - llvm::Constant *ModuleConstant = - llvm::ConstantDataArray::get(M->getContext(), ModuleData); - llvm::GlobalVariable *GV = new llvm::GlobalVariable( - *M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage, - ModuleConstant); - GV->setSection(getSectionNameForBitcode(T)); - UsedArray.push_back( - ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); - if (llvm::GlobalVariable *Old = - M->getGlobalVariable("llvm.embedded.module", true)) { - assert(Old->hasOneUse() && - "llvm.embedded.module can only be used once in llvm.compiler.used"); - GV->takeName(Old); - Old->eraseFromParent(); - } else { - GV->setName("llvm.embedded.module"); - } - - // Skip if only bitcode needs to be embedded. - if (CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode) { - // Embed command-line options. - ArrayRef CmdData(const_cast(CGOpts.CmdArgs.data()), - CGOpts.CmdArgs.size()); - llvm::Constant *CmdConstant = - llvm::ConstantDataArray::get(M->getContext(), CmdData); - GV = new llvm::GlobalVariable(*M, CmdConstant->getType(), true, - llvm::GlobalValue::PrivateLinkage, - CmdConstant); - GV->setSection(getSectionNameForCommandline(T)); - UsedArray.push_back( - ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); - if (llvm::GlobalVariable *Old = - M->getGlobalVariable("llvm.cmdline", true)) { - assert(Old->hasOneUse() && - "llvm.cmdline can only be used once in llvm.compiler.used"); - GV->takeName(Old); - Old->eraseFromParent(); - } else { - GV->setName("llvm.cmdline"); - } - } - - if (UsedArray.empty()) - return; - - // Recreate llvm.compiler.used. - ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size()); - auto *NewUsed = new GlobalVariable( - *M, ATy, false, llvm::GlobalValue::AppendingLinkage, - llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used"); - NewUsed->setSection("llvm.metadata"); + llvm::EmbedBitcodeInModule( + *M, Buf, CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Marker, + CGOpts.getEmbedBitcode() != CodeGenOptions::Embed_Bitcode, + &CGOpts.CmdArgs); } diff --git a/clang/test/Frontend/x86-embed-bitcode.ll b/clang/test/Frontend/x86-embed-bitcode.ll new file mode 100644 --- /dev/null +++ b/clang/test/Frontend/x86-embed-bitcode.ll @@ -0,0 +1,74 @@ +; REQUIRES: x86-registered-target +; check .ll input +; RUN: %clang_cc1 -triple x86_64-apple-macosx10.10 -emit-llvm \ +; RUN: -fembed-bitcode=all -x ir %s -o - \ +; RUN: | FileCheck %s +; RUN: %clang_cc1 -triple x86_64-apple-macosx10.10 -emit-llvm \ +; RUN: -fembed-bitcode=marker -x ir %s -o - \ +; RUN: | FileCheck %s -check-prefix=CHECK-MARKER +; RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ +; RUN: -fembed-bitcode=all -x ir %s -o - \ +; RUN: | FileCheck %s -check-prefix=CHECK-ELF +; RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ +; RUN: -fembed-bitcode=marker -x ir %s -o - \ +; RUN: | FileCheck %s -check-prefix=CHECK-ELF-MARKER +; RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \ +; RUN: -fembed-bitcode=bitcode -x ir %s -o - \ +; RUN: | FileCheck %s -check-prefix=CHECK-ELF-ONLY-BITCODE + +; check .bc input +; RUN: %clang_cc1 -triple x86_64-apple-macosx10.10 -emit-llvm-bc \ +; RUN: -x ir %s -o %t.bc +; RUN: %clang_cc1 -triple x86_64-apple-macosx10.10 -emit-llvm \ +; RUN: -fembed-bitcode=all -x ir %t.bc -o - \ +; RUN: | FileCheck %s +; RUN: %clang_cc1 -triple x86_64-apple-macosx10.10 -emit-llvm \ +; RUN: -fembed-bitcode=bitcode -x ir %t.bc -o - \ +; RUN: | FileCheck %s -check-prefix=CHECK-ONLY-BITCODE +; RUN: %clang_cc1 -triple x86_64-apple-macosx10.10 -emit-llvm \ +; RUN: -fembed-bitcode=marker -x ir %t.bc -o - \ +; RUN: | FileCheck %s -check-prefix=CHECK-MARKER + +; run through -fembed-bitcode twice and make sure it doesn't crash +; RUN: %clang_cc1 -triple x86_64-apple-macosx10.10 -emit-llvm-bc \ +; RUN: -fembed-bitcode=all -x ir %s -o - \ +; RUN: | %clang_cc1 -triple x86_64-apple-macosx10.10 -emit-llvm \ +; RUN: -fembed-bitcode=all -x ir - -o /dev/null + +; check the magic number of bitcode at the beginning of the string +; CHECK: @llvm.embedded.module = private constant +; CHECK: c"\DE\C0\17\0B +; CHECK: section "__LLVM,__bitcode" +; CHECK: @llvm.cmdline = private constant +; CHECK: section "__LLVM,__cmdline" + +; CHECK-ELF: @llvm.embedded.module +; CHECK-ELF: section ".llvmbc" +; CHECK-ELF: @llvm.cmdline +; CHECK-ELF: section ".llvmcmd" + +; CHECK-ELF-MARKER: @llvm.embedded.module +; CHECK-ELF-MARKER: constant [0 x i8] zeroinitializer +; CHECK-ELF-MARKER: @llvm.cmdline +; CHECK-ELF-MARKER: section ".llvmcmd" + +; CHECK-ELF-ONLY-BITCODE: @llvm.embedded.module +; CHECK-ELF-ONLY-BITCODE: section ".llvmbc" +; CHECK-ELF-ONLY-BITCODE-NOT: @llvm.cmdline +; CHECK-ELF-ONLY-BITCODE-NOT: section ".llvmcmd" + +; CHECK-ONLY-BITCODE: @llvm.embedded.module = private constant +; CHECK-ONLY-BITCODE: c"\DE\C0\17\0B +; CHECK-ONLY-BITCODE: section "__LLVM,__bitcode" +; CHECK-ONLY-BITCODE-NOT: @llvm.cmdline = private constant +; CHECK-ONLY-BITCODE-NOT: section "__LLVM,__cmdline" + +; CHECK-MARKER: @llvm.embedded.module +; CHECK-MARKER: constant [0 x i8] zeroinitializer +; CHECK-MARKER: section "__LLVM,__bitcode" +; CHECK-MARKER: @llvm.cmdline +; CHECK-MARKER: section "__LLVM,__cmdline" + +define i32 @f0() { + ret i32 0 +} diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h --- a/llvm/include/llvm/Bitcode/BitcodeWriter.h +++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h @@ -17,6 +17,7 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/MemoryBuffer.h" #include #include #include @@ -151,6 +152,11 @@ const std::map *ModuleToSummariesForIndex = nullptr); + /// Save a copy of the llvm IR as data in the __LLVM,__bitcode section. + void EmbedBitcodeInModule(Module &M, MemoryBufferRef Buf, bool EmbedBitcode, + bool EmbedMarker, + const std::vector *CmdArgs); + } // end namespace llvm #endif // LLVM_BITCODE_BITCODEWRITER_H diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -24,9 +24,10 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/Bitstream/BitCodes.h" #include "llvm/Bitstream/BitstreamWriter.h" -#include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/Config/llvm-config.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" @@ -4677,3 +4678,125 @@ Out.write((char *)&Buffer.front(), Buffer.size()); } + +static const char *getSectionNameForBitcode(const Triple &T) { + switch (T.getObjectFormat()) { + case Triple::MachO: + return "__LLVM,__bitcode"; + case Triple::COFF: + case Triple::ELF: + case Triple::Wasm: + case Triple::UnknownObjectFormat: + return ".llvmbc"; + case Triple::XCOFF: + llvm_unreachable("XCOFF is not yet implemented"); + break; + } + llvm_unreachable("Unimplemented ObjectFormatType"); +} + +static const char *getSectionNameForCommandline(const Triple &T) { + switch (T.getObjectFormat()) { + case Triple::MachO: + return "__LLVM,__cmdline"; + case Triple::COFF: + case Triple::ELF: + case Triple::Wasm: + case Triple::UnknownObjectFormat: + return ".llvmcmd"; + case Triple::XCOFF: + llvm_unreachable("XCOFF is not yet implemented"); + break; + } + llvm_unreachable("Unimplemented ObjectFormatType"); +} + +void llvm::EmbedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, + bool EmbedBitcode, bool EmbedMarker, + const std::vector *CmdArgs) { + // Save llvm.compiler.used and remove it. + SmallVector UsedArray; + SmallPtrSet UsedGlobals; + Type *UsedElementType = Type::getInt8Ty(M.getContext())->getPointerTo(0); + GlobalVariable *Used = collectUsedGlobalVariables(M, UsedGlobals, true); + for (auto *GV : UsedGlobals) { + if (GV->getName() != "llvm.embedded.module" && + GV->getName() != "llvm.cmdline") + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + } + if (Used) + Used->eraseFromParent(); + + // Embed the bitcode for the llvm module. + std::string Data; + ArrayRef ModuleData; + Triple T(M.getTargetTriple()); + // Create a constant that contains the bitcode. + // In case of embedding a marker, ignore the input Buf and use the empty + // ArrayRef. It is also legal to create a bitcode marker even Buf is empty. + if (EmbedBitcode) { + if (!isBitcode((const unsigned char *)Buf.getBufferStart(), + (const unsigned char *)Buf.getBufferEnd())) { + // If the input is LLVM Assembly, bitcode is produced by serializing + // the module. Use-lists order need to be preserved in this case. + llvm::raw_string_ostream OS(Data); + llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true); + ModuleData = + ArrayRef((const uint8_t *)OS.str().data(), OS.str().size()); + } else + // If the input is LLVM bitcode, write the input byte stream directly. + ModuleData = ArrayRef((const uint8_t *)Buf.getBufferStart(), + Buf.getBufferSize()); + } + llvm::Constant *ModuleConstant = + llvm::ConstantDataArray::get(M.getContext(), ModuleData); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage, + ModuleConstant); + GV->setSection(getSectionNameForBitcode(T)); + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + if (llvm::GlobalVariable *Old = + M.getGlobalVariable("llvm.embedded.module", true)) { + assert(Old->hasOneUse() && + "llvm.embedded.module can only be used once in llvm.compiler.used"); + GV->takeName(Old); + Old->eraseFromParent(); + } else { + GV->setName("llvm.embedded.module"); + } + + // Skip if only bitcode needs to be embedded. + if (EmbedMarker) { + // Embed command-line options. + ArrayRef CmdData(const_cast(CmdArgs->data()), + CmdArgs->size()); + llvm::Constant *CmdConstant = + llvm::ConstantDataArray::get(M.getContext(), CmdData); + GV = new llvm::GlobalVariable(M, CmdConstant->getType(), true, + llvm::GlobalValue::PrivateLinkage, + CmdConstant); + GV->setSection(getSectionNameForCommandline(T)); + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + if (llvm::GlobalVariable *Old = M.getGlobalVariable("llvm.cmdline", true)) { + assert(Old->hasOneUse() && + "llvm.cmdline can only be used once in llvm.compiler.used"); + GV->takeName(Old); + Old->eraseFromParent(); + } else { + GV->setName("llvm.cmdline"); + } + } + + if (UsedArray.empty()) + return; + + // Recreate llvm.compiler.used. + ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size()); + auto *NewUsed = new GlobalVariable( + M, ATy, false, llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used"); + NewUsed->setSection("llvm.metadata"); +} diff --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp --- a/llvm/lib/LTO/LTOBackend.cpp +++ b/llvm/lib/LTO/LTOBackend.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" +#include "llvm/Support/SmallVectorMemoryBuffer.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/raw_ostream.h" @@ -312,11 +313,30 @@ return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod); } +static cl::opt EmbedBitcode( + "lto-embed-bitcode", cl::init(false), + cl::desc("Embed LLVM bitcode in object files produced by LTO")); + +static void EmitBitcodeSection(Module &M, Config &Conf) { + if (!EmbedBitcode) + return; + SmallVector Buffer; + raw_svector_ostream OS(Buffer); + WriteBitcodeToFile(M, OS); + + std::unique_ptr Buf( + new SmallVectorMemoryBuffer(std::move(Buffer))); + llvm::EmbedBitcodeInModule(M, Buf->getMemBufferRef(), /*EmbedBitcode*/ true, + /*EmbedMarker*/ false, /*CmdArgs*/ nullptr); +} + void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream, unsigned Task, Module &Mod) { if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod)) return; + EmitBitcodeSection(Mod, Conf); + std::unique_ptr DwoOut; SmallString<1024> DwoFile(Conf.SplitDwarfOutput); if (!Conf.DwoDir.empty()) { diff --git a/llvm/test/LTO/X86/Inputs/start-lib1.ll b/llvm/test/LTO/X86/Inputs/start-lib1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/LTO/X86/Inputs/start-lib1.ll @@ -0,0 +1,8 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @bar() + +define void @foo() { + ret void +} diff --git a/llvm/test/LTO/X86/Inputs/start-lib2.ll b/llvm/test/LTO/X86/Inputs/start-lib2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/LTO/X86/Inputs/start-lib2.ll @@ -0,0 +1,6 @@ +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @bar() { + ret void +} diff --git a/llvm/test/LTO/X86/embed-bitcode.ll b/llvm/test/LTO/X86/embed-bitcode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/LTO/X86/embed-bitcode.ll @@ -0,0 +1,28 @@ +; RUN: llvm-as %s -o %t1.o +; RUN: llvm-as %p/Inputs/start-lib1.ll -o %t2.o +; RUN: llvm-as %p/Inputs/start-lib2.ll -o %t3.o + +; RUN: llvm-lto2 run -r %t1.o,_start,px -r %t2.o,foo,px -r %t3.o,bar,px -r %t2.o,bar,lx -o %t3 %t1.o %t2.o %t3.o +; RUN: llvm-readelf -S %t3.0 | FileCheck %s --implicit-check-not=.llvmbc + +; RUN: llvm-lto2 run -r %t1.o,_start,px -r %t2.o,foo,px -r %t3.o,bar,px -r %t2.o,bar,lx -lto-embed-bitcode=false -o %t3 %t1.o %t2.o %t3.o +; RUN: llvm-readelf -S %t3.0 | FileCheck %s --implicit-check-not=.llvmbc + +; RUN: llvm-lto2 run -r %t1.o,_start,px -r %t2.o,foo,px -r %t3.o,bar,px -r %t2.o,bar,lx -lto-embed-bitcode -o %t3 %t1.o %t2.o %t3.o +; RUN: llvm-readelf -S %t3.0 | FileCheck %s --check-prefix=CHECK-ELF +; RUN: llvm-objcopy -O binary -j .llvmbc %t3.0 %t-embedded.bc +; RUN: llvm-dis %t-embedded.bc -o - | FileCheck %s --check-prefix=CHECK-LL + +; CHECK-ELF: .text +; CHECK-ELF: .llvmbc + +; CHECK-LL: @_start +; CHECK-LL: @foo +; CHECK-LL: @bar + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @_start() { + ret void +}