Index: include/clang/CodeGen/BackendUtil.h
===================================================================
--- include/clang/CodeGen/BackendUtil.h
+++ include/clang/CodeGen/BackendUtil.h
@@ -16,6 +16,7 @@
 
 namespace llvm {
   class Module;
+  class MemoryBufferRef;
 }
 
 namespace clang {
@@ -37,6 +38,9 @@
                          const TargetOptions &TOpts, const LangOptions &LOpts,
                          StringRef TDesc, llvm::Module *M, BackendAction Action,
                          raw_pwrite_stream *OS);
+
+  void EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
+                    llvm::MemoryBufferRef Buf);
 }
 
 #endif
Index: include/clang/Frontend/CodeGenOptions.h
===================================================================
--- include/clang/Frontend/CodeGenOptions.h
+++ include/clang/Frontend/CodeGenOptions.h
@@ -198,6 +198,9 @@
   /// Set of sanitizer checks that trap rather than diagnose.
   SanitizerSet SanitizeTrap;
 
+  /// List of backend command-line options for -fembed-bitcode.
+  std::vector<uint8_t> CmdArgs;
+
   /// \brief A list of all -fno-builtin-* function names (e.g., memset).
   std::vector<std::string> NoBuiltinFuncs;
 
Index: include/clang/Frontend/CodeGenOptions.def
===================================================================
--- include/clang/Frontend/CodeGenOptions.def
+++ include/clang/Frontend/CodeGenOptions.def
@@ -53,6 +53,8 @@
                                      ///< frontend.
 CODEGENOPT(DisableRedZone    , 1, 0) ///< Set when -mno-red-zone is enabled.
 CODEGENOPT(DisableTailCalls  , 1, 0) ///< Do not emit tail calls.
+CODEGENOPT(EmbedBitcode      , 1, 0) ///< Embed LLVM IR bitcode as data.
+CODEGENOPT(EmbedMarkerOnly   , 1, 0) ///< Only create bitcode section as marker
 CODEGENOPT(EmitDeclMetadata  , 1, 0) ///< Emit special metadata indicating what
                                      ///< Decl* various IR entities came from.
                                      ///< Only useful when running CodeGen as a
Index: lib/CodeGen/BackendUtil.cpp
===================================================================
--- lib/CodeGen/BackendUtil.cpp
+++ lib/CodeGen/BackendUtil.cpp
@@ -16,9 +16,11 @@
 #include "clang/Frontend/Utils.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Triple.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Bitcode/BitcodeWriterPass.h"
+#include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/CodeGen/RegAllocRegistry.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/IR/DataLayout.h"
@@ -721,3 +723,108 @@
     }
   }
 }
+
+/// Return the section that holds the embedded bitcode for the object format
+/// of \p T: "__LLVM,__bitcode" for Mach-O and ".llvmbc" otherwise.
+static const char* getSectionNameForBitcode(const Triple &T) {
+  switch (T.getObjectFormat()) {
+  case Triple::MachO:
+    return "__LLVM,__bitcode";
+  case Triple::COFF:
+  case Triple::ELF:
+  case Triple::UnknownObjectFormat:
+    return ".llvmbc";
+  }
+  // Every path must return a value: a format that is not listed above falls
+  // back to the generic name instead of running off the end of the function.
+  assert(false && "unhandled object format");
+  return ".llvmbc";
+}
+
+/// Return the section that holds the embedded command line for the object
+/// format of \p T: "__LLVM,__cmdline" for Mach-O and ".llvmcmd" otherwise.
+static const char* getSectionNameForCommandline(const Triple &T) {
+  switch (T.getObjectFormat()) {
+  case Triple::MachO:
+    return "__LLVM,__cmdline";
+  case Triple::COFF:
+  case Triple::ELF:
+  case Triple::UnknownObjectFormat:
+    return ".llvmcmd";
+  }
+  // Every path must return a value: a format that is not listed above falls
+  // back to the generic name instead of running off the end of the function.
+  assert(false && "unhandled object format");
+  return ".llvmcmd";
+}
+
+/// Embed the module's bitcode and the recorded command line into \p M.
+///
+/// Does nothing unless CGOpts.EmbedBitcode or CGOpts.EmbedMarkerOnly is set.
+/// Creates the globals "llvm.embedded.module" and "llvm.cmdline" (replacing
+/// existing ones of the same name) in the sections chosen by the target
+/// triple of \p M. With EmbedMarkerOnly the bitcode global is left empty.
+///
+/// \param M      Module to add the globals to.
+/// \param CGOpts Supplies the embed flags and the CmdArgs bytes.
+/// \param Buf    Original input; reused verbatim when it already is bitcode,
+///               otherwise \p M is serialized.
+void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts,
+                         llvm::MemoryBufferRef Buf)
+{
+  if (!CGOpts.EmbedBitcode && !CGOpts.EmbedMarkerOnly)
+    return;
+
+  // Embed the bitcode for the llvm module.
+  // Data backs ModuleData when the module has to be serialized below.
+  std::string Data;
+  ArrayRef<uint8_t> ModuleData;
+  Triple T(M->getTargetTriple());
+  if (!CGOpts.EmbedMarkerOnly) {
+    if (!isBitcode((const unsigned char*)Buf.getBufferStart(),
+                   (const unsigned char*)Buf.getBufferEnd())) {
+      // If the input is LLVM Assembly, bitcode is produced by serializing
+      // the module. Use-list order needs to be preserved in this case.
+      llvm::raw_string_ostream OS(Data);
+      llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true);
+      ModuleData = ArrayRef<uint8_t>((uint8_t*)OS.str().data(),
+                                     OS.str().size());
+    } else {
+      // If the input is LLVM bitcode, write the input byte stream directly.
+      ModuleData = ArrayRef<uint8_t>((uint8_t*)Buf.getBufferStart(),
+                                     Buf.getBufferSize());
+    }
+  }
+  llvm::Constant *ModuleConstant =
+      llvm::ConstantDataArray::get(M->getContext(), ModuleData);
+  // Use Appending linkage so it doesn't get optimized out.
+  llvm::GlobalVariable *GV = new llvm::GlobalVariable(
+      *M, ModuleConstant->getType(), true, llvm::GlobalValue::AppendingLinkage,
+      ModuleConstant);
+  GV->setSection(getSectionNameForBitcode(T));
+  if (llvm::GlobalVariable *Old =
+          M->getGlobalVariable("llvm.embedded.module")) {
+    assert(Old->use_empty() && "llvm.embedded.module must have no uses");
+    GV->takeName(Old);
+    Old->eraseFromParent();
+  } else {
+    GV->setName("llvm.embedded.module");
+  }
+
+  // Embed command-line options.
+  ArrayRef<uint8_t> CmdData((uint8_t*)CGOpts.CmdArgs.data(),
+                            CGOpts.CmdArgs.size());
+  llvm::Constant *CmdConstant =
+      llvm::ConstantDataArray::get(M->getContext(), CmdData);
+  GV = new llvm::GlobalVariable(*M, CmdConstant->getType(), true,
+                                llvm::GlobalValue::AppendingLinkage,
+                                CmdConstant);
+  GV->setSection(getSectionNameForCommandline(T));
+  if (llvm::GlobalVariable *Old = M->getGlobalVariable("llvm.cmdline")) {
+    assert(Old->use_empty() && "llvm.cmdline must have no uses");
+    GV->takeName(Old);
+    Old->eraseFromParent();
+  } else {
+    GV->setName("llvm.cmdline");
+  }
+}
Index: lib/CodeGen/CodeGenAction.cpp
===================================================================
--- lib/CodeGen/CodeGenAction.cpp
+++ lib/CodeGen/CodeGenAction.cpp
@@ -173,6 +173,8 @@
         return;
       }
 
+      EmbedBitcode(TheModule.get(), CodeGenOpts, llvm::MemoryBufferRef());
+
       EmitBackendOutput(Diags, CodeGenOpts, TargetOpts, LangOpts,
                         C.getTargetInfo().getDataLayoutString(),
                         getModule(), Action, AsmOutStream);
@@ -823,6 +825,9 @@
       TheModule->setTargetTriple(TargetOpts.Triple);
     }
 
+    EmbedBitcode(TheModule.get(), CI.getCodeGenOpts(),
+                 MainFile->getMemBufferRef());
+
     LLVMContext &Ctx = TheModule->getContext();
     Ctx.setInlineAsmDiagnosticHandler(BitcodeInlineAsmDiagHandler);
     EmitBackendOutput(CI.getDiagnostics(), CI.getCodeGenOpts(), TargetOpts,
Index: lib/Driver/Tools.cpp
===================================================================
--- lib/Driver/Tools.cpp
+++ lib/Driver/Tools.cpp
@@ -5553,7 +5553,10 @@
   // With -save-temps, we want to save the unoptimized bitcode output from the
   // CompileJobAction, use -disable-llvm-passes to get pristine IR generated
   // by the frontend.
-  if (C.getDriver().isSaveTempsEnabled() && isa<CompileJobAction>(JA))
+  // When -fembed-bitcode is enabled, optimized bitcode is emitted because it
+  // has slightly different breakdown between stages.
+  if (C.getDriver().isSaveTempsEnabled() &&
+      !C.getDriver().embedBitcodeEnabled() && isa<CompileJobAction>(JA))
     CmdArgs.push_back("-disable-llvm-passes");
 
   if (Output.getType() == types::TY_Dependencies) {
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -616,6 +616,34 @@
       }
     }
   }
+  // Handle -fembed-bitcode option.
+  Opts.EmbedBitcode = Args.hasArg(OPT_fembed_bitcode);
+  Opts.EmbedMarkerOnly = Args.hasArg(OPT_fembed_bitcode_marker);
+  // FIXME: For backend options that are not yet recorded as function
+  // attributes in the IR, keep track of them so we can embed them in a
+  // separate data section and use them when building the bitcode.
+  if (Opts.EmbedBitcode) {
+    for (ArgList::const_iterator A = Args.begin(), AE = Args.end();
+         A != AE; ++A) {
+      // Do not encode output and input.
+      if ((*A)->getOption().getID() == options::OPT_o ||
+          (*A)->getOption().getID() == options::OPT_INPUT ||
+          (*A)->getOption().getID() == options::OPT_x ||
+          (*A)->getOption().getID() == options::OPT_fembed_bitcode ||
+          ((*A)->getOption().getGroup().isValid() &&
+           (*A)->getOption().getGroup().getID() == options::OPT_W_Group))
+        continue;
+      ArgStringList ASL;
+      (*A)->render(Args, ASL);
+      for (ArgStringList::iterator it = ASL.begin(), ie = ASL.end();
+           it != ie; ++it) {
+        StringRef ArgStr(*it);
+        Opts.CmdArgs.insert(Opts.CmdArgs.end(), ArgStr.begin(), ArgStr.end());
+        // Use \00 to separate each command-line option.
+        Opts.CmdArgs.push_back('\0');
+      }
+    }
+  }
 
   Opts.InstrumentFunctions = Args.hasArg(OPT_finstrument_functions);
   Opts.InstrumentForProfiling = Args.hasArg(OPT_pg);
Index: test/Frontend/embed-bitcode.ll
===================================================================
--- /dev/null
+++ test/Frontend/embed-bitcode.ll
@@ -0,0 +1,48 @@
+; check .ll input
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode -x ir %s -o - \
+; RUN:    | FileCheck %s
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode-marker -x ir %s -o - \
+; RUN:    | FileCheck %s -check-prefix=CHECK-MARKER
+; RUN: %clang_cc1 -triple aarch64-unknown-linux-gnueabi -emit-llvm \
+; RUN:    -fembed-bitcode -x ir %s -o - \
+; RUN:    | FileCheck %s -check-prefix=CHECK-ELF
+
+; check .bc input
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm-bc \
+; RUN:    -x ir %s -o %t.bc
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode -x ir %t.bc -o - \
+; RUN:    | FileCheck %s
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode-marker -x ir %t.bc -o - \
+; RUN:    | FileCheck %s -check-prefix=CHECK-MARKER
+
+; run through -fembed-bitcode twice and make sure it doesn't crash
+; RUN: %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm-bc \
+; RUN:    -fembed-bitcode -x ir %s -o - \
+; RUN: | %clang_cc1 -triple thumbv7-apple-ios8.0.0 -emit-llvm \
+; RUN:    -fembed-bitcode -x ir - -o /dev/null
+
+; check the magic number of bitcode at the beginning of the string
+; CHECK: @llvm.embedded.module
+; CHECK: c"\DE\C0\17\0B
+; CHECK: section "__LLVM,__bitcode"
+; CHECK: @llvm.cmdline
+; CHECK: section "__LLVM,__cmdline"
+
+; CHECK-ELF: @llvm.embedded.module
+; CHECK-ELF: section ".llvmbc"
+; CHECK-ELF: @llvm.cmdline
+; CHECK-ELF: section ".llvmcmd"
+
+; CHECK-MARKER: @llvm.embedded.module
+; CHECK-MARKER: constant [0 x i8] zeroinitializer
+; CHECK-MARKER: section "__LLVM,__bitcode"
+; CHECK-MARKER: @llvm.cmdline
+; CHECK-MARKER: section "__LLVM,__cmdline"
+
+define i32 @f0() {
+  ret i32 0
+}