diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -161,6 +161,7 @@ ///< compile step. CODEGENOPT(LTOUnit, 1, 0) ///< Emit IR to support LTO unit features (CFI, whole ///< program vtable opt). +CODEGENOPT(FatLTO, 1, 0) ///< Set when -ffat-lto-objects is enabled. CODEGENOPT(EnableSplitLTOUnit, 1, 0) ///< Enable LTO unit splitting to support /// CFI and traditional whole program /// devirtualization that require whole diff --git a/clang/include/clang/CodeGen/BackendUtil.h b/clang/include/clang/CodeGen/BackendUtil.h --- a/clang/include/clang/CodeGen/BackendUtil.h +++ b/clang/include/clang/CodeGen/BackendUtil.h @@ -45,6 +45,9 @@ void EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts, llvm::MemoryBufferRef Buf); + void EmbedFatLTO(llvm::Module *M, const CodeGenOptions &CGOpts, + llvm::MemoryBufferRef Buf); + void EmbedObject(llvm::Module *M, const CodeGenOptions &CGOpts, DiagnosticsEngine &Diags); } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2124,6 +2124,11 @@ Alias, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode">; def flto : Flag<["-"], "flto">, Flags<[CoreOption, CC1Option]>, Group, Alias, AliasArgs<["full"]>, HelpText<"Enable LTO in 'full' mode">; +defm fat_lto_objects : BoolFOption<"fat-lto-objects", + CodeGenOpts<"FatLTO">, DefaultFalse, + PosFlag, + NegFlag, + BothFlags<[CC1Option], "">>; def fno_lto : Flag<["-"], "fno-lto">, Flags<[CoreOption, CC1Option]>, Group, HelpText<"Disable LTO mode (default)">; def foffload_lto_EQ : Joined<["-"], "foffload-lto=">, Flags<[CoreOption]>, Group, @@ -4669,7 +4674,6 @@ defm reorder_blocks : BooleanFFlag<"reorder-blocks">, Group; defm branch_count_reg : BooleanFFlag<"branch-count-reg">, Group; defm default_inline : BooleanFFlag<"default-inline">, Group; -defm fat_lto_objects : BooleanFFlag<"fat-lto-objects">, Group; defm float_store : BooleanFFlag<"float-store">, Group; defm friend_injection : BooleanFFlag<"friend-injection">, Group; defm function_attribute_list : BooleanFFlag<"function-attribute-list">, Group; diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -569,6 +569,9 @@ /// SupportsEmbeddedBitcode - Does this tool chain support embedded bitcode. virtual bool SupportsEmbeddedBitcode() const { return false; } + /// SupportsFatLTO - Does this tool chain support fat LTO objects. + virtual bool SupportsFatLTO() const { return true; } + /// getThreadModel() - Which thread model does this target use? virtual std::string getThreadModel() const { return "posix"; } diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1197,6 +1197,8 @@ // __LLVM,__bitcode section. void clang::EmbedBitcode(llvm::Module *M, const CodeGenOptions &CGOpts, llvm::MemoryBufferRef Buf) { + if (CGOpts.FatLTO) + return; if (CGOpts.getEmbedBitcode() == CodeGenOptions::Embed_Off) return; llvm::embedBitcodeInModule( @@ -1205,6 +1207,15 @@ CGOpts.CmdArgs); } +// With -ffat-lto-objects, save a copy of the llvm IR as data in the +// .fatlto section. +void clang::EmbedFatLTO(llvm::Module *M, const CodeGenOptions &CGOpts, + llvm::MemoryBufferRef Buf) { + if (!CGOpts.FatLTO) + return; + llvm::embedBitcodeInFatObject(*M, Buf); +} + void clang::EmbedObject(llvm::Module *M, const CodeGenOptions &CGOpts, DiagnosticsEngine &Diags) { if (CGOpts.OffloadObjects.empty()) diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -375,6 +375,7 @@ } EmbedBitcode(getModule(), CodeGenOpts, llvm::MemoryBufferRef()); + EmbedFatLTO(getModule(), CodeGenOpts, llvm::MemoryBufferRef()); EmitBackendOutput(Diags, HeaderSearchOpts, CodeGenOpts, TargetOpts, LangOpts, C.getTargetInfo().getDataLayoutString(), @@ -1172,6 +1173,8 @@ EmbedObject(TheModule.get(), CodeGenOpts, Diagnostics); EmbedBitcode(TheModule.get(), CodeGenOpts, *MainFile); + EmbedFatLTO(TheModule.get(), CodeGenOpts, *MainFile); + LLVMContext &Ctx = TheModule->getContext(); diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1314,6 +1314,11 @@ BitcodeEmbed = static_cast(Model); } + // Process -ffat-lto-objects flag. + if (Arg *A = Args.getLastArg(options::OPT_ffat_lto_objects)) { + BitcodeEmbed = EmbedBitcode; + } + // Remove existing compilation database so that each job can append to it. if (Arg *A = Args.getLastArg(options::OPT_MJ)) llvm::sys::fs::remove(A->getValue()); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7117,6 +7117,9 @@ } } + if (Args.hasArg(options::OPT_ffat_lto_objects)) + CmdArgs.push_back("-ffat-lto-objects"); + if (Args.hasArg(options::OPT_forder_file_instrumentation)) { CmdArgs.push_back("-forder-file-instrumentation"); // Enable order file instrumentation when ThinLTO is not on. When ThinLTO is diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h --- a/llvm/include/llvm/Bitcode/BitcodeWriter.h +++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h @@ -165,6 +165,7 @@ bool EmbedCmdline, const std::vector &CmdArgs); + void embedBitcodeInFatObject(Module &M, MemoryBufferRef Buf); } // end namespace llvm #endif // LLVM_BITCODE_BITCODEWRITER_H diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4953,6 +4953,37 @@ llvm_unreachable("Unimplemented ObjectFormatType"); } +static const char *getSectionNameForBitcodeForFatLTO(const Triple &T) { + switch (T.getObjectFormat()) { + case Triple::MachO: + llvm::report_fatal_error("MachO is not yet implemented for FatLTO"); + break; + case Triple::COFF: + llvm::report_fatal_error("COFF is not yet implemented for FatLTO"); + break; + case Triple::ELF: + return ".fatlto"; + case Triple::Wasm: + llvm::report_fatal_error("Wasm is not yet implemented for FatLTO"); + break; + case Triple::UnknownObjectFormat: + return ".fatlto"; + case Triple::GOFF: + llvm::report_fatal_error("GOFF is not yet implemented for FatLTO"); + break; + case Triple::SPIRV: + llvm::report_fatal_error("SPIRV is not yet implemented for FatLTO"); + break; + case Triple::XCOFF: + llvm::report_fatal_error("XCOFF is not yet implemented for FatLTO"); + break; + case Triple::DXContainer: + llvm::report_fatal_error("DXContainer is not yet implemented for FatLTO"); + break; + } + llvm_unreachable("Unimplemented ObjectFormatType"); +} + void llvm::embedBitcodeInModule(llvm::Module &M, llvm::MemoryBufferRef Buf, bool EmbedBitcode, bool EmbedCmdline, const std::vector &CmdArgs) { @@ -5045,3 +5076,68 @@ llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used"); NewUsed->setSection("llvm.metadata"); } + +void llvm::embedBitcodeInFatObject(llvm::Module &M, llvm::MemoryBufferRef Buf) { + // Save llvm.compiler.used and remove it. + SmallVector UsedArray; + SmallVector UsedGlobals; + Type *UsedElementType = Type::getInt8Ty(M.getContext())->getPointerTo(0); + GlobalVariable *Used = collectUsedGlobalVariables(M, UsedGlobals, true); + for (auto *GV : UsedGlobals) { + if (GV->getName() != "llvm.embedded.module" && + GV->getName() != "llvm.cmdline") + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + } + if (Used) + Used->eraseFromParent(); + + // Embed the bitcode for the llvm module. + std::string Data; + ArrayRef ModuleData; + Triple T(M.getTargetTriple()); + + if (Buf.getBufferSize() == 0 || + !isBitcode((const unsigned char *)Buf.getBufferStart(), + (const unsigned char *)Buf.getBufferEnd())) { + // If the input is LLVM Assembly, bitcode is produced by serializing + // the module. Use-lists order need to be preserved in this case. + llvm::raw_string_ostream OS(Data); + llvm::WriteBitcodeToFile(M, OS, /* ShouldPreserveUseListOrder */ true); + ModuleData = + ArrayRef((const uint8_t *)OS.str().data(), OS.str().size()); + } else + // If the input is LLVM bitcode, write the input byte stream directly. + ModuleData = ArrayRef((const uint8_t *)Buf.getBufferStart(), + Buf.getBufferSize()); + llvm::Constant *ModuleConstant = + llvm::ConstantDataArray::get(M.getContext(), ModuleData); + llvm::GlobalVariable *GV = new llvm::GlobalVariable( + M, ModuleConstant->getType(), true, llvm::GlobalValue::PrivateLinkage, + ModuleConstant); + GV->setSection(getSectionNameForBitcodeForFatLTO(T)); + // Set alignment to 1 to prevent padding between two contributions from input + // sections after linking. + GV->setAlignment(Align(1)); + UsedArray.push_back( + ConstantExpr::getPointerBitCastOrAddrSpaceCast(GV, UsedElementType)); + if (llvm::GlobalVariable *Old = + M.getGlobalVariable("llvm.embedded.module", true)) { + assert(Old->hasZeroLiveUses() && + "llvm.embedded.module can only be used once in llvm.compiler.used"); + GV->takeName(Old); + Old->eraseFromParent(); + } else { + GV->setName("llvm.embedded.module"); + } + + if (UsedArray.empty()) + return; + + // Recreate llvm.compiler.used. + ArrayType *ATy = ArrayType::get(UsedElementType, UsedArray.size()); + auto *NewUsed = new GlobalVariable( + M, ATy, false, llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, UsedArray), "llvm.compiler.used"); + NewUsed->setSection("llvm.metadata"); +}