diff --git a/llvm/docs/FatLTOSupport.rst b/llvm/docs/FatLTOSupport.rst new file mode 100644 --- /dev/null +++ b/llvm/docs/FatLTOSupport.rst @@ -0,0 +1,73 @@ +=================== +FatLTO Support +=================== +.. contents:: + +.. toctree:: + :maxdepth: 1 + +Introduction +============ + +FatLTO objects are a special type of `fat object file +<https://en.wikipedia.org/wiki/Fat_binary>`_ that contain LTO compatible IR in +addition to generated object code, instead of containing object code for +multiple target architectures. This allows users to defer the choice of whether +to use LTO or not to link-time, and has been a feature available in other +compilers, like `GCC +<https://gcc.gnu.org/onlinedocs/gccint/LTO-Overview.html>`_ for some time. + +Overview +======== + +Within LLVM, FatLTO is supported by choosing the ``FatLTODefaultPipeline``. +This pipeline needs to: + +#) Optimize the module as if compiling for (Thin)LTO. +#) Embed the pre-link bitcode in a special section. +#) Finish optimizing the module to approximate non-LTO compilation. +#) Emit the object file. + +The ``FatLTODefaultPipeline`` does this by running the pre-link (Thin)LTO +pipeline, saving the module in a new ``.llvm.lto`` section, and then running +the ``ModuleSimplificationPipeline`` and the ``ModuleOptimizationPipeline`` to +generate optimized object code for the module. + +.. NOTE:: + Strictly speaking, running the pre-link pipeline, followed by module + simplification and module optimization is only an approximation of the + default optimization pipeline used for non-LTO builds. This is true for + both ThinLTO and LTO. + +Internally, the ``.llvm.lto`` section is created by running the +``EmbedBitcodePass`` directly after the (Thin)LTO pre-link pipeline. When +compiling for (Thin)LTO, this is normally the point at which the compiler would +emit an object file containing the bitcode and metadata.
However, for FatLTO, we +still want to emit optimized object code, so after the section is created, we +run the parts of the default optimization pipeline that may have been run +differently under the pre-link pipeline. + +At the end of this process the compiler can emit standard object files which +contain both the machine code in ``.text`` and the bitcode in ``.llvm.lto``. + +Limitations +=========== + +Linkers +------- + +Currently, using LTO with LLVM FatLTO objects is only supported with LLD. This +may change in the future, but extending support to other linkers isn't planned +for now. + +.. NOTE:: + For standard linking the fat object files should be usable by any + linker capable of using ELF objects, since the ``.llvm.lto`` section is + marked ``SHF_EXCLUDE``. + +Supported File Formats +---------------------- + +The current implementation only supports ELF files. At time of writing, it is +unclear if it will be useful to support other object file formats like ``COFF`` +or ``Mach-O``. diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -70,6 +70,11 @@ legacy inliner pass. Backend stack coloring should handle cases alloca merging initially set out to handle. +* A new FatLTO pipeline was added to support generating object files that have + both machine code and LTO compatible bitcode. See the `RFC + <https://discourse.llvm.org/t/rfc-ffat-lto-objects-support/63977>`_ for more + details.
+ Changes to building LLVM ------------------------ diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.rst --- a/llvm/docs/UserGuides.rst +++ b/llvm/docs/UserGuides.rst @@ -32,6 +32,7 @@ DebuggingJITedCode DirectXUsage Docker + FatLTOSupport ExtendingLLVM GoldPlugin HowToBuildOnARM diff --git a/llvm/include/llvm/Bitcode/EmbedBitcodePass.h b/llvm/include/llvm/Bitcode/EmbedBitcodePass.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/Bitcode/EmbedBitcodePass.h @@ -0,0 +1,40 @@ +//===-- EmbedBitcodePass.h - Embeds bitcode into global ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file provides a pass which embeds the bitcode into a global variable. +/// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_BITCODE_EMBEDBITCODEPASS_H +#define LLVM_BITCODE_EMBEDBITCODEPASS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class Module; +class ModulePass; +class Pass; + +/// Pass that embeds the current module's bitcode into a global variable. +class EmbedBitcodePass : public PassInfoMixin<EmbedBitcodePass> { + bool IsThinLTO; + bool EmitLTOSummary; + +public: + EmbedBitcodePass(bool IsThinLTO, bool EmitLTOSummary) + : IsThinLTO(IsThinLTO), EmitLTOSummary(EmitLTOSummary) {} + + PreservedAnalyses run(Module &M, ModuleAnalysisManager &); + + static bool isRequired() { return true; } +}; + +} // end namespace llvm. + +#endif diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -221,9 +221,9 @@ /// only intended for use when attempting to optimize code.
If frontends /// require some transformations for semantic reasons, they should explicitly /// build them. - ModulePassManager - buildModuleOptimizationPipeline(OptimizationLevel Level, - ThinOrFullLTOPhase LTOPhase); + ModulePassManager buildModuleOptimizationPipeline(OptimizationLevel Level, + ThinOrFullLTOPhase LTOPhase, + bool IsFatLTO = false); /// Build a per-module default optimization pipeline. /// @@ -234,6 +234,16 @@ ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level, bool LTOPreLink = false); + /// Build a fat object default optimization pipeline. + /// + /// This builds a pipeline that runs the LTO/ThinLTO pre-link pipeline, and + /// emits a section containing the pre-link bitcode along side the object code + /// generated by running the passes from buildModuleSimplificationPipeline and + /// buildModuleOptimizationPipeline. This should result in object code very + /// close to the PerModuleDefaultPipeline, used when compiling without LTO. + ModulePassManager buildFatLTODefaultPipeline(OptimizationLevel Level, + bool ThinLTO, bool EmitSummary); + /// Build a pre-link, ThinLTO-targeting default optimization pipeline to /// a pass manager. /// diff --git a/llvm/lib/Bitcode/Writer/CMakeLists.txt b/llvm/lib/Bitcode/Writer/CMakeLists.txt --- a/llvm/lib/Bitcode/Writer/CMakeLists.txt +++ b/llvm/lib/Bitcode/Writer/CMakeLists.txt @@ -2,6 +2,7 @@ BitWriter.cpp BitcodeWriter.cpp BitcodeWriterPass.cpp + EmbedBitcodePass.cpp ValueEnumerator.cpp DEPENDS diff --git a/llvm/lib/Bitcode/Writer/EmbedBitcodePass.cpp b/llvm/lib/Bitcode/Writer/EmbedBitcodePass.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Bitcode/Writer/EmbedBitcodePass.cpp @@ -0,0 +1,53 @@ +//===- EmbedBitcodePass.cpp - Pass that embeds the bitcode into a global---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// EmbedBitcodePass implementation. +// +//===----------------------------------------------------------------------===// +// +#include "llvm/Bitcode/EmbedBitcodePass.h" + +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/Bitcode/BitcodeWriterPass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/TargetParser/Triple.h" +#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" +#include "llvm/Transforms/Utils/ModuleUtils.h" + +using namespace llvm; + +PreservedAnalyses EmbedBitcodePass::run(Module &M, ModuleAnalysisManager &AM) { + if (M.getGlobalVariable("llvm.embedded.module", /*AllowInternal=*/true)) + report_fatal_error("Can only embed the module once.", + /*gen_crash_diag=*/true); + + Triple T(M.getTargetTriple()); + if (T.getObjectFormat() != Triple::ELF) + report_fatal_error( + "Embed bitcode pass currently only supports ELF object format."); + + std::string Data; + raw_string_ostream OS(Data); + + if (IsThinLTO) + ThinLTOBitcodeWriterPass(OS, nullptr).run(M, AM); + else + BitcodeWriterPass(OS, /*ShouldPreserveUseListOrder=*/false, EmitLTOSummary) + .run(M, AM); + + StringRef ModuleData(OS.str().data(), OS.str().size()); + MemoryBufferRef Buf(ModuleData, "ModuleData"); + embedBufferInModule(M, Buf, ".llvm.lto"); + + return PreservedAnalyses::all(); +} diff --git a/llvm/lib/Object/ObjectFile.cpp b/llvm/lib/Object/ObjectFile.cpp --- a/llvm/lib/Object/ObjectFile.cpp +++ b/llvm/lib/Object/ObjectFile.cpp @@ -79,7 +79,7 @@ bool ObjectFile::isSectionBitcode(DataRefImpl Sec) const { Expected<StringRef> NameOrErr = getSectionName(Sec); if (NameOrErr) - return *NameOrErr == ".llvmbc"; + return *NameOrErr == ".llvmbc" || *NameOrErr == ".llvm.lto";
consumeError(NameOrErr.takeError()); return false; } diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -71,6 +71,7 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" #include "llvm/Analysis/UniformityAnalysis.h" +#include "llvm/Bitcode/EmbedBitcodePass.h" #include "llvm/CodeGen/HardwareLoops.h" #include "llvm/CodeGen/TypePromotion.h" #include "llvm/IR/DebugInfo.h" diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ScopedNoAliasAA.h" #include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/Bitcode/EmbedBitcodePass.h" #include "llvm/IR/PassManager.h" #include "llvm/Passes/OptimizationLevel.h" #include "llvm/Passes/PassBuilder.h" @@ -1261,9 +1262,8 @@ FPM.addPass(InstCombinePass()); } -ModulePassManager -PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, - ThinOrFullLTOPhase LTOPhase) { +ModulePassManager PassBuilder::buildModuleOptimizationPipeline( + OptimizationLevel Level, ThinOrFullLTOPhase LTOPhase, bool IsFatLTO) { const bool LTOPreLink = isLTOPreLink(LTOPhase); ModulePassManager MPM; @@ -1316,7 +1316,8 @@ // memory operations. MPM.addPass(RecomputeGlobalsAAPass()); - invokeOptimizerEarlyEPCallbacks(MPM, Level); + if (!IsFatLTO) + invokeOptimizerEarlyEPCallbacks(MPM, Level); FunctionPassManager OptimizePM; OptimizePM.addPass(Float2IntPass()); @@ -1391,7 +1392,8 @@ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM), PTO.EagerlyInvalidateAnalyses)); - invokeOptimizerLastEPCallbacks(MPM, Level); + if (!IsFatLTO) + invokeOptimizerLastEPCallbacks(MPM, Level); // Split out cold code. 
Splitting is done late to avoid hiding context from // other optimizations and inadvertently regressing performance. The tradeoff @@ -1467,7 +1469,25 @@ if (LTOPreLink) addRequiredLTOPreLinkPasses(MPM); + return MPM; +} +ModulePassManager +PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO, + bool EmitSummary) { + // TODO: once D148010 lands, we can simplify this + ModulePassManager MPM = ThinLTO ? buildThinLTOPreLinkDefaultPipeline(Level) + : buildLTOPreLinkDefaultPipeline(Level); + MPM.addPass(EmbedBitcodePass(ThinLTO, EmitSummary)); + // Module simplification can't run at O0, so just run the O0 pipeline. + if (Level == OptimizationLevel::O0) { + MPM.addPass(buildO0DefaultPipeline(Level, false)); + return MPM; + } + MPM.addPass( + buildModuleSimplificationPipeline(Level, ThinOrFullLTOPhase::None)); + MPM.addPass(buildModuleOptimizationPipeline(Level, ThinOrFullLTOPhase::None, + /*IsFatLTO=*/true)); return MPM; } diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -58,6 +58,7 @@ MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass()) MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass()) MODULE_PASS("extract-blocks", BlockExtractorPass({}, false)) +MODULE_PASS("embed-bitcode", EmbedBitcodePass(true, true)) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) MODULE_PASS("function-import", FunctionImportPass()) MODULE_PASS("globaldce", GlobalDCEPass()) diff --git a/llvm/test/Bitcode/embed-multiple.ll b/llvm/test/Bitcode/embed-multiple.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Bitcode/embed-multiple.ll @@ -0,0 +1,6 @@ +; RUN: not --crash opt --mtriple x86_64-unknown-linux-gnu < %s -passes=embed-bitcode -S 2>&1 | FileCheck %s + +@a = global i32 1 +@llvm.embedded.module = private constant [4 x i8] c"BC\C0\DE" + +; CHECK: LLVM ERROR: Can only embed the module once.
diff --git a/llvm/test/Bitcode/embed-unsupported-object-format.ll b/llvm/test/Bitcode/embed-unsupported-object-format.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Bitcode/embed-unsupported-object-format.ll @@ -0,0 +1,5 @@ +; RUN: not --crash opt --mtriple powerpc64-unknown-aix < %s -passes=embed-bitcode -S 2>&1 | FileCheck %s + +@a = global i32 1 + +; CHECK: LLVM ERROR: Embed bitcode pass currently only supports ELF object format diff --git a/llvm/test/Bitcode/embed.ll b/llvm/test/Bitcode/embed.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Bitcode/embed.ll @@ -0,0 +1,15 @@ +; RUN: opt --mtriple x86_64-unknown-linux-gnu < %s -passes=embed-bitcode -S | FileCheck %s + +@a = global i32 1 + +; CHECK: @a = global i32 1 +;; Make sure the module is in the correct section +; CHECK: @llvm.embedded.object = private constant {{.*}}, section ".llvm.lto", align 1 + +;; Ensure that the metadata is in llvm.compiler.used +; CHECK: @llvm.compiler.used = appending global [1 x ptr] [ptr @llvm.embedded.object], section "llvm.metadata" + +;; Make sure the metadata correlates to the .llvm.lto section +; CHECK: !llvm.embedded.objects = !{!1} +; CHECK: !0 = !{} +; CHECK: !{ptr @llvm.embedded.object, !".llvm.lto"}