Index: llvm/include/llvm/InitializePasses.h =================================================================== --- llvm/include/llvm/InitializePasses.h +++ llvm/include/llvm/InitializePasses.h @@ -355,6 +355,7 @@ void initializeWholeProgramDevirtPass(PassRegistry &); void initializeWinEHPreparePass(PassRegistry&); void initializeWriteBitcodePassPass(PassRegistry &); +void initializeWriteThinLTOBitcodePass(PassRegistry &); void initializeXRayInstrumentationPass(PassRegistry &); } Index: llvm/include/llvm/Transforms/IPO.h =================================================================== --- llvm/include/llvm/Transforms/IPO.h +++ llvm/include/llvm/Transforms/IPO.h @@ -28,6 +28,7 @@ class Function; class BasicBlock; class GlobalValue; +class raw_ostream; //===----------------------------------------------------------------------===// // @@ -235,6 +236,9 @@ ModulePass *createSampleProfileLoaderPass(); ModulePass *createSampleProfileLoaderPass(StringRef Name); +/// Write ThinLTO-ready bitcode to Str. +ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str); + } // End llvm namespace #endif Index: llvm/lib/Transforms/IPO/CMakeLists.txt =================================================================== --- llvm/lib/Transforms/IPO/CMakeLists.txt +++ llvm/lib/Transforms/IPO/CMakeLists.txt @@ -28,6 +28,7 @@ SampleProfile.cpp StripDeadPrototypes.cpp StripSymbols.cpp + ThinLTOBitcodeWriter.cpp WholeProgramDevirt.cpp ADDITIONAL_HEADER_DIRS Index: llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp =================================================================== --- /dev/null +++ llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -0,0 +1,306 @@ +//===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass prepares a module containing type metadata for ThinLTO by splitting +// it into regular and thin LTO parts if possible, and writing both parts to +// a multi-module bitcode file. Modules that do not contain type metadata are +// written unmodified as a single module. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/Analysis/TypeMetadataUtils.h" +#include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Transforms/Utils/Cloning.h" +using namespace llvm; + +namespace { + +// Produce a unique identifier for this module by taking the MD5 sum of the +// names of the module's strong external symbols. This identifier is +// normally guaranteed to be unique, or the program would fail to link due to +// multiply defined symbols. +// +// If the module has no strong external symbols (such a module may still have a +// semantic effect if it performs global initialization), we cannot produce a +// unique identifier for this module, so we return the empty string, which +// causes the entire module to be written as a regular LTO module. 
+std::string getModuleId(Module *M) {
+  MD5 Md5;
+  bool ExportsSymbols = false;
+  // Feed the name of one global into the hash. Declarations, globals whose
+  // name starts with "llvm." and globals without external linkage do not
+  // contribute to the identifier.
+  auto AddGlobal = [&](GlobalValue &GV) {
+    if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
+        !GV.hasExternalLinkage())
+      return;
+    ExportsSymbols = true;
+    Md5.update(GV.getName());
+    // A single zero byte is hashed after each name so that consecutive names
+    // are delimited in the hashed stream.
+    Md5.update(ArrayRef<uint8_t>{0});
+  };
+
+  for (auto &F : *M)
+    AddGlobal(F);
+  for (auto &GV : M->globals())
+    AddGlobal(GV);
+  for (auto &GA : M->aliases())
+    AddGlobal(GA);
+  for (auto &IF : M->ifuncs())
+    AddGlobal(IF);
+
+  // Nothing was hashed: report "no id" with the empty string.
+  if (!ExportsSymbols)
+    return "";
+
+  MD5::MD5Result R;
+  Md5.final(R);
+
+  SmallString<32> Str;
+  MD5::stringifyResult(R, Str);
+  // The returned id is the stringified digest prefixed with '$'.
+  return ("$" + Str).str();
+}
+
+// Export each local-linkage entity defined by ExportM and used by ImportM by
+// changing visibility and appending the given ModuleId.
+//
+// An entity is only touched when ImportM has a value of the same name with at
+// least one use. Both sides are renamed to "<old name><ModuleId>" and given
+// hidden visibility; the definition in ExportM additionally becomes external.
+void exportInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
+  auto ExportInternal = [&](GlobalValue &ExportGV) {
+    if (!ExportGV.hasLocalLinkage())
+      return;
+
+    GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
+    if (!ImportGV || ImportGV->use_empty())
+      return;
+
+    // Compute the new name once, before either value is renamed.
+    std::string NewName = (ExportGV.getName() + ModuleId).str();
+
+    ExportGV.setName(NewName);
+    ExportGV.setLinkage(GlobalValue::ExternalLinkage);
+    ExportGV.setVisibility(GlobalValue::HiddenVisibility);
+
+    ImportGV->setName(NewName);
+    ImportGV->setVisibility(GlobalValue::HiddenVisibility);
+  };
+
+  for (auto &F : ExportM)
+    ExportInternal(F);
+  for (auto &GV : ExportM.globals())
+    ExportInternal(GV);
+  for (auto &GA : ExportM.aliases())
+    ExportInternal(GA);
+  for (auto &IF : ExportM.ifuncs())
+    ExportInternal(IF);
+}
+
+// Export all internal (i.e. distinct) type ids used by the module by replacing
+// them with external type ids formed using the module id.
+//
+// Note that this needs to be done before we clone the module because each clone
+// will receive its own set of distinct metadata nodes.
+void exportTypeIds(Module &M, StringRef ModuleId) {
+  // Maps each distinct (module-local) type id node seen at a call site to the
+  // MDString that replaces it.
+  DenseMap<Metadata *, Metadata *> LocalToGlobal;
+  // Rewrite operand ArgNo of CI if it wraps a distinct metadata node.
+  auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
+    Metadata *MD =
+        cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
+
+    if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
+      Metadata *&GlobalMD = LocalToGlobal[MD];
+      if (!GlobalMD) {
+        // The lookup above has already created the map slot, so size() counts
+        // this entry; the new id is "<ordinal><ModuleId>".
+        std::string NewName =
+            (to_string(LocalToGlobal.size()) + ModuleId).str();
+        GlobalMD = MDString::get(M.getContext(), NewName);
+      }
+
+      CI->setArgOperand(ArgNo,
+                        MetadataAsValue::get(M.getContext(), GlobalMD));
+    }
+  };
+
+  // llvm.type.test carries its type id as argument 1.
+  if (Function *TypeTestFunc =
+          M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
+    for (const Use &U : TypeTestFunc->uses()) {
+      auto CI = cast<CallInst>(U.getUser());
+      ExternalizeTypeId(CI, 1);
+    }
+  }
+
+  // llvm.type.checked.load carries its type id as argument 2.
+  if (Function *TypeCheckedLoadFunc =
+          M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
+    for (const Use &U : TypeCheckedLoadFunc->uses()) {
+      auto CI = cast<CallInst>(U.getUser());
+      ExternalizeTypeId(CI, 2);
+    }
+  }
+
+  // Rebuild the !type attachments of every global object. Attachments whose
+  // type id (operand 1) was not remapped above are re-added unchanged; the
+  // others get a fresh node that keeps operand 0 and uses the new id.
+  for (GlobalObject &GO : M.global_objects()) {
+    SmallVector<MDNode *, 1> MDs;
+    GO.getMetadata(LLVMContext::MD_type, MDs);
+
+    GO.eraseMetadata(LLVMContext::MD_type);
+    for (auto MD : MDs) {
+      auto I = LocalToGlobal.find(MD->getOperand(1));
+      if (I == LocalToGlobal.end()) {
+        GO.addMetadata(LLVMContext::MD_type, *MD);
+        continue;
+      }
+      GO.addMetadata(
+          LLVMContext::MD_type,
+          *MDNode::get(M.getContext(),
+                       ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
+    }
+  }
+}
+
+// Drop unused globals, and drop type information from function declarations.
+// FIXME: If we made functions typeless then there would be no need to do this.
+void simplifyExternals(Module &M) {
+  // Placeholder signature given to every surviving declaration: void(),
+  // non-variadic.
+  FunctionType *EmptyFT =
+      FunctionType::get(Type::getVoidTy(M.getContext()), false);
+
+  for (auto I = M.begin(), E = M.end(); I != E;) {
+    // Advance the iterator first: F may be erased below.
+    Function &F = *I++;
+    if (F.isDeclaration() && F.use_empty()) {
+      F.eraseFromParent();
+      continue;
+    }
+
+    // Definitions, and declarations that already have the placeholder type,
+    // are left as they are.
+    if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
+      continue;
+
+    // Replace the declaration with a void() declaration that takes over its
+    // name and visibility; existing users see it bitcast to the old type.
+    Function *NewF =
+        Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
+    NewF->setVisibility(F.getVisibility());
+    NewF->takeName(&F);
+    F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
+    F.eraseFromParent();
+  }
+
+  // Unused global variable declarations are dropped as well.
+  for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
+    GlobalVariable &GV = *I++;
+    if (GV.isDeclaration() && GV.use_empty()) {
+      GV.eraseFromParent();
+      continue;
+    }
+  }
+}
+
+// If it's possible to split M into regular and thin LTO parts, do so and write
+// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
+// regular LTO bitcode file to OS.
+void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) {
+  std::string ModuleId = getModuleId(&M);
+  if (ModuleId.empty()) {
+    // We couldn't generate a module ID for this module, just write it out as a
+    // regular LTO module.
+    WriteBitcodeToFile(&M, OS);
+    return;
+  }
+
+  // Must happen before cloning; note that this mutates M itself.
+  exportTypeIds(M, ModuleId);
+
+  // A global belongs to the regular LTO module iff its base object is a global
+  // variable carrying at least one !type attachment.
+  auto IsInRegularM = [&](const GlobalValue *GV) {
+    // NOTE(review): getBaseObject() is presumed to be able to return null
+    // (e.g. an alias whose aliasee does not resolve to a global object), so
+    // use the null-tolerant cast rather than asserting - confirm.
+    auto *GVar = dyn_cast_or_null<GlobalVariable>(GV->getBaseObject());
+    if (!GVar)
+      return false;
+
+    SmallVector<MDNode *, 1> MDs;
+    GVar->getMetadata(LLVMContext::MD_type, MDs);
+    return !MDs.empty();
+  };
+
+  // Two complementary clones: RegularM gets the globals selected by
+  // IsInRegularM, ThinM gets the globals it rejects.
+  ValueToValueMapTy VMap1;
+  std::unique_ptr<Module> RegularM(CloneModule(&M, VMap1, IsInRegularM));
+
+  ValueToValueMapTy VMap2;
+  std::unique_ptr<Module> ThinM(CloneModule(
+      &M, VMap2, [&](const GlobalValue *GV) { return !IsInRegularM(GV); }));
+
+  // Local symbols referenced across the split are exported in both directions.
+  exportInternals(*RegularM, *ThinM, ModuleId);
+  exportInternals(*ThinM, *RegularM, ModuleId);
+
+  simplifyExternals(*RegularM);
+
+  SmallVector<char, 0> Buffer;
+  BitcodeWriter W(Buffer);
+
+  // The thin module is written first, together with its summary index and a
+  // hash; the regular module follows in the same buffer.
+  // FIXME: Try to re-use BSI and PFI from the original module here.
+  ModuleSummaryIndex Index = buildModuleSummaryIndex(*ThinM, nullptr, nullptr);
+  W.writeModule(ThinM.get(), /*ShouldPreserveUseListOrder=*/false, &Index,
+                /*GenerateHash=*/true);
+
+  W.writeModule(RegularM.get());
+
+  OS << Buffer;
+}
+
+// Returns whether this module needs to be split because it uses type metadata.
+bool requiresSplit(Module &M) {
+  SmallVector<MDNode *, 1> MDs;
+  for (auto &GO : M.global_objects()) {
+    GO.getMetadata(LLVMContext::MD_type, MDs);
+    // The first global object with any !type attachment settles the answer.
+    if (!MDs.empty())
+      return true;
+  }
+
+  return false;
+}
+
+// Write M to OS as ThinLTO bitcode. Index is the summary written alongside the
+// module on the unsplit path; the split path builds its own summary for the
+// thin part and does not use Index.
+void writeThinLTOBitcode(raw_ostream &OS, Module &M,
+                         const ModuleSummaryIndex *Index) {
+  // See if this module has any type metadata. If so, we need to split it.
+  if (requiresSplit(M))
+    return splitAndWriteThinLTOBitcode(OS, M);
+
+  // Otherwise we can just write it out as a regular module.
+  WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
+                     /*GenerateHash=*/true);
+}
+
+// Legacy pass manager wrapper around writeThinLTOBitcode.
+class WriteThinLTOBitcode : public ModulePass {
+  raw_ostream &OS; // raw_ostream to print on
+
+public:
+  static char ID; // Pass identification, replacement for typeid
+  // Default construction writes to dbgs().
+  WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
+    initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
+  }
+
+  explicit WriteThinLTOBitcode(raw_ostream &o)
+      : ModulePass(ID), OS(o) {
+    initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
+  }
+
+  StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
+
+  // Fetches the module summary index from the required analysis and writes
+  // the module to OS. Always reports the module as changed.
+  bool runOnModule(Module &M) override {
+    const ModuleSummaryIndex *Index =
+        &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
+    writeThinLTOBitcode(OS, M, Index);
+    return true;
+  }
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+    AU.addRequired<ModuleSummaryIndexWrapperPass>();
+  }
+};
+}
+
+char WriteThinLTOBitcode::ID = 0;
+INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
+                      "Write ThinLTO Bitcode", false, true)
+INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
+INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
+                    "Write ThinLTO Bitcode", false, true)
+
+ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) {
+  return new WriteThinLTOBitcode(Str);
+}
Index: llvm/test/Transforms/ThinLTOBitcodeWriter/no-type-md.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/ThinLTOBitcodeWriter/no-type-md.ll @@ -0,0 +1,13 @@ +; RUN: opt -thinlto-bc -o %t %s +; RUN: llvm-dis -o - %t | FileCheck %s +; RUN: llvm-bcanalyzer -dump %t | FileCheck --check-prefix=BCA %s + +; BCA: &1 | FileCheck --check-prefix=ERROR %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=M0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=M1 %s +; RUN: llvm-bcanalyzer -dump %t0 | FileCheck --check-prefix=BCA0 %s +; RUN: llvm-bcanalyzer -dump %t1 | FileCheck
--check-prefix=BCA1 %s + +; ERROR: llvm-modextract: error: module index out of range; bitcode file contains 2 module(s) + +; BCA0: &1 | FileCheck --check-prefix=ERROR %s +; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=M0 %s +; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=M1 %s +; RUN: llvm-bcanalyzer -dump %t0 | FileCheck --check-prefix=BCA0 %s +; RUN: llvm-bcanalyzer -dump %t1 | FileCheck --check-prefix=BCA1 %s + +; ERROR: llvm-modextract: error: module index out of range; bitcode file contains 2 module(s) + +; BCA0: