Index: include/llvm/Bitcode/BitcodeWriter.h =================================================================== --- include/llvm/Bitcode/BitcodeWriter.h +++ include/llvm/Bitcode/BitcodeWriter.h @@ -84,6 +84,16 @@ const ModuleSummaryIndex *Index = nullptr, bool GenerateHash = false, ModuleHash *ModHash = nullptr); + /// Write the specified thin link bitcode file (i.e., the minimized bitcode + /// file) to the buffer specified at construction time. The thin link + /// bitcode file is used for thin link, and it only contains the necessary + /// information for thin link. + /// + /// ModHash is for use in ThinLTO incremental build, generated while + /// writing the IR bitcode file. + void writeThinLinkBitcode(const Module *M, const ModuleSummaryIndex &Index, + ModuleHash &ModHash); + void writeIndex( const ModuleSummaryIndex *Index, const std::map *ModuleToSummariesForIndex); @@ -116,6 +126,17 @@ bool GenerateHash = false, ModuleHash *ModHash = nullptr); + /// Write the specified thin link bitcode file (i.e., the minimized bitcode + /// file) to the given raw output stream, where it will be written in a new + /// bitcode block. The thin link bitcode file is used for thin link, and it + /// only contains the necessary information for thin link. + /// + /// ModHash is for use in ThinLTO incremental build, generated while writing + /// the IR bitcode file. + void WriteThinLinkBitcodeToFile(const Module *M, raw_ostream &Out, + const ModuleSummaryIndex &Index, + ModuleHash &ModHash); + /// Write the specified module summary index to the given raw output stream, /// where it will be written in a new bitcode block. This is used when /// writing the combined index file for ThinLTO. 
When writing a subset of the Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -97,11 +97,10 @@ Stream.EmitRecord(bitc::MODULE_CODE_VERSION, ArrayRef{2}); } -/// Class to manage the bitcode writing for a module. -class ModuleBitcodeWriter : public BitcodeWriterBase { - /// Pointer to the buffer allocated by caller for bitcode writing. - const SmallVectorImpl &Buffer; - +/// Base class to manage the module bitcode writing, currently subclassed for +/// ModuleBitcodeWriter and ThinLinkBitcodeWriter. +class ModuleBitcodeWriterBase : public BitcodeWriterBase { +protected: /// The Module to write to bitcode. const Module &M; @@ -111,22 +110,6 @@ /// Optional per-module index to write for ThinLTO. const ModuleSummaryIndex *Index; - /// True if a module hash record should be written. - bool GenerateHash; - - SHA1 Hasher; - - /// If non-null, when GenerateHash is true, the resulting hash is written - /// into ModHash. When GenerateHash is false, that specified value - /// is used as the hash instead of computing from the generated bitcode. - /// Can be used to produce the same module hash for a minimized bitcode - /// used just for the thin link as in the regular full bitcode that will - /// be used in the backend. - ModuleHash *ModHash; - - /// The start bit of the identification block. - uint64_t BitcodeStartBit; - /// Map that holds the correspondence between GUIDs in the summary index, /// that came from indirect call profiles, and a value id generated by this /// class to use in the VST and summary block records. @@ -140,17 +123,14 @@ uint64_t VSTOffsetPlaceholder = 0; public: - /// Constructs a ModuleBitcodeWriter object for the given Module, + /// Constructs a ModuleBitcodeWriterBase object for the given Module, /// writing to the provided \p Buffer. 
- ModuleBitcodeWriter(const Module *M, SmallVectorImpl &Buffer, - StringTableBuilder &StrtabBuilder, - BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, - const ModuleSummaryIndex *Index, bool GenerateHash, - ModuleHash *ModHash = nullptr) - : BitcodeWriterBase(Stream, StrtabBuilder), Buffer(Buffer), M(*M), - VE(*M, ShouldPreserveUseListOrder), Index(Index), - GenerateHash(GenerateHash), ModHash(ModHash), - BitcodeStartBit(Stream.GetCurrentBitNo()) { + ModuleBitcodeWriterBase(const Module *M, StringTableBuilder &StrtabBuilder, + BitstreamWriter &Stream, + bool ShouldPreserveUseListOrder, + const ModuleSummaryIndex *Index) + : BitcodeWriterBase(Stream, StrtabBuilder), M(*M), + VE(*M, ShouldPreserveUseListOrder), Index(Index) { // Assign ValueIds to any callee values in the index that came from // indirect call profiles and were recorded as a GUID not a Value* // (which would have been assigned an ID by the ValueEnumerator). @@ -172,6 +152,71 @@ assignValueId(CallEdge.first.getGUID()); } +protected: + void writePerModuleGlobalValueSummary(); + +private: + void writePerModuleFunctionSummaryRecord(SmallVector &NameVals, + GlobalValueSummary *Summary, + unsigned ValueID, + unsigned FSCallsAbbrev, + unsigned FSCallsProfileAbbrev, + const Function &F); + void writeModuleLevelReferences(const GlobalVariable &V, + SmallVector &NameVals, + unsigned FSModRefsAbbrev); + + void assignValueId(GlobalValue::GUID ValGUID) { + GUIDToValueIdMap[ValGUID] = ++GlobalValueId; + } + unsigned getValueId(GlobalValue::GUID ValGUID) { + const auto &VMI = GUIDToValueIdMap.find(ValGUID); + // Expect that any GUID value had a value Id assigned by an + // earlier call to assignValueId. + assert(VMI != GUIDToValueIdMap.end() && + "GUID does not have assigned value Id"); + return VMI->second; + } + // Helper to get the valueId for the type of value recorded in VI. 
+ unsigned getValueId(ValueInfo VI) { + if (!VI.getValue()) + return getValueId(VI.getGUID()); + return VE.getValueID(VI.getValue()); + } + std::map &valueIds() { return GUIDToValueIdMap; } +}; + +/// Class to manage the bitcode writing for a module. +class ModuleBitcodeWriter : public ModuleBitcodeWriterBase { + /// Pointer to the buffer allocated by caller for bitcode writing. + const SmallVectorImpl &Buffer; + + /// True if a module hash record should be written. + bool GenerateHash; + + /// If non-null, when GenerateHash is true, the resulting hash is written + /// into ModHash. If null, when GenerateHash is true, ModHash will be + /// generated for this module. + ModuleHash *ModHash; + + SHA1 Hasher; + + /// The start bit of the identification block. + uint64_t BitcodeStartBit; + +public: + /// Constructs a ModuleBitcodeWriter object for the given Module, + /// writing to the provided \p Buffer. + ModuleBitcodeWriter(const Module *M, SmallVectorImpl &Buffer, + StringTableBuilder &StrtabBuilder, + BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, + const ModuleSummaryIndex *Index, bool GenerateHash, + ModuleHash *ModHash = nullptr) + : ModuleBitcodeWriterBase(M, StrtabBuilder, Stream, + ShouldPreserveUseListOrder, Index), + Buffer(Buffer), GenerateHash(GenerateHash), ModHash(ModHash), + BitcodeStartBit(Stream.GetCurrentBitNo()) {} + /// Emit the current module to the bitstream. 
void write(); @@ -287,37 +332,8 @@ writeFunction(const Function &F, DenseMap &FunctionToBitcodeIndex); void writeBlockInfo(); - void writePerModuleFunctionSummaryRecord(SmallVector &NameVals, - GlobalValueSummary *Summary, - unsigned ValueID, - unsigned FSCallsAbbrev, - unsigned FSCallsProfileAbbrev, - const Function &F); - void writeModuleLevelReferences(const GlobalVariable &V, - SmallVector &NameVals, - unsigned FSModRefsAbbrev); - void writePerModuleGlobalValueSummary(); void writeModuleHash(size_t BlockStartPos); - void assignValueId(GlobalValue::GUID ValGUID) { - GUIDToValueIdMap[ValGUID] = ++GlobalValueId; - } - unsigned getValueId(GlobalValue::GUID ValGUID) { - const auto &VMI = GUIDToValueIdMap.find(ValGUID); - // Expect that any GUID value had a value Id assigned by an - // earlier call to assignValueId. - assert(VMI != GUIDToValueIdMap.end() && - "GUID does not have assigned value Id"); - return VMI->second; - } - // Helper to get the valueId for the type of value recorded in VI. - unsigned getValueId(ValueInfo VI) { - if (!VI.getValue()) - return getValueId(VI.getGUID()); - return VE.getValueID(VI.getValue()); - } - std::map &valueIds() { return GUIDToValueIdMap; } - unsigned getEncodedSyncScopeID(SyncScope::ID SSID) { return unsigned(SSID); } @@ -3266,7 +3282,7 @@ } // Helper to emit a single function summary record. -void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord( +void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( SmallVector &NameVals, GlobalValueSummary *Summary, unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev, const Function &F) { @@ -3300,7 +3316,7 @@ // Collect the global value references in the given variable's initializer, // and emit them in a summary record. 
-void ModuleBitcodeWriter::writeModuleLevelReferences( +void ModuleBitcodeWriterBase::writeModuleLevelReferences( const GlobalVariable &V, SmallVector &NameVals, unsigned FSModRefsAbbrev) { auto VI = Index->getValueInfo(GlobalValue::getGUID(V.getName())); @@ -3334,7 +3350,7 @@ /// Emit the per-module summary section alongside the rest of /// the module's bitcode. -void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() { +void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { // By default we compile with ThinLTO if the module has a summary, but the // client can request full LTO with a module flag. bool IsThinLTO = true; @@ -3688,8 +3704,7 @@ if (ModHash) // Save the written hash value. std::copy(std::begin(Vals), std::end(Vals), std::begin(*ModHash)); - } else if (ModHash) - Stream.EmitRecord(bitc::MODULE_CODE_HASH, ArrayRef(*ModHash)); + } } void ModuleBitcodeWriter::write() { @@ -3984,3 +3999,163 @@ Out.write((char *)&Buffer.front(), Buffer.size()); } + +/// Class to manage the bitcode writing for a thin link bitcode file. +class ThinLinkBitcodeWriter : public ModuleBitcodeWriterBase { + /// ModHash is for use in ThinLTO incremental build, generated while + /// writing the IR bitcode file. + ModuleHash *ModHash; + +public: + ThinLinkBitcodeWriter(const Module *M, StringTableBuilder &StrtabBuilder, + BitstreamWriter &Stream, + const ModuleSummaryIndex &Index, ModuleHash &ModHash) + : ModuleBitcodeWriterBase(M, StrtabBuilder, Stream, + /*ShouldPreserveUseListOrder=*/false, &Index), + ModHash(&ModHash) {} + + void write(); + +private: + void writeSimplifiedModuleInfo(); +}; + +// This function writes a simplified module info for thin link bitcode file. +// It only contains the source file name along with the name (the offset and +// size in strtab) and linkage for global values. For the global value info +// entry, in order to keep linkage at offset 5, there are three zeros used +// as padding. 
+void ThinLinkBitcodeWriter::writeSimplifiedModuleInfo() { + SmallVector Vals; + // Emit the module's source file name. + { + StringEncoding Bits = getStringEncoding(M.getSourceFileName()); + BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); + if (Bits == SE_Char6) + AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); + else if (Bits == SE_Fixed7) + AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7); + + // MODULE_CODE_SOURCE_FILENAME: [namechar x N] + auto Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(AbbrevOpToUse); + unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + for (const auto P : M.getSourceFileName()) + Vals.push_back((unsigned char)P); + + Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev); + Vals.clear(); + } + + // Emit the global variable information. + for (const GlobalVariable &GV : M.globals()) { + + // GLOBALVAR: [strtab offset, strtab size, 0, 0, 0, linkage] + Vals.push_back(StrtabBuilder.add(GV.getName())); + Vals.push_back(GV.getName().size()); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(getEncodedLinkage(GV)); + + Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals); + Vals.clear(); + } + + // Emit the function proto information. + for (const Function &F : M) { + // FUNCTION: [strtab offset, strtab size, 0, 0, 0, linkage] + Vals.push_back(StrtabBuilder.add(F.getName())); + Vals.push_back(F.getName().size()); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(getEncodedLinkage(F)); + + Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals); + Vals.clear(); + } + + // Emit the alias information. 
+ for (const GlobalAlias &A : M.aliases()) { + // ALIAS: [strtab offset, strtab size, 0, 0, 0, linkage] + Vals.push_back(StrtabBuilder.add(A.getName())); + Vals.push_back(A.getName().size()); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(getEncodedLinkage(A)); + + Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals); + Vals.clear(); + } + + // Emit the ifunc information. + for (const GlobalIFunc &I : M.ifuncs()) { + // IFUNC: [strtab offset, strtab size, 0, 0, 0, linkage] + Vals.push_back(StrtabBuilder.add(I.getName())); + Vals.push_back(I.getName().size()); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(getEncodedLinkage(I)); + + Stream.EmitRecord(bitc::MODULE_CODE_IFUNC, Vals); + Vals.clear(); + } +} + +void ThinLinkBitcodeWriter::write() { + Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); + + writeModuleVersion(); + + // Write simplified module info, which only contains the information + // that is needed by thin link. + writeSimplifiedModuleInfo(); + + // Write per-module global value summary. + writePerModuleGlobalValueSummary(); + + // Write module hash. + Stream.EmitRecord(bitc::MODULE_CODE_HASH, ArrayRef(*ModHash)); + + Stream.ExitBlock(); +} + +void BitcodeWriter::writeThinLinkBitcode(const Module *M, + const ModuleSummaryIndex &Index, + ModuleHash &ModHash) { + assert(!WroteStrtab); + + // The Mods vector is used by irsymtab::build, which requires non-const + // Modules in case it needs to materialize metadata. But the bitcode writer + // requires that the module is materialized, so we can cast to non-const here, + // after checking that it is in fact materialized. + assert(M->isMaterialized()); + Mods.push_back(const_cast(M)); + + ThinLinkBitcodeWriter ThinLinkWriter(M, StrtabBuilder, *Stream, Index, + ModHash); + ThinLinkWriter.write(); +} + +// Write the specified thin link bitcode file to the given raw output stream, +// where it will be written in a new bitcode block. 
This is used when +// writing the per-module index file for ThinLTO. +void llvm::WriteThinLinkBitcodeToFile(const Module *M, raw_ostream &Out, + const ModuleSummaryIndex &Index, + ModuleHash &ModHash) { + SmallVector Buffer; + Buffer.reserve(256 * 1024); + + BitcodeWriter Writer(Buffer); + Writer.writeThinLinkBitcode(M, Index, ModHash); + Writer.writeSymtab(); + Writer.writeStrtab(); + + Out.write((char *)&Buffer.front(), Buffer.size()); +} Index: lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp =================================================================== --- lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -376,15 +376,14 @@ W.writeStrtab(); OS << Buffer; - // If a minimized bitcode module was requested for the thin link, - // strip the debug info (the merged module was already stripped above) - // and write it to the given OS. + // If a minimized bitcode module was requested for the thin link, only + // the information that is needed by thin link will be written in the + // given OS (the merged module will be written as usual). if (ThinLinkOS) { Buffer.clear(); BitcodeWriter W2(Buffer); StripDebugInfo(M); - W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, - /*GenerateHash=*/false, &ModHash); + W2.writeThinLinkBitcode(&M, Index, ModHash); W2.writeModule(MergedM.get(), /*ShouldPreserveUseListOrder=*/false, &MergedMIndex); W2.writeSymtab(); @@ -420,14 +419,11 @@ ModuleHash ModHash = {{0}}; WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index, /*GenerateHash=*/true, &ModHash); - // If a minimized bitcode module was requested for the thin link, - // strip the debug info and write it to the given OS. 
- if (ThinLinkOS) { - StripDebugInfo(M); - WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false, - Index, - /*GenerateHash=*/false, &ModHash); - } + // If a minimized bitcode module was requested for the thin link, only + // the information that is needed by thin link will be written in the + // given OS. + if (ThinLinkOS && Index) + WriteThinLinkBitcodeToFile(&M, *ThinLinkOS, *Index, ModHash); } class WriteThinLTOBitcode : public ModulePass { Index: test/ThinLTO/X86/distributed_import.ll =================================================================== --- test/ThinLTO/X86/distributed_import.ll +++ test/ThinLTO/X86/distributed_import.ll @@ -4,6 +4,7 @@ ; the debug metadata for the thin link. ; RUN: opt -thinlto-bc %s -thin-link-bitcode-file=%t1.thinlink.bc -o %t1.bc ; RUN: opt -thinlto-bc %p/Inputs/distributed_import.ll -thin-link-bitcode-file=%t2.thinlink.bc -o %t2.bc +; RUN: llvm-bcanalyzer -dump %t1.thinlink.bc | FileCheck --check-prefix=THINLINKBITCODE %s ; First perform the thin link on the normal bitcode file. 
; RUN: llvm-lto2 run %t1.bc %t2.bc -o %t.o -save-temps \ @@ -56,6 +57,25 @@ ret void } +; THINLINKBITCODE-NOT: IDENTIFICATION_BLOCK_ID +; THINLINKBITCODE-NOT: BLOCKINFO_BLOCK +; THINLINKBITCODE-NOT: TYPE_BLOCK_ID +; THINLINKBITCODE-NOT: VSTOFFSET +; THINLINKBITCODE-NOT: CONSTANTS_BLOCK +; THINLINKBITCODE-NOT: METADATA_KIND_BLOCK +; THINLINKBITCODE-NOT: METADATA_BLOCK +; THINLINKBITCODE-NOT: OPERAND_BUNDLE_TAGS_BLOCK +; THINLINKBITCODE-NOT: UnknownBlock26 +; THINLINKBITCODE-NOT: FUNCTION_BLOCK +; THINLINKBITCODE-NOT: VALUE_SYMTAB +; THINLINKBITCODE: MODULE_BLOCK +; THINLINKBITCODE: VERSION +; THINLINKBITCODE: SOURCE_FILENAME +; THINLINKBITCODE: GLOBALVAL_SUMMARY_BLOCK +; THINLINKBITCODE: HASH +; THINLINKBITCODE: SYMTAB_BLOCK +; THINLINKBITCODE: STRTAB_BLOCK + !llvm.dbg.cu = !{} !1 = !{i32 2, !"Debug Info Version", i32 3} Index: test/Transforms/ThinLTOBitcodeWriter/no-type-md.ll =================================================================== --- test/Transforms/ThinLTOBitcodeWriter/no-type-md.ll +++ test/Transforms/ThinLTOBitcodeWriter/no-type-md.ll @@ -2,7 +2,6 @@ ; the debug metadata for the thin link. 
; RUN: opt -thinlto-bc -thin-link-bitcode-file=%t.thinlink.bc -o %t.bc %s ; RUN: llvm-dis -o - %t.bc | FileCheck %s -; RUN: llvm-dis -o - %t.thinlink.bc | FileCheck --check-prefix=NODEBUG %s ; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck --check-prefix=BCA %s ; Make sure the combined index files produced by both the normal and the @@ -35,10 +34,3 @@ define void @f() { ret void } - -; CHECK: !llvm.dbg.cu -; NODEBUG-NOT: !llvm.dbg.cu -!llvm.dbg.cu = !{} - -!1 = !{i32 2, !"Debug Info Version", i32 3} -!llvm.module.flags = !{!1} Index: test/Transforms/ThinLTOBitcodeWriter/split.ll =================================================================== --- test/Transforms/ThinLTOBitcodeWriter/split.ll +++ test/Transforms/ThinLTOBitcodeWriter/split.ll @@ -8,8 +8,6 @@ ; RUN: not llvm-modextract -b -n 2 -o - %t 2>&1 | FileCheck --check-prefix=ERROR %s ; RUN: llvm-dis -o - %t0.bc | FileCheck --check-prefix=M0 %s ; RUN: llvm-dis -o - %t1.bc | FileCheck --check-prefix=M1 %s -; RUN: llvm-dis -o - %t0.thinlink.bc | FileCheck --check-prefix=NODEBUG %s -; RUN: llvm-dis -o - %t1.thinlink.bc | FileCheck --check-prefix=NODEBUG %s ; RUN: llvm-bcanalyzer -dump %t0.bc | FileCheck --check-prefix=BCA0 %s ; RUN: llvm-bcanalyzer -dump %t1.bc | FileCheck --check-prefix=BCA1 %s @@ -44,12 +42,3 @@ ; M1: !0 = !{i32 0, !"typeid"} !0 = !{i32 0, !"typeid"} - -; M0: !llvm.dbg.cu -; M1-NOT: !llvm.dbg.cu -; NODEBUG-NOT: !llvm.dbg.cu -!llvm.dbg.cu = !{} - -; M1: !{i32 1, !"ThinLTO", i32 0} -!1 = !{i32 2, !"Debug Info Version", i32 3} -!llvm.module.flags = !{!1}