Index: include/llvm/Bitcode/BitcodeWriter.h =================================================================== --- include/llvm/Bitcode/BitcodeWriter.h +++ include/llvm/Bitcode/BitcodeWriter.h @@ -84,6 +84,16 @@ const ModuleSummaryIndex *Index = nullptr, bool GenerateHash = false, ModuleHash *ModHash = nullptr); + /// Write the specified thin link bitcode file (i.e., the minimized bitcode + /// file) to the buffer specified at construction time. The thin link + /// bitcode file is used for thin link, and it only contains the necessary + /// information for thin link. + /// + /// ModHash is for use in ThinLTO incremental build, generated while the + /// IR bitcode file writing. + void writeThinLinkBitcode(const Module *M, const ModuleSummaryIndex &Index, + ModuleHash &ModHash); + void writeIndex( const ModuleSummaryIndex *Index, const std::map *ModuleToSummariesForIndex); @@ -116,6 +126,17 @@ bool GenerateHash = false, ModuleHash *ModHash = nullptr); + /// Write the specified thin link bitcode file (i.e., the minimized bitcode + /// file) to the given raw output stream, where it will be written in a new + /// bitcode block. The thin link bitcode file is used for thin link, and it + /// only contains the necessary information for thin link. + /// + /// ModHash is for use in ThinLTO incremental build, generated while the IR + /// bitcode file writing. + void WriteThinLinkBitcodeToFile(const Module *M, raw_ostream &Out, + const ModuleSummaryIndex &Index, + ModuleHash &ModHash); + /// Write the specified module summary index to the given raw output stream, /// where it will be written in a new bitcode block. This is used when /// writing the combined index file for ThinLTO. When writing a subset of the Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -97,11 +97,10 @@ Stream.EmitRecord(bitc::MODULE_CODE_VERSION, ArrayRef{2}); } -/// Class to manage the bitcode writing for a module. -class ModuleBitcodeWriter : public BitcodeWriterBase { - /// Pointer to the buffer allocated by caller for bitcode writing. - const SmallVectorImpl &Buffer; - +/// Base class to manage the module bitcode writing, currently subclassed for +/// ModuleBitcodeWriter and ThinLinkBitcodeWriter. +class ModuleBitcodeWriterBase : public BitcodeWriterBase { +protected: /// The Module to write to bitcode. const Module &M; @@ -124,9 +123,6 @@ /// be used in the backend. ModuleHash *ModHash; - /// The start bit of the identification block. - uint64_t BitcodeStartBit; - /// Map that holds the correspondence between GUIDs in the summary index, /// that came from indirect call profiles, and a value id generated by this /// class to use in the VST and summary block records. @@ -140,17 +136,15 @@ uint64_t VSTOffsetPlaceholder = 0; public: - /// Constructs a ModuleBitcodeWriter object for the given Module, + /// Constructs a ModuleBitcodeWriterBase object for the given Module, /// writing to the provided \p Buffer. - ModuleBitcodeWriter(const Module *M, SmallVectorImpl &Buffer, - StringTableBuilder &StrtabBuilder, + ModuleBitcodeWriterBase(const Module *M, StringTableBuilder &StrtabBuilder, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash = nullptr) - : BitcodeWriterBase(Stream, StrtabBuilder), Buffer(Buffer), M(*M), + : BitcodeWriterBase(Stream, StrtabBuilder), M(*M), VE(*M, ShouldPreserveUseListOrder), Index(Index), - GenerateHash(GenerateHash), ModHash(ModHash), - BitcodeStartBit(Stream.GetCurrentBitNo()) { + GenerateHash(GenerateHash), ModHash(ModHash) { // Assign ValueIds to any callee values in the index that came from // indirect call profiles and were recorded as a GUID not a Value* // (which would have been assigned an ID by the ValueEnumerator). @@ -172,14 +166,68 @@ assignValueId(CallEdge.first.getGUID()); } +protected: + size_t addToStrtab(StringRef Str); + + void writePerModuleGlobalValueSummary(); + +private: + void writePerModuleFunctionSummaryRecord(SmallVector &NameVals, + GlobalValueSummary *Summary, + unsigned ValueID, + unsigned FSCallsAbbrev, + unsigned FSCallsProfileAbbrev, + const Function &F); + void writeModuleLevelReferences(const GlobalVariable &V, + SmallVector &NameVals, + unsigned FSModRefsAbbrev); + + void assignValueId(GlobalValue::GUID ValGUID) { + GUIDToValueIdMap[ValGUID] = ++GlobalValueId; + } + unsigned getValueId(GlobalValue::GUID ValGUID) { + const auto &VMI = GUIDToValueIdMap.find(ValGUID); + // Expect that any GUID value had a value Id assigned by an + // earlier call to assignValueId. + assert(VMI != GUIDToValueIdMap.end() && + "GUID does not have assigned value Id"); + return VMI->second; + } + // Helper to get the valueId for the type of value recorded in VI. + unsigned getValueId(ValueInfo VI) { + if (!VI.getValue()) + return getValueId(VI.getGUID()); + return VE.getValueID(VI.getValue()); + } + std::map &valueIds() { return GUIDToValueIdMap; } +}; + +/// Class to manage the bitcode writing for a module. +class ModuleBitcodeWriter : public ModuleBitcodeWriterBase { + /// Pointer to the buffer allocated by caller for bitcode writing. + const SmallVectorImpl &Buffer; + + /// The start bit of the identification block. + uint64_t BitcodeStartBit; + +public: + /// Constructs a ModuleBitcodeWriter object for the given Module, + /// writing to the provided \p Buffer. + ModuleBitcodeWriter(const Module *M, SmallVectorImpl &Buffer, + StringTableBuilder &StrtabBuilder, + BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, + const ModuleSummaryIndex *Index, bool GenerateHash, + ModuleHash *ModHash = nullptr) + : ModuleBitcodeWriterBase(M, StrtabBuilder, Stream, ShouldPreserveUseListOrder, + Index, GenerateHash, ModHash), + Buffer(Buffer), BitcodeStartBit(Stream.GetCurrentBitNo()) { } + /// Emit the current module to the bitstream. void write(); private: uint64_t bitcodeStartBit() { return BitcodeStartBit; } - size_t addToStrtab(StringRef Str); - void writeAttributeGroupTable(); void writeAttributeTable(); void writeTypeTable(); @@ -287,37 +335,8 @@ writeFunction(const Function &F, DenseMap &FunctionToBitcodeIndex); void writeBlockInfo(); - void writePerModuleFunctionSummaryRecord(SmallVector &NameVals, - GlobalValueSummary *Summary, - unsigned ValueID, - unsigned FSCallsAbbrev, - unsigned FSCallsProfileAbbrev, - const Function &F); - void writeModuleLevelReferences(const GlobalVariable &V, - SmallVector &NameVals, - unsigned FSModRefsAbbrev); - void writePerModuleGlobalValueSummary(); void writeModuleHash(size_t BlockStartPos); - void assignValueId(GlobalValue::GUID ValGUID) { - GUIDToValueIdMap[ValGUID] = ++GlobalValueId; - } - unsigned getValueId(GlobalValue::GUID ValGUID) { - const auto &VMI = GUIDToValueIdMap.find(ValGUID); - // Expect that any GUID value had a value Id assigned by an - // earlier call to assignValueId. - assert(VMI != GUIDToValueIdMap.end() && - "GUID does not have assigned value Id"); - return VMI->second; - } - // Helper to get the valueId for the type of value recorded in VI. - unsigned getValueId(ValueInfo VI) { - if (!VI.getValue()) - return getValueId(VI.getGUID()); - return VE.getValueID(VI.getValue()); - } - std::map &valueIds() { return GUIDToValueIdMap; } - unsigned getEncodedSyncScopeID(SyncScope::ID SSID) { return unsigned(SSID); } @@ -948,7 +967,7 @@ llvm_unreachable("Invalid unnamed_addr"); } -size_t ModuleBitcodeWriter::addToStrtab(StringRef Str) { +size_t ModuleBitcodeWriterBase::addToStrtab(StringRef Str) { if (GenerateHash) Hasher.update(Str); return StrtabBuilder.add(Str); @@ -3266,7 +3285,7 @@ } // Helper to emit a single function summary record. -void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord( +void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( SmallVector &NameVals, GlobalValueSummary *Summary, unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev, const Function &F) { @@ -3300,7 +3319,7 @@ // Collect the global value references in the given variable's initializer, // and emit them in a summary record. -void ModuleBitcodeWriter::writeModuleLevelReferences( +void ModuleBitcodeWriterBase::writeModuleLevelReferences( const GlobalVariable &V, SmallVector &NameVals, unsigned FSModRefsAbbrev) { auto VI = Index->getValueInfo(GlobalValue::getGUID(V.getName())); @@ -3334,7 +3353,7 @@ /// Emit the per-module summary section alongside the rest of /// the module's bitcode. -void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() { +void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() { // By default we compile with ThinLTO if the module has a summary, but the // client can request full LTO with a module flag. bool IsThinLTO = true; @@ -3984,3 +4003,158 @@ Out.write((char *)&Buffer.front(), Buffer.size()); } + +/// Class to manage the bitcode writing for a thin link bitcode file. +class ThinLinkBitcodeWriter : public ModuleBitcodeWriterBase { +public: + ThinLinkBitcodeWriter(const Module *M, StringTableBuilder &StrtabBuilder, + BitstreamWriter &Stream, const ModuleSummaryIndex &Index, + ModuleHash &ModHash) + : ModuleBitcodeWriterBase(M, StrtabBuilder, Stream, + /*ShouldPreserveUseListOrder=*/ false, + &Index, + /*GenerateHash=*/ false, &ModHash) { } + + void write(); + +private: + void writeModuleHash() { + Stream.EmitRecord(bitc::MODULE_CODE_HASH, ArrayRef(*ModHash)); + } + + void writeSimplifiedModuleInfo(); +}; + +// This function writes a simpilified module info for thin link bitcode file. +// It only contains the source file name along with the name(the offset and +// size in strtab) and linkage for global values. For the global value info +// entry, in order to keep linkage at offset 5, there are three zeros used +// as padding. +void ThinLinkBitcodeWriter::writeSimplifiedModuleInfo() { + SmallVector Vals; + // Emit the module's source file name. + { + StringEncoding Bits = getStringEncoding(M.getSourceFileName()); + BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); + if (Bits == SE_Char6) + AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); + else if (Bits == SE_Fixed7) + AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7); + + // MODULE_CODE_SOURCE_FILENAME: [namechar x N] + auto Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(AbbrevOpToUse); + unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + for (const auto P : M.getSourceFileName()) + Vals.push_back((unsigned char)P); + + Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev); + Vals.clear(); + } + + // Emit the global variable information. + for (const GlobalVariable &GV : M.globals()) { + + // GLOBALVAR: [strtab offset, strtab size, 0, 0, 0, linkage] + Vals.push_back(addToStrtab(GV.getName())); + Vals.push_back(GV.getName().size()); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(getEncodedLinkage(GV)); + + Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals); + Vals.clear(); + } + + // Emit the function proto information. + for (const Function &F : M) { + // FUNCTION: [strtab offset, strtab size, 0, 0, 0, linkage] + Vals.push_back(addToStrtab(F.getName())); + Vals.push_back(F.getName().size()); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(getEncodedLinkage(F)); + + Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals); + Vals.clear(); + } + + // Emit the alias information. + for (const GlobalAlias &A : M.aliases()) { + // ALIAS: [strtab offset, strtab size, 0, 0, 0, linkage] + Vals.push_back(addToStrtab(A.getName())); + Vals.push_back(A.getName().size()); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(getEncodedLinkage(A)); + + Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals); + Vals.clear(); + } + + // Emit the ifunc information. + for (const GlobalIFunc &I : M.ifuncs()) { + // IFUNC: [strtab offset, strtab size, 0, 0, 0, linkage] + Vals.push_back(addToStrtab(I.getName())); + Vals.push_back(I.getName().size()); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(0); + Vals.push_back(getEncodedLinkage(I)); + + Stream.EmitRecord(bitc::MODULE_CODE_IFUNC, Vals); + Vals.clear(); + } +} + +void ThinLinkBitcodeWriter::write() { + Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); + + writeModuleVersion(); + + writeSimplifiedModuleInfo(); + + writePerModuleGlobalValueSummary(); + + writeModuleHash(); + + Stream.ExitBlock(); +} + +void BitcodeWriter::writeThinLinkBitcode(const Module *M,const ModuleSummaryIndex &Index, + ModuleHash &ModHash) { + assert(!WroteStrtab); + + // The Mods vector is used by irsymtab::build, which requires non-const + // Modules in case it needs to materialize metadata. But the bitcode writer + // requires that the module is materialized, so we can cast to non-const here, + // after checking that it is in fact materialized. + assert(M->isMaterialized()); + Mods.push_back(const_cast(M)); + + ThinLinkBitcodeWriter ThinLinkWriter(M, StrtabBuilder, *Stream, Index, ModHash); + ThinLinkWriter.write(); +} + +// Write the specified thin link bitcode file to the given raw output stream, +// where it will be written in a new bitcode block. This is used when +// writing the per-module index file for ThinLTO. +void llvm::WriteThinLinkBitcodeToFile(const Module *M, raw_ostream &Out, + const ModuleSummaryIndex &Index, + ModuleHash &ModHash) { + SmallVector Buffer; + Buffer.reserve(256 * 1024); + + BitcodeWriter Writer(Buffer); + Writer.writeThinLinkBitcode(M, Index, ModHash); + Writer.writeSymtab(); + Writer.writeStrtab(); + + Out.write((char *)&Buffer.front(), Buffer.size()); +} Index: lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp =================================================================== --- lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -422,12 +422,8 @@ /*GenerateHash=*/true, &ModHash); // If a minimized bitcode module was requested for the thin link, // strip the debug info and write it to the given OS. - if (ThinLinkOS) { - StripDebugInfo(M); - WriteBitcodeToFile(&M, *ThinLinkOS, /*ShouldPreserveUseListOrder=*/false, - Index, - /*GenerateHash=*/false, &ModHash); - } + if (ThinLinkOS && Index) + WriteThinLinkBitcodeToFile(&M, *ThinLinkOS, *Index, ModHash); } class WriteThinLTOBitcode : public ModulePass { Index: test/Transforms/ThinLTOBitcodeWriter/no-type-md.ll =================================================================== --- test/Transforms/ThinLTOBitcodeWriter/no-type-md.ll +++ test/Transforms/ThinLTOBitcodeWriter/no-type-md.ll @@ -2,7 +2,7 @@ ; the debug metadata for the thin link. ; RUN: opt -thinlto-bc -thin-link-bitcode-file=%t.thinlink.bc -o %t.bc %s ; RUN: llvm-dis -o - %t.bc | FileCheck %s -; RUN: llvm-dis -o - %t.thinlink.bc | FileCheck --check-prefix=NODEBUG %s +; RUN: llvm-bcanalyzer -dump %t.thinlink.bc | FileCheck --check-prefix=NODEBUG %s ; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck --check-prefix=BCA %s ; Make sure the combined index files produced by both the normal and the @@ -26,8 +26,17 @@ ; RUN: llvm-lto -thinlto-action=thinlink -o %t4.thinlto.bc %t.bc ; RUN: diff %t3.thinlto.bc %t4.thinlto.bc +; Make sure llvm-lto2 can read the thin link bitcode file +; RUN: llvm-lto2 run %t.thinlink.bc -o %t.o -save-temps \ +; RUN: -thinlto-distributed-indexes \ +; RUN: -r=%t.thinlink.bc,f,px \ +; RUN: -r=%t.thinlink.bc,g,px + ; BCA: