Index: llvm/docs/BitCodeFormat.rst =================================================================== --- llvm/docs/BitCodeFormat.rst +++ llvm/docs/BitCodeFormat.rst @@ -550,6 +550,8 @@ * 17 --- `TYPE_BLOCK`_ --- This describes all of the types in the module. +* 23 --- `STRTAB_BLOCK`_ --- The bitcode file's string table. + .. _MODULE_BLOCK: MODULE_BLOCK Contents @@ -577,7 +579,7 @@ ``[VERSION, version#]`` The ``VERSION`` record (code 1) contains a single value indicating the format -version. Versions 0 and 1 are supported at this time. The difference between +version. Versions 0, 1 and 2 are supported at this time. The difference between version 0 and 1 is in the encoding of instruction operands in each `FUNCTION_BLOCK`_. @@ -620,6 +622,12 @@ case of phi instructions. For phi instructions, operands are encoded as `Signed VBRs`_ to deal with forward references. +In version 2, the meaning of module records ``FUNCTION``, ``GLOBALVAR``, +``ALIAS``, ``IFUNC`` and ``COMDAT`` change such that the first two operands +specify an offset and size of a string in a string table (see `STRTAB_BLOCK +Contents`_), the function name is removed from the ``FNENTRY`` record in the +value symbol table, and the top-level ``VALUE_SYMTAB_BLOCK`` may only contain +``FNENTRY`` records. MODULE_CODE_TRIPLE Record ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -673,11 +681,14 @@ MODULE_CODE_GLOBALVAR Record ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -``[GLOBALVAR, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat]`` +``[GLOBALVAR, strtab offset, strtab size, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal, unnamed_addr, externally_initialized, dllstorageclass, comdat]`` The ``GLOBALVAR`` record (code 7) marks the declaration or definition of a global variable. The operand fields are: +* *strtab offset*, *strtab size*: Specifies the name of the global variable. + See `STRTAB_BLOCK Contents`_. + * *pointer type*: The type index of the pointer type used to point to this global variable @@ -755,11 +766,14 @@ MODULE_CODE_FUNCTION Record ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -``[FUNCTION, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, prologuedata, dllstorageclass, comdat, prefixdata, personalityfn]`` +``[FUNCTION, strtab offset, strtab size, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, prologuedata, dllstorageclass, comdat, prefixdata, personalityfn]`` The ``FUNCTION`` record (code 8) marks the declaration or definition of a function. The operand fields are: +* *strtab offset*, *strtab size*: Specifies the name of the function. + See `STRTAB_BLOCK Contents`_. + * *type*: The type index of the function type describing this function * *callingconv*: The calling convention number: @@ -817,11 +831,14 @@ MODULE_CODE_ALIAS Record ^^^^^^^^^^^^^^^^^^^^^^^^ -``[ALIAS, alias type, aliasee val#, linkage, visibility, dllstorageclass, threadlocal, unnamed_addr]`` +``[ALIAS, strtab offset, strtab size, alias type, aliasee val#, linkage, visibility, dllstorageclass, threadlocal, unnamed_addr]`` The ``ALIAS`` record (code 9) marks the definition of an alias. The operand fields are +* *strtab offset*, *strtab size*: Specifies the name of the alias. + See `STRTAB_BLOCK Contents`_. + * *alias type*: The type index of the alias * *aliasee val#*: The value index of the aliased value @@ -1300,3 +1317,20 @@ ---------------------------- The ``METADATA_ATTACHMENT`` block (id 16) ... + +.. _STRTAB_BLOCK: + +STRTAB_BLOCK Contents +--------------------- + +The ``STRTAB`` block (id 23) contains a single record (``STRTAB_BLOB``, id 1) +with a single blob operand containing the bitcode file's string table. + +Strings in the string table are not null terminated. A record's *strtab +offset* and *strtab size* operands specify the byte offset and size of a +string within the string table. + +The string table is used by all preceding blocks in the bitcode file that are +not succeeded by another intervening ``STRTAB`` block. Normally a bitcode +file will have a single string table, but it may have more than one if it +was created by binary concatenation of multiple bitcode files. Index: llvm/include/llvm/Bitcode/BitcodeReader.h =================================================================== --- llvm/include/llvm/Bitcode/BitcodeReader.h +++ llvm/include/llvm/Bitcode/BitcodeReader.h @@ -46,6 +46,9 @@ ArrayRef Buffer; StringRef ModuleIdentifier; + // The string table used to interpret this module. + StringRef Strtab; + // The bitstream location of the IDENTIFICATION_BLOCK. uint64_t IdentificationBit; @@ -70,6 +73,7 @@ StringRef getBuffer() const { return StringRef((const char *)Buffer.begin(), Buffer.size()); } + StringRef getStrtab() const { return Strtab; } StringRef getModuleIdentifier() const { return ModuleIdentifier; } Index: llvm/include/llvm/Bitcode/BitcodeWriter.h =================================================================== --- llvm/include/llvm/Bitcode/BitcodeWriter.h +++ llvm/include/llvm/Bitcode/BitcodeWriter.h @@ -15,6 +15,7 @@ #define LLVM_BITCODE_BITCODEWRITER_H #include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/MC/StringTableBuilder.h" #include namespace llvm { @@ -26,12 +27,25 @@ SmallVectorImpl &Buffer; std::unique_ptr Stream; + StringTableBuilder StrtabBuilder{StringTableBuilder::RAW}; + bool WroteStrtab = false; + + void writeBlob(unsigned Block, unsigned Record, StringRef Blob); + public: /// Create a BitcodeWriter that writes to Buffer. BitcodeWriter(SmallVectorImpl &Buffer); ~BitcodeWriter(); + /// Write the bitcode file's string table. This must be called exactly once + /// after all modules have been written. + void writeStrtab(); + + /// Copy the string table for another module into this bitcode file. This + /// should be called after copying the module itself into the bitcode file. + void copyStrtab(StringRef Strtab); + /// Write the specified module to the buffer specified at construction time. /// /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a Index: llvm/include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -22,7 +22,7 @@ namespace llvm { namespace bitc { -// The only top-level block type defined is for a module. +// The only top-level block types are MODULE, IDENTIFICATION and STRTAB. enum BlockIDs { // Blocks MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID, @@ -52,7 +52,9 @@ OPERAND_BUNDLE_TAGS_BLOCK_ID, - METADATA_KIND_BLOCK_ID + METADATA_KIND_BLOCK_ID, + + STRTAB_BLOCK_ID, }; /// Identification block contains a string that describes the producer details, @@ -232,6 +234,10 @@ // llvm.type.checked.load intrinsic with all constant integer arguments. // [typeid, offset, n x arg] FS_TYPE_CHECKED_LOAD_CONST_VCALL = 15, + // Assigns a GUID to a value ID. This normally appears only in combined + // summaries, but it can also appear in per-module summaries for PGO data. + // [valueid, guid] + FS_VALUE_GUID = 16, }; enum MetadataCodes { @@ -550,6 +556,10 @@ COMDAT_SELECTION_KIND_SAME_SIZE = 5, }; +enum StrtabCodes { + STRTAB_BLOB = 1, +}; + } // End bitc namespace } // End llvm namespace Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -372,12 +372,14 @@ class BitcodeReaderBase { protected: - BitcodeReaderBase(BitstreamCursor Stream) : Stream(std::move(Stream)) { + BitcodeReaderBase(BitstreamCursor Stream, StringRef Strtab) + : Stream(std::move(Stream)), Strtab(Strtab) { this->Stream.setBlockInfo(&BlockInfo); } BitstreamBlockInfo BlockInfo; BitstreamCursor Stream; + StringRef Strtab; /// Indicates that we are using a new encoding for instruction operands where /// most operands in the current FUNCTION_BLOCK are encoded relative to the @@ -387,8 +389,18 @@ /// not need this flag. bool UseRelativeIDs = false; + /// In version 2 of the bitcode we store names of global values and comdats in + /// a string table rather than in the VST. + bool UseStrtab = false; + Error parseVersionRecord(ArrayRef Record); + /// If this module uses a string table, pop the reference to the string table + /// and return the referenced string and the rest of the record. Otherwise + /// just return the record itself. + std::pair> + readNameFromStrtab(ArrayRef Record); + bool readBlockInfo(); // Contains an arbitrary and optional string identifying the bitcode producer @@ -409,12 +421,23 @@ if (Record.size() < 1) return error("Invalid record"); unsigned ModuleVersion = Record[0]; - if (ModuleVersion > 1) + if (ModuleVersion > 2) return error("Invalid value"); UseRelativeIDs = ModuleVersion >= 1; + UseStrtab = ModuleVersion >= 2; return Error::success(); } +std::pair> +BitcodeReaderBase::readNameFromStrtab(ArrayRef Record) { + if (!UseStrtab) + return {"", Record}; + // Invalid reference. Let the caller complain about the record being empty. + if (Record[0] + Record[1] > Strtab.size()) + return {"", {}}; + return {StringRef(Strtab.data() + Record[0], Record[1]), Record.slice(2)}; +} + class BitcodeReader : public BitcodeReaderBase, public GVMaterializer { LLVMContext &Context; Module *TheModule = nullptr; @@ -492,8 +515,8 @@ std::vector BundleTags; public: - BitcodeReader(BitstreamCursor Stream, StringRef ProducerIdentification, - LLVMContext &Context); + BitcodeReader(BitstreamCursor Stream, StringRef Strtab, + StringRef ProducerIdentification, LLVMContext &Context); Error materializeForwardReferencedFunctions(); @@ -628,7 +651,10 @@ Expected recordValue(SmallVectorImpl &Record, unsigned NameIndex, Triple &TT); + void setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, Function *F, + ArrayRef Record); Error parseValueSymbolTable(uint64_t Offset = 0); + Error parseGlobalValueSymbolTable(); Error parseConstants(); Error rememberAndSkipFunctionBodies(); Error rememberAndSkipFunctionBody(); @@ -681,12 +707,15 @@ std::string SourceFileName; public: - ModuleSummaryIndexBitcodeReader( - BitstreamCursor Stream, ModuleSummaryIndex &TheIndex); + ModuleSummaryIndexBitcodeReader(BitstreamCursor Stream, StringRef Strtab, + ModuleSummaryIndex &TheIndex); Error parseModule(StringRef ModulePath); private: + void setValueGUID(uint64_t ValueID, StringRef ValueName, + GlobalValue::LinkageTypes Linkage, + StringRef SourceFileName); Error parseValueSymbolTable( uint64_t Offset, DenseMap &ValueIdToLinkageMap); @@ -716,10 +745,10 @@ return std::error_code(); } -BitcodeReader::BitcodeReader(BitstreamCursor Stream, +BitcodeReader::BitcodeReader(BitstreamCursor Stream, StringRef Strtab, StringRef ProducerIdentification, LLVMContext &Context) - : BitcodeReaderBase(std::move(Stream)), Context(Context), + : BitcodeReaderBase(std::move(Stream), Strtab), Context(Context), ValueList(Context) { this->ProducerIdentification = ProducerIdentification; } @@ -1749,6 +1778,54 @@ return CurrentBit; } +void BitcodeReader::setDeferredFunctionInfo(unsigned FuncBitcodeOffsetDelta, + Function *F, + ArrayRef Record) { + // Note that we subtract 1 here because the offset is relative to one word + // before the start of the identification or module block, which was + // historically always the start of the regular bitcode header. + uint64_t FuncWordOffset = Record[1] - 1; + uint64_t FuncBitOffset = FuncWordOffset * 32; + DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta; + // Set the LastFunctionBlockBit to point to the last function block. + // Later when parsing is resumed after function materialization, + // we can simply skip that last function block. + if (FuncBitOffset > LastFunctionBlockBit) + LastFunctionBlockBit = FuncBitOffset; +} + +/// Read a new-style GlobalValue symbol table. +Error BitcodeReader::parseGlobalValueSymbolTable() { + unsigned FuncBitcodeOffsetDelta = + Stream.getAbbrevIDWidth() + bitc::BlockIDWidth; + + if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) + return error("Invalid record"); + + SmallVector Record; + while (true) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: + case BitstreamEntry::Error: + return error("Malformed block"); + case BitstreamEntry::EndBlock: + return Error::success(); + case BitstreamEntry::Record: + break; + } + + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + case bitc::VST_CODE_FNENTRY: // [valueid, offset] + setDeferredFunctionInfo(FuncBitcodeOffsetDelta, + cast(ValueList[Record[0]]), Record); + break; + } + } +} + /// Parse the value symbol table at either the current parsing location or /// at the given bit offset if provided. Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) { @@ -1756,8 +1833,18 @@ // Pass in the Offset to distinguish between calling for the module-level // VST (where we want to jump to the VST offset) and the function-level // VST (where we don't). - if (Offset > 0) + if (Offset > 0) { CurrentBit = jumpToValueSymbolTable(Offset, Stream); + // If this module uses a string table, read this as a module-level VST. + if (UseStrtab) { + if (Error Err = parseGlobalValueSymbolTable()) + return Err; + Stream.JumpToBit(CurrentBit); + return Error::success(); + } + // Otherwise, the VST will be in a similar format to a function-level VST, + // and will contain symbol names. + } // Compute the delta between the bitcode indices in the VST (the word offset // to the word-aligned ENTER_SUBBLOCK for the function block, and that @@ -1818,23 +1905,10 @@ return Err; Value *V = ValOrErr.get(); - auto *F = dyn_cast(V); // Ignore function offsets emitted for aliases of functions in older // versions of LLVM. - if (!F) - break; - - // Note that we subtract 1 here because the offset is relative to one word - // before the start of the identification or module block, which was - // historically always the start of the regular bitcode header. - uint64_t FuncWordOffset = Record[1] - 1; - uint64_t FuncBitOffset = FuncWordOffset * 32; - DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta; - // Set the LastFunctionBlockBit to point to the last function block. - // Later when parsing is resumed after function materialization, - // we can simply skip that last function block. - if (FuncBitOffset > LastFunctionBlockBit) - LastFunctionBlockBit = FuncBitOffset; + if (auto *F = dyn_cast(V)) + setDeferredFunctionInfo(FuncBitcodeOffsetDelta, F, Record); break; } case bitc::VST_CODE_BBENTRY: { @@ -2626,15 +2700,24 @@ } Error BitcodeReader::parseComdatRecord(ArrayRef Record) { - // [selection_kind, name] - if (Record.size() < 2) + // v1: [selection_kind, name] + // v2: [strtab_offset, strtab_size, selection_kind] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + + if (Record.size() < 1) return error("Invalid record"); Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]); - std::string Name; - unsigned ComdatNameSize = Record[1]; - Name.reserve(ComdatNameSize); - for (unsigned i = 0; i != ComdatNameSize; ++i) - Name += (char)Record[2 + i]; + std::string OldFormatName; + if (!UseStrtab) { + if (Record.size() < 2) + return error("Invalid record"); + unsigned ComdatNameSize = Record[1]; + OldFormatName.reserve(ComdatNameSize); + for (unsigned i = 0; i != ComdatNameSize; ++i) + OldFormatName += (char)Record[2 + i]; + Name = OldFormatName; + } Comdat *C = TheModule->getOrInsertComdat(Name); C->setSelectionKind(SK); ComdatList.push_back(C); @@ -2642,9 +2725,13 @@ } Error BitcodeReader::parseGlobalVarRecord(ArrayRef Record) { - // [pointer type, isconst, initid, linkage, alignment, section, + // v1: [pointer type, isconst, initid, linkage, alignment, section, // visibility, threadlocal, unnamed_addr, externally_initialized, - // dllstorageclass, comdat] + // dllstorageclass, comdat] (name in VST) + // v2: [strtab_offset, strtab_size, v1] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + if (Record.size() < 6) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); @@ -2692,7 +2779,7 @@ ExternallyInitialized = Record[9]; GlobalVariable *NewGV = - new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, "", + new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, Name, nullptr, TLM, AddressSpace, ExternallyInitialized); NewGV->setAlignment(Alignment); if (!Section.empty()) @@ -2724,9 +2811,13 @@ } Error BitcodeReader::parseFunctionRecord(ArrayRef Record) { - // [type, callingconv, isproto, linkage, paramattr, alignment, section, + // v1: [type, callingconv, isproto, linkage, paramattr, alignment, section, // visibility, gc, unnamed_addr, prologuedata, dllstorageclass, comdat, - // prefixdata] + // prefixdata] (name in VST) + // v2: [strtab_offset, strtab_size, v1] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + if (Record.size() < 8) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); @@ -2742,7 +2833,7 @@ return error("Invalid calling convention ID"); Function *Func = - Function::Create(FTy, GlobalValue::ExternalLinkage, "", TheModule); + Function::Create(FTy, GlobalValue::ExternalLinkage, Name, TheModule); Func->setCallingConv(CC); bool isProto = Record[2]; @@ -2810,11 +2901,15 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord( unsigned BitCode, ArrayRef Record) { - // ALIAS_OLD: [alias type, aliasee val#, linkage] - // ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, - // dllstorageclass] - // IFUNC: [alias type, addrspace, aliasee val#, linkage, - // visibility, dllstorageclass] + // v1 ALIAS_OLD: [alias type, aliasee val#, linkage] (name in VST) + // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, + // dllstorageclass] (name in VST) + // v1 IFUNC: [alias type, addrspace, aliasee val#, linkage, + // visibility, dllstorageclass] (name in VST) + // v2: [strtab_offset, strtab_size, v1] + StringRef Name; + std::tie(Name, Record) = readNameFromStrtab(Record); + bool NewRecord = BitCode != bitc::MODULE_CODE_ALIAS_OLD; if (Record.size() < (3 + (unsigned)NewRecord)) return error("Invalid record"); @@ -2839,10 +2934,10 @@ GlobalIndirectSymbol *NewGA; if (BitCode == bitc::MODULE_CODE_ALIAS || BitCode == bitc::MODULE_CODE_ALIAS_OLD) - NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), "", + NewGA = GlobalAlias::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name, TheModule); else - NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), "", + NewGA = GlobalIFunc::create(Ty, AddrSpace, getDecodedLinkage(Linkage), Name, nullptr, TheModule); // Old bitcode files didn't have visibility field. // Local linkage must have default visibility. @@ -4568,8 +4663,8 @@ } ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader( - BitstreamCursor Cursor, ModuleSummaryIndex &TheIndex) - : BitcodeReaderBase(std::move(Cursor)), TheIndex(TheIndex) {} + BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex) + : BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex) {} std::pair ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) { @@ -4578,12 +4673,32 @@ return VGI->second; } +void ModuleSummaryIndexBitcodeReader::setValueGUID( + uint64_t ValueID, StringRef ValueName, GlobalValue::LinkageTypes Linkage, + StringRef SourceFileName) { + std::string GlobalId = + GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName); + auto ValueGUID = GlobalValue::getGUID(GlobalId); + auto OriginalNameID = ValueGUID; + if (GlobalValue::isLocalLinkage(Linkage)) + OriginalNameID = GlobalValue::getGUID(ValueName); + if (PrintSummaryGUIDs) + dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is " + << ValueName << "\n"; + ValueIdToCallGraphGUIDMap[ValueID] = + std::make_pair(ValueGUID, OriginalNameID); +} + // Specialized value symbol table parser used when reading module index // blocks where we don't actually create global values. The parsed information // is saved in the bitcode reader for use when later parsing summaries. Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( uint64_t Offset, DenseMap &ValueIdToLinkageMap) { + // With a strtab the VST is not required to parse the summary. + if (UseStrtab) + return Error::success(); + assert(Offset > 0 && "Expected non-zero VST offset"); uint64_t CurrentBit = jumpToValueSymbolTable(Offset, Stream); @@ -4625,17 +4740,7 @@ assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); auto Linkage = VLI->second; - std::string GlobalId = - GlobalValue::getGlobalIdentifier(ValueName, Linkage, SourceFileName); - auto ValueGUID = GlobalValue::getGUID(GlobalId); - auto OriginalNameID = ValueGUID; - if (GlobalValue::isLocalLinkage(Linkage)) - OriginalNameID = GlobalValue::getGUID(ValueName); - if (PrintSummaryGUIDs) - dbgs() << "GUID " << ValueGUID << "(" << OriginalNameID << ") is " - << ValueName << "\n"; - ValueIdToCallGraphGUIDMap[ValueID] = - std::make_pair(ValueGUID, OriginalNameID); + setValueGUID(ValueID, ValueName, Linkage, SourceFileName); ValueName.clear(); break; } @@ -4649,18 +4754,7 @@ assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); auto Linkage = VLI->second; - std::string FunctionGlobalId = GlobalValue::getGlobalIdentifier( - ValueName, VLI->second, SourceFileName); - auto FunctionGUID = GlobalValue::getGUID(FunctionGlobalId); - auto OriginalNameID = FunctionGUID; - if (GlobalValue::isLocalLinkage(Linkage)) - OriginalNameID = GlobalValue::getGUID(ValueName); - if (PrintSummaryGUIDs) - dbgs() << "GUID " << FunctionGUID << "(" << OriginalNameID << ") is " - << ValueName << "\n"; - ValueIdToCallGraphGUIDMap[ValueID] = - std::make_pair(FunctionGUID, OriginalNameID); - + setValueGUID(ValueID, ValueName, Linkage, SourceFileName); ValueName.clear(); break; } @@ -4747,6 +4841,11 @@ switch (BitCode) { default: break; // Default behavior, ignore unknown content. + case bitc::MODULE_CODE_VERSION: { + if (Error Err = parseVersionRecord(Record)) + return Err; + break; + } /// MODULE_CODE_SOURCE_FILENAME: [namechar x N] case bitc::MODULE_CODE_SOURCE_FILENAME: { SmallString<128> ValueName; @@ -4781,17 +4880,26 @@ // was historically always the start of the regular bitcode header. VSTOffset = Record[0] - 1; break; - // GLOBALVAR: [pointer type, isconst, initid, linkage, ...] - // FUNCTION: [type, callingconv, isproto, linkage, ...] - // ALIAS: [alias type, addrspace, aliasee val#, linkage, ...] + // v1 GLOBALVAR: [pointer type, isconst, initid, linkage, ...] + // v1 FUNCTION: [type, callingconv, isproto, linkage, ...] + // v1 ALIAS: [alias type, addrspace, aliasee val#, linkage, ...] + // v2: [strtab offset, strtab size, v1] case bitc::MODULE_CODE_GLOBALVAR: case bitc::MODULE_CODE_FUNCTION: case bitc::MODULE_CODE_ALIAS: { - if (Record.size() <= 3) + StringRef Name; + ArrayRef GVRecord; + std::tie(Name, GVRecord) = readNameFromStrtab(Record); + if (GVRecord.size() <= 3) return error("Invalid record"); - uint64_t RawLinkage = Record[3]; + uint64_t RawLinkage = GVRecord[3]; GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); - ValueIdToLinkageMap[ValueId++] = Linkage; + if (!UseStrtab) { + ValueIdToLinkageMap[ValueId++] = Linkage; + break; + } + + setValueGUID(ValueId++, Name, Linkage, SourceFileName); break; } } @@ -4902,6 +5010,12 @@ switch (BitCode) { default: // Default behavior: ignore. break; + case bitc::FS_VALUE_GUID: { + uint64_t ValueID = Record[0]; + GlobalValue::GUID RefGUID = Record[1]; + ValueIdToCallGraphGUIDMap[ValueID] = std::make_pair(RefGUID, RefGUID); + break; + } // FS_PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid, // n x (valueid)] // FS_PERMODULE_PROFILE: [valueid, flags, instcount, numrefs, @@ -5206,6 +5320,35 @@ return *ErrorCategory; } +static Expected readStrtab(BitstreamCursor &Stream) { + if (Stream.EnterSubBlock(bitc::STRTAB_BLOCK_ID)) + return error("Invalid record"); + + StringRef Strtab; + while (1) { + BitstreamEntry Entry = Stream.advance(); + switch (Entry.Kind) { + case BitstreamEntry::EndBlock: + return Strtab; + + case BitstreamEntry::Error: + return error("Malformed block"); + + case BitstreamEntry::SubBlock: + if (Stream.SkipBlock()) + return error("Malformed block"); + break; + + case BitstreamEntry::Record: + StringRef Blob; + SmallVector Record; + if (Stream.readRecord(Entry.ID, Record, &Blob) == bitc::STRTAB_BLOB) + Strtab = Blob; + break; + } + } +} + //===----------------------------------------------------------------------===// // External interface //===----------------------------------------------------------------------===// @@ -5258,6 +5401,22 @@ continue; } + if (Entry.ID == bitc::STRTAB_BLOCK_ID) { + Expected Strtab = readStrtab(Stream); + if (!Strtab) + return Strtab.takeError(); + // This string table is used by every preceding bitcode module that does + // not have its own string table. A bitcode file may have multiple + // string tables if it was created by binary concatenation, for example + // with "llvm-cat -b". + for (auto I = Modules.rbegin(), E = Modules.rend(); I != E; ++I) { + if (!I->Strtab.empty()) + break; + I->Strtab = *Strtab; + } + continue; + } + if (Stream.SkipBlock()) return error("Malformed block"); continue; @@ -5294,8 +5453,8 @@ } Stream.JumpToBit(ModuleBit); - auto *R = - new BitcodeReader(std::move(Stream), ProducerIdentification, Context); + auto *R = new BitcodeReader(std::move(Stream), Strtab, ProducerIdentification, + Context); std::unique_ptr M = llvm::make_unique(ModuleIdentifier, Context); @@ -5330,7 +5489,7 @@ Stream.JumpToBit(ModuleBit); auto Index = llvm::make_unique(); - ModuleSummaryIndexBitcodeReader R(std::move(Stream), *Index); + ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, *Index); if (Error Err = R.parseModule(ModuleIdentifier)) return std::move(Err); Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/UseListOrder.h" #include "llvm/IR/ValueSymbolTable.h" +#include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Program.h" @@ -76,26 +77,28 @@ /// The stream created and owned by the client. BitstreamWriter &Stream; - /// Saves the offset of the VSTOffset record that must eventually be - /// backpatched with the offset of the actual VST. - uint64_t VSTOffsetPlaceholder = 0; - public: /// Constructs a BitcodeWriterBase object that writes to the provided /// \p Stream. BitcodeWriterBase(BitstreamWriter &Stream) : Stream(Stream) {} protected: - bool hasVSTOffsetPlaceholder() { return VSTOffsetPlaceholder != 0; } - void writeValueSymbolTableForwardDecl(); void writeBitcodeHeader(); + void writeModuleVersion(); }; +void BitcodeWriterBase::writeModuleVersion() { + // VERSION: [version#] + Stream.EmitRecord(bitc::MODULE_CODE_VERSION, ArrayRef{2}); +} + /// Class to manage the bitcode writing for a module. class ModuleBitcodeWriter : public BitcodeWriterBase { /// Pointer to the buffer allocated by caller for bitcode writing. const SmallVectorImpl &Buffer; + StringTableBuilder &StrtabBuilder; + /// The Module to write to bitcode. const Module &M; @@ -127,15 +130,20 @@ /// Tracks the last value id recorded in the GUIDToValueMap. unsigned GlobalValueId; + /// Saves the offset of the VSTOffset record that must eventually be + /// backpatched with the offset of the actual VST. + uint64_t VSTOffsetPlaceholder = 0; + public: /// Constructs a ModuleBitcodeWriter object for the given Module, /// writing to the provided \p Buffer. ModuleBitcodeWriter(const Module *M, SmallVectorImpl &Buffer, + StringTableBuilder &StrtabBuilder, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash = nullptr) - : BitcodeWriterBase(Stream), Buffer(Buffer), M(*M), - VE(*M, ShouldPreserveUseListOrder), Index(Index), + : BitcodeWriterBase(Stream), Buffer(Buffer), StrtabBuilder(StrtabBuilder), + M(*M), VE(*M, ShouldPreserveUseListOrder), Index(Index), GenerateHash(GenerateHash), ModHash(ModHash), BitcodeStartBit(Stream.GetCurrentBitNo()) { // Assign ValueIds to any callee values in the index that came from @@ -169,6 +177,7 @@ void writeAttributeTable(); void writeTypeTable(); void writeComdats(); + void writeValueSymbolTableForwardDecl(); void writeModuleInfo(); void writeValueAsMetadata(const ValueAsMetadata *MD, SmallVectorImpl &Record); @@ -261,9 +270,9 @@ SmallVectorImpl &Vals); void writeInstruction(const Instruction &I, unsigned InstID, SmallVectorImpl &Vals); - void writeValueSymbolTable( - const ValueSymbolTable &VST, bool IsModuleLevel = false, - DenseMap *FunctionToBitcodeIndex = nullptr); + void writeValueSymbolTable(const ValueSymbolTable &VST); + void writeGlobalValueSymbolTable( + DenseMap &FunctionToBitcodeIndex); void writeUseList(UseListOrder &&Order); void writeUseListBlock(const Function *F); void @@ -477,7 +486,6 @@ private: void writeModStrings(); - void writeCombinedValueSymbolTable(); void writeCombinedGlobalValueSummary(); /// Indicates whether the provided \p ModulePath should be written into @@ -1047,13 +1055,10 @@ void ModuleBitcodeWriter::writeComdats() { SmallVector Vals; for (const Comdat *C : VE.getComdats()) { - // COMDAT: [selection_kind, name] + // COMDAT: [strtab offset, strtab size, selection_kind] + Vals.push_back(StrtabBuilder.add(C->getName())); + Vals.push_back(C->getName().size()); Vals.push_back(getEncodedComdatSelectionKind(*C)); - size_t Size = C->getName().size(); - assert(isUInt<32>(Size)); - Vals.push_back(Size); - for (char Chr : C->getName()) - Vals.push_back((unsigned char)Chr); Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0); Vals.clear(); } @@ -1062,7 +1067,7 @@ /// Write a record that will eventually hold the word offset of the /// module-level VST. For now the offset is 0, which will be backpatched /// after the real VST is written. Saves the bit offset to backpatch. -void BitcodeWriterBase::writeValueSymbolTableForwardDecl() { +void ModuleBitcodeWriter::writeValueSymbolTableForwardDecl() { // Write a placeholder value in for the offset of the real VST, // which is written after the function blocks so that it can include // the offset of each function. The placeholder offset will be @@ -1165,6 +1170,8 @@ // Add an abbrev for common globals with no visibility or thread localness. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(MaxGlobalType+1))); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddrSpace << 2 @@ -1188,15 +1195,42 @@ SimpleGVarAbbrev = Stream.EmitAbbrev(std::move(Abbv)); } - // Emit the global variable information. SmallVector Vals; + // Emit the module's source file name. + { + StringEncoding Bits = getStringEncoding(M.getSourceFileName().data(), + M.getSourceFileName().size()); + BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); + if (Bits == SE_Char6) + AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); + else if (Bits == SE_Fixed7) + AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7); + + // MODULE_CODE_SOURCE_FILENAME: [namechar x N] + auto Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); + Abbv->Add(AbbrevOpToUse); + unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + for (const auto P : M.getSourceFileName()) + Vals.push_back((unsigned char)P); + + // Emit the finished record. + Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev); + Vals.clear(); + } + + // Emit the global variable information. for (const GlobalVariable &GV : M.globals()) { unsigned AbbrevToUse = 0; - // GLOBALVAR: [type, isconst, initid, + // GLOBALVAR: [strtab offset, strtab size, type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass, // comdat] + Vals.push_back(StrtabBuilder.add(GV.getName())); + Vals.push_back(GV.getName().size()); Vals.push_back(VE.getTypeID(GV.getValueType())); Vals.push_back(GV.getType()->getAddressSpace() << 2 | 2 | GV.isConstant()); Vals.push_back(GV.isDeclaration() ? 0 : @@ -1226,9 +1260,12 @@ // Emit the function proto information. for (const Function &F : M) { - // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, - // section, visibility, gc, unnamed_addr, prologuedata, - // dllstorageclass, comdat, prefixdata, personalityfn] + // FUNCTION: [strtab offset, strtab size, type, callingconv, isproto, + // linkage, paramattrs, alignment, section, visibility, gc, + // unnamed_addr, prologuedata, dllstorageclass, comdat, + // prefixdata, personalityfn] + Vals.push_back(StrtabBuilder.add(F.getName())); + Vals.push_back(F.getName().size()); Vals.push_back(VE.getTypeID(F.getFunctionType())); Vals.push_back(F.getCallingConv()); Vals.push_back(F.isDeclaration()); @@ -1255,8 +1292,10 @@ // Emit the alias information. for (const GlobalAlias &A : M.aliases()) { - // ALIAS: [alias type, aliasee val#, linkage, visibility, dllstorageclass, - // threadlocal, unnamed_addr] + // ALIAS: [strtab offset, strtab size, alias type, aliasee val#, linkage, + // visibility, dllstorageclass, threadlocal, unnamed_addr] + Vals.push_back(StrtabBuilder.add(A.getName())); + Vals.push_back(A.getName().size()); Vals.push_back(VE.getTypeID(A.getValueType())); Vals.push_back(A.getType()->getAddressSpace()); Vals.push_back(VE.getValueID(A.getAliasee())); @@ -1272,7 +1311,10 @@ // Emit the ifunc information. for (const GlobalIFunc &I : M.ifuncs()) { - // IFUNC: [ifunc type, address space, resolver val#, linkage, visibility] + // IFUNC: [strtab offset, strtab size, ifunc type, address space, resolver + // val#, linkage, visibility] + Vals.push_back(StrtabBuilder.add(I.getName())); + Vals.push_back(I.getName().size()); Vals.push_back(VE.getTypeID(I.getValueType())); Vals.push_back(I.getType()->getAddressSpace()); Vals.push_back(VE.getValueID(I.getResolver())); @@ -1282,34 +1324,6 @@ Vals.clear(); } - // Emit the module's source file name. - { - StringEncoding Bits = getStringEncoding(M.getSourceFileName().data(), - M.getSourceFileName().size()); - BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); - if (Bits == SE_Char6) - AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); - else if (Bits == SE_Fixed7) - AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7); - - // MODULE_CODE_SOURCE_FILENAME: [namechar x N] - auto Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(AbbrevOpToUse); - unsigned FilenameAbbrev = Stream.EmitAbbrev(std::move(Abbv)); - - for (const auto P : M.getSourceFileName()) - Vals.push_back((unsigned char)P); - - // Emit the finished record. - Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev); - Vals.clear(); - } - - // If we have a VST, write the VSTOFFSET record placeholder. - if (M.getValueSymbolTable().empty()) - return; writeValueSymbolTableForwardDecl(); } @@ -2839,77 +2853,60 @@ Vals.clear(); } -/// Emit names for globals/functions etc. \p IsModuleLevel is true when -/// we are writing the module-level VST, where we are including a function -/// bitcode index and need to backpatch the VST forward declaration record. -void ModuleBitcodeWriter::writeValueSymbolTable( - const ValueSymbolTable &VST, bool IsModuleLevel, - DenseMap *FunctionToBitcodeIndex) { - if (VST.empty()) { - // writeValueSymbolTableForwardDecl should have returned early as - // well. Ensure this handling remains in sync by asserting that - // the placeholder offset is not set. - assert(!IsModuleLevel || !hasVSTOffsetPlaceholder()); - return; - } +/// Write a GlobalValue VST to the module. The purpose of this data structure is +/// to allow clients to efficiently find the function body. +void ModuleBitcodeWriter::writeGlobalValueSymbolTable( + DenseMap &FunctionToBitcodeIndex) { + // Get the offset of the VST we are writing, and backpatch it into + // the VST forward declaration record. + uint64_t VSTOffset = Stream.GetCurrentBitNo(); + // The BitcodeStartBit was the stream offset of the identification block. + VSTOffset -= bitcodeStartBit(); + assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); + // Note that we add 1 here because the offset is relative to one word + // before the start of the identification block, which was historically + // always the start of the regular bitcode header. + Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1); - if (IsModuleLevel && hasVSTOffsetPlaceholder()) { - // Get the offset of the VST we are writing, and backpatch it into - // the VST forward declaration record. - uint64_t VSTOffset = Stream.GetCurrentBitNo(); - // The BitcodeStartBit was the stream offset of the identification block. - VSTOffset -= bitcodeStartBit(); - assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); + Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); + + auto Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset + unsigned FnEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + + for (const Function &F : M) { + uint64_t Record[2]; + + if (F.isDeclaration()) + continue; + + Record[0] = VE.getValueID(&F); + + // Save the word offset of the function (from the start of the + // actual bitcode written to the stream). + uint64_t BitcodeIndex = FunctionToBitcodeIndex[&F] - bitcodeStartBit(); + assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); // Note that we add 1 here because the offset is relative to one word // before the start of the identification block, which was historically // always the start of the regular bitcode header. - Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1); - } + Record[1] = BitcodeIndex / 32 + 1; - Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); - - // For the module-level VST, add abbrev Ids for the VST_CODE_FNENTRY - // records, which are not used in the per-function VSTs. - unsigned FnEntry8BitAbbrev; - unsigned FnEntry7BitAbbrev; - unsigned FnEntry6BitAbbrev; - unsigned GUIDEntryAbbrev; - if (IsModuleLevel && hasVSTOffsetPlaceholder()) { - // 8-bit fixed-width VST_CODE_FNENTRY function strings. - auto Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); - FnEntry8BitAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + Stream.EmitRecord(bitc::VST_CODE_FNENTRY, Record, FnEntryAbbrev); + } - // 7-bit fixed width VST_CODE_FNENTRY function strings. - Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); - FnEntry7BitAbbrev = Stream.EmitAbbrev(std::move(Abbv)); + Stream.ExitBlock(); +} - // 6-bit char6 VST_CODE_FNENTRY function strings. - Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); - FnEntry6BitAbbrev = Stream.EmitAbbrev(std::move(Abbv)); +/// Emit names for globals/functions etc. \p IsModuleLevel is true when +/// we are writing the module-level VST, where we are including a function +/// bitcode index and need to backpatch the VST forward declaration record. +void ModuleBitcodeWriter::writeValueSymbolTable(const ValueSymbolTable &VST) { + if (VST.empty()) + return; - // FIXME: Change the name of this record as it is now used by - // the per-module index as well. - Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid - GUIDEntryAbbrev = Stream.EmitAbbrev(std::move(Abbv)); - } + Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); // FIXME: Set up the abbrev, we know how many values there are! // FIXME: We know if the type names can use 7-bit ascii. @@ -2923,8 +2920,6 @@ unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; NameVals.push_back(VE.getValueID(Name.getValue())); - Function *F = dyn_cast(Name.getValue()); - // VST_CODE_ENTRY: [valueid, namechar x N] // VST_CODE_FNENTRY: [valueid, funcoffset, namechar x N] // VST_CODE_BBENTRY: [bbid, namechar x N] @@ -2933,28 +2928,6 @@ Code = bitc::VST_CODE_BBENTRY; if (Bits == SE_Char6) AbbrevToUse = VST_BBENTRY_6_ABBREV; - } else if (F && !F->isDeclaration()) { - // Must be the module-level VST, where we pass in the Index and - // have a VSTOffsetPlaceholder. The function-level VST should not - // contain any Function symbols. - assert(FunctionToBitcodeIndex); - assert(hasVSTOffsetPlaceholder()); - - // Save the word offset of the function (from the start of the - // actual bitcode written to the stream). - uint64_t BitcodeIndex = (*FunctionToBitcodeIndex)[F] - bitcodeStartBit(); - assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); - // Note that we add 1 here because the offset is relative to one word - // before the start of the identification block, which was historically - // always the start of the regular bitcode header. - NameVals.push_back(BitcodeIndex / 32 + 1); - - Code = bitc::VST_CODE_FNENTRY; - AbbrevToUse = FnEntry8BitAbbrev; - if (Bits == SE_Char6) - AbbrevToUse = FnEntry6BitAbbrev; - else if (Bits == SE_Fixed7) - AbbrevToUse = FnEntry7BitAbbrev; } else { Code = bitc::VST_CODE_ENTRY; if (Bits == SE_Char6) @@ -2970,47 +2943,7 @@ Stream.EmitRecord(Code, NameVals, AbbrevToUse); NameVals.clear(); } - // Emit any GUID valueIDs created for indirect call edges into the - // module-level VST. - if (IsModuleLevel && hasVSTOffsetPlaceholder()) - for (const auto &GI : valueIds()) { - NameVals.push_back(GI.second); - NameVals.push_back(GI.first); - Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals, - GUIDEntryAbbrev); - NameVals.clear(); - } - Stream.ExitBlock(); -} - -/// Emit function names and summary offsets for the combined index -/// used by ThinLTO. -void IndexBitcodeWriter::writeCombinedValueSymbolTable() { - assert(hasVSTOffsetPlaceholder() && "Expected non-zero VSTOffsetPlaceholder"); - // Get the offset of the VST we are writing, and backpatch it into - // the VST forward declaration record. - uint64_t VSTOffset = Stream.GetCurrentBitNo(); - assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); - Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32); - Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); - - auto Abbv = std::make_shared(); - Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid - unsigned EntryAbbrev = Stream.EmitAbbrev(std::move(Abbv)); - - SmallVector NameVals; - for (const auto &GVI : valueIds()) { - // VST_CODE_COMBINED_ENTRY: [valueid, refguid] - NameVals.push_back(GVI.second); - NameVals.push_back(GVI.first); - - // Emit the finished record. - Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals, EntryAbbrev); - NameVals.clear(); - } Stream.ExitBlock(); } @@ -3502,6 +3435,11 @@ return; } + for (const auto &GVI : valueIds()) { + Stream.EmitRecord(bitc::FS_VALUE_GUID, + ArrayRef{GVI.second, GVI.first}); + } + // Abbrev for FS_PERMODULE. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE)); @@ -3594,6 +3532,39 @@ Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3); Stream.EmitRecord(bitc::FS_VERSION, ArrayRef{INDEX_VERSION}); + // Create value IDs for undefined references. + for (const auto &I : *this) { + if (auto *VS = dyn_cast(I.second)) { + for (auto &RI : VS->refs()) + getValueId(RI.getGUID()); + continue; + } + + auto *FS = dyn_cast(I.second); + if (!FS) + continue; + for (auto &RI : FS->refs()) + getValueId(RI.getGUID()); + + for (auto &EI : FS->calls()) { + GlobalValue::GUID GUID = EI.first.getGUID(); + if (!hasValueId(GUID)) { + // For SamplePGO, the indirect call targets for local functions will + // have its original name annotated in profile. We try to find the + // corresponding PGOFuncName as the GUID. + GUID = Index.getGUIDFromOriginalID(GUID); + if (GUID == 0 || !hasValueId(GUID)) + continue; + } + getValueId(GUID); + } + } + + for (const auto &GVI : valueIds()) { + Stream.EmitRecord(bitc::FS_VALUE_GUID, + ArrayRef{GVI.second, GVI.first}); + } + // Abbrev for FS_COMBINED. auto Abbv = std::make_shared(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED)); @@ -3808,10 +3779,7 @@ Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); size_t BlockStartPos = Buffer.size(); - SmallVector Vals; - unsigned CurVersion = 1; - Vals.push_back(CurVersion); - Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); + writeModuleVersion(); // Emit blockinfo, which defines the standard abbreviations etc. writeBlockInfo(); @@ -3857,8 +3825,7 @@ if (Index) writePerModuleGlobalValueSummary(); - writeValueSymbolTable(M.getValueSymbolTable(), - /* IsModuleLevel */ true, &FunctionToBitcodeIndex); + writeGlobalValueSymbolTable(FunctionToBitcodeIndex); writeModuleHash(BlockStartPos); @@ -3946,13 +3913,45 @@ writeBitcodeHeader(*Stream); } -BitcodeWriter::~BitcodeWriter() = default; +BitcodeWriter::~BitcodeWriter() { assert(WroteStrtab); } + +void BitcodeWriter::writeBlob(unsigned Block, unsigned Record, StringRef Blob) { + Stream->EnterSubblock(Block, 3); + + auto Abbv = std::make_shared(); + Abbv->Add(BitCodeAbbrevOp(Record)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); + auto AbbrevNo = Stream->EmitAbbrev(std::move(Abbv)); + + Stream->EmitRecordWithBlob(AbbrevNo, ArrayRef{Record}, Blob); + + Stream->ExitBlock(); +} + +void BitcodeWriter::writeStrtab() { + assert(!WroteStrtab); + + std::vector Strtab; + StrtabBuilder.finalizeInOrder(); + Strtab.resize(StrtabBuilder.getSize()); + StrtabBuilder.write((uint8_t *)Strtab.data()); + + writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, + {Strtab.data(), Strtab.size()}); + + WroteStrtab = true; +} + +void BitcodeWriter::copyStrtab(StringRef Strtab) { + writeBlob(bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB, Strtab); + WroteStrtab = true; +} void BitcodeWriter::writeModule(const Module *M, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash) { - ModuleBitcodeWriter ModuleWriter(M, Buffer, *Stream, + ModuleBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); ModuleWriter.write(); @@ -3976,6 +3975,7 @@ BitcodeWriter Writer(Buffer); Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); + Writer.writeStrtab(); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) emitDarwinBCHeaderAndTrailer(Buffer, TT); @@ -3987,13 +3987,7 @@ void IndexBitcodeWriter::write() { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); - SmallVector Vals; - unsigned CurVersion = 1; - Vals.push_back(CurVersion); - Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); - - // If we have a VST, write the VSTOFFSET record placeholder. - writeValueSymbolTableForwardDecl(); + writeModuleVersion(); // Write the module paths in the combined index. writeModStrings(); @@ -4001,10 +3995,6 @@ // Write the summary combined index records. writeCombinedGlobalValueSummary(); - // Need a special VST writer for the combined index (we don't have a - // real VST and real values when this is invoked). - writeCombinedValueSymbolTable(); - Stream.ExitBlock(); } Index: llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp =================================================================== --- llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -336,6 +336,7 @@ W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, /*GenerateHash=*/true, &ModHash); W.writeModule(MergedM.get()); + W.writeStrtab(); OS << Buffer; // If a minimized bitcode module was requested for the thin link, @@ -348,6 +349,7 @@ W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, /*GenerateHash=*/false, &ModHash); W2.writeModule(MergedM.get()); + W2.writeStrtab(); *ThinLinkOS << Buffer; } } Index: llvm/test/Bitcode/thinlto-alias.ll =================================================================== --- llvm/test/Bitcode/thinlto-alias.ll +++ llvm/test/Bitcode/thinlto-alias.ll @@ -5,33 +5,31 @@ ; RUN: llvm-lto -thinlto -o %t3 %t.o %t2.o ; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED +; CHECK: +; See if the call to func is registered. +; The value id 1 matches the second FUNCTION record above. +; CHECK-NEXT: ; CHECK-NEXT: -; CHECK-NEXT: + +; CHECK: -; Followed by the alias and aliasee +; See if the call to analias is registered, using the expected value id. +; COMBINED-NEXT: +; COMBINED-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: -; COMBINED-NEXT: -; COMBINED-NEXT: ; ModuleID = 'thinlto-function-summary-callgraph.ll' target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll =================================================================== --- llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll +++ llvm/test/Bitcode/thinlto-function-summary-callgraph-pgo.ll @@ -10,31 +10,27 @@ ; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph-pgo.1.bc | FileCheck %s --check-prefix=OLD ; RUN: llvm-lto -thinlto-index-stats %p/Inputs/thinlto-function-summary-callgraph-pgo-combined.1.bc | FileCheck %s --check-prefix=OLD-COMBINED +; CHECK: +; See if the call to func is registered, using the expected hotness type. +; CHECK-NEXT: ; CHECK-NEXT: -; CHECK-NEXT: +; CHECK: +; COMBINED-NEXT: +; COMBINED-NEXT: ; COMBINED-NEXT: -; COMBINED-NEXT: -; COMBINED-NEXT: ; ModuleID = 'thinlto-function-summary-callgraph.ll' target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" Index: llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll =================================================================== --- llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll +++ llvm/test/Bitcode/thinlto-function-summary-callgraph-profile-summary.ll @@ -6,27 +6,45 @@ ; RUN: llvm-bcanalyzer -dump %t3.thinlto.bc | FileCheck %s --check-prefix=COMBINED +; CHECK: +; CHECK-NEXT: +; op4=hot1 op6=cold op8=hot2 op10=hot4 op12=none1 op14=hot3 op16=none2 op18=none3 op20=123 +; CHECK-NEXT: ; CHECK-NEXT: -; CHECK-LABEL: -; CHECK-LABEL: + +; CHECK: +; See if the call to func is registered. +; CHECK-NEXT: ; CHECK-NEXT: -; CHECK-NEXT: +; CHECK: +; COMBINED-NEXT: +; See if the call to func is registered. +; COMBINED-NEXT: ; COMBINED-NEXT: -; COMBINED-NEXT: -; COMBINED-NEXT: ; ModuleID = 'thinlto-function-summary-callgraph.ll' target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -49,4 +46,4 @@ declare void @func(...) #1 ; OLD: Index {{.*}} contains 1 nodes (1 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls) -; OLD-COMBINED: Index {{.*}} contains 2 nodes (2 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls) \ No newline at end of file +; OLD-COMBINED: Index {{.*}} contains 2 nodes (2 functions, 0 alias, 0 globals) and 1 edges (0 refs and 1 calls) Index: llvm/test/Bitcode/thinlto-function-summary-originalnames.ll =================================================================== --- llvm/test/Bitcode/thinlto-function-summary-originalnames.ll +++ llvm/test/Bitcode/thinlto-function-summary-originalnames.ll @@ -5,6 +5,9 @@ ; COMBINED: +; COMBINED-NEXT: +; COMBINED-NEXT: ; COMBINED-DAG: ; COMBINED-DAG: ; COMBINED-NEXT: -; COMBINED-NEXT: -; COMBINED-NEXT: -; COMBINED-NEXT: -; COMBINED-NEXT: source_filename = "/path/to/source.c" Index: llvm/test/Bitcode/thinlto-function-summary-refgraph.ll =================================================================== --- llvm/test/Bitcode/thinlto-function-summary-refgraph.ll +++ llvm/test/Bitcode/thinlto-function-summary-refgraph.ll @@ -2,6 +2,32 @@ ; RUN: opt -module-summary %s -o %t.o ; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s +; CHECK: +; op0=main op4=func op5=func +; CHECK-DAG: ; Function W contains a call to func3 as well as a reference to globalvar: -; CHECK-DAG: +; op0=W op4=globalvar op5=func3 +; CHECK-DAG: ; Function X contains call to foo, as well as address reference to foo ; which is in the same instruction as the call: -; CHECK-DAG: +; op0=X op4=foo op5=foo +; CHECK-DAG: ; Function Y contains call to func2, and ensures we don't incorrectly add ; a reference to it when reached while earlier analyzing the phi using its ; return value: -; CHECK-DAG: +; op0=Y op4=func2 +; CHECK-DAG: ; Function Z contains call to func2, and ensures we don't incorrectly add ; a reference to it when reached while analyzing subsequent use of its return ; value: -; CHECK-DAG: +; op0=Z op4=func2 +; CHECK-DAG: ; Variable bar initialization contains address reference to func: -; CHECK-DAG: +; op0=bar op2=func +; CHECK-DAG: ; CHECK: -; CHECK-NEXT: +; CHECK: record string = 'variadic' -; BC-NEXT: record string = 'foo' -; BC-NEXT: record string = 'bar' -; BC-NEXT: record string = 'f' -; BC-NEXT: record string = 'anon. +; BC: MB = ExitOnErr( errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputFilename))); std::vector Mods = ExitOnErr(getBitcodeModuleList(*MB)); - for (auto &BitcodeMod : Mods) + for (auto &BitcodeMod : Mods) { Buffer.insert(Buffer.end(), BitcodeMod.getBuffer().begin(), BitcodeMod.getBuffer().end()); + Writer.copyStrtab(BitcodeMod.getStrtab()); + } } } else { + std::vector> OwnedMods; for (std::string InputFilename : InputFilenames) { SMDiagnostic Err; std::unique_ptr M = parseIRFile(InputFilename, Err, Context); @@ -57,7 +60,9 @@ return 1; } Writer.writeModule(M.get()); + OwnedMods.push_back(std::move(M)); } + Writer.writeStrtab(); } std::error_code EC; Index: llvm/tools/llvm-modextract/llvm-modextract.cpp =================================================================== --- llvm/tools/llvm-modextract/llvm-modextract.cpp +++ llvm/tools/llvm-modextract/llvm-modextract.cpp @@ -61,7 +61,10 @@ if (BinaryExtract) { SmallVector Header; BitcodeWriter Writer(Header); - Out->os() << Header << Ms[ModuleIndex].getBuffer(); + Header.append(Ms[ModuleIndex].getBuffer().begin(), + Ms[ModuleIndex].getBuffer().end()); + Writer.copyStrtab(Ms[ModuleIndex].getStrtab()); + Out->os() << Header; Out->keep(); return 0; }