diff --git a/llvm/include/llvm/Bitcode/BitstreamReader.h b/llvm/include/llvm/Bitcode/BitstreamReader.h index 628a83cb0de0..b331ceea051c 100644 --- a/llvm/include/llvm/Bitcode/BitstreamReader.h +++ b/llvm/include/llvm/Bitcode/BitstreamReader.h @@ -1,596 +1,598 @@ //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This header defines the BitstreamReader class. This class can be used to // read an arbitrary bitstream, regardless of its contents. // //===----------------------------------------------------------------------===// #ifndef LLVM_BITCODE_BITSTREAMREADER_H #define LLVM_BITCODE_BITSTREAMREADER_H #include "llvm/Bitcode/BitCodes.h" #include "llvm/Support/Endian.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/StreamingMemoryObject.h" #include #include #include namespace llvm { /// This class is used to read from an LLVM bitcode stream, maintaining /// information that is global to decoding the entire file. While a file is /// being read, multiple cursors can be independently advanced or skipped around /// within the file. These are represented by the BitstreamCursor class. class BitstreamReader { public: /// This contains information emitted to BLOCKINFO_BLOCK blocks. These /// describe abbreviations that all blocks of the specified ID inherit. struct BlockInfo { unsigned BlockID; std::vector> Abbrevs; std::string Name; std::vector > RecordNames; }; private: std::unique_ptr BitcodeBytes; std::vector BlockInfoRecords; /// This is set to true if we don't care about the block/record name /// information in the BlockInfo block. Only llvm-bcanalyzer uses this. 
bool IgnoreBlockInfoNames; BitstreamReader(const BitstreamReader&) = delete; void operator=(const BitstreamReader&) = delete; public: BitstreamReader() : IgnoreBlockInfoNames(true) { } BitstreamReader(const unsigned char *Start, const unsigned char *End) : IgnoreBlockInfoNames(true) { init(Start, End); } BitstreamReader(std::unique_ptr BitcodeBytes) : BitcodeBytes(std::move(BitcodeBytes)), IgnoreBlockInfoNames(true) {} BitstreamReader(BitstreamReader &&Other) { *this = std::move(Other); } BitstreamReader &operator=(BitstreamReader &&Other) { BitcodeBytes = std::move(Other.BitcodeBytes); // Explicitly swap block info, so that nothing gets destroyed twice. std::swap(BlockInfoRecords, Other.BlockInfoRecords); IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames; return *this; } void init(const unsigned char *Start, const unsigned char *End) { assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End)); } MemoryObject &getBitcodeBytes() { return *BitcodeBytes; } /// This is called by clients that want block/record name information. void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; } bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; } //===--------------------------------------------------------------------===// // Block Manipulation //===--------------------------------------------------------------------===// /// Return true if we've already read and processed the block info block for /// this Bitstream. We only process it for the first cursor that walks over /// it. bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); } /// If there is block info for the specified ID, return it, otherwise return /// null. const BlockInfo *getBlockInfo(unsigned BlockID) const { // Common case, the most recent entry matches BlockID. 
if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) return &BlockInfoRecords.back(); for (unsigned i = 0, e = static_cast(BlockInfoRecords.size()); i != e; ++i) if (BlockInfoRecords[i].BlockID == BlockID) return &BlockInfoRecords[i]; return nullptr; } BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { if (const BlockInfo *BI = getBlockInfo(BlockID)) return *const_cast(BI); // Otherwise, add a new record. BlockInfoRecords.emplace_back(); BlockInfoRecords.back().BlockID = BlockID; return BlockInfoRecords.back(); } /// Takes block info from the other bitstream reader. /// /// This is a "take" operation because BlockInfo records are non-trivial, and /// indeed rather expensive. void takeBlockInfo(BitstreamReader &&Other) { assert(!hasBlockInfoRecords()); BlockInfoRecords = std::move(Other.BlockInfoRecords); } }; /// This represents a position within a bitstream. There may be multiple /// independent cursors reading within one bitstream, each maintaining their /// own local state. class SimpleBitstreamCursor { BitstreamReader *R = nullptr; size_t NextChar = 0; // The size of the bitcode. 0 if we don't know it yet. size_t Size = 0; /// This is the current data we have pulled from the stream but have not /// returned to the client. This is specifically and intentionally defined to /// follow the word size of the host machine for efficiency. We use word_t in /// places that are aware of this to make it perfectly explicit what is going /// on. public: typedef size_t word_t; private: word_t CurWord = 0; /// This is the number of bits in CurWord that are valid. This is always from /// [0...bits_of(size_t)-1] inclusive. 
unsigned BitsInCurWord = 0; public: static const size_t MaxChunkSize = sizeof(word_t) * 8; SimpleBitstreamCursor() = default; explicit SimpleBitstreamCursor(BitstreamReader &R) : R(&R) {} explicit SimpleBitstreamCursor(BitstreamReader *R) : R(R) {} bool canSkipToPos(size_t pos) const { // pos can be skipped to if it is a valid address or one byte past the end. return pos == 0 || R->getBitcodeBytes().isValidAddress(static_cast(pos - 1)); } bool AtEndOfStream() { if (BitsInCurWord != 0) return false; if (Size != 0) return Size <= NextChar; fillCurWord(); return BitsInCurWord == 0; } /// Return the bit # of the bit we are reading. uint64_t GetCurrentBitNo() const { return NextChar*CHAR_BIT - BitsInCurWord; } // Return the byte # of the current bit. uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; } BitstreamReader *getBitStreamReader() { return R; } const BitstreamReader *getBitStreamReader() const { return R; } /// Reset the stream to the specified bit number. void JumpToBit(uint64_t BitNo) { size_t ByteNo = size_t(BitNo/8) & ~(sizeof(word_t)-1); unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); assert(canSkipToPos(ByteNo) && "Invalid location"); // Move the cursor to the right word. NextChar = ByteNo; BitsInCurWord = 0; // Skip over any bits that are already consumed. if (WordBitNo) Read(WordBitNo); } /// Reset the stream to the bit pointed at by the specified pointer. /// /// The pointer must be a dereferenceable pointer into the bytes in the /// underlying memory object. void jumpToPointer(const uint8_t *Pointer) { auto *Pointer0 = getPointerToByte(0, 1); assert((intptr_t)Pointer0 <= (intptr_t)Pointer && "Expected pointer into bitstream"); JumpToBit(8 * (Pointer - Pointer0)); assert((intptr_t)getPointerToByte(getCurrentByteNo(), 1) == (intptr_t)Pointer && "Expected to reach pointer"); } void jumpToPointer(const char *Pointer) { jumpToPointer((const uint8_t *)Pointer); } /// Get a pointer into the bitstream at the specified byte offset. 
const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) { return R->getBitcodeBytes().getPointer(ByteNo, NumBytes); } /// Get a pointer into the bitstream at the specified bit offset. /// /// The bit offset must be on a byte boundary. const uint8_t *getPointerToBit(uint64_t BitNo, uint64_t NumBytes) { assert(!(BitNo % 8) && "Expected bit on byte boundary"); return getPointerToByte(BitNo / 8, NumBytes); } void fillCurWord() { if (Size != 0 && NextChar >= Size) report_fatal_error("Unexpected end of file"); // Read the next word from the stream. uint8_t Array[sizeof(word_t)] = {0}; uint64_t BytesRead = R->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar); // If we run out of data, stop at the end of the stream. if (BytesRead == 0) { CurWord = 0; BitsInCurWord = 0; Size = NextChar; return; } CurWord = support::endian::read( Array); NextChar += BytesRead; BitsInCurWord = BytesRead * 8; } word_t Read(unsigned NumBits) { static const unsigned BitsInWord = MaxChunkSize; assert(NumBits && NumBits <= BitsInWord && "Cannot return zero or more than BitsInWord bits!"); static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; // If the field is fully contained by CurWord, return it quickly. if (BitsInCurWord >= NumBits) { word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); // Use a mask to avoid undefined behavior. CurWord >>= (NumBits & Mask); BitsInCurWord -= NumBits; return R; } word_t R = BitsInCurWord ? CurWord : 0; unsigned BitsLeft = NumBits - BitsInCurWord; fillCurWord(); // If we run out of data, stop at the end of the stream. if (BitsLeft > BitsInCurWord) return 0; word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); // Use a mask to avoid undefined behavior. 
CurWord >>= (BitsLeft & Mask); BitsInCurWord -= BitsLeft; R |= R2 << (NumBits - BitsLeft); return R; } uint32_t ReadVBR(unsigned NumBits) { uint32_t Piece = Read(NumBits); if ((Piece & (1U << (NumBits-1))) == 0) return Piece; uint32_t Result = 0; unsigned NextBit = 0; while (1) { Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; if ((Piece & (1U << (NumBits-1))) == 0) return Result; NextBit += NumBits-1; Piece = Read(NumBits); } } // Read a VBR that may have a value up to 64-bits in size. The chunk size of // the VBR must still be <= 32 bits though. uint64_t ReadVBR64(unsigned NumBits) { uint32_t Piece = Read(NumBits); if ((Piece & (1U << (NumBits-1))) == 0) return uint64_t(Piece); uint64_t Result = 0; unsigned NextBit = 0; while (1) { Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; if ((Piece & (1U << (NumBits-1))) == 0) return Result; NextBit += NumBits-1; Piece = Read(NumBits); } } void SkipToFourByteBoundary() { // If word_t is 64-bits and if we've read less than 32 bits, just dump // the bits we have up to the next 32-bit boundary. if (sizeof(word_t) > 4 && BitsInCurWord >= 32) { CurWord >>= BitsInCurWord-32; BitsInCurWord = 32; return; } BitsInCurWord = 0; } /// Skip to the end of the file. void skipToEnd() { NextChar = R->getBitcodeBytes().getExtent(); } /// Prevent the cursor from reading past a byte boundary. /// /// Prevent the cursor from requesting byte reads past \c Limit. This is /// useful when working with a cursor on a StreamingMemoryObject, when it's /// desirable to avoid invalidating the result of getPointerToByte(). /// /// If \c Limit is on a word boundary, AtEndOfStream() will return true if /// the cursor position reaches or exceeds \c Limit, regardless of the true /// number of available bytes. Otherwise, AtEndOfStream() returns true when /// it reaches or exceeds the next word boundary. 
void setArtificialByteLimit(uint64_t Limit) { assert(getCurrentByteNo() < Limit && "Move cursor before lowering limit"); // Round to word boundary. Limit = alignTo(Limit, sizeof(word_t)); // Only change size if the new one is lower. if (!Size || Size > Limit) Size = Limit; } /// Return the Size, if known. uint64_t getSizeIfKnown() const { return Size; } }; /// When advancing through a bitstream cursor, each advance can discover a few /// different kinds of entries: struct BitstreamEntry { enum { Error, // Malformed bitcode was found. EndBlock, // We've reached the end of the current block (or the end of the // file, which is treated like a series of EndBlock records). SubBlock, // This is the start of a new subblock of a specific ID. Record // This is a record with a specific AbbrevID. } Kind; unsigned ID; static BitstreamEntry getError() { BitstreamEntry E; E.Kind = Error; return E; } static BitstreamEntry getEndBlock() { BitstreamEntry E; E.Kind = EndBlock; return E; } static BitstreamEntry getSubBlock(unsigned ID) { BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; } static BitstreamEntry getRecord(unsigned AbbrevID) { BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; } }; /// This represents a position within a bitcode file, implemented on top of a /// SimpleBitstreamCursor. /// /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not /// be passed by value. class BitstreamCursor : SimpleBitstreamCursor { // This is the declared size of code values used for the current block, in // bits. unsigned CurCodeSize = 2; /// Abbrevs installed in this block. std::vector> CurAbbrevs; struct Block { unsigned PrevCodeSize; std::vector> PrevAbbrevs; explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} }; /// This tracks the codesize of parent blocks. 
SmallVector BlockScope; public: static const size_t MaxChunkSize = sizeof(word_t) * 8; BitstreamCursor() = default; explicit BitstreamCursor(BitstreamReader &R) { init(&R); } void init(BitstreamReader *R) { freeState(); SimpleBitstreamCursor::operator=(SimpleBitstreamCursor(R)); CurCodeSize = 2; } void freeState(); using SimpleBitstreamCursor::canSkipToPos; using SimpleBitstreamCursor::AtEndOfStream; using SimpleBitstreamCursor::GetCurrentBitNo; + using SimpleBitstreamCursor::getCurrentByteNo; + using SimpleBitstreamCursor::getPointerToByte; using SimpleBitstreamCursor::getBitStreamReader; using SimpleBitstreamCursor::JumpToBit; using SimpleBitstreamCursor::fillCurWord; using SimpleBitstreamCursor::Read; using SimpleBitstreamCursor::ReadVBR; using SimpleBitstreamCursor::ReadVBR64; /// Return the number of bits used to encode an abbrev #. unsigned getAbbrevIDWidth() const { return CurCodeSize; } /// Flags that modify the behavior of advance(). enum { /// If this flag is used, the advance() method does not automatically pop /// the block scope when the end of a block is reached. AF_DontPopBlockAtEnd = 1, /// If this flag is used, abbrev entries are returned just like normal /// records. AF_DontAutoprocessAbbrevs = 2 }; /// Advance the current bitstream, returning the next entry in the stream. BitstreamEntry advance(unsigned Flags = 0) { while (1) { unsigned Code = ReadCode(); if (Code == bitc::END_BLOCK) { // Pop the end of the block unless Flags tells us not to. if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) return BitstreamEntry::getError(); return BitstreamEntry::getEndBlock(); } if (Code == bitc::ENTER_SUBBLOCK) return BitstreamEntry::getSubBlock(ReadSubBlockID()); if (Code == bitc::DEFINE_ABBREV && !(Flags & AF_DontAutoprocessAbbrevs)) { // We read and accumulate abbrev's, the client can't do anything with // them anyway. 
ReadAbbrevRecord(); continue; } return BitstreamEntry::getRecord(Code); } } /// This is a convenience function for clients that don't expect any /// subblocks. This just skips over them automatically. BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { while (1) { // If we found a normal entry, return it. BitstreamEntry Entry = advance(Flags); if (Entry.Kind != BitstreamEntry::SubBlock) return Entry; // If we found a sub-block, just skip over it and check the next entry. if (SkipBlock()) return BitstreamEntry::getError(); } } unsigned ReadCode() { return Read(CurCodeSize); } // Block header: // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. unsigned ReadSubBlockID() { return ReadVBR(bitc::BlockIDWidth); } /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body /// of this block. If the block record is malformed, return true. bool SkipBlock() { // Read and ignore the codelen value. Since we are skipping this block, we // don't care what code widths are used inside of it. ReadVBR(bitc::CodeLenWidth); SkipToFourByteBoundary(); unsigned NumFourBytes = Read(bitc::BlockSizeWidth); // Check that the block wasn't partially defined, and that the offset isn't // bogus. // NOTE(review): NumFourBytes*4*8 is multiplied in unsigned arithmetic before // it is widened by the addition, so a very large declared block size could // wrap -- confirm whether a 64-bit cast is needed here. size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8; if (AtEndOfStream() || !canSkipToPos(SkipTo/8)) return true; JumpToBit(SkipTo); return false; } /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true /// if the block has an error. 
bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); bool ReadBlockEnd() { if (BlockScope.empty()) return true; // Block tail: // [END_BLOCK, <align4bytes>] SkipToFourByteBoundary(); popBlockScope(); return false; } private: void popBlockScope() { CurCodeSize = BlockScope.back().PrevCodeSize; CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs); BlockScope.pop_back(); } //===--------------------------------------------------------------------===// // Record Processing //===--------------------------------------------------------------------===// public: /// Return the abbreviation for the specified AbbrevId. const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) { unsigned AbbrevNo = AbbrevID - bitc::FIRST_APPLICATION_ABBREV; if (AbbrevNo >= CurAbbrevs.size()) report_fatal_error("Invalid abbrev number"); return CurAbbrevs[AbbrevNo].get(); } /// Read the current record and discard it. void skipRecord(unsigned AbbrevID); unsigned readRecord(unsigned AbbrevID, SmallVectorImpl &Vals, StringRef *Blob = nullptr); //===--------------------------------------------------------------------===// // Abbrev Processing //===--------------------------------------------------------------------===// void ReadAbbrevRecord(); bool ReadBlockInfoBlock(); }; } // End llvm namespace #endif diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h index a93ed58ebb99..0c4cc854cdc6 100644 --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -1,523 +1,527 @@ //===- LLVMBitCodes.h - Enum values for the LLVM bitcode format -*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This header defines Bitcode enum values for LLVM IR bitcode files. 
// // The enum values defined in this file should be considered permanent. If // new features are added, they should have values added at the end of the // respective lists. // //===----------------------------------------------------------------------===// #ifndef LLVM_BITCODE_LLVMBITCODES_H #define LLVM_BITCODE_LLVMBITCODES_H #include "llvm/Bitcode/BitCodes.h" namespace llvm { namespace bitc { // The only top-level block type defined is for a module. enum BlockIDs { // Blocks MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID, // Module sub-block id's. PARAMATTR_BLOCK_ID, PARAMATTR_GROUP_BLOCK_ID, CONSTANTS_BLOCK_ID, FUNCTION_BLOCK_ID, // Block intended to contain information on the bitcode versioning. // Can be used to provide better error messages when we fail to parse a // bitcode file. IDENTIFICATION_BLOCK_ID, VALUE_SYMTAB_BLOCK_ID, METADATA_BLOCK_ID, METADATA_ATTACHMENT_ID, TYPE_BLOCK_ID_NEW, USELIST_BLOCK_ID, MODULE_STRTAB_BLOCK_ID, GLOBALVAL_SUMMARY_BLOCK_ID, OPERAND_BUNDLE_TAGS_BLOCK_ID, METADATA_KIND_BLOCK_ID }; /// Identification block contains a string that describes the producer details, /// and an epoch that defines the auto-upgrade capability. enum IdentificationCodes { IDENTIFICATION_CODE_STRING = 1, // IDENTIFICATION: [strchr x N] IDENTIFICATION_CODE_EPOCH = 2, // EPOCH: [epoch#] }; /// The epoch that defines the auto-upgrade compatibility for the bitcode. /// /// LLVM guarantees in a major release that a minor release can read bitcode /// generated by previous minor releases. We translate this by making the reader /// accept only bitcode with the same epoch, except for the X.0 release which /// also accepts N-1. enum { BITCODE_CURRENT_EPOCH = 0 }; /// MODULE blocks have a number of optional fields and subblocks. 
enum ModuleCodes { MODULE_CODE_VERSION = 1, // VERSION: [version#] MODULE_CODE_TRIPLE = 2, // TRIPLE: [strchr x N] MODULE_CODE_DATALAYOUT = 3, // DATALAYOUT: [strchr x N] MODULE_CODE_ASM = 4, // ASM: [strchr x N] MODULE_CODE_SECTIONNAME = 5, // SECTIONNAME: [strchr x N] // FIXME: Remove DEPLIB in 4.0. MODULE_CODE_DEPLIB = 6, // DEPLIB: [strchr x N] // GLOBALVAR: [pointer type, isconst, initid, // linkage, alignment, section, visibility, threadlocal] MODULE_CODE_GLOBALVAR = 7, // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, // section, visibility, gc, unnamed_addr] MODULE_CODE_FUNCTION = 8, // ALIAS: [alias type, aliasee val#, linkage, visibility] MODULE_CODE_ALIAS_OLD = 9, // MODULE_CODE_PURGEVALS: [numvals] MODULE_CODE_PURGEVALS = 10, MODULE_CODE_GCNAME = 11, // GCNAME: [strchr x N] MODULE_CODE_COMDAT = 12, // COMDAT: [selection_kind, name] MODULE_CODE_VSTOFFSET = 13, // VSTOFFSET: [offset] // ALIAS: [alias value type, addrspace, aliasee val#, linkage, visibility] MODULE_CODE_ALIAS = 14, MODULE_CODE_METADATA_VALUES_UNUSED = 15, // SOURCE_FILENAME: [namechar x N] MODULE_CODE_SOURCE_FILENAME = 16, + + // HASH: [5*i32] + MODULE_CODE_HASH = 17, }; /// PARAMATTR blocks have code for defining a parameter attribute set. enum AttributeCodes { // FIXME: Remove `PARAMATTR_CODE_ENTRY_OLD' in 4.0 PARAMATTR_CODE_ENTRY_OLD = 1, // ENTRY: [paramidx0, attr0, // paramidx1, attr1...] PARAMATTR_CODE_ENTRY = 2, // ENTRY: [paramidx0, attrgrp0, // paramidx1, attrgrp1, ...] PARAMATTR_GRP_CODE_ENTRY = 3 // ENTRY: [id, attr0, att1, ...] }; /// TYPE blocks have codes for each type primitive they use. 
enum TypeCodes { TYPE_CODE_NUMENTRY = 1, // NUMENTRY: [numentries] // Type Codes TYPE_CODE_VOID = 2, // VOID TYPE_CODE_FLOAT = 3, // FLOAT TYPE_CODE_DOUBLE = 4, // DOUBLE TYPE_CODE_LABEL = 5, // LABEL TYPE_CODE_OPAQUE = 6, // OPAQUE TYPE_CODE_INTEGER = 7, // INTEGER: [width] TYPE_CODE_POINTER = 8, // POINTER: [pointee type] TYPE_CODE_FUNCTION_OLD = 9, // FUNCTION: [vararg, attrid, retty, // paramty x N] TYPE_CODE_HALF = 10, // HALF TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty] TYPE_CODE_VECTOR = 12, // VECTOR: [numelts, eltty] // These are not with the other floating point types because they're // a late addition, and putting them in the right place breaks // binary compatibility. TYPE_CODE_X86_FP80 = 13, // X86 LONG DOUBLE TYPE_CODE_FP128 = 14, // LONG DOUBLE (112 bit mantissa) TYPE_CODE_PPC_FP128 = 15, // PPC LONG DOUBLE (2 doubles) TYPE_CODE_METADATA = 16, // METADATA TYPE_CODE_X86_MMX = 17, // X86 MMX TYPE_CODE_STRUCT_ANON = 18, // STRUCT_ANON: [ispacked, eltty x N] TYPE_CODE_STRUCT_NAME = 19, // STRUCT_NAME: [strchr x N] TYPE_CODE_STRUCT_NAMED = 20, // STRUCT_NAMED: [ispacked, eltty x N] TYPE_CODE_FUNCTION = 21, // FUNCTION: [vararg, retty, paramty x N] TYPE_CODE_TOKEN = 22 // TOKEN }; enum OperandBundleTagCode { OPERAND_BUNDLE_TAG = 1, // TAG: [strchr x N] }; // The type symbol table only has one code (TST_ENTRY_CODE). enum TypeSymtabCodes { TST_CODE_ENTRY = 1 // TST_ENTRY: [typeid, namechar x N] }; // Value symbol table codes. enum ValueSymtabCodes { VST_CODE_ENTRY = 1, // VST_ENTRY: [valueid, namechar x N] VST_CODE_BBENTRY = 2, // VST_BBENTRY: [bbid, namechar x N] VST_CODE_FNENTRY = 3, // VST_FNENTRY: [valueid, offset, namechar x N] // VST_COMBINED_GVDEFENTRY: [valueid, sumoffset, guid] VST_CODE_COMBINED_GVDEFENTRY = 4, // VST_COMBINED_ENTRY: [valueid, refguid] VST_CODE_COMBINED_ENTRY = 5 }; // The module path symbol table only has one code (MST_CODE_ENTRY). 
enum ModulePathSymtabCodes { MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N] + MST_CODE_HASH = 2, // MST_HASH: [5*i32] }; // The summary section uses different codes in the per-module // and combined index cases. enum GlobalValueSummarySymtabCodes { // PERMODULE: [valueid, linkage, instcount, numrefs, numrefs x valueid, // n x (valueid, callsitecount)] FS_PERMODULE = 1, // PERMODULE_PROFILE: [valueid, linkage, instcount, numrefs, // numrefs x valueid, // n x (valueid, callsitecount, profilecount)] FS_PERMODULE_PROFILE = 2, // PERMODULE_GLOBALVAR_INIT_REFS: [valueid, linkage, n x valueid] FS_PERMODULE_GLOBALVAR_INIT_REFS = 3, // COMBINED: [modid, linkage, instcount, numrefs, numrefs x valueid, // n x (valueid, callsitecount)] FS_COMBINED = 4, // COMBINED_PROFILE: [modid, linkage, instcount, numrefs, // numrefs x valueid, // n x (valueid, callsitecount, profilecount)] FS_COMBINED_PROFILE = 5, // COMBINED_GLOBALVAR_INIT_REFS: [modid, linkage, n x valueid] FS_COMBINED_GLOBALVAR_INIT_REFS = 6, }; enum MetadataCodes { METADATA_STRING_OLD = 1, // MDSTRING: [values] METADATA_VALUE = 2, // VALUE: [type num, value num] METADATA_NODE = 3, // NODE: [n x md num] METADATA_NAME = 4, // STRING: [values] METADATA_DISTINCT_NODE = 5, // DISTINCT_NODE: [n x md num] METADATA_KIND = 6, // [n x [id, name]] METADATA_LOCATION = 7, // [distinct, line, col, scope, inlined-at?] METADATA_OLD_NODE = 8, // OLD_NODE: [n x (type num, value num)] METADATA_OLD_FN_NODE = 9, // OLD_FN_NODE: [n x (type num, value num)] METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes] METADATA_ATTACHMENT = 11, // [m x [value, [n x [id, mdnode]]] METADATA_GENERIC_DEBUG = 12, // [distinct, tag, vers, header, n x md num] METADATA_SUBRANGE = 13, // [distinct, count, lo] METADATA_ENUMERATOR = 14, // [distinct, value, name] METADATA_BASIC_TYPE = 15, // [distinct, tag, name, size, align, enc] METADATA_FILE = 16, // [distinct, filename, directory] METADATA_DERIVED_TYPE = 17, // [distinct, ...] 
METADATA_COMPOSITE_TYPE = 18, // [distinct, ...] METADATA_SUBROUTINE_TYPE = 19, // [distinct, flags, types] METADATA_COMPILE_UNIT = 20, // [distinct, ...] METADATA_SUBPROGRAM = 21, // [distinct, ...] METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column] METADATA_LEXICAL_BLOCK_FILE = 23, //[distinct, scope, file, discriminator] METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line] METADATA_TEMPLATE_TYPE = 25, // [distinct, scope, name, type, ...] METADATA_TEMPLATE_VALUE = 26, // [distinct, scope, name, type, value, ...] METADATA_GLOBAL_VAR = 27, // [distinct, ...] METADATA_LOCAL_VAR = 28, // [distinct, ...] METADATA_EXPRESSION = 29, // [distinct, n x element] METADATA_OBJC_PROPERTY = 30, // [distinct, name, file, line, ...] METADATA_IMPORTED_ENTITY = 31, // [distinct, tag, scope, entity, line, name] METADATA_MODULE = 32, // [distinct, scope, name, ...] METADATA_MACRO = 33, // [distinct, macinfo, line, name, value] METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...] METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars]) }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each // constant and maintains an implicit current type value. 
enum ConstantsCodes { CST_CODE_SETTYPE = 1, // SETTYPE: [typeid] CST_CODE_NULL = 2, // NULL CST_CODE_UNDEF = 3, // UNDEF CST_CODE_INTEGER = 4, // INTEGER: [intval] CST_CODE_WIDE_INTEGER = 5, // WIDE_INTEGER: [n x intval] CST_CODE_FLOAT = 6, // FLOAT: [fpval] CST_CODE_AGGREGATE = 7, // AGGREGATE: [n x value number] CST_CODE_STRING = 8, // STRING: [values] CST_CODE_CSTRING = 9, // CSTRING: [values] CST_CODE_CE_BINOP = 10, // CE_BINOP: [opcode, opval, opval] CST_CODE_CE_CAST = 11, // CE_CAST: [opcode, opty, opval] CST_CODE_CE_GEP = 12, // CE_GEP: [n x operands] CST_CODE_CE_SELECT = 13, // CE_SELECT: [opval, opval, opval] CST_CODE_CE_EXTRACTELT = 14, // CE_EXTRACTELT: [opty, opval, opval] CST_CODE_CE_INSERTELT = 15, // CE_INSERTELT: [opval, opval, opval] CST_CODE_CE_SHUFFLEVEC = 16, // CE_SHUFFLEVEC: [opval, opval, opval] CST_CODE_CE_CMP = 17, // CE_CMP: [opty, opval, opval, pred] CST_CODE_INLINEASM_OLD = 18, // INLINEASM: [sideeffect|alignstack, // asmstr,conststr] CST_CODE_CE_SHUFVEC_EX = 19, // SHUFVEC_EX: [opty, opval, opval, opval] CST_CODE_CE_INBOUNDS_GEP = 20, // INBOUNDS_GEP: [n x operands] CST_CODE_BLOCKADDRESS = 21, // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#] CST_CODE_DATA = 22, // DATA: [n x elements] CST_CODE_INLINEASM = 23 // INLINEASM: [sideeffect|alignstack| // asmdialect,asmstr,conststr] }; /// CastOpcodes - These are values used in the bitcode files to encode which /// cast a CST_CODE_CE_CAST or a XXX refers to. The values of these enums /// have no fixed relation to the LLVM IR enum values. Changing these will /// break compatibility with old files. enum CastOpcodes { CAST_TRUNC = 0, CAST_ZEXT = 1, CAST_SEXT = 2, CAST_FPTOUI = 3, CAST_FPTOSI = 4, CAST_UITOFP = 5, CAST_SITOFP = 6, CAST_FPTRUNC = 7, CAST_FPEXT = 8, CAST_PTRTOINT = 9, CAST_INTTOPTR = 10, CAST_BITCAST = 11, CAST_ADDRSPACECAST = 12 }; /// BinaryOpcodes - These are values used in the bitcode files to encode which /// binop a CST_CODE_CE_BINOP or a XXX refers to. 
The values of these enums /// have no fixed relation to the LLVM IR enum values. Changing these will /// break compatibility with old files. enum BinaryOpcodes { BINOP_ADD = 0, BINOP_SUB = 1, BINOP_MUL = 2, BINOP_UDIV = 3, BINOP_SDIV = 4, // overloaded for FP BINOP_UREM = 5, BINOP_SREM = 6, // overloaded for FP BINOP_SHL = 7, BINOP_LSHR = 8, BINOP_ASHR = 9, BINOP_AND = 10, BINOP_OR = 11, BINOP_XOR = 12 }; /// These are values used in the bitcode files to encode AtomicRMW operations. /// The values of these enums have no fixed relation to the LLVM IR enum /// values. Changing these will break compatibility with old files. enum RMWOperations { RMW_XCHG = 0, RMW_ADD = 1, RMW_SUB = 2, RMW_AND = 3, RMW_NAND = 4, RMW_OR = 5, RMW_XOR = 6, RMW_MAX = 7, RMW_MIN = 8, RMW_UMAX = 9, RMW_UMIN = 10 }; /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing /// OverflowingBinaryOperator's SubclassOptionalData contents. enum OverflowingBinaryOperatorOptionalFlags { OBO_NO_UNSIGNED_WRAP = 0, OBO_NO_SIGNED_WRAP = 1 }; /// PossiblyExactOperatorOptionalFlags - Flags for serializing /// PossiblyExactOperator's SubclassOptionalData contents. enum PossiblyExactOperatorOptionalFlags { PEO_EXACT = 0 }; /// Encoded AtomicOrdering values. enum AtomicOrderingCodes { ORDERING_NOTATOMIC = 0, ORDERING_UNORDERED = 1, ORDERING_MONOTONIC = 2, ORDERING_ACQUIRE = 3, ORDERING_RELEASE = 4, ORDERING_ACQREL = 5, ORDERING_SEQCST = 6 }; /// Encoded SynchronizationScope values. enum AtomicSynchScopeCodes { SYNCHSCOPE_SINGLETHREAD = 0, SYNCHSCOPE_CROSSTHREAD = 1 }; /// Markers and flags for call instruction. enum CallMarkersFlags { CALL_TAIL = 0, CALL_CCONV = 1, CALL_MUSTTAIL = 14, CALL_EXPLICIT_TYPE = 15, CALL_NOTAIL = 16, CALL_FMF = 17 // Call has optional fast-math-flags. }; // The function body block (FUNCTION_BLOCK_ID) describes function bodies. It // can contain a constant block (CONSTANTS_BLOCK_ID). 
enum FunctionCodes { FUNC_CODE_DECLAREBLOCKS = 1, // DECLAREBLOCKS: [n] FUNC_CODE_INST_BINOP = 2, // BINOP: [opcode, ty, opval, opval] FUNC_CODE_INST_CAST = 3, // CAST: [opcode, ty, opty, opval] FUNC_CODE_INST_GEP_OLD = 4, // GEP: [n x operands] FUNC_CODE_INST_SELECT = 5, // SELECT: [ty, opval, opval, opval] FUNC_CODE_INST_EXTRACTELT = 6, // EXTRACTELT: [opty, opval, opval] FUNC_CODE_INST_INSERTELT = 7, // INSERTELT: [ty, opval, opval, opval] FUNC_CODE_INST_SHUFFLEVEC = 8, // SHUFFLEVEC: [ty, opval, opval, opval] FUNC_CODE_INST_CMP = 9, // CMP: [opty, opval, opval, pred] FUNC_CODE_INST_RET = 10, // RET: [opty,opval] FUNC_CODE_INST_BR = 11, // BR: [bb#, bb#, cond] or [bb#] FUNC_CODE_INST_SWITCH = 12, // SWITCH: [opty, op0, op1, ...] FUNC_CODE_INST_INVOKE = 13, // INVOKE: [attr, fnty, op0,op1, ...] // 14 is unused. FUNC_CODE_INST_UNREACHABLE = 15, // UNREACHABLE FUNC_CODE_INST_PHI = 16, // PHI: [ty, val0,bb0, ...] // 17 is unused. // 18 is unused. FUNC_CODE_INST_ALLOCA = 19, // ALLOCA: [instty, opty, op, align] FUNC_CODE_INST_LOAD = 20, // LOAD: [opty, op, align, vol] // 21 is unused. // 22 is unused. FUNC_CODE_INST_VAARG = 23, // VAARG: [valistty, valist, instty] // This store code encodes the pointer type, rather than the value type // this is so information only available in the pointer type (e.g. address // spaces) is retained. FUNC_CODE_INST_STORE_OLD = 24, // STORE: [ptrty,ptr,val, align, vol] // 25 is unused. FUNC_CODE_INST_EXTRACTVAL = 26, // EXTRACTVAL: [n x operands] FUNC_CODE_INST_INSERTVAL = 27, // INSERTVAL: [n x operands] // fcmp/icmp returning Int1TY or vector of Int1Ty. Same as CMP, exists to // support legacy vicmp/vfcmp instructions. FUNC_CODE_INST_CMP2 = 28, // CMP2: [opty, opval, opval, pred] // new select on i1 or [N x i1] FUNC_CODE_INST_VSELECT = 29, // VSELECT: [ty,opval,opval,predty,pred] FUNC_CODE_INST_INBOUNDS_GEP_OLD = 30, // INBOUNDS_GEP: [n x operands] FUNC_CODE_INST_INDIRECTBR = 31, // INDIRECTBR: [opty, op0, op1, ...] // 32 is unused.
FUNC_CODE_DEBUG_LOC_AGAIN = 33, // DEBUG_LOC_AGAIN FUNC_CODE_INST_CALL = 34, // CALL: [attr, cc, fnty, fnid, args...] FUNC_CODE_DEBUG_LOC = 35, // DEBUG_LOC: [Line,Col,ScopeVal, IAVal] FUNC_CODE_INST_FENCE = 36, // FENCE: [ordering, synchscope] FUNC_CODE_INST_CMPXCHG_OLD = 37, // CMPXCHG: [ptrty,ptr,cmp,new, align, vol, // ordering, synchscope] FUNC_CODE_INST_ATOMICRMW = 38, // ATOMICRMW: [ptrty,ptr,val, operation, // align, vol, // ordering, synchscope] FUNC_CODE_INST_RESUME = 39, // RESUME: [opval] FUNC_CODE_INST_LANDINGPAD_OLD = 40, // LANDINGPAD: [ty,val,val,num,id0,val0...] FUNC_CODE_INST_LOADATOMIC = 41, // LOAD: [opty, op, align, vol, // ordering, synchscope] FUNC_CODE_INST_STOREATOMIC_OLD = 42, // STORE: [ptrty,ptr,val, align, vol, // ordering, synchscope] FUNC_CODE_INST_GEP = 43, // GEP: [inbounds, n x operands] FUNC_CODE_INST_STORE = 44, // STORE: [ptrty,ptr,valty,val, align, vol] FUNC_CODE_INST_STOREATOMIC = 45, // STORE: [ptrty,ptr,val, align, vol, // ordering, synchscope] FUNC_CODE_INST_CMPXCHG = 46, // CMPXCHG: [ptrty,ptr,valty,cmp,new, align, // vol,ordering,synchscope] FUNC_CODE_INST_LANDINGPAD = 47, // LANDINGPAD: [ty,val,num,id0,val0...] FUNC_CODE_INST_CLEANUPRET = 48, // CLEANUPRET: [val] or [val,bb#] FUNC_CODE_INST_CATCHRET = 49, // CATCHRET: [val,bb#] FUNC_CODE_INST_CATCHPAD = 50, // CATCHPAD: [bb#,bb#,num,args...] FUNC_CODE_INST_CLEANUPPAD = 51, // CLEANUPPAD: [num,args...] FUNC_CODE_INST_CATCHSWITCH = 52, // CATCHSWITCH: [num,args...] or [num,args...,bb] // 53 is unused. // 54 is unused. FUNC_CODE_OPERAND_BUNDLE = 55, // OPERAND_BUNDLE: [tag#, value...] 
}; enum UseListCodes { USELIST_CODE_DEFAULT = 1, // DEFAULT: [index..., value-id] USELIST_CODE_BB = 2 // BB: [index..., bb-id] }; enum AttributeKindCodes { // = 0 is unused ATTR_KIND_ALIGNMENT = 1, ATTR_KIND_ALWAYS_INLINE = 2, ATTR_KIND_BY_VAL = 3, ATTR_KIND_INLINE_HINT = 4, ATTR_KIND_IN_REG = 5, ATTR_KIND_MIN_SIZE = 6, ATTR_KIND_NAKED = 7, ATTR_KIND_NEST = 8, ATTR_KIND_NO_ALIAS = 9, ATTR_KIND_NO_BUILTIN = 10, ATTR_KIND_NO_CAPTURE = 11, ATTR_KIND_NO_DUPLICATE = 12, ATTR_KIND_NO_IMPLICIT_FLOAT = 13, ATTR_KIND_NO_INLINE = 14, ATTR_KIND_NON_LAZY_BIND = 15, ATTR_KIND_NO_RED_ZONE = 16, ATTR_KIND_NO_RETURN = 17, ATTR_KIND_NO_UNWIND = 18, ATTR_KIND_OPTIMIZE_FOR_SIZE = 19, ATTR_KIND_READ_NONE = 20, ATTR_KIND_READ_ONLY = 21, ATTR_KIND_RETURNED = 22, ATTR_KIND_RETURNS_TWICE = 23, ATTR_KIND_S_EXT = 24, ATTR_KIND_STACK_ALIGNMENT = 25, ATTR_KIND_STACK_PROTECT = 26, ATTR_KIND_STACK_PROTECT_REQ = 27, ATTR_KIND_STACK_PROTECT_STRONG = 28, ATTR_KIND_STRUCT_RET = 29, ATTR_KIND_SANITIZE_ADDRESS = 30, ATTR_KIND_SANITIZE_THREAD = 31, ATTR_KIND_SANITIZE_MEMORY = 32, ATTR_KIND_UW_TABLE = 33, ATTR_KIND_Z_EXT = 34, ATTR_KIND_BUILTIN = 35, ATTR_KIND_COLD = 36, ATTR_KIND_OPTIMIZE_NONE = 37, ATTR_KIND_IN_ALLOCA = 38, ATTR_KIND_NON_NULL = 39, ATTR_KIND_JUMP_TABLE = 40, ATTR_KIND_DEREFERENCEABLE = 41, ATTR_KIND_DEREFERENCEABLE_OR_NULL = 42, ATTR_KIND_CONVERGENT = 43, ATTR_KIND_SAFESTACK = 44, ATTR_KIND_ARGMEMONLY = 45, ATTR_KIND_SWIFT_SELF = 46, ATTR_KIND_SWIFT_ERROR = 47, ATTR_KIND_NO_RECURSE = 48, ATTR_KIND_INACCESSIBLEMEM_ONLY = 49, ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY = 50 }; enum ComdatSelectionKindCodes { COMDAT_SELECTION_KIND_ANY = 1, COMDAT_SELECTION_KIND_EXACT_MATCH = 2, COMDAT_SELECTION_KIND_LARGEST = 3, COMDAT_SELECTION_KIND_NO_DUPLICATES = 4, COMDAT_SELECTION_KIND_SAME_SIZE = 5, }; } // End bitc namespace } // End llvm namespace #endif diff --git a/llvm/include/llvm/Bitcode/ReaderWriter.h b/llvm/include/llvm/Bitcode/ReaderWriter.h index bbce15a8f931..1afffa05527c 100644 --- 
a/llvm/include/llvm/Bitcode/ReaderWriter.h +++ b/llvm/include/llvm/Bitcode/ReaderWriter.h @@ -1,215 +1,216 @@ //===-- llvm/Bitcode/ReaderWriter.h - Bitcode reader/writers ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This header defines interfaces to read and write LLVM bitcode files/streams. // //===----------------------------------------------------------------------===// #ifndef LLVM_BITCODE_READERWRITER_H #define LLVM_BITCODE_READERWRITER_H #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorOr.h" #include "llvm/Support/MemoryBuffer.h" #include #include namespace llvm { class BitstreamWriter; class DataStreamer; class LLVMContext; class Module; class ModulePass; class raw_ostream; /// Offsets of the 32-bit fields of bitcode wrapper header. static const unsigned BWH_MagicField = 0*4; static const unsigned BWH_VersionField = 1*4; static const unsigned BWH_OffsetField = 2*4; static const unsigned BWH_SizeField = 3*4; static const unsigned BWH_CPUTypeField = 4*4; static const unsigned BWH_HeaderSize = 5*4; /// Read the header of the specified bitcode buffer and prepare for lazy /// deserialization of function bodies. If ShouldLazyLoadMetadata is true, /// lazily load metadata as well. If successful, this moves Buffer. On /// error, this *does not* move Buffer. ErrorOr> getLazyBitcodeModule(std::unique_ptr &&Buffer, LLVMContext &Context, bool ShouldLazyLoadMetadata = false); /// Read the header of the specified stream and prepare for lazy /// deserialization and streaming of function bodies. 
ErrorOr> getStreamedBitcodeModule(StringRef Name, std::unique_ptr Streamer, LLVMContext &Context); /// Read the header of the specified bitcode buffer and extract just the /// triple information. If successful, this returns a string. On error, this /// returns "". std::string getBitcodeTargetTriple(MemoryBufferRef Buffer, LLVMContext &Context); /// Read the header of the specified bitcode buffer and extract just the /// producer string information. If successful, this returns a string. On /// error, this returns "". std::string getBitcodeProducerString(MemoryBufferRef Buffer, LLVMContext &Context); /// Read the specified bitcode file, returning the module. ErrorOr> parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context); /// Check if the given bitcode buffer contains a summary block. bool hasGlobalValueSummary(MemoryBufferRef Buffer, DiagnosticHandlerFunction DiagnosticHandler); /// Parse the specified bitcode buffer, returning the module summary index. /// If IsLazy is false, parse the entire module summary into /// the index. Otherwise skip the module summary section, and only create /// an index object with a map from value name to the value's summary offset. /// The index is used to perform lazy summary reading later. ErrorOr> getModuleSummaryIndex(MemoryBufferRef Buffer, DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy = false); /// This method supports lazy reading of summary data from the /// combined index during function importing. When reading the combined index /// file, getModuleSummaryIndex is first invoked with IsLazy=true. /// Then this method is called for each value considered for importing, /// to parse the summary information for the given value name into /// the index. std::error_code readGlobalValueSummary( MemoryBufferRef Buffer, DiagnosticHandlerFunction DiagnosticHandler, StringRef ValueName, std::unique_ptr Index); /// \brief Write the specified module to the specified raw output stream. 
/// /// For streams where it matters, the given stream should be in "binary" /// mode. /// /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a /// Value in \c M. These will be reconstructed exactly when \a M is /// deserialized. /// /// If \c EmitSummaryIndex, emit the module's summary index (currently /// for use in ThinLTO optimization). void WriteBitcodeToFile(const Module *M, raw_ostream &Out, bool ShouldPreserveUseListOrder = false, - bool EmitSummaryIndex = false); + bool EmitSummaryIndex = false, + bool GenerateHash = false); /// Write the specified module summary index to the given raw output stream, /// where it will be written in a new bitcode block. This is used when /// writing the combined index file for ThinLTO. void WriteIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out); /// isBitcodeWrapper - Return true if the given bytes are the magic bytes /// for an LLVM IR bitcode wrapper. A buffer shorter than the four magic /// bytes never matches. /// inline bool isBitcodeWrapper(const unsigned char *BufPtr, const unsigned char *BufEnd) { // See if you can find the hidden message in the magic bytes :-). // (Hint: it's a little-endian encoding.) // All four bytes are read below, so require four bytes rather than // merely a non-empty buffer. return BufEnd - BufPtr >= 4 && BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 && BufPtr[2] == 0x17 && BufPtr[3] == 0x0B; } /// isRawBitcode - Return true if the given bytes are the magic bytes for /// raw LLVM IR bitcode (without a wrapper). A buffer shorter than the four /// magic bytes never matches. /// inline bool isRawBitcode(const unsigned char *BufPtr, const unsigned char *BufEnd) { // These bytes sort of have a hidden message, but it's not in // little-endian this time, and it's a little redundant. // All four bytes are read below, so require four bytes rather than // merely a non-empty buffer. return BufEnd - BufPtr >= 4 && BufPtr[0] == 'B' && BufPtr[1] == 'C' && BufPtr[2] == 0xc0 && BufPtr[3] == 0xde; } /// isBitcode - Return true if the given bytes are the magic bytes for /// LLVM IR bitcode, either with or without a wrapper. 
/// inline bool isBitcode(const unsigned char *BufPtr, const unsigned char *BufEnd) { return isBitcodeWrapper(BufPtr, BufEnd) || isRawBitcode(BufPtr, BufEnd); } /// SkipBitcodeWrapperHeader - Some systems wrap bc files with a special /// header for padding or other reasons. The format of this header is: /// /// struct bc_header { /// uint32_t Magic; // 0x0B17C0DE /// uint32_t Version; // Version, currently always 0. /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. /// uint32_t BitcodeSize; // Size of traditional bitcode file. /// ... potentially other gunk ... /// }; /// /// This function is called when we find a file with a matching magic number. /// In this case, skip down to the subsection of the file that is actually a /// BC file. /// If 'VerifyBufferSize' is true, check that the buffer is large enough to /// contain the whole bitcode file. /// Returns true on failure (the header is too short or, when verifying, /// Offset+Size runs past the end of the buffer). On success BufPtr and BufEnd /// are moved to delimit the embedded bitcode and false is returned. inline bool SkipBitcodeWrapperHeader(const unsigned char *&BufPtr, const unsigned char *&BufEnd, bool VerifyBufferSize) { // Must contain the offset and size field! if (unsigned(BufEnd - BufPtr) < BWH_SizeField + 4) return true; unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); // Verify that Offset+Size fits in the file. The sum is formed in 64 bits so that a crafted header cannot wrap the 32-bit addition past this check.
if (VerifyBufferSize && uint64_t(Offset) + uint64_t(Size) > uint64_t(BufEnd - BufPtr)) return true; BufPtr += Offset; BufEnd = BufPtr+Size; return false; } const std::error_category &BitcodeErrorCategory(); enum class BitcodeError { InvalidBitcodeSignature = 1, CorruptedBitcode }; inline std::error_code make_error_code(BitcodeError E) { return std::error_code(static_cast(E), BitcodeErrorCategory()); } class BitcodeDiagnosticInfo : public DiagnosticInfo { const Twine &Msg; std::error_code EC; public: BitcodeDiagnosticInfo(std::error_code EC, DiagnosticSeverity Severity, const Twine &Msg); void print(DiagnosticPrinter &DP) const override; std::error_code getError() const { return EC; } static bool classof(const DiagnosticInfo *DI) { return DI->getKind() == DK_Bitcode; } }; } // End llvm namespace namespace std { template <> struct is_error_code_enum : std::true_type {}; } #endif diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h index 30a7145cb42e..fe0385e76b9b 100644 --- a/llvm/include/llvm/IR/ModuleSummaryIndex.h +++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h @@ -1,360 +1,377 @@ //===-- llvm/ModuleSummaryIndex.h - Module Summary Index --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // /// @file /// ModuleSummaryIndex.h This file contains the declarations of the classes that /// hold the module index and summary for function importing. 
// //===----------------------------------------------------------------------===// #ifndef LLVM_IR_MODULESUMMARYINDEX_H #define LLVM_IR_MODULESUMMARYINDEX_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include + namespace llvm { /// \brief Class to accumulate and hold information about a callee. struct CalleeInfo { /// The static number of callsites calling corresponding function. unsigned CallsiteCount; /// The cumulative profile count of calls to corresponding function /// (if using PGO, otherwise 0). uint64_t ProfileCount; CalleeInfo() : CallsiteCount(0), ProfileCount(0) {} CalleeInfo(unsigned CallsiteCount, uint64_t ProfileCount) : CallsiteCount(CallsiteCount), ProfileCount(ProfileCount) {} CalleeInfo &operator+=(uint64_t RHSProfileCount) { CallsiteCount++; ProfileCount += RHSProfileCount; return *this; } }; /// \brief Function and variable summary information to aid decisions and /// implementation of importing. /// /// This is a separate class from GlobalValueInfo to enable lazy reading of this /// summary information from the combined index file during importing. class GlobalValueSummary { public: /// \brief Subclass discriminator (for dyn_cast<> et al.) enum SummaryKind { FunctionKind, GlobalVarKind }; private: /// Kind of summary for use in dyn_cast<> et al. SummaryKind Kind; /// \brief Path of module IR containing value's definition, used to locate /// module during importing. /// /// This is only used during parsing of the combined index, or when /// parsing the per-module index for creation of the combined summary index, /// not during writing of the per-module index which doesn't contain a /// module path string table. StringRef ModulePath; /// \brief The linkage type of the associated global value. 
/// /// One use is to flag values that have local linkage types and need to /// have module identifier appended before placing into the combined /// index, to disambiguate from other values with the same name. /// In the future this will be used to update and optimize linkage /// types based on global summary-based analysis. GlobalValue::LinkageTypes Linkage; /// List of GUIDs of values referenced by this global value's definition /// (either by the initializer of a global variable, or referenced /// from within a function). This does not include functions called, which /// are listed in the derived FunctionSummary object. std::vector RefEdgeList; protected: /// GlobalValueSummary constructor. GlobalValueSummary(SummaryKind K, GlobalValue::LinkageTypes Linkage) : Kind(K), Linkage(Linkage) {} public: virtual ~GlobalValueSummary() = default; /// Which kind of summary subclass this is. SummaryKind getSummaryKind() const { return Kind; } /// Set the path to the module containing this function, for use in /// the combined index. void setModulePath(StringRef ModPath) { ModulePath = ModPath; } /// Get the path to the module containing this function. StringRef modulePath() const { return ModulePath; } /// Return linkage type recorded for this global value. GlobalValue::LinkageTypes linkage() const { return Linkage; } /// Record a reference from this global value to the global value identified /// by \p RefGUID. void addRefEdge(uint64_t RefGUID) { RefEdgeList.push_back(RefGUID); } /// Record a reference from this global value to each global value identified /// in \p RefEdges. void addRefEdges(DenseSet &RefEdges) { for (auto &RI : RefEdges) addRefEdge(RI); } /// Return the list of GUIDs referenced by this global value definition. std::vector &refs() { return RefEdgeList; } const std::vector &refs() const { return RefEdgeList; } }; /// \brief Function summary information to aid decisions and implementation of /// importing. 
class FunctionSummary : public GlobalValueSummary { public: /// call edge pair. typedef std::pair EdgeTy; private: /// Number of instructions (ignoring debug instructions, e.g.) computed /// during the initial compile step when the summary index is first built. unsigned InstCount; /// List of call edge pairs from this function. std::vector CallGraphEdgeList; public: /// Summary constructors. FunctionSummary(GlobalValue::LinkageTypes Linkage, unsigned NumInsts) : GlobalValueSummary(FunctionKind, Linkage), InstCount(NumInsts) {} /// Check if this is a function summary. static bool classof(const GlobalValueSummary *GVS) { return GVS->getSummaryKind() == FunctionKind; } /// Get the instruction count recorded for this function. unsigned instCount() const { return InstCount; } /// Record a call graph edge from this function to the function identified /// by \p CalleeGUID, with \p CalleeInfo including the cumulative profile /// count (across all calls from this function) or 0 if no PGO. void addCallGraphEdge(uint64_t CalleeGUID, CalleeInfo Info) { CallGraphEdgeList.push_back(std::make_pair(CalleeGUID, Info)); } /// Record a call graph edge from this function to each function recorded /// in \p CallGraphEdges. void addCallGraphEdges(DenseMap &CallGraphEdges) { for (auto &EI : CallGraphEdges) addCallGraphEdge(EI.first, EI.second); } /// Return the list of pairs. std::vector &calls() { return CallGraphEdgeList; } const std::vector &calls() const { return CallGraphEdgeList; } }; /// \brief Global variable summary information to aid decisions and /// implementation of importing. /// /// Currently this doesn't add anything to the base \p GlobalValueSummary, /// but is a placeholder as additional info may be added to the summary /// for variables. class GlobalVarSummary : public GlobalValueSummary { public: /// Summary constructors. GlobalVarSummary(GlobalValue::LinkageTypes Linkage) : GlobalValueSummary(GlobalVarKind, Linkage) {} /// Check if this is a global variable summary. 
static bool classof(const GlobalValueSummary *GVS) { return GVS->getSummaryKind() == GlobalVarKind; } }; /// \brief Class to hold pointer to summary object and information required /// for parsing or writing it. class GlobalValueInfo { private: /// Summary information used to help make ThinLTO importing decisions. std::unique_ptr Summary; /// \brief The bitcode offset corresponding to either an associated /// function's function body record, or to an associated summary record, /// depending on whether this is a per-module or combined index. /// /// This bitcode offset is written to or read from the associated /// \a ValueSymbolTable entry for a function. /// For the per-module index this holds the bitcode offset of a /// function's body record within bitcode module block in its module, /// although this field is currently only used when writing the VST /// (it is set to 0 and also unused when this is a global variable). /// For the combined index this holds the offset of the corresponding /// summary record, to enable associating the combined index /// VST records with the summary records. uint64_t BitcodeIndex; public: GlobalValueInfo(uint64_t Offset = 0, std::unique_ptr Summary = nullptr) : Summary(std::move(Summary)), BitcodeIndex(Offset) {} /// Record the summary information parsed out of the summary block during /// parsing or combined index creation. void setSummary(std::unique_ptr GVSummary) { Summary = std::move(GVSummary); } /// Get the summary recorded for this global value. GlobalValueSummary *summary() const { return Summary.get(); } /// Get the bitcode index recorded for this value symbol table entry. uint64_t bitcodeIndex() const { return BitcodeIndex; } /// Set the bitcode index recorded for this value symbol table entry. void setBitcodeIndex(uint64_t Offset) { BitcodeIndex = Offset; } }; +/// 160 bits SHA1 +typedef std::array ModuleHash; + /// List of global value info structures for a particular value held /// in the GlobalValueMap. 
Requires a vector in the case of multiple /// COMDAT values of the same name. typedef std::vector> GlobalValueInfoList; /// Map from global value GUID to corresponding info structures. /// Use a std::map rather than a DenseMap since it will likely incur /// less overhead, as the value type is not very small and the size /// of the map is unknown, resulting in inefficiencies due to repeated /// insertions and resizing. typedef std::map GlobalValueInfoMapTy; /// Type used for iterating through the global value info map. typedef GlobalValueInfoMapTy::const_iterator const_globalvalueinfo_iterator; typedef GlobalValueInfoMapTy::iterator globalvalueinfo_iterator; /// String table to hold/own module path strings, which additionally holds the -/// module ID assigned to each module during the plugin step. The StringMap -/// makes a copy of and owns inserted strings. -typedef StringMap ModulePathStringTableTy; +/// module ID assigned to each module during the plugin step, as well as a hash +/// of the module. The StringMap makes a copy of and owns inserted strings. +typedef StringMap> ModulePathStringTableTy; /// Class to hold module path string table and global value map, /// and encapsulate methods for operating on them. class ModuleSummaryIndex { private: /// Map from value name to list of information instances for values of that /// name (may be duplicates in the COMDAT case, e.g.). GlobalValueInfoMapTy GlobalValueMap; /// Holds strings for combined index, mapping to the corresponding module ID. ModulePathStringTableTy ModulePathStringTable; public: ModuleSummaryIndex() = default; // Disable the copy constructor and assignment operators, so // no unexpected copying/moving occurs. 
ModuleSummaryIndex(const ModuleSummaryIndex &) = delete; void operator=(const ModuleSummaryIndex &) = delete; globalvalueinfo_iterator begin() { return GlobalValueMap.begin(); } const_globalvalueinfo_iterator begin() const { return GlobalValueMap.begin(); } globalvalueinfo_iterator end() { return GlobalValueMap.end(); } const_globalvalueinfo_iterator end() const { return GlobalValueMap.end(); } /// Get the list of global value info objects for a given value name. const GlobalValueInfoList &getGlobalValueInfoList(StringRef ValueName) { return GlobalValueMap[GlobalValue::getGUID(ValueName)]; } /// Get the list of global value info objects for a given value name. const const_globalvalueinfo_iterator findGlobalValueInfoList(StringRef ValueName) const { return GlobalValueMap.find(GlobalValue::getGUID(ValueName)); } /// Get the list of global value info objects for a given value GUID. const const_globalvalueinfo_iterator findGlobalValueInfoList(uint64_t ValueGUID) const { return GlobalValueMap.find(ValueGUID); } /// Add a global value info for a value of the given name. void addGlobalValueInfo(StringRef ValueName, std::unique_ptr Info) { GlobalValueMap[GlobalValue::getGUID(ValueName)].push_back(std::move(Info)); } /// Add a global value info for a value of the given GUID. void addGlobalValueInfo(uint64_t ValueGUID, std::unique_ptr Info) { GlobalValueMap[ValueGUID].push_back(std::move(Info)); } - /// Table of modules, containing an id. - const StringMap &modulePaths() const { + /// Table of modules, containing module hash and id. + const StringMap> &modulePaths() const { return ModulePathStringTable; } - /// Table of modules, containing an id. - StringMap &modulePaths() { return ModulePathStringTable; } + /// Table of modules, containing hash and id. + StringMap> &modulePaths() { + return ModulePathStringTable; + } /// Get the module ID recorded for the given module path. 
uint64_t getModuleId(const StringRef ModPath) const { - return ModulePathStringTable.lookup(ModPath); + return ModulePathStringTable.lookup(ModPath).first; + } + + /// Get the module SHA1 hash recorded for the given module path. + const ModuleHash &getModuleHash(const StringRef ModPath) const { + auto It = ModulePathStringTable.find(ModPath); + assert(It != ModulePathStringTable.end() && "Module not registered"); + return It->second.second; } /// Add the given per-module index into this module index/summary, /// assigning it the given module ID. Each module merged in should have /// a unique ID, necessary for consistent renaming of promoted /// static (local) variables. void mergeFrom(std::unique_ptr Other, uint64_t NextModuleId); /// Convenience method for creating a promoted global name /// for the given value name of a local, and its original module's ID. static std::string getGlobalNameForLocal(StringRef Name, uint64_t ModId) { SmallString<256> NewName(Name); NewName += ".llvm."; raw_svector_ostream(NewName) << ModId; return NewName.str(); } - /// Add a new module path, mapped to the given module Id, and return StringRef - /// owned by string table map. - StringRef addModulePath(StringRef ModPath, uint64_t ModId) { - return ModulePathStringTable.insert(std::make_pair(ModPath, ModId)) - .first->first(); + /// Add a new module path with the given \p Hash, mapped to the given \p + /// ModID, and return an iterator to the entry in the index. + ModulePathStringTableTy::iterator + addModulePath(StringRef ModPath, uint64_t ModId, + ModuleHash Hash = ModuleHash{{0}}) { + return ModulePathStringTable.insert(std::make_pair( + ModPath, + std::make_pair(ModId, Hash))).first; } /// Check if the given Module has any functions available for exporting /// in the index. We consider any module present in the ModulePathStringTable /// to have exported functions. 
bool hasExportedFunctions(const Module &M) const { return ModulePathStringTable.count(M.getModuleIdentifier()); } /// Remove entries in the GlobalValueMap that have empty summaries due to the /// eager nature of map entry creation during VST parsing. These would /// also be suppressed during combined index generation in mergeFrom(), /// but if there was only one module or this was the first module we might /// not invoke mergeFrom. void removeEmptySummaryEntries(); }; } // End llvm namespace #endif diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 93496fe8e6fc..1840b60cc012 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1,6256 +1,6290 @@ //===- BitcodeReader.cpp - Internal BitcodeReader implementation ----------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/Bitcode/BitstreamReader.h" #include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/GVMaterializer.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Support/DataStream.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" 
#include "llvm/Support/raw_ostream.h"
// NOTE(review): the header name is missing from the directive below, and
// template argument lists (on std::vector, std::unique_ptr, DenseMap,
// SmallVector, ...) are absent throughout this copy of the file. They look
// like they were lost during extraction; restore them from upstream before
// trying to compile.
#include
using namespace llvm;

namespace {
enum {
  SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex
};

/// Slot-indexed table of the values read so far.
///
/// A slot may be filled with a placeholder by getConstantFwdRef() or
/// getValueFwdRef() before the real value is known; assignValue() later
/// installs the real value.  Placeholder constants are not replaced
/// immediately but queued in ResolveConstants and fixed up in bulk by
/// resolveConstantForwardRefs().
class BitcodeReaderValueList {
  /// One entry per value slot.  A null entry means that nothing has been
  /// assigned to, or forward-referenced at, that index yet.
  std::vector ValuePtrs;

  /// As we resolve forward-referenced constants, we add information about them
  /// to this vector.  This allows us to resolve them in bulk instead of
  /// resolving each reference at a time.  See the code in
  /// resolveConstantForwardRefs for more information about this.
  ///
  /// The key of this vector is the placeholder constant, the value is the slot
  /// number that holds the resolved value.
  typedef std::vector > ResolveConstantsTy;
  ResolveConstantsTy ResolveConstants;
  LLVMContext &Context;

public:
  BitcodeReaderValueList(LLVMContext &C) : Context(C) {}

  /// Every placeholder queued in ResolveConstants must have been resolved
  /// before the list is destroyed.
  ~BitcodeReaderValueList() {
    assert(ResolveConstants.empty() && "Constants not resolved?");
  }

  // vector compatibility methods
  unsigned size() const { return ValuePtrs.size(); }
  void resize(unsigned N) { ValuePtrs.resize(N); }
  void push_back(Value *V) { ValuePtrs.emplace_back(V); }

  /// Drop all slots.  Like the destructor, this requires that no placeholder
  /// constants are still waiting to be resolved.
  void clear() {
    assert(ResolveConstants.empty() && "Constants not resolved?");
    ValuePtrs.clear();
  }

  /// Bounds-asserted slot access; returns whatever the slot currently holds
  /// (possibly null or a placeholder).
  Value *operator[](unsigned i) const {
    assert(i < ValuePtrs.size());
    return ValuePtrs[i];
  }

  Value *back() const { return ValuePtrs.back(); }
  void pop_back() { ValuePtrs.pop_back(); }
  bool empty() const { return ValuePtrs.empty(); }

  /// Truncate the list so that exactly N slots remain; N must not exceed the
  /// current size.
  void shrinkTo(unsigned N) {
    assert(N <= size() && "Invalid shrinkTo request!");
    ValuePtrs.resize(N);
  }

  Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
  Value *getValueFwdRef(unsigned Idx, Type *Ty);

  void assignValue(Value *V, unsigned Idx);

  /// Once all constants are read, this method bulk resolves any forward
  /// references.
  void resolveConstantForwardRefs();
};

/// Slot-indexed table of the metadata read so far.
///
/// getMetadataFwdRef() hands out temporary placeholder nodes for slots that
/// have not been assigned yet, and assignValue() replaces them.  The counters
/// below track outstanding placeholders so that tryToResolveCycles() knows
/// when, and over which index range, it can run.
class BitcodeReaderMetadataList {
  /// Number of placeholders created by getMetadataFwdRef() that assignValue()
  /// has not replaced yet.
  unsigned NumFwdRefs;
  /// True once a placeholder has been created since the last time
  /// tryToResolveCycles() ran to completion.
  bool AnyFwdRefs;
  /// Inclusive index range covering every placeholder created while
  /// AnyFwdRefs is set.  These are not initialized by the constructor; they
  /// are only read when AnyFwdRefs is true, i.e. after getMetadataFwdRef()
  /// has written them.
  unsigned MinFwdRef;
  unsigned MaxFwdRef;
  std::vector MetadataPtrs;

  LLVMContext &Context;

public:
  BitcodeReaderMetadataList(LLVMContext &C)
      : NumFwdRefs(0), AnyFwdRefs(false), Context(C) {}

  // vector compatibility methods
  unsigned size() const { return MetadataPtrs.size(); }
  void resize(unsigned N) { MetadataPtrs.resize(N); }
  void push_back(Metadata *MD) { MetadataPtrs.emplace_back(MD); }
  void clear() { MetadataPtrs.clear(); }
  Metadata *back() const { return MetadataPtrs.back(); }
  void pop_back() { MetadataPtrs.pop_back(); }
  bool empty() const { return MetadataPtrs.empty(); }

  /// Bounds-asserted slot access; returns whatever the slot currently holds.
  Metadata *operator[](unsigned i) const {
    assert(i < MetadataPtrs.size());
    return MetadataPtrs[i];
  }

  /// Truncate the list so that exactly N slots remain; N must not exceed the
  /// current size.
  void shrinkTo(unsigned N) {
    assert(N <= size() && "Invalid shrinkTo request!");
    MetadataPtrs.resize(N);
  }

  Metadata *getMetadataFwdRef(unsigned Idx);
  MDNode *getMDNodeFwdRefOrNull(unsigned Idx);
  void assignValue(Metadata *MD, unsigned Idx);
  void tryToResolveCycles();
};

/// GVMaterializer implementation holding all of the state used while decoding
/// one bitcode stream: the type/value/metadata tables, the bookkeeping for
/// lazily-loaded function bodies and metadata blocks, and assorted flags.
class BitcodeReader : public GVMaterializer {
  LLVMContext &Context;
  Module *TheModule = nullptr;
  std::unique_ptr Buffer;
  std::unique_ptr StreamFile;
  BitstreamCursor Stream;
  // Next offset to start scanning for lazy parsing of function bodies.
  uint64_t NextUnreadBit = 0;
  // Last function offset found in the VST.
  uint64_t LastFunctionBlockBit = 0;
  bool SeenValueSymbolTable = false;
  uint64_t VSTOffset = 0;
  // Contains an arbitrary and optional string identifying the bitcode producer
  std::string ProducerIdentification;

  std::vector TypeList;
  BitcodeReaderValueList ValueList;
  BitcodeReaderMetadataList MetadataList;
  std::vector ComdatList;
  SmallVector InstructionList;

  std::vector > GlobalInits;
  std::vector > AliasInits;
  std::vector > FunctionPrefixes;
  std::vector > FunctionPrologues;
  std::vector > FunctionPersonalityFns;

  SmallVector InstsWithTBAATag;

  bool HasSeenOldLoopTags = false;

  /// The set of attributes by index.  Index zero in the file is for null, and
  /// is thus not represented here.  As such all indices are off by one.
  std::vector MAttributes;

  /// The set of attribute groups.
  std::map MAttributeGroups;

  /// While parsing a function body, this is a list of the basic blocks for the
  /// function.
  std::vector FunctionBBs;

  // When reading the module header, this list is populated with functions that
  // have bodies later in the file.
  std::vector FunctionsWithBodies;

  // When intrinsic functions are encountered which require upgrading they are
  // stored here with their replacement function.
  typedef DenseMap UpgradedIntrinsicMap;
  UpgradedIntrinsicMap UpgradedIntrinsics;

  // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
  DenseMap MDKindMap;

  // Several operations happen after the module header has been read, but
  // before function bodies are processed. This keeps track of whether
  // we've done this yet.
  bool SeenFirstFunctionBody = false;

  /// When function bodies are initially scanned, this map contains info about
  /// where to find deferred function body in the stream.
  DenseMap DeferredFunctionInfo;

  /// When Metadata block is initially scanned when parsing the module, we may
  /// choose to defer parsing of the metadata. This vector contains info about
  /// which Metadata blocks are deferred.
  std::vector DeferredMetadataInfo;

  /// These are basic blocks forward-referenced by block addresses.  They are
  /// inserted lazily into functions when they're loaded.  The basic block ID is
  /// its index into the vector.
  DenseMap> BasicBlockFwdRefs;
  std::deque BasicBlockFwdRefQueue;

  /// Indicates that we are using a new encoding for instruction operands where
  /// most operands in the current FUNCTION_BLOCK are encoded relative to the
  /// instruction number, for a more compact encoding.  Some instruction
  /// operands are not relative to the instruction ID: basic block numbers, and
  /// types. Once the old style function blocks have been phased out, we would
  /// not need this flag.
bool UseRelativeIDs = false;

  /// True if all functions will be materialized, negating the need to process
  /// (e.g.) blockaddress forward references.
  bool WillMaterializeAllForwardRefs = false;

  /// True if any Metadata block has been materialized.
  bool IsMetadataMaterialized = false;

  bool StripDebugInfo = false;

  // NOTE(review): template argument lists are missing from the declarations
  // in this copy of the file (e.g. the two containers below); they appear to
  // have been stripped during extraction.

  /// Functions that need to be matched with subprograms when upgrading old
  /// metadata.
  SmallDenseMap FunctionsWithSPs;

  std::vector BundleTags;

public:
  /// Error-reporting helpers.  The overloads taking a message append producer
  /// and reader identification when a producer string has been recorded.
  std::error_code error(BitcodeError E, const Twine &Message);
  std::error_code error(BitcodeError E);
  std::error_code error(const Twine &Message);

  BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context);
  BitcodeReader(LLVMContext &Context);
  ~BitcodeReader() override { freeState(); }

  std::error_code materializeForwardReferencedFunctions();

  void freeState();

  void releaseBuffer();

  std::error_code materialize(GlobalValue *GV) override;
  std::error_code materializeModule() override;
  std::vector getIdentifiedStructTypes() const override;

  /// \brief Main interface to parsing a bitcode buffer.
  /// \returns a std::error_code describing the failure, if any (the result is
  /// an error code, not a bool).
  std::error_code parseBitcodeInto(std::unique_ptr Streamer, Module *M,
                                   bool ShouldLazyLoadMetadata = false);

  /// \brief Cheap mechanism to just extract module triple
  /// \returns an ErrorOr holding either the result or the error.
  ErrorOr parseTriple();

  /// Cheap mechanism to just extract the identification block out of bitcode.
  ErrorOr parseIdentificationBlock();

  static uint64_t decodeSignRotatedValue(uint64_t V);

  /// Materialize any deferred Metadata block.
  std::error_code materializeMetadata() override;

  void setStripDebugInfo() override;

private:
  /// Parse the "IDENTIFICATION_BLOCK_ID" block, populate the
  /// ProducerIdentification data member, and do some basic enforcement on the
  /// "epoch" encoded in the bitcode.
  std::error_code parseBitcodeVersion();

  std::vector IdentifiedStructTypes;
  StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
  StructType *createIdentifiedStructType(LLVMContext &Context);

  Type *getTypeByID(unsigned ID);

  /// Return the function-local value in slot \p ID.  A metadata-typed request
  /// is answered by wrapping the metadata in a MetadataAsValue; everything
  /// else is forwarded to ValueList.getValueFwdRef(), which may create a
  /// placeholder or return null.
  Value *getFnValueByID(unsigned ID, Type *Ty) {
    if (Ty && Ty->isMetadataTy())
      return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID));
    return ValueList.getValueFwdRef(ID, Ty);
  }

  /// Return the metadata in slot \p ID, creating a forward-reference
  /// placeholder if the slot is still empty.
  Metadata *getFnMetadataByID(unsigned ID) {
    return MetadataList.getMetadataFwdRef(ID);
  }

  /// Return basic block \p ID of the function currently being parsed, or null
  /// if the ID is out of range.
  BasicBlock *getBasicBlock(unsigned ID) const {
    if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID
    return FunctionBBs[ID];
  }

  /// Return attribute set number \p i.  Indices are one-based (see
  /// MAttributes); for i == 0 the unsigned expression i-1 wraps to a huge
  /// value, fails the bounds check, and an empty AttributeSet is returned.
  AttributeSet getAttributes(unsigned i) const {
    if (i-1 < MAttributes.size())
      return MAttributes[i-1];
    return AttributeSet();
  }

  /// Read a value/type pair out of the specified record from slot 'Slot'.
  /// Increment Slot past the number of slots used in the record. Return true on
  /// failure.
  bool getValueTypePair(SmallVectorImpl &Record, unsigned &Slot,
                        unsigned InstNum, Value *&ResVal) {
    if (Slot == Record.size()) return true;
    unsigned ValNo = (unsigned)Record[Slot++];
    // Adjust the ValNo, if it was encoded relative to the InstNum.
    if (UseRelativeIDs)
      ValNo = InstNum - ValNo;
    if (ValNo < InstNum) {
      // If this is not a forward reference, just return the value we already
      // have.
      ResVal = getFnValueByID(ValNo, nullptr);
      return ResVal == nullptr;
    }
    // Forward reference: the record carries an explicit type in the next slot.
    if (Slot == Record.size())
      return true;

    unsigned TypeNo = (unsigned)Record[Slot++];
    ResVal = getFnValueByID(ValNo, getTypeByID(TypeNo));
    return ResVal == nullptr;
  }

  /// Read a value out of the specified record from slot 'Slot'. Increment Slot
  /// past the number of slots used by the value in the record. Return true if
  /// there is an error.
  bool popValue(SmallVectorImpl &Record, unsigned &Slot,
                unsigned InstNum, Type *Ty, Value *&ResVal) {
    if (getValue(Record, Slot, InstNum, Ty, ResVal))
      return true;
    // All values currently take a single record slot.
    ++Slot;
    return false;
  }

  /// Like popValue, but does not increment the Slot number.
  bool getValue(SmallVectorImpl &Record, unsigned Slot,
                unsigned InstNum, Type *Ty, Value *&ResVal) {
    ResVal = getValue(Record, Slot, InstNum, Ty);
    return ResVal == nullptr;
  }

  /// Version of getValue that returns ResVal directly, or null if there is an
  /// error.
  Value *getValue(SmallVectorImpl &Record, unsigned Slot,
                  unsigned InstNum, Type *Ty) {
    if (Slot == Record.size()) return nullptr;
    unsigned ValNo = (unsigned)Record[Slot];
    // Adjust the ValNo, if it was encoded relative to the InstNum.
    if (UseRelativeIDs)
      ValNo = InstNum - ValNo;
    return getFnValueByID(ValNo, Ty);
  }

  /// Like getValue, but decodes signed VBRs.
  Value *getValueSigned(SmallVectorImpl &Record, unsigned Slot,
                        unsigned InstNum, Type *Ty) {
    if (Slot == Record.size()) return nullptr;
    unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]);
    // Adjust the ValNo, if it was encoded relative to the InstNum.
    if (UseRelativeIDs)
      ValNo = InstNum - ValNo;
    return getFnValueByID(ValNo, Ty);
  }

  /// Converts alignment exponent (i.e. power of two (or zero)) to the
  /// corresponding alignment to use. If alignment is too large, returns
  /// a corresponding error code.
  std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment);
  std::error_code parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
  std::error_code parseModule(uint64_t ResumeBit,
                              bool ShouldLazyLoadMetadata = false);
  std::error_code parseAttributeBlock();
  std::error_code parseAttributeGroupBlock();
  std::error_code parseTypeTable();
  std::error_code parseTypeTableBody();
  std::error_code parseOperandBundleTags();

  ErrorOr recordValue(SmallVectorImpl &Record,
                      unsigned NameIndex, Triple &TT);
  std::error_code parseValueSymbolTable(uint64_t Offset = 0);
  std::error_code parseConstants();
  std::error_code rememberAndSkipFunctionBodies();
  std::error_code rememberAndSkipFunctionBody();
  /// Save the positions of the Metadata blocks and skip parsing the blocks.
  std::error_code rememberAndSkipMetadata();
  std::error_code parseFunctionBody(Function *F);
  std::error_code globalCleanup();
  std::error_code resolveGlobalAndAliasInits();
  std::error_code parseMetadata(bool ModuleLevel = false);
  std::error_code parseMetadataStrings(ArrayRef Record,
                                       StringRef Blob,
                                       unsigned &NextMetadataNo);
  std::error_code parseMetadataKinds();
  std::error_code parseMetadataKindRecord(SmallVectorImpl &Record);
  std::error_code parseMetadataAttachment(Function &F);
  ErrorOr parseModuleTriple();
  std::error_code parseUseLists();
  std::error_code initStream(std::unique_ptr Streamer);
  std::error_code initStreamFromBuffer();
  std::error_code initLazyStream(std::unique_ptr Streamer);
  std::error_code findFunctionInStream(
      Function *F,
      DenseMap::iterator DeferredFunctionInfoIterator);
};

/// Class to manage reading and parsing function summary index bitcode
/// files/sections.
class ModuleSummaryIndexBitcodeReader {
  DiagnosticHandlerFunction DiagnosticHandler;

  /// Eventually points to the module index built during parsing.
  ModuleSummaryIndex *TheIndex = nullptr;

  std::unique_ptr Buffer;
  std::unique_ptr StreamFile;
  BitstreamCursor Stream;

  /// \brief Used to indicate whether we are doing lazy parsing of summary data.
  ///
  /// If false, the summary section is fully parsed into the index during
  /// the initial parse. Otherwise, if true, the caller is expected to
  /// invoke \a readGlobalValueSummary for each summary needed, and the summary
  /// section is thus parsed lazily.
  bool IsLazy = false;

  /// Used to indicate whether caller only wants to check for the presence
  /// of the global value summary bitcode section. All blocks are skipped,
  /// but the SeenGlobalValSummary boolean is set.
  bool CheckGlobalValSummaryPresenceOnly = false;

  /// Indicates whether we have encountered a global value summary section
  /// yet during parsing, used when checking if file contains global value
  /// summary section.
bool SeenGlobalValSummary = false; /// Indicates whether we have already parsed the VST, used for error checking. bool SeenValueSymbolTable = false; /// Set to the offset of the VST recorded in the MODULE_CODE_VSTOFFSET record. /// Used to enable on-demand parsing of the VST. uint64_t VSTOffset = 0; // Map to save ValueId to GUID association that was recorded in the // ValueSymbolTable. It is used after the VST is parsed to convert // call graph edges read from the function summary from referencing // callees by their ValueId to using the GUID instead, which is how // they are recorded in the summary index being built. DenseMap ValueIdToCallGraphGUIDMap; /// Map to save the association between summary offset in the VST to the /// GlobalValueInfo object created when parsing it. Used to access the /// info object when parsing the summary section. DenseMap SummaryOffsetToInfoMap; /// Map populated during module path string table parsing, from the /// module ID to a string reference owned by the index's module /// path string table, used to correlate with combined index /// summary records. DenseMap ModuleIdMap; /// Original source file name recorded in a bitcode record. std::string SourceFileName; public: std::error_code error(BitcodeError E, const Twine &Message); std::error_code error(BitcodeError E); std::error_code error(const Twine &Message); ModuleSummaryIndexBitcodeReader( MemoryBuffer *Buffer, DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy = false, bool CheckGlobalValSummaryPresenceOnly = false); ModuleSummaryIndexBitcodeReader( DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy = false, bool CheckGlobalValSummaryPresenceOnly = false); ~ModuleSummaryIndexBitcodeReader() { freeState(); } void freeState(); void releaseBuffer(); /// Check if the parser has encountered a summary section. bool foundGlobalValSummary() { return SeenGlobalValSummary; } /// \brief Main interface to parsing a bitcode buffer. /// \returns true if an error occurred. 
std::error_code parseSummaryIndexInto(std::unique_ptr Streamer, ModuleSummaryIndex *I); /// \brief Interface for parsing a summary lazily. std::error_code parseGlobalValueSummary(std::unique_ptr Streamer, ModuleSummaryIndex *I, size_t SummaryOffset); private: std::error_code parseModule(); std::error_code parseValueSymbolTable( uint64_t Offset, DenseMap &ValueIdToLinkageMap); std::error_code parseEntireSummary(); std::error_code parseModuleStringTable(); std::error_code initStream(std::unique_ptr Streamer); std::error_code initStreamFromBuffer(); std::error_code initLazyStream(std::unique_ptr Streamer); uint64_t getGUIDFromValueId(unsigned ValueId); GlobalValueInfo *getInfoFromSummaryOffset(uint64_t Offset); }; } // end anonymous namespace BitcodeDiagnosticInfo::BitcodeDiagnosticInfo(std::error_code EC, DiagnosticSeverity Severity, const Twine &Msg) : DiagnosticInfo(DK_Bitcode, Severity), Msg(Msg), EC(EC) {} void BitcodeDiagnosticInfo::print(DiagnosticPrinter &DP) const { DP << Msg; } static std::error_code error(DiagnosticHandlerFunction DiagnosticHandler, std::error_code EC, const Twine &Message) { BitcodeDiagnosticInfo DI(EC, DS_Error, Message); DiagnosticHandler(DI); return EC; } static std::error_code error(DiagnosticHandlerFunction DiagnosticHandler, std::error_code EC) { return error(DiagnosticHandler, EC, EC.message()); } static std::error_code error(LLVMContext &Context, std::error_code EC, const Twine &Message) { return error([&](const DiagnosticInfo &DI) { Context.diagnose(DI); }, EC, Message); } static std::error_code error(LLVMContext &Context, std::error_code EC) { return error(Context, EC, EC.message()); } static std::error_code error(LLVMContext &Context, const Twine &Message) { return error(Context, make_error_code(BitcodeError::CorruptedBitcode), Message); } std::error_code BitcodeReader::error(BitcodeError E, const Twine &Message) { if (!ProducerIdentification.empty()) { return ::error(Context, make_error_code(E), Message + " (Producer: '" + 
ProducerIdentification + "' Reader: 'LLVM " + LLVM_VERSION_STRING "')"); } return ::error(Context, make_error_code(E), Message); } std::error_code BitcodeReader::error(const Twine &Message) { if (!ProducerIdentification.empty()) { return ::error(Context, make_error_code(BitcodeError::CorruptedBitcode), Message + " (Producer: '" + ProducerIdentification + "' Reader: 'LLVM " + LLVM_VERSION_STRING "')"); } return ::error(Context, make_error_code(BitcodeError::CorruptedBitcode), Message); } std::error_code BitcodeReader::error(BitcodeError E) { return ::error(Context, make_error_code(E)); } BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context) : Context(Context), Buffer(Buffer), ValueList(Context), MetadataList(Context) {} BitcodeReader::BitcodeReader(LLVMContext &Context) : Context(Context), Buffer(nullptr), ValueList(Context), MetadataList(Context) {} std::error_code BitcodeReader::materializeForwardReferencedFunctions() { if (WillMaterializeAllForwardRefs) return std::error_code(); // Prevent recursion. WillMaterializeAllForwardRefs = true; while (!BasicBlockFwdRefQueue.empty()) { Function *F = BasicBlockFwdRefQueue.front(); BasicBlockFwdRefQueue.pop_front(); assert(F && "Expected valid function"); if (!BasicBlockFwdRefs.count(F)) // Already materialized. continue; // Check for a function that isn't materializable to prevent an infinite // loop. When parsing a blockaddress stored in a global variable, there // isn't a trivial way to check if a function will have a body without a // linear search through FunctionsWithBodies, so just check it here. if (!F->isMaterializable()) return error("Never resolved function from blockaddress"); // Try to materialize F. if (std::error_code EC = materialize(F)) return EC; } assert(BasicBlockFwdRefs.empty() && "Function missing from queue"); // Reset state. 
WillMaterializeAllForwardRefs = false;
  return std::error_code();
}

/// Drop the buffer and the per-module parsing tables.  The vectors are
/// swapped with empty temporaries rather than clear()ed, so the old storage
/// is destroyed together with the temporary.  All blockaddress forward
/// references must have been resolved by the time this runs.
///
/// NOTE(review): the element types of the temporary vectors are missing in
/// this copy of the file (template arguments appear to have been stripped).
void BitcodeReader::freeState() {
  Buffer = nullptr;
  std::vector().swap(TypeList);
  ValueList.clear();
  MetadataList.clear();
  std::vector().swap(ComdatList);

  std::vector().swap(MAttributes);
  std::vector().swap(FunctionBBs);
  std::vector().swap(FunctionsWithBodies);
  DeferredFunctionInfo.clear();
  DeferredMetadataInfo.clear();
  MDKindMap.clear();

  assert(BasicBlockFwdRefs.empty() && "Unresolved blockaddress fwd references");
  BasicBlockFwdRefQueue.clear();
}

//===----------------------------------------------------------------------===//
//  Helper functions to implement forward reference resolution, etc.
//===----------------------------------------------------------------------===//

/// Convert a string from a record into an std::string, return true on failure.
///
/// Elements Record[Idx..end) are appended to \p Result one char each; Result
/// is not cleared first.  The only failure is Idx being past the end of the
/// record; Idx == Record.size() succeeds and appends nothing.
template
static bool convertToString(ArrayRef Record, unsigned Idx,
                            StrTy &Result) {
  if (Idx > Record.size())
    return true;

  for (unsigned i = Idx, e = Record.size(); i != e; ++i)
    Result += (char)Record[i];
  return false;
}

/// Return true for the legacy linkage encodings (1, 4, 10, 11), the same
/// values that getDecodedLinkage labels "Old value with implicit comdat".
static bool hasImplicitComdat(size_t Val) {
  switch (Val) {
  default:
    return false;
  case 1:  // Old WeakAnyLinkage
  case 4:  // Old LinkOnceAnyLinkage
  case 10: // Old WeakODRLinkage
  case 11: // Old LinkOnceODRLinkage
    return true;
  }
}

/// Map an encoded linkage number to a GlobalValue linkage.  Obsolete encodings
/// are folded onto their current equivalents, and anything unrecognized
/// becomes ExternalLinkage.
static GlobalValue::LinkageTypes getDecodedLinkage(unsigned Val) {
  switch (Val) {
  default: // Map unknown/new linkages to external
  case 0:
    return GlobalValue::ExternalLinkage;
  case 2:
    return GlobalValue::AppendingLinkage;
  case 3:
    return GlobalValue::InternalLinkage;
  case 5:
    return GlobalValue::ExternalLinkage; // Obsolete DLLImportLinkage
  case 6:
    return GlobalValue::ExternalLinkage; // Obsolete DLLExportLinkage
  case 7:
    return GlobalValue::ExternalWeakLinkage;
  case 8:
    return GlobalValue::CommonLinkage;
  case 9:
    return GlobalValue::PrivateLinkage;
  case 12:
    return GlobalValue::AvailableExternallyLinkage;
  case 13:
    return GlobalValue::PrivateLinkage; // Obsolete LinkerPrivateLinkage
  case 14:
    return GlobalValue::PrivateLinkage; // Obsolete LinkerPrivateWeakLinkage
  case 15:
    return GlobalValue::ExternalLinkage; // Obsolete LinkOnceODRAutoHideLinkage
  case 1: // Old value with implicit comdat.
  case 16:
    return GlobalValue::WeakAnyLinkage;
  case 10: // Old value with implicit comdat.
  case 17:
    return GlobalValue::WeakODRLinkage;
  case 4: // Old value with implicit comdat.
  case 18:
    return GlobalValue::LinkOnceAnyLinkage;
  case 11: // Old value with implicit comdat.
  case 19:
    return GlobalValue::LinkOnceODRLinkage;
  }
}

/// Map an encoded visibility number to a GlobalValue visibility.
static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
  switch (Val) {
  default: // Map unknown visibilities to default.
  case 0: return GlobalValue::DefaultVisibility;
  case 1: return GlobalValue::HiddenVisibility;
  case 2: return GlobalValue::ProtectedVisibility;
  }
}

/// Map an encoded DLL storage class number to a GlobalValue storage class.
static GlobalValue::DLLStorageClassTypes
getDecodedDLLStorageClass(unsigned Val) {
  switch (Val) {
  default: // Map unknown values to default.
  case 0: return GlobalValue::DefaultStorageClass;
  case 1: return GlobalValue::DLLImportStorageClass;
  case 2: return GlobalValue::DLLExportStorageClass;
  }
}

/// Map an encoded thread-local mode number to a GlobalVariable TLS mode.
/// Zero means "not thread local"; any unrecognized non-zero value is treated
/// as general dynamic.
static GlobalVariable::ThreadLocalMode getDecodedThreadLocalMode(unsigned Val) {
  switch (Val) {
    case 0: return GlobalVariable::NotThreadLocal;
    default: // Map unknown non-zero value to general dynamic.
    case 1: return GlobalVariable::GeneralDynamicTLSModel;
    case 2: return GlobalVariable::LocalDynamicTLSModel;
    case 3: return GlobalVariable::InitialExecTLSModel;
    case 4: return GlobalVariable::LocalExecTLSModel;
  }
}

/// Map a bitc::CAST_* code to the matching Instruction cast opcode, or -1 if
/// the code is not recognized.
static int getDecodedCastOpcode(unsigned Val) {
  switch (Val) {
  default: return -1;
  case bitc::CAST_TRUNC : return Instruction::Trunc;
  case bitc::CAST_ZEXT : return Instruction::ZExt;
  case bitc::CAST_SEXT : return Instruction::SExt;
  case bitc::CAST_FPTOUI : return Instruction::FPToUI;
  case bitc::CAST_FPTOSI : return Instruction::FPToSI;
  case bitc::CAST_UITOFP : return Instruction::UIToFP;
  case bitc::CAST_SITOFP : return Instruction::SIToFP;
  case bitc::CAST_FPTRUNC : return Instruction::FPTrunc;
  case bitc::CAST_FPEXT : return Instruction::FPExt;
  case bitc::CAST_PTRTOINT: return Instruction::PtrToInt;
  case bitc::CAST_INTTOPTR: return Instruction::IntToPtr;
  case bitc::CAST_BITCAST : return Instruction::BitCast;
  case bitc::CAST_ADDRSPACECAST: return Instruction::AddrSpaceCast;
  }
}

/// Map a bitc::BINOP_* code to an Instruction binary opcode, choosing the
/// floating-point or integer form from \p Ty.  Returns -1 when \p Ty is
/// neither an int/FP type nor a vector of one, when the code is unknown, or
/// when the operation has no floating-point form.  \p Ty is dereferenced
/// unconditionally, so it must not be null.
static int getDecodedBinaryOpcode(unsigned Val, Type *Ty) {
  bool IsFP = Ty->isFPOrFPVectorTy();
  // BinOps are only valid for int/fp or vector of int/fp types
  if (!IsFP && !Ty->isIntOrIntVectorTy())
    return -1;

  switch (Val) {
  default:
    return -1;
  case bitc::BINOP_ADD:
    return IsFP ? Instruction::FAdd : Instruction::Add;
  case bitc::BINOP_SUB:
    return IsFP ? Instruction::FSub : Instruction::Sub;
  case bitc::BINOP_MUL:
    return IsFP ? Instruction::FMul : Instruction::Mul;
  case bitc::BINOP_UDIV:
    return IsFP ? -1 : Instruction::UDiv;
  case bitc::BINOP_SDIV:
    return IsFP ? Instruction::FDiv : Instruction::SDiv;
  case bitc::BINOP_UREM:
    return IsFP ? -1 : Instruction::URem;
  case bitc::BINOP_SREM:
    return IsFP ? Instruction::FRem : Instruction::SRem;
  case bitc::BINOP_SHL:
    return IsFP ? -1 : Instruction::Shl;
  case bitc::BINOP_LSHR:
    return IsFP ? -1 : Instruction::LShr;
  case bitc::BINOP_ASHR:
    return IsFP ? -1 : Instruction::AShr;
  case bitc::BINOP_AND:
    return IsFP ? -1 : Instruction::And;
  case bitc::BINOP_OR:
    return IsFP ? -1 : Instruction::Or;
  case bitc::BINOP_XOR:
    return IsFP ? -1 : Instruction::Xor;
  }
}

/// Map a bitc::RMW_* code to an AtomicRMWInst operation, or BAD_BINOP if the
/// code is not recognized.
static AtomicRMWInst::BinOp getDecodedRMWOperation(unsigned Val) {
  switch (Val) {
  default: return AtomicRMWInst::BAD_BINOP;
  case bitc::RMW_XCHG: return AtomicRMWInst::Xchg;
  case bitc::RMW_ADD: return AtomicRMWInst::Add;
  case bitc::RMW_SUB: return AtomicRMWInst::Sub;
  case bitc::RMW_AND: return AtomicRMWInst::And;
  case bitc::RMW_NAND: return AtomicRMWInst::Nand;
  case bitc::RMW_OR: return AtomicRMWInst::Or;
  case bitc::RMW_XOR: return AtomicRMWInst::Xor;
  case bitc::RMW_MAX: return AtomicRMWInst::Max;
  case bitc::RMW_MIN: return AtomicRMWInst::Min;
  case bitc::RMW_UMAX: return AtomicRMWInst::UMax;
  case bitc::RMW_UMIN: return AtomicRMWInst::UMin;
  }
}

/// Map a bitc::ORDERING_* code to an AtomicOrdering.
static AtomicOrdering getDecodedOrdering(unsigned Val) {
  switch (Val) {
  case bitc::ORDERING_NOTATOMIC: return NotAtomic;
  case bitc::ORDERING_UNORDERED: return Unordered;
  case bitc::ORDERING_MONOTONIC: return Monotonic;
  case bitc::ORDERING_ACQUIRE: return Acquire;
  case bitc::ORDERING_RELEASE: return Release;
  case bitc::ORDERING_ACQREL: return AcquireRelease;
  default: // Map unknown orderings to sequentially-consistent.
  case bitc::ORDERING_SEQCST: return SequentiallyConsistent;
  }
}

/// Map a bitc::SYNCHSCOPE_* code to a SynchronizationScope.
static SynchronizationScope getDecodedSynchScope(unsigned Val) {
  switch (Val) {
  case bitc::SYNCHSCOPE_SINGLETHREAD: return SingleThread;
  default: // Map unknown scopes to cross-thread.
  case bitc::SYNCHSCOPE_CROSSTHREAD: return CrossThread;
  }
}

/// Map a bitc::COMDAT_SELECTION_KIND_* code to a Comdat selection kind.
static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) {
  switch (Val) {
  default: // Map unknown selection kinds to any.
  case bitc::COMDAT_SELECTION_KIND_ANY:
    return Comdat::Any;
  case bitc::COMDAT_SELECTION_KIND_EXACT_MATCH:
    return Comdat::ExactMatch;
  case bitc::COMDAT_SELECTION_KIND_LARGEST:
    return Comdat::Largest;
  case bitc::COMDAT_SELECTION_KIND_NO_DUPLICATES:
    return Comdat::NoDuplicates;
  case bitc::COMDAT_SELECTION_KIND_SAME_SIZE:
    return Comdat::SameSize;
  }
}

/// Build a FastMathFlags value from the bit mask \p Val, setting each flag
/// whose FastMathFlags bit is present.  Bits not tested here are ignored.
static FastMathFlags getDecodedFastMathFlags(unsigned Val) {
  FastMathFlags FMF;
  if (0 != (Val & FastMathFlags::UnsafeAlgebra))
    FMF.setUnsafeAlgebra();
  if (0 != (Val & FastMathFlags::NoNaNs))
    FMF.setNoNaNs();
  if (0 != (Val & FastMathFlags::NoInfs))
    FMF.setNoInfs();
  if (0 != (Val & FastMathFlags::NoSignedZeros))
    FMF.setNoSignedZeros();
  if (0 != (Val & FastMathFlags::AllowReciprocal))
    FMF.setAllowReciprocal();
  return FMF;
}

/// Linkage encodings 5 and 6 are the obsolete DLLImport/DLLExport linkages
/// (getDecodedLinkage turns both into ExternalLinkage).  Carry that
/// information over as a DLL storage class on \p GV; any other value leaves
/// \p GV untouched.
static void upgradeDLLImportExportLinkage(llvm::GlobalValue *GV, unsigned Val) {
  switch (Val) {
  case 5: GV->setDLLStorageClass(GlobalValue::DLLImportStorageClass); break;
  case 6: GV->setDLLStorageClass(GlobalValue::DLLExportStorageClass); break;
  }
}

namespace llvm {
namespace {
/// \brief A class for maintaining the slot number definition
/// as a placeholder for the actual definition for forward constants defs.
///
/// It is a ConstantExpr with the otherwise unused opcode UserOp1 and a single
/// i32 undef operand; classof() recognizes placeholders by that opcode.
class ConstantPlaceHolder : public ConstantExpr {
  void operator=(const ConstantPlaceHolder &) = delete;

public:
  // allocate space for exactly one operand
  void *operator new(size_t s) { return User::operator new(s, 1); }
  explicit ConstantPlaceHolder(Type *Ty, LLVMContext &Context)
      : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) {
    Op<0>() = UndefValue::get(Type::getInt32Ty(Context));
  }

  /// \brief Methods to support type inquiry through isa, cast, and dyn_cast.
  // NOTE(review): the template arguments of isa/cast are missing in this copy
  // of the file.
  static bool classof(const Value *V) {
    return isa(V) &&
           cast(V)->getOpcode() == Instruction::UserOp1;
  }

  /// Provide fast operand accessors
  DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
} // end anonymous namespace

// FIXME: can we inherit this from ConstantExpr?
template <>
struct OperandTraits :
  public FixedNumOperandTraits {
};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantPlaceHolder, Value)
} // end namespace llvm

// NOTE(review): template arguments (on dyn_cast/cast/isa, SmallVector,
// std::pair and the OperandTraits specialization above) are missing
// throughout this copy of the file; they appear to have been stripped during
// extraction.

/// Install \p V in slot \p Idx, growing the list if necessary.  If the slot
/// already holds a forward-reference placeholder, a Constant placeholder is
/// queued in ResolveConstants for bulk resolution later, while any other
/// placeholder is RAUW'd with \p V and deleted right away.
void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) {
  if (Idx == size()) {
    push_back(V);
    return;
  }

  if (Idx >= size())
    resize(Idx+1);

  WeakVH &OldV = ValuePtrs[Idx];
  if (!OldV) {
    OldV = V;
    return;
  }

  // Handle constants and non-constants (e.g. instrs) differently for
  // efficiency.
  if (Constant *PHC = dyn_cast(&*OldV)) {
    ResolveConstants.push_back(std::make_pair(PHC, Idx));
    OldV = V;
  } else {
    // If there was a forward reference to this value, replace it.
    Value *PrevVal = OldV;
    OldV->replaceAllUsesWith(V);
    delete PrevVal;
  }
}

/// Return the constant in slot \p Idx, creating a ConstantPlaceHolder of type
/// \p Ty if the slot is still empty.  A type mismatch with an existing entry
/// is a fatal error (report_fatal_error), not a null return.
Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
                                                    Type *Ty) {
  if (Idx >= size())
    resize(Idx + 1);

  if (Value *V = ValuePtrs[Idx]) {
    if (Ty != V->getType())
      report_fatal_error("Type mismatch in constant table!");
    return cast(V);
  }

  // Create and return a placeholder, which will later be RAUW'd.
  Constant *C = new ConstantPlaceHolder(Ty, Context);
  ValuePtrs[Idx] = C;
  return C;
}

/// Return the value in slot \p Idx.  If the slot is empty and \p Ty is given,
/// an Argument of that type is created as a placeholder.  Returns null when
/// \p Idx is UINT_MAX, when \p Ty is given but differs from the existing
/// value's type, or when the slot is empty and no type was supplied.
Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
  // Bail out for a clearly invalid value. This would make us call resize(0),
  // because Idx + 1 wraps around to zero.
  if (Idx == UINT_MAX)
    return nullptr;

  if (Idx >= size())
    resize(Idx + 1);

  if (Value *V = ValuePtrs[Idx]) {
    // If the types don't match, it's invalid.
    if (Ty && Ty != V->getType())
      return nullptr;
    return V;
  }

  // No type specified, must be invalid reference.
  if (!Ty) return nullptr;

  // Create and return a placeholder, which will later be RAUW'd.
  Value *V = new Argument(Ty);
  ValuePtrs[Idx] = V;
  return V;
}

/// Once all constants are read, this method bulk resolves any forward
/// references.  The idea behind this is that we sometimes get constants (such
/// as large arrays) which reference *many* forward ref constants.  Replacing
/// each of these causes a lot of thrashing when building/reuniquing the
/// constant.  Instead of doing this, we look at all the uses and rewrite all
/// the place holders at once for any constant that uses a placeholder.
void BitcodeReaderValueList::resolveConstantForwardRefs() {
  // Sort the values by-pointer so that they are efficient to look up with a
  // binary search.
  std::sort(ResolveConstants.begin(), ResolveConstants.end());

  SmallVector NewOps;

  while (!ResolveConstants.empty()) {
    Value *RealVal = operator[](ResolveConstants.back().second);
    Constant *Placeholder = ResolveConstants.back().first;
    ResolveConstants.pop_back();

    // Loop over all users of the placeholder, updating them to reference the
    // new value.  If they reference more than one placeholder, update them all
    // at once.
    while (!Placeholder->use_empty()) {
      auto UI = Placeholder->user_begin();
      User *U = *UI;

      // If the using object isn't uniqued, just update the operands.  This
      // handles instructions and initializers for global variables.
      if (!isa(U) || isa(U)) {
        UI.getUse().set(RealVal);
        continue;
      }

      // Otherwise, we have a constant that uses the placeholder.  Replace that
      // constant with a new constant that has *all* placeholder uses updated.
      Constant *UserC = cast(U);
      for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end();
           I != E; ++I) {
        Value *NewOp;
        if (!isa(*I)) {
          // Not a placeholder reference.
          NewOp = *I;
        } else if (*I == Placeholder) {
          // Common case is that it just references this one placeholder.
          NewOp = RealVal;
        } else {
          // Otherwise, look up the placeholder in ResolveConstants.  This
          // relies on the vector still being sorted; entries are only ever
          // removed from the back.
          ResolveConstantsTy::iterator It =
            std::lower_bound(ResolveConstants.begin(), ResolveConstants.end(),
                             std::pair(cast(*I),
                                                            0));
          assert(It != ResolveConstants.end() && It->first == *I);
          NewOp = operator[](It->second);
        }

        NewOps.push_back(cast(NewOp));
      }

      // Make the new constant.
      Constant *NewC;
      if (ConstantArray *UserCA = dyn_cast(UserC)) {
        NewC = ConstantArray::get(UserCA->getType(), NewOps);
      } else if (ConstantStruct *UserCS = dyn_cast(UserC)) {
        NewC = ConstantStruct::get(UserCS->getType(), NewOps);
      } else if (isa(UserC)) {
        NewC = ConstantVector::get(NewOps);
      } else {
        assert(isa(UserC) && "Must be a ConstantExpr.");
        NewC = cast(UserC)->getWithOperands(NewOps);
      }

      UserC->replaceAllUsesWith(NewC);
      UserC->destroyConstant();
      NewOps.clear();
    }

    // Update all ValueHandles, they should be the only users at this point.
    Placeholder->replaceAllUsesWith(RealVal);
    delete Placeholder;
  }
}

/// Install \p MD in slot \p Idx, growing the list if necessary.  If the slot
/// already holds something, it is treated as a forward-reference placeholder:
/// its uses are replaced with \p MD and the outstanding-forward-reference
/// count is decremented.  The slot itself is not written on that path.
/// NOTE(review): presumably the tracking reference follows the RAUW --
/// confirm.
void BitcodeReaderMetadataList::assignValue(Metadata *MD, unsigned Idx) {
  if (Idx == size()) {
    push_back(MD);
    return;
  }

  if (Idx >= size())
    resize(Idx+1);

  TrackingMDRef &OldMD = MetadataPtrs[Idx];
  if (!OldMD) {
    OldMD.reset(MD);
    return;
  }

  // If there was a forward reference to this value, replace it.
  TempMDTuple PrevMD(cast(OldMD.get()));
  PrevMD->replaceAllUsesWith(MD);
  --NumFwdRefs;
}

/// Return the metadata in slot \p Idx.  If the slot is empty, a temporary
/// MDNode is created as a placeholder, and the forward-reference bookkeeping
/// (AnyFwdRefs, Min/MaxFwdRef, NumFwdRefs) is updated.
Metadata *BitcodeReaderMetadataList::getMetadataFwdRef(unsigned Idx) {
  if (Idx >= size())
    resize(Idx + 1);

  if (Metadata *MD = MetadataPtrs[Idx])
    return MD;

  // Track forward refs to be resolved later.
  if (AnyFwdRefs) {
    MinFwdRef = std::min(MinFwdRef, Idx);
    MaxFwdRef = std::max(MaxFwdRef, Idx);
  } else {
    // First forward ref since the last reset: start a fresh range.
    AnyFwdRefs = true;
    MinFwdRef = MaxFwdRef = Idx;
  }
  ++NumFwdRefs;

  // Create and return a placeholder, which will later be RAUW'd.
  Metadata *MD = MDNode::getTemporary(Context, None).release();
  MetadataPtrs[Idx].reset(MD);
  return MD;
}

/// Like getMetadataFwdRef, but returns null if the slot's content does not
/// pass the dyn_cast_or_null below.
MDNode *BitcodeReaderMetadataList::getMDNodeFwdRefOrNull(unsigned Idx) {
  return dyn_cast_or_null(getMetadataFwdRef(Idx));
}

/// Call resolveCycles() on every node in the recorded forward-reference index
/// range.  Does nothing if no forward reference was created since the last
/// run, or if some placeholders are still unassigned.
void BitcodeReaderMetadataList::tryToResolveCycles() {
  if (!AnyFwdRefs)
    // Nothing to do.
    return;

  if (NumFwdRefs)
    // Still forward references... can't resolve cycles.
    return;

  // Resolve any cycles.
  for (unsigned I = MinFwdRef, E = MaxFwdRef + 1; I != E; ++I) {
    auto &MD = MetadataPtrs[I];
    auto *N = dyn_cast_or_null(MD);
    if (!N)
      continue;

    assert(!N->isTemporary() && "Unexpected forward reference");
    N->resolveCycles();
  }

  // Make sure we return early again until there's another forward ref.
  AnyFwdRefs = false;
}

/// Return the type with the given table index, or null if the index is out of
/// range.  An in-range but still-empty entry is filled with a fresh
/// identified struct type as a placeholder.
Type *BitcodeReader::getTypeByID(unsigned ID) {
  // The type table size is always specified correctly.
  if (ID >= TypeList.size())
    return nullptr;

  if (Type *Ty = TypeList[ID])
    return Ty;

  // If we have a forward reference, the only possible case is when it is to a
  // named struct.  Just create a placeholder for now.
  return TypeList[ID] = createIdentifiedStructType(Context);
}

/// Create a named identified struct type and record it in
/// IdentifiedStructTypes.
StructType *BitcodeReader::createIdentifiedStructType(LLVMContext &Context,
                                                      StringRef Name) {
  auto *Ret = StructType::create(Context, Name);
  IdentifiedStructTypes.push_back(Ret);
  return Ret;
}

/// Create an unnamed identified struct type and record it in
/// IdentifiedStructTypes.
StructType *BitcodeReader::createIdentifiedStructType(LLVMContext &Context) {
  auto *Ret = StructType::create(Context);
  IdentifiedStructTypes.push_back(Ret);
  return Ret;
}

//===----------------------------------------------------------------------===//
//  Functions for parsing blocks from the bitcode file
//===----------------------------------------------------------------------===//


/// \brief This fills an AttrBuilder object with the LLVM attributes that have
/// been decoded from the given integer. This function must stay in sync with
/// 'encodeLLVMAttributesForBitcode'.
static void decodeLLVMAttributesForBitcode(AttrBuilder &B,
                                           uint64_t EncodedAttrs) {
  // FIXME: Remove in 4.0.

  // The alignment is stored as a 16-bit raw value from bits 31--16.  We shift
  // the bits above 31 down by 11 bits.
  unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16;
  // NOTE(review): this is an assert on a value taken straight from the
  // record; a non-power-of-two alignment is not diagnosed in builds without
  // assertions.
  assert((!Alignment || isPowerOf2_32(Alignment)) &&
         "Alignment must be a power of two.");

  if (Alignment)
    B.addAlignmentAttr(Alignment);
  // Raw attribute bits: the 20 bits at positions 32..51 move down to 21..40
  // and are combined with the low 16 bits, which stay in place.
  B.addRawValue(((EncodedAttrs & (0xfffffULL << 32)) >> 11) |
                (EncodedAttrs & 0xffff));
}

/// Parse the PARAMATTR block, appending one AttributeSet to MAttributes per
/// ENTRY / ENTRY_OLD record.  Records with other codes are ignored.  Fails if
/// the block cannot be entered, if MAttributes is already populated, if the
/// stream is malformed, or if an ENTRY_OLD record has an odd length.
std::error_code BitcodeReader::parseAttributeBlock() {
  if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
    return error("Invalid record");

  if (!MAttributes.empty())
    return error("Invalid multiple blocks");

  SmallVector Record;

  SmallVector Attrs;

  // Read all the records.
  while (1) {
    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();

    switch (Entry.Kind) {
    case BitstreamEntry::SubBlock: // Handled for us already.
    case BitstreamEntry::Error:
      return error("Malformed block");
    case BitstreamEntry::EndBlock:
      return std::error_code();
    case BitstreamEntry::Record:
      // The interesting case.
      break;
    }

    // Read a record.
    Record.clear();
    switch (Stream.readRecord(Entry.ID, Record)) {
    default:  // Default behavior: ignore.
      break;
    case bitc::PARAMATTR_CODE_ENTRY_OLD: { // ENTRY: [paramidx0, attr0, ...]
      // FIXME: Remove in 4.0.
      // The record is a flat list of (index, encoded attributes) pairs.
      if (Record.size() & 1)
        return error("Invalid record");

      for (unsigned i = 0, e = Record.size(); i != e; i += 2) {
        AttrBuilder B;
        decodeLLVMAttributesForBitcode(B, Record[i+1]);
        Attrs.push_back(AttributeSet::get(Context, Record[i], B));
      }

      MAttributes.push_back(AttributeSet::get(Context, Attrs));
      Attrs.clear();
      break;
    }
    case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [attrgrp0, attrgrp1, ...]
      // NOTE(review): MAttributeGroups is declared as a std::map, so
      // operator[] presumably default-inserts an entry for a group ID that
      // was never defined rather than reporting it -- confirm.
      for (unsigned i = 0, e = Record.size(); i != e; ++i)
        Attrs.push_back(MAttributeGroups[Record[i]]);

      MAttributes.push_back(AttributeSet::get(Context, Attrs));
      Attrs.clear();
      break;
    }
    }
  }
}

// Returns Attribute::None on unrecognized codes.
static Attribute::AttrKind getAttrFromCode(uint64_t Code) { switch (Code) { default: return Attribute::None; case bitc::ATTR_KIND_ALIGNMENT: return Attribute::Alignment; case bitc::ATTR_KIND_ALWAYS_INLINE: return Attribute::AlwaysInline; case bitc::ATTR_KIND_ARGMEMONLY: return Attribute::ArgMemOnly; case bitc::ATTR_KIND_BUILTIN: return Attribute::Builtin; case bitc::ATTR_KIND_BY_VAL: return Attribute::ByVal; case bitc::ATTR_KIND_IN_ALLOCA: return Attribute::InAlloca; case bitc::ATTR_KIND_COLD: return Attribute::Cold; case bitc::ATTR_KIND_CONVERGENT: return Attribute::Convergent; case bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY: return Attribute::InaccessibleMemOnly; case bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY: return Attribute::InaccessibleMemOrArgMemOnly; case bitc::ATTR_KIND_INLINE_HINT: return Attribute::InlineHint; case bitc::ATTR_KIND_IN_REG: return Attribute::InReg; case bitc::ATTR_KIND_JUMP_TABLE: return Attribute::JumpTable; case bitc::ATTR_KIND_MIN_SIZE: return Attribute::MinSize; case bitc::ATTR_KIND_NAKED: return Attribute::Naked; case bitc::ATTR_KIND_NEST: return Attribute::Nest; case bitc::ATTR_KIND_NO_ALIAS: return Attribute::NoAlias; case bitc::ATTR_KIND_NO_BUILTIN: return Attribute::NoBuiltin; case bitc::ATTR_KIND_NO_CAPTURE: return Attribute::NoCapture; case bitc::ATTR_KIND_NO_DUPLICATE: return Attribute::NoDuplicate; case bitc::ATTR_KIND_NO_IMPLICIT_FLOAT: return Attribute::NoImplicitFloat; case bitc::ATTR_KIND_NO_INLINE: return Attribute::NoInline; case bitc::ATTR_KIND_NO_RECURSE: return Attribute::NoRecurse; case bitc::ATTR_KIND_NON_LAZY_BIND: return Attribute::NonLazyBind; case bitc::ATTR_KIND_NON_NULL: return Attribute::NonNull; case bitc::ATTR_KIND_DEREFERENCEABLE: return Attribute::Dereferenceable; case bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL: return Attribute::DereferenceableOrNull; case bitc::ATTR_KIND_NO_RED_ZONE: return Attribute::NoRedZone; case bitc::ATTR_KIND_NO_RETURN: return Attribute::NoReturn; case bitc::ATTR_KIND_NO_UNWIND: 
return Attribute::NoUnwind; case bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE: return Attribute::OptimizeForSize; case bitc::ATTR_KIND_OPTIMIZE_NONE: return Attribute::OptimizeNone; case bitc::ATTR_KIND_READ_NONE: return Attribute::ReadNone; case bitc::ATTR_KIND_READ_ONLY: return Attribute::ReadOnly; case bitc::ATTR_KIND_RETURNED: return Attribute::Returned; case bitc::ATTR_KIND_RETURNS_TWICE: return Attribute::ReturnsTwice; case bitc::ATTR_KIND_S_EXT: return Attribute::SExt; case bitc::ATTR_KIND_STACK_ALIGNMENT: return Attribute::StackAlignment; case bitc::ATTR_KIND_STACK_PROTECT: return Attribute::StackProtect; case bitc::ATTR_KIND_STACK_PROTECT_REQ: return Attribute::StackProtectReq; case bitc::ATTR_KIND_STACK_PROTECT_STRONG: return Attribute::StackProtectStrong; case bitc::ATTR_KIND_SAFESTACK: return Attribute::SafeStack; case bitc::ATTR_KIND_STRUCT_RET: return Attribute::StructRet; case bitc::ATTR_KIND_SANITIZE_ADDRESS: return Attribute::SanitizeAddress; case bitc::ATTR_KIND_SANITIZE_THREAD: return Attribute::SanitizeThread; case bitc::ATTR_KIND_SANITIZE_MEMORY: return Attribute::SanitizeMemory; case bitc::ATTR_KIND_SWIFT_SELF: return Attribute::SwiftSelf; case bitc::ATTR_KIND_UW_TABLE: return Attribute::UWTable; case bitc::ATTR_KIND_Z_EXT: return Attribute::ZExt; } } std::error_code BitcodeReader::parseAlignmentValue(uint64_t Exponent, unsigned &Alignment) { // Note: Alignment in bitcode files is incremented by 1, so that zero // can be used for default alignment. 
if (Exponent > Value::MaxAlignmentExponent + 1) return error("Invalid alignment value"); Alignment = (1 << static_cast(Exponent)) >> 1; return std::error_code(); } std::error_code BitcodeReader::parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind) { *Kind = getAttrFromCode(Code); if (*Kind == Attribute::None) return error(BitcodeError::CorruptedBitcode, "Unknown attribute kind (" + Twine(Code) + ")"); return std::error_code(); } std::error_code BitcodeReader::parseAttributeGroupBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID)) return error("Invalid record"); if (!MAttributeGroups.empty()) return error("Invalid multiple blocks"); SmallVector Record; // Read all the records. while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...] if (Record.size() < 3) return error("Invalid record"); uint64_t GrpID = Record[0]; uint64_t Idx = Record[1]; // Index of the object this attribute refers to. 
AttrBuilder B; for (unsigned i = 2, e = Record.size(); i != e; ++i) { if (Record[i] == 0) { // Enum attribute Attribute::AttrKind Kind; if (std::error_code EC = parseAttrKind(Record[++i], &Kind)) return EC; B.addAttribute(Kind); } else if (Record[i] == 1) { // Integer attribute Attribute::AttrKind Kind; if (std::error_code EC = parseAttrKind(Record[++i], &Kind)) return EC; if (Kind == Attribute::Alignment) B.addAlignmentAttr(Record[++i]); else if (Kind == Attribute::StackAlignment) B.addStackAlignmentAttr(Record[++i]); else if (Kind == Attribute::Dereferenceable) B.addDereferenceableAttr(Record[++i]); else if (Kind == Attribute::DereferenceableOrNull) B.addDereferenceableOrNullAttr(Record[++i]); } else { // String attribute assert((Record[i] == 3 || Record[i] == 4) && "Invalid attribute group entry"); bool HasValue = (Record[i++] == 4); SmallString<64> KindStr; SmallString<64> ValStr; while (Record[i] != 0 && i != e) KindStr += Record[i++]; assert(Record[i] == 0 && "Kind string not null terminated"); if (HasValue) { // Has a value associated with it. ++i; // Skip the '0' that terminates the "kind" string. while (Record[i] != 0 && i != e) ValStr += Record[i++]; assert(Record[i] == 0 && "Value string not null terminated"); } B.addAttribute(KindStr.str(), ValStr.str()); } } MAttributeGroups[GrpID] = AttributeSet::get(Context, Idx, B); break; } } } } std::error_code BitcodeReader::parseTypeTable() { if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW)) return error("Invalid record"); return parseTypeTableBody(); } std::error_code BitcodeReader::parseTypeTableBody() { if (!TypeList.empty()) return error("Invalid multiple blocks"); SmallVector Record; unsigned NumRecords = 0; SmallString<64> TypeName; // Read all the records for this type table. while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. 
case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: if (NumRecords != TypeList.size()) return error("Malformed block"); return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); Type *ResultTy = nullptr; switch (Stream.readRecord(Entry.ID, Record)) { default: return error("Invalid value"); case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] // TYPE_CODE_NUMENTRY contains a count of the number of types in the // type list. This allows us to reserve space. if (Record.size() < 1) return error("Invalid record"); TypeList.resize(Record[0]); continue; case bitc::TYPE_CODE_VOID: // VOID ResultTy = Type::getVoidTy(Context); break; case bitc::TYPE_CODE_HALF: // HALF ResultTy = Type::getHalfTy(Context); break; case bitc::TYPE_CODE_FLOAT: // FLOAT ResultTy = Type::getFloatTy(Context); break; case bitc::TYPE_CODE_DOUBLE: // DOUBLE ResultTy = Type::getDoubleTy(Context); break; case bitc::TYPE_CODE_X86_FP80: // X86_FP80 ResultTy = Type::getX86_FP80Ty(Context); break; case bitc::TYPE_CODE_FP128: // FP128 ResultTy = Type::getFP128Ty(Context); break; case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128 ResultTy = Type::getPPC_FP128Ty(Context); break; case bitc::TYPE_CODE_LABEL: // LABEL ResultTy = Type::getLabelTy(Context); break; case bitc::TYPE_CODE_METADATA: // METADATA ResultTy = Type::getMetadataTy(Context); break; case bitc::TYPE_CODE_X86_MMX: // X86_MMX ResultTy = Type::getX86_MMXTy(Context); break; case bitc::TYPE_CODE_TOKEN: // TOKEN ResultTy = Type::getTokenTy(Context); break; case bitc::TYPE_CODE_INTEGER: { // INTEGER: [width] if (Record.size() < 1) return error("Invalid record"); uint64_t NumBits = Record[0]; if (NumBits < IntegerType::MIN_INT_BITS || NumBits > IntegerType::MAX_INT_BITS) return error("Bitwidth for integer type out of range"); ResultTy = IntegerType::get(Context, NumBits); break; } case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or 
// [pointee type, address space] if (Record.size() < 1) return error("Invalid record"); unsigned AddressSpace = 0; if (Record.size() == 2) AddressSpace = Record[1]; ResultTy = getTypeByID(Record[0]); if (!ResultTy || !PointerType::isValidElementType(ResultTy)) return error("Invalid type"); ResultTy = PointerType::get(ResultTy, AddressSpace); break; } case bitc::TYPE_CODE_FUNCTION_OLD: { // FIXME: attrid is dead, remove it in LLVM 4.0 // FUNCTION: [vararg, attrid, retty, paramty x N] if (Record.size() < 3) return error("Invalid record"); SmallVector ArgTys; for (unsigned i = 3, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) ArgTys.push_back(T); else break; } ResultTy = getTypeByID(Record[2]); if (!ResultTy || ArgTys.size() < Record.size()-3) return error("Invalid type"); ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); break; } case bitc::TYPE_CODE_FUNCTION: { // FUNCTION: [vararg, retty, paramty x N] if (Record.size() < 2) return error("Invalid record"); SmallVector ArgTys; for (unsigned i = 2, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) { if (!FunctionType::isValidArgumentType(T)) return error("Invalid function argument type"); ArgTys.push_back(T); } else break; } ResultTy = getTypeByID(Record[1]); if (!ResultTy || ArgTys.size() < Record.size()-2) return error("Invalid type"); ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); break; } case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) return error("Invalid record"); SmallVector EltTys; for (unsigned i = 1, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) EltTys.push_back(T); else break; } if (EltTys.size() != Record.size()-1) return error("Invalid type"); ResultTy = StructType::get(Context, EltTys, Record[0]); break; } case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N] if (convertToString(Record, 0, TypeName)) return error("Invalid record"); continue; case 
bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) return error("Invalid record"); if (NumRecords >= TypeList.size()) return error("Invalid TYPE table"); // Check to see if this was forward referenced, if so fill in the temp. StructType *Res = cast_or_null(TypeList[NumRecords]); if (Res) { Res->setName(TypeName); TypeList[NumRecords] = nullptr; } else // Otherwise, create a new struct. Res = createIdentifiedStructType(Context, TypeName); TypeName.clear(); SmallVector EltTys; for (unsigned i = 1, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) EltTys.push_back(T); else break; } if (EltTys.size() != Record.size()-1) return error("Invalid record"); Res->setBody(EltTys, Record[0]); ResultTy = Res; break; } case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: [] if (Record.size() != 1) return error("Invalid record"); if (NumRecords >= TypeList.size()) return error("Invalid TYPE table"); // Check to see if this was forward referenced, if so fill in the temp. StructType *Res = cast_or_null(TypeList[NumRecords]); if (Res) { Res->setName(TypeName); TypeList[NumRecords] = nullptr; } else // Otherwise, create a new struct with no body. 
Res = createIdentifiedStructType(Context, TypeName); TypeName.clear(); ResultTy = Res; break; } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] if (Record.size() < 2) return error("Invalid record"); ResultTy = getTypeByID(Record[1]); if (!ResultTy || !ArrayType::isValidElementType(ResultTy)) return error("Invalid type"); ResultTy = ArrayType::get(ResultTy, Record[0]); break; case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] if (Record.size() < 2) return error("Invalid record"); if (Record[0] == 0) return error("Invalid vector length"); ResultTy = getTypeByID(Record[1]); if (!ResultTy || !StructType::isValidElementType(ResultTy)) return error("Invalid type"); ResultTy = VectorType::get(ResultTy, Record[0]); break; } if (NumRecords >= TypeList.size()) return error("Invalid TYPE table"); if (TypeList[NumRecords]) return error( "Invalid TYPE table: Only named structs can be forward referenced"); assert(ResultTy && "Didn't read a type?"); TypeList[NumRecords++] = ResultTy; } } std::error_code BitcodeReader::parseOperandBundleTags() { if (Stream.EnterSubBlock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID)) return error("Invalid record"); if (!BundleTags.empty()) return error("Invalid multiple blocks"); SmallVector Record; while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; } // Tags are implicitly mapped to integers by their order. if (Stream.readRecord(Entry.ID, Record) != bitc::OPERAND_BUNDLE_TAG) return error("Invalid record"); // OPERAND_BUNDLE_TAG: [strchr x N] BundleTags.emplace_back(); if (convertToString(Record, 0, BundleTags.back())) return error("Invalid record"); Record.clear(); } } /// Associate a value with its name from the given index in the provided record. 
///
/// \param Record     Symbol-table record; element 0 is the value ID and the
///                   name characters start at \p NameIndex.
/// \param NameIndex  Index of the first name character in \p Record.
/// \param TT         Target triple, consulted when fixing up a comdat marker.
/// \returns the named value, or an error if the name cannot be extracted, the
/// value ID does not refer to an existing value, or the name contains an
/// embedded NUL.
ErrorOr BitcodeReader::recordValue(SmallVectorImpl &Record,
                                   unsigned NameIndex, Triple &TT) {
  SmallString<128> ValueName;
  if (convertToString(Record, NameIndex, ValueName))
    return error("Invalid record");
  unsigned ValueID = Record[0];
  if (ValueID >= ValueList.size() || !ValueList[ValueID])
    return error("Invalid record");
  Value *V = ValueList[ValueID];

  StringRef NameStr(ValueName.data(), ValueName.size());
  if (NameStr.find_first_of(0) != StringRef::npos)
    return error("Invalid value name");
  V->setName(NameStr);
  auto *GO = dyn_cast(V);
  if (GO) {
    // A comdat pointer equal to 1 is a sentinel, not a real comdat: it is
    // replaced by null on Mach-O targets and otherwise by a comdat named
    // after the value.
    // NOTE(review): the sentinel is presumably set elsewhere in the reader
    // before the name is known -- confirm at the site that sets it.
    if (GO->getComdat() == reinterpret_cast(1)) {
      if (TT.isOSBinFormatMachO())
        GO->setComdat(nullptr);
      else
        GO->setComdat(TheModule->getOrInsertComdat(V->getName()));
    }
  }
  return V;
}

/// Helper to note and return the current location, and jump to the given
/// offset.
///
/// \p Offset is multiplied by 32 to obtain the bit position. One entry is
/// then read from the stream; in builds with assertions it is checked to be
/// the VALUE_SYMTAB_BLOCK_ID sub-block entry.
static uint64_t jumpToValueSymbolTable(uint64_t Offset,
                                       BitstreamCursor &Stream) {
  // Save the current parsing location so we can jump back at the end
  // of the VST read.
  uint64_t CurrentBit = Stream.GetCurrentBitNo();
  Stream.JumpToBit(Offset * 32);
#ifndef NDEBUG
  // Do some checking if we are in debug mode.
  BitstreamEntry Entry = Stream.advance();
  assert(Entry.Kind == BitstreamEntry::SubBlock);
  assert(Entry.ID == bitc::VALUE_SYMTAB_BLOCK_ID);
#else
  // In NDEBUG mode ignore the output so we don't get an unused variable
  // warning.
  Stream.advance();
#endif
  return CurrentBit;
}

/// Parse the value symbol table at either the current parsing location or
/// at the given bit offset if provided.
///
/// When \p Offset is non-zero the stream is moved to the table first and
/// moved back to its previous position once the end of the block is reached.
/// NOTE(review): the position is restored only on the EndBlock path; the
/// error returns leave the stream where it is.
std::error_code BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
  // Only assigned, and only read, when Offset > 0.
  uint64_t CurrentBit;
  // Pass in the Offset to distinguish between calling for the module-level
  // VST (where we want to jump to the VST offset) and the function-level
  // VST (where we don't).
  if (Offset > 0)
    CurrentBit = jumpToValueSymbolTable(Offset, Stream);

  // Compute the delta between the bitcode indices in the VST (the word offset
  // to the word-aligned ENTER_SUBBLOCK for the function block) and that
  // expected by the lazy reader. The reader's EnterSubBlock expects to have
  // already read the ENTER_SUBBLOCK code (size getAbbrevIDWidth) and BlockID
  // (size BlockIDWidth). Note that we access the stream's AbbrevID width here
  // just before entering the VST subblock because: 1) the EnterSubBlock
  // changes the AbbrevID width; 2) the VST block is nested within the same
  // outer MODULE_BLOCK as the FUNCTION_BLOCKs and therefore have the same
  // AbbrevID width before calling EnterSubBlock; and 3) when we want to
  // jump to the FUNCTION_BLOCK using this offset later, we don't want
  // to rely on the stream's AbbrevID width being that of the MODULE_BLOCK.
  unsigned FuncBitcodeOffsetDelta =
      Stream.getAbbrevIDWidth() + bitc::BlockIDWidth;

  if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID))
    return error("Invalid record");

  SmallVector Record;

  Triple TT(TheModule->getTargetTriple());

  // Read all the records for this value table.
  SmallString<128> ValueName;
  while (1) {
    BitstreamEntry Entry = Stream.advanceSkippingSubblocks();

    switch (Entry.Kind) {
    case BitstreamEntry::SubBlock: // Handled for us already.
    case BitstreamEntry::Error:
      return error("Malformed block");
    case BitstreamEntry::EndBlock:
      if (Offset > 0)
        Stream.JumpToBit(CurrentBit);
      return std::error_code();
    case BitstreamEntry::Record:
      // The interesting case.
      break;
    }

    // Read a record.
    Record.clear();
    switch (Stream.readRecord(Entry.ID, Record)) {
    default:  // Default behavior: unknown type.
      break;
    case bitc::VST_CODE_ENTRY: {  // VST_CODE_ENTRY: [valueid, namechar x N]
      ErrorOr ValOrErr = recordValue(Record, 1, TT);
      if (std::error_code EC = ValOrErr.getError())
        return EC;
      // The value itself is not needed here; naming it was the side effect.
      ValOrErr.get();
      break;
    }
    case bitc::VST_CODE_FNENTRY: {
      // VST_CODE_FNENTRY: [valueid, offset, namechar x N]
      ErrorOr ValOrErr = recordValue(Record, 2, TT);
      if (std::error_code EC = ValOrErr.getError())
        return EC;
      Value *V = ValOrErr.get();

      auto *GO = dyn_cast(V);
      if (!GO) {
        // If this is an alias, need to get the actual Function object
        // it aliases, in order to set up the DeferredFunctionInfo entry below.
        auto *GA = dyn_cast(V);
        if (GA)
          GO = GA->getBaseObject();
        // NOTE(review): GO and F below are validated by assert only, so a
        // malformed record is not rejected in builds without assertions.
        assert(GO);
      }

      uint64_t FuncWordOffset = Record[1];
      Function *F = dyn_cast(GO);
      assert(F);
      // The record stores the offset in 32-bit words; convert to bits.
      uint64_t FuncBitOffset = FuncWordOffset * 32;
      DeferredFunctionInfo[F] = FuncBitOffset + FuncBitcodeOffsetDelta;
      // Set the LastFunctionBlockBit to point to the last function block.
      // Later when parsing is resumed after function materialization,
      // we can simply skip that last function block.
      if (FuncBitOffset > LastFunctionBlockBit)
        LastFunctionBlockBit = FuncBitOffset;
      break;
    }
    case bitc::VST_CODE_BBENTRY: {
      // Record element 0 selects the basic block; the name starts at index 1.
      if (convertToString(Record, 1, ValueName))
        return error("Invalid record");
      BasicBlock *BB = getBasicBlock(Record[0]);
      if (!BB)
        return error("Invalid record");

      BB->setName(StringRef(ValueName.data(), ValueName.size()));
      ValueName.clear();
      break;
    }
    }
  }
}

/// Parse a single METADATA_KIND record, inserting result in MDKindMap.
///
/// Record layout as read here: element 0 is the kind number used in the
/// bitcode, the remaining elements are the characters of the kind name. The
/// name is resolved through the module's getMDKindID() and the pair is added
/// to MDKindMap; a second record for the same bitcode kind number is an error.
std::error_code
BitcodeReader::parseMetadataKindRecord(SmallVectorImpl &Record) {
  if (Record.size() < 2)
    return error("Invalid record");

  unsigned Kind = Record[0];
  SmallString<8> Name(Record.begin() + 1, Record.end());

  unsigned NewKind = TheModule->getMDKindID(Name.str());
  if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second)
    return error("Conflicting METADATA_KIND records");
  return std::error_code();
}

/// Undo a sign rotation: the low bit of \p U selects the sign. If it is set
/// the result is ~(U >> 1), otherwise U >> 1.
static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }

/// Parse a METADATA_STRINGS record and assign one MDString per string to
/// consecutive metadata slots starting at \p NextMetadataNo.
///
/// \param Record          Exactly two elements: the number of strings and the
///                        offset within \p Blob at which the characters begin.
/// \param Blob            The VBR6-encoded lengths followed by the
///                        concatenated string characters.
/// \param NextMetadataNo  Next free metadata slot; advanced once per string.
std::error_code BitcodeReader::parseMetadataStrings(ArrayRef Record,
                                                    StringRef Blob,
                                                    unsigned &NextMetadataNo) {
  // All the MDStrings in the block are emitted together in a single
  // record.  The strings are concatenated and stored in a blob along with
  // their sizes.
  if (Record.size() != 2)
    return error("Invalid record: metadata strings layout");

  unsigned NumStrings = Record[0];
  unsigned StringsOffset = Record[1];
  if (!NumStrings)
    return error("Invalid record: metadata strings with no strings");
  if (StringsOffset > Blob.size())
    return error("Invalid record: metadata strings corrupt offset");

  // The lengths occupy the first StringsOffset bytes of the blob; a second
  // cursor is pointed at them so they can be read as VBR6 values.
  StringRef Lengths = Blob.slice(0, StringsOffset);
  SimpleBitstreamCursor R(*StreamFile);
  R.jumpToPointer(Lengths.begin());

  // Ensure that Blob doesn't get invalidated, even if this is reading from
  // a StreamingMemoryObject with corrupt data.
  R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset);

  StringRef Strings = Blob.drop_front(StringsOffset);
  // NumStrings is known to be non-zero here, so do/while runs at least once.
  do {
    if (R.AtEndOfStream())
      return error("Invalid record: metadata strings bad length");

    unsigned Size = R.ReadVBR(6);
    if (Strings.size() < Size)
      return error("Invalid record: metadata strings truncated chars");

    MetadataList.assignValue(MDString::get(Context, Strings.slice(0, Size)),
                             NextMetadataNo++);
    Strings = Strings.drop_front(Size);
  } while (--NumStrings);

  return std::error_code();
}

/// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing
/// module level metadata.
std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { IsMetadataMaterialized = true; unsigned NextMetadataNo = MetadataList.size(); if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID)) return error("Invalid record"); SmallVector Record; auto getMD = [&](unsigned ID) -> Metadata * { return MetadataList.getMetadataFwdRef(ID); }; auto getMDOrNull = [&](unsigned ID) -> Metadata *{ if (ID) return getMD(ID - 1); return nullptr; }; auto getMDString = [&](unsigned ID) -> MDString *{ // This requires that the ID is not really a forward reference. In // particular, the MDString must already have been resolved. return cast_or_null(getMDOrNull(ID)); }; #define GET_OR_DISTINCT(CLASS, DISTINCT, ARGS) \ (DISTINCT ? CLASS::getDistinct ARGS : CLASS::get ARGS) // Read all the records. while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: MetadataList.tryToResolveCycles(); return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); StringRef Blob; unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob); bool IsDistinct = false; switch (Code) { default: // Default behavior: ignore. break; case bitc::METADATA_NAME: { // Read name of the named metadata. SmallString<8> Name(Record.begin(), Record.end()); Record.clear(); Code = Stream.ReadCode(); unsigned NextBitCode = Stream.readRecord(Code, Record); if (NextBitCode != bitc::METADATA_NAMED_NODE) return error("METADATA_NAME not followed by METADATA_NAMED_NODE"); // Read named metadata elements. 
unsigned Size = Record.size(); NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name); for (unsigned i = 0; i != Size; ++i) { MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[i]); if (!MD) return error("Invalid record"); NMD->addOperand(MD); } break; } case bitc::METADATA_OLD_FN_NODE: { // FIXME: Remove in 4.0. // This is a LocalAsMetadata record, the only type of function-local // metadata. if (Record.size() % 2 == 1) return error("Invalid record"); // If this isn't a LocalAsMetadata record, we're dropping it. This used // to be legal, but there's no upgrade path. auto dropRecord = [&] { MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo++); }; if (Record.size() != 2) { dropRecord(); break; } Type *Ty = getTypeByID(Record[0]); if (Ty->isMetadataTy() || Ty->isVoidTy()) { dropRecord(); break; } MetadataList.assignValue( LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), NextMetadataNo++); break; } case bitc::METADATA_OLD_NODE: { // FIXME: Remove in 4.0. 
if (Record.size() % 2 == 1) return error("Invalid record"); unsigned Size = Record.size(); SmallVector Elts; for (unsigned i = 0; i != Size; i += 2) { Type *Ty = getTypeByID(Record[i]); if (!Ty) return error("Invalid record"); if (Ty->isMetadataTy()) Elts.push_back(MetadataList.getMetadataFwdRef(Record[i + 1])); else if (!Ty->isVoidTy()) { auto *MD = ValueAsMetadata::get(ValueList.getValueFwdRef(Record[i + 1], Ty)); assert(isa(MD) && "Expected non-function-local metadata"); Elts.push_back(MD); } else Elts.push_back(nullptr); } MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo++); break; } case bitc::METADATA_VALUE: { if (Record.size() != 2) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); if (Ty->isMetadataTy() || Ty->isVoidTy()) return error("Invalid record"); MetadataList.assignValue( ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)), NextMetadataNo++); break; } case bitc::METADATA_DISTINCT_NODE: IsDistinct = true; // fallthrough... case bitc::METADATA_NODE: { SmallVector Elts; Elts.reserve(Record.size()); for (unsigned ID : Record) Elts.push_back(ID ? MetadataList.getMetadataFwdRef(ID - 1) : nullptr); MetadataList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts) : MDNode::get(Context, Elts), NextMetadataNo++); break; } case bitc::METADATA_LOCATION: { if (Record.size() != 5) return error("Invalid record"); unsigned Line = Record[1]; unsigned Column = Record[2]; MDNode *Scope = MetadataList.getMDNodeFwdRefOrNull(Record[3]); if (!Scope) return error("Invalid record"); Metadata *InlinedAt = Record[4] ? 
MetadataList.getMetadataFwdRef(Record[4] - 1) : nullptr; MetadataList.assignValue( GET_OR_DISTINCT(DILocation, Record[0], (Context, Line, Column, Scope, InlinedAt)), NextMetadataNo++); break; } case bitc::METADATA_GENERIC_DEBUG: { if (Record.size() < 4) return error("Invalid record"); unsigned Tag = Record[1]; unsigned Version = Record[2]; if (Tag >= 1u << 16 || Version != 0) return error("Invalid record"); auto *Header = getMDString(Record[3]); SmallVector DwarfOps; for (unsigned I = 4, E = Record.size(); I != E; ++I) DwarfOps.push_back(Record[I] ? MetadataList.getMetadataFwdRef(Record[I] - 1) : nullptr); MetadataList.assignValue( GET_OR_DISTINCT(GenericDINode, Record[0], (Context, Tag, Header, DwarfOps)), NextMetadataNo++); break; } case bitc::METADATA_SUBRANGE: { if (Record.size() != 3) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DISubrange, Record[0], (Context, Record[1], unrotateSign(Record[2]))), NextMetadataNo++); break; } case bitc::METADATA_ENUMERATOR: { if (Record.size() != 3) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT( DIEnumerator, Record[0], (Context, unrotateSign(Record[1]), getMDString(Record[2]))), NextMetadataNo++); break; } case bitc::METADATA_BASIC_TYPE: { if (Record.size() != 6) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DIBasicType, Record[0], (Context, Record[1], getMDString(Record[2]), Record[3], Record[4], Record[5])), NextMetadataNo++); break; } case bitc::METADATA_DERIVED_TYPE: { if (Record.size() != 12) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DIDerivedType, Record[0], (Context, Record[1], getMDString(Record[2]), getMDOrNull(Record[3]), Record[4], getMDOrNull(Record[5]), getMDOrNull(Record[6]), Record[7], Record[8], Record[9], Record[10], getMDOrNull(Record[11]))), NextMetadataNo++); break; } case bitc::METADATA_COMPOSITE_TYPE: { if (Record.size() != 16) return error("Invalid record"); MetadataList.assignValue( 
GET_OR_DISTINCT(DICompositeType, Record[0], (Context, Record[1], getMDString(Record[2]), getMDOrNull(Record[3]), Record[4], getMDOrNull(Record[5]), getMDOrNull(Record[6]), Record[7], Record[8], Record[9], Record[10], getMDOrNull(Record[11]), Record[12], getMDOrNull(Record[13]), getMDOrNull(Record[14]), getMDString(Record[15]))), NextMetadataNo++); break; } case bitc::METADATA_SUBROUTINE_TYPE: { if (Record.size() != 3) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DISubroutineType, Record[0], (Context, Record[1], getMDOrNull(Record[2]))), NextMetadataNo++); break; } case bitc::METADATA_MODULE: { if (Record.size() != 6) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DIModule, Record[0], (Context, getMDOrNull(Record[1]), getMDString(Record[2]), getMDString(Record[3]), getMDString(Record[4]), getMDString(Record[5]))), NextMetadataNo++); break; } case bitc::METADATA_FILE: { if (Record.size() != 3) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DIFile, Record[0], (Context, getMDString(Record[1]), getMDString(Record[2]))), NextMetadataNo++); break; } case bitc::METADATA_COMPILE_UNIT: { if (Record.size() < 14 || Record.size() > 16) return error("Invalid record"); // Ignore Record[0], which indicates whether this compile unit is // distinct. It's always distinct. MetadataList.assignValue( DICompileUnit::getDistinct( Context, Record[1], getMDOrNull(Record[2]), getMDString(Record[3]), Record[4], getMDString(Record[5]), Record[6], getMDString(Record[7]), Record[8], getMDOrNull(Record[9]), getMDOrNull(Record[10]), getMDOrNull(Record[11]), getMDOrNull(Record[12]), getMDOrNull(Record[13]), Record.size() <= 15 ? nullptr : getMDOrNull(Record[15]), Record.size() <= 14 ? 
0 : Record[14]), NextMetadataNo++); break; } case bitc::METADATA_SUBPROGRAM: { if (Record.size() != 18 && Record.size() != 19) return error("Invalid record"); bool HasFn = Record.size() == 19; DISubprogram *SP = GET_OR_DISTINCT( DISubprogram, Record[0] || Record[8], // All definitions should be distinct. (Context, getMDOrNull(Record[1]), getMDString(Record[2]), getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], getMDOrNull(Record[6]), Record[7], Record[8], Record[9], getMDOrNull(Record[10]), Record[11], Record[12], Record[13], Record[14], getMDOrNull(Record[15 + HasFn]), getMDOrNull(Record[16 + HasFn]), getMDOrNull(Record[17 + HasFn]))); MetadataList.assignValue(SP, NextMetadataNo++); // Upgrade sp->function mapping to function->sp mapping. if (HasFn && Record[15]) { if (auto *CMD = dyn_cast(getMDOrNull(Record[15]))) if (auto *F = dyn_cast(CMD->getValue())) { if (F->isMaterializable()) // Defer until materialized; unmaterialized functions may not have // metadata. FunctionsWithSPs[F] = SP; else if (!F->empty()) F->setSubprogram(SP); } } break; } case bitc::METADATA_LEXICAL_BLOCK: { if (Record.size() != 5) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DILexicalBlock, Record[0], (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), Record[3], Record[4])), NextMetadataNo++); break; } case bitc::METADATA_LEXICAL_BLOCK_FILE: { if (Record.size() != 4) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DILexicalBlockFile, Record[0], (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), Record[3])), NextMetadataNo++); break; } case bitc::METADATA_NAMESPACE: { if (Record.size() != 5) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DINamespace, Record[0], (Context, getMDOrNull(Record[1]), getMDOrNull(Record[2]), getMDString(Record[3]), Record[4])), NextMetadataNo++); break; } case bitc::METADATA_MACRO: { if (Record.size() != 5) return error("Invalid record"); 
MetadataList.assignValue( GET_OR_DISTINCT(DIMacro, Record[0], (Context, Record[1], Record[2], getMDString(Record[3]), getMDString(Record[4]))), NextMetadataNo++); break; } case bitc::METADATA_MACRO_FILE: { if (Record.size() != 5) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DIMacroFile, Record[0], (Context, Record[1], Record[2], getMDOrNull(Record[3]), getMDOrNull(Record[4]))), NextMetadataNo++); break; } case bitc::METADATA_TEMPLATE_TYPE: { if (Record.size() != 3) return error("Invalid record"); MetadataList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter, Record[0], (Context, getMDString(Record[1]), getMDOrNull(Record[2]))), NextMetadataNo++); break; } case bitc::METADATA_TEMPLATE_VALUE: { if (Record.size() != 5) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DITemplateValueParameter, Record[0], (Context, Record[1], getMDString(Record[2]), getMDOrNull(Record[3]), getMDOrNull(Record[4]))), NextMetadataNo++); break; } case bitc::METADATA_GLOBAL_VAR: { if (Record.size() != 11) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DIGlobalVariable, Record[0], (Context, getMDOrNull(Record[1]), getMDString(Record[2]), getMDString(Record[3]), getMDOrNull(Record[4]), Record[5], getMDOrNull(Record[6]), Record[7], Record[8], getMDOrNull(Record[9]), getMDOrNull(Record[10]))), NextMetadataNo++); break; } case bitc::METADATA_LOCAL_VAR: { // 10th field is for the obseleted 'inlinedAt:' field. if (Record.size() < 8 || Record.size() > 10) return error("Invalid record"); // 2nd field used to be an artificial tag, either DW_TAG_auto_variable or // DW_TAG_arg_variable. 
bool HasTag = Record.size() > 8; MetadataList.assignValue( GET_OR_DISTINCT(DILocalVariable, Record[0], (Context, getMDOrNull(Record[1 + HasTag]), getMDString(Record[2 + HasTag]), getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag], getMDOrNull(Record[5 + HasTag]), Record[6 + HasTag], Record[7 + HasTag])), NextMetadataNo++); break; } case bitc::METADATA_EXPRESSION: { if (Record.size() < 1) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DIExpression, Record[0], (Context, makeArrayRef(Record).slice(1))), NextMetadataNo++); break; } case bitc::METADATA_OBJC_PROPERTY: { if (Record.size() != 8) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DIObjCProperty, Record[0], (Context, getMDString(Record[1]), getMDOrNull(Record[2]), Record[3], getMDString(Record[4]), getMDString(Record[5]), Record[6], getMDOrNull(Record[7]))), NextMetadataNo++); break; } case bitc::METADATA_IMPORTED_ENTITY: { if (Record.size() != 6) return error("Invalid record"); MetadataList.assignValue( GET_OR_DISTINCT(DIImportedEntity, Record[0], (Context, Record[1], getMDOrNull(Record[2]), getMDOrNull(Record[3]), Record[4], getMDString(Record[5]))), NextMetadataNo++); break; } case bitc::METADATA_STRING_OLD: { std::string String(Record.begin(), Record.end()); // Test for upgrading !llvm.loop. HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String); Metadata *MD = MDString::get(Context, String); MetadataList.assignValue(MD, NextMetadataNo++); break; } case bitc::METADATA_STRINGS: if (std::error_code EC = parseMetadataStrings(Record, Blob, NextMetadataNo)) return EC; break; case bitc::METADATA_KIND: { // Support older bitcode files that had METADATA_KIND records in a // block with METADATA_BLOCK_ID. if (std::error_code EC = parseMetadataKindRecord(Record)) return EC; break; } } } #undef GET_OR_DISTINCT } /// Parse the metadata kinds out of the METADATA_KIND_BLOCK. 
std::error_code BitcodeReader::parseMetadataKinds() { if (Stream.EnterSubBlock(bitc::METADATA_KIND_BLOCK_ID)) return error("Invalid record"); SmallVector Record; // Read all the records. while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); unsigned Code = Stream.readRecord(Entry.ID, Record); switch (Code) { default: // Default behavior: ignore. break; case bitc::METADATA_KIND: { if (std::error_code EC = parseMetadataKindRecord(Record)) return EC; break; } } } } /// Decode a signed value stored with the sign bit in the LSB for dense VBR /// encoding. uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) { if ((V & 1) == 0) return V >> 1; if (V != 1) return -(V >> 1); // There is no such thing as -0 with integers. "-0" really means MININT. return 1ULL << 63; } /// Resolve all of the initializers for global values and aliases that we can. std::error_code BitcodeReader::resolveGlobalAndAliasInits() { std::vector > GlobalInitWorklist; std::vector > AliasInitWorklist; std::vector > FunctionPrefixWorklist; std::vector > FunctionPrologueWorklist; std::vector > FunctionPersonalityFnWorklist; GlobalInitWorklist.swap(GlobalInits); AliasInitWorklist.swap(AliasInits); FunctionPrefixWorklist.swap(FunctionPrefixes); FunctionPrologueWorklist.swap(FunctionPrologues); FunctionPersonalityFnWorklist.swap(FunctionPersonalityFns); while (!GlobalInitWorklist.empty()) { unsigned ValID = GlobalInitWorklist.back().second; if (ValID >= ValueList.size()) { // Not ready to resolve this yet, it requires something later in the file. 
GlobalInits.push_back(GlobalInitWorklist.back()); } else { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) GlobalInitWorklist.back().first->setInitializer(C); else return error("Expected a constant"); } GlobalInitWorklist.pop_back(); } while (!AliasInitWorklist.empty()) { unsigned ValID = AliasInitWorklist.back().second; if (ValID >= ValueList.size()) { AliasInits.push_back(AliasInitWorklist.back()); } else { Constant *C = dyn_cast_or_null(ValueList[ValID]); if (!C) return error("Expected a constant"); GlobalAlias *Alias = AliasInitWorklist.back().first; if (C->getType() != Alias->getType()) return error("Alias and aliasee types don't match"); Alias->setAliasee(C); } AliasInitWorklist.pop_back(); } while (!FunctionPrefixWorklist.empty()) { unsigned ValID = FunctionPrefixWorklist.back().second; if (ValID >= ValueList.size()) { FunctionPrefixes.push_back(FunctionPrefixWorklist.back()); } else { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) FunctionPrefixWorklist.back().first->setPrefixData(C); else return error("Expected a constant"); } FunctionPrefixWorklist.pop_back(); } while (!FunctionPrologueWorklist.empty()) { unsigned ValID = FunctionPrologueWorklist.back().second; if (ValID >= ValueList.size()) { FunctionPrologues.push_back(FunctionPrologueWorklist.back()); } else { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) FunctionPrologueWorklist.back().first->setPrologueData(C); else return error("Expected a constant"); } FunctionPrologueWorklist.pop_back(); } while (!FunctionPersonalityFnWorklist.empty()) { unsigned ValID = FunctionPersonalityFnWorklist.back().second; if (ValID >= ValueList.size()) { FunctionPersonalityFns.push_back(FunctionPersonalityFnWorklist.back()); } else { if (Constant *C = dyn_cast_or_null(ValueList[ValID])) FunctionPersonalityFnWorklist.back().first->setPersonalityFn(C); else return error("Expected a constant"); } FunctionPersonalityFnWorklist.pop_back(); } return std::error_code(); } static APInt readWideAPInt(ArrayRef 
Vals, unsigned TypeBits) { SmallVector Words(Vals.size()); std::transform(Vals.begin(), Vals.end(), Words.begin(), BitcodeReader::decodeSignRotatedValue); return APInt(TypeBits, Words); } std::error_code BitcodeReader::parseConstants() { if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID)) return error("Invalid record"); SmallVector Record; // Read all the records for this value table. Type *CurTy = Type::getInt32Ty(Context); unsigned NextCstNo = ValueList.size(); while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: if (NextCstNo != ValueList.size()) return error("Invalid constant reference"); // Once all the constants have been read, go through and resolve forward // references. ValueList.resolveConstantForwardRefs(); return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); Value *V = nullptr; unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: unknown constant case bitc::CST_CODE_UNDEF: // UNDEF V = UndefValue::get(CurTy); break; case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid] if (Record.empty()) return error("Invalid record"); if (Record[0] >= TypeList.size() || !TypeList[Record[0]]) return error("Invalid record"); CurTy = TypeList[Record[0]]; continue; // Skip the ValueList manipulation. 
case bitc::CST_CODE_NULL: // NULL V = Constant::getNullValue(CurTy); break; case bitc::CST_CODE_INTEGER: // INTEGER: [intval] if (!CurTy->isIntegerTy() || Record.empty()) return error("Invalid record"); V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0])); break; case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval] if (!CurTy->isIntegerTy() || Record.empty()) return error("Invalid record"); APInt VInt = readWideAPInt(Record, cast(CurTy)->getBitWidth()); V = ConstantInt::get(Context, VInt); break; } case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval] if (Record.empty()) return error("Invalid record"); if (CurTy->isHalfTy()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf, APInt(16, (uint16_t)Record[0]))); else if (CurTy->isFloatTy()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEsingle, APInt(32, (uint32_t)Record[0]))); else if (CurTy->isDoubleTy()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEdouble, APInt(64, Record[0]))); else if (CurTy->isX86_FP80Ty()) { // Bits are not stored the same way as a normal i80 APInt, compensate. 
uint64_t Rearrange[2]; Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16); Rearrange[1] = Record[0] >> 48; V = ConstantFP::get(Context, APFloat(APFloat::x87DoubleExtended, APInt(80, Rearrange))); } else if (CurTy->isFP128Ty()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEquad, APInt(128, Record))); else if (CurTy->isPPC_FP128Ty()) V = ConstantFP::get(Context, APFloat(APFloat::PPCDoubleDouble, APInt(128, Record))); else V = UndefValue::get(CurTy); break; } case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number] if (Record.empty()) return error("Invalid record"); unsigned Size = Record.size(); SmallVector Elts; if (StructType *STy = dyn_cast(CurTy)) { for (unsigned i = 0; i != Size; ++i) Elts.push_back(ValueList.getConstantFwdRef(Record[i], STy->getElementType(i))); V = ConstantStruct::get(STy, Elts); } else if (ArrayType *ATy = dyn_cast(CurTy)) { Type *EltTy = ATy->getElementType(); for (unsigned i = 0; i != Size; ++i) Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy)); V = ConstantArray::get(ATy, Elts); } else if (VectorType *VTy = dyn_cast(CurTy)) { Type *EltTy = VTy->getElementType(); for (unsigned i = 0; i != Size; ++i) Elts.push_back(ValueList.getConstantFwdRef(Record[i], EltTy)); V = ConstantVector::get(Elts); } else { V = UndefValue::get(CurTy); } break; } case bitc::CST_CODE_STRING: // STRING: [values] case bitc::CST_CODE_CSTRING: { // CSTRING: [values] if (Record.empty()) return error("Invalid record"); SmallString<16> Elts(Record.begin(), Record.end()); V = ConstantDataArray::getString(Context, Elts, BitCode == bitc::CST_CODE_CSTRING); break; } case bitc::CST_CODE_DATA: {// DATA: [n x value] if (Record.empty()) return error("Invalid record"); Type *EltTy = cast(CurTy)->getElementType(); if (EltTy->isIntegerTy(8)) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isIntegerTy(16)) { SmallVector 
Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isIntegerTy(32)) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isIntegerTy(64)) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::get(Context, Elts); else V = ConstantDataArray::get(Context, Elts); } else if (EltTy->isHalfTy()) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::getFP(Context, Elts); else V = ConstantDataArray::getFP(Context, Elts); } else if (EltTy->isFloatTy()) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::getFP(Context, Elts); else V = ConstantDataArray::getFP(Context, Elts); } else if (EltTy->isDoubleTy()) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) V = ConstantDataVector::getFP(Context, Elts); else V = ConstantDataArray::getFP(Context, Elts); } else { return error("Invalid type for value"); } break; } case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval] if (Record.size() < 3) return error("Invalid record"); int Opc = getDecodedBinaryOpcode(Record[0], CurTy); if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown binop. 
} else { Constant *LHS = ValueList.getConstantFwdRef(Record[1], CurTy); Constant *RHS = ValueList.getConstantFwdRef(Record[2], CurTy); unsigned Flags = 0; if (Record.size() >= 4) { if (Opc == Instruction::Add || Opc == Instruction::Sub || Opc == Instruction::Mul || Opc == Instruction::Shl) { if (Record[3] & (1 << bitc::OBO_NO_SIGNED_WRAP)) Flags |= OverflowingBinaryOperator::NoSignedWrap; if (Record[3] & (1 << bitc::OBO_NO_UNSIGNED_WRAP)) Flags |= OverflowingBinaryOperator::NoUnsignedWrap; } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv || Opc == Instruction::LShr || Opc == Instruction::AShr) { if (Record[3] & (1 << bitc::PEO_EXACT)) Flags |= SDivOperator::IsExact; } } V = ConstantExpr::get(Opc, LHS, RHS, Flags); } break; } case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval] if (Record.size() < 3) return error("Invalid record"); int Opc = getDecodedCastOpcode(Record[0]); if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown cast. } else { Type *OpTy = getTypeByID(Record[1]); if (!OpTy) return error("Invalid record"); Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy); V = UpgradeBitCastExpr(Opc, Op, CurTy); if (!V) V = ConstantExpr::getCast(Opc, Op, CurTy); } break; } case bitc::CST_CODE_CE_INBOUNDS_GEP: case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands] unsigned OpNum = 0; Type *PointeeType = nullptr; if (Record.size() % 2) PointeeType = getTypeByID(Record[OpNum++]); SmallVector Elts; while (OpNum != Record.size()) { Type *ElTy = getTypeByID(Record[OpNum++]); if (!ElTy) return error("Invalid record"); Elts.push_back(ValueList.getConstantFwdRef(Record[OpNum++], ElTy)); } if (PointeeType && PointeeType != cast(Elts[0]->getType()->getScalarType()) ->getElementType()) return error("Explicit gep operator type does not match pointee type " "of pointer operand"); ArrayRef Indices(Elts.begin() + 1, Elts.end()); V = ConstantExpr::getGetElementPtr(PointeeType, Elts[0], Indices, BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP); break; } 
case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#] if (Record.size() < 3) return error("Invalid record"); Type *SelectorTy = Type::getInt1Ty(Context); // The selector might be an i1 or an // Get the type from the ValueList before getting a forward ref. if (VectorType *VTy = dyn_cast(CurTy)) if (Value *V = ValueList[Record[0]]) if (SelectorTy != V->getType()) SelectorTy = VectorType::get(SelectorTy, VTy->getNumElements()); V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0], SelectorTy), ValueList.getConstantFwdRef(Record[1],CurTy), ValueList.getConstantFwdRef(Record[2],CurTy)); break; } case bitc::CST_CODE_CE_EXTRACTELT : { // CE_EXTRACTELT: [opty, opval, opty, opval] if (Record.size() < 3) return error("Invalid record"); VectorType *OpTy = dyn_cast_or_null(getTypeByID(Record[0])); if (!OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = nullptr; if (Record.size() == 4) { Type *IdxTy = getTypeByID(Record[2]); if (!IdxTy) return error("Invalid record"); Op1 = ValueList.getConstantFwdRef(Record[3], IdxTy); } else // TODO: Remove with llvm 4.0 Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); if (!Op1) return error("Invalid record"); V = ConstantExpr::getExtractElement(Op0, Op1); break; } case bitc::CST_CODE_CE_INSERTELT : { // CE_INSERTELT: [opval, opval, opty, opval] VectorType *OpTy = dyn_cast(CurTy); if (Record.size() < 3 || !OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy->getElementType()); Constant *Op2 = nullptr; if (Record.size() == 4) { Type *IdxTy = getTypeByID(Record[2]); if (!IdxTy) return error("Invalid record"); Op2 = ValueList.getConstantFwdRef(Record[3], IdxTy); } else // TODO: Remove with llvm 4.0 Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); if (!Op2) return error("Invalid record"); V = 
ConstantExpr::getInsertElement(Op0, Op1, Op2); break; } case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval] VectorType *OpTy = dyn_cast(CurTy); if (Record.size() < 3 || !OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy); Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), OpTy->getNumElements()); Constant *Op2 = ValueList.getConstantFwdRef(Record[2], ShufTy); V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); break; } case bitc::CST_CODE_CE_SHUFVEC_EX: { // [opty, opval, opval, opval] VectorType *RTy = dyn_cast(CurTy); VectorType *OpTy = dyn_cast_or_null(getTypeByID(Record[0])); if (Record.size() < 4 || !RTy || !OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), RTy->getNumElements()); Constant *Op2 = ValueList.getConstantFwdRef(Record[3], ShufTy); V = ConstantExpr::getShuffleVector(Op0, Op1, Op2); break; } case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred] if (Record.size() < 4) return error("Invalid record"); Type *OpTy = getTypeByID(Record[0]); if (!OpTy) return error("Invalid record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); if (OpTy->isFPOrFPVectorTy()) V = ConstantExpr::getFCmp(Record[3], Op0, Op1); else V = ConstantExpr::getICmp(Record[3], Op0, Op1); break; } // This maintains backward compatibility, pre-asm dialect keywords. // FIXME: Remove with the 4.0 release. 
case bitc::CST_CODE_INLINEASM_OLD: { if (Record.size() < 2) return error("Invalid record"); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; bool IsAlignStack = Record[0] >> 1; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) return error("Invalid record"); unsigned ConstStrSize = Record[2+AsmStrSize]; if (3+AsmStrSize+ConstStrSize > Record.size()) return error("Invalid record"); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[2+i]; for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[3+AsmStrSize+i]; PointerType *PTy = cast(CurTy); V = InlineAsm::get(cast(PTy->getElementType()), AsmStr, ConstrStr, HasSideEffects, IsAlignStack); break; } // This version adds support for the asm dialect keywords (e.g., // inteldialect). case bitc::CST_CODE_INLINEASM: { if (Record.size() < 2) return error("Invalid record"); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; bool IsAlignStack = (Record[0] >> 1) & 1; unsigned AsmDialect = Record[0] >> 2; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) return error("Invalid record"); unsigned ConstStrSize = Record[2+AsmStrSize]; if (3+AsmStrSize+ConstStrSize > Record.size()) return error("Invalid record"); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[2+i]; for (unsigned i = 0; i != ConstStrSize; ++i) ConstrStr += (char)Record[3+AsmStrSize+i]; PointerType *PTy = cast(CurTy); V = InlineAsm::get(cast(PTy->getElementType()), AsmStr, ConstrStr, HasSideEffects, IsAlignStack, InlineAsm::AsmDialect(AsmDialect)); break; } case bitc::CST_CODE_BLOCKADDRESS:{ if (Record.size() < 3) return error("Invalid record"); Type *FnTy = getTypeByID(Record[0]); if (!FnTy) return error("Invalid record"); Function *Fn = dyn_cast_or_null(ValueList.getConstantFwdRef(Record[1],FnTy)); if (!Fn) return error("Invalid record"); // If the function is already parsed we can insert the block address right // away. 
BasicBlock *BB; unsigned BBID = Record[2]; if (!BBID) // Invalid reference to entry block. return error("Invalid ID"); if (!Fn->empty()) { Function::iterator BBI = Fn->begin(), BBE = Fn->end(); for (size_t I = 0, E = BBID; I != E; ++I) { if (BBI == BBE) return error("Invalid ID"); ++BBI; } BB = &*BBI; } else { // Otherwise insert a placeholder and remember it so it can be inserted // when the function is parsed. auto &FwdBBs = BasicBlockFwdRefs[Fn]; if (FwdBBs.empty()) BasicBlockFwdRefQueue.push_back(Fn); if (FwdBBs.size() < BBID + 1) FwdBBs.resize(BBID + 1); if (!FwdBBs[BBID]) FwdBBs[BBID] = BasicBlock::Create(Context); BB = FwdBBs[BBID]; } V = BlockAddress::get(Fn, BB); break; } } ValueList.assignValue(V, NextCstNo); ++NextCstNo; } } std::error_code BitcodeReader::parseUseLists() { if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID)) return error("Invalid record"); // Read all the records. SmallVector Record; while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; } // Read a use list record. Record.clear(); bool IsBB = false; switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: unknown type. break; case bitc::USELIST_CODE_BB: IsBB = true; // fallthrough case bitc::USELIST_CODE_DEFAULT: { unsigned RecordLength = Record.size(); if (RecordLength < 3) // Records should have at least an ID and two indexes. 
return error("Invalid record"); unsigned ID = Record.back(); Record.pop_back(); Value *V; if (IsBB) { assert(ID < FunctionBBs.size() && "Basic block not found"); V = FunctionBBs[ID]; } else V = ValueList[ID]; unsigned NumUses = 0; SmallDenseMap Order; for (const Use &U : V->materialized_uses()) { if (++NumUses > Record.size()) break; Order[&U] = Record[NumUses - 1]; } if (Order.size() != Record.size() || NumUses > Record.size()) // Mismatches can happen if the functions are being materialized lazily // (out-of-order), or a value has been upgraded. break; V->sortUseList([&](const Use &L, const Use &R) { return Order.lookup(&L) < Order.lookup(&R); }); break; } } } } /// When we see the block for metadata, remember where it is and then skip it. /// This lets us lazily deserialize the metadata. std::error_code BitcodeReader::rememberAndSkipMetadata() { // Save the current stream state. uint64_t CurBit = Stream.GetCurrentBitNo(); DeferredMetadataInfo.push_back(CurBit); // Skip over the block for now. if (Stream.SkipBlock()) return error("Invalid record"); return std::error_code(); } std::error_code BitcodeReader::materializeMetadata() { for (uint64_t BitPos : DeferredMetadataInfo) { // Move the bit stream to the saved position. Stream.JumpToBit(BitPos); if (std::error_code EC = parseMetadata(true)) return EC; } DeferredMetadataInfo.clear(); return std::error_code(); } void BitcodeReader::setStripDebugInfo() { StripDebugInfo = true; } /// When we see the block for a function body, remember where it is and then /// skip it. This lets us lazily deserialize the functions. std::error_code BitcodeReader::rememberAndSkipFunctionBody() { // Get the function we are talking about. if (FunctionsWithBodies.empty()) return error("Insufficient function protos"); Function *Fn = FunctionsWithBodies.back(); FunctionsWithBodies.pop_back(); // Save the current stream state. 
uint64_t CurBit = Stream.GetCurrentBitNo(); assert( (DeferredFunctionInfo[Fn] == 0 || DeferredFunctionInfo[Fn] == CurBit) && "Mismatch between VST and scanned function offsets"); DeferredFunctionInfo[Fn] = CurBit; // Skip over the function block for now. if (Stream.SkipBlock()) return error("Invalid record"); return std::error_code(); } std::error_code BitcodeReader::globalCleanup() { // Patch the initializers for globals and aliases up. resolveGlobalAndAliasInits(); if (!GlobalInits.empty() || !AliasInits.empty()) return error("Malformed global initializer set"); // Look for intrinsic functions which need to be upgraded at some point for (Function &F : *TheModule) { Function *NewFn; if (UpgradeIntrinsicFunction(&F, NewFn)) UpgradedIntrinsics[&F] = NewFn; } // Look for global variables which need to be renamed. for (GlobalVariable &GV : TheModule->globals()) UpgradeGlobalVariable(&GV); // Force deallocation of memory for these vectors to favor the client that // want lazy deserialization. std::vector >().swap(GlobalInits); std::vector >().swap(AliasInits); return std::error_code(); } /// Support for lazy parsing of function bodies. This is required if we /// either have an old bitcode file without a VST forward declaration record, /// or if we have an anonymous function being materialized, since anonymous /// functions do not have a name and are therefore not in the VST. std::error_code BitcodeReader::rememberAndSkipFunctionBodies() { Stream.JumpToBit(NextUnreadBit); if (Stream.AtEndOfStream()) return error("Could not find function in stream"); if (!SeenFirstFunctionBody) return error("Trying to materialize functions before seeing function blocks"); // An old bitcode file with the symbol table at the end would have // finished the parse greedily. 
assert(SeenValueSymbolTable); SmallVector Record; while (1) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { default: return error("Expect SubBlock"); case BitstreamEntry::SubBlock: switch (Entry.ID) { default: return error("Expect function block"); case bitc::FUNCTION_BLOCK_ID: if (std::error_code EC = rememberAndSkipFunctionBody()) return EC; NextUnreadBit = Stream.GetCurrentBitNo(); return std::error_code(); } } } } std::error_code BitcodeReader::parseBitcodeVersion() { if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID)) return error("Invalid record"); // Read all the records. SmallVector Record; while (1) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { default: case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: reject return error("Invalid value"); case bitc::IDENTIFICATION_CODE_STRING: { // IDENTIFICATION: [strchr x // N] convertToString(Record, 0, ProducerIdentification); break; } case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#] unsigned epoch = (unsigned)Record[0]; if (epoch != bitc::BITCODE_CURRENT_EPOCH) { return error( Twine("Incompatible epoch: Bitcode '") + Twine(epoch) + "' vs current: '" + Twine(bitc::BITCODE_CURRENT_EPOCH) + "'"); } } } } } std::error_code BitcodeReader::parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata) { if (ResumeBit) Stream.JumpToBit(ResumeBit); else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); SmallVector Record; std::vector SectionTable; std::vector GCTable; // Read all the records for this module. 
while (1) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return globalCleanup(); case BitstreamEntry::SubBlock: switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) return error("Invalid record"); break; case bitc::BLOCKINFO_BLOCK_ID: if (Stream.ReadBlockInfoBlock()) return error("Malformed block"); break; case bitc::PARAMATTR_BLOCK_ID: if (std::error_code EC = parseAttributeBlock()) return EC; break; case bitc::PARAMATTR_GROUP_BLOCK_ID: if (std::error_code EC = parseAttributeGroupBlock()) return EC; break; case bitc::TYPE_BLOCK_ID_NEW: if (std::error_code EC = parseTypeTable()) return EC; break; case bitc::VALUE_SYMTAB_BLOCK_ID: if (!SeenValueSymbolTable) { // Either this is an old form VST without function index and an // associated VST forward declaration record (which would have caused // the VST to be jumped to and parsed before it was encountered // normally in the stream), or there were no function blocks to // trigger an earlier parsing of the VST. assert(VSTOffset == 0 || FunctionsWithBodies.empty()); if (std::error_code EC = parseValueSymbolTable()) return EC; SeenValueSymbolTable = true; } else { // We must have had a VST forward declaration record, which caused // the parser to jump to and parse the VST earlier. 
assert(VSTOffset > 0); if (Stream.SkipBlock()) return error("Invalid record"); } break; case bitc::CONSTANTS_BLOCK_ID: if (std::error_code EC = parseConstants()) return EC; if (std::error_code EC = resolveGlobalAndAliasInits()) return EC; break; case bitc::METADATA_BLOCK_ID: if (ShouldLazyLoadMetadata && !IsMetadataMaterialized) { if (std::error_code EC = rememberAndSkipMetadata()) return EC; break; } assert(DeferredMetadataInfo.empty() && "Unexpected deferred metadata"); if (std::error_code EC = parseMetadata(true)) return EC; break; case bitc::METADATA_KIND_BLOCK_ID: if (std::error_code EC = parseMetadataKinds()) return EC; break; case bitc::FUNCTION_BLOCK_ID: // If this is the first function body we've seen, reverse the // FunctionsWithBodies list. if (!SeenFirstFunctionBody) { std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end()); if (std::error_code EC = globalCleanup()) return EC; SeenFirstFunctionBody = true; } if (VSTOffset > 0) { // If we have a VST forward declaration record, make sure we // parse the VST now if we haven't already. It is needed to // set up the DeferredFunctionInfo vector for lazy reading. if (!SeenValueSymbolTable) { if (std::error_code EC = BitcodeReader::parseValueSymbolTable(VSTOffset)) return EC; SeenValueSymbolTable = true; // Fall through so that we record the NextUnreadBit below. // This is necessary in case we have an anonymous function that // is later materialized. Since it will not have a VST entry we // need to fall back to the lazy parse to find its offset. } else { // If we have a VST forward declaration record, but have already // parsed the VST (just above, when the first function body was // encountered here), then we are resuming the parse after // materializing functions. The ResumeBit points to the // start of the last function block recorded in the // DeferredFunctionInfo map. Skip it. 
if (Stream.SkipBlock()) return error("Invalid record"); continue; } } // Support older bitcode files that did not have the function // index in the VST, nor a VST forward declaration record, as // well as anonymous functions that do not have VST entries. // Build the DeferredFunctionInfo vector on the fly. if (std::error_code EC = rememberAndSkipFunctionBody()) return EC; // Suspend parsing when we reach the function bodies. Subsequent // materialization calls will resume it when necessary. If the bitcode // file is old, the symbol table will be at the end instead and will not // have been seen yet. In this case, just finish the parse now. if (SeenValueSymbolTable) { NextUnreadBit = Stream.GetCurrentBitNo(); return std::error_code(); } break; case bitc::USELIST_BLOCK_ID: if (std::error_code EC = parseUseLists()) return EC; break; case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: if (std::error_code EC = parseOperandBundleTags()) return EC; break; } continue; case BitstreamEntry::Record: // The interesting case. break; } // Read a record. auto BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: break; // Default behavior, ignore unknown content. case bitc::MODULE_CODE_VERSION: { // VERSION: [version#] if (Record.size() < 1) return error("Invalid record"); // Only version #0 and #1 are supported so far. 
unsigned module_version = Record[0]; switch (module_version) { default: return error("Invalid value"); case 0: UseRelativeIDs = false; break; case 1: UseRelativeIDs = true; break; } break; } case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); TheModule->setTargetTriple(S); break; } case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); TheModule->setDataLayout(S); break; } case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); TheModule->setModuleInlineAsm(S); break; } case bitc::MODULE_CODE_DEPLIB: { // DEPLIB: [strchr x N] // FIXME: Remove in 4.0. std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); // Ignore value. break; } case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); SectionTable.push_back(S); break; } case bitc::MODULE_CODE_GCNAME: { // SECTIONNAME: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); GCTable.push_back(S); break; } case bitc::MODULE_CODE_COMDAT: { // COMDAT: [selection_kind, name] if (Record.size() < 2) return error("Invalid record"); Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]); unsigned ComdatNameSize = Record[1]; std::string ComdatName; ComdatName.reserve(ComdatNameSize); for (unsigned i = 0; i != ComdatNameSize; ++i) ComdatName += (char)Record[2 + i]; Comdat *C = TheModule->getOrInsertComdat(ComdatName); C->setSelectionKind(SK); ComdatList.push_back(C); break; } // GLOBALVAR: [pointer type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass, // comdat] case bitc::MODULE_CODE_GLOBALVAR: { if (Record.size() < 6) 
return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); if (!Ty) return error("Invalid record"); bool isConstant = Record[1] & 1; bool explicitType = Record[1] & 2; unsigned AddressSpace; if (explicitType) { AddressSpace = Record[1] >> 2; } else { if (!Ty->isPointerTy()) return error("Invalid type for value"); AddressSpace = cast(Ty)->getAddressSpace(); Ty = cast(Ty)->getElementType(); } uint64_t RawLinkage = Record[3]; GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); unsigned Alignment; if (std::error_code EC = parseAlignmentValue(Record[4], Alignment)) return EC; std::string Section; if (Record[5]) { if (Record[5]-1 >= SectionTable.size()) return error("Invalid ID"); Section = SectionTable[Record[5]-1]; } GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility; // Local linkage must have default visibility. if (Record.size() > 6 && !GlobalValue::isLocalLinkage(Linkage)) // FIXME: Change to an error if non-default in 4.0. Visibility = getDecodedVisibility(Record[6]); GlobalVariable::ThreadLocalMode TLM = GlobalVariable::NotThreadLocal; if (Record.size() > 7) TLM = getDecodedThreadLocalMode(Record[7]); bool UnnamedAddr = false; if (Record.size() > 8) UnnamedAddr = Record[8]; bool ExternallyInitialized = false; if (Record.size() > 9) ExternallyInitialized = Record[9]; GlobalVariable *NewGV = new GlobalVariable(*TheModule, Ty, isConstant, Linkage, nullptr, "", nullptr, TLM, AddressSpace, ExternallyInitialized); NewGV->setAlignment(Alignment); if (!Section.empty()) NewGV->setSection(Section); NewGV->setVisibility(Visibility); NewGV->setUnnamedAddr(UnnamedAddr); if (Record.size() > 10) NewGV->setDLLStorageClass(getDecodedDLLStorageClass(Record[10])); else upgradeDLLImportExportLinkage(NewGV, RawLinkage); ValueList.push_back(NewGV); // Remember which value to use for the global initializer. 
if (unsigned InitID = Record[2]) GlobalInits.push_back(std::make_pair(NewGV, InitID-1)); if (Record.size() > 11) { if (unsigned ComdatID = Record[11]) { if (ComdatID > ComdatList.size()) return error("Invalid global variable comdat ID"); NewGV->setComdat(ComdatList[ComdatID - 1]); } } else if (hasImplicitComdat(RawLinkage)) { NewGV->setComdat(reinterpret_cast(1)); } break; } // FUNCTION: [type, callingconv, isproto, linkage, paramattr, // alignment, section, visibility, gc, unnamed_addr, // prologuedata, dllstorageclass, comdat, prefixdata] case bitc::MODULE_CODE_FUNCTION: { if (Record.size() < 8) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); if (!Ty) return error("Invalid record"); if (auto *PTy = dyn_cast(Ty)) Ty = PTy->getElementType(); auto *FTy = dyn_cast(Ty); if (!FTy) return error("Invalid type for value"); auto CC = static_cast(Record[1]); if (CC & ~CallingConv::MaxID) return error("Invalid calling convention ID"); Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, "", TheModule); Func->setCallingConv(CC); bool isProto = Record[2]; uint64_t RawLinkage = Record[3]; Func->setLinkage(getDecodedLinkage(RawLinkage)); Func->setAttributes(getAttributes(Record[4])); unsigned Alignment; if (std::error_code EC = parseAlignmentValue(Record[5], Alignment)) return EC; Func->setAlignment(Alignment); if (Record[6]) { if (Record[6]-1 >= SectionTable.size()) return error("Invalid ID"); Func->setSection(SectionTable[Record[6]-1]); } // Local linkage must have default visibility. if (!Func->hasLocalLinkage()) // FIXME: Change to an error if non-default in 4.0. 
Func->setVisibility(getDecodedVisibility(Record[7])); if (Record.size() > 8 && Record[8]) { if (Record[8]-1 >= GCTable.size()) return error("Invalid ID"); Func->setGC(GCTable[Record[8]-1].c_str()); } bool UnnamedAddr = false; if (Record.size() > 9) UnnamedAddr = Record[9]; Func->setUnnamedAddr(UnnamedAddr); if (Record.size() > 10 && Record[10] != 0) FunctionPrologues.push_back(std::make_pair(Func, Record[10]-1)); if (Record.size() > 11) Func->setDLLStorageClass(getDecodedDLLStorageClass(Record[11])); else upgradeDLLImportExportLinkage(Func, RawLinkage); if (Record.size() > 12) { if (unsigned ComdatID = Record[12]) { if (ComdatID > ComdatList.size()) return error("Invalid function comdat ID"); Func->setComdat(ComdatList[ComdatID - 1]); } } else if (hasImplicitComdat(RawLinkage)) { Func->setComdat(reinterpret_cast(1)); } if (Record.size() > 13 && Record[13] != 0) FunctionPrefixes.push_back(std::make_pair(Func, Record[13]-1)); if (Record.size() > 14 && Record[14] != 0) FunctionPersonalityFns.push_back(std::make_pair(Func, Record[14] - 1)); ValueList.push_back(Func); // If this is a function with a body, remember the prototype we are // creating now, so that we can match up the body with them later. 
if (!isProto) { Func->setIsMaterializable(true); FunctionsWithBodies.push_back(Func); DeferredFunctionInfo[Func] = 0; } break; } // ALIAS: [alias type, addrspace, aliasee val#, linkage] // ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, dllstorageclass] case bitc::MODULE_CODE_ALIAS: case bitc::MODULE_CODE_ALIAS_OLD: { bool NewRecord = BitCode == bitc::MODULE_CODE_ALIAS; if (Record.size() < (3 + (unsigned)NewRecord)) return error("Invalid record"); unsigned OpNum = 0; Type *Ty = getTypeByID(Record[OpNum++]); if (!Ty) return error("Invalid record"); unsigned AddrSpace; if (!NewRecord) { auto *PTy = dyn_cast(Ty); if (!PTy) return error("Invalid type for value"); Ty = PTy->getElementType(); AddrSpace = PTy->getAddressSpace(); } else { AddrSpace = Record[OpNum++]; } auto Val = Record[OpNum++]; auto Linkage = Record[OpNum++]; auto *NewGA = GlobalAlias::create( Ty, AddrSpace, getDecodedLinkage(Linkage), "", TheModule); // Old bitcode files didn't have visibility field. // Local linkage must have default visibility. if (OpNum != Record.size()) { auto VisInd = OpNum++; if (!NewGA->hasLocalLinkage()) // FIXME: Change to an error if non-default in 4.0. NewGA->setVisibility(getDecodedVisibility(Record[VisInd])); } if (OpNum != Record.size()) NewGA->setDLLStorageClass(getDecodedDLLStorageClass(Record[OpNum++])); else upgradeDLLImportExportLinkage(NewGA, Linkage); if (OpNum != Record.size()) NewGA->setThreadLocalMode(getDecodedThreadLocalMode(Record[OpNum++])); if (OpNum != Record.size()) NewGA->setUnnamedAddr(Record[OpNum++]); ValueList.push_back(NewGA); AliasInits.push_back(std::make_pair(NewGA, Val)); break; } /// MODULE_CODE_PURGEVALS: [numvals] case bitc::MODULE_CODE_PURGEVALS: // Trim down the value list to the specified size. 
if (Record.size() < 1 || Record[0] > ValueList.size()) return error("Invalid record"); ValueList.shrinkTo(Record[0]); break; /// MODULE_CODE_VSTOFFSET: [offset] case bitc::MODULE_CODE_VSTOFFSET: if (Record.size() < 1) return error("Invalid record"); VSTOffset = Record[0]; break; /// MODULE_CODE_SOURCE_FILENAME: [namechar x N] case bitc::MODULE_CODE_SOURCE_FILENAME: SmallString<128> ValueName; if (convertToString(Record, 0, ValueName)) return error("Invalid record"); TheModule->setSourceFileName(ValueName); break; } Record.clear(); } } /// Helper to read the header common to all bitcode files. static bool hasValidBitcodeHeader(BitstreamCursor &Stream) { // Sniff for the signature. if (Stream.Read(8) != 'B' || Stream.Read(8) != 'C' || Stream.Read(4) != 0x0 || Stream.Read(4) != 0xC || Stream.Read(4) != 0xE || Stream.Read(4) != 0xD) return false; return true; } std::error_code BitcodeReader::parseBitcodeInto(std::unique_ptr Streamer, Module *M, bool ShouldLazyLoadMetadata) { TheModule = M; if (std::error_code EC = initStream(std::move(Streamer))) return EC; // Sniff for the signature. if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (1) { if (Stream.AtEndOfStream()) { // We didn't really read a proper Module. 
return error("Malformed IR file"); } BitstreamEntry Entry = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); if (Entry.Kind != BitstreamEntry::SubBlock) return error("Malformed block"); if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) { parseBitcodeVersion(); continue; } if (Entry.ID == bitc::MODULE_BLOCK_ID) return parseModule(0, ShouldLazyLoadMetadata); if (Stream.SkipBlock()) return error("Invalid record"); } } ErrorOr BitcodeReader::parseModuleTriple() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); SmallVector Record; std::string Triple; // Read all the records for this module. while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return Triple; case BitstreamEntry::Record: // The interesting case. break; } // Read a record. switch (Stream.readRecord(Entry.ID, Record)) { default: break; // Default behavior, ignore unknown content. case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (convertToString(Record, 0, S)) return error("Invalid record"); Triple = S; break; } } Record.clear(); } llvm_unreachable("Exit infinite loop"); } ErrorOr BitcodeReader::parseTriple() { if (std::error_code EC = initStream(nullptr)) return EC; // Sniff for the signature. if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (1) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::SubBlock: if (Entry.ID == bitc::MODULE_BLOCK_ID) return parseModuleTriple(); // Ignore other sub-blocks. 
if (Stream.SkipBlock()) return error("Malformed block"); continue; case BitstreamEntry::Record: Stream.skipRecord(Entry.ID); continue; } } } ErrorOr BitcodeReader::parseIdentificationBlock() { if (std::error_code EC = initStream(nullptr)) return EC; // Sniff for the signature. if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (1) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::SubBlock: if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) { if (std::error_code EC = parseBitcodeVersion()) return EC; return ProducerIdentification; } // Ignore other sub-blocks. if (Stream.SkipBlock()) return error("Malformed block"); continue; case BitstreamEntry::Record: Stream.skipRecord(Entry.ID); continue; } } } /// Parse metadata attachments. std::error_code BitcodeReader::parseMetadataAttachment(Function &F) { if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID)) return error("Invalid record"); SmallVector Record; while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; } // Read a metadata attachment record. Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; case bitc::METADATA_ATTACHMENT: { unsigned RecordLength = Record.size(); if (Record.empty()) return error("Invalid record"); if (RecordLength % 2 == 0) { // A function attachment. 
for (unsigned I = 0; I != RecordLength; I += 2) { auto K = MDKindMap.find(Record[I]); if (K == MDKindMap.end()) return error("Invalid ID"); MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[I + 1]); if (!MD) return error("Invalid metadata attachment"); F.setMetadata(K->second, MD); } continue; } // An instruction attachment. Instruction *Inst = InstructionList[Record[0]]; for (unsigned i = 1; i != RecordLength; i = i+2) { unsigned Kind = Record[i]; DenseMap::iterator I = MDKindMap.find(Kind); if (I == MDKindMap.end()) return error("Invalid ID"); Metadata *Node = MetadataList.getMetadataFwdRef(Record[i + 1]); if (isa(Node)) // Drop the attachment. This used to be legal, but there's no // upgrade path. break; MDNode *MD = dyn_cast_or_null(Node); if (!MD) return error("Invalid metadata attachment"); if (HasSeenOldLoopTags && I->second == LLVMContext::MD_loop) MD = upgradeInstructionLoopAttachment(*MD); Inst->setMetadata(I->second, MD); if (I->second == LLVMContext::MD_tbaa) { InstsWithTBAATag.push_back(Inst); continue; } } break; } } } } static std::error_code typeCheckLoadStoreInst(Type *ValType, Type *PtrType) { LLVMContext &Context = PtrType->getContext(); if (!isa(PtrType)) return error(Context, "Load/Store operand is not a pointer type"); Type *ElemType = cast(PtrType)->getElementType(); if (ValType && ValType != ElemType) return error(Context, "Explicit load/store type does not match pointee " "type of pointer operand"); if (!PointerType::isLoadableOrStorableType(ElemType)) return error(Context, "Cannot load/store from pointer"); return std::error_code(); } /// Lazily parse the specified function body block. std::error_code BitcodeReader::parseFunctionBody(Function *F) { if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID)) return error("Invalid record"); InstructionList.clear(); unsigned ModuleValueListSize = ValueList.size(); unsigned ModuleMetadataListSize = MetadataList.size(); // Add all the function arguments to the value table. 
for (Argument &I : F->args()) ValueList.push_back(&I); unsigned NextValueNo = ValueList.size(); BasicBlock *CurBB = nullptr; unsigned CurBBNo = 0; DebugLoc LastLoc; auto getLastInstruction = [&]() -> Instruction * { if (CurBB && !CurBB->empty()) return &CurBB->back(); else if (CurBBNo && FunctionBBs[CurBBNo - 1] && !FunctionBBs[CurBBNo - 1]->empty()) return &FunctionBBs[CurBBNo - 1]->back(); return nullptr; }; std::vector OperandBundles; // Read all the records. SmallVector Record; while (1) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: goto OutOfRecordLoop; case BitstreamEntry::SubBlock: switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) return error("Invalid record"); break; case bitc::CONSTANTS_BLOCK_ID: if (std::error_code EC = parseConstants()) return EC; NextValueNo = ValueList.size(); break; case bitc::VALUE_SYMTAB_BLOCK_ID: if (std::error_code EC = parseValueSymbolTable()) return EC; break; case bitc::METADATA_ATTACHMENT_ID: if (std::error_code EC = parseMetadataAttachment(*F)) return EC; break; case bitc::METADATA_BLOCK_ID: if (std::error_code EC = parseMetadata()) return EC; break; case bitc::USELIST_BLOCK_ID: if (std::error_code EC = parseUseLists()) return EC; break; } continue; case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); Instruction *I = nullptr; unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: reject return error("Invalid value"); case bitc::FUNC_CODE_DECLAREBLOCKS: { // DECLAREBLOCKS: [nblocks] if (Record.size() < 1 || Record[0] == 0) return error("Invalid record"); // Create all the basic blocks for the function. FunctionBBs.resize(Record[0]); // See if anything took the address of blocks in this function. 
auto BBFRI = BasicBlockFwdRefs.find(F); if (BBFRI == BasicBlockFwdRefs.end()) { for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) FunctionBBs[i] = BasicBlock::Create(Context, "", F); } else { auto &BBRefs = BBFRI->second; // Check for invalid basic block references. if (BBRefs.size() > FunctionBBs.size()) return error("Invalid ID"); assert(!BBRefs.empty() && "Unexpected empty array"); assert(!BBRefs.front() && "Invalid reference to entry block"); for (unsigned I = 0, E = FunctionBBs.size(), RE = BBRefs.size(); I != E; ++I) if (I < RE && BBRefs[I]) { BBRefs[I]->insertInto(F); FunctionBBs[I] = BBRefs[I]; } else { FunctionBBs[I] = BasicBlock::Create(Context, "", F); } // Erase from the table. BasicBlockFwdRefs.erase(BBFRI); } CurBB = FunctionBBs[0]; continue; } case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN // This record indicates that the last instruction is at the same // location as the previous instruction with a location. I = getLastInstruction(); if (!I) return error("Invalid record"); I->setDebugLoc(LastLoc); I = nullptr; continue; case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia] I = getLastInstruction(); if (!I || Record.size() < 4) return error("Invalid record"); unsigned Line = Record[0], Col = Record[1]; unsigned ScopeID = Record[2], IAID = Record[3]; MDNode *Scope = nullptr, *IA = nullptr; if (ScopeID) { Scope = MetadataList.getMDNodeFwdRefOrNull(ScopeID - 1); if (!Scope) return error("Invalid record"); } if (IAID) { IA = MetadataList.getMDNodeFwdRefOrNull(IAID - 1); if (!IA) return error("Invalid record"); } LastLoc = DebugLoc::get(Line, Col, Scope, IA); I->setDebugLoc(LastLoc); I = nullptr; continue; } case bitc::FUNC_CODE_INST_BINOP: { // BINOP: [opval, ty, opval, opcode] unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) || OpNum+1 > Record.size()) return error("Invalid record"); int Opc = 
getDecodedBinaryOpcode(Record[OpNum++], LHS->getType()); if (Opc == -1) return error("Invalid record"); I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); InstructionList.push_back(I); if (OpNum < Record.size()) { if (Opc == Instruction::Add || Opc == Instruction::Sub || Opc == Instruction::Mul || Opc == Instruction::Shl) { if (Record[OpNum] & (1 << bitc::OBO_NO_SIGNED_WRAP)) cast(I)->setHasNoSignedWrap(true); if (Record[OpNum] & (1 << bitc::OBO_NO_UNSIGNED_WRAP)) cast(I)->setHasNoUnsignedWrap(true); } else if (Opc == Instruction::SDiv || Opc == Instruction::UDiv || Opc == Instruction::LShr || Opc == Instruction::AShr) { if (Record[OpNum] & (1 << bitc::PEO_EXACT)) cast(I)->setIsExact(true); } else if (isa(I)) { FastMathFlags FMF = getDecodedFastMathFlags(Record[OpNum]); if (FMF.any()) I->setFastMathFlags(FMF); } } break; } case bitc::FUNC_CODE_INST_CAST: { // CAST: [opval, opty, destty, castopc] unsigned OpNum = 0; Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || OpNum+2 != Record.size()) return error("Invalid record"); Type *ResTy = getTypeByID(Record[OpNum]); int Opc = getDecodedCastOpcode(Record[OpNum + 1]); if (Opc == -1 || !ResTy) return error("Invalid record"); Instruction *Temp = nullptr; if ((I = UpgradeBitCastInst(Opc, Op, ResTy, Temp))) { if (Temp) { InstructionList.push_back(Temp); CurBB->getInstList().push_back(Temp); } } else { auto CastOp = (Instruction::CastOps)Opc; if (!CastInst::castIsValid(CastOp, Op, ResTy)) return error("Invalid cast"); I = CastInst::Create(CastOp, Op, ResTy); } InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_INBOUNDS_GEP_OLD: case bitc::FUNC_CODE_INST_GEP_OLD: case bitc::FUNC_CODE_INST_GEP: { // GEP: type, [n x operands] unsigned OpNum = 0; Type *Ty; bool InBounds; if (BitCode == bitc::FUNC_CODE_INST_GEP) { InBounds = Record[OpNum++]; Ty = getTypeByID(Record[OpNum++]); } else { InBounds = BitCode == bitc::FUNC_CODE_INST_INBOUNDS_GEP_OLD; Ty = nullptr; } Value *BasePtr; if 
(getValueTypePair(Record, OpNum, NextValueNo, BasePtr)) return error("Invalid record"); if (!Ty) Ty = cast(BasePtr->getType()->getScalarType()) ->getElementType(); else if (Ty != cast(BasePtr->getType()->getScalarType()) ->getElementType()) return error( "Explicit gep type does not match pointee type of pointer operand"); SmallVector GEPIdx; while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); GEPIdx.push_back(Op); } I = GetElementPtrInst::Create(Ty, BasePtr, GEPIdx); InstructionList.push_back(I); if (InBounds) cast(I)->setIsInBounds(true); break; } case bitc::FUNC_CODE_INST_EXTRACTVAL: { // EXTRACTVAL: [opty, opval, n x indices] unsigned OpNum = 0; Value *Agg; if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) return error("Invalid record"); unsigned RecSize = Record.size(); if (OpNum == RecSize) return error("EXTRACTVAL: Invalid instruction with 0 indices"); SmallVector EXTRACTVALIdx; Type *CurTy = Agg->getType(); for (; OpNum != RecSize; ++OpNum) { bool IsArray = CurTy->isArrayTy(); bool IsStruct = CurTy->isStructTy(); uint64_t Index = Record[OpNum]; if (!IsStruct && !IsArray) return error("EXTRACTVAL: Invalid type"); if ((unsigned)Index != Index) return error("Invalid value"); if (IsStruct && Index >= CurTy->subtypes().size()) return error("EXTRACTVAL: Invalid struct index"); if (IsArray && Index >= CurTy->getArrayNumElements()) return error("EXTRACTVAL: Invalid array index"); EXTRACTVALIdx.push_back((unsigned)Index); if (IsStruct) CurTy = CurTy->subtypes()[Index]; else CurTy = CurTy->subtypes()[0]; } I = ExtractValueInst::Create(Agg, EXTRACTVALIdx); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_INSERTVAL: { // INSERTVAL: [opty, opval, opty, opval, n x indices] unsigned OpNum = 0; Value *Agg; if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) return error("Invalid record"); Value *Val; if (getValueTypePair(Record, OpNum, NextValueNo, Val)) return 
error("Invalid record"); unsigned RecSize = Record.size(); if (OpNum == RecSize) return error("INSERTVAL: Invalid instruction with 0 indices"); SmallVector INSERTVALIdx; Type *CurTy = Agg->getType(); for (; OpNum != RecSize; ++OpNum) { bool IsArray = CurTy->isArrayTy(); bool IsStruct = CurTy->isStructTy(); uint64_t Index = Record[OpNum]; if (!IsStruct && !IsArray) return error("INSERTVAL: Invalid type"); if ((unsigned)Index != Index) return error("Invalid value"); if (IsStruct && Index >= CurTy->subtypes().size()) return error("INSERTVAL: Invalid struct index"); if (IsArray && Index >= CurTy->getArrayNumElements()) return error("INSERTVAL: Invalid array index"); INSERTVALIdx.push_back((unsigned)Index); if (IsStruct) CurTy = CurTy->subtypes()[Index]; else CurTy = CurTy->subtypes()[0]; } if (CurTy != Val->getType()) return error("Inserted value type doesn't match aggregate type"); I = InsertValueInst::Create(Agg, Val, INSERTVALIdx); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_SELECT: { // SELECT: [opval, ty, opval, opval] // obsolete form of select // handles select i1 ... 
in old bitcode unsigned OpNum = 0; Value *TrueVal, *FalseVal, *Cond; if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond)) return error("Invalid record"); I = SelectInst::Create(Cond, TrueVal, FalseVal); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_VSELECT: {// VSELECT: [ty,opval,opval,predty,pred] // new form of select // handles select i1 or select [N x i1] unsigned OpNum = 0; Value *TrueVal, *FalseVal, *Cond; if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || getValueTypePair(Record, OpNum, NextValueNo, Cond)) return error("Invalid record"); // select condition can be either i1 or [N x i1] if (VectorType* vector_type = dyn_cast(Cond->getType())) { // expect if (vector_type->getElementType() != Type::getInt1Ty(Context)) return error("Invalid type for value"); } else { // expect i1 if (Cond->getType() != Type::getInt1Ty(Context)) return error("Invalid type for value"); } I = SelectInst::Create(Cond, TrueVal, FalseVal); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_EXTRACTELT: { // EXTRACTELT: [opty, opval, opval] unsigned OpNum = 0; Value *Vec, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || getValueTypePair(Record, OpNum, NextValueNo, Idx)) return error("Invalid record"); if (!Vec->getType()->isVectorTy()) return error("Invalid type for value"); I = ExtractElementInst::Create(Vec, Idx); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_INSERTELT: { // INSERTELT: [ty, opval,opval,opval] unsigned OpNum = 0; Value *Vec, *Elt, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec)) return error("Invalid record"); if (!Vec->getType()->isVectorTy()) return error("Invalid type for value"); if (popValue(Record, OpNum, NextValueNo, cast(Vec->getType())->getElementType(), Elt) 
|| getValueTypePair(Record, OpNum, NextValueNo, Idx)) return error("Invalid record"); I = InsertElementInst::Create(Vec, Elt, Idx); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_SHUFFLEVEC: {// SHUFFLEVEC: [opval,ty,opval,opval] unsigned OpNum = 0; Value *Vec1, *Vec2, *Mask; if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) || popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2)) return error("Invalid record"); if (getValueTypePair(Record, OpNum, NextValueNo, Mask)) return error("Invalid record"); if (!Vec1->getType()->isVectorTy() || !Vec2->getType()->isVectorTy()) return error("Invalid type for value"); I = new ShuffleVectorInst(Vec1, Vec2, Mask); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CMP: // CMP: [opty, opval, opval, pred] // Old form of ICmp/FCmp returning bool // Existed to differentiate between icmp/fcmp and vicmp/vfcmp which were // both legal on vectors but had different behaviour. case bitc::FUNC_CODE_INST_CMP2: { // CMP2: [opty, opval, opval, pred] // FCmp/ICmp returning bool or vector of bool unsigned OpNum = 0; Value *LHS, *RHS; if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS)) return error("Invalid record"); unsigned PredVal = Record[OpNum]; bool IsFP = LHS->getType()->isFPOrFPVectorTy(); FastMathFlags FMF; if (IsFP && Record.size() > OpNum+1) FMF = getDecodedFastMathFlags(Record[++OpNum]); if (OpNum+1 != Record.size()) return error("Invalid record"); if (LHS->getType()->isFPOrFPVectorTy()) I = new FCmpInst((FCmpInst::Predicate)PredVal, LHS, RHS); else I = new ICmpInst((ICmpInst::Predicate)PredVal, LHS, RHS); if (FMF.any()) I->setFastMathFlags(FMF); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval] { unsigned Size = Record.size(); if (Size == 0) { I = ReturnInst::Create(Context); InstructionList.push_back(I); break; } unsigned OpNum = 0; Value *Op = nullptr; if (getValueTypePair(Record, 
OpNum, NextValueNo, Op)) return error("Invalid record"); if (OpNum != Record.size()) return error("Invalid record"); I = ReturnInst::Create(Context, Op); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#] if (Record.size() != 1 && Record.size() != 3) return error("Invalid record"); BasicBlock *TrueDest = getBasicBlock(Record[0]); if (!TrueDest) return error("Invalid record"); if (Record.size() == 1) { I = BranchInst::Create(TrueDest); InstructionList.push_back(I); } else { BasicBlock *FalseDest = getBasicBlock(Record[1]); Value *Cond = getValue(Record, 2, NextValueNo, Type::getInt1Ty(Context)); if (!FalseDest || !Cond) return error("Invalid record"); I = BranchInst::Create(TrueDest, FalseDest, Cond); InstructionList.push_back(I); } break; } case bitc::FUNC_CODE_INST_CLEANUPRET: { // CLEANUPRET: [val] or [val,bb#] if (Record.size() != 1 && Record.size() != 2) return error("Invalid record"); unsigned Idx = 0; Value *CleanupPad = getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); if (!CleanupPad) return error("Invalid record"); BasicBlock *UnwindDest = nullptr; if (Record.size() == 2) { UnwindDest = getBasicBlock(Record[Idx++]); if (!UnwindDest) return error("Invalid record"); } I = CleanupReturnInst::Create(CleanupPad, UnwindDest); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CATCHRET: { // CATCHRET: [val,bb#] if (Record.size() != 2) return error("Invalid record"); unsigned Idx = 0; Value *CatchPad = getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); if (!CatchPad) return error("Invalid record"); BasicBlock *BB = getBasicBlock(Record[Idx++]); if (!BB) return error("Invalid record"); I = CatchReturnInst::Create(CatchPad, BB); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CATCHSWITCH: { // CATCHSWITCH: [tok,num,(bb)*,bb?] // We must have, at minimum, the outer scope and the number of arguments. 
if (Record.size() < 2) return error("Invalid record"); unsigned Idx = 0; Value *ParentPad = getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); unsigned NumHandlers = Record[Idx++]; SmallVector Handlers; for (unsigned Op = 0; Op != NumHandlers; ++Op) { BasicBlock *BB = getBasicBlock(Record[Idx++]); if (!BB) return error("Invalid record"); Handlers.push_back(BB); } BasicBlock *UnwindDest = nullptr; if (Idx + 1 == Record.size()) { UnwindDest = getBasicBlock(Record[Idx++]); if (!UnwindDest) return error("Invalid record"); } if (Record.size() != Idx) return error("Invalid record"); auto *CatchSwitch = CatchSwitchInst::Create(ParentPad, UnwindDest, NumHandlers); for (BasicBlock *Handler : Handlers) CatchSwitch->addHandler(Handler); I = CatchSwitch; InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CATCHPAD: case bitc::FUNC_CODE_INST_CLEANUPPAD: { // [tok,num,(ty,val)*] // We must have, at minimum, the outer scope and the number of arguments. if (Record.size() < 2) return error("Invalid record"); unsigned Idx = 0; Value *ParentPad = getValue(Record, Idx++, NextValueNo, Type::getTokenTy(Context)); unsigned NumArgOperands = Record[Idx++]; SmallVector Args; for (unsigned Op = 0; Op != NumArgOperands; ++Op) { Value *Val; if (getValueTypePair(Record, Idx, NextValueNo, Val)) return error("Invalid record"); Args.push_back(Val); } if (Record.size() != Idx) return error("Invalid record"); if (BitCode == bitc::FUNC_CODE_INST_CLEANUPPAD) I = CleanupPadInst::Create(ParentPad, Args); else I = CatchPadInst::Create(ParentPad, Args); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...] // Check magic if ((Record[0] >> 16) == SWITCH_INST_MAGIC) { // "New" SwitchInst format with case ranges. The changes to write this // format were reverted but we still recognize bitcode that uses it. // Hopefully someday we will have support for case ranges and can use // this format again. 
Type *OpTy = getTypeByID(Record[1]); unsigned ValueBitWidth = cast(OpTy)->getBitWidth(); Value *Cond = getValue(Record, 2, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[3]); if (!OpTy || !Cond || !Default) return error("Invalid record"); unsigned NumCases = Record[4]; SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); InstructionList.push_back(SI); unsigned CurIdx = 5; for (unsigned i = 0; i != NumCases; ++i) { SmallVector CaseVals; unsigned NumItems = Record[CurIdx++]; for (unsigned ci = 0; ci != NumItems; ++ci) { bool isSingleNumber = Record[CurIdx++]; APInt Low; unsigned ActiveWords = 1; if (ValueBitWidth > 64) ActiveWords = Record[CurIdx++]; Low = readWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords), ValueBitWidth); CurIdx += ActiveWords; if (!isSingleNumber) { ActiveWords = 1; if (ValueBitWidth > 64) ActiveWords = Record[CurIdx++]; APInt High = readWideAPInt( makeArrayRef(&Record[CurIdx], ActiveWords), ValueBitWidth); CurIdx += ActiveWords; // FIXME: It is not clear whether values in the range should be // compared as signed or unsigned values. The partially // implemented changes that used this format in the past used // unsigned comparisons. for ( ; Low.ule(High); ++Low) CaseVals.push_back(ConstantInt::get(Context, Low)); } else CaseVals.push_back(ConstantInt::get(Context, Low)); } BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]); for (SmallVector::iterator cvi = CaseVals.begin(), cve = CaseVals.end(); cvi != cve; ++cvi) SI->addCase(*cvi, DestBB); } I = SI; break; } // Old SwitchInst format without case ranges. 
if (Record.size() < 3 || (Record.size() & 1) == 0) return error("Invalid record"); Type *OpTy = getTypeByID(Record[0]); Value *Cond = getValue(Record, 1, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[2]); if (!OpTy || !Cond || !Default) return error("Invalid record"); unsigned NumCases = (Record.size()-3)/2; SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); InstructionList.push_back(SI); for (unsigned i = 0, e = NumCases; i != e; ++i) { ConstantInt *CaseVal = dyn_cast_or_null(getFnValueByID(Record[3+i*2], OpTy)); BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]); if (!CaseVal || !DestBB) { delete SI; return error("Invalid record"); } SI->addCase(CaseVal, DestBB); } I = SI; break; } case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...] if (Record.size() < 2) return error("Invalid record"); Type *OpTy = getTypeByID(Record[0]); Value *Address = getValue(Record, 1, NextValueNo, OpTy); if (!OpTy || !Address) return error("Invalid record"); unsigned NumDests = Record.size()-2; IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests); InstructionList.push_back(IBI); for (unsigned i = 0, e = NumDests; i != e; ++i) { if (BasicBlock *DestBB = getBasicBlock(Record[2+i])) { IBI->addDestination(DestBB); } else { delete IBI; return error("Invalid record"); } } I = IBI; break; } case bitc::FUNC_CODE_INST_INVOKE: { // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...] 
if (Record.size() < 4) return error("Invalid record"); unsigned OpNum = 0; AttributeSet PAL = getAttributes(Record[OpNum++]); unsigned CCInfo = Record[OpNum++]; BasicBlock *NormalBB = getBasicBlock(Record[OpNum++]); BasicBlock *UnwindBB = getBasicBlock(Record[OpNum++]); FunctionType *FTy = nullptr; if (CCInfo >> 13 & 1 && !(FTy = dyn_cast(getTypeByID(Record[OpNum++])))) return error("Explicit invoke type is not a function type"); Value *Callee; if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) return error("Invalid record"); PointerType *CalleeTy = dyn_cast(Callee->getType()); if (!CalleeTy) return error("Callee is not a pointer"); if (!FTy) { FTy = dyn_cast(CalleeTy->getElementType()); if (!FTy) return error("Callee is not of pointer to function type"); } else if (CalleeTy->getElementType() != FTy) return error("Explicit invoke type does not match pointee type of " "callee operand"); if (Record.size() < FTy->getNumParams() + OpNum) return error("Insufficient operands to call"); SmallVector Ops; for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { Ops.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i))); if (!Ops.back()) return error("Invalid record"); } if (!FTy->isVarArg()) { if (Record.size() != OpNum) return error("Invalid record"); } else { // Read type/value pairs for varargs params. 
while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); Ops.push_back(Op); } } I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops, OperandBundles); OperandBundles.clear(); InstructionList.push_back(I); cast(I)->setCallingConv( static_cast(CallingConv::MaxID & CCInfo)); cast(I)->setAttributes(PAL); break; } case bitc::FUNC_CODE_INST_RESUME: { // RESUME: [opval] unsigned Idx = 0; Value *Val = nullptr; if (getValueTypePair(Record, Idx, NextValueNo, Val)) return error("Invalid record"); I = ResumeInst::Create(Val); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE I = new UnreachableInst(Context); InstructionList.push_back(I); break; case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] if (Record.size() < 1 || ((Record.size()-1)&1)) return error("Invalid record"); Type *Ty = getTypeByID(Record[0]); if (!Ty) return error("Invalid record"); PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2); InstructionList.push_back(PN); for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { Value *V; // With the new function encoding, it is possible that operands have // negative IDs (for forward references). Use a signed VBR // representation to keep the encoding small. if (UseRelativeIDs) V = getValueSigned(Record, 1+i, NextValueNo, Ty); else V = getValue(Record, 1+i, NextValueNo, Ty); BasicBlock *BB = getBasicBlock(Record[2+i]); if (!V || !BB) return error("Invalid record"); PN->addIncoming(V, BB); } I = PN; break; } case bitc::FUNC_CODE_INST_LANDINGPAD: case bitc::FUNC_CODE_INST_LANDINGPAD_OLD: { // LANDINGPAD: [ty, val, val, num, (id0,val0 ...)?] 
unsigned Idx = 0; if (BitCode == bitc::FUNC_CODE_INST_LANDINGPAD) { if (Record.size() < 3) return error("Invalid record"); } else { assert(BitCode == bitc::FUNC_CODE_INST_LANDINGPAD_OLD); if (Record.size() < 4) return error("Invalid record"); } Type *Ty = getTypeByID(Record[Idx++]); if (!Ty) return error("Invalid record"); if (BitCode == bitc::FUNC_CODE_INST_LANDINGPAD_OLD) { Value *PersFn = nullptr; if (getValueTypePair(Record, Idx, NextValueNo, PersFn)) return error("Invalid record"); if (!F->hasPersonalityFn()) F->setPersonalityFn(cast(PersFn)); else if (F->getPersonalityFn() != cast(PersFn)) return error("Personality function mismatch"); } bool IsCleanup = !!Record[Idx++]; unsigned NumClauses = Record[Idx++]; LandingPadInst *LP = LandingPadInst::Create(Ty, NumClauses); LP->setCleanup(IsCleanup); for (unsigned J = 0; J != NumClauses; ++J) { LandingPadInst::ClauseType CT = LandingPadInst::ClauseType(Record[Idx++]); (void)CT; Value *Val; if (getValueTypePair(Record, Idx, NextValueNo, Val)) { delete LP; return error("Invalid record"); } assert((CT != LandingPadInst::Catch || !isa(Val->getType())) && "Catch clause has a invalid type!"); assert((CT != LandingPadInst::Filter || isa(Val->getType())) && "Filter clause has invalid type!"); LP->addClause(cast(Val)); } I = LP; InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align] if (Record.size() != 4) return error("Invalid record"); uint64_t AlignRecord = Record[3]; const uint64_t InAllocaMask = uint64_t(1) << 5; const uint64_t ExplicitTypeMask = uint64_t(1) << 6; // Reserve bit 7 for SwiftError flag. 
// const uint64_t SwiftErrorMask = uint64_t(1) << 7; const uint64_t FlagMask = InAllocaMask | ExplicitTypeMask; bool InAlloca = AlignRecord & InAllocaMask; Type *Ty = getTypeByID(Record[0]); if ((AlignRecord & ExplicitTypeMask) == 0) { auto *PTy = dyn_cast_or_null(Ty); if (!PTy) return error("Old-style alloca with a non-pointer type"); Ty = PTy->getElementType(); } Type *OpTy = getTypeByID(Record[1]); Value *Size = getFnValueByID(Record[2], OpTy); unsigned Align; if (std::error_code EC = parseAlignmentValue(AlignRecord & ~FlagMask, Align)) { return EC; } if (!Ty || !Size) return error("Invalid record"); AllocaInst *AI = new AllocaInst(Ty, Size, Align); AI->setUsedWithInAlloca(InAlloca); I = AI; InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_LOAD: { // LOAD: [opty, op, align, vol] unsigned OpNum = 0; Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || (OpNum + 2 != Record.size() && OpNum + 3 != Record.size())) return error("Invalid record"); Type *Ty = nullptr; if (OpNum + 3 == Record.size()) Ty = getTypeByID(Record[OpNum++]); if (std::error_code EC = typeCheckLoadStoreInst(Ty, Op->getType())) return EC; if (!Ty) Ty = cast(Op->getType())->getElementType(); unsigned Align; if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align)) return EC; I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_LOADATOMIC: { // LOADATOMIC: [opty, op, align, vol, ordering, synchscope] unsigned OpNum = 0; Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || (OpNum + 4 != Record.size() && OpNum + 5 != Record.size())) return error("Invalid record"); Type *Ty = nullptr; if (OpNum + 5 == Record.size()) Ty = getTypeByID(Record[OpNum++]); if (std::error_code EC = typeCheckLoadStoreInst(Ty, Op->getType())) return EC; if (!Ty) Ty = cast(Op->getType())->getElementType(); AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]); if (Ordering == NotAtomic || Ordering 
== Release || Ordering == AcquireRelease) return error("Invalid record"); if (Ordering != NotAtomic && Record[OpNum] == 0) return error("Invalid record"); SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]); unsigned Align; if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align)) return EC; I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SynchScope); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_STORE: case bitc::FUNC_CODE_INST_STORE_OLD: { // STORE2:[ptrty, ptr, val, align, vol] unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || (BitCode == bitc::FUNC_CODE_INST_STORE ? getValueTypePair(Record, OpNum, NextValueNo, Val) : popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val)) || OpNum + 2 != Record.size()) return error("Invalid record"); if (std::error_code EC = typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) return EC; unsigned Align; if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align)) return EC; I = new StoreInst(Val, Ptr, Record[OpNum+1], Align); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_STOREATOMIC: case bitc::FUNC_CODE_INST_STOREATOMIC_OLD: { // STOREATOMIC: [ptrty, ptr, val, align, vol, ordering, synchscope] unsigned OpNum = 0; Value *Val, *Ptr; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || (BitCode == bitc::FUNC_CODE_INST_STOREATOMIC ? 
getValueTypePair(Record, OpNum, NextValueNo, Val) : popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val)) || OpNum + 4 != Record.size()) return error("Invalid record"); if (std::error_code EC = typeCheckLoadStoreInst(Val->getType(), Ptr->getType())) return EC; AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]); if (Ordering == NotAtomic || Ordering == Acquire || Ordering == AcquireRelease) return error("Invalid record"); SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]); if (Ordering != NotAtomic && Record[OpNum] == 0) return error("Invalid record"); unsigned Align; if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align)) return EC; I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SynchScope); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CMPXCHG_OLD: case bitc::FUNC_CODE_INST_CMPXCHG: { // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, synchscope, // failureordering?, isweak?] unsigned OpNum = 0; Value *Ptr, *Cmp, *New; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || (BitCode == bitc::FUNC_CODE_INST_CMPXCHG ? 
getValueTypePair(Record, OpNum, NextValueNo, Cmp) : popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Cmp)) || popValue(Record, OpNum, NextValueNo, Cmp->getType(), New) || Record.size() < OpNum + 3 || Record.size() > OpNum + 5) return error("Invalid record"); AtomicOrdering SuccessOrdering = getDecodedOrdering(Record[OpNum + 1]); if (SuccessOrdering == NotAtomic || SuccessOrdering == Unordered) return error("Invalid record"); SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 2]); if (std::error_code EC = typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType())) return EC; AtomicOrdering FailureOrdering; if (Record.size() < 7) FailureOrdering = AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering); else FailureOrdering = getDecodedOrdering(Record[OpNum + 3]); I = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, FailureOrdering, SynchScope); cast(I)->setVolatile(Record[OpNum]); if (Record.size() < 8) { // Before weak cmpxchgs existed, the instruction simply returned the // value loaded from memory, so bitcode files from that era will be // expecting the first component of a modern cmpxchg. 
CurBB->getInstList().push_back(I); I = ExtractValueInst::Create(I, 0); } else { cast(I)->setWeak(Record[OpNum+4]); } InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_ATOMICRMW: { // ATOMICRMW:[ptrty, ptr, val, op, vol, ordering, synchscope] unsigned OpNum = 0; Value *Ptr, *Val; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val) || OpNum+4 != Record.size()) return error("Invalid record"); AtomicRMWInst::BinOp Operation = getDecodedRMWOperation(Record[OpNum]); if (Operation < AtomicRMWInst::FIRST_BINOP || Operation > AtomicRMWInst::LAST_BINOP) return error("Invalid record"); AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]); if (Ordering == NotAtomic || Ordering == Unordered) return error("Invalid record"); SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]); I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope); cast(I)->setVolatile(Record[OpNum+1]); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope] if (2 != Record.size()) return error("Invalid record"); AtomicOrdering Ordering = getDecodedOrdering(Record[0]); if (Ordering == NotAtomic || Ordering == Unordered || Ordering == Monotonic) return error("Invalid record"); SynchronizationScope SynchScope = getDecodedSynchScope(Record[1]); I = new FenceInst(Context, Ordering, SynchScope); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_INST_CALL: { // CALL: [paramattrs, cc, fmf, fnty, fnid, arg0, arg1...] 
if (Record.size() < 3) return error("Invalid record"); unsigned OpNum = 0; AttributeSet PAL = getAttributes(Record[OpNum++]); unsigned CCInfo = Record[OpNum++]; FastMathFlags FMF; if ((CCInfo >> bitc::CALL_FMF) & 1) { FMF = getDecodedFastMathFlags(Record[OpNum++]); if (!FMF.any()) return error("Fast math flags indicator set for call with no FMF"); } FunctionType *FTy = nullptr; if (CCInfo >> bitc::CALL_EXPLICIT_TYPE & 1 && !(FTy = dyn_cast(getTypeByID(Record[OpNum++])))) return error("Explicit call type is not a function type"); Value *Callee; if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) return error("Invalid record"); PointerType *OpTy = dyn_cast(Callee->getType()); if (!OpTy) return error("Callee is not a pointer type"); if (!FTy) { FTy = dyn_cast(OpTy->getElementType()); if (!FTy) return error("Callee is not of pointer to function type"); } else if (OpTy->getElementType() != FTy) return error("Explicit call type does not match pointee type of " "callee operand"); if (Record.size() < FTy->getNumParams() + OpNum) return error("Insufficient operands to call"); SmallVector Args; // Read the fixed params. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { if (FTy->getParamType(i)->isLabelTy()) Args.push_back(getBasicBlock(Record[OpNum])); else Args.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i))); if (!Args.back()) return error("Invalid record"); } // Read type/value pairs for varargs params. 
if (!FTy->isVarArg()) { if (OpNum != Record.size()) return error("Invalid record"); } else { while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); Args.push_back(Op); } } I = CallInst::Create(FTy, Callee, Args, OperandBundles); OperandBundles.clear(); InstructionList.push_back(I); cast(I)->setCallingConv( static_cast((0x7ff & CCInfo) >> bitc::CALL_CCONV)); CallInst::TailCallKind TCK = CallInst::TCK_None; if (CCInfo & 1 << bitc::CALL_TAIL) TCK = CallInst::TCK_Tail; if (CCInfo & (1 << bitc::CALL_MUSTTAIL)) TCK = CallInst::TCK_MustTail; if (CCInfo & (1 << bitc::CALL_NOTAIL)) TCK = CallInst::TCK_NoTail; cast(I)->setTailCallKind(TCK); cast(I)->setAttributes(PAL); if (FMF.any()) { if (!isa(I)) return error("Fast-math-flags specified for call without " "floating-point scalar or vector return type"); I->setFastMathFlags(FMF); } break; } case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty] if (Record.size() < 3) return error("Invalid record"); Type *OpTy = getTypeByID(Record[0]); Value *Op = getValue(Record, 1, NextValueNo, OpTy); Type *ResTy = getTypeByID(Record[2]); if (!OpTy || !Op || !ResTy) return error("Invalid record"); I = new VAArgInst(Op, ResTy); InstructionList.push_back(I); break; } case bitc::FUNC_CODE_OPERAND_BUNDLE: { // A call or an invoke can be optionally prefixed with some variable // number of operand bundle blocks. These blocks are read into // OperandBundles and consumed at the next call or invoke instruction. if (Record.size() < 1 || Record[0] >= BundleTags.size()) return error("Invalid record"); std::vector Inputs; unsigned OpNum = 1; while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) return error("Invalid record"); Inputs.push_back(Op); } OperandBundles.emplace_back(BundleTags[Record[0]], std::move(Inputs)); continue; } } // Add instruction to end of current BB. If there is no current BB, reject // this file. 
if (!CurBB) { delete I; return error("Invalid instruction with no BB"); } if (!OperandBundles.empty()) { delete I; return error("Operand bundles found with no consumer"); } CurBB->getInstList().push_back(I); // If this was a terminator instruction, move to the next block. if (isa(I)) { ++CurBBNo; CurBB = CurBBNo < FunctionBBs.size() ? FunctionBBs[CurBBNo] : nullptr; } // Non-void values get registered in the value table for future use. if (I && !I->getType()->isVoidTy()) ValueList.assignValue(I, NextValueNo++); } OutOfRecordLoop: if (!OperandBundles.empty()) return error("Operand bundles found with no consumer"); // Check the function list for unresolved values. if (Argument *A = dyn_cast(ValueList.back())) { if (!A->getParent()) { // We found at least one unresolved value. Nuke them all to avoid leaks. for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i){ if ((A = dyn_cast_or_null(ValueList[i])) && !A->getParent()) { A->replaceAllUsesWith(UndefValue::get(A->getType())); delete A; } } return error("Never resolved value found in function"); } } // FIXME: Check for unresolved forward-declared metadata references // and clean up leaks. // Trim the value list down to the size it was before we parsed this function. ValueList.shrinkTo(ModuleValueListSize); MetadataList.shrinkTo(ModuleMetadataListSize); std::vector().swap(FunctionBBs); return std::error_code(); } /// Find the function body in the bitcode stream std::error_code BitcodeReader::findFunctionInStream( Function *F, DenseMap::iterator DeferredFunctionInfoIterator) { while (DeferredFunctionInfoIterator->second == 0) { // This is the fallback handling for the old format bitcode that // didn't contain the function index in the VST, or when we have // an anonymous function which would not have a VST entry. // Assert that we have one of those two cases. assert(VSTOffset == 0 || !F->hasName()); // Parse the next body in the stream and set its position in the // DeferredFunctionInfo map. 
if (std::error_code EC = rememberAndSkipFunctionBodies()) return EC; } return std::error_code(); } //===----------------------------------------------------------------------===// // GVMaterializer implementation //===----------------------------------------------------------------------===// void BitcodeReader::releaseBuffer() { Buffer.release(); } std::error_code BitcodeReader::materialize(GlobalValue *GV) { if (std::error_code EC = materializeMetadata()) return EC; Function *F = dyn_cast(GV); // If it's not a function or is already material, ignore the request. if (!F || !F->isMaterializable()) return std::error_code(); DenseMap::iterator DFII = DeferredFunctionInfo.find(F); assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); // If its position is recorded as 0, its body is somewhere in the stream // but we haven't seen it yet. if (DFII->second == 0) if (std::error_code EC = findFunctionInStream(F, DFII)) return EC; // Move the bit stream to the saved position of the deferred function body. Stream.JumpToBit(DFII->second); if (std::error_code EC = parseFunctionBody(F)) return EC; F->setIsMaterializable(false); if (StripDebugInfo) stripDebugInfo(*F); // Upgrade any old intrinsic calls in the function. for (auto &I : UpgradedIntrinsics) { for (auto UI = I.first->materialized_user_begin(), UE = I.first->user_end(); UI != UE;) { User *U = *UI; ++UI; if (CallInst *CI = dyn_cast(U)) UpgradeIntrinsicCall(CI, I.second); } } // Finish fn->subprogram upgrade for materialized functions. if (DISubprogram *SP = FunctionsWithSPs.lookup(F)) F->setSubprogram(SP); // Bring in any functions that this function forward-referenced via // blockaddresses. return materializeForwardReferencedFunctions(); } std::error_code BitcodeReader::materializeModule() { if (std::error_code EC = materializeMetadata()) return EC; // Promise to materialize all forward references. 
WillMaterializeAllForwardRefs = true; // Iterate over the module, deserializing any functions that are still on // disk. for (Function &F : *TheModule) { if (std::error_code EC = materialize(&F)) return EC; } // At this point, if there are any function bodies, parse the rest of // the bits in the module past the last function block we have recorded // through either lazy scanning or the VST. if (LastFunctionBlockBit || NextUnreadBit) parseModule(LastFunctionBlockBit > NextUnreadBit ? LastFunctionBlockBit : NextUnreadBit); // Check that all block address forward references got resolved (as we // promised above). if (!BasicBlockFwdRefs.empty()) return error("Never resolved function from blockaddress"); // Upgrading intrinsic calls before TBAA can cause TBAA metadata to be lost, // to prevent this instructions with TBAA tags should be upgraded first. for (unsigned I = 0, E = InstsWithTBAATag.size(); I < E; I++) UpgradeInstWithTBAATag(InstsWithTBAATag[I]); // Upgrade any intrinsic calls that slipped through (should not happen!) and // delete the old functions to clean up. We can't do this unless the entire // module is materialized because there could always be another function body // with calls to the old function. 
for (auto &I : UpgradedIntrinsics) { for (auto *U : I.first->users()) { if (CallInst *CI = dyn_cast(U)) UpgradeIntrinsicCall(CI, I.second); } if (!I.first->use_empty()) I.first->replaceAllUsesWith(I.second); I.first->eraseFromParent(); } UpgradedIntrinsics.clear(); UpgradeDebugInfo(*TheModule); return std::error_code(); } std::vector BitcodeReader::getIdentifiedStructTypes() const { return IdentifiedStructTypes; } std::error_code BitcodeReader::initStream(std::unique_ptr Streamer) { if (Streamer) return initLazyStream(std::move(Streamer)); return initStreamFromBuffer(); } std::error_code BitcodeReader::initStreamFromBuffer() { const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart(); const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); if (Buffer->getBufferSize() & 3) return error("Invalid bitcode signature"); // If we have a wrapper header, parse it and ignore the non-bc file contents. // The magic number is 0x0B17C0DE stored in little endian. if (isBitcodeWrapper(BufPtr, BufEnd)) if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true)) return error("Invalid bitcode wrapper header"); StreamFile.reset(new BitstreamReader(BufPtr, BufEnd)); Stream.init(&*StreamFile); return std::error_code(); } std::error_code BitcodeReader::initLazyStream(std::unique_ptr Streamer) { // Check and strip off the bitcode wrapper; BitstreamReader expects never to // see it. 
auto OwnedBytes = llvm::make_unique(std::move(Streamer)); StreamingMemoryObject &Bytes = *OwnedBytes; StreamFile = llvm::make_unique(std::move(OwnedBytes)); Stream.init(&*StreamFile); unsigned char buf[16]; if (Bytes.readBytes(buf, 16, 0) != 16) return error("Invalid bitcode signature"); if (!isBitcode(buf, buf + 16)) return error("Invalid bitcode signature"); if (isBitcodeWrapper(buf, buf + 4)) { const unsigned char *bitcodeStart = buf; const unsigned char *bitcodeEnd = buf + 16; SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false); Bytes.dropLeadingBytes(bitcodeStart - buf); Bytes.setKnownObjectSize(bitcodeEnd - bitcodeStart); } return std::error_code(); } std::error_code ModuleSummaryIndexBitcodeReader::error(BitcodeError E, const Twine &Message) { return ::error(DiagnosticHandler, make_error_code(E), Message); } std::error_code ModuleSummaryIndexBitcodeReader::error(const Twine &Message) { return ::error(DiagnosticHandler, make_error_code(BitcodeError::CorruptedBitcode), Message); } std::error_code ModuleSummaryIndexBitcodeReader::error(BitcodeError E) { return ::error(DiagnosticHandler, make_error_code(E)); } ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader( MemoryBuffer *Buffer, DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy, bool CheckGlobalValSummaryPresenceOnly) : DiagnosticHandler(DiagnosticHandler), Buffer(Buffer), IsLazy(IsLazy), CheckGlobalValSummaryPresenceOnly(CheckGlobalValSummaryPresenceOnly) {} ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader( DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy, bool CheckGlobalValSummaryPresenceOnly) : DiagnosticHandler(DiagnosticHandler), Buffer(nullptr), IsLazy(IsLazy), CheckGlobalValSummaryPresenceOnly(CheckGlobalValSummaryPresenceOnly) {} void ModuleSummaryIndexBitcodeReader::freeState() { Buffer = nullptr; } void ModuleSummaryIndexBitcodeReader::releaseBuffer() { Buffer.release(); } uint64_t ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned 
ValueId) { auto VGI = ValueIdToCallGraphGUIDMap.find(ValueId); assert(VGI != ValueIdToCallGraphGUIDMap.end()); return VGI->second; } GlobalValueInfo * ModuleSummaryIndexBitcodeReader::getInfoFromSummaryOffset(uint64_t Offset) { auto I = SummaryOffsetToInfoMap.find(Offset); assert(I != SummaryOffsetToInfoMap.end()); return I->second; } // Specialized value symbol table parser used when reading module index // blocks where we don't actually create global values. // At the end of this routine the module index is populated with a map // from global value name to GlobalValueInfo. The global value info contains // the function block's bitcode offset (if applicable), or the offset into the // summary section for the combined index. std::error_code ModuleSummaryIndexBitcodeReader::parseValueSymbolTable( uint64_t Offset, DenseMap &ValueIdToLinkageMap) { assert(Offset > 0 && "Expected non-zero VST offset"); uint64_t CurrentBit = jumpToValueSymbolTable(Offset, Stream); if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) return error("Invalid record"); SmallVector Record; // Read all the records for this value table. SmallString<128> ValueName; while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: // Done parsing VST, jump back to wherever we came from. Stream.JumpToBit(CurrentBit); return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore (e.g. VST_CODE_BBENTRY records). 
break; case bitc::VST_CODE_ENTRY: { // VST_CODE_ENTRY: [valueid, namechar x N] if (convertToString(Record, 1, ValueName)) return error("Invalid record"); unsigned ValueID = Record[0]; std::unique_ptr GlobalValInfo = llvm::make_unique(); assert(!SourceFileName.empty()); auto VLI = ValueIdToLinkageMap.find(ValueID); assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); std::string GlobalId = GlobalValue::getGlobalIdentifier( ValueName, VLI->second, SourceFileName); TheIndex->addGlobalValueInfo(GlobalId, std::move(GlobalValInfo)); ValueIdToCallGraphGUIDMap[ValueID] = GlobalValue::getGUID(GlobalId); ValueName.clear(); break; } case bitc::VST_CODE_FNENTRY: { // VST_CODE_FNENTRY: [valueid, offset, namechar x N] if (convertToString(Record, 2, ValueName)) return error("Invalid record"); unsigned ValueID = Record[0]; uint64_t FuncOffset = Record[1]; assert(!IsLazy && "Lazy summary read only supported for combined index"); std::unique_ptr FuncInfo = llvm::make_unique(FuncOffset); assert(!SourceFileName.empty()); auto VLI = ValueIdToLinkageMap.find(ValueID); assert(VLI != ValueIdToLinkageMap.end() && "No linkage found for VST entry?"); std::string FunctionGlobalId = GlobalValue::getGlobalIdentifier( ValueName, VLI->second, SourceFileName); TheIndex->addGlobalValueInfo(FunctionGlobalId, std::move(FuncInfo)); ValueIdToCallGraphGUIDMap[ValueID] = GlobalValue::getGUID(FunctionGlobalId); ValueName.clear(); break; } case bitc::VST_CODE_COMBINED_GVDEFENTRY: { // VST_CODE_COMBINED_GVDEFENTRY: [valueid, offset, guid] unsigned ValueID = Record[0]; uint64_t GlobalValSummaryOffset = Record[1]; uint64_t GlobalValGUID = Record[2]; std::unique_ptr GlobalValInfo = llvm::make_unique(GlobalValSummaryOffset); SummaryOffsetToInfoMap[GlobalValSummaryOffset] = GlobalValInfo.get(); TheIndex->addGlobalValueInfo(GlobalValGUID, std::move(GlobalValInfo)); ValueIdToCallGraphGUIDMap[ValueID] = GlobalValGUID; break; } case bitc::VST_CODE_COMBINED_ENTRY: { // 
VST_CODE_COMBINED_ENTRY: [valueid, refguid] unsigned ValueID = Record[0]; uint64_t RefGUID = Record[1]; ValueIdToCallGraphGUIDMap[ValueID] = RefGUID; break; } } } } // Parse just the blocks needed for building the index out of the module. // At the end of this routine the module Index is populated with a map // from global value name to GlobalValueInfo. The global value info contains // either the parsed summary information (when parsing summaries // eagerly), or just to the summary record's offset // if parsing lazily (IsLazy). std::error_code ModuleSummaryIndexBitcodeReader::parseModule() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); SmallVector Record; DenseMap ValueIdToLinkageMap; unsigned ValueId = 0; // Read the index for this module. while (1) { BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::SubBlock: if (CheckGlobalValSummaryPresenceOnly) { if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) { SeenGlobalValSummary = true; // No need to parse the rest since we found the summary. return std::error_code(); } if (Stream.SkipBlock()) return error("Invalid record"); continue; } switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) return error("Invalid record"); break; case bitc::BLOCKINFO_BLOCK_ID: // Need to parse these to get abbrev ids (e.g. for VST) if (Stream.ReadBlockInfoBlock()) return error("Malformed block"); break; case bitc::VALUE_SYMTAB_BLOCK_ID: // Should have been parsed earlier via VSTOffset, unless there // is no summary section. 
assert(((SeenValueSymbolTable && VSTOffset > 0) || !SeenGlobalValSummary) && "Expected early VST parse via VSTOffset record"); if (Stream.SkipBlock()) return error("Invalid record"); break; case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: assert(VSTOffset > 0 && "Expected non-zero VST offset"); assert(!SeenValueSymbolTable && "Already read VST when parsing summary block?"); if (std::error_code EC = parseValueSymbolTable(VSTOffset, ValueIdToLinkageMap)) return EC; SeenValueSymbolTable = true; SeenGlobalValSummary = true; if (IsLazy) { // Lazy parsing of summary info, skip it. if (Stream.SkipBlock()) return error("Invalid record"); } else if (std::error_code EC = parseEntireSummary()) return EC; break; case bitc::MODULE_STRTAB_BLOCK_ID: if (std::error_code EC = parseModuleStringTable()) return EC; break; } continue; - case BitstreamEntry::Record: - // Once we find the last record of interest, skip the rest. - if (VSTOffset > 0) - Stream.skipRecord(Entry.ID); - else { + case BitstreamEntry::Record: { Record.clear(); auto BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: break; // Default behavior, ignore unknown content. /// MODULE_CODE_SOURCE_FILENAME: [namechar x N] case bitc::MODULE_CODE_SOURCE_FILENAME: { SmallString<128> ValueName; if (convertToString(Record, 0, ValueName)) return error("Invalid record"); SourceFileName = ValueName.c_str(); break; } + /// MODULE_CODE_HASH: [5*i32] + case bitc::MODULE_CODE_HASH: { + if (Record.size() != 5) + return error("Invalid hash length " + Twine(Record.size()).str()); + if (!TheIndex) + break; + if (TheIndex->modulePaths().empty()) + // Does not have any summary emitted. 
+ break; + if (TheIndex->modulePaths().size() != 1) + return error("Don't expect multiple modules defined?"); + auto &Hash = TheIndex->modulePaths().begin()->second.second; + int Pos = 0; + for (auto &Val : Record) { + assert(!(Val >> 32) && "Unexpected high bits set"); + Hash[Pos++] = Val; + } + break; + } /// MODULE_CODE_VSTOFFSET: [offset] case bitc::MODULE_CODE_VSTOFFSET: if (Record.size() < 1) return error("Invalid record"); VSTOffset = Record[0]; break; // GLOBALVAR: [pointer type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass, // comdat] case bitc::MODULE_CODE_GLOBALVAR: { if (Record.size() < 6) return error("Invalid record"); uint64_t RawLinkage = Record[3]; GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); ValueIdToLinkageMap[ValueId++] = Linkage; break; } // FUNCTION: [type, callingconv, isproto, linkage, paramattr, // alignment, section, visibility, gc, unnamed_addr, // prologuedata, dllstorageclass, comdat, prefixdata] case bitc::MODULE_CODE_FUNCTION: { if (Record.size() < 8) return error("Invalid record"); uint64_t RawLinkage = Record[3]; GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); ValueIdToLinkageMap[ValueId++] = Linkage; break; } // ALIAS: [alias type, addrspace, aliasee val#, linkage, visibility, // dllstorageclass] case bitc::MODULE_CODE_ALIAS: { if (Record.size() < 6) return error("Invalid record"); uint64_t RawLinkage = Record[3]; GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage); ValueIdToLinkageMap[ValueId++] = Linkage; break; } } } continue; } } } // Eagerly parse the entire summary block. This populates the GlobalValueSummary // objects in the index. 
std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() { if (Stream.EnterSubBlock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID)) return error("Invalid record"); SmallVector Record; bool Combined = false; while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: // For a per-module index, remove any entries that still have empty // summaries. The VST parsing creates entries eagerly for all symbols, // but not all have associated summaries (e.g. it doesn't know how to // distinguish between VST_CODE_ENTRY for function declarations vs global // variables with initializers that end up with a summary). Remove those // entries now so that we don't need to rely on the combined index merger // to clean them up (especially since that may not run for the first // module's index if we merge into that). if (!Combined) TheIndex->removeEmptySummaryEntries(); return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; } // Read a record. The record format depends on whether this // is a per-module index or a combined index file. In the per-module // case the records contain the associated value's ID for correlation // with VST entries. In the combined index the correlation is done // via the bitcode offset of the summary records (which were saved // in the combined index VST entries). The records also contain // information used for ThinLTO renaming and importing. Record.clear(); uint64_t CurRecordBit = Stream.GetCurrentBitNo(); auto BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: ignore. 
break; // FS_PERMODULE: [valueid, linkage, instcount, numrefs, numrefs x valueid, // n x (valueid, callsitecount)] // FS_PERMODULE_PROFILE: [valueid, linkage, instcount, numrefs, // numrefs x valueid, // n x (valueid, callsitecount, profilecount)] case bitc::FS_PERMODULE: case bitc::FS_PERMODULE_PROFILE: { unsigned ValueID = Record[0]; uint64_t RawLinkage = Record[1]; unsigned InstCount = Record[2]; unsigned NumRefs = Record[3]; std::unique_ptr FS = llvm::make_unique( getDecodedLinkage(RawLinkage), InstCount); // The module path string ref set in the summary must be owned by the // index's module string table. Since we don't have a module path // string table section in the per-module index, we create a single // module path string table entry with an empty (0) ID to take // ownership. FS->setModulePath( - TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)); + TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first()); static int RefListStartIndex = 4; int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs; assert(Record.size() >= RefListStartIndex + NumRefs && "Record size inconsistent with number of references"); for (unsigned I = 4, E = CallGraphEdgeStartIndex; I != E; ++I) { unsigned RefValueId = Record[I]; uint64_t RefGUID = getGUIDFromValueId(RefValueId); FS->addRefEdge(RefGUID); } bool HasProfile = (BitCode == bitc::FS_PERMODULE_PROFILE); for (unsigned I = CallGraphEdgeStartIndex, E = Record.size(); I != E; ++I) { unsigned CalleeValueId = Record[I]; unsigned CallsiteCount = Record[++I]; uint64_t ProfileCount = HasProfile ? 
Record[++I] : 0; uint64_t CalleeGUID = getGUIDFromValueId(CalleeValueId); FS->addCallGraphEdge(CalleeGUID, CalleeInfo(CallsiteCount, ProfileCount)); } uint64_t GUID = getGUIDFromValueId(ValueID); auto InfoList = TheIndex->findGlobalValueInfoList(GUID); assert(InfoList != TheIndex->end() && "Expected VST parse to create GlobalValueInfo entry"); assert(InfoList->second.size() == 1 && "Expected a single GlobalValueInfo per GUID in module"); auto &Info = InfoList->second[0]; assert(!Info->summary() && "Expected a single summary per VST entry"); Info->setSummary(std::move(FS)); break; } // FS_PERMODULE_GLOBALVAR_INIT_REFS: [valueid, linkage, n x valueid] case bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS: { unsigned ValueID = Record[0]; uint64_t RawLinkage = Record[1]; std::unique_ptr FS = llvm::make_unique(getDecodedLinkage(RawLinkage)); FS->setModulePath( - TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)); + TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first()); for (unsigned I = 2, E = Record.size(); I != E; ++I) { unsigned RefValueId = Record[I]; uint64_t RefGUID = getGUIDFromValueId(RefValueId); FS->addRefEdge(RefGUID); } uint64_t GUID = getGUIDFromValueId(ValueID); auto InfoList = TheIndex->findGlobalValueInfoList(GUID); assert(InfoList != TheIndex->end() && "Expected VST parse to create GlobalValueInfo entry"); assert(InfoList->second.size() == 1 && "Expected a single GlobalValueInfo per GUID in module"); auto &Info = InfoList->second[0]; assert(!Info->summary() && "Expected a single summary per VST entry"); Info->setSummary(std::move(FS)); break; } // FS_COMBINED: [modid, linkage, instcount, numrefs, numrefs x valueid, // n x (valueid, callsitecount)] // FS_COMBINED_PROFILE: [modid, linkage, instcount, numrefs, // numrefs x valueid, // n x (valueid, callsitecount, profilecount)] case bitc::FS_COMBINED: case bitc::FS_COMBINED_PROFILE: { uint64_t ModuleId = Record[0]; uint64_t RawLinkage = Record[1]; unsigned InstCount = Record[2]; unsigned 
NumRefs = Record[3]; std::unique_ptr FS = llvm::make_unique( getDecodedLinkage(RawLinkage), InstCount); FS->setModulePath(ModuleIdMap[ModuleId]); static int RefListStartIndex = 4; int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs; assert(Record.size() >= RefListStartIndex + NumRefs && "Record size inconsistent with number of references"); for (unsigned I = 4, E = CallGraphEdgeStartIndex; I != E; ++I) { unsigned RefValueId = Record[I]; uint64_t RefGUID = getGUIDFromValueId(RefValueId); FS->addRefEdge(RefGUID); } bool HasProfile = (BitCode == bitc::FS_COMBINED_PROFILE); for (unsigned I = CallGraphEdgeStartIndex, E = Record.size(); I != E; ++I) { unsigned CalleeValueId = Record[I]; unsigned CallsiteCount = Record[++I]; uint64_t ProfileCount = HasProfile ? Record[++I] : 0; uint64_t CalleeGUID = getGUIDFromValueId(CalleeValueId); FS->addCallGraphEdge(CalleeGUID, CalleeInfo(CallsiteCount, ProfileCount)); } auto *Info = getInfoFromSummaryOffset(CurRecordBit); assert(!Info->summary() && "Expected a single summary per VST entry"); Info->setSummary(std::move(FS)); Combined = true; break; } // FS_COMBINED_GLOBALVAR_INIT_REFS: [modid, linkage, n x valueid] case bitc::FS_COMBINED_GLOBALVAR_INIT_REFS: { uint64_t ModuleId = Record[0]; uint64_t RawLinkage = Record[1]; std::unique_ptr FS = llvm::make_unique(getDecodedLinkage(RawLinkage)); FS->setModulePath(ModuleIdMap[ModuleId]); for (unsigned I = 2, E = Record.size(); I != E; ++I) { unsigned RefValueId = Record[I]; uint64_t RefGUID = getGUIDFromValueId(RefValueId); FS->addRefEdge(RefGUID); } auto *Info = getInfoFromSummaryOffset(CurRecordBit); assert(!Info->summary() && "Expected a single summary per VST entry"); Info->setSummary(std::move(FS)); Combined = true; break; } } } llvm_unreachable("Exit infinite loop"); } // Parse the module string table block into the Index. // This populates the ModulePathStringTable map in the index. 
std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() { if (Stream.EnterSubBlock(bitc::MODULE_STRTAB_BLOCK_ID)) return error("Invalid record"); SmallVector Record; SmallString<128> ModulePath; + ModulePathStringTableTy::iterator LastSeenModulePath; while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; } Record.clear(); switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; case bitc::MST_CODE_ENTRY: { // MST_ENTRY: [modid, namechar x N] + uint64_t ModuleId = Record[0]; + if (convertToString(Record, 1, ModulePath)) return error("Invalid record"); - uint64_t ModuleId = Record[0]; - StringRef ModulePathInMap = TheIndex->addModulePath(ModulePath, ModuleId); - ModuleIdMap[ModuleId] = ModulePathInMap; + + LastSeenModulePath = TheIndex->addModulePath(ModulePath, ModuleId); + ModuleIdMap[ModuleId] = LastSeenModulePath->first(); + ModulePath.clear(); break; } + /// MST_CODE_HASH: [5*i32] + case bitc::MST_CODE_HASH: { + if (Record.size() != 5) + return error("Invalid hash length " + Twine(Record.size()).str()); + if (LastSeenModulePath == TheIndex->modulePaths().end()) + return error("Invalid hash that does not follow a module path"); + int Pos = 0; + for (auto &Val : Record) { + assert(!(Val >> 32) && "Unexpected high bits set"); + LastSeenModulePath->second.second[Pos++] = Val; + } + // Reset LastSeenModulePath to avoid overriding the hash unexpectedly. + LastSeenModulePath = TheIndex->modulePaths().end(); + break; + } } } llvm_unreachable("Exit infinite loop"); } // Parse the function info index from the bitcode streamer into the given index. 
std::error_code ModuleSummaryIndexBitcodeReader::parseSummaryIndexInto( std::unique_ptr Streamer, ModuleSummaryIndex *I) { TheIndex = I; if (std::error_code EC = initStream(std::move(Streamer))) return EC; // Sniff for the signature. if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (1) { if (Stream.AtEndOfStream()) { // We didn't really read a proper Module block. return error("Malformed block"); } BitstreamEntry Entry = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); if (Entry.Kind != BitstreamEntry::SubBlock) return error("Malformed block"); // If we see a MODULE_BLOCK, parse it to find the blocks needed for // building the function summary index. if (Entry.ID == bitc::MODULE_BLOCK_ID) return parseModule(); if (Stream.SkipBlock()) return error("Invalid record"); } } // Parse the summary information at the given offset in the buffer into // the index. Used to support lazy parsing of summaries from the // combined index during importing. // TODO: This function is not yet complete as it won't have a consumer // until ThinLTO function importing is added. std::error_code ModuleSummaryIndexBitcodeReader::parseGlobalValueSummary( std::unique_ptr Streamer, ModuleSummaryIndex *I, size_t SummaryOffset) { TheIndex = I; if (std::error_code EC = initStream(std::move(Streamer))) return EC; // Sniff for the signature. if (!hasValidBitcodeHeader(Stream)) return error("Invalid bitcode signature"); Stream.JumpToBit(SummaryOffset); BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); switch (Entry.Kind) { default: return error("Malformed block"); case BitstreamEntry::Record: // The expected case. break; } // TODO: Read a record. This interface will be completed when ThinLTO // importing is added so that it can be tested. 
SmallVector Record; switch (Stream.readRecord(Entry.ID, Record)) { case bitc::FS_COMBINED: case bitc::FS_COMBINED_PROFILE: case bitc::FS_COMBINED_GLOBALVAR_INIT_REFS: default: return error("Invalid record"); } return std::error_code(); } std::error_code ModuleSummaryIndexBitcodeReader::initStream( std::unique_ptr Streamer) { if (Streamer) return initLazyStream(std::move(Streamer)); return initStreamFromBuffer(); } std::error_code ModuleSummaryIndexBitcodeReader::initStreamFromBuffer() { const unsigned char *BufPtr = (const unsigned char *)Buffer->getBufferStart(); const unsigned char *BufEnd = BufPtr + Buffer->getBufferSize(); if (Buffer->getBufferSize() & 3) return error("Invalid bitcode signature"); // If we have a wrapper header, parse it and ignore the non-bc file contents. // The magic number is 0x0B17C0DE stored in little endian. if (isBitcodeWrapper(BufPtr, BufEnd)) if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true)) return error("Invalid bitcode wrapper header"); StreamFile.reset(new BitstreamReader(BufPtr, BufEnd)); Stream.init(&*StreamFile); return std::error_code(); } std::error_code ModuleSummaryIndexBitcodeReader::initLazyStream( std::unique_ptr Streamer) { // Check and strip off the bitcode wrapper; BitstreamReader expects never to // see it. 
auto OwnedBytes = llvm::make_unique(std::move(Streamer)); StreamingMemoryObject &Bytes = *OwnedBytes; StreamFile = llvm::make_unique(std::move(OwnedBytes)); Stream.init(&*StreamFile); unsigned char buf[16]; if (Bytes.readBytes(buf, 16, 0) != 16) return error("Invalid bitcode signature"); if (!isBitcode(buf, buf + 16)) return error("Invalid bitcode signature"); if (isBitcodeWrapper(buf, buf + 4)) { const unsigned char *bitcodeStart = buf; const unsigned char *bitcodeEnd = buf + 16; SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false); Bytes.dropLeadingBytes(bitcodeStart - buf); Bytes.setKnownObjectSize(bitcodeEnd - bitcodeStart); } return std::error_code(); } namespace { class BitcodeErrorCategoryType : public std::error_category { const char *name() const LLVM_NOEXCEPT override { return "llvm.bitcode"; } std::string message(int IE) const override { BitcodeError E = static_cast(IE); switch (E) { case BitcodeError::InvalidBitcodeSignature: return "Invalid bitcode signature"; case BitcodeError::CorruptedBitcode: return "Corrupted bitcode"; } llvm_unreachable("Unknown error type!"); } }; } // end anonymous namespace static ManagedStatic ErrorCategory; const std::error_category &llvm::BitcodeErrorCategory() { return *ErrorCategory; } //===----------------------------------------------------------------------===// // External interface //===----------------------------------------------------------------------===// static ErrorOr> getBitcodeModuleImpl(std::unique_ptr Streamer, StringRef Name, BitcodeReader *R, LLVMContext &Context, bool MaterializeAll, bool ShouldLazyLoadMetadata) { std::unique_ptr M = make_unique(Name, Context); M->setMaterializer(R); auto cleanupOnError = [&](std::error_code EC) { R->releaseBuffer(); // Never take ownership on error. return EC; }; // Delay parsing Metadata if ShouldLazyLoadMetadata is true. 
if (std::error_code EC = R->parseBitcodeInto(std::move(Streamer), M.get(), ShouldLazyLoadMetadata)) return cleanupOnError(EC); if (MaterializeAll) { // Read in the entire module, and destroy the BitcodeReader. if (std::error_code EC = M->materializeAll()) return cleanupOnError(EC); } else { // Resolve forward references from blockaddresses. if (std::error_code EC = R->materializeForwardReferencedFunctions()) return cleanupOnError(EC); } return std::move(M); } /// \brief Get a lazy one-at-time loading module from bitcode. /// /// This isn't always used in a lazy context. In particular, it's also used by /// \a parseBitcodeFile(). If this is truly lazy, then we need to eagerly pull /// in forward-referenced functions from block address references. /// /// \param[in] MaterializeAll Set to \c true if we should materialize /// everything. static ErrorOr> getLazyBitcodeModuleImpl(std::unique_ptr &&Buffer, LLVMContext &Context, bool MaterializeAll, bool ShouldLazyLoadMetadata = false) { BitcodeReader *R = new BitcodeReader(Buffer.get(), Context); ErrorOr> Ret = getBitcodeModuleImpl(nullptr, Buffer->getBufferIdentifier(), R, Context, MaterializeAll, ShouldLazyLoadMetadata); if (!Ret) return Ret; Buffer.release(); // The BitcodeReader owns it now. 
return Ret; } ErrorOr> llvm::getLazyBitcodeModule(std::unique_ptr &&Buffer, LLVMContext &Context, bool ShouldLazyLoadMetadata) { return getLazyBitcodeModuleImpl(std::move(Buffer), Context, false, ShouldLazyLoadMetadata); } ErrorOr> llvm::getStreamedBitcodeModule(StringRef Name, std::unique_ptr Streamer, LLVMContext &Context) { std::unique_ptr M = make_unique(Name, Context); BitcodeReader *R = new BitcodeReader(Context); return getBitcodeModuleImpl(std::move(Streamer), Name, R, Context, false, false); } ErrorOr> llvm::parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context) { std::unique_ptr Buf = MemoryBuffer::getMemBuffer(Buffer, false); return getLazyBitcodeModuleImpl(std::move(Buf), Context, true); // TODO: Restore the use-lists to the in-memory state when the bitcode was // written. We must defer until the Module has been fully materialized. } std::string llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer, LLVMContext &Context) { std::unique_ptr Buf = MemoryBuffer::getMemBuffer(Buffer, false); auto R = llvm::make_unique(Buf.release(), Context); ErrorOr Triple = R->parseTriple(); if (Triple.getError()) return ""; return Triple.get(); } std::string llvm::getBitcodeProducerString(MemoryBufferRef Buffer, LLVMContext &Context) { std::unique_ptr Buf = MemoryBuffer::getMemBuffer(Buffer, false); BitcodeReader R(Buf.release(), Context); ErrorOr ProducerString = R.parseIdentificationBlock(); if (ProducerString.getError()) return ""; return ProducerString.get(); } // Parse the specified bitcode buffer, returning the function info index. // If IsLazy is false, parse the entire function summary into // the index. Otherwise skip the function summary section, and only create // an index object with a map from function name to function summary offset. // The index is used to perform lazy function summary reading later. 
ErrorOr> llvm::getModuleSummaryIndex(MemoryBufferRef Buffer, DiagnosticHandlerFunction DiagnosticHandler, bool IsLazy) { std::unique_ptr Buf = MemoryBuffer::getMemBuffer(Buffer, false); ModuleSummaryIndexBitcodeReader R(Buf.get(), DiagnosticHandler, IsLazy); auto Index = llvm::make_unique(); auto cleanupOnError = [&](std::error_code EC) { R.releaseBuffer(); // Never take ownership on error. return EC; }; if (std::error_code EC = R.parseSummaryIndexInto(nullptr, Index.get())) return cleanupOnError(EC); Buf.release(); // The ModuleSummaryIndexBitcodeReader owns it now. return std::move(Index); } // Check if the given bitcode buffer contains a global value summary block. bool llvm::hasGlobalValueSummary(MemoryBufferRef Buffer, DiagnosticHandlerFunction DiagnosticHandler) { std::unique_ptr Buf = MemoryBuffer::getMemBuffer(Buffer, false); ModuleSummaryIndexBitcodeReader R(Buf.get(), DiagnosticHandler, false, true); auto cleanupOnError = [&](std::error_code EC) { R.releaseBuffer(); // Never take ownership on error. return false; }; if (std::error_code EC = R.parseSummaryIndexInto(nullptr, nullptr)) return cleanupOnError(EC); Buf.release(); // The ModuleSummaryIndexBitcodeReader owns it now. return R.foundGlobalValSummary(); } // This method supports lazy reading of summary data from the combined // index during ThinLTO function importing. When reading the combined index // file, getModuleSummaryIndex is first invoked with IsLazy=true. // Then this method is called for each value considered for importing, // to parse the summary information for the given value name into // the index. std::error_code llvm::readGlobalValueSummary( MemoryBufferRef Buffer, DiagnosticHandlerFunction DiagnosticHandler, StringRef ValueName, std::unique_ptr Index) { std::unique_ptr Buf = MemoryBuffer::getMemBuffer(Buffer, false); ModuleSummaryIndexBitcodeReader R(Buf.get(), DiagnosticHandler); auto cleanupOnError = [&](std::error_code EC) { R.releaseBuffer(); // Never take ownership on error. 
return EC; }; // Lookup the given value name in the GlobalValueMap, which may // contain a list of global value infos in the case of a COMDAT. Walk through // and parse each summary info at the summary offset // recorded when parsing the value symbol table. for (const auto &FI : Index->getGlobalValueInfoList(ValueName)) { size_t SummaryOffset = FI->bitcodeIndex(); if (std::error_code EC = R.parseGlobalValueSummary(nullptr, Index.get(), SummaryOffset)) return cleanupOnError(EC); } Buf.release(); // The ModuleSummaryIndexBitcodeReader owns it now. return std::error_code(); } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index c15134168175..18fb7ad8d111 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1,3408 +1,3464 @@ //===--- Bitcode/Writer/BitcodeWriter.cpp - Bitcode Writer ----------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Bitcode writer implementation. 
// //===----------------------------------------------------------------------===// #include "ValueEnumerator.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BlockFrequencyInfoImpl.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Bitcode/BitstreamWriter.h" #include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/UseListOrder.h" #include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Program.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SHA1.h" #include #include using namespace llvm; /// These are manifest constants used by the bitcode writer. They do not need to /// be kept in sync with the reader, but need to be consistent within this file. enum { // VALUE_SYMTAB_BLOCK abbrev id's. VST_ENTRY_8_ABBREV = bitc::FIRST_APPLICATION_ABBREV, VST_ENTRY_7_ABBREV, VST_ENTRY_6_ABBREV, VST_BBENTRY_6_ABBREV, // CONSTANTS_BLOCK abbrev id's. CONSTANTS_SETTYPE_ABBREV = bitc::FIRST_APPLICATION_ABBREV, CONSTANTS_INTEGER_ABBREV, CONSTANTS_CE_CAST_Abbrev, CONSTANTS_NULL_Abbrev, // FUNCTION_BLOCK abbrev id's. 
FUNCTION_INST_LOAD_ABBREV = bitc::FIRST_APPLICATION_ABBREV, FUNCTION_INST_BINOP_ABBREV, FUNCTION_INST_BINOP_FLAGS_ABBREV, FUNCTION_INST_CAST_ABBREV, FUNCTION_INST_RET_VOID_ABBREV, FUNCTION_INST_RET_VAL_ABBREV, FUNCTION_INST_UNREACHABLE_ABBREV, FUNCTION_INST_GEP_ABBREV, }; static unsigned GetEncodedCastOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unknown cast instruction!"); case Instruction::Trunc : return bitc::CAST_TRUNC; case Instruction::ZExt : return bitc::CAST_ZEXT; case Instruction::SExt : return bitc::CAST_SEXT; case Instruction::FPToUI : return bitc::CAST_FPTOUI; case Instruction::FPToSI : return bitc::CAST_FPTOSI; case Instruction::UIToFP : return bitc::CAST_UITOFP; case Instruction::SIToFP : return bitc::CAST_SITOFP; case Instruction::FPTrunc : return bitc::CAST_FPTRUNC; case Instruction::FPExt : return bitc::CAST_FPEXT; case Instruction::PtrToInt: return bitc::CAST_PTRTOINT; case Instruction::IntToPtr: return bitc::CAST_INTTOPTR; case Instruction::BitCast : return bitc::CAST_BITCAST; case Instruction::AddrSpaceCast: return bitc::CAST_ADDRSPACECAST; } } static unsigned GetEncodedBinaryOpcode(unsigned Opcode) { switch (Opcode) { default: llvm_unreachable("Unknown binary instruction!"); case Instruction::Add: case Instruction::FAdd: return bitc::BINOP_ADD; case Instruction::Sub: case Instruction::FSub: return bitc::BINOP_SUB; case Instruction::Mul: case Instruction::FMul: return bitc::BINOP_MUL; case Instruction::UDiv: return bitc::BINOP_UDIV; case Instruction::FDiv: case Instruction::SDiv: return bitc::BINOP_SDIV; case Instruction::URem: return bitc::BINOP_UREM; case Instruction::FRem: case Instruction::SRem: return bitc::BINOP_SREM; case Instruction::Shl: return bitc::BINOP_SHL; case Instruction::LShr: return bitc::BINOP_LSHR; case Instruction::AShr: return bitc::BINOP_ASHR; case Instruction::And: return bitc::BINOP_AND; case Instruction::Or: return bitc::BINOP_OR; case Instruction::Xor: return bitc::BINOP_XOR; } } static 
unsigned GetEncodedRMWOperation(AtomicRMWInst::BinOp Op) { switch (Op) { default: llvm_unreachable("Unknown RMW operation!"); case AtomicRMWInst::Xchg: return bitc::RMW_XCHG; case AtomicRMWInst::Add: return bitc::RMW_ADD; case AtomicRMWInst::Sub: return bitc::RMW_SUB; case AtomicRMWInst::And: return bitc::RMW_AND; case AtomicRMWInst::Nand: return bitc::RMW_NAND; case AtomicRMWInst::Or: return bitc::RMW_OR; case AtomicRMWInst::Xor: return bitc::RMW_XOR; case AtomicRMWInst::Max: return bitc::RMW_MAX; case AtomicRMWInst::Min: return bitc::RMW_MIN; case AtomicRMWInst::UMax: return bitc::RMW_UMAX; case AtomicRMWInst::UMin: return bitc::RMW_UMIN; } } static unsigned GetEncodedOrdering(AtomicOrdering Ordering) { switch (Ordering) { case NotAtomic: return bitc::ORDERING_NOTATOMIC; case Unordered: return bitc::ORDERING_UNORDERED; case Monotonic: return bitc::ORDERING_MONOTONIC; case Acquire: return bitc::ORDERING_ACQUIRE; case Release: return bitc::ORDERING_RELEASE; case AcquireRelease: return bitc::ORDERING_ACQREL; case SequentiallyConsistent: return bitc::ORDERING_SEQCST; } llvm_unreachable("Invalid ordering"); } static unsigned GetEncodedSynchScope(SynchronizationScope SynchScope) { switch (SynchScope) { case SingleThread: return bitc::SYNCHSCOPE_SINGLETHREAD; case CrossThread: return bitc::SYNCHSCOPE_CROSSTHREAD; } llvm_unreachable("Invalid synch scope"); } static void WriteStringRecord(unsigned Code, StringRef Str, unsigned AbbrevToUse, BitstreamWriter &Stream) { SmallVector Vals; // Code: [strchar x N] for (unsigned i = 0, e = Str.size(); i != e; ++i) { if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(Str[i])) AbbrevToUse = 0; Vals.push_back(Str[i]); } // Emit the finished record. 
Stream.EmitRecord(Code, Vals, AbbrevToUse); } static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { switch (Kind) { case Attribute::Alignment: return bitc::ATTR_KIND_ALIGNMENT; case Attribute::AlwaysInline: return bitc::ATTR_KIND_ALWAYS_INLINE; case Attribute::ArgMemOnly: return bitc::ATTR_KIND_ARGMEMONLY; case Attribute::Builtin: return bitc::ATTR_KIND_BUILTIN; case Attribute::ByVal: return bitc::ATTR_KIND_BY_VAL; case Attribute::Convergent: return bitc::ATTR_KIND_CONVERGENT; case Attribute::InAlloca: return bitc::ATTR_KIND_IN_ALLOCA; case Attribute::Cold: return bitc::ATTR_KIND_COLD; case Attribute::InaccessibleMemOnly: return bitc::ATTR_KIND_INACCESSIBLEMEM_ONLY; case Attribute::InaccessibleMemOrArgMemOnly: return bitc::ATTR_KIND_INACCESSIBLEMEM_OR_ARGMEMONLY; case Attribute::InlineHint: return bitc::ATTR_KIND_INLINE_HINT; case Attribute::InReg: return bitc::ATTR_KIND_IN_REG; case Attribute::JumpTable: return bitc::ATTR_KIND_JUMP_TABLE; case Attribute::MinSize: return bitc::ATTR_KIND_MIN_SIZE; case Attribute::Naked: return bitc::ATTR_KIND_NAKED; case Attribute::Nest: return bitc::ATTR_KIND_NEST; case Attribute::NoAlias: return bitc::ATTR_KIND_NO_ALIAS; case Attribute::NoBuiltin: return bitc::ATTR_KIND_NO_BUILTIN; case Attribute::NoCapture: return bitc::ATTR_KIND_NO_CAPTURE; case Attribute::NoDuplicate: return bitc::ATTR_KIND_NO_DUPLICATE; case Attribute::NoImplicitFloat: return bitc::ATTR_KIND_NO_IMPLICIT_FLOAT; case Attribute::NoInline: return bitc::ATTR_KIND_NO_INLINE; case Attribute::NoRecurse: return bitc::ATTR_KIND_NO_RECURSE; case Attribute::NonLazyBind: return bitc::ATTR_KIND_NON_LAZY_BIND; case Attribute::NonNull: return bitc::ATTR_KIND_NON_NULL; case Attribute::Dereferenceable: return bitc::ATTR_KIND_DEREFERENCEABLE; case Attribute::DereferenceableOrNull: return bitc::ATTR_KIND_DEREFERENCEABLE_OR_NULL; case Attribute::NoRedZone: return bitc::ATTR_KIND_NO_RED_ZONE; case Attribute::NoReturn: return bitc::ATTR_KIND_NO_RETURN; case 
Attribute::NoUnwind: return bitc::ATTR_KIND_NO_UNWIND; case Attribute::OptimizeForSize: return bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE; case Attribute::OptimizeNone: return bitc::ATTR_KIND_OPTIMIZE_NONE; case Attribute::ReadNone: return bitc::ATTR_KIND_READ_NONE; case Attribute::ReadOnly: return bitc::ATTR_KIND_READ_ONLY; case Attribute::Returned: return bitc::ATTR_KIND_RETURNED; case Attribute::ReturnsTwice: return bitc::ATTR_KIND_RETURNS_TWICE; case Attribute::SExt: return bitc::ATTR_KIND_S_EXT; case Attribute::StackAlignment: return bitc::ATTR_KIND_STACK_ALIGNMENT; case Attribute::StackProtect: return bitc::ATTR_KIND_STACK_PROTECT; case Attribute::StackProtectReq: return bitc::ATTR_KIND_STACK_PROTECT_REQ; case Attribute::StackProtectStrong: return bitc::ATTR_KIND_STACK_PROTECT_STRONG; case Attribute::SafeStack: return bitc::ATTR_KIND_SAFESTACK; case Attribute::StructRet: return bitc::ATTR_KIND_STRUCT_RET; case Attribute::SanitizeAddress: return bitc::ATTR_KIND_SANITIZE_ADDRESS; case Attribute::SanitizeThread: return bitc::ATTR_KIND_SANITIZE_THREAD; case Attribute::SanitizeMemory: return bitc::ATTR_KIND_SANITIZE_MEMORY; case Attribute::SwiftSelf: return bitc::ATTR_KIND_SWIFT_SELF; case Attribute::UWTable: return bitc::ATTR_KIND_UW_TABLE; case Attribute::ZExt: return bitc::ATTR_KIND_Z_EXT; case Attribute::EndAttrKinds: llvm_unreachable("Can not encode end-attribute kinds marker."); case Attribute::None: llvm_unreachable("Can not encode none-attribute."); } llvm_unreachable("Trying to encode unknown attribute"); } static void WriteAttributeGroupTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { const std::vector &AttrGrps = VE.getAttributeGroups(); if (AttrGrps.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3); SmallVector Record; for (unsigned i = 0, e = AttrGrps.size(); i != e; ++i) { AttributeSet AS = AttrGrps[i]; for (unsigned i = 0, e = AS.getNumSlots(); i != e; ++i) { AttributeSet A = AS.getSlotAttributes(i); 
Record.push_back(VE.getAttributeGroupID(A)); Record.push_back(AS.getSlotIndex(i)); for (AttributeSet::iterator I = AS.begin(0), E = AS.end(0); I != E; ++I) { Attribute Attr = *I; if (Attr.isEnumAttribute()) { Record.push_back(0); Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); } else if (Attr.isIntAttribute()) { Record.push_back(1); Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); Record.push_back(Attr.getValueAsInt()); } else { StringRef Kind = Attr.getKindAsString(); StringRef Val = Attr.getValueAsString(); Record.push_back(Val.empty() ? 3 : 4); Record.append(Kind.begin(), Kind.end()); Record.push_back(0); if (!Val.empty()) { Record.append(Val.begin(), Val.end()); Record.push_back(0); } } } Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record); Record.clear(); } } Stream.ExitBlock(); } static void WriteAttributeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { const std::vector &Attrs = VE.getAttributes(); if (Attrs.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3); SmallVector Record; for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { const AttributeSet &A = Attrs[i]; for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) Record.push_back(VE.getAttributeGroupID(A.getSlotAttributes(i))); Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); Record.clear(); } Stream.ExitBlock(); } /// WriteTypeTable - Write out the type table for a module. static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { const ValueEnumerator::TypeList &TypeList = VE.getTypes(); Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */); SmallVector TypeVals; uint64_t NumBits = VE.computeBitsRequiredForTypeIndicies(); // Abbrev for TYPE_CODE_POINTER. 
BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_POINTER)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); Abbv->Add(BitCodeAbbrevOp(0)); // Addrspace = 0 unsigned PtrAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for TYPE_CODE_FUNCTION. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for TYPE_CODE_STRUCT_ANON. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for TYPE_CODE_STRUCT_NAME. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); unsigned StructNameAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for TYPE_CODE_STRUCT_NAMED. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for TYPE_CODE_ARRAY. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // size Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned ArrayAbbrev = Stream.EmitAbbrev(Abbv); // Emit an entry count so the reader can reserve space. 
TypeVals.push_back(TypeList.size()); Stream.EmitRecord(bitc::TYPE_CODE_NUMENTRY, TypeVals); TypeVals.clear(); // Loop over all of the types, emitting each in turn. for (unsigned i = 0, e = TypeList.size(); i != e; ++i) { Type *T = TypeList[i]; int AbbrevToUse = 0; unsigned Code = 0; switch (T->getTypeID()) { case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break; case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break; case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break; case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break; case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break; case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break; case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break; case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break; case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break; case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break; case Type::TokenTyID: Code = bitc::TYPE_CODE_TOKEN; break; case Type::IntegerTyID: // INTEGER: [width] Code = bitc::TYPE_CODE_INTEGER; TypeVals.push_back(cast(T)->getBitWidth()); break; case Type::PointerTyID: { PointerType *PTy = cast(T); // POINTER: [pointee type, address space] Code = bitc::TYPE_CODE_POINTER; TypeVals.push_back(VE.getTypeID(PTy->getElementType())); unsigned AddressSpace = PTy->getAddressSpace(); TypeVals.push_back(AddressSpace); if (AddressSpace == 0) AbbrevToUse = PtrAbbrev; break; } case Type::FunctionTyID: { FunctionType *FT = cast(T); // FUNCTION: [isvararg, retty, paramty x N] Code = bitc::TYPE_CODE_FUNCTION; TypeVals.push_back(FT->isVarArg()); TypeVals.push_back(VE.getTypeID(FT->getReturnType())); for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) TypeVals.push_back(VE.getTypeID(FT->getParamType(i))); AbbrevToUse = FunctionAbbrev; break; } case Type::StructTyID: { StructType *ST = cast(T); // STRUCT: [ispacked, eltty x N] TypeVals.push_back(ST->isPacked()); // Output all of the element types. 
for (StructType::element_iterator I = ST->element_begin(), E = ST->element_end(); I != E; ++I) TypeVals.push_back(VE.getTypeID(*I)); if (ST->isLiteral()) { Code = bitc::TYPE_CODE_STRUCT_ANON; AbbrevToUse = StructAnonAbbrev; } else { if (ST->isOpaque()) { Code = bitc::TYPE_CODE_OPAQUE; } else { Code = bitc::TYPE_CODE_STRUCT_NAMED; AbbrevToUse = StructNamedAbbrev; } // Emit the name if it is present. if (!ST->getName().empty()) WriteStringRecord(bitc::TYPE_CODE_STRUCT_NAME, ST->getName(), StructNameAbbrev, Stream); } break; } case Type::ArrayTyID: { ArrayType *AT = cast(T); // ARRAY: [numelts, eltty] Code = bitc::TYPE_CODE_ARRAY; TypeVals.push_back(AT->getNumElements()); TypeVals.push_back(VE.getTypeID(AT->getElementType())); AbbrevToUse = ArrayAbbrev; break; } case Type::VectorTyID: { VectorType *VT = cast(T); // VECTOR [numelts, eltty] Code = bitc::TYPE_CODE_VECTOR; TypeVals.push_back(VT->getNumElements()); TypeVals.push_back(VE.getTypeID(VT->getElementType())); break; } } // Emit the finished record. 
Stream.EmitRecord(Code, TypeVals, AbbrevToUse); TypeVals.clear(); } Stream.ExitBlock(); } static unsigned getEncodedLinkage(const GlobalValue::LinkageTypes Linkage) { switch (Linkage) { case GlobalValue::ExternalLinkage: return 0; case GlobalValue::WeakAnyLinkage: return 16; case GlobalValue::AppendingLinkage: return 2; case GlobalValue::InternalLinkage: return 3; case GlobalValue::LinkOnceAnyLinkage: return 18; case GlobalValue::ExternalWeakLinkage: return 7; case GlobalValue::CommonLinkage: return 8; case GlobalValue::PrivateLinkage: return 9; case GlobalValue::WeakODRLinkage: return 17; case GlobalValue::LinkOnceODRLinkage: return 19; case GlobalValue::AvailableExternallyLinkage: return 12; } llvm_unreachable("Invalid linkage"); } static unsigned getEncodedLinkage(const GlobalValue &GV) { return getEncodedLinkage(GV.getLinkage()); } static unsigned getEncodedVisibility(const GlobalValue &GV) { switch (GV.getVisibility()) { case GlobalValue::DefaultVisibility: return 0; case GlobalValue::HiddenVisibility: return 1; case GlobalValue::ProtectedVisibility: return 2; } llvm_unreachable("Invalid visibility"); } static unsigned getEncodedDLLStorageClass(const GlobalValue &GV) { switch (GV.getDLLStorageClass()) { case GlobalValue::DefaultStorageClass: return 0; case GlobalValue::DLLImportStorageClass: return 1; case GlobalValue::DLLExportStorageClass: return 2; } llvm_unreachable("Invalid DLL storage class"); } static unsigned getEncodedThreadLocalMode(const GlobalValue &GV) { switch (GV.getThreadLocalMode()) { case GlobalVariable::NotThreadLocal: return 0; case GlobalVariable::GeneralDynamicTLSModel: return 1; case GlobalVariable::LocalDynamicTLSModel: return 2; case GlobalVariable::InitialExecTLSModel: return 3; case GlobalVariable::LocalExecTLSModel: return 4; } llvm_unreachable("Invalid TLS model"); } static unsigned getEncodedComdatSelectionKind(const Comdat &C) { switch (C.getSelectionKind()) { case Comdat::Any: return bitc::COMDAT_SELECTION_KIND_ANY; case 
Comdat::ExactMatch: return bitc::COMDAT_SELECTION_KIND_EXACT_MATCH; case Comdat::Largest: return bitc::COMDAT_SELECTION_KIND_LARGEST; case Comdat::NoDuplicates: return bitc::COMDAT_SELECTION_KIND_NO_DUPLICATES; case Comdat::SameSize: return bitc::COMDAT_SELECTION_KIND_SAME_SIZE; } llvm_unreachable("Invalid selection kind"); } static void writeComdats(const ValueEnumerator &VE, BitstreamWriter &Stream) { SmallVector Vals; for (const Comdat *C : VE.getComdats()) { // COMDAT: [selection_kind, name] Vals.push_back(getEncodedComdatSelectionKind(*C)); size_t Size = C->getName().size(); assert(isUInt<32>(Size)); Vals.push_back(Size); for (char Chr : C->getName()) Vals.push_back((unsigned char)Chr); Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0); Vals.clear(); } } /// Write a record that will eventually hold the word offset of the /// module-level VST. For now the offset is 0, which will be backpatched /// after the real VST is written. Returns the bit offset to backpatch. static uint64_t WriteValueSymbolTableForwardDecl(BitstreamWriter &Stream) { // Write a placeholder value in for the offset of the real VST, // which is written after the function blocks so that it can include // the offset of each function. The placeholder offset will be // updated when the real VST is written. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_VSTOFFSET)); // Blocks are 32-bit aligned, so we can use a 32-bit word offset to // hold the real VST offset. Must use fixed instead of VBR as we don't // know how many VBR chunks to reserve ahead of time. Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); unsigned VSTOffsetAbbrev = Stream.EmitAbbrev(Abbv); // Emit the placeholder uint64_t Vals[] = {bitc::MODULE_CODE_VSTOFFSET, 0}; Stream.EmitRecordWithAbbrev(VSTOffsetAbbrev, Vals); // Compute and return the bit offset to the placeholder, which will be // patched when the real VST is written. 
We can simply subtract the 32-bit // fixed size from the current bit number to get the location to backpatch. return Stream.GetCurrentBitNo() - 32; } enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 }; /// Determine the encoding to use for the given string name and length. static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) { bool isChar6 = true; for (const char *C = Str, *E = C + StrLen; C != E; ++C) { if (isChar6) isChar6 = BitCodeAbbrevOp::isChar6(*C); if ((unsigned char)*C & 128) // don't bother scanning the rest. return SE_Fixed8; } if (isChar6) return SE_Char6; else return SE_Fixed7; } /// Emit top-level description of module, including target triple, inline asm, /// descriptors for global variables, and function prototype info. /// Returns the bit offset to backpatch with the location of the real VST. static uint64_t WriteModuleInfo(const Module *M, const ValueEnumerator &VE, BitstreamWriter &Stream) { // Emit various pieces of data attached to a module. if (!M->getTargetTriple().empty()) WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(), 0/*TODO*/, Stream); const std::string &DL = M->getDataLayoutStr(); if (!DL.empty()) WriteStringRecord(bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/, Stream); if (!M->getModuleInlineAsm().empty()) WriteStringRecord(bitc::MODULE_CODE_ASM, M->getModuleInlineAsm(), 0/*TODO*/, Stream); // Emit information about sections and GC, computing how many there are. Also // compute the maximum alignment value. std::map SectionMap; std::map GCMap; unsigned MaxAlignment = 0; unsigned MaxGlobalType = 0; for (const GlobalValue &GV : M->globals()) { MaxAlignment = std::max(MaxAlignment, GV.getAlignment()); MaxGlobalType = std::max(MaxGlobalType, VE.getTypeID(GV.getValueType())); if (GV.hasSection()) { // Give section names unique ID's. 
unsigned &Entry = SectionMap[GV.getSection()]; if (!Entry) { WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV.getSection(), 0/*TODO*/, Stream); Entry = SectionMap.size(); } } } for (const Function &F : *M) { MaxAlignment = std::max(MaxAlignment, F.getAlignment()); if (F.hasSection()) { // Give section names unique ID's. unsigned &Entry = SectionMap[F.getSection()]; if (!Entry) { WriteStringRecord(bitc::MODULE_CODE_SECTIONNAME, F.getSection(), 0/*TODO*/, Stream); Entry = SectionMap.size(); } } if (F.hasGC()) { // Same for GC names. unsigned &Entry = GCMap[F.getGC()]; if (!Entry) { WriteStringRecord(bitc::MODULE_CODE_GCNAME, F.getGC(), 0/*TODO*/, Stream); Entry = GCMap.size(); } } } // Emit abbrev for globals, now that we know # sections and max alignment. unsigned SimpleGVarAbbrev = 0; if (!M->global_empty()) { // Add an abbrev for common globals with no visibility or thread localness. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_GLOBALVAR)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(MaxGlobalType+1))); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // AddrSpace << 2 //| explicitType << 1 //| constant Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Initializer. Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // Linkage. if (MaxAlignment == 0) // Alignment. Abbv->Add(BitCodeAbbrevOp(0)); else { unsigned MaxEncAlignment = Log2_32(MaxAlignment)+1; Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(MaxEncAlignment+1))); } if (SectionMap.empty()) // Section. Abbv->Add(BitCodeAbbrevOp(0)); else Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(SectionMap.size()+1))); // Don't bother emitting vis + thread local. SimpleGVarAbbrev = Stream.EmitAbbrev(Abbv); } // Emit the global variable information. 
SmallVector Vals; for (const GlobalVariable &GV : M->globals()) { unsigned AbbrevToUse = 0; // GLOBALVAR: [type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, externally_initialized, dllstorageclass, // comdat] Vals.push_back(VE.getTypeID(GV.getValueType())); Vals.push_back(GV.getType()->getAddressSpace() << 2 | 2 | GV.isConstant()); Vals.push_back(GV.isDeclaration() ? 0 : (VE.getValueID(GV.getInitializer()) + 1)); Vals.push_back(getEncodedLinkage(GV)); Vals.push_back(Log2_32(GV.getAlignment())+1); Vals.push_back(GV.hasSection() ? SectionMap[GV.getSection()] : 0); if (GV.isThreadLocal() || GV.getVisibility() != GlobalValue::DefaultVisibility || GV.hasUnnamedAddr() || GV.isExternallyInitialized() || GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass || GV.hasComdat()) { Vals.push_back(getEncodedVisibility(GV)); Vals.push_back(getEncodedThreadLocalMode(GV)); Vals.push_back(GV.hasUnnamedAddr()); Vals.push_back(GV.isExternallyInitialized()); Vals.push_back(getEncodedDLLStorageClass(GV)); Vals.push_back(GV.hasComdat() ? VE.getComdatID(GV.getComdat()) : 0); } else { AbbrevToUse = SimpleGVarAbbrev; } Stream.EmitRecord(bitc::MODULE_CODE_GLOBALVAR, Vals, AbbrevToUse); Vals.clear(); } // Emit the function proto information. for (const Function &F : *M) { // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, // section, visibility, gc, unnamed_addr, prologuedata, // dllstorageclass, comdat, prefixdata, personalityfn] Vals.push_back(VE.getTypeID(F.getFunctionType())); Vals.push_back(F.getCallingConv()); Vals.push_back(F.isDeclaration()); Vals.push_back(getEncodedLinkage(F)); Vals.push_back(VE.getAttributeID(F.getAttributes())); Vals.push_back(Log2_32(F.getAlignment())+1); Vals.push_back(F.hasSection() ? SectionMap[F.getSection()] : 0); Vals.push_back(getEncodedVisibility(F)); Vals.push_back(F.hasGC() ? GCMap[F.getGC()] : 0); Vals.push_back(F.hasUnnamedAddr()); Vals.push_back(F.hasPrologueData() ? 
(VE.getValueID(F.getPrologueData()) + 1) : 0); Vals.push_back(getEncodedDLLStorageClass(F)); Vals.push_back(F.hasComdat() ? VE.getComdatID(F.getComdat()) : 0); Vals.push_back(F.hasPrefixData() ? (VE.getValueID(F.getPrefixData()) + 1) : 0); Vals.push_back( F.hasPersonalityFn() ? (VE.getValueID(F.getPersonalityFn()) + 1) : 0); unsigned AbbrevToUse = 0; Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse); Vals.clear(); } // Emit the alias information. for (const GlobalAlias &A : M->aliases()) { // ALIAS: [alias type, aliasee val#, linkage, visibility] Vals.push_back(VE.getTypeID(A.getValueType())); Vals.push_back(A.getType()->getAddressSpace()); Vals.push_back(VE.getValueID(A.getAliasee())); Vals.push_back(getEncodedLinkage(A)); Vals.push_back(getEncodedVisibility(A)); Vals.push_back(getEncodedDLLStorageClass(A)); Vals.push_back(getEncodedThreadLocalMode(A)); Vals.push_back(A.hasUnnamedAddr()); unsigned AbbrevToUse = 0; Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse); Vals.clear(); } // Emit the module's source file name. { StringEncoding Bits = getStringEncoding(M->getSourceFileName().data(), M->getSourceFileName().size()); BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8); if (Bits == SE_Char6) AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6); else if (Bits == SE_Fixed7) AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7); // MODULE_CODE_SOURCE_FILENAME: [namechar x N] BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::MODULE_CODE_SOURCE_FILENAME)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(AbbrevOpToUse); unsigned FilenameAbbrev = Stream.EmitAbbrev(Abbv); for (const auto P : M->getSourceFileName()) Vals.push_back((unsigned char)P); // Emit the finished record. Stream.EmitRecord(bitc::MODULE_CODE_SOURCE_FILENAME, Vals, FilenameAbbrev); Vals.clear(); } // If we have a VST, write the VSTOFFSET record placeholder and return // its offset. 
if (M->getValueSymbolTable().empty()) return 0; return WriteValueSymbolTableForwardDecl(Stream); } static uint64_t GetOptimizationFlags(const Value *V) { uint64_t Flags = 0; if (const auto *OBO = dyn_cast(V)) { if (OBO->hasNoSignedWrap()) Flags |= 1 << bitc::OBO_NO_SIGNED_WRAP; if (OBO->hasNoUnsignedWrap()) Flags |= 1 << bitc::OBO_NO_UNSIGNED_WRAP; } else if (const auto *PEO = dyn_cast(V)) { if (PEO->isExact()) Flags |= 1 << bitc::PEO_EXACT; } else if (const auto *FPMO = dyn_cast(V)) { if (FPMO->hasUnsafeAlgebra()) Flags |= FastMathFlags::UnsafeAlgebra; if (FPMO->hasNoNaNs()) Flags |= FastMathFlags::NoNaNs; if (FPMO->hasNoInfs()) Flags |= FastMathFlags::NoInfs; if (FPMO->hasNoSignedZeros()) Flags |= FastMathFlags::NoSignedZeros; if (FPMO->hasAllowReciprocal()) Flags |= FastMathFlags::AllowReciprocal; } return Flags; } static void writeValueAsMetadata(const ValueAsMetadata *MD, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record) { // Mimic an MDNode with a value as one operand. Value *V = MD->getValue(); Record.push_back(VE.getTypeID(V->getType())); Record.push_back(VE.getValueID(V)); Stream.EmitRecord(bitc::METADATA_VALUE, Record, 0); Record.clear(); } static void writeMDTuple(const MDTuple *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { Metadata *MD = N->getOperand(i); assert(!(MD && isa(MD)) && "Unexpected function-local metadata"); Record.push_back(VE.getMetadataOrNullID(MD)); } Stream.EmitRecord(N->isDistinct() ? bitc::METADATA_DISTINCT_NODE : bitc::METADATA_NODE, Record, Abbrev); Record.clear(); } static unsigned createDILocationAbbrev(BitstreamWriter &Stream) { // Assume the column is usually under 128, and always output the inlined-at // location (it's never more expensive than building an array size 1). 
BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_LOCATION)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); return Stream.EmitAbbrev(Abbv); } static void writeDILocation(const DILocation *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned &Abbrev) { if (!Abbrev) Abbrev = createDILocationAbbrev(Stream); Record.push_back(N->isDistinct()); Record.push_back(N->getLine()); Record.push_back(N->getColumn()); Record.push_back(VE.getMetadataID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getInlinedAt())); Stream.EmitRecord(bitc::METADATA_LOCATION, Record, Abbrev); Record.clear(); } static unsigned createGenericDINodeAbbrev(BitstreamWriter &Stream) { // Assume the column is usually under 128, and always output the inlined-at // location (it's never more expensive than building an array size 1). BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_GENERIC_DEBUG)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); return Stream.EmitAbbrev(Abbv); } static void writeGenericDINode(const GenericDINode *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned &Abbrev) { if (!Abbrev) Abbrev = createGenericDINodeAbbrev(Stream); Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(0); // Per-tag version field; unused for now. 
for (auto &I : N->operands()) Record.push_back(VE.getMetadataOrNullID(I)); Stream.EmitRecord(bitc::METADATA_GENERIC_DEBUG, Record, Abbrev); Record.clear(); } static uint64_t rotateSign(int64_t I) { uint64_t U = I; return I < 0 ? ~(U << 1) : U << 1; } static void writeDISubrange(const DISubrange *N, const ValueEnumerator &, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getCount()); Record.push_back(rotateSign(N->getLowerBound())); Stream.EmitRecord(bitc::METADATA_SUBRANGE, Record, Abbrev); Record.clear(); } static void writeDIEnumerator(const DIEnumerator *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(rotateSign(N->getValue())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Stream.EmitRecord(bitc::METADATA_ENUMERATOR, Record, Abbrev); Record.clear(); } static void writeDIBasicType(const DIBasicType *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(N->getSizeInBits()); Record.push_back(N->getAlignInBits()); Record.push_back(N->getEncoding()); Stream.EmitRecord(bitc::METADATA_BASIC_TYPE, Record, Abbrev); Record.clear(); } static void writeDIDerivedType(const DIDerivedType *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getBaseType())); Record.push_back(N->getSizeInBits()); Record.push_back(N->getAlignInBits()); 
Record.push_back(N->getOffsetInBits()); Record.push_back(N->getFlags()); Record.push_back(VE.getMetadataOrNullID(N->getExtraData())); Stream.EmitRecord(bitc::METADATA_DERIVED_TYPE, Record, Abbrev); Record.clear(); } static void writeDICompositeType(const DICompositeType *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getBaseType())); Record.push_back(N->getSizeInBits()); Record.push_back(N->getAlignInBits()); Record.push_back(N->getOffsetInBits()); Record.push_back(N->getFlags()); Record.push_back(VE.getMetadataOrNullID(N->getElements().get())); Record.push_back(N->getRuntimeLang()); Record.push_back(VE.getMetadataOrNullID(N->getVTableHolder())); Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get())); Record.push_back(VE.getMetadataOrNullID(N->getRawIdentifier())); Stream.EmitRecord(bitc::METADATA_COMPOSITE_TYPE, Record, Abbrev); Record.clear(); } static void writeDISubroutineType(const DISubroutineType *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getFlags()); Record.push_back(VE.getMetadataOrNullID(N->getTypeArray().get())); Stream.EmitRecord(bitc::METADATA_SUBROUTINE_TYPE, Record, Abbrev); Record.clear(); } static void writeDIFile(const DIFile *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getRawFilename())); Record.push_back(VE.getMetadataOrNullID(N->getRawDirectory())); Stream.EmitRecord(bitc::METADATA_FILE, Record, Abbrev); Record.clear(); } static 
void writeDICompileUnit(const DICompileUnit *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { assert(N->isDistinct() && "Expected distinct compile units"); Record.push_back(/* IsDistinct */ true); Record.push_back(N->getSourceLanguage()); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getRawProducer())); Record.push_back(N->isOptimized()); Record.push_back(VE.getMetadataOrNullID(N->getRawFlags())); Record.push_back(N->getRuntimeVersion()); Record.push_back(VE.getMetadataOrNullID(N->getRawSplitDebugFilename())); Record.push_back(N->getEmissionKind()); Record.push_back(VE.getMetadataOrNullID(N->getEnumTypes().get())); Record.push_back(VE.getMetadataOrNullID(N->getRetainedTypes().get())); Record.push_back(VE.getMetadataOrNullID(N->getSubprograms().get())); Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables().get())); Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get())); Record.push_back(N->getDWOId()); Record.push_back(VE.getMetadataOrNullID(N->getMacros().get())); Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev); Record.clear(); } static void writeDISubprogram(const DISubprogram *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getType())); Record.push_back(N->isLocalToUnit()); Record.push_back(N->isDefinition()); Record.push_back(N->getScopeLine()); Record.push_back(VE.getMetadataOrNullID(N->getContainingType())); Record.push_back(N->getVirtuality()); Record.push_back(N->getVirtualIndex()); Record.push_back(N->getFlags()); 
Record.push_back(N->isOptimized()); Record.push_back(VE.getMetadataOrNullID(N->getTemplateParams().get())); Record.push_back(VE.getMetadataOrNullID(N->getDeclaration())); Record.push_back(VE.getMetadataOrNullID(N->getVariables().get())); Stream.EmitRecord(bitc::METADATA_SUBPROGRAM, Record, Abbrev); Record.clear(); } static void writeDILexicalBlock(const DILexicalBlock *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(N->getColumn()); Stream.EmitRecord(bitc::METADATA_LEXICAL_BLOCK, Record, Abbrev); Record.clear(); } static void writeDILexicalBlockFile(const DILexicalBlockFile *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getDiscriminator()); Stream.EmitRecord(bitc::METADATA_LEXICAL_BLOCK_FILE, Record, Abbrev); Record.clear(); } static void writeDINamespace(const DINamespace *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(N->getLine()); Stream.EmitRecord(bitc::METADATA_NAMESPACE, Record, Abbrev); Record.clear(); } static void writeDIMacro(const DIMacro *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getMacinfoType()); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); 
Record.push_back(VE.getMetadataOrNullID(N->getRawValue())); Stream.EmitRecord(bitc::METADATA_MACRO, Record, Abbrev); Record.clear(); } static void writeDIMacroFile(const DIMacroFile *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getMacinfoType()); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(VE.getMetadataOrNullID(N->getElements().get())); Stream.EmitRecord(bitc::METADATA_MACRO_FILE, Record, Abbrev); Record.clear(); } static void writeDIModule(const DIModule *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); for (auto &I : N->operands()) Record.push_back(VE.getMetadataOrNullID(I)); Stream.EmitRecord(bitc::METADATA_MODULE, Record, Abbrev); Record.clear(); } static void writeDITemplateTypeParameter(const DITemplateTypeParameter *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getType())); Stream.EmitRecord(bitc::METADATA_TEMPLATE_TYPE, Record, Abbrev); Record.clear(); } static void writeDITemplateValueParameter(const DITemplateValueParameter *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getType())); Record.push_back(VE.getMetadataOrNullID(N->getValue())); Stream.EmitRecord(bitc::METADATA_TEMPLATE_VALUE, Record, Abbrev); Record.clear(); } static void writeDIGlobalVariable(const DIGlobalVariable *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { 
Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getType())); Record.push_back(N->isLocalToUnit()); Record.push_back(N->isDefinition()); Record.push_back(VE.getMetadataOrNullID(N->getRawVariable())); Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration())); Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev); Record.clear(); } static void writeDILocalVariable(const DILocalVariable *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getType())); Record.push_back(N->getArg()); Record.push_back(N->getFlags()); Stream.EmitRecord(bitc::METADATA_LOCAL_VAR, Record, Abbrev); Record.clear(); } static void writeDIExpression(const DIExpression *N, const ValueEnumerator &, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.reserve(N->getElements().size() + 1); Record.push_back(N->isDistinct()); Record.append(N->elements_begin(), N->elements_end()); Stream.EmitRecord(bitc::METADATA_EXPRESSION, Record, Abbrev); Record.clear(); } static void writeDIObjCProperty(const DIObjCProperty *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Record.push_back(VE.getMetadataOrNullID(N->getFile())); Record.push_back(N->getLine()); 
Record.push_back(VE.getMetadataOrNullID(N->getRawSetterName())); Record.push_back(VE.getMetadataOrNullID(N->getRawGetterName())); Record.push_back(N->getAttributes()); Record.push_back(VE.getMetadataOrNullID(N->getType())); Stream.EmitRecord(bitc::METADATA_OBJC_PROPERTY, Record, Abbrev); Record.clear(); } static void writeDIImportedEntity(const DIImportedEntity *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { Record.push_back(N->isDistinct()); Record.push_back(N->getTag()); Record.push_back(VE.getMetadataOrNullID(N->getScope())); Record.push_back(VE.getMetadataOrNullID(N->getEntity())); Record.push_back(N->getLine()); Record.push_back(VE.getMetadataOrNullID(N->getRawName())); Stream.EmitRecord(bitc::METADATA_IMPORTED_ENTITY, Record, Abbrev); Record.clear(); } static unsigned createNamedMetadataAbbrev(BitstreamWriter &Stream) { BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_NAME)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); return Stream.EmitAbbrev(Abbv); } static void writeNamedMetadata(const Module &M, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record) { if (M.named_metadata_empty()) return; unsigned Abbrev = createNamedMetadataAbbrev(Stream); for (const NamedMDNode &NMD : M.named_metadata()) { // Write name. StringRef Str = NMD.getName(); Record.append(Str.bytes_begin(), Str.bytes_end()); Stream.EmitRecord(bitc::METADATA_NAME, Record, Abbrev); Record.clear(); // Write named metadata operands. 
for (const MDNode *N : NMD.operands()) Record.push_back(VE.getMetadataID(N)); Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0); Record.clear(); } } static unsigned createMetadataStringsAbbrev(BitstreamWriter &Stream) { BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_STRINGS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // # of strings Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // offset to chars Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); return Stream.EmitAbbrev(Abbv); } /// Write out a record for MDString. /// /// All the metadata strings in a metadata block are emitted in a single /// record. The sizes and strings themselves are shoved into a blob. static void writeMetadataStrings(ArrayRef Strings, BitstreamWriter &Stream, SmallVectorImpl &Record) { if (Strings.empty()) return; // Start the record with the number of strings. Record.push_back(bitc::METADATA_STRINGS); Record.push_back(Strings.size()); // Emit the sizes of the strings in the blob. SmallString<256> Blob; { BitstreamWriter W(Blob); for (const Metadata *MD : Strings) W.EmitVBR(cast(MD)->getLength(), 6); W.FlushToWord(); } // Add the offset to the strings to the record. Record.push_back(Blob.size()); // Add the strings to the blob. for (const Metadata *MD : Strings) Blob.append(cast(MD)->getString()); // Emit the final record. Stream.EmitRecordWithBlob(createMetadataStringsAbbrev(Stream), Record, Blob); Record.clear(); } static void writeMetadataRecords(ArrayRef MDs, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record) { if (MDs.empty()) return; // Initialize MDNode abbreviations. 
#define HANDLE_MDNODE_LEAF(CLASS) unsigned CLASS##Abbrev = 0; #include "llvm/IR/Metadata.def" for (const Metadata *MD : MDs) { if (const MDNode *N = dyn_cast(MD)) { assert(N->isResolved() && "Expected forward references to be resolved"); switch (N->getMetadataID()) { default: llvm_unreachable("Invalid MDNode subclass"); #define HANDLE_MDNODE_LEAF(CLASS) \ case Metadata::CLASS##Kind: \ write##CLASS(cast(N), VE, Stream, Record, CLASS##Abbrev); \ continue; #include "llvm/IR/Metadata.def" } } writeValueAsMetadata(cast(MD), VE, Stream, Record); } } static void writeModuleMetadata(const Module &M, const ValueEnumerator &VE, BitstreamWriter &Stream) { if (VE.getMDs().empty() && M.named_metadata_empty()) return; Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); SmallVector Record; writeMetadataStrings(VE.getMDStrings(), Stream, Record); writeMetadataRecords(VE.getNonMDStrings(), VE, Stream, Record); writeNamedMetadata(M, VE, Stream, Record); Stream.ExitBlock(); } static void writeFunctionMetadata(const Function &F, const ValueEnumerator &VE, BitstreamWriter &Stream) { ArrayRef MDs = VE.getFunctionMDs(); if (MDs.empty()) return; Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); SmallVector Record; writeMetadataRecords(MDs, VE, Stream, Record); Stream.ExitBlock(); } static void WriteMetadataAttachment(const Function &F, const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::METADATA_ATTACHMENT_ID, 3); SmallVector Record; // Write metadata attachments // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]] SmallVector, 4> MDs; F.getAllMetadata(MDs); if (!MDs.empty()) { for (const auto &I : MDs) { Record.push_back(I.first); Record.push_back(VE.getMetadataID(I.second)); } Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0); Record.clear(); } for (const BasicBlock &BB : F) for (const Instruction &I : BB) { MDs.clear(); I.getAllMetadataOtherThanDebugLoc(MDs); // If no metadata, ignore instruction. 
if (MDs.empty()) continue; Record.push_back(VE.getInstructionID(&I)); for (unsigned i = 0, e = MDs.size(); i != e; ++i) { Record.push_back(MDs[i].first); Record.push_back(VE.getMetadataID(MDs[i].second)); } Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0); Record.clear(); } Stream.ExitBlock(); } static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) { SmallVector Record; // Write metadata kinds // METADATA_KIND - [n x [id, name]] SmallVector Names; M->getMDKindNames(Names); if (Names.empty()) return; Stream.EnterSubblock(bitc::METADATA_KIND_BLOCK_ID, 3); for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) { Record.push_back(MDKindID); StringRef KName = Names[MDKindID]; Record.append(KName.begin(), KName.end()); Stream.EmitRecord(bitc::METADATA_KIND, Record, 0); Record.clear(); } Stream.ExitBlock(); } static void WriteOperandBundleTags(const Module *M, BitstreamWriter &Stream) { // Write metadata kinds // // OPERAND_BUNDLE_TAGS_BLOCK_ID : N x OPERAND_BUNDLE_TAG // // OPERAND_BUNDLE_TAG - [strchr x N] SmallVector Tags; M->getOperandBundleTags(Tags); if (Tags.empty()) return; Stream.EnterSubblock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID, 3); SmallVector Record; for (auto Tag : Tags) { Record.append(Tag.begin(), Tag.end()); Stream.EmitRecord(bitc::OPERAND_BUNDLE_TAG, Record, 0); Record.clear(); } Stream.ExitBlock(); } static void emitSignedInt64(SmallVectorImpl &Vals, uint64_t V) { if ((int64_t)V >= 0) Vals.push_back(V << 1); else Vals.push_back((-V << 1) | 1); } static void WriteConstants(unsigned FirstVal, unsigned LastVal, const ValueEnumerator &VE, BitstreamWriter &Stream, bool isGlobal) { if (FirstVal == LastVal) return; Stream.EnterSubblock(bitc::CONSTANTS_BLOCK_ID, 4); unsigned AggregateAbbrev = 0; unsigned String8Abbrev = 0; unsigned CString7Abbrev = 0; unsigned CString6Abbrev = 0; // If this is a constant pool for the module, emit module-specific abbrevs. if (isGlobal) { // Abbrev for CST_CODE_AGGREGATE. 
BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_AGGREGATE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(LastVal+1))); AggregateAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for CST_CODE_STRING. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_STRING)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); String8Abbrev = Stream.EmitAbbrev(Abbv); // Abbrev for CST_CODE_CSTRING. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); CString7Abbrev = Stream.EmitAbbrev(Abbv); // Abbrev for CST_CODE_CSTRING. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CSTRING)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); CString6Abbrev = Stream.EmitAbbrev(Abbv); } SmallVector Record; const ValueEnumerator::ValueList &Vals = VE.getValues(); Type *LastTy = nullptr; for (unsigned i = FirstVal; i != LastVal; ++i) { const Value *V = Vals[i].first; // If we need to switch types, do so now. if (V->getType() != LastTy) { LastTy = V->getType(); Record.push_back(VE.getTypeID(LastTy)); Stream.EmitRecord(bitc::CST_CODE_SETTYPE, Record, CONSTANTS_SETTYPE_ABBREV); Record.clear(); } if (const InlineAsm *IA = dyn_cast(V)) { Record.push_back(unsigned(IA->hasSideEffects()) | unsigned(IA->isAlignStack()) << 1 | unsigned(IA->getDialect()&1) << 2); // Add the asm string. const std::string &AsmStr = IA->getAsmString(); Record.push_back(AsmStr.size()); Record.append(AsmStr.begin(), AsmStr.end()); // Add the constraint string. 
const std::string &ConstraintStr = IA->getConstraintString(); Record.push_back(ConstraintStr.size()); Record.append(ConstraintStr.begin(), ConstraintStr.end()); Stream.EmitRecord(bitc::CST_CODE_INLINEASM, Record); Record.clear(); continue; } const Constant *C = cast(V); unsigned Code = -1U; unsigned AbbrevToUse = 0; if (C->isNullValue()) { Code = bitc::CST_CODE_NULL; } else if (isa(C)) { Code = bitc::CST_CODE_UNDEF; } else if (const ConstantInt *IV = dyn_cast(C)) { if (IV->getBitWidth() <= 64) { uint64_t V = IV->getSExtValue(); emitSignedInt64(Record, V); Code = bitc::CST_CODE_INTEGER; AbbrevToUse = CONSTANTS_INTEGER_ABBREV; } else { // Wide integers, > 64 bits in size. // We have an arbitrary precision integer value to write whose // bit width is > 64. However, in canonical unsigned integer // format it is likely that the high bits are going to be zero. // So, we only write the number of active words. unsigned NWords = IV->getValue().getActiveWords(); const uint64_t *RawWords = IV->getValue().getRawData(); for (unsigned i = 0; i != NWords; ++i) { emitSignedInt64(Record, RawWords[i]); } Code = bitc::CST_CODE_WIDE_INTEGER; } } else if (const ConstantFP *CFP = dyn_cast(C)) { Code = bitc::CST_CODE_FLOAT; Type *Ty = CFP->getType(); if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) { Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); } else if (Ty->isX86_FP80Ty()) { // api needed to prevent premature destruction // bits are not in the same order as a normal i80 APInt, compensate. 
APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); Record.push_back((p[1] << 48) | (p[0] >> 16)); Record.push_back(p[0] & 0xffffLL); } else if (Ty->isFP128Ty() || Ty->isPPC_FP128Ty()) { APInt api = CFP->getValueAPF().bitcastToAPInt(); const uint64_t *p = api.getRawData(); Record.push_back(p[0]); Record.push_back(p[1]); } else { assert (0 && "Unknown FP type!"); } } else if (isa(C) && cast(C)->isString()) { const ConstantDataSequential *Str = cast(C); // Emit constant strings specially. unsigned NumElts = Str->getNumElements(); // If this is a null-terminated string, use the denser CSTRING encoding. if (Str->isCString()) { Code = bitc::CST_CODE_CSTRING; --NumElts; // Don't encode the null, which isn't allowed by char6. } else { Code = bitc::CST_CODE_STRING; AbbrevToUse = String8Abbrev; } bool isCStr7 = Code == bitc::CST_CODE_CSTRING; bool isCStrChar6 = Code == bitc::CST_CODE_CSTRING; for (unsigned i = 0; i != NumElts; ++i) { unsigned char V = Str->getElementAsInteger(i); Record.push_back(V); isCStr7 &= (V & 128) == 0; if (isCStrChar6) isCStrChar6 = BitCodeAbbrevOp::isChar6(V); } if (isCStrChar6) AbbrevToUse = CString6Abbrev; else if (isCStr7) AbbrevToUse = CString7Abbrev; } else if (const ConstantDataSequential *CDS = dyn_cast(C)) { Code = bitc::CST_CODE_DATA; Type *EltTy = CDS->getType()->getElementType(); if (isa(EltTy)) { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) Record.push_back(CDS->getElementAsInteger(i)); } else { for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) Record.push_back( CDS->getElementAsAPFloat(i).bitcastToAPInt().getLimitedValue()); } } else if (isa(C) || isa(C) || isa(C)) { Code = bitc::CST_CODE_AGGREGATE; for (const Value *Op : C->operands()) Record.push_back(VE.getValueID(Op)); AbbrevToUse = AggregateAbbrev; } else if (const ConstantExpr *CE = dyn_cast(C)) { switch (CE->getOpcode()) { default: if (Instruction::isCast(CE->getOpcode())) { Code = bitc::CST_CODE_CE_CAST; 
Record.push_back(GetEncodedCastOpcode(CE->getOpcode())); Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); Record.push_back(VE.getValueID(C->getOperand(0))); AbbrevToUse = CONSTANTS_CE_CAST_Abbrev; } else { assert(CE->getNumOperands() == 2 && "Unknown constant expr!"); Code = bitc::CST_CODE_CE_BINOP; Record.push_back(GetEncodedBinaryOpcode(CE->getOpcode())); Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); uint64_t Flags = GetOptimizationFlags(CE); if (Flags != 0) Record.push_back(Flags); } break; case Instruction::GetElementPtr: { Code = bitc::CST_CODE_CE_GEP; const auto *GO = cast(C); if (GO->isInBounds()) Code = bitc::CST_CODE_CE_INBOUNDS_GEP; Record.push_back(VE.getTypeID(GO->getSourceElementType())); for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) { Record.push_back(VE.getTypeID(C->getOperand(i)->getType())); Record.push_back(VE.getValueID(C->getOperand(i))); } break; } case Instruction::Select: Code = bitc::CST_CODE_CE_SELECT; Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(VE.getValueID(C->getOperand(2))); break; case Instruction::ExtractElement: Code = bitc::CST_CODE_CE_EXTRACTELT; Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getTypeID(C->getOperand(1)->getType())); Record.push_back(VE.getValueID(C->getOperand(1))); break; case Instruction::InsertElement: Code = bitc::CST_CODE_CE_INSERTELT; Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(VE.getTypeID(C->getOperand(2)->getType())); Record.push_back(VE.getValueID(C->getOperand(2))); break; case Instruction::ShuffleVector: // If the return type and argument types are the same, this is a // standard shufflevector instruction. 
If the types are different, // then the shuffle is widening or truncating the input vectors, and // the argument type must also be encoded. if (C->getType() == C->getOperand(0)->getType()) { Code = bitc::CST_CODE_CE_SHUFFLEVEC; } else { Code = bitc::CST_CODE_CE_SHUFVEC_EX; Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); } Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(VE.getValueID(C->getOperand(2))); break; case Instruction::ICmp: case Instruction::FCmp: Code = bitc::CST_CODE_CE_CMP; Record.push_back(VE.getTypeID(C->getOperand(0)->getType())); Record.push_back(VE.getValueID(C->getOperand(0))); Record.push_back(VE.getValueID(C->getOperand(1))); Record.push_back(CE->getPredicate()); break; } } else if (const BlockAddress *BA = dyn_cast(C)) { Code = bitc::CST_CODE_BLOCKADDRESS; Record.push_back(VE.getTypeID(BA->getFunction()->getType())); Record.push_back(VE.getValueID(BA->getFunction())); Record.push_back(VE.getGlobalBasicBlockID(BA->getBasicBlock())); } else { #ifndef NDEBUG C->dump(); #endif llvm_unreachable("Unknown constant!"); } Stream.EmitRecord(Code, Record, AbbrevToUse); Record.clear(); } Stream.ExitBlock(); } static void WriteModuleConstants(const ValueEnumerator &VE, BitstreamWriter &Stream) { const ValueEnumerator::ValueList &Vals = VE.getValues(); // Find the first constant to emit, which is the first non-globalvalue value. // We know globalvalues have been emitted by WriteModuleInfo. for (unsigned i = 0, e = Vals.size(); i != e; ++i) { if (!isa(Vals[i].first)) { WriteConstants(i, Vals.size(), VE, Stream, true); return; } } } /// PushValueAndType - The file has to encode both the value and type id for /// many values, because we need to know what type to create for forward /// references. However, most operands are not forward references, so this type /// field is not needed. /// /// This function adds V's value ID to Vals. 
If the value ID is higher than the /// instruction ID, then it is a forward reference, and it also includes the /// type ID. The value ID that is written is encoded relative to the InstID. static bool PushValueAndType(const Value *V, unsigned InstID, SmallVectorImpl &Vals, ValueEnumerator &VE) { unsigned ValID = VE.getValueID(V); // Make encoding relative to the InstID. Vals.push_back(InstID - ValID); if (ValID >= InstID) { Vals.push_back(VE.getTypeID(V->getType())); return true; } return false; } static void WriteOperandBundles(BitstreamWriter &Stream, ImmutableCallSite CS, unsigned InstID, ValueEnumerator &VE) { SmallVector Record; LLVMContext &C = CS.getInstruction()->getContext(); for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) { const auto &Bundle = CS.getOperandBundleAt(i); Record.push_back(C.getOperandBundleTagID(Bundle.getTagName())); for (auto &Input : Bundle.Inputs) PushValueAndType(Input, InstID, Record, VE); Stream.EmitRecord(bitc::FUNC_CODE_OPERAND_BUNDLE, Record); Record.clear(); } } /// pushValue - Like PushValueAndType, but where the type of the value is /// omitted (perhaps it was already encoded in an earlier operand). static void pushValue(const Value *V, unsigned InstID, SmallVectorImpl &Vals, ValueEnumerator &VE) { unsigned ValID = VE.getValueID(V); Vals.push_back(InstID - ValID); } static void pushValueSigned(const Value *V, unsigned InstID, SmallVectorImpl &Vals, ValueEnumerator &VE) { unsigned ValID = VE.getValueID(V); int64_t diff = ((int32_t)InstID - (int32_t)ValID); emitSignedInt64(Vals, diff); } /// WriteInstruction - Emit an instruction to the specified stream. 
static void WriteInstruction(const Instruction &I, unsigned InstID, ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Vals) { unsigned Code = 0; unsigned AbbrevToUse = 0; VE.setInstructionID(&I); switch (I.getOpcode()) { default: if (Instruction::isCast(I.getOpcode())) { Code = bitc::FUNC_CODE_INST_CAST; if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) AbbrevToUse = FUNCTION_INST_CAST_ABBREV; Vals.push_back(VE.getTypeID(I.getType())); Vals.push_back(GetEncodedCastOpcode(I.getOpcode())); } else { assert(isa(I) && "Unknown instruction!"); Code = bitc::FUNC_CODE_INST_BINOP; if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) AbbrevToUse = FUNCTION_INST_BINOP_ABBREV; pushValue(I.getOperand(1), InstID, Vals, VE); Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode())); uint64_t Flags = GetOptimizationFlags(&I); if (Flags != 0) { if (AbbrevToUse == FUNCTION_INST_BINOP_ABBREV) AbbrevToUse = FUNCTION_INST_BINOP_FLAGS_ABBREV; Vals.push_back(Flags); } } break; case Instruction::GetElementPtr: { Code = bitc::FUNC_CODE_INST_GEP; AbbrevToUse = FUNCTION_INST_GEP_ABBREV; auto &GEPInst = cast(I); Vals.push_back(GEPInst.isInBounds()); Vals.push_back(VE.getTypeID(GEPInst.getSourceElementType())); for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) PushValueAndType(I.getOperand(i), InstID, Vals, VE); break; } case Instruction::ExtractValue: { Code = bitc::FUNC_CODE_INST_EXTRACTVAL; PushValueAndType(I.getOperand(0), InstID, Vals, VE); const ExtractValueInst *EVI = cast(&I); Vals.append(EVI->idx_begin(), EVI->idx_end()); break; } case Instruction::InsertValue: { Code = bitc::FUNC_CODE_INST_INSERTVAL; PushValueAndType(I.getOperand(0), InstID, Vals, VE); PushValueAndType(I.getOperand(1), InstID, Vals, VE); const InsertValueInst *IVI = cast(&I); Vals.append(IVI->idx_begin(), IVI->idx_end()); break; } case Instruction::Select: Code = bitc::FUNC_CODE_INST_VSELECT; PushValueAndType(I.getOperand(1), InstID, Vals, VE); pushValue(I.getOperand(2), InstID, Vals, 
VE); PushValueAndType(I.getOperand(0), InstID, Vals, VE); break; case Instruction::ExtractElement: Code = bitc::FUNC_CODE_INST_EXTRACTELT; PushValueAndType(I.getOperand(0), InstID, Vals, VE); PushValueAndType(I.getOperand(1), InstID, Vals, VE); break; case Instruction::InsertElement: Code = bitc::FUNC_CODE_INST_INSERTELT; PushValueAndType(I.getOperand(0), InstID, Vals, VE); pushValue(I.getOperand(1), InstID, Vals, VE); PushValueAndType(I.getOperand(2), InstID, Vals, VE); break; case Instruction::ShuffleVector: Code = bitc::FUNC_CODE_INST_SHUFFLEVEC; PushValueAndType(I.getOperand(0), InstID, Vals, VE); pushValue(I.getOperand(1), InstID, Vals, VE); pushValue(I.getOperand(2), InstID, Vals, VE); break; case Instruction::ICmp: case Instruction::FCmp: { // compare returning Int1Ty or vector of Int1Ty Code = bitc::FUNC_CODE_INST_CMP2; PushValueAndType(I.getOperand(0), InstID, Vals, VE); pushValue(I.getOperand(1), InstID, Vals, VE); Vals.push_back(cast(I).getPredicate()); uint64_t Flags = GetOptimizationFlags(&I); if (Flags != 0) Vals.push_back(Flags); break; } case Instruction::Ret: { Code = bitc::FUNC_CODE_INST_RET; unsigned NumOperands = I.getNumOperands(); if (NumOperands == 0) AbbrevToUse = FUNCTION_INST_RET_VOID_ABBREV; else if (NumOperands == 1) { if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) AbbrevToUse = FUNCTION_INST_RET_VAL_ABBREV; } else { for (unsigned i = 0, e = NumOperands; i != e; ++i) PushValueAndType(I.getOperand(i), InstID, Vals, VE); } } break; case Instruction::Br: { Code = bitc::FUNC_CODE_INST_BR; const BranchInst &II = cast(I); Vals.push_back(VE.getValueID(II.getSuccessor(0))); if (II.isConditional()) { Vals.push_back(VE.getValueID(II.getSuccessor(1))); pushValue(II.getCondition(), InstID, Vals, VE); } } break; case Instruction::Switch: { Code = bitc::FUNC_CODE_INST_SWITCH; const SwitchInst &SI = cast(I); Vals.push_back(VE.getTypeID(SI.getCondition()->getType())); pushValue(SI.getCondition(), InstID, Vals, VE); 
Vals.push_back(VE.getValueID(SI.getDefaultDest())); for (SwitchInst::ConstCaseIt Case : SI.cases()) { Vals.push_back(VE.getValueID(Case.getCaseValue())); Vals.push_back(VE.getValueID(Case.getCaseSuccessor())); } } break; case Instruction::IndirectBr: Code = bitc::FUNC_CODE_INST_INDIRECTBR; Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // Encode the address operand as relative, but not the basic blocks. pushValue(I.getOperand(0), InstID, Vals, VE); for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) Vals.push_back(VE.getValueID(I.getOperand(i))); break; case Instruction::Invoke: { const InvokeInst *II = cast(&I); const Value *Callee = II->getCalledValue(); FunctionType *FTy = II->getFunctionType(); if (II->hasOperandBundles()) WriteOperandBundles(Stream, II, InstID, VE); Code = bitc::FUNC_CODE_INST_INVOKE; Vals.push_back(VE.getAttributeID(II->getAttributes())); Vals.push_back(II->getCallingConv() | 1 << 13); Vals.push_back(VE.getValueID(II->getNormalDest())); Vals.push_back(VE.getValueID(II->getUnwindDest())); Vals.push_back(VE.getTypeID(FTy)); PushValueAndType(Callee, InstID, Vals, VE); // Emit value #'s for the fixed parameters. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) pushValue(I.getOperand(i), InstID, Vals, VE); // fixed param. // Emit type/value pairs for varargs params. 
if (FTy->isVarArg()) { for (unsigned i = FTy->getNumParams(), e = I.getNumOperands()-3; i != e; ++i) PushValueAndType(I.getOperand(i), InstID, Vals, VE); // vararg } break; } case Instruction::Resume: Code = bitc::FUNC_CODE_INST_RESUME; PushValueAndType(I.getOperand(0), InstID, Vals, VE); break; case Instruction::CleanupRet: { Code = bitc::FUNC_CODE_INST_CLEANUPRET; const auto &CRI = cast(I); pushValue(CRI.getCleanupPad(), InstID, Vals, VE); if (CRI.hasUnwindDest()) Vals.push_back(VE.getValueID(CRI.getUnwindDest())); break; } case Instruction::CatchRet: { Code = bitc::FUNC_CODE_INST_CATCHRET; const auto &CRI = cast(I); pushValue(CRI.getCatchPad(), InstID, Vals, VE); Vals.push_back(VE.getValueID(CRI.getSuccessor())); break; } case Instruction::CleanupPad: case Instruction::CatchPad: { const auto &FuncletPad = cast(I); Code = isa(FuncletPad) ? bitc::FUNC_CODE_INST_CATCHPAD : bitc::FUNC_CODE_INST_CLEANUPPAD; pushValue(FuncletPad.getParentPad(), InstID, Vals, VE); unsigned NumArgOperands = FuncletPad.getNumArgOperands(); Vals.push_back(NumArgOperands); for (unsigned Op = 0; Op != NumArgOperands; ++Op) PushValueAndType(FuncletPad.getArgOperand(Op), InstID, Vals, VE); break; } case Instruction::CatchSwitch: { Code = bitc::FUNC_CODE_INST_CATCHSWITCH; const auto &CatchSwitch = cast(I); pushValue(CatchSwitch.getParentPad(), InstID, Vals, VE); unsigned NumHandlers = CatchSwitch.getNumHandlers(); Vals.push_back(NumHandlers); for (const BasicBlock *CatchPadBB : CatchSwitch.handlers()) Vals.push_back(VE.getValueID(CatchPadBB)); if (CatchSwitch.hasUnwindDest()) Vals.push_back(VE.getValueID(CatchSwitch.getUnwindDest())); break; } case Instruction::Unreachable: Code = bitc::FUNC_CODE_INST_UNREACHABLE; AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV; break; case Instruction::PHI: { const PHINode &PN = cast(I); Code = bitc::FUNC_CODE_INST_PHI; // With the newer instruction encoding, forward references could give // negative valued IDs. 
This is most common for PHIs, so we use // signed VBRs. SmallVector Vals64; Vals64.push_back(VE.getTypeID(PN.getType())); for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { pushValueSigned(PN.getIncomingValue(i), InstID, Vals64, VE); Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i))); } // Emit a Vals64 vector and exit. Stream.EmitRecord(Code, Vals64, AbbrevToUse); Vals64.clear(); return; } case Instruction::LandingPad: { const LandingPadInst &LP = cast(I); Code = bitc::FUNC_CODE_INST_LANDINGPAD; Vals.push_back(VE.getTypeID(LP.getType())); Vals.push_back(LP.isCleanup()); Vals.push_back(LP.getNumClauses()); for (unsigned I = 0, E = LP.getNumClauses(); I != E; ++I) { if (LP.isCatch(I)) Vals.push_back(LandingPadInst::Catch); else Vals.push_back(LandingPadInst::Filter); PushValueAndType(LP.getClause(I), InstID, Vals, VE); } break; } case Instruction::Alloca: { Code = bitc::FUNC_CODE_INST_ALLOCA; const AllocaInst &AI = cast(I); Vals.push_back(VE.getTypeID(AI.getAllocatedType())); Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); Vals.push_back(VE.getValueID(I.getOperand(0))); // size. unsigned AlignRecord = Log2_32(AI.getAlignment()) + 1; assert(Log2_32(Value::MaximumAlignment) + 1 < 1 << 5 && "not enough bits for maximum alignment"); assert(AlignRecord < 1 << 5 && "alignment greater than 1 << 64"); AlignRecord |= AI.isUsedWithInAlloca() << 5; AlignRecord |= 1 << 6; // Reserve bit 7 for SwiftError flag. 
// AlignRecord |= AI.isSwiftError() << 7; Vals.push_back(AlignRecord); break; } case Instruction::Load: if (cast(I).isAtomic()) { Code = bitc::FUNC_CODE_INST_LOADATOMIC; PushValueAndType(I.getOperand(0), InstID, Vals, VE); } else { Code = bitc::FUNC_CODE_INST_LOAD; if (!PushValueAndType(I.getOperand(0), InstID, Vals, VE)) // ptr AbbrevToUse = FUNCTION_INST_LOAD_ABBREV; } Vals.push_back(VE.getTypeID(I.getType())); Vals.push_back(Log2_32(cast(I).getAlignment())+1); Vals.push_back(cast(I).isVolatile()); if (cast(I).isAtomic()) { Vals.push_back(GetEncodedOrdering(cast(I).getOrdering())); Vals.push_back(GetEncodedSynchScope(cast(I).getSynchScope())); } break; case Instruction::Store: if (cast(I).isAtomic()) Code = bitc::FUNC_CODE_INST_STOREATOMIC; else Code = bitc::FUNC_CODE_INST_STORE; PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr PushValueAndType(I.getOperand(0), InstID, Vals, VE); // valty + val Vals.push_back(Log2_32(cast(I).getAlignment())+1); Vals.push_back(cast(I).isVolatile()); if (cast(I).isAtomic()) { Vals.push_back(GetEncodedOrdering(cast(I).getOrdering())); Vals.push_back(GetEncodedSynchScope(cast(I).getSynchScope())); } break; case Instruction::AtomicCmpXchg: Code = bitc::FUNC_CODE_INST_CMPXCHG; PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr PushValueAndType(I.getOperand(1), InstID, Vals, VE); // cmp. pushValue(I.getOperand(2), InstID, Vals, VE); // newval. Vals.push_back(cast(I).isVolatile()); Vals.push_back(GetEncodedOrdering( cast(I).getSuccessOrdering())); Vals.push_back(GetEncodedSynchScope( cast(I).getSynchScope())); Vals.push_back(GetEncodedOrdering( cast(I).getFailureOrdering())); Vals.push_back(cast(I).isWeak()); break; case Instruction::AtomicRMW: Code = bitc::FUNC_CODE_INST_ATOMICRMW; PushValueAndType(I.getOperand(0), InstID, Vals, VE); // ptrty + ptr pushValue(I.getOperand(1), InstID, Vals, VE); // val. 
Vals.push_back(GetEncodedRMWOperation( cast(I).getOperation())); Vals.push_back(cast(I).isVolatile()); Vals.push_back(GetEncodedOrdering(cast(I).getOrdering())); Vals.push_back(GetEncodedSynchScope( cast(I).getSynchScope())); break; case Instruction::Fence: Code = bitc::FUNC_CODE_INST_FENCE; Vals.push_back(GetEncodedOrdering(cast(I).getOrdering())); Vals.push_back(GetEncodedSynchScope(cast(I).getSynchScope())); break; case Instruction::Call: { const CallInst &CI = cast(I); FunctionType *FTy = CI.getFunctionType(); if (CI.hasOperandBundles()) WriteOperandBundles(Stream, &CI, InstID, VE); Code = bitc::FUNC_CODE_INST_CALL; Vals.push_back(VE.getAttributeID(CI.getAttributes())); unsigned Flags = GetOptimizationFlags(&I); Vals.push_back(CI.getCallingConv() << bitc::CALL_CCONV | unsigned(CI.isTailCall()) << bitc::CALL_TAIL | unsigned(CI.isMustTailCall()) << bitc::CALL_MUSTTAIL | 1 << bitc::CALL_EXPLICIT_TYPE | unsigned(CI.isNoTailCall()) << bitc::CALL_NOTAIL | unsigned(Flags != 0) << bitc::CALL_FMF); if (Flags != 0) Vals.push_back(Flags); Vals.push_back(VE.getTypeID(FTy)); PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee // Emit value #'s for the fixed parameters. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) { // Check for labels (can happen with asm labels). if (FTy->getParamType(i)->isLabelTy()) Vals.push_back(VE.getValueID(CI.getArgOperand(i))); else pushValue(CI.getArgOperand(i), InstID, Vals, VE); // fixed param. } // Emit type/value pairs for varargs params. if (FTy->isVarArg()) { for (unsigned i = FTy->getNumParams(), e = CI.getNumArgOperands(); i != e; ++i) PushValueAndType(CI.getArgOperand(i), InstID, Vals, VE); // varargs } break; } case Instruction::VAArg: Code = bitc::FUNC_CODE_INST_VAARG; Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); // valistty pushValue(I.getOperand(0), InstID, Vals, VE); // valist. Vals.push_back(VE.getTypeID(I.getType())); // restype. 
break; } Stream.EmitRecord(Code, Vals, AbbrevToUse); Vals.clear(); } /// Emit names for globals/functions etc. The VSTOffsetPlaceholder, /// BitcodeStartBit and ModuleSummaryIndex are only passed for the module-level /// VST, where we are including a function bitcode index and need to /// backpatch the VST forward declaration record. static void WriteValueSymbolTable( const ValueSymbolTable &VST, const ValueEnumerator &VE, BitstreamWriter &Stream, uint64_t VSTOffsetPlaceholder = 0, uint64_t BitcodeStartBit = 0, DenseMap> *FunctionIndex = nullptr) { if (VST.empty()) { // WriteValueSymbolTableForwardDecl should have returned early as // well. Ensure this handling remains in sync by asserting that // the placeholder offset is not set. assert(VSTOffsetPlaceholder == 0); return; } if (VSTOffsetPlaceholder > 0) { // Get the offset of the VST we are writing, and backpatch it into // the VST forward declaration record. uint64_t VSTOffset = Stream.GetCurrentBitNo(); // The BitcodeStartBit was the stream offset of the actual bitcode // (e.g. excluding any initial darwin header). VSTOffset -= BitcodeStartBit; assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32); } Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); // For the module-level VST, add abbrev Ids for the VST_CODE_FNENTRY // records, which are not used in the per-function VSTs. unsigned FnEntry8BitAbbrev; unsigned FnEntry7BitAbbrev; unsigned FnEntry6BitAbbrev; if (VSTOffsetPlaceholder > 0) { // 8-bit fixed-width VST_CODE_FNENTRY function strings. 
BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); FnEntry8BitAbbrev = Stream.EmitAbbrev(Abbv); // 7-bit fixed width VST_CODE_FNENTRY function strings. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); FnEntry7BitAbbrev = Stream.EmitAbbrev(Abbv); // 6-bit char6 VST_CODE_FNENTRY function strings. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_FNENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // funcoffset Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); FnEntry6BitAbbrev = Stream.EmitAbbrev(Abbv); } // FIXME: Set up the abbrev, we know how many values there are! // FIXME: We know if the type names can use 7-bit ascii. SmallVector NameVals; for (const ValueName &Name : VST) { // Figure out the encoding to use for the name. StringEncoding Bits = getStringEncoding(Name.getKeyData(), Name.getKeyLength()); unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; NameVals.push_back(VE.getValueID(Name.getValue())); Function *F = dyn_cast(Name.getValue()); if (!F) { // If value is an alias, need to get the aliased base object to // see if it is a function. 
auto *GA = dyn_cast(Name.getValue()); if (GA && GA->getBaseObject()) F = dyn_cast(GA->getBaseObject()); } // VST_CODE_ENTRY: [valueid, namechar x N] // VST_CODE_FNENTRY: [valueid, funcoffset, namechar x N] // VST_CODE_BBENTRY: [bbid, namechar x N] unsigned Code; if (isa(Name.getValue())) { Code = bitc::VST_CODE_BBENTRY; if (Bits == SE_Char6) AbbrevToUse = VST_BBENTRY_6_ABBREV; } else if (F && !F->isDeclaration()) { // Must be the module-level VST, where we pass in the Index and // have a VSTOffsetPlaceholder. The function-level VST should not // contain any Function symbols. assert(FunctionIndex); assert(VSTOffsetPlaceholder > 0); // Save the word offset of the function (from the start of the // actual bitcode written to the stream). uint64_t BitcodeIndex = (*FunctionIndex)[F]->bitcodeIndex() - BitcodeStartBit; assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned"); NameVals.push_back(BitcodeIndex / 32); Code = bitc::VST_CODE_FNENTRY; AbbrevToUse = FnEntry8BitAbbrev; if (Bits == SE_Char6) AbbrevToUse = FnEntry6BitAbbrev; else if (Bits == SE_Fixed7) AbbrevToUse = FnEntry7BitAbbrev; } else { Code = bitc::VST_CODE_ENTRY; if (Bits == SE_Char6) AbbrevToUse = VST_ENTRY_6_ABBREV; else if (Bits == SE_Fixed7) AbbrevToUse = VST_ENTRY_7_ABBREV; } for (const auto P : Name.getKey()) NameVals.push_back((unsigned char)P); // Emit the finished record. Stream.EmitRecord(Code, NameVals, AbbrevToUse); NameVals.clear(); } Stream.ExitBlock(); } /// Emit function names and summary offsets for the combined index /// used by ThinLTO. static void WriteCombinedValueSymbolTable(const ModuleSummaryIndex &Index, BitstreamWriter &Stream, std::map &GUIDToValueIdMap, uint64_t VSTOffsetPlaceholder) { assert(VSTOffsetPlaceholder > 0 && "Expected non-zero VSTOffsetPlaceholder"); // Get the offset of the VST we are writing, and backpatch it into // the VST forward declaration record. 
uint64_t VSTOffset = Stream.GetCurrentBitNo(); assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned"); Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32); Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4); BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_GVDEFENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // sumoffset Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // guid unsigned DefEntryAbbrev = Stream.EmitAbbrev(Abbv); Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_COMBINED_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // refguid unsigned EntryAbbrev = Stream.EmitAbbrev(Abbv); SmallVector NameVals; for (const auto &FII : Index) { uint64_t FuncGUID = FII.first; const auto &VMI = GUIDToValueIdMap.find(FuncGUID); assert(VMI != GUIDToValueIdMap.end()); for (const auto &FI : FII.second) { // VST_CODE_COMBINED_GVDEFENTRY: [valueid, sumoffset, guid] NameVals.push_back(VMI->second); NameVals.push_back(FI->bitcodeIndex()); NameVals.push_back(FuncGUID); // Emit the finished record. Stream.EmitRecord(bitc::VST_CODE_COMBINED_GVDEFENTRY, NameVals, DefEntryAbbrev); NameVals.clear(); } GUIDToValueIdMap.erase(VMI); } for (const auto &GVI : GUIDToValueIdMap) { // VST_CODE_COMBINED_ENTRY: [valueid, refguid] NameVals.push_back(GVI.second); NameVals.push_back(GVI.first); // Emit the finished record. 
Stream.EmitRecord(bitc::VST_CODE_COMBINED_ENTRY, NameVals, EntryAbbrev); NameVals.clear(); } Stream.ExitBlock(); } static void WriteUseList(ValueEnumerator &VE, UseListOrder &&Order, BitstreamWriter &Stream) { assert(Order.Shuffle.size() >= 2 && "Shuffle too small"); unsigned Code; if (isa(Order.V)) Code = bitc::USELIST_CODE_BB; else Code = bitc::USELIST_CODE_DEFAULT; SmallVector Record(Order.Shuffle.begin(), Order.Shuffle.end()); Record.push_back(VE.getValueID(Order.V)); Stream.EmitRecord(Code, Record); } static void WriteUseListBlock(const Function *F, ValueEnumerator &VE, BitstreamWriter &Stream) { assert(VE.shouldPreserveUseListOrder() && "Expected to be preserving use-list order"); auto hasMore = [&]() { return !VE.UseListOrders.empty() && VE.UseListOrders.back().F == F; }; if (!hasMore()) // Nothing to do. return; Stream.EnterSubblock(bitc::USELIST_BLOCK_ID, 3); while (hasMore()) { WriteUseList(VE, std::move(VE.UseListOrders.back()), Stream); VE.UseListOrders.pop_back(); } Stream.ExitBlock(); } // Walk through the operands of a given User via worklist iteration and populate // the set of GlobalValue references encountered. Invoked either on an // Instruction or a GlobalVariable (which walks its initializer). static void findRefEdges(const User *CurUser, const ValueEnumerator &VE, DenseSet &RefEdges, SmallPtrSet &Visited) { SmallVector Worklist; Worklist.push_back(CurUser); while (!Worklist.empty()) { const User *U = Worklist.pop_back_val(); if (!Visited.insert(U).second) continue; ImmutableCallSite CS(U); for (const auto &OI : U->operands()) { const User *Operand = dyn_cast(OI); if (!Operand) continue; if (isa(Operand)) continue; if (isa(Operand)) { // We have a reference to a global value. This should be added to // the reference set unless it is a callee. Callees are handled // specially by WriteFunction and are added to a separate list. 
if (!(CS && CS.isCallee(&OI))) RefEdges.insert(VE.getValueID(Operand)); continue; } Worklist.push_back(Operand); } } } /// Emit a function body to the module stream. static void WriteFunction( const Function &F, const Module *M, ValueEnumerator &VE, BitstreamWriter &Stream, DenseMap> &FunctionIndex, bool EmitSummaryIndex) { // Save the bitcode index of the start of this function block for recording // in the VST. uint64_t BitcodeIndex = Stream.GetCurrentBitNo(); bool HasProfileData = F.getEntryCount().hasValue(); std::unique_ptr BFI; if (EmitSummaryIndex && HasProfileData) { Function &Func = const_cast(F); LoopInfo LI{DominatorTree(Func)}; BranchProbabilityInfo BPI{Func, LI}; BFI = llvm::make_unique(Func, BPI, LI); } Stream.EnterSubblock(bitc::FUNCTION_BLOCK_ID, 4); VE.incorporateFunction(F); SmallVector Vals; // Emit the number of basic blocks, so the reader can create them ahead of // time. Vals.push_back(VE.getBasicBlocks().size()); Stream.EmitRecord(bitc::FUNC_CODE_DECLAREBLOCKS, Vals); Vals.clear(); // If there are function-local constants, emit them now. unsigned CstStart, CstEnd; VE.getFunctionConstantRange(CstStart, CstEnd); WriteConstants(CstStart, CstEnd, VE, Stream, false); // If there is function-local metadata, emit it now. writeFunctionMetadata(F, VE, Stream); // Keep a running idea of what the instruction ID is. unsigned InstID = CstEnd; bool NeedsMetadataAttachment = F.hasMetadata(); DILocation *LastDL = nullptr; unsigned NumInsts = 0; // Map from callee ValueId to profile count. Used to accumulate profile // counts for all static calls to a given callee. DenseMap CallGraphEdges; DenseSet RefEdges; SmallPtrSet Visited; // Finally, emit all the instructions, in order. 
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { WriteInstruction(*I, InstID, VE, Stream, Vals); if (!isa(I)) ++NumInsts; if (!I->getType()->isVoidTy()) ++InstID; if (EmitSummaryIndex) { if (auto CS = ImmutableCallSite(&*I)) { auto *CalledFunction = CS.getCalledFunction(); if (CalledFunction && CalledFunction->hasName() && !CalledFunction->isIntrinsic()) { auto ScaledCount = BFI ? BFI->getBlockProfileCount(&*BB) : None; unsigned CalleeId = VE.getValueID( M->getValueSymbolTable().lookup(CalledFunction->getName())); CallGraphEdges[CalleeId] += (ScaledCount ? ScaledCount.getValue() : 0); } } findRefEdges(&*I, VE, RefEdges, Visited); } // If the instruction has metadata, write a metadata attachment later. NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc(); // If the instruction has a debug location, emit it. DILocation *DL = I->getDebugLoc(); if (!DL) continue; if (DL == LastDL) { // Just repeat the same debug loc as last time. Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC_AGAIN, Vals); continue; } Vals.push_back(DL->getLine()); Vals.push_back(DL->getColumn()); Vals.push_back(VE.getMetadataOrNullID(DL->getScope())); Vals.push_back(VE.getMetadataOrNullID(DL->getInlinedAt())); Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals); Vals.clear(); LastDL = DL; } std::unique_ptr FuncSummary; if (EmitSummaryIndex) { FuncSummary = llvm::make_unique(F.getLinkage(), NumInsts); FuncSummary->addCallGraphEdges(CallGraphEdges); FuncSummary->addRefEdges(RefEdges); } FunctionIndex[&F] = llvm::make_unique(BitcodeIndex, std::move(FuncSummary)); // Emit names for all the instructions etc. 
WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream); if (NeedsMetadataAttachment) WriteMetadataAttachment(F, VE, Stream); if (VE.shouldPreserveUseListOrder()) WriteUseListBlock(&F, VE, Stream); VE.purgeFunction(); Stream.ExitBlock(); } // Emit blockinfo, which defines the standard abbreviations etc. static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { // We only want to emit block info records for blocks that have multiple // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. // Other blocks can define their abbrevs inline. Stream.EnterBlockInfoBlock(2); { // 8-bit fixed-width VST_CODE_ENTRY/VST_CODE_BBENTRY strings. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 3)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_ENTRY_8_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // 7-bit fixed width VST_CODE_ENTRY strings. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_ENTRY_7_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // 6-bit char6 VST_CODE_ENTRY strings. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_ENTRY_6_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // 6-bit char6 VST_CODE_BBENTRY strings. 
BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_BBENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); if (Stream.EmitBlockInfoAbbrev(bitc::VALUE_SYMTAB_BLOCK_ID, Abbv) != VST_BBENTRY_6_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // SETTYPE abbrev for CONSTANTS_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, VE.computeBitsRequiredForTypeIndicies())); if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_SETTYPE_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INTEGER abbrev for CONSTANTS_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_INTEGER)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_INTEGER_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // CE_CAST abbrev for CONSTANTS_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // typeid VE.computeBitsRequiredForTypeIndicies())); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_CE_CAST_Abbrev) llvm_unreachable("Unexpected abbrev ordering!"); } { // NULL abbrev for CONSTANTS_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_NULL)); if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_NULL_Abbrev) llvm_unreachable("Unexpected abbrev ordering!"); } // FIXME: This should only use space for first class types! { // INST_LOAD abbrev for FUNCTION_BLOCK. 
BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty VE.computeBitsRequiredForTypeIndicies())); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Align Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_LOAD_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_BINOP abbrev for FUNCTION_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_BINOP_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_BINOP_FLAGS abbrev for FUNCTION_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_BINOP)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_BINOP_FLAGS_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_CAST abbrev for FUNCTION_BLOCK. 
BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty VE.computeBitsRequiredForTypeIndicies())); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_CAST_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_RET abbrev for FUNCTION_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET)); if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_RET_VOID_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_RET abbrev for FUNCTION_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_RET)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // ValID if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_RET_VAL_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK. 
BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_UNREACHABLE)); if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } { BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_GEP)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty Log2_32_Ceil(VE.getTypes().size() + 1))); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_GEP_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); } Stream.ExitBlock(); } /// Write the module path strings, currently only used when generating /// a combined index file. static void WriteModStrings(const ModuleSummaryIndex &I, BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::MODULE_STRTAB_BLOCK_ID, 3); // TODO: See which abbrev sizes we actually need to emit // 8-bit fixed-width MST_ENTRY strings. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); unsigned Abbrev8Bit = Stream.EmitAbbrev(Abbv); // 7-bit fixed width MST_ENTRY strings. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); unsigned Abbrev7Bit = Stream.EmitAbbrev(Abbv); // 6-bit char6 MST_ENTRY strings. 
Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv); - SmallVector NameVals; - for (const StringMapEntry &MPSE : I.modulePaths()) { + // Module Hash, 160 bits SHA1. Optionally, emitted after each MST_CODE_ENTRY. + Abbv = new BitCodeAbbrev(); + Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_HASH)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + unsigned AbbrevHash = Stream.EmitAbbrev(Abbv); + + SmallVector Vals; + for (const auto &MPSE : I.modulePaths()) { StringEncoding Bits = getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size()); unsigned AbbrevToUse = Abbrev8Bit; if (Bits == SE_Char6) AbbrevToUse = Abbrev6Bit; else if (Bits == SE_Fixed7) AbbrevToUse = Abbrev7Bit; - NameVals.push_back(MPSE.getValue()); + Vals.push_back(MPSE.getValue().first); for (const auto P : MPSE.getKey()) - NameVals.push_back((unsigned char)P); + Vals.push_back((unsigned char)P); // Emit the finished record. - Stream.EmitRecord(bitc::MST_CODE_ENTRY, NameVals, AbbrevToUse); - NameVals.clear(); + Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse); + + Vals.clear(); + // Emit an optional hash for the module now + auto &Hash = MPSE.getValue().second; + bool AllZero = true; // Detect if the hash is empty, and do not generate it + for (auto Val : Hash) { + if (Val) + AllZero = false; + Vals.push_back(Val); + } + if (!AllZero) { + // Emit the hash record. + Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash); + } + + Vals.clear(); } Stream.ExitBlock(); } // Helper to emit a single function summary record. 
static void WritePerModuleFunctionSummaryRecord( SmallVector &NameVals, FunctionSummary *FS, unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev, BitstreamWriter &Stream, const Function &F) { assert(FS); NameVals.push_back(ValueID); NameVals.push_back(getEncodedLinkage(FS->linkage())); NameVals.push_back(FS->instCount()); NameVals.push_back(FS->refs().size()); for (auto &RI : FS->refs()) NameVals.push_back(RI); bool HasProfileData = F.getEntryCount().hasValue(); for (auto &ECI : FS->calls()) { NameVals.push_back(ECI.first); assert(ECI.second.CallsiteCount > 0 && "Expected at least one callsite"); NameVals.push_back(ECI.second.CallsiteCount); if (HasProfileData) NameVals.push_back(ECI.second.ProfileCount); } unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev); unsigned Code = (HasProfileData ? bitc::FS_PERMODULE_PROFILE : bitc::FS_PERMODULE); // Emit the finished record. Stream.EmitRecord(Code, NameVals, FSAbbrev); NameVals.clear(); } // Collect the global value references in the given variable's initializer, // and emit them in a summary record. static void WriteModuleLevelReferences(const GlobalVariable &V, const ValueEnumerator &VE, SmallVector &NameVals, unsigned FSModRefsAbbrev, BitstreamWriter &Stream) { // Only interested in recording variable defs in the summary. if (V.isDeclaration()) return; DenseSet RefEdges; SmallPtrSet Visited; findRefEdges(&V, VE, RefEdges, Visited); NameVals.push_back(VE.getValueID(&V)); NameVals.push_back(getEncodedLinkage(V.getLinkage())); for (auto RefId : RefEdges) { NameVals.push_back(RefId); } Stream.EmitRecord(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS, NameVals, FSModRefsAbbrev); NameVals.clear(); } /// Emit the per-module summary section alongside the rest of /// the module's bitcode. 
static void WritePerModuleGlobalValueSummary( DenseMap> &FunctionIndex, const Module *M, const ValueEnumerator &VE, BitstreamWriter &Stream) { if (M->empty()) return; Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3); // Abbrev for FS_PERMODULE. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid, callsitecount) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for FS_PERMODULE_PROFILE. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid, callsitecount, profilecount) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for FS_PERMODULE_GLOBALVAR_INIT_REFS. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_GLOBALVAR_INIT_REFS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueids Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSModRefsAbbrev = Stream.EmitAbbrev(Abbv); SmallVector NameVals; // Iterate over the list of functions instead of the FunctionIndex map to // ensure the ordering is stable. 
for (const Function &F : *M) { if (F.isDeclaration()) continue; // Skip anonymous functions. We will emit a function summary for // any aliases below. if (!F.hasName()) continue; assert(FunctionIndex.count(&F) == 1); WritePerModuleFunctionSummaryRecord( NameVals, cast(FunctionIndex[&F]->summary()), VE.getValueID(M->getValueSymbolTable().lookup(F.getName())), FSCallsAbbrev, FSCallsProfileAbbrev, Stream, F); } for (const GlobalAlias &A : M->aliases()) { if (!A.getBaseObject()) continue; const Function *F = dyn_cast(A.getBaseObject()); if (!F || F->isDeclaration()) continue; assert(FunctionIndex.count(F) == 1); FunctionSummary *FS = cast(FunctionIndex[F]->summary()); // Add the alias to the reference list of aliasee function. FS->addRefEdge( VE.getValueID(M->getValueSymbolTable().lookup(A.getName()))); WritePerModuleFunctionSummaryRecord( NameVals, FS, VE.getValueID(M->getValueSymbolTable().lookup(A.getName())), FSCallsAbbrev, FSCallsProfileAbbrev, Stream, *F); } // Capture references from GlobalVariable initializers, which are outside // of a function scope. for (const GlobalVariable &G : M->globals()) WriteModuleLevelReferences(G, VE, NameVals, FSModRefsAbbrev, Stream); for (const GlobalAlias &A : M->aliases()) if (auto *GV = dyn_cast(A.getBaseObject())) WriteModuleLevelReferences(*GV, VE, NameVals, FSModRefsAbbrev, Stream); Stream.ExitBlock(); } /// Emit the combined summary section into the combined index file. static void WriteCombinedGlobalValueSummary( const ModuleSummaryIndex &I, BitstreamWriter &Stream, std::map &GUIDToValueIdMap, unsigned GlobalValueId) { Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3); // Abbrev for FS_COMBINED. 
BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid, callsitecount) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for FS_COMBINED_PROFILE. Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_PROFILE)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs // numrefs x valueid, n x (valueid, callsitecount, profilecount) Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv); // Abbrev for FS_COMBINED_GLOBALVAR_INIT_REFS. 
Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_GLOBALVAR_INIT_REFS)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // modid Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 5)); // linkage Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); // valueids Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); unsigned FSModRefsAbbrev = Stream.EmitAbbrev(Abbv); SmallVector NameVals; for (const auto &FII : I) { for (auto &FI : FII.second) { GlobalValueSummary *S = FI->summary(); assert(S); if (auto *VS = dyn_cast(S)) { NameVals.push_back(I.getModuleId(VS->modulePath())); NameVals.push_back(getEncodedLinkage(VS->linkage())); for (auto &RI : VS->refs()) { const auto &VMI = GUIDToValueIdMap.find(RI); unsigned RefId; // If this GUID doesn't have an entry, assign one. if (VMI == GUIDToValueIdMap.end()) { GUIDToValueIdMap[RI] = ++GlobalValueId; RefId = GlobalValueId; } else { RefId = VMI->second; } NameVals.push_back(RefId); } // Record the starting offset of this summary entry for use // in the VST entry. Add the current code size since the // reader will invoke readRecord after the abbrev id read. FI->setBitcodeIndex(Stream.GetCurrentBitNo() + Stream.GetAbbrevIDWidth()); // Emit the finished record. Stream.EmitRecord(bitc::FS_COMBINED_GLOBALVAR_INIT_REFS, NameVals, FSModRefsAbbrev); NameVals.clear(); continue; } auto *FS = cast(S); NameVals.push_back(I.getModuleId(FS->modulePath())); NameVals.push_back(getEncodedLinkage(FS->linkage())); NameVals.push_back(FS->instCount()); NameVals.push_back(FS->refs().size()); for (auto &RI : FS->refs()) { const auto &VMI = GUIDToValueIdMap.find(RI); unsigned RefId; // If this GUID doesn't have an entry, assign one. 
if (VMI == GUIDToValueIdMap.end()) { GUIDToValueIdMap[RI] = ++GlobalValueId; RefId = GlobalValueId; } else { RefId = VMI->second; } NameVals.push_back(RefId); } bool HasProfileData = false; for (auto &EI : FS->calls()) { HasProfileData |= EI.second.ProfileCount != 0; if (HasProfileData) break; } for (auto &EI : FS->calls()) { const auto &VMI = GUIDToValueIdMap.find(EI.first); // If this GUID doesn't have an entry, it doesn't have a function // summary and we don't need to record any calls to it. if (VMI == GUIDToValueIdMap.end()) continue; NameVals.push_back(VMI->second); assert(EI.second.CallsiteCount > 0 && "Expected at least one callsite"); NameVals.push_back(EI.second.CallsiteCount); if (HasProfileData) NameVals.push_back(EI.second.ProfileCount); } // Record the starting offset of this summary entry for use // in the VST entry. Add the current code size since the // reader will invoke readRecord after the abbrev id read. FI->setBitcodeIndex(Stream.GetCurrentBitNo() + Stream.GetAbbrevIDWidth()); unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev); unsigned Code = (HasProfileData ? bitc::FS_COMBINED_PROFILE : bitc::FS_COMBINED); // Emit the finished record. Stream.EmitRecord(Code, NameVals, FSAbbrev); NameVals.clear(); } } Stream.ExitBlock(); } // Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the // current llvm version, and a record for the epoch number. 
static void WriteIdentificationBlock(const Module *M, BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5); // Write the "user readable" string identifying the bitcode producer BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_STRING)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); auto StringAbbrev = Stream.EmitAbbrev(Abbv); WriteStringRecord(bitc::IDENTIFICATION_CODE_STRING, "LLVM" LLVM_VERSION_STRING, StringAbbrev, Stream); // Write the epoch version Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::IDENTIFICATION_CODE_EPOCH)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); auto EpochAbbrev = Stream.EmitAbbrev(Abbv); SmallVector Vals = {bitc::BITCODE_CURRENT_EPOCH}; Stream.EmitRecord(bitc::IDENTIFICATION_CODE_EPOCH, Vals, EpochAbbrev); Stream.ExitBlock(); } +static void writeModuleHash(BitstreamWriter &Stream, + SmallVectorImpl &Buffer, + size_t BlockStartPos) { + // Emit the module's hash. + // MODULE_CODE_HASH: [5*i32] + SHA1 Hasher; + Hasher.update(ArrayRef((uint8_t *)&Buffer[BlockStartPos], + Buffer.size() - BlockStartPos)); + auto Hash = Hasher.result(); + SmallVector Vals; + auto LShift = [&](unsigned char Val, unsigned Amount) + -> uint64_t { return ((uint64_t)Val) << Amount; }; + for (int Pos = 0; Pos < 20; Pos += 4) { + uint32_t SubHash = LShift(Hash[Pos + 0], 24); + SubHash |= LShift(Hash[Pos + 1], 16) | LShift(Hash[Pos + 2], 8) | + (unsigned)(unsigned char)Hash[Pos + 3]; + Vals.push_back(SubHash); + } + + // Emit the finished record. + Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals); +} + /// WriteModule - Emit the specified module to the bitstream. 
static void WriteModule(const Module *M, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, - uint64_t BitcodeStartBit, bool EmitSummaryIndex) { + uint64_t BitcodeStartBit, bool EmitSummaryIndex, + bool GenerateHash, SmallVectorImpl &Buffer) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); + size_t BlockStartPos = Buffer.size(); SmallVector Vals; unsigned CurVersion = 1; Vals.push_back(CurVersion); Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); // Analyze the module, enumerating globals, functions, etc. ValueEnumerator VE(*M, ShouldPreserveUseListOrder); // Emit blockinfo, which defines the standard abbreviations etc. WriteBlockInfo(VE, Stream); // Emit information about attribute groups. WriteAttributeGroupTable(VE, Stream); // Emit information about parameter attributes. WriteAttributeTable(VE, Stream); // Emit information describing all of the types in the module. WriteTypeTable(VE, Stream); writeComdats(VE, Stream); // Emit top-level description of module, including target triple, inline asm, // descriptors for global variables, and function prototype info. uint64_t VSTOffsetPlaceholder = WriteModuleInfo(M, VE, Stream); // Emit constants. WriteModuleConstants(VE, Stream); // Emit metadata. writeModuleMetadata(*M, VE, Stream); // Emit metadata. WriteModuleMetadataStore(M, Stream); // Emit module-level use-lists. if (VE.shouldPreserveUseListOrder()) WriteUseListBlock(nullptr, VE, Stream); WriteOperandBundleTags(M, Stream); // Emit function bodies. DenseMap> FunctionIndex; for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) if (!F->isDeclaration()) WriteFunction(*F, M, VE, Stream, FunctionIndex, EmitSummaryIndex); // Need to write after the above call to WriteFunction which populates // the summary information in the index. 
if (EmitSummaryIndex) WritePerModuleGlobalValueSummary(FunctionIndex, M, VE, Stream); WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream, VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex); + if (GenerateHash) { + writeModuleHash(Stream, Buffer, BlockStartPos); + } + Stream.ExitBlock(); } /// EmitDarwinBCHeader - If generating a bc file on darwin, we have to emit a /// header and trailer to make it compatible with the system archiver. To do /// this we emit the following header, and then emit a trailer that pads the /// file out to be a multiple of 16 bytes. /// /// struct bc_header { /// uint32_t Magic; // 0x0B17C0DE /// uint32_t Version; // Version, currently always 0. /// uint32_t BitcodeOffset; // Offset to traditional bitcode file. /// uint32_t BitcodeSize; // Size of traditional bitcode file. /// uint32_t CPUType; // CPU specifier. /// ... potentially more later ... /// }; static void WriteInt32ToBuffer(uint32_t Value, SmallVectorImpl &Buffer, uint32_t &Position) { support::endian::write32le(&Buffer[Position], Value); Position += 4; } static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl &Buffer, const Triple &TT) { unsigned CPUType = ~0U; // Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*, // armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*. The CPUType is a magic // number from /usr/include/mach/machine.h. It is ok to reproduce the // specific constants here because they are implicitly part of the Darwin ABI. 
enum { DARWIN_CPU_ARCH_ABI64 = 0x01000000, DARWIN_CPU_TYPE_X86 = 7, DARWIN_CPU_TYPE_ARM = 12, DARWIN_CPU_TYPE_POWERPC = 18 }; Triple::ArchType Arch = TT.getArch(); if (Arch == Triple::x86_64) CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64; else if (Arch == Triple::x86) CPUType = DARWIN_CPU_TYPE_X86; else if (Arch == Triple::ppc) CPUType = DARWIN_CPU_TYPE_POWERPC; else if (Arch == Triple::ppc64) CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64; else if (Arch == Triple::arm || Arch == Triple::thumb) CPUType = DARWIN_CPU_TYPE_ARM; // Traditional Bitcode starts after header. assert(Buffer.size() >= BWH_HeaderSize && "Expected header size to be reserved"); unsigned BCOffset = BWH_HeaderSize; unsigned BCSize = Buffer.size() - BWH_HeaderSize; // Write the magic and version. unsigned Position = 0; WriteInt32ToBuffer(0x0B17C0DE , Buffer, Position); WriteInt32ToBuffer(0 , Buffer, Position); // Version. WriteInt32ToBuffer(BCOffset , Buffer, Position); WriteInt32ToBuffer(BCSize , Buffer, Position); WriteInt32ToBuffer(CPUType , Buffer, Position); // If the file is not a multiple of 16 bytes, insert dummy padding. while (Buffer.size() & 15) Buffer.push_back(0); } /// Helper to write the header common to all bitcode files. static void WriteBitcodeHeader(BitstreamWriter &Stream) { // Emit the file header. Stream.Emit((unsigned)'B', 8); Stream.Emit((unsigned)'C', 8); Stream.Emit(0x0, 4); Stream.Emit(0xC, 4); Stream.Emit(0xE, 4); Stream.Emit(0xD, 4); } /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, bool ShouldPreserveUseListOrder, - bool EmitSummaryIndex) { + bool EmitSummaryIndex, bool GenerateHash) { SmallVector Buffer; Buffer.reserve(256*1024); // If this is darwin or another generic macho target, reserve space for the // header. 
Triple TT(M->getTargetTriple()); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0); // Emit the module into the buffer. { BitstreamWriter Stream(Buffer); // Save the start bit of the actual bitcode, in case there is space // saved at the start for the darwin header above. The reader stream // will start at the bitcode, and we need the offset of the VST // to line up. uint64_t BitcodeStartBit = Stream.GetCurrentBitNo(); // Emit the file header. WriteBitcodeHeader(Stream); WriteIdentificationBlock(M, Stream); // Emit the module. WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit, - EmitSummaryIndex); + EmitSummaryIndex, GenerateHash, Buffer); } if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) EmitDarwinBCHeaderAndTrailer(Buffer, TT); // Write the generated bitstream to "Out". Out.write((char*)&Buffer.front(), Buffer.size()); } // Write the specified module summary index to the given raw output stream, // where it will be written in a new bitcode block. This is used when // writing the combined index file for ThinLTO. void llvm::WriteIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out) { SmallVector Buffer; Buffer.reserve(256 * 1024); BitstreamWriter Stream(Buffer); // Emit the bitcode header. WriteBitcodeHeader(Stream); Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); SmallVector Vals; unsigned CurVersion = 1; Vals.push_back(CurVersion); Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); // If we have a VST, write the VSTOFFSET record placeholder and record // its offset. uint64_t VSTOffsetPlaceholder = WriteValueSymbolTableForwardDecl(Stream); // Write the module paths in the combined index. WriteModStrings(Index, Stream); // Assign unique value ids to all functions in the index for use // in writing out the call graph edges. Save the mapping from GUID // to the new global value id to use when writing those edges, which // are currently saved in the index in terms of GUID. 
std::map GUIDToValueIdMap; unsigned GlobalValueId = 0; for (auto &II : Index) GUIDToValueIdMap[II.first] = ++GlobalValueId; // Write the summary combined index records. WriteCombinedGlobalValueSummary(Index, Stream, GUIDToValueIdMap, GlobalValueId); // Need a special VST writer for the combined index (we don't have a // real VST and real values when this is invoked). WriteCombinedValueSymbolTable(Index, Stream, GUIDToValueIdMap, VSTOffsetPlaceholder); Stream.ExitBlock(); Out.write((char *)&Buffer.front(), Buffer.size()); } diff --git a/llvm/lib/IR/ModuleSummaryIndex.cpp b/llvm/lib/IR/ModuleSummaryIndex.cpp index 16b58ddbeff4..c23e072b48aa 100644 --- a/llvm/lib/IR/ModuleSummaryIndex.cpp +++ b/llvm/lib/IR/ModuleSummaryIndex.cpp @@ -1,68 +1,70 @@ //===-- ModuleSummaryIndex.cpp - Module Summary Index ---------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements the module index and summary classes for the // IR library. // //===----------------------------------------------------------------------===// #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/ADT/StringMap.h" using namespace llvm; // Create the combined module index/summary from multiple // per-module instances. void ModuleSummaryIndex::mergeFrom(std::unique_ptr Other, uint64_t NextModuleId) { StringRef ModPath; for (auto &OtherGlobalValInfoLists : *Other) { uint64_t ValueGUID = OtherGlobalValInfoLists.first; GlobalValueInfoList &List = OtherGlobalValInfoLists.second; // Assert that the value info list only has one entry, since we shouldn't // have duplicate names within a single per-module index. assert(List.size() == 1); std::unique_ptr Info = std::move(List.front()); // Skip if there was no summary section. 
if (!Info->summary()) continue; // Add the module path string ref for this module if we haven't already // saved a reference to it. - if (ModPath.empty()) - ModPath = addModulePath(Info->summary()->modulePath(), NextModuleId); - else + if (ModPath.empty()) { + auto Path = Info->summary()->modulePath(); + ModPath = addModulePath(Path, NextModuleId, Other->getModuleHash(Path)) + ->first(); + } else assert(ModPath == Info->summary()->modulePath() && "Each module in the combined map should have a unique ID"); // Note the module path string ref was copied above and is still owned by // the original per-module index. Reset it to the new module path // string reference owned by the combined index. Info->summary()->setModulePath(ModPath); // Add new value info to existing list. There may be duplicates when // combining GlobalValueMap entries, due to COMDAT values. Any local // values were given unique global IDs. addGlobalValueInfo(ValueGUID, std::move(Info)); } } void ModuleSummaryIndex::removeEmptySummaryEntries() { for (auto MI = begin(), MIE = end(); MI != MIE;) { // Only expect this to be called on a per-module index, which has a single // entry per value entry list. assert(MI->second.size() == 1); if (!MI->second[0]->summary()) MI = GlobalValueMap.erase(MI); else ++MI; } } diff --git a/llvm/lib/Transforms/IPO/FunctionImport.cpp b/llvm/lib/Transforms/IPO/FunctionImport.cpp index 255a55827456..b5f1ffb046f2 100644 --- a/llvm/lib/Transforms/IPO/FunctionImport.cpp +++ b/llvm/lib/Transforms/IPO/FunctionImport.cpp @@ -1,485 +1,485 @@ //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This file implements Function import based on summaries. 
// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSet.h" #include "llvm/IR/AutoUpgrade.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Module.h" #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/Object/ModuleSummaryIndexObjectFile.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #define DEBUG_TYPE "function-import" using namespace llvm; STATISTIC(NumImported, "Number of functions imported"); /// Limit on instruction count of imported functions. static cl::opt ImportInstrLimit( "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"), cl::desc("Only import functions with less than N instructions")); static cl::opt ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7), cl::Hidden, cl::value_desc("x"), cl::desc("As we import functions, multiply the " "`import-instr-limit` threshold by this factor " "before processing newly imported functions")); static cl::opt PrintImports("print-imports", cl::init(false), cl::Hidden, cl::desc("Print imported functions")); // Load lazily a module from \p FileName in \p Context. static std::unique_ptr loadFile(const std::string &FileName, LLVMContext &Context) { SMDiagnostic Err; DEBUG(dbgs() << "Loading '" << FileName << "'\n"); // Metadata isn't loaded until functions are imported, to minimize // the memory overhead. 
std::unique_ptr Result = getLazyIRFileModule(FileName, Err, Context, /* ShouldLazyLoadMetadata = */ true); if (!Result) { Err.print("function-import", errs()); - return nullptr; + report_fatal_error("Abort"); } return Result; } namespace { /// Given a list of possible callee implementation for a call site, select one /// that fits the \p Threshold. /// /// FIXME: select "best" instead of first that fits. But what is "best"? /// - The smallest: more likely to be inlined. /// - The one with the least outgoing edges (already well optimized). /// - One from a module already being imported from in order to reduce the /// number of source modules parsed/linked. /// - One that has PGO data attached. /// - [insert you fancy metric here] static const FunctionSummary * selectCallee(const GlobalValueInfoList &CalleeInfoList, unsigned Threshold) { auto It = llvm::find_if( CalleeInfoList, [&](const std::unique_ptr &GlobInfo) { assert(GlobInfo->summary() && "We should not have a Global Info without summary"); auto *Summary = cast(GlobInfo->summary()); if (GlobalValue::isWeakAnyLinkage(Summary->linkage())) return false; if (Summary->instCount() > Threshold) return false; return true; }); if (It == CalleeInfoList.end()) return nullptr; return cast((*It)->summary()); } /// Return the summary for the function \p GUID that fits the \p Threshold, or /// null if there's no match. static const FunctionSummary *selectCallee(uint64_t GUID, unsigned Threshold, const ModuleSummaryIndex &Index) { auto CalleeInfoList = Index.findGlobalValueInfoList(GUID); if (CalleeInfoList == Index.end()) { return nullptr; // This function does not have a summary } return selectCallee(CalleeInfoList->second, Threshold); } /// Return true if the global \p GUID is exported by module \p ExportModulePath. 
static bool isGlobalExported(const ModuleSummaryIndex &Index, StringRef ExportModulePath, uint64_t GUID) { auto CalleeInfoList = Index.findGlobalValueInfoList(GUID); if (CalleeInfoList == Index.end()) // This global does not have a summary, it is not part of the ThinLTO // process return false; auto DefinedInCalleeModule = llvm::find_if( CalleeInfoList->second, [&](const std::unique_ptr &GlobInfo) { auto *Summary = GlobInfo->summary(); assert(Summary && "Unexpected GlobalValueInfo without summary"); return Summary->modulePath() == ExportModulePath; }); return (DefinedInCalleeModule != CalleeInfoList->second.end()); } using EdgeInfo = std::pair; /// Compute the list of functions to import for a given caller. Mark these /// imported functions and the symbols they reference in their source module as /// exported from their source module. static void computeImportForFunction( StringRef ModulePath, const FunctionSummary &Summary, const ModuleSummaryIndex &Index, unsigned Threshold, const std::map &DefinedFunctions, SmallVectorImpl &Worklist, FunctionImporter::ImportMapTy &ImportsForModule, StringMap &ExportLists) { for (auto &Edge : Summary.calls()) { auto GUID = Edge.first; DEBUG(dbgs() << " edge -> " << GUID << " Threshold:" << Threshold << "\n"); if (DefinedFunctions.count(GUID)) { DEBUG(dbgs() << "ignored! Target already in destination module.\n"); continue; } auto *CalleeSummary = selectCallee(GUID, Threshold, Index); if (!CalleeSummary) { DEBUG(dbgs() << "ignored! No qualifying callee with summary found.\n"); continue; } assert(CalleeSummary->instCount() <= Threshold && "selectCallee() didn't honor the threshold"); auto &ProcessedThreshold = ImportsForModule[CalleeSummary->modulePath()][GUID]; /// Since the traversal of the call graph is DFS, we can revisit a function /// a second time with a higher threshold. In this case, it is added back to /// the worklist with the new threshold. 
if (ProcessedThreshold && ProcessedThreshold > Threshold) { DEBUG(dbgs() << "ignored! Target was already seen with Threshold " << ProcessedThreshold << "\n"); continue; } // Mark this function as imported in this module, with the current Threshold ProcessedThreshold = Threshold; // Make exports in the source module. auto ExportModulePath = CalleeSummary->modulePath(); auto ExportList = ExportLists[ExportModulePath]; ExportList.insert(GUID); // Mark all functions and globals referenced by this function as exported to // the outside if they are defined in the same source module. for (auto &Edge : CalleeSummary->calls()) { auto CalleeGUID = Edge.first; if (isGlobalExported(Index, ExportModulePath, CalleeGUID)) ExportList.insert(CalleeGUID); } for (auto &GUID : CalleeSummary->refs()) { if (isGlobalExported(Index, ExportModulePath, GUID)) ExportList.insert(GUID); } // Insert the newly imported function to the worklist. Worklist.push_back(std::make_pair(CalleeSummary, Threshold)); } } /// Given the list of globals defined in a module, compute the list of imports /// as well as the list of "exports", i.e. the list of symbols referenced from /// another module (that may require promotion). static void ComputeImportForModule( StringRef ModulePath, const std::map &DefinedFunctions, const ModuleSummaryIndex &Index, FunctionImporter::ImportMapTy &ImportsForModule, StringMap &ExportLists) { // Worklist contains the list of function imported in this module, for which // we will analyse the callees and may import further down the callgraph. 
SmallVector Worklist; // Populate the worklist with the import for the functions in the current // module for (auto &FuncInfo : DefinedFunctions) { auto *Summary = FuncInfo.second; DEBUG(dbgs() << "Initalize import for " << FuncInfo.first << "\n"); computeImportForFunction(ModulePath, *Summary, Index, ImportInstrLimit, DefinedFunctions, Worklist, ImportsForModule, ExportLists); } while (!Worklist.empty()) { auto FuncInfo = Worklist.pop_back_val(); auto *Summary = FuncInfo.first; auto Threshold = FuncInfo.second; // Process the newly imported functions and add callees to the worklist. // Adjust the threshold Threshold = Threshold * ImportInstrFactor; computeImportForFunction(ModulePath, *Summary, Index, Threshold, DefinedFunctions, Worklist, ImportsForModule, ExportLists); } } } // anonymous namespace /// Compute all the import and export for every module in the Index. void llvm::ComputeCrossModuleImport( const ModuleSummaryIndex &Index, StringMap &ImportLists, StringMap &ExportLists) { auto ModuleCount = Index.modulePaths().size(); // Collect for each module the list of function it defines. // GUID -> Summary StringMap> Module2FunctionInfoMap( ModuleCount); for (auto &GlobalList : Index) { auto GUID = GlobalList.first; for (auto &GlobInfo : GlobalList.second) { auto *Summary = dyn_cast_or_null(GlobInfo->summary()); if (!Summary) /// Ignore global variable, focus on functions continue; DEBUG(dbgs() << "Adding definition: Module '" << Summary->modulePath() << "' defines '" << GUID << "'\n"); Module2FunctionInfoMap[Summary->modulePath()][GUID] = Summary; } } // For each module that has function defined, compute the import/export lists. 
for (auto &DefinedFunctions : Module2FunctionInfoMap) { auto &ImportsForModule = ImportLists[DefinedFunctions.first()]; DEBUG(dbgs() << "Computing import for Module '" << DefinedFunctions.first() << "'\n"); ComputeImportForModule(DefinedFunctions.first(), DefinedFunctions.second, Index, ImportsForModule, ExportLists); } #ifndef NDEBUG DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size() << " modules:\n"); for (auto &ModuleImports : ImportLists) { auto ModName = ModuleImports.first(); auto &Exports = ExportLists[ModName]; DEBUG(dbgs() << "* Module " << ModName << " exports " << Exports.size() << " functions. Imports from " << ModuleImports.second.size() << " modules.\n"); for (auto &Src : ModuleImports.second) { auto SrcModName = Src.first(); DEBUG(dbgs() << " - " << Src.second.size() << " functions imported from " << SrcModName << "\n"); } } #endif } // Automatically import functions in Module \p DestModule based on the summaries // index. // bool FunctionImporter::importFunctions( Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) { DEBUG(dbgs() << "Starting import for Module " << DestModule.getModuleIdentifier() << "\n"); unsigned ImportedCount = 0; // Linker that will be used for importing function Linker TheLinker(DestModule); // Do the actual import of functions now, one Module at a time std::set ModuleNameOrderedList; for (auto &FunctionsToImportPerModule : ImportList) { ModuleNameOrderedList.insert(FunctionsToImportPerModule.first()); } for (auto &Name : ModuleNameOrderedList) { // Get the module for the import const auto &FunctionsToImportPerModule = ImportList.find(Name); assert(FunctionsToImportPerModule != ImportList.end()); std::unique_ptr SrcModule = ModuleLoader(Name); assert(&DestModule.getContext() == &SrcModule->getContext() && "Context mismatch"); // If modules were created with lazy metadata loading, materialize it // now, before linking it (otherwise this will be a noop). 
SrcModule->materializeMetadata(); UpgradeDebugInfo(*SrcModule); auto &ImportGUIDs = FunctionsToImportPerModule->second; // Find the globals to import DenseSet GlobalsToImport; for (auto &GV : *SrcModule) { if (GV.hasName() && ImportGUIDs.count(GV.getGUID())) { GV.materialize(); GlobalsToImport.insert(&GV); } } for (auto &GV : SrcModule->aliases()) { if (!GV.hasName()) continue; auto GUID = GV.getGUID(); if (ImportGUIDs.count(GUID)) { // Alias can't point to "available_externally". However when we import // linkOnceODR the linkage does not change. So we import the alias // and aliasee only in this case. const GlobalObject *GO = GV.getBaseObject(); if (!GO->hasLinkOnceODRLinkage()) continue; GV.materialize(); GlobalsToImport.insert(&GV); GlobalsToImport.insert(GO); } } for (auto &GV : SrcModule->globals()) { if (!GV.hasName()) continue; auto GUID = GV.getGUID(); if (ImportGUIDs.count(GUID)) { GV.materialize(); GlobalsToImport.insert(&GV); } } // Link in the specified functions. if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport)) return true; if (PrintImports) { for (const auto *GV : GlobalsToImport) dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName() << " from " << SrcModule->getSourceFileName() << "\n"; } if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None, &GlobalsToImport)) report_fatal_error("Function Import: link error"); ImportedCount += GlobalsToImport.size(); } NumImported += ImportedCount; DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module " << DestModule.getModuleIdentifier() << "\n"); return ImportedCount; } /// Summary file to use for function importing when using -function-import from /// the command line. 
static cl::opt SummaryFile("summary-file", cl::desc("The summary file to use for function importing.")); static void diagnosticHandler(const DiagnosticInfo &DI) { raw_ostream &OS = errs(); DiagnosticPrinterRawOStream DP(OS); DI.print(DP); OS << '\n'; } /// Parse the summary index out of an IR file and return the summary /// index object if found, or nullptr if not. static std::unique_ptr getModuleSummaryIndexForFile(StringRef Path, std::string &Error, DiagnosticHandlerFunction DiagnosticHandler) { std::unique_ptr Buffer; ErrorOr> BufferOrErr = MemoryBuffer::getFile(Path); if (std::error_code EC = BufferOrErr.getError()) { Error = EC.message(); return nullptr; } Buffer = std::move(BufferOrErr.get()); ErrorOr> ObjOrErr = object::ModuleSummaryIndexObjectFile::create(Buffer->getMemBufferRef(), DiagnosticHandler); if (std::error_code EC = ObjOrErr.getError()) { Error = EC.message(); return nullptr; } return (*ObjOrErr)->takeIndex(); } namespace { /// Pass that performs cross-module function import provided a summary file. class FunctionImportPass : public ModulePass { /// Optional module summary index to use for importing, otherwise /// the summary-file option must be specified. 
const ModuleSummaryIndex *Index; public: /// Pass identification, replacement for typeid static char ID; /// Specify pass name for debug output const char *getPassName() const override { return "Function Importing"; } explicit FunctionImportPass(const ModuleSummaryIndex *Index = nullptr) : ModulePass(ID), Index(Index) {} bool runOnModule(Module &M) override { if (SummaryFile.empty() && !Index) report_fatal_error("error: -function-import requires -summary-file or " "file from frontend\n"); std::unique_ptr IndexPtr; if (!SummaryFile.empty()) { if (Index) report_fatal_error("error: -summary-file and index from frontend\n"); std::string Error; IndexPtr = getModuleSummaryIndexForFile(SummaryFile, Error, diagnosticHandler); if (!IndexPtr) { errs() << "Error loading file '" << SummaryFile << "': " << Error << "\n"; return false; } Index = IndexPtr.get(); } // First step is collecting the import/export lists // The export list is not used yet, but could limit the amount of renaming // performed in renameModuleForThinLTO() StringMap ImportLists; StringMap ExportLists; ComputeCrossModuleImport(*Index, ImportLists, ExportLists); auto &ImportList = ImportLists[M.getModuleIdentifier()]; // Next we need to promote to global scope and rename any local values that // are potentially exported to other modules. if (renameModuleForThinLTO(M, *Index, nullptr)) { errs() << "Error renaming module\n"; return false; } // Perform the import now. 
auto ModuleLoader = [&M](StringRef Identifier) { return loadFile(Identifier, M.getContext()); }; FunctionImporter Importer(*Index, ModuleLoader); return Importer.importFunctions(M, ImportList); } }; } // anonymous namespace char FunctionImportPass::ID = 0; INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import", "Summary Based Function Import", false, false) INITIALIZE_PASS_END(FunctionImportPass, "function-import", "Summary Based Function Import", false, false) namespace llvm { Pass *createFunctionImportPass(const ModuleSummaryIndex *Index = nullptr) { return new FunctionImportPass(Index); } } diff --git a/llvm/test/Bitcode/module_hash.ll b/llvm/test/Bitcode/module_hash.ll new file mode 100644 index 000000000000..70977b104c6b --- /dev/null +++ b/llvm/test/Bitcode/module_hash.ll @@ -0,0 +1,35 @@ +; Check per module hash. +; RUN: llvm-as -module-hash %s -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD1 +; MOD1: +; RUN: llvm-as -module-hash %p/Inputs/module_hash.ll -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD2 +; MOD2: + +; Check that the hash matches in the combined index. + +; First regenerate the modules with a summary +; RUN: llvm-as -module-hash -module-summary %s -o %t.m1.bc +; RUN: llvm-as -module-hash -module-summary %p/Inputs/module_hash.ll -o %t.m2.bc + +; Recover the hashes from the modules themselves. +; RUN: llvm-bcanalyzer -dump %t1 | grep ' %t.hash +; RUN: llvm-bcanalyzer -dump %t2 | grep '> %t.hash + +; Generate the combined index and gather the hashes there. +; RUN: llvm-lto --thinlto-action=thinlink -o - %t.m1.bc %t.m2.bc | llvm-bcanalyzer -dump | grep '> %t.hash + +; Validate the output now, the hahes in the individual modules and the combined index are in the same file. +; RUN: cat %t.hash | FileCheck %s --check-prefix=COMBINED + +; First capture the value of the hash for the two modules. 
+; COMBINED: +; COMBINED: + +; Validate against the value extracted from the combined index +; COMBINED-DAG: +; COMBINED-DAG: + + +; Need a function for the combined index to be populated. +define void @foo() { + ret void +} diff --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp index 7318bfe341dd..7e9500a66723 100644 --- a/llvm/tools/llvm-as/llvm-as.cpp +++ b/llvm/tools/llvm-as/llvm-as.cpp @@ -1,124 +1,127 @@ //===--- llvm-as.cpp - The low-level LLVM assembler -----------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This utility may be invoked in the following manner: // llvm-as --help - Output information about command line switches // llvm-as [options] - Read LLVM asm from stdin, write bitcode to stdout // llvm-as [options] x.ll - Read LLVM asm from the x.ll file, write bitcode // to the x.bc file. 
// //===----------------------------------------------------------------------===// #include "llvm/IR/LLVMContext.h" #include "llvm/AsmParser/Parser.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/Module.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/SystemUtils.h" #include "llvm/Support/ToolOutputFile.h" #include using namespace llvm; static cl::opt InputFilename(cl::Positional, cl::desc(""), cl::init("-")); static cl::opt OutputFilename("o", cl::desc("Override output filename"), cl::value_desc("filename")); static cl::opt Force("f", cl::desc("Enable binary output on terminals")); static cl::opt DisableOutput("disable-output", cl::desc("Disable output"), cl::init(false)); static cl::opt EmitSummaryIndex("module-summary", cl::desc("Emit module summary index"), cl::init(false)); +static cl::opt EmitModuleHash("module-hash", cl::desc("Emit module hash"), + cl::init(false)); + static cl::opt DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden); static cl::opt DisableVerify("disable-verify", cl::Hidden, cl::desc("Do not run verifier on input LLVM (dangerous!)")); static cl::opt PreserveBitcodeUseListOrder( "preserve-bc-uselistorder", cl::desc("Preserve use-list order when writing LLVM bitcode."), cl::init(true), cl::Hidden); static void WriteOutputFile(const Module *M) { // Infer the output filename if needed. if (OutputFilename.empty()) { if (InputFilename == "-") { OutputFilename = "-"; } else { StringRef IFN = InputFilename; OutputFilename = (IFN.endswith(".ll") ? 
IFN.drop_back(3) : IFN).str(); OutputFilename += ".bc"; } } std::error_code EC; std::unique_ptr Out( new tool_output_file(OutputFilename, EC, sys::fs::F_None)); if (EC) { errs() << EC.message() << '\n'; exit(1); } if (Force || !CheckBitcodeOutputToConsole(Out->os(), true)) WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder, - EmitSummaryIndex); + EmitSummaryIndex, EmitModuleHash); // Declare success. Out->keep(); } int main(int argc, char **argv) { // Print a stack trace if we signal out. sys::PrintStackTraceOnErrorSignal(); PrettyStackTraceProgram X(argc, argv); LLVMContext &Context = getGlobalContext(); llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. cl::ParseCommandLineOptions(argc, argv, "llvm .ll -> .bc assembler\n"); // Parse the file now... SMDiagnostic Err; std::unique_ptr M = parseAssemblyFile(InputFilename, Err, Context); if (!M.get()) { Err.print(argv[0], errs()); return 1; } if (!DisableVerify) { std::string ErrorStr; raw_string_ostream OS(ErrorStr); if (verifyModule(*M.get(), &OS)) { errs() << argv[0] << ": assembly parsed, but does not verify as correct!\n"; errs() << OS.str(); return 1; } } if (DumpAsm) errs() << "Here's the assembly:\n" << *M.get(); if (!DisableOutput) WriteOutputFile(M.get()); return 0; } diff --git a/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp index 6645045db8fa..85dd5f52daac 100644 --- a/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ b/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -1,875 +1,915 @@ //===-- llvm-bcanalyzer.cpp - Bitcode Analyzer --------------------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. 
// //===----------------------------------------------------------------------===// // // This tool may be invoked in the following manner: // llvm-bcanalyzer [options] - Read LLVM bitcode from stdin // llvm-bcanalyzer [options] x.bc - Read LLVM bitcode from the x.bc file // // Options: // --help - Output information about command line switches // --dump - Dump low-level bitcode structure in readable format // // This tool provides analytical information about a bitcode file. It is // intended as an aid to developers of bitcode reading and writing software. It // produces on std::out a summary of the bitcode file that shows various // statistics about the contents of the file. By default this information is // detailed and contains information about individual bitcode blocks and the // functions in the module. // The tool is also able to print a bitcode file in a straight forward text // format that shows the containment and relationships of the information in // the bitcode file (-dump option). // //===----------------------------------------------------------------------===// #include "llvm/Bitcode/BitstreamReader.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/SHA1.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include using namespace llvm; static cl::opt InputFilename(cl::Positional, cl::desc(""), cl::init("-")); static cl::opt Dump("dump", cl::desc("Dump low level bitcode trace")); //===----------------------------------------------------------------------===// // Bitcode specific analysis. 
//===----------------------------------------------------------------------===// static cl::opt NoHistogram("disable-histogram", cl::desc("Do not print per-code histogram")); static cl::opt NonSymbolic("non-symbolic", cl::desc("Emit numeric info in dump even if" " symbolic info is available")); static cl::opt BlockInfoFilename("block-info", cl::desc("Use the BLOCK_INFO from the given file")); static cl::opt ShowBinaryBlobs("show-binary-blobs", cl::desc("Print binary blobs using hex escapes")); namespace { /// CurStreamTypeType - A type for CurStreamType enum CurStreamTypeType { UnknownBitstream, LLVMIRBitstream }; } /// GetBlockName - Return a symbolic block name if known, otherwise return /// null. static const char *GetBlockName(unsigned BlockID, const BitstreamReader &StreamFile, CurStreamTypeType CurStreamType) { // Standard blocks for all bitcode files. if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) { if (BlockID == bitc::BLOCKINFO_BLOCK_ID) return "BLOCKINFO_BLOCK"; return nullptr; } // Check to see if we have a blockinfo record for this block, with a name. 
if (const BitstreamReader::BlockInfo *Info = StreamFile.getBlockInfo(BlockID)) { if (!Info->Name.empty()) return Info->Name.c_str(); } if (CurStreamType != LLVMIRBitstream) return nullptr; switch (BlockID) { default: return nullptr; case bitc::MODULE_BLOCK_ID: return "MODULE_BLOCK"; case bitc::PARAMATTR_BLOCK_ID: return "PARAMATTR_BLOCK"; case bitc::PARAMATTR_GROUP_BLOCK_ID: return "PARAMATTR_GROUP_BLOCK_ID"; case bitc::TYPE_BLOCK_ID_NEW: return "TYPE_BLOCK_ID"; case bitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK"; case bitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK"; case bitc::IDENTIFICATION_BLOCK_ID: return "IDENTIFICATION_BLOCK_ID"; case bitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB"; case bitc::METADATA_BLOCK_ID: return "METADATA_BLOCK"; case bitc::METADATA_KIND_BLOCK_ID: return "METADATA_KIND_BLOCK"; case bitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK"; case bitc::USELIST_BLOCK_ID: return "USELIST_BLOCK_ID"; case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: return "GLOBALVAL_SUMMARY_BLOCK"; case bitc::MODULE_STRTAB_BLOCK_ID: return "MODULE_STRTAB_BLOCK"; } } /// GetCodeName - Return a symbolic code name if known, otherwise return /// null. static const char *GetCodeName(unsigned CodeID, unsigned BlockID, const BitstreamReader &StreamFile, CurStreamTypeType CurStreamType) { // Standard blocks for all bitcode files. if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) { if (BlockID == bitc::BLOCKINFO_BLOCK_ID) { switch (CodeID) { default: return nullptr; case bitc::BLOCKINFO_CODE_SETBID: return "SETBID"; case bitc::BLOCKINFO_CODE_BLOCKNAME: return "BLOCKNAME"; case bitc::BLOCKINFO_CODE_SETRECORDNAME: return "SETRECORDNAME"; } } return nullptr; } // Check to see if we have a blockinfo record for this record, with a name. 
if (const BitstreamReader::BlockInfo *Info = StreamFile.getBlockInfo(BlockID)) { for (unsigned i = 0, e = Info->RecordNames.size(); i != e; ++i) if (Info->RecordNames[i].first == CodeID) return Info->RecordNames[i].second.c_str(); } if (CurStreamType != LLVMIRBitstream) return nullptr; #define STRINGIFY_CODE(PREFIX, CODE) \ case bitc::PREFIX##_##CODE: \ return #CODE; switch (BlockID) { default: return nullptr; case bitc::MODULE_BLOCK_ID: switch (CodeID) { default: return nullptr; STRINGIFY_CODE(MODULE_CODE, VERSION) STRINGIFY_CODE(MODULE_CODE, TRIPLE) STRINGIFY_CODE(MODULE_CODE, DATALAYOUT) STRINGIFY_CODE(MODULE_CODE, ASM) STRINGIFY_CODE(MODULE_CODE, SECTIONNAME) STRINGIFY_CODE(MODULE_CODE, DEPLIB) // FIXME: Remove in 4.0 STRINGIFY_CODE(MODULE_CODE, GLOBALVAR) STRINGIFY_CODE(MODULE_CODE, FUNCTION) STRINGIFY_CODE(MODULE_CODE, ALIAS) STRINGIFY_CODE(MODULE_CODE, PURGEVALS) STRINGIFY_CODE(MODULE_CODE, GCNAME) STRINGIFY_CODE(MODULE_CODE, VSTOFFSET) STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED) STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME) + STRINGIFY_CODE(MODULE_CODE, HASH) } case bitc::IDENTIFICATION_BLOCK_ID: switch (CodeID) { default: return nullptr; STRINGIFY_CODE(IDENTIFICATION_CODE, STRING) STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH) } case bitc::PARAMATTR_BLOCK_ID: switch (CodeID) { default: return nullptr; // FIXME: Should these be different? 
case bitc::PARAMATTR_CODE_ENTRY_OLD: return "ENTRY"; case bitc::PARAMATTR_CODE_ENTRY: return "ENTRY"; } case bitc::PARAMATTR_GROUP_BLOCK_ID: switch (CodeID) { default: return nullptr; case bitc::PARAMATTR_GRP_CODE_ENTRY: return "ENTRY"; } case bitc::TYPE_BLOCK_ID_NEW: switch (CodeID) { default: return nullptr; STRINGIFY_CODE(TYPE_CODE, NUMENTRY) STRINGIFY_CODE(TYPE_CODE, VOID) STRINGIFY_CODE(TYPE_CODE, FLOAT) STRINGIFY_CODE(TYPE_CODE, DOUBLE) STRINGIFY_CODE(TYPE_CODE, LABEL) STRINGIFY_CODE(TYPE_CODE, OPAQUE) STRINGIFY_CODE(TYPE_CODE, INTEGER) STRINGIFY_CODE(TYPE_CODE, POINTER) STRINGIFY_CODE(TYPE_CODE, ARRAY) STRINGIFY_CODE(TYPE_CODE, VECTOR) STRINGIFY_CODE(TYPE_CODE, X86_FP80) STRINGIFY_CODE(TYPE_CODE, FP128) STRINGIFY_CODE(TYPE_CODE, PPC_FP128) STRINGIFY_CODE(TYPE_CODE, METADATA) STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON) STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME) STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED) STRINGIFY_CODE(TYPE_CODE, FUNCTION) } case bitc::CONSTANTS_BLOCK_ID: switch (CodeID) { default: return nullptr; STRINGIFY_CODE(CST_CODE, SETTYPE) STRINGIFY_CODE(CST_CODE, NULL) STRINGIFY_CODE(CST_CODE, UNDEF) STRINGIFY_CODE(CST_CODE, INTEGER) STRINGIFY_CODE(CST_CODE, WIDE_INTEGER) STRINGIFY_CODE(CST_CODE, FLOAT) STRINGIFY_CODE(CST_CODE, AGGREGATE) STRINGIFY_CODE(CST_CODE, STRING) STRINGIFY_CODE(CST_CODE, CSTRING) STRINGIFY_CODE(CST_CODE, CE_BINOP) STRINGIFY_CODE(CST_CODE, CE_CAST) STRINGIFY_CODE(CST_CODE, CE_GEP) STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP) STRINGIFY_CODE(CST_CODE, CE_SELECT) STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT) STRINGIFY_CODE(CST_CODE, CE_INSERTELT) STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC) STRINGIFY_CODE(CST_CODE, CE_CMP) STRINGIFY_CODE(CST_CODE, INLINEASM) STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX) case bitc::CST_CODE_BLOCKADDRESS: return "CST_CODE_BLOCKADDRESS"; STRINGIFY_CODE(CST_CODE, DATA) } case bitc::FUNCTION_BLOCK_ID: switch (CodeID) { default: return nullptr; STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS) STRINGIFY_CODE(FUNC_CODE, INST_BINOP) 
STRINGIFY_CODE(FUNC_CODE, INST_CAST) STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD) STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD) STRINGIFY_CODE(FUNC_CODE, INST_SELECT) STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT) STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT) STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC) STRINGIFY_CODE(FUNC_CODE, INST_CMP) STRINGIFY_CODE(FUNC_CODE, INST_RET) STRINGIFY_CODE(FUNC_CODE, INST_BR) STRINGIFY_CODE(FUNC_CODE, INST_SWITCH) STRINGIFY_CODE(FUNC_CODE, INST_INVOKE) STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE) STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET) STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET) STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD) STRINGIFY_CODE(FUNC_CODE, INST_PHI) STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA) STRINGIFY_CODE(FUNC_CODE, INST_LOAD) STRINGIFY_CODE(FUNC_CODE, INST_VAARG) STRINGIFY_CODE(FUNC_CODE, INST_STORE) STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL) STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL) STRINGIFY_CODE(FUNC_CODE, INST_CMP2) STRINGIFY_CODE(FUNC_CODE, INST_VSELECT) STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN) STRINGIFY_CODE(FUNC_CODE, INST_CALL) STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC) STRINGIFY_CODE(FUNC_CODE, INST_GEP) } case bitc::VALUE_SYMTAB_BLOCK_ID: switch (CodeID) { default: return nullptr; STRINGIFY_CODE(VST_CODE, ENTRY) STRINGIFY_CODE(VST_CODE, BBENTRY) STRINGIFY_CODE(VST_CODE, FNENTRY) STRINGIFY_CODE(VST_CODE, COMBINED_GVDEFENTRY) STRINGIFY_CODE(VST_CODE, COMBINED_ENTRY) } case bitc::MODULE_STRTAB_BLOCK_ID: switch (CodeID) { default: return nullptr; STRINGIFY_CODE(MST_CODE, ENTRY) + STRINGIFY_CODE(MST_CODE, HASH) } case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: switch (CodeID) { default: return nullptr; STRINGIFY_CODE(FS, PERMODULE) STRINGIFY_CODE(FS, PERMODULE_PROFILE) STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS) STRINGIFY_CODE(FS, COMBINED) STRINGIFY_CODE(FS, COMBINED_PROFILE) STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS) } case bitc::METADATA_ATTACHMENT_ID: switch(CodeID) { default:return nullptr; STRINGIFY_CODE(METADATA, ATTACHMENT) } case 
bitc::METADATA_BLOCK_ID: switch(CodeID) { default:return nullptr; STRINGIFY_CODE(METADATA, STRING_OLD) STRINGIFY_CODE(METADATA, STRINGS) STRINGIFY_CODE(METADATA, NAME) STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK STRINGIFY_CODE(METADATA, NODE) STRINGIFY_CODE(METADATA, VALUE) STRINGIFY_CODE(METADATA, OLD_NODE) STRINGIFY_CODE(METADATA, OLD_FN_NODE) STRINGIFY_CODE(METADATA, NAMED_NODE) STRINGIFY_CODE(METADATA, DISTINCT_NODE) STRINGIFY_CODE(METADATA, LOCATION) STRINGIFY_CODE(METADATA, GENERIC_DEBUG) STRINGIFY_CODE(METADATA, SUBRANGE) STRINGIFY_CODE(METADATA, ENUMERATOR) STRINGIFY_CODE(METADATA, BASIC_TYPE) STRINGIFY_CODE(METADATA, FILE) STRINGIFY_CODE(METADATA, DERIVED_TYPE) STRINGIFY_CODE(METADATA, COMPOSITE_TYPE) STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE) STRINGIFY_CODE(METADATA, COMPILE_UNIT) STRINGIFY_CODE(METADATA, SUBPROGRAM) STRINGIFY_CODE(METADATA, LEXICAL_BLOCK) STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE) STRINGIFY_CODE(METADATA, NAMESPACE) STRINGIFY_CODE(METADATA, TEMPLATE_TYPE) STRINGIFY_CODE(METADATA, TEMPLATE_VALUE) STRINGIFY_CODE(METADATA, GLOBAL_VAR) STRINGIFY_CODE(METADATA, LOCAL_VAR) STRINGIFY_CODE(METADATA, EXPRESSION) STRINGIFY_CODE(METADATA, OBJC_PROPERTY) STRINGIFY_CODE(METADATA, IMPORTED_ENTITY) STRINGIFY_CODE(METADATA, MODULE) } case bitc::METADATA_KIND_BLOCK_ID: switch (CodeID) { default: return nullptr; STRINGIFY_CODE(METADATA, KIND) } case bitc::USELIST_BLOCK_ID: switch(CodeID) { default:return nullptr; case bitc::USELIST_CODE_DEFAULT: return "USELIST_CODE_DEFAULT"; case bitc::USELIST_CODE_BB: return "USELIST_CODE_BB"; } } #undef STRINGIFY_CODE } struct PerRecordStats { unsigned NumInstances; unsigned NumAbbrev; uint64_t TotalBits; PerRecordStats() : NumInstances(0), NumAbbrev(0), TotalBits(0) {} }; struct PerBlockIDStats { /// NumInstances - This the number of times this block ID has been seen. unsigned NumInstances; /// NumBits - The total size in bits of all of these blocks. 
uint64_t NumBits; /// NumSubBlocks - The total number of blocks these blocks contain. unsigned NumSubBlocks; /// NumAbbrevs - The total number of abbreviations. unsigned NumAbbrevs; /// NumRecords - The total number of records these blocks contain, and the /// number that are abbreviated. unsigned NumRecords, NumAbbreviatedRecords; /// CodeFreq - Keep track of the number of times we see each code. std::vector CodeFreq; PerBlockIDStats() : NumInstances(0), NumBits(0), NumSubBlocks(0), NumAbbrevs(0), NumRecords(0), NumAbbreviatedRecords(0) {} }; static std::map BlockIDStats; /// Error - All bitcode analysis errors go through this function, making this a /// good place to breakpoint if debugging. static bool Error(const Twine &Err) { errs() << Err << "\n"; return true; } static bool decodeMetadataStringsBlob(BitstreamReader &Reader, StringRef Indent, ArrayRef Record, StringRef Blob) { if (Blob.empty()) return true; if (Record.size() != 2) return true; unsigned NumStrings = Record[0]; unsigned StringsOffset = Record[1]; outs() << " num-strings = " << NumStrings << " {\n"; StringRef Lengths = Blob.slice(0, StringsOffset); SimpleBitstreamCursor R(Reader); R.jumpToPointer(Lengths.begin()); // Ensure that Blob doesn't get invalidated, even if this is reading from a // StreamingMemoryObject with corrupt data. 
R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset); StringRef Strings = Blob.drop_front(StringsOffset); do { if (R.AtEndOfStream()) return Error("bad length"); unsigned Size = R.ReadVBR(6); if (Strings.size() < Size) return Error("truncated chars"); outs() << Indent << " '"; outs().write_escaped(Strings.slice(0, Size), /*hex=*/true); outs() << "'\n"; Strings = Strings.drop_front(Size); } while (--NumStrings); outs() << Indent << " }"; return false; } static bool decodeBlob(unsigned Code, unsigned BlockID, BitstreamReader &Reader, StringRef Indent, ArrayRef Record, StringRef Blob) { if (BlockID != bitc::METADATA_BLOCK_ID) return true; if (Code != bitc::METADATA_STRINGS) return true; return decodeMetadataStringsBlob(Reader, Indent, Record, Blob); } /// ParseBlock - Read a block, updating statistics, etc. static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID, unsigned IndentLevel, CurStreamTypeType CurStreamType) { std::string Indent(IndentLevel*2, ' '); uint64_t BlockBitStart = Stream.GetCurrentBitNo(); // Get the statistics for this BlockID. PerBlockIDStats &BlockStats = BlockIDStats[BlockID]; BlockStats.NumInstances++; // BLOCKINFO is a special part of the stream. bool DumpRecords = Dump; if (BlockID == bitc::BLOCKINFO_BLOCK_ID) { if (Dump) outs() << Indent << "\n"; if (BitstreamCursor(Stream).ReadBlockInfoBlock()) return Error("Malformed BlockInfoBlock"); // It's not really interesting to dump the contents of the blockinfo block. 
DumpRecords = false; } unsigned NumWords = 0; if (Stream.EnterSubBlock(BlockID, &NumWords)) return Error("Malformed block record"); + // Keep it for later, when we see a MODULE_HASH record + uint64_t BlockEntryPos = Stream.getCurrentByteNo(); + const char *BlockName = nullptr; if (DumpRecords) { outs() << Indent << "<"; if ((BlockName = GetBlockName(BlockID, *Stream.getBitStreamReader(), CurStreamType))) outs() << BlockName; else outs() << "UnknownBlock" << BlockID; if (NonSymbolic && BlockName) outs() << " BlockID=" << BlockID; outs() << " NumWords=" << NumWords << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n"; } SmallVector Record; // Read all the records for this block. while (1) { if (Stream.AtEndOfStream()) return Error("Premature end of bitstream"); uint64_t RecordStartBit = Stream.GetCurrentBitNo(); BitstreamEntry Entry = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); switch (Entry.Kind) { case BitstreamEntry::Error: return Error("malformed bitcode file"); case BitstreamEntry::EndBlock: { uint64_t BlockBitEnd = Stream.GetCurrentBitNo(); BlockStats.NumBits += BlockBitEnd-BlockBitStart; if (DumpRecords) { outs() << Indent << "\n"; else outs() << "UnknownBlock" << BlockID << ">\n"; } return false; } case BitstreamEntry::SubBlock: { uint64_t SubBlockBitStart = Stream.GetCurrentBitNo(); if (ParseBlock(Stream, Entry.ID, IndentLevel+1, CurStreamType)) return true; ++BlockStats.NumSubBlocks; uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo(); // Don't include subblock sizes in the size of this block. BlockBitStart += SubBlockBitEnd-SubBlockBitStart; continue; } case BitstreamEntry::Record: // The interesting case. break; } if (Entry.ID == bitc::DEFINE_ABBREV) { Stream.ReadAbbrevRecord(); ++BlockStats.NumAbbrevs; continue; } Record.clear(); ++BlockStats.NumRecords; StringRef Blob; + unsigned CurrentRecordPos = Stream.getCurrentByteNo(); unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob); // Increment the # occurrences of this code. 
if (BlockStats.CodeFreq.size() <= Code) BlockStats.CodeFreq.resize(Code+1); BlockStats.CodeFreq[Code].NumInstances++; BlockStats.CodeFreq[Code].TotalBits += Stream.GetCurrentBitNo()-RecordStartBit; if (Entry.ID != bitc::UNABBREV_RECORD) { BlockStats.CodeFreq[Code].NumAbbrev++; ++BlockStats.NumAbbreviatedRecords; } if (DumpRecords) { outs() << Indent << " <"; if (const char *CodeName = GetCodeName(Code, BlockID, *Stream.getBitStreamReader(), CurStreamType)) outs() << CodeName; else outs() << "UnknownCode" << Code; if (NonSymbolic && GetCodeName(Code, BlockID, *Stream.getBitStreamReader(), CurStreamType)) outs() << " codeid=" << Code; const BitCodeAbbrev *Abbv = nullptr; if (Entry.ID != bitc::UNABBREV_RECORD) { Abbv = Stream.getAbbrev(Entry.ID); outs() << " abbrevid=" << Entry.ID; } for (unsigned i = 0, e = Record.size(); i != e; ++i) outs() << " op" << i << "=" << (int64_t)Record[i]; + // If we found a module hash, let's verify that it matches! + if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) { + if (Record.size() != 5) + outs() << " (invalid)"; + else { + // Recompute the hash and compare it to the one in the bitcode + SHA1 Hasher; + StringRef Hash; + { + int BlockSize = CurrentRecordPos - BlockEntryPos; + auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize); + Hasher.update(ArrayRef(Ptr, BlockSize)); + Hash = Hasher.result(); + } + SmallString<20> RecordedHash; + RecordedHash.resize(20); + int Pos = 0; + for (auto &Val : Record) { + assert(!(Val >> 32) && "Unexpected high bits set"); + RecordedHash[Pos++] = (Val >> 24) & 0xFF; + RecordedHash[Pos++] = (Val >> 16) & 0xFF; + RecordedHash[Pos++] = (Val >> 8) & 0xFF; + RecordedHash[Pos++] = (Val >> 0) & 0xFF; + } + if (Hash == RecordedHash) + outs() << " (match)"; + else + outs() << " (!mismatch!)"; + } + } + outs() << "/>"; if (Abbv) { for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); if (!Op.isEncoding() || 
Op.getEncoding() != BitCodeAbbrevOp::Array) continue; assert(i + 2 == e && "Array op not second to last"); std::string Str; bool ArrayIsPrintable = true; for (unsigned j = i - 1, je = Record.size(); j != je; ++j) { if (!isprint(static_cast(Record[j]))) { ArrayIsPrintable = false; break; } Str += (char)Record[j]; } if (ArrayIsPrintable) outs() << " record string = '" << Str << "'"; break; } } if (Blob.data() && decodeBlob(Code, BlockID, *Stream.getBitStreamReader(), Indent, Record, Blob)) { outs() << " blob data = "; if (ShowBinaryBlobs) { outs() << "'"; outs().write_escaped(Blob, /*hex=*/true) << "'"; } else { bool BlobIsPrintable = true; for (unsigned i = 0, e = Blob.size(); i != e; ++i) if (!isprint(static_cast(Blob[i]))) { BlobIsPrintable = false; break; } if (BlobIsPrintable) outs() << "'" << Blob << "'"; else outs() << "unprintable, " << Blob.size() << " bytes."; } } outs() << "\n"; } } } static void PrintSize(double Bits) { outs() << format("%.2f/%.2fB/%luW", Bits, Bits/8,(unsigned long)(Bits/32)); } static void PrintSize(uint64_t Bits) { outs() << format("%lub/%.2fB/%luW", (unsigned long)Bits, (double)Bits/8, (unsigned long)(Bits/32)); } static bool openBitcodeFile(StringRef Path, std::unique_ptr &MemBuf, BitstreamReader &StreamFile, BitstreamCursor &Stream, CurStreamTypeType &CurStreamType) { // Read the input file. ErrorOr> MemBufOrErr = MemoryBuffer::getFileOrSTDIN(Path); if (std::error_code EC = MemBufOrErr.getError()) return Error(Twine("Error reading '") + Path + "': " + EC.message()); MemBuf = std::move(MemBufOrErr.get()); if (MemBuf->getBufferSize() & 3) return Error("Bitcode stream should be a multiple of 4 bytes in length"); const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart(); const unsigned char *EndBufPtr = BufPtr + MemBuf->getBufferSize(); // If we have a wrapper header, parse it and ignore the non-bc file contents. // The magic number is 0x0B17C0DE stored in little endian. 
if (isBitcodeWrapper(BufPtr, EndBufPtr)) { if (EndBufPtr - BufPtr < BWH_HeaderSize) return Error("Invalid bitcode wrapper header"); if (Dump) { unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]); unsigned Version = support::endian::read32le(&BufPtr[BWH_VersionField]); unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); unsigned CPUType = support::endian::read32le(&BufPtr[BWH_CPUTypeField]); outs() << "\n"; } if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true)) return Error("Invalid bitcode wrapper header"); } StreamFile = BitstreamReader(BufPtr, EndBufPtr); Stream = BitstreamCursor(StreamFile); StreamFile.CollectBlockInfoNames(); // Read the stream signature. char Signature[6]; Signature[0] = Stream.Read(8); Signature[1] = Stream.Read(8); Signature[2] = Stream.Read(4); Signature[3] = Stream.Read(4); Signature[4] = Stream.Read(4); Signature[5] = Stream.Read(4); // Autodetect the file contents, if it is one we know. CurStreamType = UnknownBitstream; if (Signature[0] == 'B' && Signature[1] == 'C' && Signature[2] == 0x0 && Signature[3] == 0xC && Signature[4] == 0xE && Signature[5] == 0xD) CurStreamType = LLVMIRBitstream; return false; } /// AnalyzeBitcode - Analyze the bitcode file specified by InputFilename. static int AnalyzeBitcode() { std::unique_ptr StreamBuffer; BitstreamReader StreamFile; BitstreamCursor Stream; CurStreamTypeType CurStreamType; if (openBitcodeFile(InputFilename, StreamBuffer, StreamFile, Stream, CurStreamType)) return true; // Read block info from BlockInfoFilename, if specified. // The block info must be a top-level block. 
if (!BlockInfoFilename.empty()) { std::unique_ptr BlockInfoBuffer; BitstreamReader BlockInfoFile; BitstreamCursor BlockInfoCursor; CurStreamTypeType BlockInfoStreamType; if (openBitcodeFile(BlockInfoFilename, BlockInfoBuffer, BlockInfoFile, BlockInfoCursor, BlockInfoStreamType)) return true; while (!BlockInfoCursor.AtEndOfStream()) { unsigned Code = BlockInfoCursor.ReadCode(); if (Code != bitc::ENTER_SUBBLOCK) return Error("Invalid record at top-level in block info file"); unsigned BlockID = BlockInfoCursor.ReadSubBlockID(); if (BlockID == bitc::BLOCKINFO_BLOCK_ID) { if (BlockInfoCursor.ReadBlockInfoBlock()) return Error("Malformed BlockInfoBlock in block info file"); break; } BlockInfoCursor.SkipBlock(); } StreamFile.takeBlockInfo(std::move(BlockInfoFile)); } unsigned NumTopBlocks = 0; // Parse the top-level structure. We only allow blocks at the top-level. while (!Stream.AtEndOfStream()) { unsigned Code = Stream.ReadCode(); if (Code != bitc::ENTER_SUBBLOCK) return Error("Invalid record at top-level"); unsigned BlockID = Stream.ReadSubBlockID(); if (ParseBlock(Stream, BlockID, 0, CurStreamType)) return true; ++NumTopBlocks; } if (Dump) outs() << "\n\n"; uint64_t BufferSizeBits = StreamFile.getBitcodeBytes().getExtent() * CHAR_BIT; // Print a summary of the read file. outs() << "Summary of " << InputFilename << ":\n"; outs() << " Total size: "; PrintSize(BufferSizeBits); outs() << "\n"; outs() << " Stream type: "; switch (CurStreamType) { case UnknownBitstream: outs() << "unknown\n"; break; case LLVMIRBitstream: outs() << "LLVM IR\n"; break; } outs() << " # Toplevel Blocks: " << NumTopBlocks << "\n"; outs() << "\n"; // Emit per-block stats. 
outs() << "Per-block Summary:\n"; for (std::map::iterator I = BlockIDStats.begin(), E = BlockIDStats.end(); I != E; ++I) { outs() << " Block ID #" << I->first; if (const char *BlockName = GetBlockName(I->first, StreamFile, CurStreamType)) outs() << " (" << BlockName << ")"; outs() << ":\n"; const PerBlockIDStats &Stats = I->second; outs() << " Num Instances: " << Stats.NumInstances << "\n"; outs() << " Total Size: "; PrintSize(Stats.NumBits); outs() << "\n"; double pct = (Stats.NumBits * 100.0) / BufferSizeBits; outs() << " Percent of file: " << format("%2.4f%%", pct) << "\n"; if (Stats.NumInstances > 1) { outs() << " Average Size: "; PrintSize(Stats.NumBits/(double)Stats.NumInstances); outs() << "\n"; outs() << " Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/" << Stats.NumSubBlocks/(double)Stats.NumInstances << "\n"; outs() << " Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/" << Stats.NumAbbrevs/(double)Stats.NumInstances << "\n"; outs() << " Tot/Avg Records: " << Stats.NumRecords << "/" << Stats.NumRecords/(double)Stats.NumInstances << "\n"; } else { outs() << " Num SubBlocks: " << Stats.NumSubBlocks << "\n"; outs() << " Num Abbrevs: " << Stats.NumAbbrevs << "\n"; outs() << " Num Records: " << Stats.NumRecords << "\n"; } if (Stats.NumRecords) { double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords; outs() << " Percent Abbrevs: " << format("%2.4f%%", pct) << "\n"; } outs() << "\n"; // Print a histogram of the codes we see. 
if (!NoHistogram && !Stats.CodeFreq.empty()) { std::vector > FreqPairs; // for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i) if (unsigned Freq = Stats.CodeFreq[i].NumInstances) FreqPairs.push_back(std::make_pair(Freq, i)); std::stable_sort(FreqPairs.begin(), FreqPairs.end()); std::reverse(FreqPairs.begin(), FreqPairs.end()); outs() << "\tRecord Histogram:\n"; outs() << "\t\t Count # Bits b/Rec % Abv Record Kind\n"; for (unsigned i = 0, e = FreqPairs.size(); i != e; ++i) { const PerRecordStats &RecStats = Stats.CodeFreq[FreqPairs[i].second]; outs() << format("\t\t%7d %9lu", RecStats.NumInstances, (unsigned long)RecStats.TotalBits); if (RecStats.NumInstances > 1) outs() << format(" %9.1f", (double)RecStats.TotalBits/RecStats.NumInstances); else outs() << " "; if (RecStats.NumAbbrev) outs() << format(" %7.2f", (double)RecStats.NumAbbrev/RecStats.NumInstances*100); else outs() << " "; outs() << " "; if (const char *CodeName = GetCodeName(FreqPairs[i].second, I->first, StreamFile, CurStreamType)) outs() << CodeName << "\n"; else outs() << "UnknownCode" << FreqPairs[i].second << "\n"; } outs() << "\n"; } } return 0; } int main(int argc, char **argv) { // Print a stack trace if we signal out. sys::PrintStackTraceOnErrorSignal(); PrettyStackTraceProgram X(argc, argv); llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. cl::ParseCommandLineOptions(argc, argv, "llvm-bcanalyzer file analyzer\n"); return AnalyzeBitcode(); }