Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -4158,6 +4158,12 @@ continue; } + if (Entry.ID == bitc::BLOCKINFO_BLOCK_ID) { + if (Stream.ReadBlockInfoBlock()) + return error("Malformed block"); + continue; + } + if (Entry.ID == bitc::MODULE_BLOCK_ID) return parseModule(0, ShouldLazyLoadMetadata); @@ -6536,6 +6542,12 @@ if (Entry.Kind != BitstreamEntry::SubBlock) return error("Malformed block"); + if (Entry.ID == bitc::BLOCKINFO_BLOCK_ID) { + if (Stream.ReadBlockInfoBlock()) + return error("Malformed block"); + continue; + } + // If we see a MODULE_BLOCK, parse it to find the blocks needed for // building the function summary index. if (Entry.ID == bitc::MODULE_BLOCK_ID) Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -65,45 +65,30 @@ }; /// Abstract class to manage the bitcode writing, subclassed for each bitcode -/// file type. Owns the BitstreamWriter, and includes the main entry point for -/// writing. +/// file type. class BitcodeWriter { protected: - /// Pointer to the buffer allocated by caller for bitcode writing. - const SmallVectorImpl &Buffer; - - /// The stream created and owned by the BitodeWriter. - BitstreamWriter Stream; + /// The stream created and owned by the caller. + BitstreamWriter &Stream; /// Saves the offset of the VSTOffset record that must eventually be /// backpatched with the offset of the actual VST. uint64_t VSTOffsetPlaceholder = 0; public: - /// Constructs a BitcodeWriter object, and initializes a BitstreamRecord, - /// writing to the provided \p Buffer. 
- BitcodeWriter(SmallVectorImpl &Buffer) - : Buffer(Buffer), Stream(Buffer) {} - - virtual ~BitcodeWriter() = default; - - /// Main entry point to write the bitcode file, which writes the bitcode - /// header and will then invoke the virtual writeBlocks() method. - void write(); - -private: - /// Derived classes must implement this to write the corresponding blocks for - /// that bitcode file type. - virtual void writeBlocks() = 0; + /// Constructs a BitcodeWriter object that writes to the provided \p Stream. + BitcodeWriter(BitstreamWriter &Stream) : Stream(Stream) {} protected: bool hasVSTOffsetPlaceholder() { return VSTOffsetPlaceholder != 0; } void writeValueSymbolTableForwardDecl(); - void writeBitcodeHeader(); }; /// Class to manage the bitcode writing for a module. class ModuleBitcodeWriter : public BitcodeWriter { + /// Reference to the buffer allocated by the caller for bitcode writing. + const SmallVectorImpl &Buffer; + /// The Module to write to bitcode. const Module &M; @@ -117,7 +102,7 @@ bool GenerateHash; /// The start bit of the module block, for use in generating a module hash - uint64_t BitcodeStartBit = 0; + uint64_t BitcodeStartBit; /// Map that holds the correspondence between GUIDs in the summary index, /// that came from indirect call profiles, and a value id generated by this @@ -131,16 +116,12 @@ /// Constructs a ModuleBitcodeWriter object for the given Module, /// writing to the provided \p Buffer. ModuleBitcodeWriter(const Module *M, SmallVectorImpl &Buffer, - bool ShouldPreserveUseListOrder, - const ModuleSummaryIndex *Index, bool GenerateHash) - : BitcodeWriter(Buffer), M(*M), VE(*M, ShouldPreserveUseListOrder), - Index(Index), GenerateHash(GenerateHash) { - // Save the start bit of the actual bitcode, in case there is space - // saved at the start for the darwin header above. The reader stream - // will start at the bitcode, and we need the offset of the VST - // to line up. 
- BitcodeStartBit = Stream.GetCurrentBitNo(); - + BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, + const ModuleSummaryIndex *Index, bool GenerateHash, + uint64_t BitcodeStartBit) + : BitcodeWriter(Stream), Buffer(Buffer), M(*M), + VE(*M, ShouldPreserveUseListOrder), Index(Index), + GenerateHash(GenerateHash), BitcodeStartBit(BitcodeStartBit) { // Assign ValueIds to any callee values in the index that came from // indirect call profiles and were recorded as a GUID not a Value* // (which would have been assigned an ID by the ValueEnumerator). @@ -162,21 +143,16 @@ assignValueId(CallEdge.first.getGUID()); } -private: - /// Main entry point for writing a module to bitcode, invoked by - /// BitcodeWriter::write() after it writes the header. - void writeBlocks() override; - - /// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the - /// current llvm version, and a record for the epoch number. - void writeIdentificationBlock(); - /// Emit the current module to the bitstream. - void writeModule(); + void write(); + uint64_t getBitsRequiredForTypeIndices() const { + return VE.computeBitsRequiredForTypeIndices(); + } + +private: uint64_t bitcodeStartBit() { return BitcodeStartBit; } - void writeStringRecord(unsigned Code, StringRef Str, unsigned AbbrevToUse); void writeAttributeGroupTable(); void writeAttributeTable(); void writeTypeTable(); @@ -276,7 +252,6 @@ void writeFunction(const Function &F, DenseMap &FunctionToBitcodeIndex); - void writeBlockInfo(); void writePerModuleFunctionSummaryRecord(SmallVector &NameVals, GlobalValueSummary *Summary, unsigned ValueID, @@ -329,11 +304,10 @@ /// Constructs a IndexBitcodeWriter object for the given combined index, /// writing to the provided \p Buffer. When writing a subset of the index /// for a distributed backend, provide a \p ModuleToSummariesForIndex map. 
- IndexBitcodeWriter(SmallVectorImpl &Buffer, - const ModuleSummaryIndex &Index, + IndexBitcodeWriter(BitstreamWriter &Stream, const ModuleSummaryIndex &Index, const std::map *ModuleToSummariesForIndex = nullptr) - : BitcodeWriter(Buffer), Index(Index), + : BitcodeWriter(Stream), Index(Index), ModuleToSummariesForIndex(ModuleToSummariesForIndex) { // Assign unique value ids to all summaries to be written, for use // in writing out the call graph edges. Save the mapping from GUID @@ -480,12 +454,10 @@ /// Obtain the end iterator over the summaries to be written. iterator end() { return iterator(*this, /*IsAtEnd=*/true); } -private: - /// Main entry point for writing a combined index to bitcode, invoked by - /// BitcodeWriter::write() after it writes the header. - void writeBlocks() override; + /// Main entry point for writing a combined index to bitcode. + void write(); - void writeIndex(); +private: void writeModStrings(); void writeCombinedValueSymbolTable(); void writeCombinedGlobalValueSummary(); @@ -597,8 +569,8 @@ llvm_unreachable("Invalid synch scope"); } -void ModuleBitcodeWriter::writeStringRecord(unsigned Code, StringRef Str, - unsigned AbbrevToUse) { +static void writeStringRecord(BitstreamWriter &Stream, unsigned Code, + StringRef Str, unsigned AbbrevToUse) { SmallVector Vals; // Code: [strchar x N] @@ -800,7 +772,7 @@ Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */); SmallVector TypeVals; - uint64_t NumBits = VE.computeBitsRequiredForTypeIndicies(); + uint64_t NumBits = VE.computeBitsRequiredForTypeIndices(); // Abbrev for TYPE_CODE_POINTER. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); @@ -922,7 +894,7 @@ // Emit the name if it is present. 
if (!ST->getName().empty()) - writeStringRecord(bitc::TYPE_CODE_STRUCT_NAME, ST->getName(), + writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME, ST->getName(), StructNameAbbrev); } break; @@ -1119,13 +1091,13 @@ void ModuleBitcodeWriter::writeModuleInfo() { // Emit various pieces of data attached to a module. if (!M.getTargetTriple().empty()) - writeStringRecord(bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(), + writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(), 0 /*TODO*/); const std::string &DL = M.getDataLayoutStr(); if (!DL.empty()) - writeStringRecord(bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/); + writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/); if (!M.getModuleInlineAsm().empty()) - writeStringRecord(bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(), + writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(), 0 /*TODO*/); // Emit information about sections and GC, computing how many there are. Also @@ -1141,7 +1113,7 @@ // Give section names unique ID's. unsigned &Entry = SectionMap[GV.getSection()]; if (!Entry) { - writeStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV.getSection(), + writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME, GV.getSection(), 0 /*TODO*/); Entry = SectionMap.size(); } @@ -1153,7 +1125,7 @@ // Give section names unique ID's. unsigned &Entry = SectionMap[F.getSection()]; if (!Entry) { - writeStringRecord(bitc::MODULE_CODE_SECTIONNAME, F.getSection(), + writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME, F.getSection(), 0 /*TODO*/); Entry = SectionMap.size(); } @@ -1162,7 +1134,7 @@ // Same for GC names. unsigned &Entry = GCMap[F.getGC()]; if (!Entry) { - writeStringRecord(bitc::MODULE_CODE_GCNAME, F.getGC(), 0 /*TODO*/); + writeStringRecord(Stream, bitc::MODULE_CODE_GCNAME, F.getGC(), 0 /*TODO*/); Entry = GCMap.size(); } } @@ -3011,7 +2983,8 @@ } // Emit blockinfo, which defines the standard abbreviations etc. 
-void ModuleBitcodeWriter::writeBlockInfo() { +static void writeModuleBlockInfo(BitstreamWriter &Stream, + uint64_t BitsRequiredForTypeIndices) { // We only want to emit block info records for blocks that have multiple // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. // Other blocks can define their abbrevs inline. @@ -3064,8 +3037,8 @@ { // SETTYPE abbrev for CONSTANTS_BLOCK. BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_SETTYPE)); - Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, - VE.computeBitsRequiredForTypeIndicies())); + Abbv->Add( + BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, BitsRequiredForTypeIndices)); if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != CONSTANTS_SETTYPE_ABBREV) llvm_unreachable("Unexpected abbrev ordering!"); @@ -3085,7 +3058,7 @@ Abbv->Add(BitCodeAbbrevOp(bitc::CST_CODE_CE_CAST)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // cast opc Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // typeid - VE.computeBitsRequiredForTypeIndicies())); + BitsRequiredForTypeIndices)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // value id if (Stream.EmitBlockInfoAbbrev(bitc::CONSTANTS_BLOCK_ID, Abbv) != @@ -3107,7 +3080,7 @@ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_LOAD)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // Ptr Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty - VE.computeBitsRequiredForTypeIndicies())); + BitsRequiredForTypeIndices)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // Align Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // volatile if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != @@ -3140,7 +3113,7 @@ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_CAST)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // OpVal Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty - VE.computeBitsRequiredForTypeIndicies())); + BitsRequiredForTypeIndices)); 
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != FUNCTION_INST_CAST_ABBREV) @@ -3174,7 +3147,7 @@ Abbv->Add(BitCodeAbbrevOp(bitc::FUNC_CODE_INST_GEP)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, // dest ty - Log2_32_Ceil(VE.getTypes().size() + 1))); + BitsRequiredForTypeIndices)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) != @@ -3586,7 +3559,9 @@ Stream.ExitBlock(); } -void ModuleBitcodeWriter::writeIdentificationBlock() { +/// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the +/// current llvm version, and a record for the epoch number. +static void writeIdentificationBlock(BitstreamWriter &Stream) { Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5); // Write the "user readable" string identifying the bitcode producer @@ -3595,7 +3570,7 @@ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); auto StringAbbrev = Stream.EmitAbbrev(Abbv); - writeStringRecord(bitc::IDENTIFICATION_CODE_STRING, + writeStringRecord(Stream, bitc::IDENTIFICATION_CODE_STRING, "LLVM" LLVM_VERSION_STRING, StringAbbrev); // Write the epoch version @@ -3629,24 +3604,7 @@ Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals); } -void BitcodeWriter::write() { - // Emit the file header first. - writeBitcodeHeader(); - - writeBlocks(); -} - -void ModuleBitcodeWriter::writeBlocks() { - writeIdentificationBlock(); - writeModule(); -} - -void IndexBitcodeWriter::writeBlocks() { - // Index contains only a single outer (module) block. 
- writeIndex(); -} - -void ModuleBitcodeWriter::writeModule() { +void ModuleBitcodeWriter::write() { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); size_t BlockStartPos = Buffer.size(); @@ -3655,9 +3613,6 @@ Vals.push_back(CurVersion); Stream.EmitRecord(bitc::MODULE_CODE_VERSION, Vals); - // Emit blockinfo, which defines the standard abbreviations etc. - writeBlockInfo(); - // Emit information about attribute groups. writeAttributeGroupTable(); @@ -3775,7 +3730,7 @@ } /// Helper to write the header common to all bitcode files. -void BitcodeWriter::writeBitcodeHeader() { +static void writeBitcodeHeader(BitstreamWriter &Stream) { // Emit the file header. Stream.Emit((unsigned)'B', 8); Stream.Emit((unsigned)'C', 8); @@ -3794,16 +3749,31 @@ SmallVector Buffer; Buffer.reserve(256*1024); + BitstreamWriter Stream(Buffer); + // If this is darwin or another generic macho target, reserve space for the // header. Triple TT(M->getTargetTriple()); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0); + // Save the start bit of the actual bitcode, in case there is space + // saved at the start for the darwin header above. The reader stream + // will start at the bitcode, and we need the offset of the VST + // to line up. + uint64_t BitcodeStartBit = Stream.GetCurrentBitNo(); + + writeBitcodeHeader(Stream); + writeIdentificationBlock(Stream); + + ModuleBitcodeWriter Writer(M, Buffer, Stream, ShouldPreserveUseListOrder, + Index, GenerateHash, BitcodeStartBit); + + // Emit module abbreviations into the buffer. + writeModuleBlockInfo(Stream, Writer.getBitsRequiredForTypeIndices()); + // Emit the module into the buffer. 
- ModuleBitcodeWriter ModuleWriter(M, Buffer, ShouldPreserveUseListOrder, Index, - GenerateHash); - ModuleWriter.write(); + Writer.write(); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) emitDarwinBCHeaderAndTrailer(Buffer, TT); @@ -3812,7 +3782,7 @@ Out.write((char*)&Buffer.front(), Buffer.size()); } -void IndexBitcodeWriter::writeIndex() { +void IndexBitcodeWriter::write() { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); SmallVector Vals; @@ -3846,7 +3816,11 @@ SmallVector Buffer; Buffer.reserve(256 * 1024); - IndexBitcodeWriter IndexWriter(Buffer, Index, ModuleToSummariesForIndex); + BitstreamWriter Stream(Buffer); + + writeBitcodeHeader(Stream); + + IndexBitcodeWriter IndexWriter(Stream, Index, ModuleToSummariesForIndex); IndexWriter.write(); Out.write((char *)&Buffer.front(), Buffer.size()); Index: llvm/lib/Bitcode/Writer/ValueEnumerator.h =================================================================== --- llvm/lib/Bitcode/Writer/ValueEnumerator.h +++ llvm/lib/Bitcode/Writer/ValueEnumerator.h @@ -226,7 +226,7 @@ /// void incorporateFunction(const Function &F); void purgeFunction(); - uint64_t computeBitsRequiredForTypeIndicies() const; + uint64_t computeBitsRequiredForTypeIndices() const; private: void OptimizeConstants(unsigned CstStart, unsigned CstEnd); Index: llvm/lib/Bitcode/Writer/ValueEnumerator.cpp =================================================================== --- llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -1000,6 +1000,6 @@ return getGlobalBasicBlockID(BB); } -uint64_t ValueEnumerator::computeBitsRequiredForTypeIndicies() const { +uint64_t ValueEnumerator::computeBitsRequiredForTypeIndices() const { return Log2_32_Ceil(getTypes().size() + 1); }