Index: include/llvm/Support/StreamingMemoryObject.h =================================================================== --- include/llvm/Support/StreamingMemoryObject.h +++ include/llvm/Support/StreamingMemoryObject.h @@ -55,29 +55,41 @@ std::unique_ptr Streamer; mutable size_t BytesRead; // Bytes read from stream size_t BytesSkipped;// Bytes skipped at start of stream (e.g. wrapper/header) - mutable size_t ObjectSize; // 0 if unknown, set if wrapper seen or EOF reached - mutable bool EOFReached; + mutable size_t ObjectSize; // 0 if unknown, set if wrapper seen or end of + // object reached. + mutable bool EOOReached; // end of object reached. - // Fetch enough bytes such that Pos can be read or EOF is reached - // (i.e. BytesRead > Pos). Return true if Pos can be read. - // Unlike most of the functions in BitcodeReader, returns true on success. - // Most of the requests will be small, but we fetch at kChunkSize bytes - // at a time to avoid making too many potentially expensive GetBytes calls + // Fetch enough bytes such that Pos can be read or end of object is + // reached (i.e. BytesRead > Pos). Note: EOF sets end of object if + // not already defined. Returns true if Pos can be read. Unlike + // most of the functions in BitcodeReader, returns true on success. 
+ // Most of the requests will be small, but we fetch at kChunkSize + // bytes at a time to avoid making too many potentially expensive + // GetBytes calls bool fetchToPos(size_t Pos) const { - if (EOFReached) + if (EOOReached) return Pos < ObjectSize; + while (Pos >= BytesRead) { - Bytes.resize(BytesRead + BytesSkipped + kChunkSize); + size_t NextChunkSize = kChunkSize; + if (ObjectSize && ObjectSize < BytesRead + kChunkSize) { + if (BytesRead >= ObjectSize) { + EOOReached = true; + return false; + } + NextChunkSize = ObjectSize - BytesRead; + } + Bytes.resize(BytesRead + BytesSkipped + NextChunkSize); size_t bytes = Streamer->GetBytes(&Bytes[BytesRead + BytesSkipped], - kChunkSize); + NextChunkSize); BytesRead += bytes; - if (bytes != kChunkSize) { // reached EOF/ran out of bytes + if (bytes == 0) { // reached EOF/ran out of bytes ObjectSize = BytesRead; - EOFReached = true; + EOOReached = true; break; } } - return Pos < BytesRead; + return (Pos < BytesRead) || (ObjectSize && Pos < ObjectSize); } StreamingMemoryObject(const StreamingMemoryObject&) = delete; Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -135,11 +135,11 @@ LLVMContext &Context; DiagnosticHandlerFunction DiagnosticHandler; Module *TheModule; + // The following two fields define the type of memory to parse. std::unique_ptr Buffer; + DataStreamer *Streamer; std::unique_ptr StreamFile; BitstreamCursor Stream; - DataStreamer *LazyStreamer; - uint64_t NextUnreadBit; bool SeenValueSymbolTable; std::vector TypeList; @@ -211,13 +211,41 @@ /// True if all functions will be materialized, negating the need to process /// (e.g.) blockaddress forward references. - bool WillMaterializeAllForwardRefs; + bool WillMaterializeAllForwardRefs = false; /// Functions that have block addresses taken. This is usually empty. 
SmallPtrSet BlockAddressesTaken; /// True if any Metadata block has been materialized. - bool IsMetadataMaterialized; + bool IsMetadataMaterialized = false; + + /// True if the module is materialized. + bool IsModuleMaterialized = false; + + /// True if meta data should initially be skipped. + bool ShouldLazyLoadMetadata = false; + + /// True if everything should materialize all before finishing parsing. + bool ShouldMaterializeAll = false; + + /// The state of the parse. + enum BitcodeReaderState { + AtStart, + AtTopLevel, // Processing top-level records. + InsideModule, // processing records inside a module block. + // All states below here represent cases where input shouldn't be parsed. + NoMoreInput, // Generic marker for having parsed input. + ReachedEof, // parsed input, but not necessary materializations. + FinishedParse, // Parsed input and materialized necessary parts. + ParseError, // An error has occurred, stop parsing. + } ParseState = AtStart; + + /// The position (within the bitcode) where parsing left off when + /// incrementally parsing. + uint64_t NextUnreadBit = 0; + + /// The number of modules read at the top level. + bool NumModulesParsed = 0; bool StripDebugInfo = false; @@ -226,9 +254,9 @@ std::error_code Error(BitcodeError E); std::error_code Error(const Twine &Message); - explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C, + explicit BitcodeReader(MemoryBuffer *Buffer, LLVMContext &C, DiagnosticHandlerFunction DiagnosticHandler); - explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C, + explicit BitcodeReader(DataStreamer *Streamer, LLVMContext &C, DiagnosticHandlerFunction DiagnosticHandler); ~BitcodeReader() { FreeState(); } @@ -244,10 +272,24 @@ std::vector getIdentifiedStructTypes() const override; void Dematerialize(GlobalValue *GV) override; - /// @brief Main interface to parsing a bitcode buffer. + /// @brief Starts an incremental parse for module M. Reads enough to + /// define global values. 
The flags define what should happen before + /// finishing the parse. That is: + /// MaterializeAll: When true, the module should be materialized + /// completely. Otherwise, function bodies are only loaded on demand. + /// ShouldLazyLoadMetadata: When true, parsing of the metadata blocks is + /// delayed. + /// @returns true if an error occurred. + std::error_code StartParse(Module *M, + bool MaterializeAll, + bool ShouldLazyLoadMetadata = false); + + /// @brief Parses bitcode. Materializes based on flags. /// @returns true if an error occurred. std::error_code ParseBitcodeInto(Module *M, - bool ShouldLazyLoadMetadata = false); + bool ShouleMaterializeAll, + bool ShouldLazyLoadMetadata); + /// @brief Cheap mechanism to just extract module triple /// @returns true if an error occurred. @@ -350,12 +392,46 @@ return getFnValueByID(ValNo, Ty); } + // Continue incremental parse to next skipped block, or eof, whichever + // comes first. + std::error_code ContinueParse(); + + // Finish the parse and then materialize based on flags passed to + // StartParse(). + std::error_code FinishParse(); + + // The updateParseState methods update the parse state with the + // given information, so that the next call to ContinueParse can + // continue. Returns an error code to be returned by + // ContinueParse(). + std::error_code &updateParseState (BitcodeReaderState NewValue, + std::error_code &EC) { + ParseState = EC ? ParseError : NewValue; + NextUnreadBit = Stream.GetCurrentBitNo(); + return EC; + } + + std::error_code updateParseState(std::error_code EC) { + return updateParseState(ParseState, EC); + } + + std::error_code updateParseState(BitcodeReaderState NewValue) { + ParseState = NewValue; + NextUnreadBit = Stream.GetCurrentBitNo(); + return std::error_code(); + } + + std::error_code updateParseState() { + NextUnreadBit = Stream.GetCurrentBitNo(); + return std::error_code(); + } + /// Converts alignment exponent (i.e. 
power of two (or zero)) to the /// corresponding alignment to use. If alignment is too large, returns /// a corresponding error code. std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); - std::error_code ParseModule(bool Resume, bool ShouldLazyLoadMetadata = false); + std::error_code ParseModule(); std::error_code ParseAttributeBlock(); std::error_code ParseAttributeGroupBlock(); std::error_code ParseTypeTable(); @@ -421,21 +497,19 @@ return [&C](const DiagnosticInfo &DI) { C.diagnose(DI); }; } -BitcodeReader::BitcodeReader(MemoryBuffer *buffer, LLVMContext &C, +BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &C, DiagnosticHandlerFunction DiagnosticHandler) : Context(C), DiagnosticHandler(getDiagHandler(DiagnosticHandler, C)), - TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr), - NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), - MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), - WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {} + TheModule(nullptr), Buffer(Buffer), Streamer(nullptr), + SeenValueSymbolTable(false), ValueList(C), + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} -BitcodeReader::BitcodeReader(DataStreamer *streamer, LLVMContext &C, +BitcodeReader::BitcodeReader(DataStreamer *Streamer, LLVMContext &C, DiagnosticHandlerFunction DiagnosticHandler) : Context(C), DiagnosticHandler(getDiagHandler(DiagnosticHandler, C)), - TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer), - NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), - MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), - WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {} + TheModule(nullptr), Buffer(nullptr), Streamer(Streamer), + SeenValueSymbolTable(false), ValueList(C), + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} 
std::error_code BitcodeReader::materializeForwardReferencedFunctions() { if (WillMaterializeAllForwardRefs) @@ -2666,12 +2740,14 @@ return std::error_code(); } -std::error_code BitcodeReader::ParseModule(bool Resume, - bool ShouldLazyLoadMetadata) { - if (Resume) - Stream.JumpToBit(NextUnreadBit); - else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) - return Error("Invalid record"); +std::error_code BitcodeReader::ParseModule() { + if (ParseState == AtTopLevel) { + if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) + return Error("Invalid record"); + ParseState = InsideModule; + } else { + assert(ParseState == InsideModule); + } SmallVector Record; std::vector SectionTable; @@ -2685,6 +2761,7 @@ case BitstreamEntry::Error: return Error("Malformed block"); case BitstreamEntry::EndBlock: + ParseState = AtTopLevel; return GlobalCleanup(); case BitstreamEntry::SubBlock: @@ -2742,16 +2819,12 @@ if (std::error_code EC = RememberAndSkipFunctionBody()) return EC; - // For streaming bitcode, suspend parsing when we reach the function - // bodies. Subsequent materialization calls will resume it when - // necessary. For streaming, the function bodies must be at the end of - // the bitcode. If the bitcode file is old, the symbol table will be - // at the end instead and will not have been seen yet. In this case, - // just finish the parse now. - if (LazyStreamer && SeenValueSymbolTable) { - NextUnreadBit = Stream.GetCurrentBitNo(); + // Suspend parsing when we reach a function body, assuming we + // have already associated names with global values. Note: If + // the bitcode file is old, the symbol table will be at the + // end instead and will not have been seen yet. 
+ if (SeenValueSymbolTable) return std::error_code(); - } break; case bitc::USELIST_BLOCK_ID: if (std::error_code EC = ParseUseLists()) @@ -2992,8 +3065,7 @@ if (!isProto) { Func->setIsMaterializable(true); FunctionsWithBodies.push_back(Func); - if (LazyStreamer) - DeferredFunctionInfo[Func] = 0; + DeferredFunctionInfo[Func] = 0; } break; } @@ -3042,8 +3114,30 @@ } std::error_code BitcodeReader::ParseBitcodeInto(Module *M, + bool ShouldMaterializeAll, bool ShouldLazyLoadMetadata) { - TheModule = nullptr; + auto cleanupOnError = [&](std::error_code EC) { + releaseBuffer(); // Never take ownership on error. + return EC; + }; + + if (std::error_code EC = + StartParse(M, ShouldMaterializeAll, ShouldLazyLoadMetadata)) + return cleanupOnError(EC); + + if (std::error_code EC = FinishParse()) + return cleanupOnError(EC); + + return std::error_code(); +} + +std::error_code BitcodeReader::StartParse(Module *M, + bool MaterializeAll, + bool LazyLoadMetadata) { + assert(ParseState == AtStart); + TheModule = M; + ShouldLazyLoadMetadata = LazyLoadMetadata; + ShouldMaterializeAll = MaterializeAll; if (std::error_code EC = InitStream()) return EC; @@ -3056,41 +3150,62 @@ Stream.Read(4) != 0xE || Stream.Read(4) != 0xD) return Error("Invalid bitcode signature"); + return ContinueParse(); +} + +std::error_code BitcodeReader::ContinueParse() { + switch (ParseState) { + case AtStart: + ParseState = AtTopLevel; + break; + case ReachedEof: + case FinishedParse: + return updateParseState(); + case ParseError: + return updateParseState( + Error("Can't continue, bitcode error already found")); + default: + Stream.JumpToBit(NextUnreadBit); + if (ParseState == InsideModule) { + std::error_code EC = ParseModule(); + return updateParseState(EC); + } + break; + } // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. 
while (1) { if (Stream.AtEndOfStream()) - return std::error_code(); + return updateParseState(ReachedEof); + assert(ParseState == AtTopLevel); BitstreamEntry Entry = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); switch (Entry.Kind) { case BitstreamEntry::Error: - return Error("Malformed block"); + return updateParseState(Error("Malformed block")); case BitstreamEntry::EndBlock: - return std::error_code(); + return updateParseState(AtTopLevel); case BitstreamEntry::SubBlock: switch (Entry.ID) { case bitc::BLOCKINFO_BLOCK_ID: if (Stream.ReadBlockInfoBlock()) - return Error("Malformed block"); + return updateParseState(Error("Malformed block")); break; case bitc::MODULE_BLOCK_ID: // Reject multiple MODULE_BLOCK's in a single bitstream. - if (TheModule) - return Error("Invalid multiple blocks"); - TheModule = M; - if (std::error_code EC = ParseModule(false, ShouldLazyLoadMetadata)) - return EC; - if (LazyStreamer) - return std::error_code(); + if (NumModulesParsed++) + return updateParseState(Error("Invalid multiple blocks")); + if (std::error_code EC = ParseModule()) + return updateParseState(EC); + return updateParseState(); break; default: if (Stream.SkipBlock()) - return Error("Invalid record"); + return updateParseState(Error("Invalid record")); break; } continue; @@ -3103,11 +3218,35 @@ if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 && Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a && Stream.AtEndOfStream()) - return std::error_code(); + return updateParseState(); - return Error("Invalid record"); + return updateParseState(Error("Invalid record")); + } + } +} + +std::error_code BitcodeReader::FinishParse() { + if (ParseState == FinishedParse) + return std::error_code(); + + while (ParseState < NoMoreInput) { + if (std::error_code EC = ContinueParse()) { + return EC; } } + + assert(TheModule); + ParseState = FinishedParse; + + if (ShouldMaterializeAll) { + if (std::error_code EC = MaterializeModule(TheModule)) + return EC; + } else { + if 
(std::error_code EC = materializeForwardReferencedFunctions()) + return EC; + } + + return std::error_code(); } ErrorOr BitcodeReader::parseModuleTriple() { @@ -4272,12 +4411,14 @@ Function *F, DenseMap::iterator DeferredFunctionInfoIterator) { while (DeferredFunctionInfoIterator->second == 0) { - if (Stream.AtEndOfStream()) - return Error("Could not find function in stream"); - // ParseModule will parse the next body in the stream and set its - // position in the DeferredFunctionInfo map. - if (std::error_code EC = ParseModule(true)) - return EC; + if (ParseState < NoMoreInput) { + // Continue will parse the next body in the stream and set its + // position in the DeferredFunctionInfo map. + if (std::error_code EC = ContinueParse()) + return EC; + break; + } + return Error("Could not find function in stream"); } return std::error_code(); } @@ -4301,7 +4442,7 @@ assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); // If its position is recorded as 0, its body is somewhere in the stream // but we haven't seen it yet. - if (DFII->second == 0 && LazyStreamer) + if (DFII->second == 0) if (std::error_code EC = FindFunctionInStream(F, DFII)) return EC; @@ -4329,7 +4470,7 @@ // Bring in any functions that this function forward-referenced via // blockaddresses. - return materializeForwardReferencedFunctions(); + return materializeForwardReferencedFunctions(); } bool BitcodeReader::isDematerializable(const GlobalValue *GV) const { @@ -4362,11 +4503,21 @@ assert(M == TheModule && "Can only Materialize the Module this BitcodeReader is attached to."); - if (std::error_code EC = materializeMetadata()) + if (IsModuleMaterialized) + return std::error_code(); + + // Set flag now so that FinishParse will not recursively apply this + // function. + IsModuleMaterialized = true; + + // At this point, if there are any function bodies, the current bit is + // pointing to the END_BLOCK record after them. 
Now make sure the rest + // of the bits in the module have been read. + if (std::error_code EC = FinishParse()) return EC; - // Promise to materialize all forward references. - WillMaterializeAllForwardRefs = true; + if (std::error_code EC = materializeMetadata()) + return EC; // Iterate over the module, deserializing any functions that are still on // disk. @@ -4375,11 +4526,9 @@ if (std::error_code EC = materialize(F)) return EC; } - // At this point, if there are any function bodies, the current bit is - // pointing to the END_BLOCK record after them. Now make sure the rest - // of the bits in the module have been read. - if (NextUnreadBit) - ParseModule(true); + + if (std::error_code EC = materializeForwardReferencedFunctions()) + return EC; // Check that all block address forward references got resolved (as we // promised above). @@ -4417,7 +4566,7 @@ } std::error_code BitcodeReader::InitStream() { - if (LazyStreamer) + if (Streamer) return InitLazyStream(); return InitStreamFromBuffer(); } @@ -4444,7 +4593,7 @@ std::error_code BitcodeReader::InitLazyStream() { // Check and strip off the bitcode wrapper; BitstreamReader expects never to // see it. - auto OwnedBytes = llvm::make_unique(LazyStreamer); + auto OwnedBytes = llvm::make_unique(Streamer); StreamingMemoryObject &Bytes = *OwnedBytes; StreamFile = llvm::make_unique(std::move(OwnedBytes)); Stream.init(&*StreamFile); @@ -4512,20 +4661,13 @@ new BitcodeReader(Buffer.get(), Context, DiagnosticHandler); M->setMaterializer(R); - auto cleanupOnError = [&](std::error_code EC) { + // Delay parsing Metadata if ShouldLazyLoadMetadata is true. + if (std::error_code EC = + R->ParseBitcodeInto(M, WillMaterializeAll, ShouldLazyLoadMetadata)) { R->releaseBuffer(); // Never take ownership on error. delete M; // Also deletes R. return EC; - }; - - // Delay parsing Metadata if ShouldLazyLoadMetadata is true. 
- if (std::error_code EC = R->ParseBitcodeInto(M, ShouldLazyLoadMetadata)) - return cleanupOnError(EC); - - if (!WillMaterializeAll) - // Resolve forward references from blockaddresses. - if (std::error_code EC = R->materializeForwardReferencedFunctions()) - return cleanupOnError(EC); + } Buffer.release(); // The BitcodeReader owns it now. return M; @@ -4547,7 +4689,7 @@ std::unique_ptr M = make_unique(Name, Context); BitcodeReader *R = new BitcodeReader(Streamer, Context, DiagnosticHandler); M->setMaterializer(R); - if (std::error_code EC = R->ParseBitcodeInto(M.get())) + if (std::error_code EC = R->ParseBitcodeInto(M.get(), false, false)) return EC; return std::move(M); } @@ -4561,11 +4703,6 @@ if (!ModuleOrErr) return ModuleOrErr; Module *M = ModuleOrErr.get(); - // Read in the entire module, and destroy the BitcodeReader. - if (std::error_code EC = M->materializeAllPermanently()) { - delete M; - return EC; - } // TODO: Restore the use-lists to the in-memory state when the bitcode was // written. We must defer until the Module has been fully materialized. 
Index: lib/Bitcode/Reader/BitstreamReader.cpp =================================================================== --- lib/Bitcode/Reader/BitstreamReader.cpp +++ lib/Bitcode/Reader/BitstreamReader.cpp @@ -338,4 +338,3 @@ } } } - Index: lib/Support/StreamingMemoryObject.cpp =================================================================== --- lib/Support/StreamingMemoryObject.cpp +++ lib/Support/StreamingMemoryObject.cpp @@ -87,13 +87,17 @@ uint64_t StreamingMemoryObject::readBytes(uint8_t *Buf, uint64_t Size, uint64_t Address) const { fetchToPos(Address + Size - 1); - if (Address >= BytesRead) + if (Address >= BytesRead || (ObjectSize && Address >= ObjectSize)) return 0; uint64_t End = Address + Size; - if (End > BytesRead) + if (ObjectSize) { + if (End > ObjectSize) { + End = ObjectSize; + } + } else if (End > BytesRead) End = BytesRead; - assert(static_cast(End - Address) >= 0); + assert(End >= Address); Size = End - Address; memcpy(Buf, &Bytes[Address + BytesSkipped], Size); return Size; @@ -118,7 +122,7 @@ StreamingMemoryObject::StreamingMemoryObject(DataStreamer *streamer) : Bytes(kChunkSize), Streamer(streamer), BytesRead(0), BytesSkipped(0), - ObjectSize(0), EOFReached(false) { + ObjectSize(0), EOOReached(false) { BytesRead = streamer->GetBytes(&Bytes[0], kChunkSize); } }