Index: include/llvm/Bitcode/BitstreamReader.h =================================================================== --- include/llvm/Bitcode/BitstreamReader.h +++ include/llvm/Bitcode/BitstreamReader.h @@ -326,6 +326,8 @@ // If we run out of data, stop at the end of the stream. if (BytesRead == 0) { Size = NextChar; + CurWord = 0; + BitsInCurWord = 0; return; } Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -136,7 +136,8 @@ std::unique_ptr Buffer; std::unique_ptr StreamFile; BitstreamCursor Stream; - bool IsStreamed; + /// The position (within the bitcode) where continueParse() left off, and used + /// to set input position on the next call to continueParse(). uint64_t NextUnreadBit = 0; bool SeenValueSymbolTable = false; @@ -214,6 +215,24 @@ /// True if any Metadata block has been materialized. bool IsMetadataMaterialized = false; + /// True if metadata should initially be skipped. + bool ShouldLazyLoadMetadata = false; + + /// The current phase of the parse. Together with NextUnreadBit, it + /// defines the state of the parse between calls to continueParse(). + enum BitcodeReaderState { + AtStart, + AtTopLevel, // Processing top-level records. + InsideModule, // Processing records inside a module block. + // All states below here represent cases where input shouldn't be parsed. + NoMoreInput, // Generic marker for having parsed input. + ReachedEof, // Parsed input, but not necessary materializations. + FinishedParse, // Parsed input and materialized necessary parts. + ParseError, // An error has occurred, stop parsing. + } ParseState = AtStart; + + bool SeenModuleBlockID = false; + bool StripDebugInfo = false; public: @@ -239,11 +258,21 @@ std::vector getIdentifiedStructTypes() const override; void dematerialize(GlobalValue *GV) override; - /// \brief Main interface to parsing a bitcode buffer. 
+ /// \brief Starts parse of bitcode. Materializes during parse based on flags. + /// + /// \param M the module to build. + /// \param ShouldMaterializeAll true when the module should be materialized + /// completely before returning. Otherwise, function bodies are only loaded on + /// demand. + /// \param ShouldLazyLoadMetadata true when parsing of the metadata blocks + /// should be delayed. + /// \param Streamer (if non-null) is the data streamer to use (instead + /// of a memory buffer). + /// /// \returns true if an error occurred. - std::error_code parseBitcodeInto(std::unique_ptr Streamer, - Module *M, - bool ShouldLazyLoadMetadata = false); + std::error_code parseBitcodeInto( + Module *M, bool ShouldMaterializeAll, bool ShouldLazyLoadMetadata, + std::unique_ptr Streamer = nullptr); /// \brief Cheap mechanism to just extract module triple /// \returns true if an error occurred. @@ -346,12 +375,33 @@ return getFnValueByID(ValNo, Ty); } + /// \name Functions that parse bitcode files, other than skipped blocks based + /// on flags to parseBitcodeInto(). + /// @{ + std::error_code startParse(); + std::error_code continueParse(); + std::error_code finishParse(); + /// @} + + // Changes the parse state to the new value. + void setParseState(BitcodeReaderState NewValue) { + NextUnreadBit = Stream.GetCurrentBitNo(); + ParseState = NewValue; + } + + // Changes the parse state to ParseError if given an error. + void setParseStateIfError(std::error_code EC) { + NextUnreadBit = Stream.GetCurrentBitNo(); + if (EC) + ParseState = ParseError; + } + /// Converts alignment exponent (i.e. power of two (or zero)) to the /// corresponding alignment to use. If alignment is too large, returns /// a corresponding error code. 
std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); std::error_code parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); - std::error_code parseModule(bool Resume, bool ShouldLazyLoadMetadata = false); + std::error_code parseModule(); std::error_code parseAttributeBlock(); std::error_code parseAttributeGroupBlock(); std::error_code parseTypeTable(); @@ -404,15 +454,18 @@ } std::error_code BitcodeReader::error(BitcodeError E, const Twine &Message) { + setParseState(ParseError); return ::error(DiagnosticHandler, make_error_code(E), Message); } std::error_code BitcodeReader::error(const Twine &Message) { + setParseState(ParseError); return ::error(DiagnosticHandler, make_error_code(BitcodeError::CorruptedBitcode), Message); } std::error_code BitcodeReader::error(BitcodeError E) { + setParseState(ParseError); return ::error(DiagnosticHandler, make_error_code(E)); } @@ -427,15 +480,14 @@ DiagnosticHandlerFunction DiagnosticHandler) : Context(Context), DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)), - Buffer(Buffer), IsStreamed(false), ValueList(Context), + Buffer(std::move(Buffer)), ValueList(Context), MDValueList(Context) {} BitcodeReader::BitcodeReader(LLVMContext &Context, DiagnosticHandlerFunction DiagnosticHandler) : Context(Context), DiagnosticHandler(getDiagHandler(DiagnosticHandler, Context)), - Buffer(nullptr), IsStreamed(true), ValueList(Context), - MDValueList(Context) {} + Buffer(nullptr), ValueList(Context), MDValueList(Context) {} std::error_code BitcodeReader::materializeForwardReferencedFunctions() { if (WillMaterializeAllForwardRefs) @@ -2697,12 +2749,14 @@ return std::error_code(); } -std::error_code BitcodeReader::parseModule(bool Resume, - bool ShouldLazyLoadMetadata) { - if (Resume) - Stream.JumpToBit(NextUnreadBit); - else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) - return error("Invalid record"); +std::error_code BitcodeReader::parseModule() { + if (ParseState == AtTopLevel) { + if 
(Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) + return error("Invalid record"); + setParseState(InsideModule); + } else { + assert(ParseState == InsideModule); + } SmallVector Record; std::vector SectionTable; @@ -2716,6 +2770,7 @@ case BitstreamEntry::Error: return error("Malformed block"); case BitstreamEntry::EndBlock: + setParseState(AtTopLevel); return globalCleanup(); case BitstreamEntry::SubBlock: @@ -2773,16 +2828,12 @@ if (std::error_code EC = rememberAndSkipFunctionBody()) return EC; - // For streaming bitcode, suspend parsing when we reach the function - // bodies. Subsequent materialization calls will resume it when - // necessary. For streaming, the function bodies must be at the end of - // the bitcode. If the bitcode file is old, the symbol table will be - // at the end instead and will not have been seen yet. In this case, - // just finish the parse now. - if (IsStreamed && SeenValueSymbolTable) { - NextUnreadBit = Stream.GetCurrentBitNo(); + // Suspend parsing when we reach a function body, assuming we + // have already associated names with global values. Note: If + // the bitcode file is old, the symbol table will be at the + // end instead and will not have been seen yet. + if (SeenValueSymbolTable) return std::error_code(); - } break; case bitc::USELIST_BLOCK_ID: if (std::error_code EC = parseUseLists()) @@ -2796,7 +2847,6 @@ break; } - // Read a record. switch (Stream.readRecord(Entry.ID, Record)) { default: break; // Default behavior, ignore unknown content. 
@@ -3030,8 +3080,7 @@ if (!isProto) { Func->setIsMaterializable(true); FunctionsWithBodies.push_back(Func); - if (IsStreamed) - DeferredFunctionInfo[Func] = 0; + DeferredFunctionInfo[Func] = 0; } break; } @@ -3079,12 +3128,36 @@ } std::error_code -BitcodeReader::parseBitcodeInto(std::unique_ptr Streamer, - Module *M, bool ShouldLazyLoadMetadata) { +BitcodeReader::parseBitcodeInto( + Module *M, bool ShouldMaterializeAll, bool ShouldLazyLoadMetadata, + std::unique_ptr Streamer) { + + auto cleanupOnError = [&](std::error_code EC) { + releaseBuffer(); // Never take ownership on error. + return EC; + }; TheModule = M; + this->ShouldLazyLoadMetadata = ShouldLazyLoadMetadata; if (std::error_code EC = initStream(std::move(Streamer))) - return EC; + return cleanupOnError(EC); + + if (std::error_code EC = startParse()) + return cleanupOnError(EC); + + if (ShouldMaterializeAll) { + if (std::error_code EC = materializeModule(TheModule)) + return cleanupOnError(EC); + } else { + if (std::error_code EC = materializeForwardReferencedFunctions()) + return cleanupOnError(EC); + } + + return std::error_code(); +} + +std::error_code BitcodeReader::startParse() { + assert(ParseState == AtStart); // Sniff for the signature. if (Stream.Read(8) != 'B' || @@ -3095,28 +3168,106 @@ Stream.Read(4) != 0xD) return error("Invalid bitcode signature"); + return continueParse(); +} + +std::error_code BitcodeReader::continueParse() { + switch (ParseState) { + case AtStart: + setParseState(AtTopLevel); + break; + case AtTopLevel: + // Restore input position to saved position on last call. + Stream.JumpToBit(NextUnreadBit); + break; + case InsideModule: { + // Restore input position to saved position on last call, + // and then continue parsing module. 
+ Stream.JumpToBit(NextUnreadBit); + std::error_code EC = parseModule(); + setParseStateIfError(EC); + return EC; + } + case NoMoreInput: + case ReachedEof: + case FinishedParse: + return std::error_code(); + case ParseError: + return error("Can't continue, bitcode error already found"); + } + // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (1) { + assert(ParseState == AtTopLevel); + if (Stream.AtEndOfStream()) { - // We didn't really read a proper Module. - return error("Malformed IR file"); + setParseState(ReachedEof); + return std::error_code(); } BitstreamEntry Entry = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); - if (Entry.Kind != BitstreamEntry::SubBlock) + if (Entry.Kind != BitstreamEntry::SubBlock) { + // We should reject the input because there should be no more + // data in the input stream (other than unknown + // blocks). However, some bitcode files are padded (see + // test/Bitcode/padding.test for more details). Hence, we assume + // that if a module has been parsed, and a record is expected + // next, that the remaining input is padding. A better solution + // would be to check the actual padding. + if (SeenModuleBlockID && Entry.Kind == BitstreamEntry::Record) { + setParseState(FinishedParse); + return std::error_code(); + } return error("Malformed block"); + } - if (Entry.ID == bitc::MODULE_BLOCK_ID) - return parseModule(false, ShouldLazyLoadMetadata); + if (Entry.ID == bitc::MODULE_BLOCK_ID) { + // Reject multiple MODULE_BLOCK's in a single bitstream. + if (SeenModuleBlockID) + return error("Invalid multiple blocks"); + SeenModuleBlockID = true; + std::error_code EC = parseModule(); + setParseStateIfError(EC); + return EC; + } + // Skip unknown blocks. 
if (Stream.SkipBlock()) return error("Invalid record"); } } +std::error_code BitcodeReader::finishParse() { + assert(TheModule); + + while (ParseState < NoMoreInput) { + if (std::error_code EC = continueParse()) + return EC; + } + + switch (ParseState) { + case AtStart: + case AtTopLevel: + case InsideModule: + llvm_unreachable("finishParse exits with ParseState < NoMoreInput"); + case NoMoreInput: + case ReachedEof: + setParseState(FinishedParse); + break; + case FinishedParse: + break; + case ParseError: + return error("Can't continue, bitcode error already found"); + } + if (SeenModuleBlockID) + return std::error_code(); + // We didn't really read a proper Module. + return error("Malformed IR file"); +} + ErrorOr BitcodeReader::parseModuleTriple() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return error("Invalid record"); @@ -4372,12 +4523,12 @@ Function *F, DenseMap::iterator DeferredFunctionInfoIterator) { while (DeferredFunctionInfoIterator->second == 0) { - if (Stream.AtEndOfStream()) + if (ParseState >= NoMoreInput) { return error("Could not find function in stream"); - // ParseModule will parse the next body in the stream and set its - // position in the DeferredFunctionInfo map. - if (std::error_code EC = parseModule(true)) + } + if (std::error_code EC = continueParse()) { return EC; + } } return std::error_code(); } @@ -4401,7 +4552,7 @@ assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); // If its position is recorded as 0, its body is somewhere in the stream // but we haven't seen it yet. - if (DFII->second == 0 && IsStreamed) + if (DFII->second == 0) if (std::error_code EC = findFunctionInStream(F, DFII)) return EC; @@ -4462,11 +4613,13 @@ assert(M == TheModule && "Can only Materialize the Module this BitcodeReader is attached to."); - if (std::error_code EC = materializeMetadata()) + // Make sure the rest of the bits in the module (excluding materializable) + // have been read. 
+ if (std::error_code EC = finishParse()) return EC; - // Promise to materialize all forward references. - WillMaterializeAllForwardRefs = true; + if (std::error_code EC = materializeMetadata()) + return EC; // Iterate over the module, deserializing any functions that are still on // disk. @@ -4475,14 +4628,8 @@ if (std::error_code EC = materialize(F)) return EC; } - // At this point, if there are any function bodies, the current bit is - // pointing to the END_BLOCK record after them. Now make sure the rest - // of the bits in the module have been read. - if (NextUnreadBit) - parseModule(true); - - // Check that all block address forward references got resolved (as we - // promised above). + + // Check that all block address forward references got resolved. if (!BasicBlockFwdRefs.empty()) return error("Never resolved function from blockaddress"); @@ -4597,35 +4744,6 @@ // External interface //===----------------------------------------------------------------------===// -static ErrorOr> -getBitcodeModuleImpl(std::unique_ptr Streamer, StringRef Name, - BitcodeReader *R, LLVMContext &Context, - bool MaterializeAll, bool ShouldLazyLoadMetadata) { - std::unique_ptr M = make_unique(Name, Context); - M->setMaterializer(R); - - auto cleanupOnError = [&](std::error_code EC) { - R->releaseBuffer(); // Never take ownership on error. - return EC; - }; - - // Delay parsing Metadata if ShouldLazyLoadMetadata is true. - if (std::error_code EC = R->parseBitcodeInto(std::move(Streamer), M.get(), - ShouldLazyLoadMetadata)) - return cleanupOnError(EC); - - if (MaterializeAll) { - // Read in the entire module, and destroy the BitcodeReader. - if (std::error_code EC = M->materializeAllPermanently()) - return cleanupOnError(EC); - } else { - // Resolve forward references from blockaddresses. - if (std::error_code EC = R->materializeForwardReferencedFunctions()) - return cleanupOnError(EC); - } - return std::move(M); -} - /// \brief Get a lazy one-at-time loading module from bitcode. 
/// /// This isn't always used in a lazy context. In particular, it's also used by @@ -4639,17 +4757,19 @@ LLVMContext &Context, bool MaterializeAll, DiagnosticHandlerFunction DiagnosticHandler, bool ShouldLazyLoadMetadata = false) { + std::unique_ptr M = + make_unique(Buffer->getBufferIdentifier(), Context); BitcodeReader *R = new BitcodeReader(Buffer.get(), Context, DiagnosticHandler); + M->setMaterializer(R); - ErrorOr> Ret = - getBitcodeModuleImpl(nullptr, Buffer->getBufferIdentifier(), R, Context, - MaterializeAll, ShouldLazyLoadMetadata); - if (!Ret) - return Ret; + if (std::error_code EC = + R->parseBitcodeInto(M.get(), MaterializeAll, ShouldLazyLoadMetadata)) { + return EC; + } Buffer.release(); // The BitcodeReader owns it now. - return Ret; + return std::move(M); } ErrorOr> llvm::getLazyBitcodeModule( @@ -4664,9 +4784,11 @@ LLVMContext &Context, DiagnosticHandlerFunction DiagnosticHandler) { std::unique_ptr M = make_unique(Name, Context); BitcodeReader *R = new BitcodeReader(Context, DiagnosticHandler); - - return getBitcodeModuleImpl(std::move(Streamer), Name, R, Context, false, - false); + M->setMaterializer(R); + if (std::error_code EC = + R->parseBitcodeInto(M.get(), false, false, std::move(Streamer))) + return EC; + return std::move(M); } ErrorOr>