Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -135,11 +135,11 @@ LLVMContext &Context; DiagnosticHandlerFunction DiagnosticHandler; Module *TheModule; + // The following two fields define the type of memory to parse. std::unique_ptr Buffer; + DataStreamer *Streamer; std::unique_ptr StreamFile; BitstreamCursor Stream; - DataStreamer *LazyStreamer; - uint64_t NextUnreadBit; bool SeenValueSymbolTable; std::vector TypeList; @@ -211,13 +211,36 @@ /// True if all functions will be materialized, negating the need to process /// (e.g.) blockaddress forward references. - bool WillMaterializeAllForwardRefs; + bool WillMaterializeAllForwardRefs = false; /// Functions that have block addresses taken. This is usually empty. SmallPtrSet BlockAddressesTaken; /// True if any Metadata block has been materialized. - bool IsMetadataMaterialized; + bool IsMetadataMaterialized = false; + + /// True if meta data should initially be skipped. + bool ShouldLazyLoadMetadata = false; + + /// The name of state of the parse. Along with NextUnreadBit, they + /// define the state of the parse between calls to continueParse(). + enum BitcodeReaderState { + AtStart, + AtTopLevel, // Processing top-level records. + InsideModule, // Processing records inside a module block. + // All states below here represent cases where input shouldn't be parsed. + NoMoreInput, // Generic marker for having parsed input. + ReachedEof, // Parsed input, but not necessary materializations. + FinishedParse, // Parsed input and materialized necessary parts. + ParseError, // An error has occurred, stop parsing. + } ParseState = AtStart; + + /// The position (within the bitcode) where continueParse() left off, and used + /// to set input position on the next call to continueParse(). + uint64_t NextUnreadBit = 0; + + /// The number of modules read at the top level. + size_t NumModulesParsed = 0; bool StripDebugInfo = false; @@ -226,10 +249,10 @@ std::error_code Error(BitcodeError E); std::error_code Error(const Twine &Message); - explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C, - DiagnosticHandlerFunction DiagnosticHandler); - explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C, - DiagnosticHandlerFunction DiagnosticHandler); + BitcodeReader(MemoryBuffer *Buffer, LLVMContext &C, + DiagnosticHandlerFunction DiagnosticHandler); + BitcodeReader(DataStreamer *Streamer, LLVMContext &C, + DiagnosticHandlerFunction DiagnosticHandler); ~BitcodeReader() override { FreeState(); } std::error_code materializeForwardReferencedFunctions(); @@ -244,13 +267,23 @@ std::vector getIdentifiedStructTypes() const override; void Dematerialize(GlobalValue *GV) override; - /// @brief Main interface to parsing a bitcode buffer. - /// @returns true if an error occurred. - std::error_code ParseBitcodeInto(Module *M, - bool ShouldLazyLoadMetadata = false); + /// \brief Starts parse of bitcode. Materializes during parse based on flags. + /// + /// \param M the module to build. + + /// \param ShouldMaterializeAll true when the module should be materialized + /// completely before returning. Otherwise, function bodies are only loaded on + /// demand. + /// \param ShouldLazyLoadMetadata true when the metadata blocks should be + /// parsed. + /// + /// \returns true if an error occurred. + std::error_code parseBitcodeInto(Module *M, + bool ShouldMaterializeAll, + bool ShouldLazyLoadMetadata); - /// @brief Cheap mechanism to just extract module triple - /// @returns true if an error occurred. + /// \brief Cheap mechanism to just extract module triple + /// \returns true if an error occurred. ErrorOr parseTriple(); static uint64_t decodeSignRotatedValue(uint64_t V); @@ -350,12 +383,33 @@ return getFnValueByID(ValNo, Ty); } + /// \name Functions that parses bitcode files, other than skipped blocks based + /// on flags to parseBitcodeInto(). + /// @{ + std::error_code startParse(); + std::error_code continueParse(); + std::error_code finishParse(); + /// @} + + // Changes the parse state to the new value. + void setParseState(BitcodeReaderState NewValue) { + NextUnreadBit = Stream.GetCurrentBitNo(); + ParseState = NewValue; + } + + // Changes the parse state to ParseError if given an error. + void setParseStateIfError(std::error_code EC) { + NextUnreadBit = Stream.GetCurrentBitNo(); + if (EC) + ParseState = ParseError; + } + /// Converts alignment exponent (i.e. power of two (or zero)) to the /// corresponding alignment to use. If alignment is too large, returns /// a corresponding error code. std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); - std::error_code ParseModule(bool Resume, bool ShouldLazyLoadMetadata = false); + std::error_code ParseModule(); std::error_code ParseAttributeBlock(); std::error_code ParseAttributeGroupBlock(); std::error_code ParseTypeTable(); @@ -421,21 +475,19 @@ return [&C](const DiagnosticInfo &DI) { C.diagnose(DI); }; } -BitcodeReader::BitcodeReader(MemoryBuffer *buffer, LLVMContext &C, +BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &C, DiagnosticHandlerFunction DiagnosticHandler) : Context(C), DiagnosticHandler(getDiagHandler(DiagnosticHandler, C)), - TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr), - NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), - MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), - WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {} + TheModule(nullptr), Buffer(Buffer), Streamer(nullptr), + SeenValueSymbolTable(false), ValueList(C), + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} -BitcodeReader::BitcodeReader(DataStreamer *streamer, LLVMContext &C, +BitcodeReader::BitcodeReader(DataStreamer *Streamer, LLVMContext &C, DiagnosticHandlerFunction DiagnosticHandler) : Context(C), DiagnosticHandler(getDiagHandler(DiagnosticHandler, C)), - TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer), - NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), - MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), - WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {} + TheModule(nullptr), Buffer(nullptr), Streamer(Streamer), + SeenValueSymbolTable(false), ValueList(C), + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} std::error_code BitcodeReader::materializeForwardReferencedFunctions() { if (WillMaterializeAllForwardRefs) @@ -694,7 +746,7 @@ namespace llvm { namespace { - /// @brief A class for maintaining the slot number definition + /// \brief A class for maintaining the slot number definition /// as a placeholder for the actual definition for forward constants defs. class ConstantPlaceHolder : public ConstantExpr { void operator=(const ConstantPlaceHolder &) = delete; @@ -708,7 +760,7 @@ Op<0>() = UndefValue::get(Type::getInt32Ty(Context)); } - /// @brief Methods to support type inquiry through isa, cast, and dyn_cast. + /// \brief Methods to support type inquiry through isa, cast, and dyn_cast. static bool classof(const Value *V) { return isa(V) && cast(V)->getOpcode() == Instruction::UserOp1; @@ -2669,12 +2721,14 @@ return std::error_code(); } -std::error_code BitcodeReader::ParseModule(bool Resume, - bool ShouldLazyLoadMetadata) { - if (Resume) - Stream.JumpToBit(NextUnreadBit); - else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) - return Error("Invalid record"); +std::error_code BitcodeReader::ParseModule() { + if (ParseState == AtTopLevel) { + if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) + return Error("Invalid record"); + setParseState(InsideModule); + } else { + assert(ParseState == InsideModule); + } SmallVector Record; std::vector SectionTable; @@ -2688,6 +2742,7 @@ case BitstreamEntry::Error: return Error("Malformed block"); case BitstreamEntry::EndBlock: + setParseState(AtTopLevel); return GlobalCleanup(); case BitstreamEntry::SubBlock: @@ -2745,16 +2800,12 @@ if (std::error_code EC = RememberAndSkipFunctionBody()) return EC; - // For streaming bitcode, suspend parsing when we reach the function - // bodies. Subsequent materialization calls will resume it when - // necessary. For streaming, the function bodies must be at the end of - // the bitcode. If the bitcode file is old, the symbol table will be - // at the end instead and will not have been seen yet. In this case, - // just finish the parse now. - if (LazyStreamer && SeenValueSymbolTable) { - NextUnreadBit = Stream.GetCurrentBitNo(); + // Suspend parsing when we reach a function body, assuming we + // have already associated names with global values. Note: If + // the bitcode file is old, the symbol table will be at the + // end instead and will not have been seen yet. + if (SeenValueSymbolTable) return std::error_code(); - } break; case bitc::USELIST_BLOCK_ID: if (std::error_code EC = ParseUseLists()) @@ -2995,8 +3046,7 @@ if (!isProto) { Func->setIsMaterializable(true); FunctionsWithBodies.push_back(Func); - if (LazyStreamer) - DeferredFunctionInfo[Func] = 0; + DeferredFunctionInfo[Func] = 0; } break; } @@ -3044,9 +3094,33 @@ } } -std::error_code BitcodeReader::ParseBitcodeInto(Module *M, +std::error_code BitcodeReader::parseBitcodeInto(Module *M, + bool ShouldMaterializeAll, bool ShouldLazyLoadMetadata) { - TheModule = nullptr; + auto cleanupOnError = [&](std::error_code EC) { + releaseBuffer(); // Never take ownership on error. + return EC; + }; + + TheModule = M; + this->ShouldLazyLoadMetadata = ShouldLazyLoadMetadata; + + if (std::error_code EC = startParse()) + return cleanupOnError(EC); + + if (ShouldMaterializeAll) { + if (std::error_code EC = MaterializeModule(TheModule)) + return cleanupOnError(EC); + } else { + if (std::error_code EC = materializeForwardReferencedFunctions()) + return cleanupOnError(EC); + } + + return std::error_code(); +} + +std::error_code BitcodeReader::startParse() { + assert(ParseState == AtStart); if (std::error_code EC = InitStream()) return EC; @@ -3057,43 +3131,81 @@ Stream.Read(4) != 0x0 || Stream.Read(4) != 0xC || Stream.Read(4) != 0xE || - Stream.Read(4) != 0xD) + Stream.Read(4) != 0xD) { return Error("Invalid bitcode signature"); + } + + return continueParse(); +} + +std::error_code BitcodeReader::continueParse() { + switch (ParseState) { + case AtStart: + setParseState(AtTopLevel); + break; + case AtTopLevel: + // Restore input position to saved position on last call. + Stream.JumpToBit(NextUnreadBit); + break; + case InsideModule: { + // Restore input position to saved position on last call, + // and then continue parsing module. + Stream.JumpToBit(NextUnreadBit); + std::error_code EC = ParseModule(); + setParseStateIfError(EC); + return EC; + } + case NoMoreInput: + case ReachedEof: + case FinishedParse: + return std::error_code(); + case ParseError: + return Error("Can't continue, bitcode error already found"); + } // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (1) { - if (Stream.AtEndOfStream()) + if (Stream.AtEndOfStream()) { + setParseState(ReachedEof); return std::error_code(); + } + assert(ParseState == AtTopLevel); BitstreamEntry Entry = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); switch (Entry.Kind) { case BitstreamEntry::Error: + setParseState(ParseError); return Error("Malformed block"); case BitstreamEntry::EndBlock: + setParseState(AtTopLevel); return std::error_code(); case BitstreamEntry::SubBlock: switch (Entry.ID) { case bitc::BLOCKINFO_BLOCK_ID: - if (Stream.ReadBlockInfoBlock()) + if (Stream.ReadBlockInfoBlock()) { + setParseState(ParseError); return Error("Malformed block"); + } break; - case bitc::MODULE_BLOCK_ID: + case bitc::MODULE_BLOCK_ID: { // Reject multiple MODULE_BLOCK's in a single bitstream. - if (TheModule) + if (NumModulesParsed++) { + setParseState(ParseError); return Error("Invalid multiple blocks"); - TheModule = M; - if (std::error_code EC = ParseModule(false, ShouldLazyLoadMetadata)) - return EC; - if (LazyStreamer) - return std::error_code(); - break; + } + std::error_code EC = ParseModule(); + setParseStateIfError(EC); + return EC; + } default: - if (Stream.SkipBlock()) + if (Stream.SkipBlock()) { + setParseState(ParseError); return Error("Invalid record"); + } break; } continue; @@ -3105,14 +3217,42 @@ // have to read and ignore these final 4 bytes :-( if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 && Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a && - Stream.AtEndOfStream()) + Stream.AtEndOfStream()) { + setParseState(ReachedEof); return std::error_code(); + } + setParseState(ParseError); return Error("Invalid record"); } } } +std::error_code BitcodeReader::finishParse() { + assert(TheModule); + + while (ParseState < NoMoreInput) { + if (std::error_code EC = continueParse()) + return EC; + } + + switch (ParseState) { + case AtStart: + case AtTopLevel: + case InsideModule: + llvm_unreachable("finishParse exits with ParseState <= NoMorInput"); + case NoMoreInput: + case ReachedEof: + setParseState(FinishedParse); + break; + case FinishedParse: + break; + case ParseError: + return Error("Can't continue, bitcode error already found"); + } + return std::error_code(); +} + ErrorOr BitcodeReader::parseModuleTriple() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return Error("Invalid record"); @@ -4275,12 +4415,12 @@ Function *F, DenseMap::iterator DeferredFunctionInfoIterator) { while (DeferredFunctionInfoIterator->second == 0) { - if (Stream.AtEndOfStream()) + if (ParseState >= NoMoreInput) { return Error("Could not find function in stream"); - // ParseModule will parse the next body in the stream and set its - // position in the DeferredFunctionInfo map. - if (std::error_code EC = ParseModule(true)) + } + if (std::error_code EC = continueParse()) { return EC; + } } return std::error_code(); } @@ -4304,7 +4444,7 @@ assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); // If its position is recorded as 0, its body is somewhere in the stream // but we haven't seen it yet. - if (DFII->second == 0 && LazyStreamer) + if (DFII->second == 0) if (std::error_code EC = FindFunctionInStream(F, DFII)) return EC; @@ -4365,11 +4505,13 @@ assert(M == TheModule && "Can only Materialize the Module this BitcodeReader is attached to."); - if (std::error_code EC = materializeMetadata()) + // Make sure the rest of the bits in the module (excluding materializable) + // have been read. + if (std::error_code EC = finishParse()) return EC; - // Promise to materialize all forward references. - WillMaterializeAllForwardRefs = true; + if (std::error_code EC = materializeMetadata()) + return EC; // Iterate over the module, deserializing any functions that are still on // disk. @@ -4378,14 +4520,8 @@ if (std::error_code EC = materialize(F)) return EC; } - // At this point, if there are any function bodies, the current bit is - // pointing to the END_BLOCK record after them. Now make sure the rest - // of the bits in the module have been read. - if (NextUnreadBit) - ParseModule(true); - - // Check that all block address forward references got resolved (as we - // promised above). + + // Check that all block address forward references got resolved. if (!BasicBlockFwdRefs.empty()) return Error("Never resolved function from blockaddress"); @@ -4420,7 +4556,7 @@ } std::error_code BitcodeReader::InitStream() { - if (LazyStreamer) + if (Streamer) return InitLazyStream(); return InitStreamFromBuffer(); } @@ -4447,7 +4583,7 @@ std::error_code BitcodeReader::InitLazyStream() { // Check and strip off the bitcode wrapper; BitstreamReader expects never to // see it. - auto OwnedBytes = llvm::make_unique(LazyStreamer); + auto OwnedBytes = llvm::make_unique(Streamer); StreamingMemoryObject &Bytes = *OwnedBytes; StreamFile = llvm::make_unique(std::move(OwnedBytes)); Stream.init(&*StreamFile); @@ -4515,20 +4651,11 @@ new BitcodeReader(Buffer.get(), Context, DiagnosticHandler); M->setMaterializer(R); - auto cleanupOnError = [&](std::error_code EC) { - R->releaseBuffer(); // Never take ownership on error. + if (std::error_code EC = + R->parseBitcodeInto(M, WillMaterializeAll, ShouldLazyLoadMetadata)) { delete M; // Also deletes R. return EC; - }; - - // Delay parsing Metadata if ShouldLazyLoadMetadata is true. - if (std::error_code EC = R->ParseBitcodeInto(M, ShouldLazyLoadMetadata)) - return cleanupOnError(EC); - - if (!WillMaterializeAll) - // Resolve forward references from blockaddresses. - if (std::error_code EC = R->materializeForwardReferencedFunctions()) - return cleanupOnError(EC); + } Buffer.release(); // The BitcodeReader owns it now. return M; @@ -4550,7 +4677,7 @@ std::unique_ptr M = make_unique(Name, Context); BitcodeReader *R = new BitcodeReader(Streamer, Context, DiagnosticHandler); M->setMaterializer(R); - if (std::error_code EC = R->ParseBitcodeInto(M.get())) + if (std::error_code EC = R->parseBitcodeInto(M.get(), false, false)) return EC; return std::move(M); } @@ -4564,11 +4691,6 @@ if (!ModuleOrErr) return ModuleOrErr; Module *M = ModuleOrErr.get(); - // Read in the entire module, and destroy the BitcodeReader. - if (std::error_code EC = M->materializeAllPermanently()) { - delete M; - return EC; - } // TODO: Restore the use-lists to the in-memory state when the bitcode was // written. We must defer until the Module has been fully materialized. Index: lib/Bitcode/Reader/BitstreamReader.cpp =================================================================== --- lib/Bitcode/Reader/BitstreamReader.cpp +++ lib/Bitcode/Reader/BitstreamReader.cpp @@ -338,4 +338,3 @@ } } } -