Index: include/llvm/Bitcode/BitstreamReader.h =================================================================== --- include/llvm/Bitcode/BitstreamReader.h +++ include/llvm/Bitcode/BitstreamReader.h @@ -326,6 +326,8 @@ // If we run out of data, stop at the end of the stream. if (BytesRead == 0) { Size = NextChar; + CurWord = 0; + BitsInCurWord = 0; return; } Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Format.h" #include using namespace llvm; @@ -133,11 +134,11 @@ LLVMContext &Context; DiagnosticHandlerFunction DiagnosticHandler; Module *TheModule; + // The following two fields define the type of memory to parse. std::unique_ptr Buffer; + DataStreamer *Streamer; std::unique_ptr StreamFile; BitstreamCursor Stream; - DataStreamer *LazyStreamer; - uint64_t NextUnreadBit; bool SeenValueSymbolTable; std::vector TypeList; @@ -209,13 +210,36 @@ /// True if all functions will be materialized, negating the need to process /// (e.g.) blockaddress forward references. - bool WillMaterializeAllForwardRefs; + bool WillMaterializeAllForwardRefs = false; /// Functions that have block addresses taken. This is usually empty. SmallPtrSet BlockAddressesTaken; /// True if any Metadata block has been materialized. - bool IsMetadataMaterialized; + bool IsMetadataMaterialized = false; + + /// True if meta data should initially be skipped. + bool ShouldLazyLoadMetadata = false; + + /// The name of state of the parse. Along with NextUnreadBit, they + /// define the state of the parse between calls to continueParse(). + enum BitcodeReaderState { + AtStart, + AtTopLevel, // Processing top-level records. + InsideModule, // Processing records inside a module block. 
+ // All states below here represent cases where input shouldn't be parsed. + NoMoreInput, // Generic marker for having parsed input. + ReachedEof, // Parsed input, but not yet the necessary materializations. + FinishedParse, // Parsed input and materialized necessary parts. + ParseError, // An error has occurred, stop parsing. + } ParseState = AtStart; + + /// The position (within the bitcode) where continueParse() left off; used + /// to set the input position on the next call to continueParse(). + uint64_t NextUnreadBit = 0; + + /// The number of modules read at the top level. + size_t NumModulesParsed = 0; bool StripDebugInfo = false; @@ -224,10 +248,10 @@ std::error_code Error(BitcodeError E); std::error_code Error(const Twine &Message); - explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C, - DiagnosticHandlerFunction DiagnosticHandler); - explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C, - DiagnosticHandlerFunction DiagnosticHandler); + BitcodeReader(MemoryBuffer *Buffer, LLVMContext &C, + DiagnosticHandlerFunction DiagnosticHandler); + BitcodeReader(DataStreamer *Streamer, LLVMContext &C, + DiagnosticHandlerFunction DiagnosticHandler); ~BitcodeReader() override { FreeState(); } std::error_code materializeForwardReferencedFunctions(); @@ -242,13 +266,22 @@ std::vector getIdentifiedStructTypes() const override; void dematerialize(GlobalValue *GV) override; - /// @brief Main interface to parsing a bitcode buffer. - /// @returns true if an error occurred. - std::error_code ParseBitcodeInto(Module *M, - bool ShouldLazyLoadMetadata = false); + /// \brief Starts parse of bitcode. Materializes during parse based on flags. + /// + /// \param M the module to build. + /// \param ShouldMaterializeAll true when the module should be materialized + /// completely before returning. Otherwise, function bodies are only loaded on + /// demand. + /// \param ShouldLazyLoadMetadata true when the metadata blocks should be + /// skipped initially (loaded lazily). 
+ /// + /// \returns a non-success error code if an error occurred. + std::error_code parseBitcodeInto(Module *M, + bool ShouldMaterializeAll, + bool ShouldLazyLoadMetadata); - /// @brief Cheap mechanism to just extract module triple - /// @returns true if an error occurred. + /// Cheap mechanism to just extract module triple. + /// \returns an error if one occurred. ErrorOr parseTriple(); static uint64_t decodeSignRotatedValue(uint64_t V); @@ -348,12 +381,33 @@ return getFnValueByID(ValNo, Ty); } + /// \name Functions that parse bitcode files, except for blocks skipped based + /// on flags to parseBitcodeInto(). + /// @{ + std::error_code startParse(); + std::error_code continueParse(); + std::error_code finishParse(); + /// @} + + // Saves the stream position and changes the parse state to the new value. + void setParseState(BitcodeReaderState NewValue) { + NextUnreadBit = Stream.GetCurrentBitNo(); + ParseState = NewValue; + } + + // Saves the stream position; sets the parse state to ParseError on error. + void setParseStateIfError(std::error_code EC) { + NextUnreadBit = Stream.GetCurrentBitNo(); + if (EC) + ParseState = ParseError; + } + /// Converts alignment exponent (i.e. power of two (or zero)) to the /// corresponding alignment to use. If alignment is too large, returns /// a corresponding error code. 
std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); - std::error_code ParseModule(bool Resume, bool ShouldLazyLoadMetadata = false); + std::error_code ParseModule(); std::error_code ParseAttributeBlock(); std::error_code ParseAttributeGroupBlock(); std::error_code ParseTypeTable(); @@ -406,15 +460,18 @@ } std::error_code BitcodeReader::Error(BitcodeError E, const Twine &Message) { + setParseState(ParseError); return ::Error(DiagnosticHandler, make_error_code(E), Message); } std::error_code BitcodeReader::Error(const Twine &Message) { + setParseState(ParseError); return ::Error(DiagnosticHandler, make_error_code(BitcodeError::CorruptedBitcode), Message); } std::error_code BitcodeReader::Error(BitcodeError E) { + setParseState(ParseError); return ::Error(DiagnosticHandler, make_error_code(E)); } @@ -425,21 +482,19 @@ return [&C](const DiagnosticInfo &DI) { C.diagnose(DI); }; } -BitcodeReader::BitcodeReader(MemoryBuffer *buffer, LLVMContext &C, +BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &C, DiagnosticHandlerFunction DiagnosticHandler) : Context(C), DiagnosticHandler(getDiagHandler(DiagnosticHandler, C)), - TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr), - NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), - MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), - WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {} + TheModule(nullptr), Buffer(Buffer), Streamer(nullptr), + SeenValueSymbolTable(false), ValueList(C), + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} -BitcodeReader::BitcodeReader(DataStreamer *streamer, LLVMContext &C, +BitcodeReader::BitcodeReader(DataStreamer *Streamer, LLVMContext &C, DiagnosticHandlerFunction DiagnosticHandler) : Context(C), DiagnosticHandler(getDiagHandler(DiagnosticHandler, C)), - TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer), 
- NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), - MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), - WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {} + TheModule(nullptr), Buffer(nullptr), Streamer(Streamer), + SeenValueSymbolTable(false), ValueList(C), + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} std::error_code BitcodeReader::materializeForwardReferencedFunctions() { if (WillMaterializeAllForwardRefs) @@ -713,12 +768,12 @@ namespace llvm { namespace { - /// @brief A class for maintaining the slot number definition - /// as a placeholder for the actual definition for forward constants defs. + /// A class for maintaining the slot number definition as a + /// placeholder for the actual definition for forward constants defs. class ConstantPlaceHolder : public ConstantExpr { void operator=(const ConstantPlaceHolder &) = delete; public: - // allocate space for exactly one operand + /// Allocate space for exactly one operand void *operator new(size_t s) { return User::operator new(s, 1); } @@ -727,7 +782,7 @@ Op<0>() = UndefValue::get(Type::getInt32Ty(Context)); } - /// @brief Methods to support type inquiry through isa, cast, and dyn_cast. + /// Methods to support type inquiry through isa, cast, and dyn_cast. 
static bool classof(const Value *V) { return isa(V) && cast(V)->getOpcode() == Instruction::UserOp1; @@ -2707,12 +2762,14 @@ return std::error_code(); } -std::error_code BitcodeReader::ParseModule(bool Resume, - bool ShouldLazyLoadMetadata) { - if (Resume) - Stream.JumpToBit(NextUnreadBit); - else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) - return Error("Invalid record"); +std::error_code BitcodeReader::ParseModule() { + if (ParseState == AtTopLevel) { + if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) + return Error("Invalid record"); + setParseState(InsideModule); + } else { + assert(ParseState == InsideModule); + } SmallVector Record; std::vector SectionTable; @@ -2726,6 +2783,7 @@ case BitstreamEntry::Error: return Error("Malformed block"); case BitstreamEntry::EndBlock: + setParseState(AtTopLevel); return GlobalCleanup(); case BitstreamEntry::SubBlock: @@ -2783,16 +2841,12 @@ if (std::error_code EC = RememberAndSkipFunctionBody()) return EC; - // For streaming bitcode, suspend parsing when we reach the function - // bodies. Subsequent materialization calls will resume it when - // necessary. For streaming, the function bodies must be at the end of - // the bitcode. If the bitcode file is old, the symbol table will be - // at the end instead and will not have been seen yet. In this case, - // just finish the parse now. - if (LazyStreamer && SeenValueSymbolTable) { - NextUnreadBit = Stream.GetCurrentBitNo(); + // Suspend parsing when we reach a function body, assuming we + // have already associated names with global values. Note: If + // the bitcode file is old, the symbol table will be at the + // end instead and will not have been seen yet. + if (SeenValueSymbolTable) return std::error_code(); - } break; case bitc::USELIST_BLOCK_ID: if (std::error_code EC = ParseUseLists()) @@ -2806,7 +2860,6 @@ break; } - // Read a record. switch (Stream.readRecord(Entry.ID, Record)) { default: break; // Default behavior, ignore unknown content. 
@@ -3040,8 +3093,7 @@ if (!isProto) { Func->setIsMaterializable(true); FunctionsWithBodies.push_back(Func); - if (LazyStreamer) - DeferredFunctionInfo[Func] = 0; + DeferredFunctionInfo[Func] = 0; } break; } @@ -3088,9 +3140,33 @@ } } -std::error_code BitcodeReader::ParseBitcodeInto(Module *M, +std::error_code BitcodeReader::parseBitcodeInto(Module *M, + bool ShouldMaterializeAll, bool ShouldLazyLoadMetadata) { - TheModule = nullptr; + auto cleanupOnError = [&](std::error_code EC) { + releaseBuffer(); // Never take ownership on error. + return EC; + }; + + TheModule = M; + this->ShouldLazyLoadMetadata = ShouldLazyLoadMetadata; + + if (std::error_code EC = startParse()) + return cleanupOnError(EC); + + if (ShouldMaterializeAll) { + if (std::error_code EC = materializeModule(TheModule)) + return cleanupOnError(EC); + } else { + if (std::error_code EC = materializeForwardReferencedFunctions()) + return cleanupOnError(EC); + } + + return std::error_code(); +} + +std::error_code BitcodeReader::startParse() { + assert(ParseState == AtStart); if (std::error_code EC = InitStream()) return EC; @@ -3104,14 +3180,42 @@ Stream.Read(4) != 0xD) return Error("Invalid bitcode signature"); + return continueParse(); +} + +std::error_code BitcodeReader::continueParse() { + switch (ParseState) { + case AtStart: + setParseState(AtTopLevel); + break; + case AtTopLevel: + // Restore input position to saved position on last call. + Stream.JumpToBit(NextUnreadBit); + break; + case InsideModule: { + // Restore input position to saved position on last call, + // and then continue parsing module. + Stream.JumpToBit(NextUnreadBit); + std::error_code EC = ParseModule(); + setParseStateIfError(EC); + return EC; + } + case NoMoreInput: + case ReachedEof: + case FinishedParse: + return std::error_code(); + case ParseError: + return Error("Can't continue, bitcode error already found"); + } + // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. 
while (1) { + assert(ParseState == AtTopLevel); + if (Stream.AtEndOfStream()) { - if (TheModule) - return std::error_code(); - // We didn't really read a proper Module. - return Error("Malformed IR file"); + setParseState(ReachedEof); + return std::error_code(); } BitstreamEntry Entry = @@ -3119,26 +3223,31 @@ switch (Entry.Kind) { case BitstreamEntry::Error: - return Error("Malformed block"); case BitstreamEntry::EndBlock: - return std::error_code(); - + { + // Give the bit address where the error is found, so that it can be + // easily repaired if it is in an invalid test file. + std::string Msg; + raw_string_ostream StrBuf(Msg); + unsigned long long Bit = Stream.GetCurrentBitNo(); + StrBuf << "Malformed IR file at bit " << format("%llx", Bit / CHAR_BIT) + << ":" << (Bit % CHAR_BIT); + return Error(StrBuf.str()); + } case BitstreamEntry::SubBlock: switch (Entry.ID) { case bitc::BLOCKINFO_BLOCK_ID: if (Stream.ReadBlockInfoBlock()) return Error("Malformed block"); break; - case bitc::MODULE_BLOCK_ID: + case bitc::MODULE_BLOCK_ID: { // Reject multiple MODULE_BLOCK's in a single bitstream. 
- if (TheModule) + if (NumModulesParsed++) return Error("Invalid multiple blocks"); - TheModule = M; - if (std::error_code EC = ParseModule(false, ShouldLazyLoadMetadata)) - return EC; - if (LazyStreamer) - return std::error_code(); - break; + std::error_code EC = ParseModule(); + setParseStateIfError(EC); + return EC; + } default: if (Stream.SkipBlock()) return Error("Invalid record"); @@ -3153,14 +3262,44 @@ // have to read and ignore these final 4 bytes :-( if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 && Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a && - Stream.AtEndOfStream()) + Stream.AtEndOfStream()) { + setParseState(ReachedEof); return std::error_code(); + } return Error("Invalid record"); } } } +std::error_code BitcodeReader::finishParse() { + assert(TheModule); + + while (ParseState < NoMoreInput) { + if (std::error_code EC = continueParse()) + return EC; + } + + switch (ParseState) { + case AtStart: + case AtTopLevel: + case InsideModule: + llvm_unreachable("finishParse exits with ParseState < NoMoreInput"); + case NoMoreInput: + case ReachedEof: + setParseState(FinishedParse); + break; + case FinishedParse: + break; + case ParseError: + return Error("Can't continue, bitcode error already found"); + } + if (NumModulesParsed == 1) + return std::error_code(); + // We didn't really read a proper Module. + return Error("Malformed IR file"); +} + ErrorOr BitcodeReader::parseModuleTriple() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return Error("Invalid record"); @@ -4417,12 +4556,12 @@ Function *F, DenseMap::iterator DeferredFunctionInfoIterator) { while (DeferredFunctionInfoIterator->second == 0) { - if (Stream.AtEndOfStream()) + if (ParseState >= NoMoreInput) { return Error("Could not find function in stream"); - // ParseModule will parse the next body in the stream and set its - // position in the DeferredFunctionInfo map. 
- if (std::error_code EC = ParseModule(true)) + } + if (std::error_code EC = continueParse()) { return EC; + } } return std::error_code(); } @@ -4446,7 +4585,7 @@ assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); // If its position is recorded as 0, its body is somewhere in the stream // but we haven't seen it yet. - if (DFII->second == 0 && LazyStreamer) + if (DFII->second == 0) if (std::error_code EC = FindFunctionInStream(F, DFII)) return EC; @@ -4507,11 +4646,13 @@ assert(M == TheModule && "Can only Materialize the Module this BitcodeReader is attached to."); - if (std::error_code EC = materializeMetadata()) + // Make sure the rest of the bits in the module (excluding materializable) + // have been read. + if (std::error_code EC = finishParse()) return EC; - // Promise to materialize all forward references. - WillMaterializeAllForwardRefs = true; + if (std::error_code EC = materializeMetadata()) + return EC; // Iterate over the module, deserializing any functions that are still on // disk. @@ -4520,14 +4661,8 @@ if (std::error_code EC = materialize(F)) return EC; } - // At this point, if there are any function bodies, the current bit is - // pointing to the END_BLOCK record after them. Now make sure the rest - // of the bits in the module have been read. - if (NextUnreadBit) - ParseModule(true); - - // Check that all block address forward references got resolved (as we - // promised above). + + // Check that all block address forward references got resolved. if (!BasicBlockFwdRefs.empty()) return Error("Never resolved function from blockaddress"); @@ -4562,7 +4697,7 @@ } std::error_code BitcodeReader::InitStream() { - if (LazyStreamer) + if (Streamer) return InitLazyStream(); return InitStreamFromBuffer(); } @@ -4589,7 +4724,7 @@ std::error_code BitcodeReader::InitLazyStream() { // Check and strip off the bitcode wrapper; BitstreamReader expects never to // see it. 
- auto OwnedBytes = llvm::make_unique(LazyStreamer); + auto OwnedBytes = llvm::make_unique(Streamer); StreamingMemoryObject &Bytes = *OwnedBytes; StreamFile = llvm::make_unique(std::move(OwnedBytes)); Stream.init(&*StreamFile); @@ -4657,20 +4792,11 @@ new BitcodeReader(Buffer.get(), Context, DiagnosticHandler); M->setMaterializer(R); - auto cleanupOnError = [&](std::error_code EC) { - R->releaseBuffer(); // Never take ownership on error. + if (std::error_code EC = + R->parseBitcodeInto(M, WillMaterializeAll, ShouldLazyLoadMetadata)) { delete M; // Also deletes R. return EC; - }; - - // Delay parsing Metadata if ShouldLazyLoadMetadata is true. - if (std::error_code EC = R->ParseBitcodeInto(M, ShouldLazyLoadMetadata)) - return cleanupOnError(EC); - - if (!WillMaterializeAll) - // Resolve forward references from blockaddresses. - if (std::error_code EC = R->materializeForwardReferencedFunctions()) - return cleanupOnError(EC); + } Buffer.release(); // The BitcodeReader owns it now. return M; @@ -4692,7 +4818,7 @@ std::unique_ptr M = make_unique(Name, Context); BitcodeReader *R = new BitcodeReader(Streamer, Context, DiagnosticHandler); M->setMaterializer(R); - if (std::error_code EC = R->ParseBitcodeInto(M.get())) + if (std::error_code EC = R->parseBitcodeInto(M.get(), false, false)) return EC; return std::move(M); } @@ -4706,11 +4832,6 @@ if (!ModuleOrErr) return ModuleOrErr; Module *M = ModuleOrErr.get(); - // Read in the entire module, and destroy the BitcodeReader. - if (std::error_code EC = M->materializeAllPermanently()) { - delete M; - return EC; - } // TODO: Restore the use-lists to the in-memory state when the bitcode was // written. We must defer until the Module has been fully materialized. 
Index: test/Bitcode/invalid.test =================================================================== --- test/Bitcode/invalid.test +++ test/Bitcode/invalid.test @@ -202,3 +202,9 @@ RUN: FileCheck --check-prefix=ALIAS-TYPE-MISMATCH %s ALIAS-TYPE-MISMATCH: Alias and aliasee types don't match + +RUN: not llvm-dis -disable-output %p/Inputs/invalid-data-after-module.bc 2>&1 | \ +RUN: FileCheck --check-prefix=DATA_AFTER_MODULE %s + +DATA_AFTER_MODULE: Malformed IR file at bit 1f0:2 +