Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/DataStream.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" @@ -41,6 +42,21 @@ SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex }; +// Flag to deal with the fact that previous implementations of the +// bitcode parser did not check for more input when lazily +// materializing a module (This has been fixed by the call to +// finishParse in materializeModule). Allows old bitcode invalid +// tests to continue to work. + +// TODO(kschimpf): Remove this check once invalid tests are correctly +// generated. +cl::opt +UseOldLazyBitcodeParser( + "old-lazy-bitcode-parser", + cl::desc("use old bitcode parser that doesn't check for more " + "after materialization"), + cl::init(false)); + class BitcodeReaderValueList { std::vector ValuePtrs; @@ -135,11 +151,11 @@ LLVMContext &Context; DiagnosticHandlerFunction DiagnosticHandler; Module *TheModule; + // The following two fields define the type of memory to parse. std::unique_ptr Buffer; + DataStreamer *Streamer; std::unique_ptr StreamFile; BitstreamCursor Stream; - DataStreamer *LazyStreamer; - uint64_t NextUnreadBit; bool SeenValueSymbolTable; std::vector TypeList; @@ -211,13 +227,36 @@ /// True if all functions will be materialized, negating the need to process /// (e.g.) blockaddress forward references. - bool WillMaterializeAllForwardRefs; + bool WillMaterializeAllForwardRefs = false; /// Functions that have block addresses taken. This is usually empty. SmallPtrSet BlockAddressesTaken; /// True if any Metadata block has been materialized. - bool IsMetadataMaterialized; + bool IsMetadataMaterialized = false; + + /// True if meta data should initially be skipped. + bool ShouldLazyLoadMetadata = false; + + /// The name of state of the parse. Along with NextUnreadBit, they + /// define the state of the parse between calls to continueParse(). + enum BitcodeReaderState { + AtStart, + AtTopLevel, // Processing top-level records. + InsideModule, // Processing records inside a module block. + // All states below here represent cases where input shouldn't be parsed. + NoMoreInput, // Generic marker for having parsed input. + ReachedEof, // Parsed input, but not necessary materializations. + FinishedParse, // Parsed input and materialized necessary parts. + ParseError, // An error has occurred, stop parsing. + } ParseState = AtStart; + + /// The position (within the bitcode) where continueParse() left off, and used + /// to set input position on the next call to continueParse(). + uint64_t NextUnreadBit = 0; + + /// The number of modules read at the top level. + size_t NumModulesParsed = 0; bool StripDebugInfo = false; @@ -226,10 +265,10 @@ std::error_code Error(BitcodeError E); std::error_code Error(const Twine &Message); - explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C, - DiagnosticHandlerFunction DiagnosticHandler); - explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C, - DiagnosticHandlerFunction DiagnosticHandler); + BitcodeReader(MemoryBuffer *Buffer, LLVMContext &C, + DiagnosticHandlerFunction DiagnosticHandler); + BitcodeReader(DataStreamer *Streamer, LLVMContext &C, + DiagnosticHandlerFunction DiagnosticHandler); ~BitcodeReader() override { FreeState(); } std::error_code materializeForwardReferencedFunctions(); @@ -244,13 +283,23 @@ std::vector getIdentifiedStructTypes() const override; void dematerialize(GlobalValue *GV) override; - /// @brief Main interface to parsing a bitcode buffer. - /// @returns true if an error occurred. - std::error_code ParseBitcodeInto(Module *M, - bool ShouldLazyLoadMetadata = false); + /// \brief Starts parse of bitcode. Materializes during parse based on flags. + /// + /// \param M the module to build. - /// @brief Cheap mechanism to just extract module triple - /// @returns true if an error occurred. + /// \param ShouldMaterializeAll true when the module should be materialized + /// completely before returning. Otherwise, function bodies are only loaded on + /// demand. + /// \param ShouldLazyLoadMetadata true when the metadata blocks should be + /// parsed. + /// + /// \returns true if an error occurred. + std::error_code parseBitcodeInto(Module *M, + bool ShouldMaterializeAll, + bool ShouldLazyLoadMetadata); + + /// \brief Cheap mechanism to just extract module triple + /// \returns true if an error occurred. ErrorOr parseTriple(); static uint64_t decodeSignRotatedValue(uint64_t V); @@ -350,12 +399,33 @@ return getFnValueByID(ValNo, Ty); } + /// \name Functions that parses bitcode files, other than skipped blocks based + /// on flags to parseBitcodeInto(). + /// @{ + std::error_code startParse(); + std::error_code continueParse(); + std::error_code finishParse(); + /// @} + + // Changes the parse state to the new value. + void setParseState(BitcodeReaderState NewValue) { + NextUnreadBit = Stream.GetCurrentBitNo(); + ParseState = NewValue; + } + + // Changes the parse state to ParseError if given an error. + void setParseStateIfError(std::error_code EC) { + NextUnreadBit = Stream.GetCurrentBitNo(); + if (EC) + ParseState = ParseError; + } + /// Converts alignment exponent (i.e. power of two (or zero)) to the /// corresponding alignment to use. If alignment is too large, returns /// a corresponding error code. std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment); std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); - std::error_code ParseModule(bool Resume, bool ShouldLazyLoadMetadata = false); + std::error_code ParseModule(); std::error_code ParseAttributeBlock(); std::error_code ParseAttributeGroupBlock(); std::error_code ParseTypeTable(); @@ -408,15 +478,18 @@ } std::error_code BitcodeReader::Error(BitcodeError E, const Twine &Message) { + setParseState(ParseError); return ::Error(DiagnosticHandler, make_error_code(E), Message); } std::error_code BitcodeReader::Error(const Twine &Message) { + setParseState(ParseError); return ::Error(DiagnosticHandler, make_error_code(BitcodeError::CorruptedBitcode), Message); } std::error_code BitcodeReader::Error(BitcodeError E) { + setParseState(ParseError); return ::Error(DiagnosticHandler, make_error_code(E)); } @@ -427,21 +500,19 @@ return [&C](const DiagnosticInfo &DI) { C.diagnose(DI); }; } -BitcodeReader::BitcodeReader(MemoryBuffer *buffer, LLVMContext &C, +BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &C, DiagnosticHandlerFunction DiagnosticHandler) : Context(C), DiagnosticHandler(getDiagHandler(DiagnosticHandler, C)), - TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr), - NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), - MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), - WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {} + TheModule(nullptr), Buffer(Buffer), Streamer(nullptr), + SeenValueSymbolTable(false), ValueList(C), + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} -BitcodeReader::BitcodeReader(DataStreamer *streamer, LLVMContext &C, +BitcodeReader::BitcodeReader(DataStreamer *Streamer, LLVMContext &C, DiagnosticHandlerFunction DiagnosticHandler) : Context(C), DiagnosticHandler(getDiagHandler(DiagnosticHandler, C)), - TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer), - NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), - MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false), - WillMaterializeAllForwardRefs(false), IsMetadataMaterialized(false) {} + TheModule(nullptr), Buffer(nullptr), Streamer(Streamer), + SeenValueSymbolTable(false), ValueList(C), + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} std::error_code BitcodeReader::materializeForwardReferencedFunctions() { if (WillMaterializeAllForwardRefs) @@ -715,7 +786,7 @@ namespace llvm { namespace { - /// @brief A class for maintaining the slot number definition + /// \brief A class for maintaining the slot number definition /// as a placeholder for the actual definition for forward constants defs. class ConstantPlaceHolder : public ConstantExpr { void operator=(const ConstantPlaceHolder &) = delete; @@ -729,7 +800,7 @@ Op<0>() = UndefValue::get(Type::getInt32Ty(Context)); } - /// @brief Methods to support type inquiry through isa, cast, and dyn_cast. + /// \brief Methods to support type inquiry through isa, cast, and dyn_cast. static bool classof(const Value *V) { return isa(V) && cast(V)->getOpcode() == Instruction::UserOp1; @@ -2709,12 +2780,14 @@ return std::error_code(); } -std::error_code BitcodeReader::ParseModule(bool Resume, - bool ShouldLazyLoadMetadata) { - if (Resume) - Stream.JumpToBit(NextUnreadBit); - else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) - return Error("Invalid record"); +std::error_code BitcodeReader::ParseModule() { + if (ParseState == AtTopLevel) { + if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) + return Error("Invalid record"); + setParseState(InsideModule); + } else { + assert(ParseState == InsideModule); + } SmallVector Record; std::vector SectionTable; @@ -2728,6 +2801,7 @@ case BitstreamEntry::Error: return Error("Malformed block"); case BitstreamEntry::EndBlock: + setParseState(AtTopLevel); return GlobalCleanup(); case BitstreamEntry::SubBlock: @@ -2785,16 +2859,12 @@ if (std::error_code EC = RememberAndSkipFunctionBody()) return EC; - // For streaming bitcode, suspend parsing when we reach the function - // bodies. Subsequent materialization calls will resume it when - // necessary. For streaming, the function bodies must be at the end of - // the bitcode. If the bitcode file is old, the symbol table will be - // at the end instead and will not have been seen yet. In this case, - // just finish the parse now. - if (LazyStreamer && SeenValueSymbolTable) { - NextUnreadBit = Stream.GetCurrentBitNo(); + // Suspend parsing when we reach a function body, assuming we + // have already associated names with global values. Note: If + // the bitcode file is old, the symbol table will be at the + // end instead and will not have been seen yet. + if (SeenValueSymbolTable) return std::error_code(); - } break; case bitc::USELIST_BLOCK_ID: if (std::error_code EC = ParseUseLists()) @@ -3042,8 +3112,7 @@ if (!isProto) { Func->setIsMaterializable(true); FunctionsWithBodies.push_back(Func); - if (LazyStreamer) - DeferredFunctionInfo[Func] = 0; + DeferredFunctionInfo[Func] = 0; } break; } @@ -3090,9 +3159,33 @@ } } -std::error_code BitcodeReader::ParseBitcodeInto(Module *M, +std::error_code BitcodeReader::parseBitcodeInto(Module *M, + bool ShouldMaterializeAll, bool ShouldLazyLoadMetadata) { - TheModule = nullptr; + auto cleanupOnError = [&](std::error_code EC) { + releaseBuffer(); // Never take ownership on error. + return EC; + }; + + TheModule = M; + this->ShouldLazyLoadMetadata = ShouldLazyLoadMetadata; + + if (std::error_code EC = startParse()) + return cleanupOnError(EC); + + if (ShouldMaterializeAll) { + if (std::error_code EC = materializeModule(TheModule)) + return cleanupOnError(EC); + } else { + if (std::error_code EC = materializeForwardReferencedFunctions()) + return cleanupOnError(EC); + } + + return std::error_code(); +} + +std::error_code BitcodeReader::startParse() { + assert(ParseState == AtStart); if (std::error_code EC = InitStream()) return EC; @@ -3103,17 +3196,52 @@ Stream.Read(4) != 0x0 || Stream.Read(4) != 0xC || Stream.Read(4) != 0xE || - Stream.Read(4) != 0xD) + Stream.Read(4) != 0xD) { return Error("Invalid bitcode signature"); + } + + return continueParse(); +} + +std::error_code BitcodeReader::continueParse() { + switch (ParseState) { + case AtStart: + setParseState(AtTopLevel); + break; + case AtTopLevel: + // Restore input position to saved position on last call. + Stream.JumpToBit(NextUnreadBit); + break; + case InsideModule: { + // Restore input position to saved position on last call, + // and then continue parsing module. + Stream.JumpToBit(NextUnreadBit); + std::error_code EC = ParseModule(); + setParseStateIfError(EC); + return EC; + } + case NoMoreInput: + case ReachedEof: + case FinishedParse: + return std::error_code(); + case ParseError: + return Error("Can't continue, bitcode error already found"); + } // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (1) { + assert(ParseState == AtTopLevel); + if (Stream.AtEndOfStream()) { - if (TheModule) - return std::error_code(); - // We didn't really read a proper Module. - return Error("Malformed IR file"); + setParseState(ReachedEof); + return std::error_code(); + } + + if (UseOldLazyBitcodeParser && NumModulesParsed == 1) { + // Fake at end. + setParseState(ReachedEof); + return std::error_code(); } BitstreamEntry Entry = @@ -3121,8 +3249,9 @@ switch (Entry.Kind) { case BitstreamEntry::Error: - return Error("Malformed block"); + return Error("Malformed IR file"); case BitstreamEntry::EndBlock: + setParseState(AtTopLevel); return std::error_code(); case BitstreamEntry::SubBlock: @@ -3131,16 +3260,14 @@ if (Stream.ReadBlockInfoBlock()) return Error("Malformed block"); break; - case bitc::MODULE_BLOCK_ID: + case bitc::MODULE_BLOCK_ID: { // Reject multiple MODULE_BLOCK's in a single bitstream. - if (TheModule) + if (NumModulesParsed++) return Error("Invalid multiple blocks"); - TheModule = M; - if (std::error_code EC = ParseModule(false, ShouldLazyLoadMetadata)) - return EC; - if (LazyStreamer) - return std::error_code(); - break; + std::error_code EC = ParseModule(); + setParseStateIfError(EC); + return EC; + } default: if (Stream.SkipBlock()) return Error("Invalid record"); @@ -3155,14 +3282,44 @@ // have to read and ignore these final 4 bytes :-( if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 && Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a && - Stream.AtEndOfStream()) + Stream.AtEndOfStream()) { + setParseState(ReachedEof); return std::error_code(); + } return Error("Invalid record"); } } } +std::error_code BitcodeReader::finishParse() { + assert(TheModule); + + while (ParseState < NoMoreInput) { + if (std::error_code EC = continueParse()) + return EC; + } + + switch (ParseState) { + case AtStart: + case AtTopLevel: + case InsideModule: + llvm_unreachable("finishParse exits with ParseState < NoMoreInput"); + case NoMoreInput: + case ReachedEof: + setParseState(FinishedParse); + break; + case FinishedParse: + break; + case ParseError: + return Error("Can't continue, bitcode error already found"); + } + if (NumModulesParsed == 1) + return std::error_code(); + // We didn't really read a proper Module. + return Error("Malformed IR file"); +} + ErrorOr BitcodeReader::parseModuleTriple() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return Error("Invalid record"); @@ -4419,12 +4576,12 @@ Function *F, DenseMap::iterator DeferredFunctionInfoIterator) { while (DeferredFunctionInfoIterator->second == 0) { - if (Stream.AtEndOfStream()) + if (ParseState >= NoMoreInput) { return Error("Could not find function in stream"); - // ParseModule will parse the next body in the stream and set its - // position in the DeferredFunctionInfo map. - if (std::error_code EC = ParseModule(true)) + } + if (std::error_code EC = continueParse()) { return EC; + } } return std::error_code(); } @@ -4448,7 +4605,7 @@ assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); // If its position is recorded as 0, its body is somewhere in the stream // but we haven't seen it yet. - if (DFII->second == 0 && LazyStreamer) + if (DFII->second == 0) if (std::error_code EC = FindFunctionInStream(F, DFII)) return EC; @@ -4509,11 +4666,13 @@ assert(M == TheModule && "Can only Materialize the Module this BitcodeReader is attached to."); - if (std::error_code EC = materializeMetadata()) + // Make sure the rest of the bits in the module (excluding materializable) + // have been read. + if (std::error_code EC = finishParse()) return EC; - // Promise to materialize all forward references. - WillMaterializeAllForwardRefs = true; + if (std::error_code EC = materializeMetadata()) + return EC; // Iterate over the module, deserializing any functions that are still on // disk. @@ -4522,14 +4681,8 @@ if (std::error_code EC = materialize(F)) return EC; } - // At this point, if there are any function bodies, the current bit is - // pointing to the END_BLOCK record after them. Now make sure the rest - // of the bits in the module have been read. - if (NextUnreadBit) - ParseModule(true); - - // Check that all block address forward references got resolved (as we - // promised above). + + // Check that all block address forward references got resolved. if (!BasicBlockFwdRefs.empty()) return Error("Never resolved function from blockaddress"); @@ -4564,7 +4717,7 @@ } std::error_code BitcodeReader::InitStream() { - if (LazyStreamer) + if (Streamer) return InitLazyStream(); return InitStreamFromBuffer(); } @@ -4591,7 +4744,7 @@ std::error_code BitcodeReader::InitLazyStream() { // Check and strip off the bitcode wrapper; BitstreamReader expects never to // see it. - auto OwnedBytes = llvm::make_unique(LazyStreamer); + auto OwnedBytes = llvm::make_unique(Streamer); StreamingMemoryObject &Bytes = *OwnedBytes; StreamFile = llvm::make_unique(std::move(OwnedBytes)); Stream.init(&*StreamFile); @@ -4659,20 +4812,11 @@ new BitcodeReader(Buffer.get(), Context, DiagnosticHandler); M->setMaterializer(R); - auto cleanupOnError = [&](std::error_code EC) { - R->releaseBuffer(); // Never take ownership on error. + if (std::error_code EC = + R->parseBitcodeInto(M, WillMaterializeAll, ShouldLazyLoadMetadata)) { delete M; // Also deletes R. return EC; - }; - - // Delay parsing Metadata if ShouldLazyLoadMetadata is true. - if (std::error_code EC = R->ParseBitcodeInto(M, ShouldLazyLoadMetadata)) - return cleanupOnError(EC); - - if (!WillMaterializeAll) - // Resolve forward references from blockaddresses. - if (std::error_code EC = R->materializeForwardReferencedFunctions()) - return cleanupOnError(EC); + } Buffer.release(); // The BitcodeReader owns it now. return M; @@ -4694,7 +4838,7 @@ std::unique_ptr M = make_unique(Name, Context); BitcodeReader *R = new BitcodeReader(Streamer, Context, DiagnosticHandler); M->setMaterializer(R); - if (std::error_code EC = R->ParseBitcodeInto(M.get())) + if (std::error_code EC = R->parseBitcodeInto(M.get(), false, false)) return EC; return std::move(M); } @@ -4708,11 +4852,6 @@ if (!ModuleOrErr) return ModuleOrErr; Module *M = ModuleOrErr.get(); - // Read in the entire module, and destroy the BitcodeReader. - if (std::error_code EC = M->materializeAllPermanently()) { - delete M; - return EC; - } // TODO: Restore the use-lists to the in-memory state when the bitcode was // written. We must defer until the Module has been fully materialized. Index: test/Bitcode/invalid.test =================================================================== --- test/Bitcode/invalid.test +++ test/Bitcode/invalid.test @@ -99,7 +99,8 @@ FWDREF-TYPE: Invalid record -RUN: not llvm-dis -disable-output %p/Inputs/invalid-fwdref-type-mismatch-2.bc 2>&1 | \ +RUN: not llvm-dis -disable-output %p/Inputs/invalid-fwdref-type-mismatch-2.bc 2>&1 \ +RUN: -old-lazy-bitcode-parser | \ RUN: FileCheck --check-prefix=FWDREF-TYPE-MISMATCH %s FWDREF-TYPE-MISMATCH: Type mismatch in constant table! @@ -143,7 +144,8 @@ EXTRACT-0-IDXS: EXTRACTVAL: Invalid instruction with 0 indices -RUN: not llvm-dis -disable-output %p/Inputs/invalid-load-ptr-type.bc 2>&1 | \ +RUN: not llvm-dis -disable-output %p/Inputs/invalid-load-ptr-type.bc 2>&1 \ +RUN: -old-lazy-bitcode-parser | \ RUN: FileCheck --check-prefix=BAD-LOAD-PTR-TYPE %s BAD-LOAD-PTR-TYPE: Cannot load/store from pointer