Declares the BitcodeModule class (one module inside a bitcode file) and
getBitcodeModuleList() in the public bitcode reader header.

Index: llvm/include/llvm/Bitcode/BitcodeReader.h
===================================================================
--- llvm/include/llvm/Bitcode/BitcodeReader.h
+++ llvm/include/llvm/Bitcode/BitcodeReader.h
@@ -40,6 +40,40 @@
     return std::move(*Val);
   }
 
+  /// Represents a module in a bitcode file.
+  class BitcodeModule {
+    ArrayRef<uint8_t> Buffer;
+    StringRef ModuleIdentifier;
+    uint64_t IdentificationBit;
+    uint64_t ModuleBit;
+
+    BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
+                  uint64_t IdentificationBit, uint64_t ModuleBit)
+        : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
+          IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
+
+    // Calls the ctor.
+    friend Expected<std::vector<BitcodeModule>>
+    getBitcodeModuleList(MemoryBufferRef Buffer);
+
+    Expected<std::unique_ptr<Module>>
+    getModuleImpl(LLVMContext &Context, bool MaterializeAll,
+                  bool ShouldLazyLoadMetadata);
+
+  public:
+    /// Read the bitcode module and prepare for lazy deserialization of function
+    /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
+    Expected<std::unique_ptr<Module>>
+    getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata);
+
+    /// Read the entire bitcode module and return it.
+    Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context);
+  };
+
+  /// Returns a list of modules in the specified bitcode buffer.
+  Expected<std::vector<BitcodeModule>>
+  getBitcodeModuleList(MemoryBufferRef Buffer);
+
   /// Read the header of the specified bitcode buffer and prepare for lazy
   /// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
   /// lazily load metadata as well.
Moves top-level block scanning out of BitcodeReader::parseBitcodeInto() into
the new llvm::getBitcodeModuleList(), which records the bit offsets of each
identification/module block; BitcodeModule::getModuleImpl() then jumps to those
offsets instead of re-scanning the stream.

Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -607,7 +607,8 @@
   std::vector<std::string> BundleTags;
 
 public:
-  BitcodeReader(BitstreamCursor Stream, LLVMContext &Context);
+  BitcodeReader(BitstreamCursor Stream, StringRef ProducerIdentification,
+                LLVMContext &Context);
 
   Error materializeForwardReferencedFunctions();
 
@@ -841,9 +842,13 @@
   return std::error_code();
 }
 
-BitcodeReader::BitcodeReader(BitstreamCursor Stream, LLVMContext &Context)
-    : BitcodeReaderBase(std::move(Stream)), Context(Context), ValueList(Context),
-      MetadataList(Context) {}
+BitcodeReader::BitcodeReader(BitstreamCursor Stream,
+                             StringRef ProducerIdentification,
+                             LLVMContext &Context)
+    : BitcodeReaderBase(std::move(Stream)), Context(Context),
+      ValueList(Context), MetadataList(Context) {
+  this->ProducerIdentification = ProducerIdentification;
+}
 
 Error BitcodeReader::materializeForwardReferencedFunctions() {
   if (WillMaterializeAllForwardRefs)
@@ -4365,36 +4370,7 @@
 
 Error BitcodeReader::parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata) {
   TheModule = M;
-
-  // We expect a number of well-defined blocks, though we don't necessarily
-  // need to understand them all.
-  while (true) {
-    if (Stream.AtEndOfStream()) {
-      // We didn't really read a proper Module.
-      return error("Malformed IR file");
-    }
-
-    BitstreamEntry Entry =
-        Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
-
-    if (Entry.Kind != BitstreamEntry::SubBlock)
-      return error("Malformed block");
-
-    if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
-      Expected<std::string> ProducerIdentificationOrErr =
-          readIdentificationBlock(Stream);
-      if (!ProducerIdentificationOrErr)
-        return ProducerIdentificationOrErr.takeError();
-      ProducerIdentification = *ProducerIdentificationOrErr;
-      continue;
-    }
-
-    if (Entry.ID == bitc::MODULE_BLOCK_ID)
-      return parseModule(0, ShouldLazyLoadMetadata);
-
-    if (Stream.SkipBlock())
-      return error("Invalid record");
-  }
+  return parseModule(0, ShouldLazyLoadMetadata);
 }
 
 Error BitcodeReader::parseGlobalObjectAttachment(GlobalObject &GO,
@@ -6566,26 +6542,76 @@
 // External interface
 //===----------------------------------------------------------------------===//
 
+Expected<std::vector<BitcodeModule>>
+llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
+  Expected<BitstreamCursor> StreamOrErr = initStream(Buffer);
+  if (!StreamOrErr)
+    return StreamOrErr.takeError();
+  BitstreamCursor &Stream = *StreamOrErr;
+
+  uint64_t IdentificationBit = -1ull;
+  std::vector<BitcodeModule> Modules;
+  while (true) {
+    // We may be consuming bitcode from a client that leaves garbage at the end
+    // of the bitcode stream (e.g. Apple's ar tool). If we are close enough to
+    // the end that there cannot possibly be another module, stop looking.
+    if (Stream.getCurrentByteNo() + 8 >= Stream.getBitcodeBytes().size())
+      return Modules;
+
+    BitstreamEntry Entry = Stream.advance();
+    switch (Entry.Kind) {
+    case BitstreamEntry::EndBlock:
+    case BitstreamEntry::Error:
+      return error("Malformed block");
+
+    case BitstreamEntry::SubBlock:
+      if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID)
+        IdentificationBit = Stream.GetCurrentBitNo();
+      else if (Entry.ID == bitc::MODULE_BLOCK_ID)
+        Modules.push_back({Stream.getBitcodeBytes(),
+                           Buffer.getBufferIdentifier(), IdentificationBit,
+                           Stream.GetCurrentBitNo()});
+
+      if (Stream.SkipBlock())
+        return error("Malformed block");
+      continue;
+    case BitstreamEntry::Record:
+      Stream.skipRecord(Entry.ID);
+      continue;
+    }
+  }
+}
+
 /// \brief Get a lazy one-at-time loading module from bitcode.
 ///
 /// This isn't always used in a lazy context.  In particular, it's also used by
-/// \a parseBitcodeFile().  If this is truly lazy, then we need to eagerly pull
+/// \a parseModule().  If this is truly lazy, then we need to eagerly pull
 /// in forward-referenced functions from block address references.
 ///
 /// \param[in] MaterializeAll Set to \c true if we should materialize
 /// everything.
-static Expected<std::unique_ptr<Module>>
-getLazyBitcodeModuleImpl(MemoryBufferRef Buffer, LLVMContext &Context,
-                         bool MaterializeAll,
-                         bool ShouldLazyLoadMetadata = false) {
-  Expected<BitstreamCursor> StreamOrErr = initStream(Buffer);
-  if (!StreamOrErr)
-    return StreamOrErr.takeError();
+Expected<std::unique_ptr<Module>>
+BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
+                             bool ShouldLazyLoadMetadata) {
+  BitstreamCursor Stream(Buffer);
 
-  BitcodeReader *R = new BitcodeReader(std::move(*StreamOrErr), Context);
+  std::string ProducerIdentification;
+  if (IdentificationBit != -1ull) {
+    Stream.JumpToBit(IdentificationBit);
+    Expected<std::string> ProducerIdentificationOrErr =
+        readIdentificationBlock(Stream);
+    if (!ProducerIdentificationOrErr)
+      return ProducerIdentificationOrErr.takeError();
+
+    ProducerIdentification = *ProducerIdentificationOrErr;
+  }
+
+  Stream.JumpToBit(ModuleBit);
+  auto *R =
+      new BitcodeReader(std::move(Stream), ProducerIdentification, Context);
 
   std::unique_ptr<Module> M =
-      llvm::make_unique<Module>(Buffer.getBufferIdentifier(), Context);
+      llvm::make_unique<Module>(ModuleIdentifier, Context);
   M->setMaterializer(R);
 
   // Delay parsing Metadata if ShouldLazyLoadMetadata is true.
@@ -6605,10 +6631,22 @@
 }
 
 Expected<std::unique_ptr<Module>>
+BitcodeModule::getLazyModule(LLVMContext &Context,
+                             bool ShouldLazyLoadMetadata) {
+  return getModuleImpl(Context, false, ShouldLazyLoadMetadata);
+}
+
+Expected<std::unique_ptr<Module>>
 llvm::getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
                            bool ShouldLazyLoadMetadata) {
-  return getLazyBitcodeModuleImpl(Buffer, Context, false,
-                                  ShouldLazyLoadMetadata);
+  Expected<std::vector<BitcodeModule>> MsOrErr = getBitcodeModuleList(Buffer);
+  if (!MsOrErr)
+    return MsOrErr.takeError();
+
+  if (MsOrErr->size() != 1)
+    return error("Expected a single module");
+
+  return (*MsOrErr)[0].getLazyModule(Context, ShouldLazyLoadMetadata);
 }
 
 Expected<std::unique_ptr<Module>>
@@ -6621,13 +6659,25 @@
   return MOrErr;
 }
 
-Expected<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer,
-                                                         LLVMContext &Context) {
-  return getLazyBitcodeModuleImpl(Buffer, Context, true);
+Expected<std::unique_ptr<Module>>
+BitcodeModule::parseModule(LLVMContext &Context) {
+  return getModuleImpl(Context, true, false);
   // TODO: Restore the use-lists to the in-memory state when the bitcode was
   // written.  We must defer until the Module has been fully materialized.
 }
 
+Expected<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer,
+                                                         LLVMContext &Context) {
+  Expected<std::vector<BitcodeModule>> MsOrErr = getBitcodeModuleList(Buffer);
+  if (!MsOrErr)
+    return MsOrErr.takeError();
+
+  if (MsOrErr->size() != 1)
+    return error("Expected a single module");
+
+  return (*MsOrErr)[0].parseModule(Context);
+}
+
 Expected<std::string> llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer) {
   Expected<BitstreamCursor> StreamOrErr = initStream(Buffer);
   if (!StreamOrErr)

Test expectations below are updated to the diagnostics now emitted: "Malformed
block" for the listed invalid inputs and "Expected a single module" when no
module block is found.

Index: llvm/test/Bitcode/invalid.test
===================================================================
--- llvm/test/Bitcode/invalid.test
+++ llvm/test/Bitcode/invalid.test
@@ -31,11 +31,11 @@
 
 INVALID-EMPTY: Invalid bitcode signature
 INVALID-ENCODING: Invalid encoding
-BAD-ABBREV: Abbreviation starts with an Array or a Blob
-UNEXPECTED-EOF: Unexpected end of file
-BAD-ABBREV-NUMBER: Invalid abbrev number
+BAD-ABBREV: Malformed block
+UNEXPECTED-EOF: Malformed block
+BAD-ABBREV-NUMBER: Malformed block
 BAD-TYPE-TABLE-FORWARD-REF: Invalid TYPE table: Only named structs can be forward referenced
-BAD-BITWIDTH: Bitwidth for integer type out of range
+BAD-BITWIDTH: Malformed block
 BAD-ALIGN: Invalid alignment value
 MISMATCHED-EXPLICIT-GEP: Explicit gep type does not match pointee type of pointer operand
 MISMATCHED-EXPLICIT-LOAD: Explicit load/store type does not match pointee type of pointer operand
@@ -69,7 +69,7 @@
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-no-proper-module.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=NO-MODULE %s
 
-NO-MODULE: Malformed IR file
+NO-MODULE: Expected a single module
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-fp-shift.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=FP-SHIFT %s
@@ -105,7 +105,7 @@
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-fwdref-type-mismatch-2.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=FWDREF-TYPE-MISMATCH %s
 
-FWDREF-TYPE-MISMATCH: Type mismatch in constant table!
+FWDREF-TYPE-MISMATCH: Malformed block
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-array-element-type.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=ELEMENT-TYPE %s
@@ -154,7 +154,7 @@
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-load-ptr-type.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=BAD-LOAD-PTR-TYPE %s
 
-BAD-LOAD-PTR-TYPE: Cannot load/store from pointer
+BAD-LOAD-PTR-TYPE: Malformed block
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-inserted-value-type-mismatch.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=INSERT-TYPE-MISMATCH %s
@@ -174,7 +174,7 @@
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-function-comdat-id.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=INVALID-FCOMDAT-ID %s
 
-INVALID-FCOMDAT-ID: Invalid function comdat ID
+INVALID-FCOMDAT-ID: Malformed block
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-global-var-comdat-id.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=INVALID-GVCOMDAT-ID %s
@@ -189,12 +189,12 @@
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-array-operand-encoding.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=ARRAY-OP-ENC %s
 
-ARRAY-OP-ENC: Array element type has to be an encoding of a type
+ARRAY-OP-ENC: Malformed block
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-metadata-not-followed-named-node.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=META-NOT-FOLLOWED-BY-NAMED-META %s
 
-META-NOT-FOLLOWED-BY-NAMED-META: METADATA_NAME not followed by METADATA_NAMED_NODE
+META-NOT-FOLLOWED-BY-NAMED-META: Malformed block
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-vector-length.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=VECTOR-LENGTH %s
@@ -214,7 +214,7 @@
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-name-with-0-byte.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=NAME-WITH-0 %s
 
-NAME-WITH-0: Invalid value name
+NAME-WITH-0: Malformed block
 
 RUN: not llvm-dis -disable-output %p/Inputs/invalid-void-constant.bc 2>&1 | \
 RUN:   FileCheck --check-prefix=VOID-CONSTANT-TYPE %s
Index: llvm/test/Bitcode/null-type.ll
===================================================================
--- llvm/test/Bitcode/null-type.ll
+++ llvm/test/Bitcode/null-type.ll
@@ -1,4 +1,4 @@
 ; RUN: not llvm-dis < %s.bc 2>&1 | FileCheck %s
 ; PR8494
 
-; CHECK: Invalid record
+; CHECK: Malformed block