Index: include/llvm/Bitcode/LLVMBitCodes.h
===================================================================
--- include/llvm/Bitcode/LLVMBitCodes.h
+++ include/llvm/Bitcode/LLVMBitCodes.h
@@ -34,7 +34,10 @@
     CONSTANTS_BLOCK_ID,
     FUNCTION_BLOCK_ID,
 
-    UNUSED_ID1,
+    // Block intended to contain information on the bitcode versioning.
+    // Can be used to provide better error messages when we fail to parse a
+    // bitcode file.
+    IDENTIFICATION_BLOCK_ID,
 
     VALUE_SYMTAB_BLOCK_ID,
     METADATA_BLOCK_ID,
@@ -50,6 +53,22 @@
     OPERAND_BUNDLE_TAGS_BLOCK_ID
   };
 
+  /// Identification block contains a string that describes the producer details,
+  /// and an epoch that defines the auto-upgrade capability.
+  enum IdentificationCodes {
+    IDENTIFICATION_CODE_STRING = 1, // IDENTIFICATION: [strchr x N]
+    IDENTIFICATION_CODE_EPOCH = 2,  // EPOCH: [epoch#]
+  };
+
+  /// The epoch that defines the auto-upgrade compatibility for the bitcode.
+  ///
+  /// LLVM guarantees in a major release that a minor release can read bitcode
+  /// generated by previous minor releases. We translate this by making the reader
+  /// accept only bitcode with the same epoch, except for the X.0 release which
+  /// also accepts N-1.
+  enum {
+    BITCODE_CURRENT_EPOCH = 0
+  };
 
   /// MODULE blocks have a number of optional fields and subblocks.
   enum ModuleCodes {
Index: lib/Bitcode/Reader/BitcodeReader.cpp
===================================================================
--- lib/Bitcode/Reader/BitcodeReader.cpp
+++ lib/Bitcode/Reader/BitcodeReader.cpp
@@ -152,6 +152,8 @@
   uint64_t LastFunctionBlockBit = 0;
   bool SeenValueSymbolTable = false;
   unsigned VSTOffset = 0;
+  // Contains an arbitrary and optional string identifying the bitcode producer
+  std::string ProducerIdentification;
 
   std::vector TypeList;
   BitcodeReaderValueList ValueList;
@@ -273,6 +275,11 @@
   void setStripDebugInfo() override;
 
 private:
+  /// Parse the "IDENTIFICATION_BLOCK_ID" block, populate the
+  /// ProducerIdentification data member, and do some basic enforcement on the
+  /// "epoch" encoded in the bitcode.
+  std::error_code parseBitcodeVersion();
+
   std::vector IdentifiedStructTypes;
   StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
   StructType *createIdentifiedStructType(LLVMContext &Context);
@@ -518,10 +525,21 @@
 }
 
 std::error_code BitcodeReader::error(BitcodeError E, const Twine &Message) {
+  // Build the message inline: a named Twine would dangle past its temporaries.
+  if (!ProducerIdentification.empty())
+    return ::error(DiagnosticHandler, make_error_code(E),
+                   Message + " (Producer: '" + ProducerIdentification +
+                       "' Reader: 'LLVM " + LLVM_VERSION_STRING "')");
   return ::error(DiagnosticHandler, make_error_code(E), Message);
 }
 
 std::error_code BitcodeReader::error(const Twine &Message) {
+  // Build the message inline: a named Twine would dangle past its temporaries.
+  if (!ProducerIdentification.empty())
+    return ::error(DiagnosticHandler,
+                   make_error_code(BitcodeError::CorruptedBitcode),
+                   Message + " (Producer: '" + ProducerIdentification +
+                       "' Reader: 'LLVM " + LLVM_VERSION_STRING "')");
   return ::error(DiagnosticHandler,
                  make_error_code(BitcodeError::CorruptedBitcode), Message);
 }
@@ -3061,6 +3079,50 @@
   }
 }
 
+std::error_code BitcodeReader::parseBitcodeVersion() {
+  if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID))
+    return error("Invalid record");
+
+
+  // Read all the records.
+  SmallVector<uint64_t, 64> Record;
+  while (1) {
+    BitstreamEntry Entry = Stream.advance();
+
+    switch (Entry.Kind) {
+    default:
+    case BitstreamEntry::Error:
+      return error("Malformed block");
+    case BitstreamEntry::EndBlock:
+      return std::error_code();
+    case BitstreamEntry::Record:
+      // The interesting case.
+      break;
+    }
+
+    // Read a record.
+    Record.clear();
+    unsigned BitCode = Stream.readRecord(Entry.ID, Record);
+    switch (BitCode) {
+    default: // Default behavior: reject
+      return error("Invalid value");
+    case bitc::IDENTIFICATION_CODE_STRING: { // IDENTIFICATION: [strchr x N]
+      convertToString(Record, 0, ProducerIdentification);
+      break;
+    }
+    case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#]
+      if (Record.empty()) // Malformed input: no epoch operand to read.
+        return error("Invalid record");
+      unsigned epoch = (unsigned)Record[0];
+      if (epoch != bitc::BITCODE_CURRENT_EPOCH)
+        return error(Twine("Incompatible epoch: Bitcode '") + Twine(epoch) +
+                     "' vs current: '" + Twine(bitc::BITCODE_CURRENT_EPOCH) +
+                     "'");
+    }
+    }
+  }
+}
+
 std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
                                            bool ShouldLazyLoadMetadata) {
   if (ResumeBit)
@@ -3552,6 +3614,12 @@
     if (Entry.Kind != BitstreamEntry::SubBlock)
       return error("Malformed block");
 
+    if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
+      if (std::error_code EC = parseBitcodeVersion())
+        return EC;
+      continue;
+    }
+
     if (Entry.ID == bitc::MODULE_BLOCK_ID)
       return parseModule(0, ShouldLazyLoadMetadata);
 
Index: lib/Bitcode/Writer/BitcodeWriter.cpp
===================================================================
--- lib/Bitcode/Writer/BitcodeWriter.cpp
+++ lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -2830,6 +2830,16 @@
   Stream.ExitBlock();
 }
 
+// Create the "IDENTIFICATION_BLOCK_ID" containing a string with the current
+// llvm version, followed by the bitcode epoch.
+static void WriteIdentificationBlock(const Module *M, BitstreamWriter &Stream) {
+  Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5);
+  WriteStringRecord(bitc::IDENTIFICATION_CODE_STRING, "LLVM " LLVM_VERSION_STRING, 0, Stream);
+  SmallVector<unsigned, 1> Vals = { bitc::BITCODE_CURRENT_EPOCH };
+  Stream.EmitRecord(bitc::IDENTIFICATION_CODE_EPOCH, Vals);
+  Stream.ExitBlock();
+}
+
 /// WriteModule - Emit the specified module to the bitstream.
 static void WriteModule(const Module *M, BitstreamWriter &Stream,
                         bool ShouldPreserveUseListOrder,
@@ -3001,6 +3011,8 @@
   // Emit the file header.
   WriteBitcodeHeader(Stream);
 
+  WriteIdentificationBlock(M, Stream);
+
   // Emit the module.
   WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit,
               EmitFunctionSummary);
Index: test/Bitcode/identification.ll
===================================================================
--- /dev/null
+++ test/Bitcode/identification.ll
@@ -0,0 +1,6 @@
+; Check that a block "IDENTIFICATION_BLOCK_ID" is emitted.
+;RUN: llvm-as < %s | llvm-bcanalyzer -dump | FileCheck %s
+;CHECK: