diff --git a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h --- a/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h +++ b/llvm/include/llvm/Bitcode/BitcodeAnalyzer.h @@ -52,6 +52,7 @@ CurStreamTypeType CurStreamType; Optional BlockInfoStream; unsigned NumTopBlocks = 0; + uint64_t Size = UINT64_MAX; struct PerRecordStats { unsigned NumInstances; diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h --- a/llvm/include/llvm/Bitcode/BitcodeReader.h +++ b/llvm/include/llvm/Bitcode/BitcodeReader.h @@ -78,7 +78,7 @@ // Calls the ctor. friend Expected - getBitcodeFileContents(MemoryBufferRef Buffer); + getBitcodeFileContentsFromPrefix(MemoryBufferRef &Buffer); Expected> getModuleImpl(LLVMContext &Context, bool MaterializeAll, @@ -129,7 +129,12 @@ /// the symbol table embedded in the bitcode file. Clients which require a /// symbol table should prefer to use irsymtab::read instead of this function /// because it creates a reader for the irsymtab and handles upgrading bitcode - /// files without a symbol table or with an old symbol table. + /// files without a symbol table or with an old symbol table. Buffer is + /// modified to represent the unparsed part. + Expected + getBitcodeFileContentsFromPrefix(MemoryBufferRef &Buffer); + + /// Returns the contents of a bitcode file, assuming there is only one stream. Expected getBitcodeFileContents(MemoryBufferRef Buffer); /// Returns a list of modules in the specified bitcode buffer. diff --git a/llvm/include/llvm/Bitcode/BitcodeWriter.h b/llvm/include/llvm/Bitcode/BitcodeWriter.h --- a/llvm/include/llvm/Bitcode/BitcodeWriter.h +++ b/llvm/include/llvm/Bitcode/BitcodeWriter.h @@ -51,6 +51,13 @@ ~BitcodeWriter(); + /// Emit a BITCODE_SIZE_BLOCK_ID block which encodes a 64-bit integer + /// representing the size (in bytes) of the bitstream. This is used to mark + /// the end of the bitstream. 
At the end of the bitstream, call + /// backfillSizeBlock to fill in the size field. + uint64_t reserveSizeBlock(); + void backfillSizeBlock(uint64_t SizeBitPos); + /// Attempt to write a symbol table to the bitcode file. This must be called /// at most once after all modules have been written. /// diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -60,6 +60,8 @@ SYMTAB_BLOCK_ID, SYNC_SCOPE_NAMES_BLOCK_ID, + + BITCODE_SIZE_BLOCK_ID, }; /// Identification block contains a string that describes the producer details, diff --git a/llvm/include/llvm/Bitstream/BitstreamWriter.h b/llvm/include/llvm/Bitstream/BitstreamWriter.h --- a/llvm/include/llvm/Bitstream/BitstreamWriter.h +++ b/llvm/include/llvm/Bitstream/BitstreamWriter.h @@ -72,8 +72,6 @@ reinterpret_cast(&Value + 1)); } - size_t GetBufferOffset() const { return Out.size(); } - size_t GetWordIndex() const { size_t Offset = GetBufferOffset(); assert((Offset & 3) == 0 && "Not 32-bit aligned"); @@ -89,6 +87,8 @@ assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance"); } + size_t GetBufferOffset() const { return Out.size(); } + /// Retrieve the current position in the stream, in bits. uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; } diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -80,6 +80,8 @@ return "STRTAB_BLOCK"; case bitc::SYMTAB_BLOCK_ID: return "SYMTAB_BLOCK"; + case bitc::BITCODE_SIZE_BLOCK_ID: + return "BITCODE_SIZE_BLOCK_ID"; } } @@ -593,7 +595,8 @@ } // Parse the top-level structure. We only allow blocks at the top-level. - while (!Stream.AtEndOfStream()) { + // Stop if BITCODE_SIZE_BLOCK_ID exists and the cursor is at the end. 
+ while (!Stream.AtEndOfStream() && Stream.getCurrentByteNo() != Size) { Expected MaybeCode = Stream.ReadCode(); if (!MaybeCode) return MaybeCode.takeError(); @@ -777,6 +780,28 @@ << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n"; } + if (BlockID == bitc::BITCODE_SIZE_BLOCK_ID) { + Expected MaybeSize = Stream.Read(32); + if (!MaybeSize) + return MaybeSize.takeError(); + Size = *MaybeSize; + MaybeSize = Stream.Read(32); + if (!MaybeSize) + return MaybeSize.takeError(); + Size |= static_cast(*MaybeSize) << 32; + + Expected MaybeEntry = Stream.advance(); + if (!MaybeEntry) + return MaybeEntry.takeError(); + if (MaybeEntry.get().Kind != BitstreamEntry::EndBlock) + return reportError("malformed BITCODE_SIZE_BLOCK_ID"); + if (DumpRecords) { + O->OS << Indent << " \n"; + O->OS << Indent << "\n"; + } + return Error::success(); + } + SmallVector Record; // Keep the offset to the metadata index if seen. diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -6492,21 +6492,33 @@ } Expected -llvm::getBitcodeFileContents(MemoryBufferRef Buffer) { +llvm::getBitcodeFileContentsFromPrefix(MemoryBufferRef &Buffer) { Expected StreamOrErr = initStream(Buffer); if (!StreamOrErr) return StreamOrErr.takeError(); BitstreamCursor &Stream = *StreamOrErr; BitcodeFileContents F; + uint64_t Size = UINT64_MAX; while (true) { uint64_t BCBegin = Stream.getCurrentByteNo(); + if (BCBegin >= Size) { + if (BCBegin == Size) { + Buffer = MemoryBufferRef(Buffer.getBuffer().substr(BCBegin), + Buffer.getBufferIdentifier()); + return F; + } + return error("byte position 0x" + Twine::utohexstr(BCBegin) + + " is greater than bitcode size 0x" + Twine::utohexstr(Size)); + } // We may be consuming bitcode from a client that leaves garbage at the end // of the bitcode stream (e.g. Apple's ar tool). 
If we are close enough to // the end that there cannot possibly be another module, stop looking. - if (BCBegin + 8 >= Stream.getBitcodeBytes().size()) + if (BCBegin + 8 >= Stream.getBitcodeBytes().size()) { + Buffer = MemoryBufferRef("", Buffer.getBufferIdentifier()); return F; + } Expected MaybeEntry = Stream.advance(); if (!MaybeEntry) @@ -6519,6 +6531,29 @@ return error("Malformed block"); case BitstreamEntry::SubBlock: { + // A BITCODE_SIZE_BLOCK_ID block encodes a 64-bit integer representing the + // size (in bytes) of the bitstream. This is used to mark the end of the + // bitstream. + if (Entry.ID == bitc::BITCODE_SIZE_BLOCK_ID) { + if (Error Err = Stream.EnterSubBlock(Entry.ID)) + return std::move(Err); + Expected MaybeSize = Stream.Read(32); + if (!MaybeSize) + return MaybeSize.takeError(); + Size = *MaybeSize; + MaybeSize = Stream.Read(32); + if (!MaybeSize) + return MaybeSize.takeError(); + Size |= static_cast(*MaybeSize) << 32; + + MaybeEntry = Stream.advance(); + if (!MaybeEntry) + return MaybeEntry.takeError(); + if (MaybeEntry.get().Kind != BitstreamEntry::EndBlock) + return error("Malformed block"); + continue; + } + uint64_t IdentificationBit = -1ull; if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) { IdentificationBit = Stream.GetCurrentBitNo() - BCBegin * 8; @@ -6601,6 +6636,11 @@ } } +Expected +llvm::getBitcodeFileContents(MemoryBufferRef Buffer) { + return getBitcodeFileContentsFromPrefix(Buffer); +} + /// Get a lazy one-at-time loading module from bitcode. /// /// This isn't always used in a lazy context. 
In particular, it's also used by diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -4473,6 +4473,19 @@ Stream->ExitBlock(); } +uint64_t BitcodeWriter::reserveSizeBlock() { + Stream->EnterSubblock(bitc::BITCODE_SIZE_BLOCK_ID, 2); + const uint64_t Pos = Stream->GetCurrentBitNo(); + Stream->Emit(0, 32); + Stream->Emit(0, 32); + Stream->ExitBlock(); + return Pos; +} + +void BitcodeWriter::backfillSizeBlock(uint64_t SizeBitPos) { + Stream->BackpatchWord64(SizeBitPos, Stream->GetBufferOffset()); +} + void BitcodeWriter::writeSymtab() { assert(!WroteStrtab && !WroteSymtab); @@ -4566,10 +4579,13 @@ Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0); BitcodeWriter Writer(Buffer); + const uint64_t SizeBitPos = Writer.reserveSizeBlock(); Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); Writer.writeSymtab(); Writer.writeStrtab(); + // Backfill the BITCODE_SIZE_BLOCK_ID block after the stream is complete. + Writer.backfillSizeBlock(SizeBitPos); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) emitDarwinBCHeaderAndTrailer(Buffer, TT); diff --git a/llvm/test/Assembler/multi-mod-disassemble.ll b/llvm/test/Assembler/multi-mod-disassemble.ll --- a/llvm/test/Assembler/multi-mod-disassemble.ll +++ b/llvm/test/Assembler/multi-mod-disassemble.ll @@ -1,15 +1,40 @@ -; RUN: llvm-as %s -o %t.o -; RUN: llvm-cat -b -o %t2.o %t.o %t.o -; RUN: llvm-dis -o %t3 %t2.o -; RUN: llvm-as -o %t4.o %t3.0 -; RUN: llvm-as -o %t5.o %t3.1 -; RUN: cmp %t4.o %t5.o -; RUN: llvm-cat -b -o %t6.o %t5.o %t4.o -; RUN: llvm-dis -o %t7.o %t6.o -; RUN: diff %t7.o.0 %t7.o.1 -; RUN: FileCheck < %t7.o.0 %s -; RUN: FileCheck < %t7.o.1 %s +; RUN: rm -rf %t && mkdir %t && cd %t + +;; A single bitstream with multiple modules can be disassembled. 
+; RUN: llvm-as %s -o a.bc +; RUN: llvm-cat -b -o a2.bc a.bc a.bc +; RUN: llvm-dis -o a3 a2.bc +; RUN: llvm-as -o a3.0.bc a3.0 +; RUN: llvm-as -o a3.1.bc a3.1 +; RUN: cmp a3.0.bc a3.1.bc + +; RUN: llvm-cat -b -o a4.bc a3.1.bc a3.0.bc +; RUN: llvm-dis -o a5.bc a4.bc +; RUN: diff a5.bc.0 a5.bc.1 +; RUN: FileCheck < a5.bc.0 %s +; RUN: FileCheck < a5.bc.1 %s ; CHECK: source_filename = "{{.*}}multi-mod-disassemble.ll" + +;; Concatenated bitstreams can be dumped. For a component bitstream with multiple modules, +;; the output filenames get an additional `.N` suffix. +; RUN: cat a.bc a2.bc > a6.bc +; RUN: llvm-dis -o a7 a6.bc +; RUN: ls a7.0 a7.1.0 a7.1.1 +; RUN: llvm-dis < a6.bc | FileCheck %s --check-prefix=CONCAT + +; CONCAT: source_filename = "{{.*}}multi-mod-disassemble.ll" +; CONCAT: source_filename = "{{.*}}multi-mod-disassemble.ll" +; CONCAT: source_filename = "{{.*}}multi-mod-disassemble.ll" + +;; llvm-bcanalyzer only dumps the prefix. +; RUN: llvm-bcanalyzer --dump a6.bc | FileCheck %s --check-prefix=CONCAT-DUMP --match-full-lines --strict-whitespace + +; CONCAT-DUMP: +; CONCAT-DUMP-NEXT: +; CONCAT-DUMP-NEXT: +; CONCAT-DUMP-NEXT:(argv[0])); - cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n"); - - std::unique_ptr MB = - ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputFilename))); - - BitcodeFileContents IF = ExitOnErr(llvm::getBitcodeFileContents(*MB)); - - const size_t N = IF.Mods.size(); - - if (OutputFilename == "-" && N > 1) - errs() << "only single module bitcode files can be written to stdout\n"; - - for (size_t i = 0; i < N; ++i) { +static bool disassemble(int Nth, const BitcodeFileContents &IF, + LLVMContext &Ctx) { + for (size_t i = 0, N = IF.Mods.size(); i != N; ++i) { BitcodeModule MB = IF.Mods[i]; - std::unique_ptr M = ExitOnErr(MB.getLazyModule(Context, MaterializeMetadata, - SetImporting)); + std::unique_ptr M = + ExitOnErr(MB.getLazyModule(Ctx, MaterializeMetadata, SetImporting)); if (MaterializeMetadata) 
ExitOnErr(M->materializeMetadata()); else @@ -180,33 +162,30 @@ if (LTOInfo.HasSummary) Index = ExitOnErr(MB.getSummary()); - std::string FinalFilename(OutputFilename); + std::string OutName(OutputFilename); // Just use stdout. We won't actually print anything on it. - if (DontPrint) - FinalFilename = "-"; - - if (FinalFilename.empty()) { // Unspecified output, infer it. - if (InputFilename == "-") { - FinalFilename = "-"; - } else { - StringRef IFN = InputFilename; - FinalFilename = (IFN.endswith(".bc") ? IFN.drop_back(3) : IFN).str(); - if (N > 1) - FinalFilename += std::string(".") + std::to_string(i); - FinalFilename += ".ll"; - } + if (DontPrint || (OutName.empty() && InputFilename == "-")) { + OutName = "-"; } else { + if (OutName.empty()) { + StringRef IFN = InputFilename; + OutName = (IFN.endswith(".bc") ? IFN.drop_back(3) : IFN).str(); + } + if (Nth >= 0) + OutName += ("." + Twine(Nth)).str(); if (N > 1) - FinalFilename += std::string(".") + std::to_string(i); + OutName += ("." + Twine(i)).str(); + if (OutputFilename.empty()) + OutName += ".ll"; } std::error_code EC; std::unique_ptr Out( - new ToolOutputFile(FinalFilename, EC, sys::fs::OF_Text)); + new ToolOutputFile(OutName, EC, sys::fs::OF_Text)); if (EC) { errs() << EC.message() << '\n'; - return 1; + return false; } std::unique_ptr Annotator; @@ -223,6 +202,36 @@ // Declare success. 
Out->keep(); } + return true; +} + +int main(int argc, char **argv) { + InitLLVM X(argc, argv); + + ExitOnErr.setBanner(std::string(argv[0]) + ": error: "); + + LLVMContext Context; + Context.setDiagnosticHandler( + std::make_unique(argv[0])); + cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n"); + + std::unique_ptr Buffer = + ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputFilename))); + MemoryBufferRef MB(*Buffer); + + int Nth = 0; + bool Stop; + do { + BitcodeFileContents IF = + ExitOnErr(llvm::getBitcodeFileContentsFromPrefix(MB)); + Stop = MB.getBuffer().empty(); + if (!disassemble((Nth == 0 && Stop ? -1 : Nth), IF, Context)) + return 1; + ++Nth; + + // After parsing the prefix of MB as a complete bitstream, if MB is still + // non-empty, continue parsing succeeding bitstreams. + } while (!Stop); return 0; }