Index: llvm/include/llvm/Bitcode/BitcodeReader.h =================================================================== --- llvm/include/llvm/Bitcode/BitcodeReader.h +++ llvm/include/llvm/Bitcode/BitcodeReader.h @@ -111,9 +111,14 @@ struct BitcodeFileContents { std::vector<BitcodeModule> Mods; + StringRef Symtab, StrtabForSymtab; }; - /// Returns the contents of a bitcode file. + /// Returns the contents of a bitcode file. This includes the raw contents of + /// the symbol table embedded in the bitcode file. Clients which require a + /// symbol table should prefer to use irsymtab::read instead of this function + /// because it creates a reader for the irsymtab and handles upgrading bitcode + /// files without a symbol table or with an old symbol table. Expected<BitcodeFileContents> getBitcodeFileContents(MemoryBufferRef Buffer); /// Returns a list of modules in the specified bitcode buffer. Index: llvm/include/llvm/Object/IRSymtab.h =================================================================== --- llvm/include/llvm/Object/IRSymtab.h +++ llvm/include/llvm/Object/IRSymtab.h @@ -255,6 +255,8 @@ /// copied into an irsymtab::Symbol object. symbol_range symbols() const; + size_t getNumModules() const { return Modules.size(); } + /// Returns a slice of the symbol table for the I'th module in the file. /// The symbols enumerated by this method are ephemeral, but they can be /// copied into an irsymtab::Symbol object. 
Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -5360,8 +5360,9 @@ return *ErrorCategory; } -static Expected<StringRef> readStrtab(BitstreamCursor &Stream) { - if (Stream.EnterSubBlock(bitc::STRTAB_BLOCK_ID)) +static Expected<StringRef> readBlobInRecord(BitstreamCursor &Stream, + unsigned Block, unsigned RecordID) { + if (Stream.EnterSubBlock(Block)) return error("Invalid record"); StringRef Strtab; @@ -5382,7 +5383,7 @@ case BitstreamEntry::Record: StringRef Blob; SmallVector<uint64_t, 1> Record; - if (Stream.readRecord(Entry.ID, Record, &Blob) == bitc::STRTAB_BLOB) + if (Stream.readRecord(Entry.ID, Record, &Blob) == RecordID) Strtab = Blob; break; } @@ -5450,7 +5451,8 @@ } if (Entry.ID == bitc::STRTAB_BLOCK_ID) { - Expected<StringRef> Strtab = readStrtab(Stream); + Expected<StringRef> Strtab = + readBlobInRecord(Stream, bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB); if (!Strtab) return Strtab.takeError(); // This string table is used by every preceding bitcode module that does @@ -5462,6 +5464,28 @@ break; I->Strtab = *Strtab; } + // Similarly, the string table is used by every preceding symbol table; + // normally there will be just one unless the bitcode file was created + // by binary concatenation. + if (!F.Symtab.empty() && F.StrtabForSymtab.empty()) + F.StrtabForSymtab = *Strtab; + continue; + } + + if (Entry.ID == bitc::SYMTAB_BLOCK_ID) { + Expected<StringRef> SymtabOrErr = + readBlobInRecord(Stream, bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB); + if (!SymtabOrErr) + return SymtabOrErr.takeError(); + + // We can expect the bitcode file to have multiple symbol tables if it + // was created by binary concatenation. In that case we silently + // ignore any subsequent symbol tables, which is fine because this is a + // low level function. 
The client is expected to notice that the number + // of modules in the symbol table does not match the number of modules + // in the input file and regenerate the symbol table. + if (F.Symtab.empty()) + F.Symtab = *SymtabOrErr; continue; } Index: llvm/lib/Object/IRSymtab.cpp =================================================================== --- llvm/lib/Object/IRSymtab.cpp +++ llvm/lib/Object/IRSymtab.cpp @@ -318,7 +318,31 @@ return make_error<StringError>("Bitcode file does not contain any modules", inconvertibleErrorCode()); - // Right now we have no on-disk representation of symbol tables, so we always - // upgrade. - return upgrade(BFC.Mods); + if (BFC.StrtabForSymtab.empty() || + BFC.Symtab.size() < sizeof(storage::Header)) + return upgrade(BFC.Mods); + + // We cannot use the regular reader to read the version and producer, because + // it will expect the header to be in the current format. The only thing we + // can rely on is that the version and producer will be present as the first + // struct elements. + auto *Hdr = reinterpret_cast<const storage::Header *>(BFC.Symtab.data()); + unsigned Version = Hdr->Version; + StringRef Producer = Hdr->Producer.get(BFC.StrtabForSymtab); + if (Version != storage::Header::kCurrentVersion || + Producer != kExpectedProducerName) + return upgrade(BFC.Mods); + + FileContents FC; + FC.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()}, + {BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}}; + + // Finally, make sure that the number of modules in the symbol table matches + // the number of modules in the bitcode file. If they differ, it may mean that + // the bitcode file was created by binary concatenation, so we need to create + // a new symbol table from scratch. 
+ if (FC.TheReader.getNumModules() != BFC.Mods.size()) + return upgrade(std::move(BFC.Mods)); + + return std::move(FC); } Index: llvm/test/Object/X86/irsymtab.ll =================================================================== --- llvm/test/Object/X86/irsymtab.ll +++ llvm/test/Object/X86/irsymtab.ll @@ -1,6 +1,12 @@ ; RUN: env LLVM_OVERRIDE_PRODUCER=producer opt -o %t %s ; RUN: llvm-bcanalyzer -dump -show-binary-blobs %t | FileCheck --check-prefix=BCA %s +; Same producer, does not require upgrade. +; RUN: env LLVM_OVERRIDE_PRODUCER=producer llvm-lto2 dump-symtab %t | FileCheck --check-prefix=SYMTAB %s + +; Different producer, requires upgrade. +; RUN: env LLVM_OVERRIDE_PRODUCER=consumer llvm-lto2 dump-symtab %t | FileCheck --check-prefix=SYMTAB %s + ; BCA: blob data = '\x00\x00\x00\x00\x06\x00\x00\x00\x08\x00\x00\x00D\x00\x00\x00\x01\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00P\x00\x00\x00\x02\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x0E\x00\x00\x00\x18\x00\x00\x00&\x00\x00\x00\x0B\x00\x00\x001\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x00$\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\x03\x00\x00\x00\xFF\xFF\xFF\xFF\x08$\x00\x00' @@ -9,6 +15,13 @@ ; BCA-NEXT: blob data = 'foobarproducerx86_64-unknown-linux-gnuirsymtab.ll' ; BCA-NEXT: +; SYMTAB: version: 0 +; SYMTAB-NEXT: producer: producer +; SYMTAB-NEXT: target triple: x86_64-unknown-linux-gnu +; SYMTAB-NEXT: source filename: irsymtab.ll +; SYMTAB-NEXT: D------X foo +; SYMTAB-NEXT: DU-----X bar + target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" source_filename = "irsymtab.ll" Index: llvm/tools/llvm-lto2/llvm-lto2.cpp =================================================================== --- llvm/tools/llvm-lto2/llvm-lto2.cpp +++ llvm/tools/llvm-lto2/llvm-lto2.cpp @@ -16,9 +16,10 @@ // 
//===----------------------------------------------------------------------===// -#include "llvm/LTO/Caching.h" +#include "llvm/Bitcode/BitcodeReader.h" #include "llvm/CodeGen/CommandFlags.h" #include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/LTO/Caching.h" #include "llvm/LTO/LTO.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" @@ -298,6 +299,17 @@ static int dumpSymtab(int argc, char **argv) { for (StringRef F : make_range(argv + 1, argv + argc)) { std::unique_ptr<MemoryBuffer> MB = check(MemoryBuffer::getFile(F), F); + BitcodeFileContents BFC = check(getBitcodeFileContents(*MB), F); + + if (BFC.Symtab.size() >= sizeof(irsymtab::storage::Header)) { + auto *Hdr = reinterpret_cast<const irsymtab::storage::Header *>( + BFC.Symtab.data()); + outs() << "version: " << Hdr->Version << '\n'; + if (Hdr->Version == irsymtab::storage::Header::kCurrentVersion) + outs() << "producer: " << Hdr->Producer.get(BFC.StrtabForSymtab) + << '\n'; + } + std::unique_ptr<InputFile> Input = check(InputFile::create(MB->getMemBufferRef()), F);