Index: llvm/include/llvm/Bitcode/BitcodeReader.h =================================================================== --- llvm/include/llvm/Bitcode/BitcodeReader.h +++ llvm/include/llvm/Bitcode/BitcodeReader.h @@ -40,6 +40,8 @@ return std::move(*Val); } + struct BitcodeFileContents; + /// Represents a module in a bitcode file. class BitcodeModule { // This covers the identification (if present) and module blocks. @@ -61,8 +63,8 @@ IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {} // Calls the ctor. - friend Expected> - getBitcodeModuleList(MemoryBufferRef Buffer); + friend Expected + getBitcodeFileContents(MemoryBufferRef Buffer); Expected> getModuleImpl(LLVMContext &Context, bool MaterializeAll, @@ -95,6 +97,17 @@ Expected> getSummary(); }; + /// Returns the contents of a bitcode file. This includes the raw contents of + /// the symbol table embedded in the bitcode file. Clients which require a + /// symbol table should prefer to use irsymtab::read instead of this function + /// because it creates a reader for the irsymtab and handles upgrading bitcode + /// files without a symbol table or with an old symbol table. + Expected getBitcodeFileContents(MemoryBufferRef Buffer); + struct BitcodeFileContents { + std::vector Mods; + StringRef Symtab, StrtabForSymtab; + }; + /// Returns a list of modules in the specified bitcode buffer. Expected> getBitcodeModuleList(MemoryBufferRef Buffer); Index: llvm/include/llvm/Bitcode/BitcodeWriter.h =================================================================== --- llvm/include/llvm/Bitcode/BitcodeWriter.h +++ llvm/include/llvm/Bitcode/BitcodeWriter.h @@ -28,18 +28,30 @@ std::unique_ptr Stream; StringTableBuilder StrtabBuilder{StringTableBuilder::RAW}; - bool WroteStrtab = false; + BumpPtrAllocator Alloc; + bool WroteStrtab = false, WroteSymtab = false; void writeBlob(unsigned Block, unsigned Record, StringRef Blob); + std::vector Mods; + public: /// Create a BitcodeWriter that writes to Buffer. BitcodeWriter(SmallVectorImpl &Buffer); ~BitcodeWriter(); + /// Attempt to write a symbol table to the bitcode file. This must be called + /// at most once after all modules have been written. + /// + /// A reader does not require a symbol table to interpret a bitcode file; + /// the symbol table is needed only to improve link-time performance. So + /// this function may decide not to write a symbol table. It may so decide + /// if, for example, the target is unregistered or the IR is malformed. + void writeSymtab(); + /// Write the bitcode file's string table. This must be called exactly once - /// after all modules have been written. + /// after all modules and the optional symbol table have been written. void writeStrtab(); /// Copy the string table for another module into this bitcode file. This @@ -94,7 +106,8 @@ bool ShouldPreserveUseListOrder = false, const ModuleSummaryIndex *Index = nullptr, bool GenerateHash = false, - ModuleHash *ModHash = nullptr); + ModuleHash *ModHash = nullptr, + bool WriteSymtab = true); /// Write the specified module summary index to the given raw output stream, /// where it will be written in a new bitcode block. This is used when Index: llvm/include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -22,7 +22,7 @@ namespace llvm { namespace bitc { -// The only top-level block types are MODULE, IDENTIFICATION and STRTAB. +// The only top-level block types are MODULE, IDENTIFICATION, STRTAB and SYMTAB. enum BlockIDs { // Blocks MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID, @@ -55,6 +55,7 @@ METADATA_KIND_BLOCK_ID, STRTAB_BLOCK_ID, + SYMTAB_BLOCK_ID, }; /// Identification block contains a string that describes the producer details, @@ -560,6 +561,10 @@ STRTAB_BLOB = 1, }; +enum SymtabCodes { + SYMTAB_BLOB = 1, +}; + } // End bitc namespace } // End llvm namespace Index: llvm/include/llvm/Object/IRSymtab.h =================================================================== --- llvm/include/llvm/Object/IRSymtab.h +++ llvm/include/llvm/Object/IRSymtab.h @@ -38,6 +38,7 @@ namespace llvm { class BitcodeModule; +class StringTableBuilder; namespace irsymtab { @@ -123,6 +124,18 @@ }; struct Header { + /// Version number of the symtab format. This number should be incremented + /// when the format changes, but it does not need to be incremented if a + /// change to LLVM would cause it to create a different symbol table. + Word Version; + enum { kCurrentVersion = 0 }; + + /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION). + /// Consumers should rebuild the symbol table from IR if the producer's + /// version does not match the consumer's version due to potential differences + /// in symbol table format, symbol enumeration order and so on. + Str Producer; + Range Modules; Range Comdats; Range Symbols; @@ -136,9 +149,10 @@ } // end namespace storage -/// Fills in Symtab and Strtab with a valid symbol and string table for Mods. -Error build(ArrayRef Mods, SmallVector &Symtab, - SmallVector &Strtab); +/// Fills in Symtab and StrtabBuilder with a valid symbol and string table for +/// Mods. +Error build(ArrayRef Mods, SmallVector &Symtab, + StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc); /// This represents a symbol that has been read from a storage::Symbol and /// possibly a storage::Uncommon. @@ -241,6 +255,8 @@ /// copied into an irsymtab::Symbol object. symbol_range symbols() const; + size_t getNumModules() const { return Modules.size(); } + /// Returns a slice of the symbol table for the I'th module in the file. /// The symbols enumerated by this method are ephemeral, but they can be /// copied into an irsymtab::Symbol object. Index: llvm/lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -5323,8 +5323,9 @@ return *ErrorCategory; } -static Expected readStrtab(BitstreamCursor &Stream) { - if (Stream.EnterSubBlock(bitc::STRTAB_BLOCK_ID)) +static Expected readBlobInRecord(BitstreamCursor &Stream, + unsigned Block, unsigned RecordID) { + if (Stream.EnterSubBlock(Block)) return error("Invalid record"); StringRef Strtab; @@ -5345,7 +5346,7 @@ case BitstreamEntry::Record: StringRef Blob; SmallVector Record; - if (Stream.readRecord(Entry.ID, Record, &Blob) == bitc::STRTAB_BLOB) + if (Stream.readRecord(Entry.ID, Record, &Blob) == RecordID) Strtab = Blob; break; } @@ -5358,12 +5359,20 @@ Expected> llvm::getBitcodeModuleList(MemoryBufferRef Buffer) { + auto FOrErr = getBitcodeFileContents(Buffer); + if (!FOrErr) + return FOrErr.takeError(); + return std::move(FOrErr->Mods); +} + +Expected +llvm::getBitcodeFileContents(MemoryBufferRef Buffer) { Expected StreamOrErr = initStream(Buffer); if (!StreamOrErr) return StreamOrErr.takeError(); BitstreamCursor &Stream = *StreamOrErr; - std::vector Modules; + BitcodeFileContents F; while (true) { uint64_t BCBegin = Stream.getCurrentByteNo(); @@ -5371,7 +5380,9 @@ // of the bitcode stream (e.g. Apple's ar tool). If we are close enough to // the end that there cannot possibly be another module, stop looking. if (BCBegin + 8 >= Stream.getBitcodeBytes().size()) - return Modules; + return F; + + StringRef Symtab, StrtabForSymtab; BitstreamEntry Entry = Stream.advance(); switch (Entry.Kind) { @@ -5397,26 +5408,41 @@ if (Stream.SkipBlock()) return error("Malformed block"); - Modules.push_back({Stream.getBitcodeBytes().slice( - BCBegin, Stream.getCurrentByteNo() - BCBegin), - Buffer.getBufferIdentifier(), IdentificationBit, - ModuleBit}); + F.Mods.push_back({Stream.getBitcodeBytes().slice( + BCBegin, Stream.getCurrentByteNo() - BCBegin), + Buffer.getBufferIdentifier(), IdentificationBit, + ModuleBit}); continue; } if (Entry.ID == bitc::STRTAB_BLOCK_ID) { - Expected Strtab = readStrtab(Stream); + Expected Strtab = + readBlobInRecord(Stream, bitc::STRTAB_BLOCK_ID, bitc::STRTAB_BLOB); if (!Strtab) return Strtab.takeError(); // This string table is used by every preceding bitcode module that does // not have its own string table. A bitcode file may have multiple // string tables if it was created by binary concatenation, for example // with "llvm-cat -b". - for (auto I = Modules.rbegin(), E = Modules.rend(); I != E; ++I) { + for (auto I = F.Mods.rbegin(), E = F.Mods.rend(); I != E; ++I) { if (!I->Strtab.empty()) break; I->Strtab = *Strtab; } + // Similarly, the string table is used by every preceding symbol table; + // normally there will be just one. + if (!F.Symtab.empty() && F.StrtabForSymtab.empty()) + F.StrtabForSymtab = *Strtab; + continue; + } + + if (Entry.ID == bitc::SYMTAB_BLOCK_ID) { + Expected SymtabOrErr = + readBlobInRecord(Stream, bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB); + if (!SymtabOrErr) + return SymtabOrErr.takeError(); + if (F.Symtab.empty()) + F.Symtab = *SymtabOrErr; continue; } Index: llvm/lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -33,7 +33,9 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Program.h" #include "llvm/Support/SHA1.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Object/IRSymtab.h" #include #include using namespace llvm; @@ -3922,6 +3924,38 @@ Stream->ExitBlock(); } +void BitcodeWriter::writeSymtab() { + assert(!WroteStrtab && !WroteSymtab); + + // If any module has module-level inline asm, we will require a registered asm + // parser for the target so that we can create an accurate symbol table for + // the module. + for (const Module *M : Mods) { + if (M->getModuleInlineAsm().empty()) + continue; + + std::string Err; + const Triple TT(M->getTargetTriple()); + const Target *T = TargetRegistry::lookupTarget(TT.str(), Err); + if (!T || !T->hasMCAsmParser()) + return; + } + + WroteSymtab = true; + SmallVector Symtab; + // The irsymtab::build function may be unable to create a symbol table if the + // module is malformed (e.g. it contains an invalid alias). Writing a symbol + // table is not required for correctness, but we still want to be able to + // write malformed modules to bitcode files, so swallow the error. + if (Error E = irsymtab::build(Mods, Symtab, StrtabBuilder, Alloc)) { + consumeError(std::move(E)); + return; + } + + writeBlob(bitc::SYMTAB_BLOCK_ID, bitc::SYMTAB_BLOB, + {Symtab.data(), Symtab.size()}); +} + void BitcodeWriter::writeStrtab() { assert(!WroteStrtab); @@ -3945,6 +3979,9 @@ bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, bool GenerateHash, ModuleHash *ModHash) { + assert(!WroteStrtab); + + Mods.push_back(M); ModuleBitcodeWriter ModuleWriter(M, Buffer, StrtabBuilder, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); @@ -3956,7 +3993,8 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, bool ShouldPreserveUseListOrder, const ModuleSummaryIndex *Index, - bool GenerateHash, ModuleHash *ModHash) { + bool GenerateHash, ModuleHash *ModHash, + bool WriteSymtab) { SmallVector Buffer; Buffer.reserve(256*1024); @@ -3969,6 +4007,8 @@ BitcodeWriter Writer(Buffer); Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash, ModHash); + if (WriteSymtab) + Writer.writeSymtab(); Writer.writeStrtab(); if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) Index: llvm/lib/Object/CMakeLists.txt =================================================================== --- llvm/lib/Object/CMakeLists.txt +++ llvm/lib/Object/CMakeLists.txt @@ -25,4 +25,5 @@ DEPENDS intrinsics_gen + llvm_vcsrevision_h ) Index: llvm/lib/Object/IRSymtab.cpp =================================================================== --- llvm/lib/Object/IRSymtab.cpp +++ llvm/lib/Object/IRSymtab.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/StringSaver.h" +#include "llvm/Support/VCSRevision.h" #include #include #include @@ -43,18 +44,22 @@ namespace { +char kExpectedProducerName[] = LLVM_VERSION_STRING +#ifdef LLVM_REVISION + " " LLVM_REVISION +#endif + ; + /// Stores the temporary state that is required to build an IR symbol table. struct Builder { SmallVector &Symtab; - SmallVector &Strtab; - - Builder(SmallVector &Symtab, SmallVector &Strtab) - : Symtab(Symtab), Strtab(Strtab) {} + StringTableBuilder &StrtabBuilder; - StringTableBuilder StrtabBuilder{StringTableBuilder::RAW}; + Builder(SmallVector &Symtab, StringTableBuilder &StrtabBuilder, + BumpPtrAllocator &Alloc) + : Symtab(Symtab), StrtabBuilder(StrtabBuilder), Saver(Alloc) {} - BumpPtrAllocator Alloc; - StringSaver Saver{Alloc}; + StringSaver Saver; DenseMap ComdatMap; Mangler Mang; @@ -81,15 +86,25 @@ reinterpret_cast(Objs.data() + Objs.size())); } - Error addModule(Module *M); + Error addModule(const Module *M); Error addSymbol(const ModuleSymbolTable &Msymtab, const SmallPtrSet &Used, ModuleSymbolTable::Symbol Sym); - Error build(ArrayRef Mods); + Error build(ArrayRef Mods); }; -Error Builder::addModule(Module *M) { +Error Builder::addModule(const Module *ConstM) { + if (ConstM->getDataLayoutStr().empty()) + return make_error("input module has no datalayout", + inconvertibleErrorCode()); + + // FIXME: Think about const correctness; ModuleSymbolTable and + // materializeMetadata need non-const Modules. We can't just propagate const + // into ModuleSymbolTable because LTO needs non-const references to + // GlobalValues. + Module *M = const_cast(ConstM); + SmallPtrSet Used; collectUsedGlobalVariables(*M, Used, /*CompilerUsed*/ false); @@ -220,10 +235,12 @@ return Error::success(); } -Error Builder::build(ArrayRef IRMods) { +Error Builder::build(ArrayRef IRMods) { storage::Header Hdr; assert(!IRMods.empty()); + Hdr.Version = storage::Header::kCurrentVersion; + setStr(Hdr.Producer, kExpectedProducerName); setStr(Hdr.TargetTriple, IRMods[0]->getTargetTriple()); setStr(Hdr.SourceFileName, IRMods[0]->getSourceFileName()); TT = Triple(IRMods[0]->getTargetTriple()); @@ -233,7 +250,7 @@ return Err; COFFLinkerOptsOS.flush(); - setStr(Hdr.COFFLinkerOpts, COFFLinkerOpts); + setStr(Hdr.COFFLinkerOpts, Saver.save(COFFLinkerOpts)); // We are about to fill in the header's range fields, so reserve space for it // and copy it in afterwards. @@ -244,37 +261,21 @@ writeRange(Hdr.Uncommons, Uncommons); *reinterpret_cast(Symtab.data()) = Hdr; - - raw_svector_ostream OS(Strtab); - StrtabBuilder.finalizeInOrder(); - StrtabBuilder.write(OS); - return Error::success(); } } // end anonymous namespace -Error irsymtab::build(ArrayRef Mods, SmallVector &Symtab, - SmallVector &Strtab) { - return Builder(Symtab, Strtab).build(Mods); +Error irsymtab::build(ArrayRef Mods, SmallVector &Symtab, + StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc) { + return Builder(Symtab, StrtabBuilder, Alloc).build(Mods); } -Expected irsymtab::read(MemoryBufferRef MBRef) { +// Upgrade a vector of bitcode modules created by an old version of LLVM by +// creating an irsymtab for them in the current format. +static Expected upgrade(std::vector BMs) { File F; - ErrorOr BCOrErr = - object::IRObjectFile::findBitcodeInMemBuffer(MBRef); - if (!BCOrErr) - return errorCodeToError(BCOrErr.getError()); - - Expected> BMsOrErr = - getBitcodeModuleList(*BCOrErr); - if (!BMsOrErr) - return BMsOrErr.takeError(); - - if (BMsOrErr->empty()) - return make_error("Bitcode file does not contain any modules", - inconvertibleErrorCode()); - F.Mods = std::move(*BMsOrErr); + F.Mods = std::move(BMs); LLVMContext Ctx; std::vector Mods; @@ -286,18 +287,64 @@ if (!MOrErr) return MOrErr.takeError(); - if ((*MOrErr)->getDataLayoutStr().empty()) - return make_error("input module has no datalayout", - inconvertibleErrorCode()); - Mods.push_back(MOrErr->get()); OwnedMods.push_back(std::move(*MOrErr)); } - if (Error E = build(Mods, F.Symtab, F.Strtab)) + StringTableBuilder StrtabBuilder(StringTableBuilder::RAW); + BumpPtrAllocator Alloc; + if (Error E = build(Mods, F.Symtab, StrtabBuilder, Alloc)) return std::move(E); + StrtabBuilder.finalizeInOrder(); + F.Strtab.resize(StrtabBuilder.getSize()); + StrtabBuilder.write((uint8_t *)F.Strtab.data()); + F.TheReader = {{F.Symtab.data(), F.Symtab.size()}, {F.Strtab.data(), F.Strtab.size()}}; return std::move(F); } + +Expected irsymtab::read(MemoryBufferRef MBRef) { + ErrorOr BCOrErr = + object::IRObjectFile::findBitcodeInMemBuffer(MBRef); + if (!BCOrErr) + return errorCodeToError(BCOrErr.getError()); + + Expected BFCOrErr = getBitcodeFileContents(*BCOrErr); + if (!BFCOrErr) + return BFCOrErr.takeError(); + BitcodeFileContents &BFC = *BFCOrErr; + if (BFC.Mods.empty()) + return make_error("Bitcode file does not contain any modules", + inconvertibleErrorCode()); + + if (BFC.StrtabForSymtab.empty() || + BFC.Symtab.size() < sizeof(storage::Header)) + return upgrade(std::move(BFC.Mods)); + + // We cannot use the regular reader to read the version and producer, because + // it will expect the header to be in the current format. The only thing we + // can rely on is that the version and producer will be present as the first + // struct elements. + auto *Hdr = reinterpret_cast(BFC.Symtab.data()); + unsigned Version = Hdr->Version; + StringRef Producer = Hdr->Producer.get(BFC.StrtabForSymtab); + if (Version != storage::Header::kCurrentVersion || + Producer != kExpectedProducerName) + return upgrade(std::move(BFC.Mods)); + + File F; + F.TheReader = {{BFC.Symtab.data(), BFC.Symtab.size()}, + {BFC.StrtabForSymtab.data(), BFC.StrtabForSymtab.size()}}; + + // Finally, make sure that the number of modules in the symbol table matches + // the number of modules in the bitcode file. If they differ, it may mean that + // the bitcode file was created by binary concatenation, so we need to create + // a new symbol table from scratch. + if (F.TheReader.getNumModules() != BFC.Mods.size()) + return upgrade(std::move(BFC.Mods)); + + F.Mods = std::move(BFC.Mods); + return std::move(F); +} Index: llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp =================================================================== --- llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -363,6 +363,7 @@ W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, /*GenerateHash=*/true, &ModHash); W.writeModule(MergedM.get()); + W.writeSymtab(); W.writeStrtab(); OS << Buffer; @@ -376,6 +377,7 @@ W2.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index, /*GenerateHash=*/false, &ModHash); W2.writeModule(MergedM.get()); + W2.writeSymtab(); W2.writeStrtab(); *ThinLinkOS << Buffer; } Index: llvm/test/Bitcode/invalid.test =================================================================== --- llvm/test/Bitcode/invalid.test +++ llvm/test/Bitcode/invalid.test @@ -112,9 +112,10 @@ RUN: not llvm-dis -disable-output %p/Inputs/invalid-vector-element-type.bc 2>&1 | \ RUN: FileCheck --check-prefix=ELEMENT-TYPE %s RUN: not llvm-dis -disable-output %p/Inputs/invalid-pointer-element-type.bc 2>&1 | \ -RUN: FileCheck --check-prefix=ELEMENT-TYPE %s +RUN: FileCheck --check-prefix=ELEMENT-TYPE2 %s ELEMENT-TYPE: Invalid type +ELEMENT-TYPE2: Invalid record RUN: not llvm-dis -disable-output %p/Inputs/invalid-cast.bc 2>&1 | \ RUN: FileCheck --check-prefix=INVALID-CAST %s Index: llvm/test/Bitcode/thinlto-alias.ll =================================================================== --- llvm/test/Bitcode/thinlto-alias.ll +++ llvm/test/Bitcode/thinlto-alias.ll @@ -18,7 +18,7 @@ ; CHECK-NEXT: ; CHECK: ; CHECK-NEXT: ; CHECK: ; CHECK: ; CHECK-NEXT: ; CHECK: ; CHECK: