Index: include/llvm/ADT/ArrayRef.h =================================================================== --- include/llvm/ADT/ArrayRef.h +++ include/llvm/ADT/ArrayRef.h @@ -354,6 +354,12 @@ return ArrayRef(Arr); } + /// Construct an ArrayRef from a std::array. + template + ArrayRef makeArrayRef(const std::array &Arr) { + return ArrayRef(Arr); + } + /// @} /// @name ArrayRef Comparison Operators /// @{ Index: include/llvm/Bitcode/BitstreamWriter.h =================================================================== --- include/llvm/Bitcode/BitstreamWriter.h +++ include/llvm/Bitcode/BitstreamWriter.h @@ -21,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitCodes.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/SHA1.h" #include namespace llvm { @@ -42,6 +43,12 @@ /// selected BLOCK ID. unsigned BlockInfoCurBID; + /// The writer has the ability to compute the hash of the stream on the fly. + SHA1 Hash; + + /// Flag to mark the hashing enabled/disabled. + bool HashEnabled = false; + /// CurAbbrevs - Abbrevs installed at in this block. std::vector> CurAbbrevs; @@ -69,6 +76,8 @@ void WriteWord(unsigned Value) { Value = support::endian::byte_swap(Value); + if (HashEnabled) + Hash.write(reinterpret_cast(&Value), sizeof(Value)); Out.append(reinterpret_cast(&Value), reinterpret_cast(&Value + 1)); } @@ -96,6 +105,14 @@ /// \brief Retrieve the number of bits currently used to encode an abbrev ID. unsigned GetAbbrevIDWidth() const { return CurCodeSize; } + /// Enable (and reset) the hashing + void enableHash(bool Enabled) { + HashEnabled = Enabled; + Hash.init(); + } + + StringRef getCurrentHash() { return Hash.result(); } + //===--------------------------------------------------------------------===// // Basic Primitives for emitting bits to the stream. 
//===--------------------------------------------------------------------===// Index: include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- include/llvm/Bitcode/LLVMBitCodes.h +++ include/llvm/Bitcode/LLVMBitCodes.h @@ -71,44 +71,47 @@ enum { BITCODE_CURRENT_EPOCH = 0 }; /// MODULE blocks have a number of optional fields and subblocks. - enum ModuleCodes { - MODULE_CODE_VERSION = 1, // VERSION: [version#] - MODULE_CODE_TRIPLE = 2, // TRIPLE: [strchr x N] - MODULE_CODE_DATALAYOUT = 3, // DATALAYOUT: [strchr x N] - MODULE_CODE_ASM = 4, // ASM: [strchr x N] - MODULE_CODE_SECTIONNAME = 5, // SECTIONNAME: [strchr x N] +enum ModuleCodes { + MODULE_CODE_VERSION = 1, // VERSION: [version#] + MODULE_CODE_TRIPLE = 2, // TRIPLE: [strchr x N] + MODULE_CODE_DATALAYOUT = 3, // DATALAYOUT: [strchr x N] + MODULE_CODE_ASM = 4, // ASM: [strchr x N] + MODULE_CODE_SECTIONNAME = 5, // SECTIONNAME: [strchr x N] - // FIXME: Remove DEPLIB in 4.0. - MODULE_CODE_DEPLIB = 6, // DEPLIB: [strchr x N] + // FIXME: Remove DEPLIB in 4.0. 
+ MODULE_CODE_DEPLIB = 6, // DEPLIB: [strchr x N] - // GLOBALVAR: [pointer type, isconst, initid, - // linkage, alignment, section, visibility, threadlocal] - MODULE_CODE_GLOBALVAR = 7, + // GLOBALVAR: [pointer type, isconst, initid, + // linkage, alignment, section, visibility, threadlocal] + MODULE_CODE_GLOBALVAR = 7, - // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, - // section, visibility, gc, unnamed_addr] - MODULE_CODE_FUNCTION = 8, + // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, + // section, visibility, gc, unnamed_addr] + MODULE_CODE_FUNCTION = 8, - // ALIAS: [alias type, aliasee val#, linkage, visibility] - MODULE_CODE_ALIAS_OLD = 9, + // ALIAS: [alias type, aliasee val#, linkage, visibility] + MODULE_CODE_ALIAS_OLD = 9, - // MODULE_CODE_PURGEVALS: [numvals] - MODULE_CODE_PURGEVALS = 10, + // MODULE_CODE_PURGEVALS: [numvals] + MODULE_CODE_PURGEVALS = 10, - MODULE_CODE_GCNAME = 11, // GCNAME: [strchr x N] - MODULE_CODE_COMDAT = 12, // COMDAT: [selection_kind, name] + MODULE_CODE_GCNAME = 11, // GCNAME: [strchr x N] + MODULE_CODE_COMDAT = 12, // COMDAT: [selection_kind, name] - MODULE_CODE_VSTOFFSET = 13, // VSTOFFSET: [offset] + MODULE_CODE_VSTOFFSET = 13, // VSTOFFSET: [offset] - // ALIAS: [alias value type, addrspace, aliasee val#, linkage, visibility] - MODULE_CODE_ALIAS = 14, + // ALIAS: [alias value type, addrspace, aliasee val#, linkage, visibility] + MODULE_CODE_ALIAS = 14, - // METADATA_VALUES: [numvals] - MODULE_CODE_METADATA_VALUES = 15, + // METADATA_VALUES: [numvals] + MODULE_CODE_METADATA_VALUES = 15, - // SOURCE_FILENAME: [namechar x N] - MODULE_CODE_SOURCE_FILENAME = 16, - }; + // SOURCE_FILENAME: [namechar x N] + MODULE_CODE_SOURCE_FILENAME = 16, + + // HASH: [5*i32] + MODULE_CODE_HASH = 17, +}; /// PARAMATTR blocks have code for defining a parameter attribute set. enum AttributeCodes { @@ -183,7 +186,7 @@ // The module path symbol table only has one code (MST_CODE_ENTRY). 
enum ModulePathSymtabCodes { - MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N] + MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, hash (5*i32), namechar x N] }; // The summary section uses different codes in the per-module Index: include/llvm/Bitcode/ReaderWriter.h =================================================================== --- include/llvm/Bitcode/ReaderWriter.h +++ include/llvm/Bitcode/ReaderWriter.h @@ -107,7 +107,8 @@ /// for use in ThinLTO optimization). void WriteBitcodeToFile(const Module *M, raw_ostream &Out, bool ShouldPreserveUseListOrder = false, - bool EmitSummaryIndex = false); + bool EmitSummaryIndex = false, + bool GenerateHash = false); /// Write the specified module summary index to the given raw output stream, /// where it will be written in a new bitcode block. This is used when Index: include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- include/llvm/IR/ModuleSummaryIndex.h +++ include/llvm/IR/ModuleSummaryIndex.h @@ -25,6 +25,8 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" +#include + namespace llvm { /// \brief Class to accumulate and hold information about a callee. @@ -228,6 +230,9 @@ void setBitcodeIndex(uint64_t Offset) { BitcodeIndex = Offset; } }; +/// 160 bits SHA1 +typedef std::array ModuleHash; + /// List of global value info structures for a particular value held /// in the GlobalValueMap. Requires a vector in the case of multiple /// COMDAT values of the same name. @@ -245,9 +250,9 @@ typedef GlobalValueInfoMapTy::iterator globalvalueinfo_iterator; /// String table to hold/own module path strings, which additionally holds the -/// module ID assigned to each module during the plugin step. The StringMap -/// makes a copy of and owns inserted strings. -typedef StringMap ModulePathStringTableTy; +/// module ID assigned to each module during the plugin step, as well as a hash +/// of the module. 
The StringMap makes a copy of and owns inserted strings. +typedef StringMap> ModulePathStringTableTy; /// Class to hold module path string table and global value map, /// and encapsulate methods for operating on them. @@ -304,16 +309,26 @@ GlobalValueMap[ValueGUID].push_back(std::move(Info)); } - /// Iterator to allow writer to walk through table during emission. - iterator_range::const_iterator> - modPathStringEntries() const { - return llvm::make_range(ModulePathStringTable.begin(), - ModulePathStringTable.end()); + /// Table of module paths, each mapped to its (module id, hash) pair. + const StringMap> &modulePaths() const { + return ModulePathStringTable; + } + + /// Mutable table of module paths, each mapped to its (module id, hash) pair. + StringMap> &modulePaths() { + return ModulePathStringTable; } /// Get the module ID recorded for the given module path. uint64_t getModuleId(const StringRef ModPath) const { - return ModulePathStringTable.lookup(ModPath); + return ModulePathStringTable.lookup(ModPath).first; + } + + /// Get the module SHA1 hash recorded for the given module path. + const ModuleHash &getModuleHash(const StringRef ModPath) const { + auto It = ModulePathStringTable.find(ModPath); + assert(It != ModulePathStringTable.end() && "Module not registered"); + return It->second.second; } /// Add the given per-module index into this module index/summary, @@ -332,10 +347,14 @@ return NewName.str(); } - /// Add a new module path, mapped to the given module Id, and return StringRef - /// owned by string table map. - StringRef addModulePath(StringRef ModPath, uint64_t ModId) { - return ModulePathStringTable.insert(std::make_pair(ModPath, ModId)) + /// Add a new module path with the given \p Hash, mapped to the given \p + /// ModId, and return a StringRef to the copy of \p ModPath owned by the + /// index. 
+ StringRef addModulePath(StringRef ModPath, uint64_t ModId, + ModuleHash Hash = ModuleHash{{0}}) { + return ModulePathStringTable.insert( + std::make_pair( + ModPath, std::make_pair(ModId, Hash))) .first->first(); } Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -5664,11 +5664,7 @@ } continue; - case BitstreamEntry::Record: - // Once we find the last record of interest, skip the rest. - if (VSTOffset > 0) - Stream.skipRecord(Entry.ID); - else { + case BitstreamEntry::Record: { Record.clear(); auto BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { @@ -5682,6 +5678,25 @@ SourceFileName = ValueName.c_str(); break; } + /// MODULE_CODE_HASH: [5*i32] + case bitc::MODULE_CODE_HASH: { + if (Record.size() != 5) + return error("Invalid hash length " + Twine(Record.size()).str()); + if (!TheIndex) + break; + if (TheIndex->modulePaths().empty()) + // Does not have any summary emitted. + break; + if (TheIndex->modulePaths().size() != 1) + return error("Don't expect multiple modules defined?"); + auto &Hash = TheIndex->modulePaths().begin()->second.second; + int Pos = 0; + for (auto &Val : Record) { + assert(!(Val >> 32) && "Unexpected high bits set"); + Hash[Pos++] = Val; + } + break; + } /// MODULE_CODE_VSTOFFSET: [offset] case bitc::MODULE_CODE_VSTOFFSET: if (Record.size() < 1) @@ -5938,12 +5953,23 @@ default: // Default behavior: ignore. 
break; case bitc::MST_CODE_ENTRY: { - // MST_ENTRY: [modid, namechar x N] - if (convertToString(Record, 1, ModulePath)) - return error("Invalid record"); + // MST_ENTRY: [modid, hash (5*i32), namechar x N] uint64_t ModuleId = Record[0]; - StringRef ModulePathInMap = TheIndex->addModulePath(ModulePath, ModuleId); + + ModuleHash Hash; + for (int Pos = 0; Pos < 5; ++Pos) { + auto &Val = Record[Pos + 1]; + assert(!(Val >> 32) && "Unexpected high bits set"); + Hash[Pos] = Val; + } + + if (convertToString(Record, 6, ModulePath)) + return error("Invalid record"); + + StringRef ModulePathInMap = + TheIndex->addModulePath(ModulePath, ModuleId, Hash); ModuleIdMap[ModuleId] = ModulePathInMap; + ModulePath.clear(); break; } Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -2818,6 +2818,13 @@ BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + // 160 bits SHA1 + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + // End 160 bits SHA1 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); unsigned Abbrev8Bit = Stream.EmitAbbrev(Abbv); @@ -2826,6 +2833,13 @@ Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + // 160 bits SHA1 + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + 
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + // End 160 bits SHA1 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); unsigned Abbrev7Bit = Stream.EmitAbbrev(Abbv); @@ -2834,12 +2848,19 @@ Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + // 160 bits SHA1 + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); + // End 160 bits SHA1 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv); SmallVector NameVals; - for (const StringMapEntry &MPSE : I.modPathStringEntries()) { + for (const auto &MPSE : I.modulePaths()) { StringEncoding Bits = getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size()); unsigned AbbrevToUse = Abbrev8Bit; @@ -2848,7 +2869,12 @@ else if (Bits == SE_Fixed7) AbbrevToUse = Abbrev7Bit; - NameVals.push_back(MPSE.getValue()); + NameVals.push_back(MPSE.getValue().first); + + auto &Hash = MPSE.getValue().second; + for (auto Val : Hash) { + NameVals.push_back(Val); + } for (const auto P : MPSE.getKey()) NameVals.push_back((unsigned char)P); @@ -3163,12 +3189,31 @@ Stream.ExitBlock(); } +static void writeModuleHash(BitstreamWriter &Stream) { + // Emit the module's hash as a MODULE_CODE_HASH record: [5*i32] + auto Hash = Stream.getCurrentHash(); + SmallVector Vals; + for (int Pos = 0; Pos < 20; Pos += 4) { + // Go through unsigned char so bytes >= 0x80 do not sign-extend. + unsigned SubHash = (unsigned)(unsigned char)Hash[Pos + 0] << 24 | + (unsigned)(unsigned char)Hash[Pos + 1] << 16 | + (unsigned)(unsigned char)Hash[Pos + 2] << 8 | + (unsigned)(unsigned char)Hash[Pos + 3]; + Vals.push_back(SubHash); + } 
+ Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals); +} + /// WriteModule - Emit the specified module to the bitstream. static void WriteModule(const Module *M, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, - uint64_t BitcodeStartBit, bool EmitSummaryIndex) { + uint64_t BitcodeStartBit, bool EmitSummaryIndex, + bool GenerateHash) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); + if (GenerateHash) + Stream.enableHash(true); + SmallVector Vals; unsigned CurVersion = 1; Vals.push_back(CurVersion); @@ -3224,6 +3269,11 @@ WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream, VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex); + if (GenerateHash) { + writeModuleHash(Stream); + Stream.enableHash(false); + } + Stream.ExitBlock(); } @@ -3308,7 +3358,7 @@ /// stream. void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out, bool ShouldPreserveUseListOrder, - bool EmitSummaryIndex) { + bool EmitSummaryIndex, bool GenerateHash) { SmallVector Buffer; Buffer.reserve(256*1024); @@ -3334,7 +3384,7 @@ // Emit the module. WriteModule(M, Stream, ShouldPreserveUseListOrder, BitcodeStartBit, - EmitSummaryIndex); + EmitSummaryIndex, GenerateHash); } if (TT.isOSDarwin() || TT.isOSBinFormatMachO()) Index: lib/IR/ModuleSummaryIndex.cpp =================================================================== --- lib/IR/ModuleSummaryIndex.cpp +++ lib/IR/ModuleSummaryIndex.cpp @@ -37,9 +37,10 @@ // Add the module path string ref for this module if we haven't already // saved a reference to it. 
- if (ModPath.empty()) - ModPath = addModulePath(Info->summary()->modulePath(), NextModuleId); - else + if (ModPath.empty()) { + auto Path = Info->summary()->modulePath(); + ModPath = addModulePath(Path, NextModuleId, Other->getModuleHash(Path)); + } else assert(ModPath == Info->summary()->modulePath() && "Each module in the combined map should have a unique ID"); Index: lib/Transforms/IPO/FunctionImport.cpp =================================================================== --- lib/Transforms/IPO/FunctionImport.cpp +++ lib/Transforms/IPO/FunctionImport.cpp @@ -55,7 +55,7 @@ /* ShouldLazyLoadMetadata = */ true); if (!Result) { Err.print("function-import", errs()); - return nullptr; + report_fatal_error("Abort"); } return Result; Index: test/Bitcode/Inputs/module_hash.ll =================================================================== --- /dev/null +++ test/Bitcode/Inputs/module_hash.ll @@ -0,0 +1,4 @@ +; Needs a function for the combined index to be populated +define void @bar() { + ret void +} Index: test/Bitcode/module_hash.ll =================================================================== --- /dev/null +++ test/Bitcode/module_hash.ll @@ -0,0 +1,26 @@ +; Check per module hash +; RUN: llvm-as -module-hash %s -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD1 +; MOD1: +; RUN: llvm-as -module-hash %p/Inputs/module_hash.ll -o - | llvm-bcanalyzer -dump | FileCheck %s --check-prefix=MOD2 +; MOD2: + +; Check that the hash matches in the combined index + +; First check the module themselve, note that the hash differs from above because the summary was not written previously +; RUN: llvm-as -module-hash -module-summary %s -o %t +; RUN: llvm-bcanalyzer -dump %t | FileCheck %s --check-prefix=MOD_WITH_SUMMARY1 +; MOD_WITH_SUMMARY1: +; RUN: llvm-as -module-hash -module-summary %p/Inputs/module_hash.ll -o %t2 +; RUN: llvm-bcanalyzer -dump %t2 | FileCheck %s --check-prefix=MOD_WITH_SUMMARY2 +; MOD_WITH_SUMMARY2: + +; Check that the hashes are propagated in 
the combined index (should match the one just above) +; RUN: llvm-lto --thinlto-action=thinlink -o %t3 %t %t2 +; RUN: llvm-bcanalyzer -dump %t3 | FileCheck %s --check-prefix=COMBINED +; COMBINED-DAG: EmitModuleHash("module-hash", cl::desc("Emit module hash"), + cl::init(false)); + static cl::opt DumpAsm("d", cl::desc("Print assembly as parsed"), cl::Hidden); @@ -82,7 +85,7 @@ if (Force || !CheckBitcodeOutputToConsole(Out->os(), true)) WriteBitcodeToFile(M, Out->os(), PreserveBitcodeUseListOrder, - EmitSummaryIndex); + EmitSummaryIndex, EmitModuleHash); // Declare success. Out->keep(); Index: tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp =================================================================== --- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -174,6 +174,7 @@ STRINGIFY_CODE(MODULE_CODE, VSTOFFSET) STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES) STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME) + STRINGIFY_CODE(MODULE_CODE, HASH) } case bitc::IDENTIFICATION_BLOCK_ID: switch (CodeID) { Index: tools/llvm-lto/llvm-lto.cpp =================================================================== --- tools/llvm-lto/llvm-lto.cpp +++ tools/llvm-lto/llvm-lto.cpp @@ -281,7 +281,7 @@ loadAllFilesForIndex(const ModuleSummaryIndex &Index) { std::vector> InputBuffers; - for (auto &ModPath : Index.modPathStringEntries()) { + for (auto &ModPath : Index.modulePaths()) { const auto &Filename = ModPath.first(); auto CurrentActivity = "loading file '" + Filename + "'"; auto InputOrErr = MemoryBuffer::getFile(Filename);