Index: include/llvm/Bitcode/BitstreamWriter.h =================================================================== --- include/llvm/Bitcode/BitstreamWriter.h +++ include/llvm/Bitcode/BitstreamWriter.h @@ -21,6 +21,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Bitcode/BitCodes.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/SHA1.h" #include namespace llvm { @@ -42,6 +43,12 @@ /// selected BLOCK ID. unsigned BlockInfoCurBID; + /// SHA1 state fed by WriteWord with each emitted word while hashing is on. + SHA1 Hash; + + /// True while words passed to WriteWord are also fed to Hash. + bool HashEnabled = false; + /// CurAbbrevs - Abbrevs installed at in this block. std::vector> CurAbbrevs; @@ -69,6 +76,8 @@ void WriteWord(unsigned Value) { Value = support::endian::byte_swap(Value); + if (HashEnabled) + Hash.write(reinterpret_cast(&Value), sizeof(Value)); Out.append(reinterpret_cast(&Value), reinterpret_cast(&Value + 1)); } @@ -96,6 +105,14 @@ /// \brief Retrieve the number of bits currently used to encode an abbrev ID. unsigned GetAbbrevIDWidth() const { return CurCodeSize; } + /// Enable or disable hashing; Hash.init() is called in both cases. + void enableHash(bool Enabled) { + HashEnabled = Enabled; + Hash.init(); + } + + StringRef getCurrentHash() { return Hash.result(); } + //===--------------------------------------------------------------------===// // Basic Primitives for emitting bits to the stream. //===--------------------------------------------------------------------===// Index: include/llvm/Bitcode/LLVMBitCodes.h =================================================================== --- include/llvm/Bitcode/LLVMBitCodes.h +++ include/llvm/Bitcode/LLVMBitCodes.h @@ -108,6 +108,9 @@ // SOURCE_FILENAME: [namechar x N] MODULE_CODE_SOURCE_FILENAME = 16, + + // HASH: [unsigned] + MODULE_CODE_HASH = 17, }; /// PARAMATTR blocks have code for defining a parameter attribute set. @@ -183,7 +186,7 @@ // The module path symbol table only has one code (MST_CODE_ENTRY). 
enum ModulePathSymtabCodes { - MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, namechar x N] + MST_CODE_ENTRY = 1, // MST_ENTRY: [modid, hash, namechar x N] }; // The summary section uses different codes in the per-module Index: include/llvm/IR/ModuleSummaryIndex.h =================================================================== --- include/llvm/IR/ModuleSummaryIndex.h +++ include/llvm/IR/ModuleSummaryIndex.h @@ -245,9 +245,9 @@ typedef GlobalValueInfoMapTy::iterator globalvalueinfo_iterator; /// String table to hold/own module path strings, which additionally holds the -/// module ID assigned to each module during the plugin step. The StringMap -/// makes a copy of and owns inserted strings. -typedef StringMap ModulePathStringTableTy; +/// module ID assigned to each module during the plugin step, as well as a hash +/// of the module. The StringMap makes a copy of and owns inserted strings. +typedef StringMap> ModulePathStringTableTy; /// Class to hold module path string table and global value map, /// and encapsulate methods for operating on them. @@ -305,15 +305,23 @@ } /// Iterator to allow writer to walk through table during emission. - iterator_range::const_iterator> - modPathStringEntries() const { - return llvm::make_range(ModulePathStringTable.begin(), - ModulePathStringTable.end()); + const StringMap> &modulePaths() const { + return ModulePathStringTable; + } + + /// Mutable access to the module path string table. + StringMap> &modulePaths() { + return ModulePathStringTable; } /// Get the module ID recorded for the given module path. uint64_t getModuleId(const StringRef ModPath) const { - return ModulePathStringTable.lookup(ModPath); + return ModulePathStringTable.lookup(ModPath).first; + } + + /// Get the module hash recorded for the given module path. 
+ unsigned getModuleHash(const StringRef ModPath) const { + return ModulePathStringTable.lookup(ModPath).second; } /// Add the given per-module index into this module index/summary, @@ -334,8 +342,10 @@ /// Add a new module path, mapped to the given module Id, and return StringRef /// owned by string table map. - StringRef addModulePath(StringRef ModPath, uint64_t ModId) { - return ModulePathStringTable.insert(std::make_pair(ModPath, ModId)) + StringRef addModulePath(StringRef ModPath, uint64_t ModId, + unsigned Hash = 0) { + return ModulePathStringTable.insert(std::make_pair( + ModPath, std::make_pair(ModId, Hash))) .first->first(); } Index: lib/Bitcode/Reader/BitcodeReader.cpp =================================================================== --- lib/Bitcode/Reader/BitcodeReader.cpp +++ lib/Bitcode/Reader/BitcodeReader.cpp @@ -5636,10 +5636,7 @@ continue; case BitstreamEntry::Record: - // Once we find the last record of interest, skip the rest. - if (VSTOffset > 0) - Stream.skipRecord(Entry.ID); - else { + { Record.clear(); auto BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { @@ -5653,6 +5650,17 @@ SourceFileName = ValueName.c_str(); break; } + /// MODULE_CODE_HASH: [unsigned] + case bitc::MODULE_CODE_HASH: { + if (Record.size() != 1) + return error("Invalid hash length"); + if (TheIndex->modulePaths().empty()) + return error("Didn't populate module info?"); + if (TheIndex->modulePaths().size() != 1) + return error("Don't expect multiple module defined?"); + TheIndex->modulePaths().begin()->second.second = Record[0]; + break; + } /// MODULE_CODE_VSTOFFSET: [offset] case bitc::MODULE_CODE_VSTOFFSET: if (Record.size() < 1) @@ -5909,11 +5917,13 @@ default: // Default behavior: ignore. 
break; case bitc::MST_CODE_ENTRY: { - // MST_ENTRY: [modid, namechar x N] - if (convertToString(Record, 1, ModulePath)) + // MST_ENTRY: [modid, hash, namechar x N] + if (convertToString(Record, 2, ModulePath)) return error("Invalid record"); uint64_t ModuleId = Record[0]; - StringRef ModulePathInMap = TheIndex->addModulePath(ModulePath, ModuleId); + unsigned ModuleHash = Record[1]; + StringRef ModulePathInMap = + TheIndex->addModulePath(ModulePath, ModuleId, ModuleHash); ModuleIdMap[ModuleId] = ModulePathInMap; ModulePath.clear(); break; Index: lib/Bitcode/Writer/BitcodeWriter.cpp =================================================================== --- lib/Bitcode/Writer/BitcodeWriter.cpp +++ lib/Bitcode/Writer/BitcodeWriter.cpp @@ -2819,6 +2819,7 @@ BitCodeAbbrev *Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); unsigned Abbrev8Bit = Stream.EmitAbbrev(Abbv); @@ -2827,6 +2828,7 @@ Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); unsigned Abbrev7Bit = Stream.EmitAbbrev(Abbv); @@ -2835,12 +2837,13 @@ Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::MST_CODE_ENTRY)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); + Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6)); unsigned Abbrev6Bit = Stream.EmitAbbrev(Abbv); SmallVector NameVals; - for (const StringMapEntry &MPSE : I.modPathStringEntries()) { + for (const auto &MPSE : I.modulePaths()) { StringEncoding Bits = 
getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size()); unsigned AbbrevToUse = Abbrev8Bit; @@ -2849,7 +2852,8 @@ else if (Bits == SE_Fixed7) AbbrevToUse = Abbrev7Bit; - NameVals.push_back(MPSE.getValue()); + NameVals.push_back(MPSE.getValue().first); + NameVals.push_back(MPSE.getValue().second); for (const auto P : MPSE.getKey()) NameVals.push_back((unsigned char)P); @@ -3164,12 +3168,25 @@ Stream.ExitBlock(); } +static void writeModuleHash(BitstreamWriter &Stream) { + // Emit the module's hash: the stream's SHA1 folded into one unsigned. + // MODULE_CODE_HASH: [unsigned] + auto SHA1 = Stream.getCurrentHash(); + unsigned Hash = hash_combine_range(SHA1.begin(), SHA1.end()); + + // Emit the finished record. + Stream.EmitRecord(bitc::MODULE_CODE_HASH, makeArrayRef(Hash)); +} + /// WriteModule - Emit the specified module to the bitstream. static void WriteModule(const Module *M, BitstreamWriter &Stream, bool ShouldPreserveUseListOrder, uint64_t BitcodeStartBit, bool EmitSummaryIndex) { Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3); + if (EmitSummaryIndex) + Stream.enableHash(true); + SmallVector Vals; unsigned CurVersion = 1; Vals.push_back(CurVersion); @@ -3225,6 +3242,11 @@ WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream, VSTOffsetPlaceholder, BitcodeStartBit, &FunctionIndex); + if (EmitSummaryIndex) { + writeModuleHash(Stream); + Stream.enableHash(false); + } + Stream.ExitBlock(); } Index: tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp =================================================================== --- tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp +++ tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp @@ -174,6 +174,7 @@ STRINGIFY_CODE(MODULE_CODE, VSTOFFSET) STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES) STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME) + STRINGIFY_CODE(MODULE_CODE, HASH) } case bitc::IDENTIFICATION_BLOCK_ID: switch (CodeID) {