Index: llvm/trunk/include/llvm/DebugInfo/PDB/Native/HashTable.h
===================================================================
--- llvm/trunk/include/llvm/DebugInfo/PDB/Native/HashTable.h
+++ llvm/trunk/include/llvm/DebugInfo/PDB/Native/HashTable.h
@@ -54,10 +54,67 @@
   HashTableIterator begin() const;
   HashTableIterator end() const;
-  HashTableIterator find(uint32_t K);
+
+  /// Find the entry with the specified key value.
+  HashTableIterator find(uint32_t K) const;
+
+  /// Find the entry whose key has the specified hash value, using the specified
+  /// traits defining hash function and equality.
+  template <typename Traits, typename Key, typename Context>
+  HashTableIterator find_as(const Key &K, const Context &Ctx) const {
+    uint32_t H = Traits::hash(K, Ctx) % capacity();
+    uint32_t I = H;
+    Optional<uint32_t> FirstUnused;
+    do {
+      if (isPresent(I)) {
+        if (Traits::realKey(Buckets[I].first, Ctx) == K)
+          return HashTableIterator(*this, I, false);
+      } else {
+        if (!FirstUnused)
+          FirstUnused = I;
+        // Insertion occurs via linear probing from the slot hint, and will be
+        // inserted at the first empty / deleted location. Therefore, if we are
+        // probing and find a location that is neither present nor deleted, then
+        // nothing must have EVER been inserted at this location, and thus it is
+        // not possible for a matching value to occur later.
+        if (!isDeleted(I))
+          break;
+      }
+      I = (I + 1) % capacity();
+    } while (I != H);
+
+    // The only way FirstUnused would not be set is if every single entry in the
+    // table were Present. But this would violate the load factor constraints
+    // that we impose, so it should never happen.
+    assert(FirstUnused);
+    return HashTableIterator(*this, *FirstUnused, true);
+  }
+
+  /// Set the entry with the specified key to the specified value.
   void set(uint32_t K, uint32_t V);
+
+  /// Set the entry using a key type that the specified Traits can convert
+  /// from a real key to an internal key.
+  template <typename Traits, typename Key, typename Context>
+  bool set_as(const Key &K, uint32_t V, Context &Ctx) {
+    return set_as_internal<Traits>(K, V, None, Ctx);
+  }
+
   void remove(uint32_t K);
+
+  template <typename Traits, typename Key, typename Context>
+  void remove_as(const Key &K, Context &Ctx) {
+    auto Iter = find_as<Traits>(K, Ctx);
+    // It wasn't here to begin with, just exit.
+    if (Iter == end())
+      return;
+
+    assert(Present.test(Iter.index()));
+    assert(!Deleted.test(Iter.index()));
+    Deleted.set(Iter.index());
+    Present.reset(Iter.index());
+  }
+
   uint32_t get(uint32_t K);
 
 protected:
@@ -69,8 +126,62 @@
   mutable SparseBitVector<> Deleted;
 
 private:
+  /// Set the entry using a key type that the specified Traits can convert
+  /// from a real key to an internal key.
+  template <typename Traits, typename Key, typename Context>
+  bool set_as_internal(const Key &K, uint32_t V, Optional<uint32_t> InternalKey,
+                       Context &Ctx) {
+    auto Entry = find_as<Traits>(K, Ctx);
+    if (Entry != end()) {
+      assert(isPresent(Entry.index()));
+      assert(Traits::realKey(Buckets[Entry.index()].first, Ctx) == K);
+      // We're updating, no need to do anything special.
+      Buckets[Entry.index()].second = V;
+      return false;
+    }
+
+    auto &B = Buckets[Entry.index()];
+    assert(!isPresent(Entry.index()));
+    assert(Entry.isEnd());
+    B.first = InternalKey ? *InternalKey : Traits::lowerKey(K, Ctx);
+    B.second = V;
+    Present.set(Entry.index());
+    Deleted.reset(Entry.index());
+
+    grow<Traits>(Ctx);
+
+    assert((find_as<Traits>(K, Ctx)) != end());
+    return true;
+  }
+
   static uint32_t maxLoad(uint32_t capacity);
-  void grow();
+
+  template <typename Traits, typename Context>
+  void grow(Context &Ctx) {
+    uint32_t S = size();
+    if (S < maxLoad(capacity()))
+      return;
+    assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
+
+    uint32_t NewCapacity =
+        (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX;
+
+    // Growing requires rebuilding the table and re-hashing every item. Make a
+    // copy with a larger capacity, insert everything into the copy, then swap
+    // it in.
+    HashTable NewMap(NewCapacity);
+    for (auto I : Present) {
+      auto RealKey = Traits::realKey(Buckets[I].first, Ctx);
+      NewMap.set_as_internal<Traits>(RealKey, Buckets[I].second,
+                                     Buckets[I].first, Ctx);
+    }
+
+    Buckets.swap(NewMap.Buckets);
+    std::swap(Present, NewMap.Present);
+    std::swap(Deleted, NewMap.Deleted);
+    assert(capacity() == NewCapacity);
+    assert(size() == S);
+  }
 
   static Error readSparseBitVector(BinaryStreamReader &Stream,
                                    SparseBitVector<> &V);
Index: llvm/trunk/include/llvm/DebugInfo/PDB/Native/InfoStream.h
===================================================================
--- llvm/trunk/include/llvm/DebugInfo/PDB/Native/InfoStream.h
+++ llvm/trunk/include/llvm/DebugInfo/PDB/Native/InfoStream.h
@@ -51,7 +51,7 @@
   BinarySubstreamRef getNamedStreamsBuffer() const;
 
   uint32_t getNamedStreamIndex(llvm::StringRef Name) const;
-  iterator_range<StringMapConstIterator<uint32_t>> named_streams() const;
+  StringMap<uint32_t> named_streams() const;
 
 private:
   std::unique_ptr<msf::MappedBlockStream> Stream;
Index: llvm/trunk/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
===================================================================
--- llvm/trunk/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
+++ llvm/trunk/include/llvm/DebugInfo/PDB/Native/NamedStreamMap.h
@@ -28,29 +28,30 @@
 class NamedStreamMap {
   friend class NamedStreamMapBuilder;
 
-  struct FinalizationInfo {
-    uint32_t StringDataBytes = 0;
-    uint32_t SerializedLength = 0;
-  };
-
 public:
   NamedStreamMap();
 
   Error load(BinaryStreamReader &Stream);
   Error commit(BinaryStreamWriter &Writer) const;
-  uint32_t finalize();
+  uint32_t calculateSerializedLength() const;
 
   uint32_t size() const;
 
   bool get(StringRef Stream, uint32_t &StreamNo) const;
   void set(StringRef Stream, uint32_t StreamNo);
-  void remove(StringRef Stream);
-  const StringMap<uint32_t> &getStringMap() const { return Mapping; }
-  iterator_range<StringMapConstIterator<uint32_t>> entries() const;
+
+  uint32_t appendStringData(StringRef S);
+  StringRef getString(uint32_t Offset) const;
+  uint32_t hashString(uint32_t Offset) const;
+
+  StringMap<uint32_t> entries() const;
 
 private:
-  Optional<FinalizationInfo> FinalizedInfo;
-  HashTable FinalizedHashTable;
-  StringMap<uint32_t> Mapping;
+  /// Closed hash table from Offset -> StreamNumber, where Offset is the offset
+  /// of the stream name in NamesBuffer.
+  HashTable OffsetIndexMap;
+
+  /// Buffer of string data.
+  std::vector<char> NamesBuffer;
 };
 
 } // end namespace pdb
Index: llvm/trunk/lib/DebugInfo/PDB/Native/HashTable.cpp
===================================================================
--- llvm/trunk/lib/DebugInfo/PDB/Native/HashTable.cpp
+++ llvm/trunk/lib/DebugInfo/PDB/Native/HashTable.cpp
@@ -22,6 +22,14 @@
 using namespace llvm;
 using namespace llvm::pdb;
 
+namespace {
+struct IdentityTraits {
+  static uint32_t hash(uint32_t K, const HashTable &Ctx) { return K; }
+  static uint32_t realKey(uint32_t K, const HashTable &Ctx) { return K; }
+  static uint32_t lowerKey(uint32_t K, const HashTable &Ctx) { return K; }
+};
+} // namespace
+
 HashTable::HashTable() : HashTable(8) {}
 
 HashTable::HashTable(uint32_t Capacity) { Buckets.resize(Capacity); }
@@ -119,70 +127,16 @@
   return HashTableIterator(*this, 0, true);
 }
 
-HashTableIterator HashTable::find(uint32_t K) {
-  uint32_t H = K % capacity();
-  uint32_t I = H;
-  Optional<uint32_t> FirstUnused;
-  do {
-    if (isPresent(I)) {
-      if (Buckets[I].first == K)
-        return HashTableIterator(*this, I, false);
-    } else {
-      if (!FirstUnused)
-        FirstUnused = I;
-      // Insertion occurs via linear probing from the slot hint, and will be
-      // inserted at the first empty / deleted location. Therefore, if we are
-      // probing and find a location that is neither present nor deleted, then
-      // nothing must have EVER been inserted at this location, and thus it is
-      // not possible for a matching value to occur later.
-      if (!isDeleted(I))
-        break;
-    }
-    I = (I + 1) % capacity();
-  } while (I != H);
-
-  // The only way FirstUnused would not be set is if every single entry in the
-  // table were Present. But this would violate the load factor constraints
-  // that we impose, so it should never happen.
-  assert(FirstUnused);
-  return HashTableIterator(*this, *FirstUnused, true);
+HashTableIterator HashTable::find(uint32_t K) const {
+  return find_as<IdentityTraits>(K, *this);
 }
 
 void HashTable::set(uint32_t K, uint32_t V) {
-  auto Entry = find(K);
-  if (Entry != end()) {
-    assert(isPresent(Entry.index()));
-    assert(Buckets[Entry.index()].first == K);
-    // We're updating, no need to do anything special.
-    Buckets[Entry.index()].second = V;
-    return;
-  }
-
-  auto &B = Buckets[Entry.index()];
-  assert(!isPresent(Entry.index()));
-  assert(Entry.isEnd());
-  B.first = K;
-  B.second = V;
-  Present.set(Entry.index());
-  Deleted.reset(Entry.index());
-
-  grow();
-
-  assert(find(K) != end());
-}
-
-void HashTable::remove(uint32_t K) {
-  auto Iter = find(K);
-  // It wasn't here to begin with, just exit.
-  if (Iter == end())
-    return;
-
-  assert(Present.test(Iter.index()));
-  assert(!Deleted.test(Iter.index()));
-  Deleted.set(Iter.index());
-  Present.reset(Iter.index());
+  set_as<IdentityTraits>(K, V, *this);
 }
 
+void HashTable::remove(uint32_t K) { remove_as<IdentityTraits>(K, *this); }
+
 uint32_t HashTable::get(uint32_t K) {
   auto I = find(K);
   assert(I != end());
@@ -191,30 +145,6 @@
 
 uint32_t HashTable::maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; }
 
-void HashTable::grow() {
-  uint32_t S = size();
-  if (S < maxLoad(capacity()))
-    return;
-  assert(capacity() != UINT32_MAX && "Can't grow Hash table!");
-
-  uint32_t NewCapacity =
-      (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX;
-
-  // Growing requires rebuilding the table and re-hashing every item. Make a
-  // copy with a larger capacity, insert everything into the copy, then swap
-  // it in.
-  HashTable NewMap(NewCapacity);
-  for (auto I : Present) {
-    NewMap.set(Buckets[I].first, Buckets[I].second);
-  }
-
-  Buckets.swap(NewMap.Buckets);
-  std::swap(Present, NewMap.Present);
-  std::swap(Deleted, NewMap.Deleted);
-  assert(capacity() == NewCapacity);
-  assert(size() == S);
-}
-
 Error HashTable::readSparseBitVector(BinaryStreamReader &Stream,
                                      SparseBitVector<> &V) {
   uint32_t NumWords;
Index: llvm/trunk/lib/DebugInfo/PDB/Native/InfoStream.cpp
===================================================================
--- llvm/trunk/lib/DebugInfo/PDB/Native/InfoStream.cpp
+++ llvm/trunk/lib/DebugInfo/PDB/Native/InfoStream.cpp
@@ -99,8 +99,7 @@
   return Result;
 }
 
-iterator_range<StringMapConstIterator<uint32_t>>
-InfoStream::named_streams() const {
+StringMap<uint32_t> InfoStream::named_streams() const {
   return NamedStreams.entries();
 }
 
Index: llvm/trunk/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
===================================================================
--- llvm/trunk/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
+++ llvm/trunk/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
@@ -41,7 +41,8 @@
 }
 
 Error InfoStreamBuilder::finalizeMsfLayout() {
-  uint32_t Length = sizeof(InfoStreamHeader) + NamedStreams.finalize() +
+  uint32_t Length = sizeof(InfoStreamHeader) +
+                    NamedStreams.calculateSerializedLength() +
                     (Features.size() + 1) * sizeof(uint32_t);
   if (auto EC = Msf.setStreamSize(StreamPDB, Length))
     return EC;
Index: llvm/trunk/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
===================================================================
--- llvm/trunk/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
+++ llvm/trunk/lib/DebugInfo/PDB/Native/NamedStreamMap.cpp
@@ -11,6 +11,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/DebugInfo/PDB/Native/Hash.h"
 #include "llvm/DebugInfo/PDB/Native/HashTable.h"
 #include "llvm/DebugInfo/PDB/Native/RawError.h"
 #include "llvm/Support/BinaryStreamReader.h"
@@ -26,127 +27,100 @@
 
 using namespace llvm;
 using namespace llvm::pdb;
 
-// FIXME: This shouldn't be necessary, but if we insert the strings in any
-// other order, cvdump cannot read the generated name map. This suggests that
-// we may be using the wrong hash function. A closer inspection of the cvdump
-// source code may reveal something, but for now this at least makes us work,
-// even if only by accident.
-static constexpr const char *OrderedStreamNames[] = {"/LinkInfo", "/names",
-                                                     "/src/headerblock"};
+namespace {
+struct NamedStreamMapTraits {
+  static uint16_t hash(StringRef S, const NamedStreamMap &NS) {
+    // In the reference implementation, this uses
+    // HASH Hasher<ULONG*, USHORT>::hashPbCb(PB pb, size_t cb, ULONG ulMod).
+    // Here, the type HASH is a typedef of unsigned short.
+    // ** It is not a bug that we truncate the result of hashStringV1, in fact
+    // it is a bug if we do not! **
+    return static_cast<uint16_t>(hashStringV1(S));
+  }
+  static StringRef realKey(uint32_t Offset, const NamedStreamMap &NS) {
+    return NS.getString(Offset);
+  }
+  static uint32_t lowerKey(StringRef S, NamedStreamMap &NS) {
+    return NS.appendStringData(S);
+  }
+};
+} // namespace
 
-NamedStreamMap::NamedStreamMap() = default;
+NamedStreamMap::NamedStreamMap() {}
 
 Error NamedStreamMap::load(BinaryStreamReader &Stream) {
-  Mapping.clear();
-  FinalizedHashTable.clear();
-  FinalizedInfo.reset();
-
   uint32_t StringBufferSize;
   if (auto EC = Stream.readInteger(StringBufferSize))
     return joinErrors(std::move(EC),
                       make_error<RawError>(raw_error_code::corrupt_file,
                                            "Expected string buffer size"));
 
-  BinaryStreamRef StringsBuffer;
-  if (auto EC = Stream.readStreamRef(StringsBuffer, StringBufferSize))
+  StringRef Buffer;
+  if (auto EC = Stream.readFixedString(Buffer, StringBufferSize))
     return EC;
+  NamesBuffer.assign(Buffer.begin(), Buffer.end());
 
-  HashTable OffsetIndexMap;
-  if (auto EC = OffsetIndexMap.load(Stream))
-    return EC;
-
-  uint32_t NameOffset;
-  uint32_t NameIndex;
-  for (const auto &Entry : OffsetIndexMap) {
-    std::tie(NameOffset, NameIndex) = Entry;
-
-    // Compute the offset of the start of the string relative to the stream.
-    BinaryStreamReader NameReader(StringsBuffer);
-    NameReader.setOffset(NameOffset);
-    // Pump out our c-string from the stream.
-    StringRef Str;
-    if (auto EC = NameReader.readCString(Str))
-      return joinErrors(std::move(EC),
-                        make_error<RawError>(raw_error_code::corrupt_file,
-                                             "Expected name map name"));
-
-    // Add this to a string-map from name to stream number.
-    Mapping.insert({Str, NameIndex});
-  }
-
-  return Error::success();
+  return OffsetIndexMap.load(Stream);
 }
 
 Error NamedStreamMap::commit(BinaryStreamWriter &Writer) const {
-  assert(FinalizedInfo.hasValue());
-
   // The first field is the number of bytes of string data.
-  if (auto EC = Writer.writeInteger(FinalizedInfo->StringDataBytes))
+  if (auto EC = Writer.writeInteger<uint32_t>(NamesBuffer.size()))
     return EC;
 
-  for (const auto &Name : OrderedStreamNames) {
-    auto Item = Mapping.find(Name);
-    if (Item == Mapping.end())
-      continue;
-    if (auto EC = Writer.writeCString(Item->getKey()))
-      return EC;
-  }
+  // Then the actual string data.
+  StringRef Data(NamesBuffer.data(), NamesBuffer.size());
+  if (auto EC = Writer.writeFixedString(Data))
+    return EC;
 
   // And finally the Offset Index map.
-  if (auto EC = FinalizedHashTable.commit(Writer))
+  if (auto EC = OffsetIndexMap.commit(Writer))
     return EC;
 
   return Error::success();
 }
 
-uint32_t NamedStreamMap::finalize() {
-  if (FinalizedInfo.hasValue())
-    return FinalizedInfo->SerializedLength;
-
-  // Build the finalized hash table.
-  FinalizedHashTable.clear();
-  FinalizedInfo.emplace();
-
-  for (const auto &Name : OrderedStreamNames) {
-    auto Item = Mapping.find(Name);
-    if (Item == Mapping.end())
-      continue;
-    FinalizedHashTable.set(FinalizedInfo->StringDataBytes, Item->getValue());
-    FinalizedInfo->StringDataBytes += Item->getKeyLength() + 1;
-  }
+uint32_t NamedStreamMap::calculateSerializedLength() const {
+  return sizeof(uint32_t)                              // String data size
+         + NamesBuffer.size()                          // String data
+         + OffsetIndexMap.calculateSerializedLength(); // Offset Index Map
+}
+
+uint32_t NamedStreamMap::size() const { return OffsetIndexMap.size(); }
 
-  // Number of bytes of string data.
-  FinalizedInfo->SerializedLength += sizeof(support::ulittle32_t);
-  // Followed by that many actual bytes of string data.
-  FinalizedInfo->SerializedLength += FinalizedInfo->StringDataBytes;
-  // Followed by the mapping from Offset to Index.
-  FinalizedInfo->SerializedLength +=
-      FinalizedHashTable.calculateSerializedLength();
-  return FinalizedInfo->SerializedLength;
-}
-
-iterator_range<StringMapConstIterator<uint32_t>>
-NamedStreamMap::entries() const {
-  return make_range<StringMapConstIterator<uint32_t>>(Mapping.begin(),
-                                                      Mapping.end());
+StringRef NamedStreamMap::getString(uint32_t Offset) const {
  assert(NamesBuffer.size() > Offset);
  return StringRef(NamesBuffer.data() + Offset);
 }
 
-uint32_t NamedStreamMap::size() const { return Mapping.size(); }
+uint32_t NamedStreamMap::hashString(uint32_t Offset) const {
+  return hashStringV1(getString(Offset));
+}
 
 bool NamedStreamMap::get(StringRef Stream, uint32_t &StreamNo) const {
-  auto Iter = Mapping.find(Stream);
-  if (Iter == Mapping.end())
+  auto Iter = OffsetIndexMap.find_as<NamedStreamMapTraits>(Stream, *this);
+  if (Iter == OffsetIndexMap.end())
    return false;
-  StreamNo = Iter->second;
+  StreamNo = (*Iter).second;
   return true;
 }
 
-void NamedStreamMap::set(StringRef Stream, uint32_t StreamNo) {
-  FinalizedInfo.reset();
-  Mapping[Stream] = StreamNo;
+StringMap<uint32_t> NamedStreamMap::entries() const {
+  StringMap<uint32_t> Result;
+  for (const auto &Entry : OffsetIndexMap) {
+    StringRef Stream(NamesBuffer.data() + Entry.first);
+    Result.try_emplace(Stream, Entry.second);
+  }
+  return Result;
 }
 
-void NamedStreamMap::remove(StringRef Stream) {
-  FinalizedInfo.reset();
-  Mapping.erase(Stream);
+uint32_t NamedStreamMap::appendStringData(StringRef S) {
+  uint32_t Offset = NamesBuffer.size();
+  NamesBuffer.insert(NamesBuffer.end(), S.begin(), S.end());
+  NamesBuffer.push_back('\0');
+  return Offset;
+}
+
+void NamedStreamMap::set(StringRef Stream, uint32_t StreamNo) {
+  OffsetIndexMap.set_as<NamedStreamMapTraits>(Stream, StreamNo, *this);
 }
Index: llvm/trunk/tools/llvm-pdbutil/Diff.cpp
===================================================================
--- llvm/trunk/tools/llvm-pdbutil/Diff.cpp
+++ llvm/trunk/tools/llvm-pdbutil/Diff.cpp
@@ -417,8 +417,8 @@
                            IS2.getFeatureSignatures());
   D.print("Named Stream Size", IS1.getNamedStreamMapByteSize(),
           IS2.getNamedStreamMapByteSize());
-  StringMap<uint32_t> NSL = IS1.getNamedStreams().getStringMap();
-  StringMap<uint32_t> NSR = IS2.getNamedStreams().getStringMap();
+  StringMap<uint32_t> NSL = IS1.getNamedStreams().entries();
+  StringMap<uint32_t> NSR = IS2.getNamedStreams().entries();
   D.diffUnorderedMap("Named Stream", NSL, NSR);
   return Error::success();
 }
Index: llvm/trunk/unittests/DebugInfo/PDB/HashTableTest.cpp
===================================================================
--- llvm/trunk/unittests/DebugInfo/PDB/HashTableTest.cpp
+++ llvm/trunk/unittests/DebugInfo/PDB/HashTableTest.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/PDB/Native/HashTable.h"
+#include "llvm/DebugInfo/PDB/Native/NamedStreamMap.h"
 #include "llvm/Support/BinaryByteStream.h"
 #include "llvm/Support/BinaryStreamReader.h"
 #include "llvm/Support/BinaryStreamWriter.h"
@@ -166,3 +167,43 @@
   EXPECT_EQ(Table.Present, Table2.Present);
   EXPECT_EQ(Table.Deleted, Table2.Deleted);
 }
+
+TEST(HashTableTest, NamedStreamMap) {
+  std::vector<StringRef> Streams = {"One", "Two", "Three", "Four",
+                                    "Five", "Six", "Seven"};
+  StringMap<uint32_t> ExpectedIndices;
+  for (uint32_t I = 0; I < Streams.size(); ++I)
+    ExpectedIndices[Streams[I]] = I + 1;
+
+  // To verify the hash table actually works, we want to verify that insertion
+  // order doesn't matter. So try inserting in every possible order of 7 items.
+  do {
+    NamedStreamMap NSM;
+    for (StringRef S : Streams)
+      NSM.set(S, ExpectedIndices[S]);
+
+    EXPECT_EQ(Streams.size(), NSM.size());
+
+    uint32_t N;
+    EXPECT_TRUE(NSM.get("One", N));
+    EXPECT_EQ(1, N);
+
+    EXPECT_TRUE(NSM.get("Two", N));
+    EXPECT_EQ(2, N);
+
+    EXPECT_TRUE(NSM.get("Three", N));
+    EXPECT_EQ(3, N);
+
+    EXPECT_TRUE(NSM.get("Four", N));
+    EXPECT_EQ(4, N);
+
+    EXPECT_TRUE(NSM.get("Five", N));
+    EXPECT_EQ(5, N);
+
+    EXPECT_TRUE(NSM.get("Six", N));
+    EXPECT_EQ(6, N);
+
+    EXPECT_TRUE(NSM.get("Seven", N));
+    EXPECT_EQ(7, N);
+  } while (std::next_permutation(Streams.begin(), Streams.end()));
+}