Index: llvm/include/llvm/DebugInfo/PDB/Raw/HashTable.h =================================================================== --- /dev/null +++ llvm/include/llvm/DebugInfo/PDB/Raw/HashTable.h @@ -0,0 +1,102 @@ +//===- HashTable.h - PDB Hash Table -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_HASHTABLE_H +#define LLVM_DEBUGINFO_PDB_RAW_HASHTABLE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator.h" +#include "llvm/DebugInfo/MSF/StreamArray.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/MSF/StreamWriter.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/MathExtras.h" + +#include +#include + +namespace llvm { +namespace pdb { + +class HashTable { + friend class HashTableIterator; + struct Header { + support::ulittle32_t Size; + support::ulittle32_t Capacity; + }; + + typedef std::vector> BucketList; + +public: + HashTable(); + explicit HashTable(uint32_t Capacity); + + Error load(msf::StreamReader &Stream); + + uint32_t calculateSerializedLength() const; + Error commit(msf::StreamWriter &Writer) const; + + uint32_t capacity() const; + uint32_t size() const; + + HashTableIterator begin() const; + HashTableIterator end() const; + HashTableIterator find(uint32_t K); + + void set(uint32_t K, uint32_t V); + void remove(uint32_t K); + uint32_t get(uint32_t K); + +protected: + bool isPresent(uint32_t K) const { return Present.test(K); } + bool isDeleted(uint32_t K) const { return Deleted.test(K); } + BucketList Buckets; + mutable SparseBitVector<> Present; + mutable SparseBitVector<> Deleted; + +private: + static uint32_t maxLoad(uint32_t capacity); + void grow(); + + static Error readSparseBitVector(msf::StreamReader &Stream, + SparseBitVector<> &V); + static Error writeSparseBitVector(msf::StreamWriter &Writer, + SparseBitVector<> &Vec); +}; + +class HashTableIterator + : public iterator_facade_base> { + friend class HashTable; + HashTableIterator(const HashTable &Map, uint32_t Index, bool IsEnd); + +public: + HashTableIterator(const HashTable &Map); + + HashTableIterator &operator=(const HashTableIterator &R); + bool operator==(const HashTableIterator &R) const; + const std::pair &operator*() const; + HashTableIterator &operator++(); + +private: + bool isEnd() const { return IsEnd; } + uint32_t index() const { return Index; } + + const HashTable *Map; + uint32_t Index; + bool IsEnd; +}; + +} // end namespace pdb +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_RAW_HASHTABLE_H Index: llvm/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h =================================================================== --- llvm/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h +++ llvm/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h @@ -10,11 +10,12 @@ #ifndef LLVM_DEBUGINFO_PDB_RAW_PDBNAMEMAPBUILDER_H #define LLVM_DEBUGINFO_PDB_RAW_PDBNAMEMAPBUILDER_H -#include "llvm/ADT/StringMap.h" +#include "llvm/DebugInfo/PDB/Raw/HashTable.h" #include "llvm/Support/Error.h" #include #include +#include namespace llvm { namespace msf { @@ -29,14 +30,14 @@ void addMapping(StringRef Name, uint32_t Mapping); - Expected> build(); Error commit(msf::StreamWriter &Writer) const; uint32_t calculateSerializedLength() const; private: - StringMap Map; - uint32_t StringDataBytes = 0; + std::vector Strings; + HashTable Map; + uint32_t Offset = 0; }; } // end namespace pdb Index: llvm/lib/DebugInfo/PDB/CMakeLists.txt =================================================================== --- llvm/lib/DebugInfo/PDB/CMakeLists.txt +++ llvm/lib/DebugInfo/PDB/CMakeLists.txt @@ -34,6 +34,7 @@ Raw/GlobalsStream.cpp Raw/GSI.cpp Raw/Hash.cpp + Raw/HashTable.cpp Raw/InfoStream.cpp Raw/InfoStreamBuilder.cpp Raw/ModInfo.cpp Index: llvm/lib/DebugInfo/PDB/Raw/HashTable.cpp =================================================================== --- /dev/null +++ llvm/lib/DebugInfo/PDB/Raw/HashTable.cpp @@ -0,0 +1,294 @@ +//===- HashTable.cpp - PDB Hash Table ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/HashTable.h" + +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/DebugInfo/PDB/Raw/RawError.h" + +using namespace llvm; +using namespace llvm::pdb; + +HashTable::HashTable() : HashTable(8) {} + +HashTable::HashTable(uint32_t Capacity) { Buckets.resize(Capacity); } + +Error HashTable::load(msf::StreamReader &Stream) { + const Header *HAH; + if (auto EC = Stream.readObject(HAH)) + return EC; + if (HAH->Capacity == 0) + return make_error(raw_error_code::corrupt_file, + "Invalid Hash Table Capacity"); + if (HAH->Size > maxLoad(HAH->Capacity)) + return make_error(raw_error_code::corrupt_file, + "Invalid Hash Table Size"); + + Buckets.resize(HAH->Capacity); + + if (auto EC = readSparseBitVector(Stream, Present)) + return EC; + if (Present.count() != HAH->Size) + return make_error(raw_error_code::corrupt_file, + "Present bit vector does not match size!"); + + if (auto EC = readSparseBitVector(Stream, Deleted)) + return EC; + if (Present.intersects(Deleted)) + return make_error(raw_error_code::corrupt_file, + "Present bit vector interesects deleted!"); + + for (uint32_t P : Present) { + if (auto EC = Stream.readInteger(Buckets[P].first)) + return EC; + if (auto EC = Stream.readInteger(Buckets[P].second)) + return EC; + } + + return Error::success(); +} + +uint32_t HashTable::calculateSerializedLength() const { + uint32_t Size = sizeof(Header); + + int NumBitsP = Present.find_last() + 1; + int NumBitsD = Deleted.find_last() + 1; + + // Present bit set number of words, followed by that many actual words. + Size += sizeof(uint32_t); + Size += alignTo(NumBitsP, sizeof(uint32_t)); + + // Deleted bit set number of words, followed by that many actual words. + Size += sizeof(uint32_t); + Size += alignTo(NumBitsD, sizeof(uint32_t)); + + // One (Key, Value) pair for each entry Present. + Size += 2 * sizeof(uint32_t) * size(); + + return Size; +} + +Error HashTable::commit(msf::StreamWriter &Writer) const { + Header H; + H.Size = size(); + H.Capacity = capacity(); + if (auto EC = Writer.writeObject(H)) + return EC; + + if (auto EC = writeSparseBitVector(Writer, Present)) + return EC; + + if (auto EC = writeSparseBitVector(Writer, Deleted)) + return EC; + + for (const auto &Entry : *this) { + if (auto EC = Writer.writeInteger(Entry.first)) + return EC; + if (auto EC = Writer.writeInteger(Entry.second)) + return EC; + } + return Error::success(); +} + +uint32_t HashTable::capacity() const { return Buckets.size(); } +uint32_t HashTable::size() const { return Present.count(); } + +HashTableIterator HashTable::begin() const { return HashTableIterator(*this); } +HashTableIterator HashTable::end() const { + return HashTableIterator(*this, 0, true); +} + +HashTableIterator HashTable::find(uint32_t K) { + uint32_t H = K % capacity(); + uint32_t I = H; + Optional FirstUnused; + do { + if (isPresent(I)) { + if (Buckets[I].first == K) + return HashTableIterator(*this, I, false); + } else { + if (!FirstUnused) + FirstUnused = I; + // Insertion occurs via linear probing from the slot hint, and will be + // inserted at the first empty / deleted location. Therefore, if we are + // probing and find a location that is neither present nor deleted, then + // nothing must have EVER been inserted at this location, and thus it is + // not possible for a matching value to occur later. + if (!isDeleted(I)) + break; + } + I = (I + 1) % capacity(); + } while (I != H); + + // The only way FirstUnused would not be set is if every single entry in the + // table were Present. But this would violate the load factor constraints + // that we impose, so it should never happen. + assert(FirstUnused); + return HashTableIterator(*this, *FirstUnused, true); +} + +void HashTable::set(uint32_t K, uint32_t V) { + auto Entry = find(K); + if (Entry != end()) { + assert(isPresent(Entry.index())); + assert(Buckets[Entry.index()].first == K); + // We're updating, no need to do anything special. + Buckets[Entry.index()].second = V; + return; + } + + auto &B = Buckets[Entry.index()]; + assert(!isPresent(Entry.index())); + assert(Entry.isEnd()); + B.first = K; + B.second = V; + Present.set(Entry.index()); + Deleted.reset(Entry.index()); + + grow(); + + assert(find(K) != end()); +} + +void HashTable::remove(uint32_t K) { + auto Iter = find(K); + // It wasn't here to begin with, just exit. + if (Iter == end()) + return; + + assert(Present.test(Iter.index())); + assert(!Deleted.test(Iter.index())); + Deleted.set(Iter.index()); + Present.reset(Iter.index()); +} + +uint32_t HashTable::get(uint32_t K) { + auto I = find(K); + assert(I != end()); + return (*I).second; +} + +uint32_t HashTable::maxLoad(uint32_t capacity) { return capacity * 2 / 3 + 1; } + +void HashTable::grow() { + uint32_t S = size(); + if (S < maxLoad(capacity())) + return; + assert(capacity() != UINT32_MAX, "Can't grow Hash table!"); + + uint32_t NewCapacity = + (capacity() <= INT32_MAX) ? capacity() * 2 : UINT32_MAX; + + // Growing requires rebuilding the table and re-hashing every item. Make a + // copy with a larger capacity, insert everything into the copy, then swap + // it in. + HashTable NewMap(NewCapacity); + for (auto I : Present) { + NewMap.set(Buckets[I].first, Buckets[I].second); + } + + Buckets.swap(NewMap.Buckets); + std::swap(Present, NewMap.Present); + std::swap(Deleted, NewMap.Deleted); + assert(capacity() == NewCapacity); + assert(size() == S); +} + +Error HashTable::readSparseBitVector(msf::StreamReader &Stream, + SparseBitVector<> &V) { + uint32_t NumWords; + if (auto EC = Stream.readInteger(NumWords)) + return joinErrors( + std::move(EC), + make_error(raw_error_code::corrupt_file, + "Expected hash table number of words")); + + for (uint32_t I = 0; I != NumWords; ++I) { + uint32_t Word; + if (auto EC = Stream.readInteger(Word)) + return joinErrors(std::move(EC), + make_error(raw_error_code::corrupt_file, + "Expected hash table word")); + for (unsigned Idx = 0; Idx < 32; ++Idx) + if (Word & (1U << Idx)) + V.set((I * 32) + Idx); + } + return Error::success(); +} + +Error HashTable::writeSparseBitVector(msf::StreamWriter &Writer, + SparseBitVector<> &Vec) { + int ReqBits = Vec.find_last() + 1; + uint32_t NumWords = alignTo(ReqBits, sizeof(uint32_t)) / sizeof(uint32_t); + if (auto EC = Writer.writeInteger(NumWords)) + return joinErrors( + std::move(EC), + make_error(raw_error_code::corrupt_file, + "Could not write linear map number of words")); + + uint32_t Idx = 0; + for (uint32_t I = 0; I != NumWords; ++I) { + uint32_t Word = 0; + for (uint32_t WordIdx = 0; WordIdx < 32; ++WordIdx, ++Idx) { + if (Vec.test(Idx)) + Word |= (1 << WordIdx); + } + if (auto EC = Writer.writeInteger(Word)) + return joinErrors(std::move(EC), make_error( + raw_error_code::corrupt_file, + "Could not write linear map word")); + } + return Error::success(); +} + +HashTableIterator::HashTableIterator(const HashTable &Map, uint32_t Index, + bool IsEnd) + : Map(&Map), Index(Index), IsEnd(IsEnd) {} + +HashTableIterator::HashTableIterator(const HashTable &Map) : Map(&Map) { + int I = Map.Present.find_first(); + if (I == -1) { + Index = 0; + IsEnd = true; + } else { + Index = static_cast(I); + IsEnd = false; + } +} + +HashTableIterator &HashTableIterator::operator=(const HashTableIterator &R) { + Map = R.Map; + return *this; +} + +bool HashTableIterator::operator==(const HashTableIterator &R) const { + if (IsEnd && R.IsEnd) + return true; + if (IsEnd != R.IsEnd) + return false; + + return (Map == R.Map) && (Index == R.Index); +} + +const std::pair &HashTableIterator::operator*() const { + assert(Map->Present.test(Index)); + return Map->Buckets[Index]; +} + +HashTableIterator &HashTableIterator::operator++() { + while (Index < Map->Buckets.size()) { + ++Index; + if (Map->Present.test(Index)) + return *this; + } + + IsEnd = true; + return *this; +} Index: llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp =================================================================== --- llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp +++ llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp @@ -7,12 +7,14 @@ // //===----------------------------------------------------------------------===// +#include "llvm/DebugInfo/PDB/Raw/NameMap.h" + #include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" #include "llvm/DebugInfo/MSF/StreamReader.h" -#include "llvm/DebugInfo/PDB/Raw/NameMap.h" +#include "llvm/DebugInfo/PDB/Raw/HashTable.h" #include "llvm/DebugInfo/PDB/Raw/RawError.h" #include "llvm/Support/Error.h" #include @@ -25,123 +27,35 @@ NameMap::NameMap() = default; Error NameMap::load(StreamReader &Stream) { - // This is some sort of weird string-set/hash table encoded in the stream. - // It starts with the number of bytes in the table. - uint32_t NumberOfBytes; - if (auto EC = Stream.readInteger(NumberOfBytes)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map length")); - if (Stream.bytesRemaining() < NumberOfBytes) - return make_error(raw_error_code::corrupt_file, - "Invalid name map length"); - - // Following that field is the starting offset of strings in the name table. - uint32_t StringsOffset = Stream.getOffset(); - Stream.setOffset(StringsOffset + NumberOfBytes); - - // This appears to be equivalent to the total number of strings *actually* - // in the name table. - uint32_t HashSize; - if (auto EC = Stream.readInteger(HashSize)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map hash size")); - - // This appears to be an upper bound on the number of strings in the name - // table. - uint32_t MaxNumberOfStrings; - if (auto EC = Stream.readInteger(MaxNumberOfStrings)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map max strings")); - - if (MaxNumberOfStrings > (UINT32_MAX / sizeof(uint32_t))) - return make_error(raw_error_code::corrupt_file, - "Implausible number of strings"); - - const uint32_t MaxNumberOfWords = UINT32_MAX / (sizeof(uint32_t) * 8); - - // This appears to be a hash table which uses bitfields to determine whether - // or not a bucket is 'present'. - uint32_t NumPresentWords; - if (auto EC = Stream.readInteger(NumPresentWords)) + uint32_t StringBufferSize; + if (auto EC = Stream.readInteger(StringBufferSize)) return joinErrors(std::move(EC), make_error(raw_error_code::corrupt_file, - "Expected name map num words")); + "Expected string buffer size")); - if (NumPresentWords > MaxNumberOfWords) - return make_error(raw_error_code::corrupt_file, - "Number of present words is too large"); + msf::ReadableStreamRef StringsBuffer; + if (auto EC = Stream.readStreamRef(StringsBuffer, StringBufferSize)) + return EC; - SparseBitVector<> Present; - for (uint32_t I = 0; I != NumPresentWords; ++I) { - uint32_t Word; - if (auto EC = Stream.readInteger(Word)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map word")); - for (unsigned Idx = 0; Idx < 32; ++Idx) - if (Word & (1U << Idx)) - Present.set((I * 32) + Idx); - } - - // This appears to be a hash table which uses bitfields to determine whether - // or not a bucket is 'deleted'. - uint32_t NumDeletedWords; - if (auto EC = Stream.readInteger(NumDeletedWords)) - return joinErrors( - std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map num deleted words")); + HashTable OffsetIndexMap; + if (auto EC = OffsetIndexMap.load(Stream)) + return EC; - if (NumDeletedWords > MaxNumberOfWords) - return make_error(raw_error_code::corrupt_file, - "Number of deleted words is too large"); - - SparseBitVector<> Deleted; - for (uint32_t I = 0; I != NumDeletedWords; ++I) { - uint32_t Word; - if (auto EC = Stream.readInteger(Word)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map word")); - for (unsigned Idx = 0; Idx < 32; ++Idx) - if (Word & (1U << Idx)) - Deleted.set((I * 32) + Idx); - } - - for (unsigned I : Present) { - // For all present entries, dump out their mapping. - (void)I; - - // This appears to be an offset relative to the start of the strings. - // It tells us where the null-terminated string begins. - uint32_t NameOffset; - if (auto EC = Stream.readInteger(NameOffset)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map name offset")); - - // This appears to be a stream number into the stream directory. - uint32_t NameIndex; - if (auto EC = Stream.readInteger(NameIndex)) - return joinErrors(std::move(EC), - make_error(raw_error_code::corrupt_file, - "Expected name map name index")); + uint32_t NameOffset; + uint32_t NameIndex; + for (const auto &Entry : OffsetIndexMap) { + std::tie(NameOffset, NameIndex) = Entry; // Compute the offset of the start of the string relative to the stream. - uint32_t StringOffset = StringsOffset + NameOffset; - uint32_t OldOffset = Stream.getOffset(); + msf::StreamReader NameReader(StringsBuffer); + NameReader.setOffset(NameOffset); // Pump out our c-string from the stream. StringRef Str; - Stream.setOffset(StringOffset); - if (auto EC = Stream.readZeroString(Str)) + if (auto EC = NameReader.readZeroString(Str)) return joinErrors(std::move(EC), make_error(raw_error_code::corrupt_file, "Expected name map name")); - Stream.setOffset(OldOffset); // Add this to a string-map from name to stream number. Mapping.insert({Str, NameIndex}); } Index: llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp =================================================================== --- llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp +++ llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp @@ -22,87 +22,39 @@ NameMapBuilder::NameMapBuilder() = default; void NameMapBuilder::addMapping(StringRef Name, uint32_t Mapping) { - StringDataBytes += Name.size() + 1; - Map.insert({Name, Mapping}); -} - -Expected> NameMapBuilder::build() { - auto Result = llvm::make_unique(); - Result->Mapping = Map; - return std::move(Result); + Strings.push_back(Name); + Map.set(Offset, Mapping); + Offset += Name.size() + 1; } uint32_t NameMapBuilder::calculateSerializedLength() const { uint32_t TotalLength = 0; - TotalLength += sizeof(support::ulittle32_t); // StringDataBytes value - TotalLength += StringDataBytes; // actual string data - - TotalLength += sizeof(support::ulittle32_t); // Hash Size - TotalLength += sizeof(support::ulittle32_t); // Max Number of Strings - TotalLength += sizeof(support::ulittle32_t); // Num Present Words - // One bitmask word for each present entry - TotalLength += Map.size() * sizeof(support::ulittle32_t); - TotalLength += sizeof(support::ulittle32_t); // Num Deleted Words - - // For each present word, which we are treating as equivalent to the number of - // entries in the table, we have a pair of integers. An offset into the - // string data, and a corresponding stream number. - TotalLength += Map.size() * 2 * sizeof(support::ulittle32_t); + // Number of bytes of string data. + TotalLength += sizeof(support::ulittle32_t); + // Followed by that many actual bytes of string data. + TotalLength += Offset; + // Followed by the mapping from Name to Index. + TotalLength += Map.calculateSerializedLength(); return TotalLength; } Error NameMapBuilder::commit(msf::StreamWriter &Writer) const { - // The first field is the number of bytes of string data. So add - // up the length of all strings plus a null terminator for each - // one. - uint32_t NumBytes = 0; - for (auto B = Map.begin(), E = Map.end(); B != E; ++B) { - NumBytes += B->getKeyLength() + 1; - } - - if (auto EC = Writer.writeInteger(NumBytes)) // Number of bytes of string data - return EC; - // Now all of the string data itself. - for (auto B = Map.begin(), E = Map.end(); B != E; ++B) { - if (auto EC = Writer.writeZeroString(B->getKey())) - return EC; - } - - if (auto EC = Writer.writeInteger(Map.size())) // Hash Size + // The first field is the number of bytes of string data. We've already been + // keeping a running total of this in `Offset`. + if (auto EC = Writer.writeInteger(Offset)) // Number of bytes of string data return EC; - if (auto EC = Writer.writeInteger(Map.size())) // Max Number of Strings - return EC; - - if (auto EC = Writer.writeInteger(Map.size())) // Num Present Words - return EC; - - // For each entry in the mapping, write a bit mask which represents a bucket - // to store it in. We don't use this, so the value we write isn't important - // to us, it just has to be there. - for (auto B = Map.begin(), E = Map.end(); B != E; ++B) { - if (auto EC = Writer.writeInteger(1U)) + // Now all of the string data itself. + for (auto S : Strings) { + if (auto EC = Writer.writeZeroString(S)) return EC; } - if (auto EC = Writer.writeInteger(0U)) // Num Deleted Words + // And finally the Linear Map. + if (auto EC = Map.commit(Writer)) return EC; - // Mappings of each word. - uint32_t OffsetSoFar = 0; - for (auto B = Map.begin(), E = Map.end(); B != E; ++B) { - // This is a list of key value pairs where the key is the offset into the - // strings buffer, and the value is a stream number. Write each pair. - if (auto EC = Writer.writeInteger(OffsetSoFar)) - return EC; - - if (auto EC = Writer.writeInteger(B->second)) - return EC; - - OffsetSoFar += B->getKeyLength() + 1; - } - return Error::success(); } Index: llvm/unittests/DebugInfo/PDB/CMakeLists.txt =================================================================== --- llvm/unittests/DebugInfo/PDB/CMakeLists.txt +++ llvm/unittests/DebugInfo/PDB/CMakeLists.txt @@ -5,6 +5,7 @@ ) set(DebugInfoPDBSources + HashTableTest.cpp MappedBlockStreamTest.cpp NameHashTableBuilderTest.cpp MSFBuilderTest.cpp Index: llvm/unittests/DebugInfo/PDB/HashTableTest.cpp =================================================================== --- /dev/null +++ llvm/unittests/DebugInfo/PDB/HashTableTest.cpp @@ -0,0 +1,167 @@ +//===- llvm/unittest/DebugInfo/PDB/HashTableTest.cpp ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ErrorChecking.h" +#include "gtest/gtest.h" + +#include "llvm/DebugInfo/MSF/ByteStream.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/MSF/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Raw/HashTable.h" + +#include + +using namespace llvm; +using namespace llvm::pdb; + +namespace { +class HashTableInternals : public HashTable { +public: + using HashTable::Buckets; + using HashTable::Present; + using HashTable::Deleted; +}; +} + +TEST(HashTableTest, TestSimple) { + HashTable Table; + EXPECT_EQ(0u, Table.size()); + EXPECT_GT(Table.capacity(), 0u); + + Table.set(3, 7); + EXPECT_EQ(1u, Table.size()); + ASSERT_NE(Table.end(), Table.find(3)); + EXPECT_EQ(7u, Table.get(3)); +} + +TEST(HashTableTest, TestCollision) { + HashTable Table; + EXPECT_EQ(0u, Table.size()); + EXPECT_GT(Table.capacity(), 0u); + + // We use knowledge of the hash table's implementation details to make sure + // to add another value that is the equivalent to the first value modulo the + // hash table's capacity. + uint32_t N1 = Table.capacity() + 1; + uint32_t N2 = 2 * N1; + + Table.set(N1, 7); + Table.set(N2, 12); + EXPECT_EQ(2u, Table.size()); + ASSERT_NE(Table.end(), Table.find(N1)); + ASSERT_NE(Table.end(), Table.find(N2)); + + EXPECT_EQ(7u, Table.get(N1)); + EXPECT_EQ(12u, Table.get(N2)); +} + +TEST(HashTableTest, TestRemove) { + HashTable Table; + EXPECT_EQ(0u, Table.size()); + EXPECT_GT(Table.capacity(), 0u); + + Table.set(1, 2); + Table.set(3, 4); + EXPECT_EQ(2u, Table.size()); + ASSERT_NE(Table.end(), Table.find(1)); + ASSERT_NE(Table.end(), Table.find(3)); + + EXPECT_EQ(2u, Table.get(1)); + EXPECT_EQ(4u, Table.get(3)); + + Table.remove(1u); + EXPECT_EQ(1u, Table.size()); + EXPECT_EQ(Table.end(), Table.find(1)); + ASSERT_NE(Table.end(), Table.find(3)); + EXPECT_EQ(4u, Table.get(3)); +} + +TEST(HashTableTest, TestCollisionAfterMultipleProbes) { + HashTable Table; + EXPECT_EQ(0u, Table.size()); + EXPECT_GT(Table.capacity(), 0u); + + // Probing looks for the first available slot. A slot may already be filled + // as a result of an item with a *different* hash value already being there. + // Test that when this happens, the probe still finds the value. + uint32_t N1 = Table.capacity() + 1; + uint32_t N2 = N1 + 1; + uint32_t N3 = 2 * N1; + + Table.set(N1, 7); + Table.set(N2, 11); + Table.set(N3, 13); + EXPECT_EQ(3u, Table.size()); + ASSERT_NE(Table.end(), Table.find(N1)); + ASSERT_NE(Table.end(), Table.find(N2)); + ASSERT_NE(Table.end(), Table.find(N3)); + + EXPECT_EQ(7u, Table.get(N1)); + EXPECT_EQ(11u, Table.get(N2)); + EXPECT_EQ(13u, Table.get(N3)); + + // Remove the one that had been filled in the middle, then insert another one + // with a collision. It should fill the newly emptied slot. + Table.remove(N2); + uint32_t N4 = N1 * 3; + Table.set(N4, 17); + EXPECT_EQ(3u, Table.size()); + ASSERT_NE(Table.end(), Table.find(N1)); + ASSERT_NE(Table.end(), Table.find(N3)); + ASSERT_NE(Table.end(), Table.find(N4)); + + EXPECT_EQ(7u, Table.get(N1)); + EXPECT_EQ(13u, Table.get(N3)); + EXPECT_EQ(17u, Table.get(N4)); +} + +TEST(HashTableTest, Grow) { + // So that we are independent of the load factor, `capacity` items, which is + // guaranteed to trigger a grow. Then verify that the size is the same, the + // capacity is larger, and all the original items are still in the table. + + HashTable Table; + uint32_t OldCapacity = Table.capacity(); + for (uint32_t I = 0; I < OldCapacity; ++I) { + Table.set(OldCapacity + I * 2 + 1, I * 2 + 3); + } + EXPECT_EQ(OldCapacity, Table.size()); + EXPECT_GT(Table.capacity(), OldCapacity); + for (uint32_t I = 0; I < OldCapacity; ++I) { + ASSERT_NE(Table.end(), Table.find(OldCapacity + I * 2 + 1)); + EXPECT_EQ(I * 2 + 3, Table.get(OldCapacity + I * 2 + 1)); + } +} + +TEST(HashTableTest, Serialization) { + HashTableInternals Table; + uint32_t Cap = Table.capacity(); + for (uint32_t I = 0; I < Cap; ++I) { + Table.set(Cap + I * 2 + 1, I * 2 + 3); + } + + std::vector Buffer(Table.calculateSerializedLength()); + msf::MutableByteStream Stream(Buffer); + msf::StreamWriter Writer(Stream); + EXPECT_NO_ERROR(Table.commit(Writer)); + // We should have written precisely the number of bytes we calculated earlier. + EXPECT_EQ(Buffer.size(), Writer.getOffset()); + + HashTableInternals Table2; + msf::StreamReader Reader(Stream); + EXPECT_NO_ERROR(Table2.load(Reader)); + // We should have read precisely the number of bytes we calculated earlier. + EXPECT_EQ(Buffer.size(), Reader.getOffset()); + + EXPECT_EQ(Table.size(), Table2.size()); + EXPECT_EQ(Table.capacity(), Table2.capacity()); + EXPECT_EQ(Table.Buckets, Table2.Buckets); + EXPECT_EQ(Table.Present, Table2.Present); + EXPECT_EQ(Table.Deleted, Table2.Deleted); +}