Index: llvm/trunk/include/llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h +++ llvm/trunk/include/llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h @@ -0,0 +1,45 @@ +//===- NameHashTableBuilder.h - PDB Name Hash Table Builder -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file creates the "/names" stream. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLEBUILDER_H +#define LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLEBUILDER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Error.h" +#include + +namespace llvm { +namespace msf { +class StreamWriter; +} +namespace pdb { + +class NameHashTableBuilder { +public: + // If string S does not exist in the string table, insert it. + // Returns the ID for S. + uint32_t insert(StringRef S); + + uint32_t calculateSerializedLength() const; + Error commit(msf::StreamWriter &Writer) const; + +private: + DenseMap Strings; + uint32_t StringSize = 1; +}; + +} // end namespace pdb +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLEBUILDER_H Index: llvm/trunk/include/llvm/DebugInfo/PDB/Raw/RawTypes.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/PDB/Raw/RawTypes.h +++ llvm/trunk/include/llvm/DebugInfo/PDB/Raw/RawTypes.h @@ -302,6 +302,15 @@ PDB_UniqueId Guid; }; +/// The header preceeding the /names stream. +struct NameHashTableHeader { + support::ulittle32_t Signature; + support::ulittle32_t HashVersion; + support::ulittle32_t ByteSize; +}; + +const uint32_t NameHashTableSignature = 0xEFFEEFFE; + } // namespace pdb } // namespace llvm Index: llvm/trunk/lib/DebugInfo/PDB/CMakeLists.txt =================================================================== --- llvm/trunk/lib/DebugInfo/PDB/CMakeLists.txt +++ llvm/trunk/lib/DebugInfo/PDB/CMakeLists.txt @@ -39,6 +39,7 @@ Raw/ModInfo.cpp Raw/ModStream.cpp Raw/NameHashTable.cpp + Raw/NameHashTableBuilder.cpp Raw/NameMap.cpp Raw/NameMapBuilder.cpp Raw/PDBFile.cpp Index: llvm/trunk/lib/DebugInfo/PDB/Raw/NameHashTable.cpp =================================================================== --- llvm/trunk/lib/DebugInfo/PDB/Raw/NameHashTable.cpp +++ llvm/trunk/lib/DebugInfo/PDB/Raw/NameHashTable.cpp @@ -13,6 +13,7 @@ #include "llvm/DebugInfo/MSF/StreamReader.h" #include "llvm/DebugInfo/PDB/Raw/Hash.h" #include "llvm/DebugInfo/PDB/Raw/RawError.h" +#include "llvm/DebugInfo/PDB/Raw/RawTypes.h" #include "llvm/Support/Endian.h" using namespace llvm; @@ -23,17 +24,11 @@ NameHashTable::NameHashTable() : Signature(0), HashVersion(0), NameCount(0) {} Error NameHashTable::load(StreamReader &Stream) { - struct Header { - support::ulittle32_t Signature; - support::ulittle32_t HashVersion; - support::ulittle32_t ByteSize; - }; - - const Header *H; + const NameHashTableHeader *H; if (auto EC = Stream.readObject(H)) return EC; - if (H->Signature != 0xEFFEEFFE) + if (H->Signature != NameHashTableSignature) return make_error(raw_error_code::corrupt_file, "Invalid hash table signature"); if (H->HashVersion != 1 && H->HashVersion != 2) Index: llvm/trunk/lib/DebugInfo/PDB/Raw/NameHashTableBuilder.cpp =================================================================== --- llvm/trunk/lib/DebugInfo/PDB/Raw/NameHashTableBuilder.cpp +++ llvm/trunk/lib/DebugInfo/PDB/Raw/NameHashTableBuilder.cpp @@ -0,0 +1,101 @@ +//===- NameHashTable.cpp - PDB Name Hash Table ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/MSF/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Raw/Hash.h" +#include "llvm/DebugInfo/PDB/Raw/RawTypes.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::support::endian; +using namespace llvm::pdb; + +uint32_t NameHashTableBuilder::insert(StringRef S) { + auto P = Strings.insert({S, StringSize}); + + // If a given string didn't exist in the string table, we want to increment + // the string table size. + if (P.second) + StringSize += S.size() + 1; // +1 for '\0' + return P.first->second; +} + +static uint32_t computeBucketCount(uint32_t NumStrings) { + // The /names stream is basically an on-disk open-addressing hash table. + // Hash collisions are resolved by linear probing. We cannot make + // utilization 100% because it will make the linear probing extremely + // slow. But lower utilization wastes disk space. As a reasonable + // load factor, we choose 80%. We need +1 because slot 0 is reserved. + return (NumStrings + 1) * 1.25; +} + +uint32_t NameHashTableBuilder::calculateSerializedLength() const { + uint32_t Size = 0; + Size += sizeof(NameHashTableHeader); + Size += StringSize; + Size += 4; // Hash table begins with 4-byte size field. + + uint32_t BucketCount = computeBucketCount(Strings.size()); + Size += BucketCount * 4; + + Size += 4; // The /names stream ends with the number of strings. + return Size; +} + +Error NameHashTableBuilder::commit(msf::StreamWriter &Writer) const { + // Write a header + NameHashTableHeader H; + H.Signature = NameHashTableSignature; + H.HashVersion = 1; + H.ByteSize = StringSize; + if (auto EC = Writer.writeObject(H)) + return EC; + + // Write a string table. + uint32_t StringStart = Writer.getOffset(); + for (auto Pair : Strings) { + StringRef S = Pair.first; + uint32_t Offset = Pair.second; + Writer.setOffset(StringStart + Offset); + if (auto EC = Writer.writeZeroString(S)) + return EC; + } + Writer.setOffset(StringStart + StringSize); + + // Write a hash table. + uint32_t BucketCount = computeBucketCount(Strings.size()); + if (auto EC = Writer.writeInteger(BucketCount)) + return EC; + std::vector Buckets(BucketCount); + + for (auto Pair : Strings) { + StringRef S = Pair.first; + uint32_t Offset = Pair.second; + uint32_t Hash = hashStringV1(S); + + for (uint32_t I = 0; I != BucketCount; ++I) { + uint32_t Slot = (Hash + I) % BucketCount; + if (Slot == 0) + continue; // Skip reserved slot + if (Buckets[Slot] != 0) + continue; + Buckets[Slot] = Offset; + break; + } + } + + if (auto EC = Writer.writeArray(ArrayRef(Buckets))) + return EC; + if (auto EC = Writer.writeInteger(static_cast(Strings.size()))) + return EC; + return Error::success(); +} Index: llvm/trunk/unittests/DebugInfo/PDB/CMakeLists.txt =================================================================== --- llvm/trunk/unittests/DebugInfo/PDB/CMakeLists.txt +++ llvm/trunk/unittests/DebugInfo/PDB/CMakeLists.txt @@ -6,6 +6,7 @@ set(DebugInfoPDBSources MappedBlockStreamTest.cpp + NameHashTableBuilderTest.cpp MSFBuilderTest.cpp PDBApiTest.cpp ) Index: llvm/trunk/unittests/DebugInfo/PDB/NameHashTableBuilderTest.cpp =================================================================== --- llvm/trunk/unittests/DebugInfo/PDB/NameHashTableBuilderTest.cpp +++ llvm/trunk/unittests/DebugInfo/PDB/NameHashTableBuilderTest.cpp @@ -0,0 +1,54 @@ +//===- NameHashTableBuilderTest.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ErrorChecking.h" + +#include "llvm/DebugInfo/MSF/ByteStream.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/MSF/StreamWriter.h" +#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h" +#include "llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h" + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::pdb; + +namespace { +class NameHashTableBuilderTest : public ::testing::Test {}; +} + +TEST_F(NameHashTableBuilderTest, Simple) { + // Create /names table contents. + NameHashTableBuilder Builder; + EXPECT_EQ(1U, Builder.insert("foo")); + EXPECT_EQ(5U, Builder.insert("bar")); + EXPECT_EQ(1U, Builder.insert("foo")); + EXPECT_EQ(9U, Builder.insert("baz")); + + std::vector Buffer(Builder.calculateSerializedLength()); + msf::MutableByteStream OutStream(Buffer); + msf::StreamWriter Writer(OutStream); + EXPECT_NO_ERROR(Builder.commit(Writer)); + + // Reads the contents back. + msf::ByteStream InStream(Buffer); + msf::StreamReader Reader(InStream); + NameHashTable Table; + EXPECT_NO_ERROR(Table.load(Reader)); + + EXPECT_EQ(3U, Table.getNameCount()); + EXPECT_EQ(1U, Table.getHashVersion()); + EXPECT_EQ("foo", Table.getStringForID(1)); + EXPECT_EQ("bar", Table.getStringForID(5)); + EXPECT_EQ("baz", Table.getStringForID(9)); + EXPECT_EQ(1U, Table.getIDForString("foo")); + EXPECT_EQ(5U, Table.getIDForString("bar")); + EXPECT_EQ(9U, Table.getIDForString("baz")); +}