Index: llvm/include/llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h =================================================================== --- /dev/null +++ llvm/include/llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h @@ -0,0 +1,41 @@ +//===- NameHashTableBuilder.h - PDB Name Hash Table Builder -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file creates the "/names" stream. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLEBUILDER_H +#define LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLEBUILDER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include + +namespace llvm { +namespace pdb { + +class NameHashTableBuilder { +public: + // If string S does not exist in the string table, insert it. + // Returns the ID for S. + uint32_t insert(StringRef S); + + // Finalizes the string table contents and returns it. + std::vector build() const; + +private: + DenseMap Strings; + uint32_t StringSize = 1; +}; + +} // end namespace pdb +} // end namespace llvm + +#endif // LLVM_DEBUGINFO_PDB_RAW_NAMEHASHTABLEBUILDER_H Index: llvm/lib/DebugInfo/PDB/CMakeLists.txt =================================================================== --- llvm/lib/DebugInfo/PDB/CMakeLists.txt +++ llvm/lib/DebugInfo/PDB/CMakeLists.txt @@ -39,6 +39,7 @@ Raw/ModInfo.cpp Raw/ModStream.cpp Raw/NameHashTable.cpp + Raw/NameHashTableBuilder.cpp Raw/NameMap.cpp Raw/NameMapBuilder.cpp Raw/PDBFile.cpp Index: llvm/lib/DebugInfo/PDB/Raw/NameHashTableBuilder.cpp =================================================================== --- /dev/null +++ llvm/lib/DebugInfo/PDB/Raw/NameHashTableBuilder.cpp @@ -0,0 +1,98 @@ +//===- NameHashTable.cpp - PDB Name Hash Table ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/DebugInfo/PDB/Raw/Hash.h" +#include "llvm/Support/Endian.h" + +using namespace llvm; +using namespace llvm::support; +using namespace llvm::support::endian; +using namespace llvm::pdb; + +// The /names stream starts with a 4-byte signature followed by +// a 4-byte hash version and a 4-byte size string table. +static const uint32_t HeaderSize = 12; + +// Hash entries follow 4-byte size field. +static const uint32_t EntryHeaderSize = 4; + +// The /names stream ends with the number of strings. +static const uint32_t TrailerSize = 4; + +uint32_t NameHashTableBuilder::insert(StringRef S) { + auto P = Strings.insert({S, StringSize}); + + // If a given string didn't exist in the string table, + // we want to increment the string table size. + if (P.second) + StringSize += S.size() + 1; // +1 for '\0' + return P.first->second; +} + +static uint32_t computeHashEntries(uint32_t NumStrings) { + // The /names stream is basically an on-disk open-addressing hash table. + // Hash collisions are resolved by linear probing. We cannot make + // utilization to 100% because it will make the linear probing extremely + // slow. But lower utilization wastes disk space. As a reasonable + // utilization, we choose 80%. We need +1 becuase slot 0 is reserved. + return (NumStrings + 1) * 1.25; +} + +std::vector NameHashTableBuilder::build() const { + uint32_t NumEntries = computeHashEntries(Strings.size()); + uint32_t Total = + HeaderSize + StringSize + EntryHeaderSize + NumEntries * 4 + TrailerSize; + std::vector Vec(Total); + uint8_t *Buf = Vec.data(); + + // Write a header + write32le(Buf, 0xEFFEEFFE); // Magic + write32le(Buf + 4, 1); // HashVersion + write32le(Buf + 8, StringSize); // ByteSize + Buf += 12; + + // Write a string table. + for (auto Pair : Strings) { + StringRef S = Pair.first; + uint32_t Offset = Pair.second; + memcpy(Buf + Offset, S.data(), S.size()); + } + Buf += StringSize; + + // Write a hash table. + write32le(Buf, NumEntries); + Buf += 4; + + for (auto Pair : Strings) { + StringRef S = Pair.first; + uint32_t Offset = Pair.second; + uint32_t Hash = hashStringV1(S); + + for (uint32_t I = 0; I != NumEntries; ++I) { + uint32_t Slot = (Hash + I) % NumEntries; + + // Skip reserved slot + if (Slot == 0) + continue; + + if (read32le(Buf + Slot * 4) != 0) + continue; + write32le(Buf + Slot * 4, Offset); + break; + } + } + Buf += NumEntries * 4; + + // Write a trailer. + write32le(Buf, Strings.size()); + + return Vec; +} Index: llvm/unittests/DebugInfo/PDB/CMakeLists.txt =================================================================== --- llvm/unittests/DebugInfo/PDB/CMakeLists.txt +++ llvm/unittests/DebugInfo/PDB/CMakeLists.txt @@ -6,6 +6,7 @@ set(DebugInfoPDBSources MappedBlockStreamTest.cpp + NameHashTableBuilderTest.cpp MSFBuilderTest.cpp PDBApiTest.cpp ) Index: llvm/unittests/DebugInfo/PDB/NameHashTableBuilderTest.cpp =================================================================== --- /dev/null +++ llvm/unittests/DebugInfo/PDB/NameHashTableBuilderTest.cpp @@ -0,0 +1,49 @@ +//===- NameHashTableBuilderTest.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ErrorChecking.h" + +#include "llvm/DebugInfo/MSF/ByteStream.h" +#include "llvm/DebugInfo/MSF/StreamReader.h" +#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h" +#include "llvm/DebugInfo/PDB/Raw/NameHashTableBuilder.h" + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::pdb; + +namespace { +class NameHashTableBuilderTest : public ::testing::Test {}; +} + +TEST_F(NameHashTableBuilderTest, Simple) { + // Create /names table contents. + NameHashTableBuilder Builder; + EXPECT_EQ(1U, Builder.insert("foo")); + EXPECT_EQ(5U, Builder.insert("bar")); + EXPECT_EQ(1U, Builder.insert("foo")); + EXPECT_EQ(9U, Builder.insert("baz")); + std::vector Data = Builder.build(); + + // Reads the contents back. + msf::ByteStream Stream(Data); + msf::StreamReader Reader(Stream); + NameHashTable Table; + EXPECT_NO_ERROR(Table.load(Reader)); + + EXPECT_EQ(3U, Table.getNameCount()); + EXPECT_EQ(1U, Table.getHashVersion()); + EXPECT_EQ("foo", Table.getStringForID(1)); + EXPECT_EQ("bar", Table.getStringForID(5)); + EXPECT_EQ("baz", Table.getStringForID(9)); + EXPECT_EQ(1U, Table.getIDForString("foo")); + EXPECT_EQ(5U, Table.getIDForString("bar")); + EXPECT_EQ(9U, Table.getIDForString("baz")); +}