Index: llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h =================================================================== --- llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h +++ llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h @@ -16,6 +16,7 @@ #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/DebugInfo/CodeView/TypeHashing.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/Allocator.h" #include @@ -27,13 +28,6 @@ namespace codeview { class ContinuationRecordBuilder; -class TypeHasher; - -struct HashedType { - hash_code Hash; - ArrayRef Data; - TypeIndex Index; -}; class MergingTypeTableBuilder : public TypeCollection { /// Storage for records. These need to outlive the TypeTableBuilder. @@ -45,14 +39,11 @@ SimpleTypeSerializer SimpleSerializer; /// Hash table. - DenseSet HashedRecords; + DenseMap HashedRecords; /// Contains a list of all records indexed by TypeIndex.toArrayIndex(). SmallVector, 2> SeenRecords; - /// Contains a list of all hash codes index by TypeIndex.toArrayIndex(). - SmallVector SeenHashes; - public: explicit MergingTypeTableBuilder(BumpPtrAllocator &Storage); ~MergingTypeTableBuilder(); @@ -73,7 +64,6 @@ BumpPtrAllocator &getAllocator() { return RecordStorage; } ArrayRef> records() const; - ArrayRef hashes() const; TypeIndex insertRecordAs(hash_code Hash, ArrayRef &Record); TypeIndex insertRecordBytes(ArrayRef &Record); Index: llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h =================================================================== --- /dev/null +++ llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h @@ -0,0 +1,119 @@ +//===- TypeHashing.h ---------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" + +namespace llvm { +namespace codeview { + +/// A locally hashed type represents a straightforward hash code of a serialized +/// record. The record is simply serialized, and then the bytes are hashed by +/// a standard algorithm. This is sufficient for the case of de-duplicating +/// records within a single sequence of types, because if two records both have +/// a back-reference to the same type in the same stream, they will both have +/// the same numeric value for the TypeIndex of the back reference. +struct LocallyHashedType { + hash_code Hash; + ArrayRef RecordData; + + static LocallyHashedType hashType(ArrayRef RecordData); +}; + +/// A globally hashed type represents a hash value that is sufficient to +/// uniquely identify a record across multiple type streams or type sequences. +/// This works by, for any given record A which references B, replacing the +/// TypeIndex that refers to B with a previously-computed global hash for B. As +/// this is a recursive algorithm (e.g. the global hash of B also depends on the +/// global hashes of the types that B refers to), a global hash can uniquely +/// identify identify that A occurs in another stream that has a completely +/// different graph structure. Although the hash itself is slower to compute, +/// probing is much faster with a globally hashed type, because the hash itself +/// is considered "as good as" the original type. Since type records can be +/// quite large, this makes the equality comparison of the hash much faster than +/// equality comparison of a full record. +struct GloballyHashedType { + GloballyHashedType() = default; + GloballyHashedType(StringRef H) + : GloballyHashedType(ArrayRef(H.bytes_begin(), H.bytes_end())) {} + GloballyHashedType(ArrayRef H) { + assert(H.size() == 20); + ::memcpy(Hash.data(), H.data(), 20); + } + std::array Hash; + + /// Given a sequence of bytes representing a record, compute a global hash for + /// this record. Due to the nature of global hashes incorporating the hashes + /// of referenced records, this function requires a list of types and ids + /// that RecordData might reference, indexable by TypeIndex. + static GloballyHashedType hashType(ArrayRef RecordData, + ArrayRef PreviousTypes, + ArrayRef PreviousIds); + + /// Given a sequence of combined type and ID records, compute global hashes + /// for each of them, returning the results in a vector of hashed types. + template + static std::vector hashTypes(Range &&Records) { + std::vector Hashes; + Hashes.reserve(std::distance(std::begin(Records), std::end(Records))); + for (const auto &R : Records) + Hashes.push_back(hashType(R, Hashes, Hashes)); + + return Hashes; + } +}; +} // namespace codeview + +template <> struct DenseMapInfo { + static codeview::LocallyHashedType Empty; + static codeview::LocallyHashedType Tombstone; + + static codeview::LocallyHashedType getEmptyKey() { return Empty; } + + static codeview::LocallyHashedType getTombstoneKey() { return Tombstone; } + + static unsigned getHashValue(codeview::LocallyHashedType Val) { + return Val.Hash; + } + + static bool isEqual(codeview::LocallyHashedType LHS, + codeview::LocallyHashedType RHS) { + if (LHS.Hash != RHS.Hash) + return false; + return LHS.RecordData == RHS.RecordData; + } +}; + +template <> struct DenseMapInfo { + static codeview::GloballyHashedType Empty; + static codeview::GloballyHashedType Tombstone; + + static codeview::GloballyHashedType getEmptyKey() { return Empty; } + + static codeview::GloballyHashedType getTombstoneKey() { return Tombstone; } + + static unsigned getHashValue(codeview::GloballyHashedType Val) { + return *reinterpret_cast(Val.Hash.data()); + } + + static bool isEqual(codeview::GloballyHashedType LHS, + codeview::GloballyHashedType RHS) { + return LHS.Hash == RHS.Hash; + } +}; + +} // namespace llvm + +#endif Index: llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h =================================================================== --- llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h +++ llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -334,6 +334,11 @@ uint32_t Attrs; Optional MemberInfo; + void setAttrs(PointerKind PK, PointerMode PM, PointerOptions PO, + uint8_t Size) { + Attrs = calcAttrs(PK, PM, PO, Size); + } + private: static uint32_t calcAttrs(PointerKind PK, PointerMode PM, PointerOptions PO, uint8_t Size) { Index: llvm/lib/DebugInfo/CodeView/CMakeLists.txt =================================================================== --- llvm/lib/DebugInfo/CodeView/CMakeLists.txt +++ llvm/lib/DebugInfo/CodeView/CMakeLists.txt @@ -32,6 +32,7 @@ TypeDumpVisitor.cpp TypeIndex.cpp TypeIndexDiscovery.cpp + TypeHashing.cpp TypeRecordMapping.cpp TypeStreamMerger.cpp TypeTableCollection.cpp Index: llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp =================================================================== --- llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp +++ llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp @@ -28,27 +28,6 @@ using namespace llvm; using namespace llvm::codeview; -static HashedType Empty{0, {}, TypeIndex::None()}; -static HashedType Tombstone{hash_code(-1), {}, TypeIndex::None()}; - -namespace llvm { - -template <> struct DenseMapInfo { - static inline HashedType getEmptyKey() { return Empty; } - - static inline HashedType getTombstoneKey() { return Tombstone; } - - static unsigned getHashValue(HashedType Val) { return Val.Hash; } - - static bool isEqual(HashedType LHS, HashedType RHS) { - if (RHS.Hash != LHS.Hash) - return false; - return RHS.Data == LHS.Data; - } -}; - -} // end namespace llvm - TypeIndex MergingTypeTableBuilder::nextTypeIndex() const { return TypeIndex::fromArrayIndex(SeenRecords.size()); } @@ -56,7 +35,6 @@ MergingTypeTableBuilder::MergingTypeTableBuilder(BumpPtrAllocator &Storage) : RecordStorage(Storage) { SeenRecords.reserve(4096); - SeenHashes.reserve(4096); } MergingTypeTableBuilder::~MergingTypeTableBuilder() = default; @@ -102,13 +80,8 @@ return SeenRecords; } -ArrayRef MergingTypeTableBuilder::hashes() const { - return SeenHashes; -} - void MergingTypeTableBuilder::reset() { HashedRecords.clear(); - SeenHashes.clear(); SeenRecords.clear(); } @@ -124,18 +97,19 @@ assert(Record.size() < UINT32_MAX && "Record too big"); assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); - HashedType TempHashedType = {Hash, Record, nextTypeIndex()}; - auto Result = HashedRecords.insert(TempHashedType); + LocallyHashedType WeakHash{Hash, Record}; + auto Result = HashedRecords.try_emplace(WeakHash, nextTypeIndex()); if (Result.second) { - Result.first->Data = stabilize(RecordStorage, Record); - SeenRecords.push_back(Result.first->Data); - SeenHashes.push_back(Result.first->Hash); + ArrayRef RecordData = stabilize(RecordStorage, Record); + Result.first->first.RecordData = RecordData; + SeenRecords.push_back(RecordData); } // Update the caller's copy of Record to point a stable copy. - Record = Result.first->Data; - return Result.first->Index; + TypeIndex ActualTI = Result.first->second; + Record = SeenRecords[ActualTI.toArrayIndex()]; + return ActualTI; } TypeIndex Index: llvm/lib/DebugInfo/CodeView/TypeHashing.cpp =================================================================== --- /dev/null +++ llvm/lib/DebugInfo/CodeView/TypeHashing.cpp @@ -0,0 +1,74 @@ +//===- TypeHashing.cpp -------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/TypeHashing.h" + +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" +#include "llvm/Support/SHA1.h" + +using namespace llvm; +using namespace llvm::codeview; + +LocallyHashedType DenseMapInfo::Empty{0, {}}; +LocallyHashedType DenseMapInfo::Tombstone{hash_code(-1), {}}; + +static std::array EmptyHash; +static std::array TombstoneHash = { + 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + +GloballyHashedType DenseMapInfo::Empty{EmptyHash}; +GloballyHashedType DenseMapInfo::Tombstone{TombstoneHash}; + +LocallyHashedType LocallyHashedType::hashType(ArrayRef RecordData) { + return {llvm::hash_value(RecordData), RecordData}; +} + +GloballyHashedType +GloballyHashedType::hashType(ArrayRef RecordData, + ArrayRef PreviousTypes, + ArrayRef PreviousIds) { + SmallVector Refs; + discoverTypeIndices(RecordData, Refs); + SHA1 S; + S.init(); + uint32_t Off = 0; + RecordData = RecordData.drop_front(sizeof(RecordPrefix)); + for (const auto &Ref : Refs) { + // Hash any data that comes before this TiRef. + uint32_t PreLen = Ref.Offset - Off; + ArrayRef PreData = RecordData.slice(Off, PreLen); + S.update(PreData); + auto Prev = (Ref.Kind == TiRefKind::IndexRef) ? PreviousIds : PreviousTypes; + + auto RefData = RecordData.slice(Ref.Offset, Ref.Count * sizeof(TypeIndex)); + // For each type index referenced, add in the previously computed hash + // value of that type. + ArrayRef Indices( + reinterpret_cast(RefData.data()), Ref.Count); + for (TypeIndex TI : Indices) { + ArrayRef BytesToHash; + if (TI.isSimple() || TI.isNoneType()) { + const uint8_t *IndexBytes = reinterpret_cast(&TI); + BytesToHash = makeArrayRef(IndexBytes, sizeof(TypeIndex)); + } else { + BytesToHash = Prev[TI.toArrayIndex()].Hash; + } + S.update(BytesToHash); + } + + Off = Ref.Offset + Ref.Count * sizeof(TypeIndex); + } + + // Don't forget to add in any trailing bytes. + auto TrailingBytes = RecordData.drop_front(Off); + S.update(TrailingBytes); + + return {S.final()}; +} Index: llvm/unittests/DebugInfo/CodeView/CMakeLists.txt =================================================================== --- llvm/unittests/DebugInfo/CodeView/CMakeLists.txt +++ llvm/unittests/DebugInfo/CodeView/CMakeLists.txt @@ -4,6 +4,7 @@ set(DebugInfoCodeViewSources RandomAccessVisitorTest.cpp + TypeHashingTest.cpp TypeIndexDiscoveryTest.cpp ) Index: llvm/unittests/DebugInfo/CodeView/TypeHashingTest.cpp =================================================================== --- /dev/null +++ llvm/unittests/DebugInfo/CodeView/TypeHashingTest.cpp @@ -0,0 +1,156 @@ +//===- llvm/unittest/DebugInfo/CodeView/TypeHashingTest.cpp ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/TypeHashing.h" +#include "llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h" + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::codeview; + +static TypeIndex createPointerRecord(AppendingTypeTableBuilder &Builder, + TypeIndex TI) { + PointerRecord PR(TypeRecordKind::Pointer); + PR.setAttrs(PointerKind::Near32, PointerMode::Pointer, PointerOptions::None, + 4); + PR.ReferentType = TI; + return Builder.writeLeafType(PR); +} + +static TypeIndex createArgListRecord(AppendingTypeTableBuilder &Builder, + TypeIndex Q, TypeIndex R) { + ArgListRecord AR(TypeRecordKind::ArgList); + AR.ArgIndices.push_back(Q); + AR.ArgIndices.push_back(R); + return Builder.writeLeafType(AR); +} + +static TypeIndex createProcedureRecord(AppendingTypeTableBuilder &Builder, + uint32_t ParamCount, TypeIndex Return, + TypeIndex ArgList) { + ProcedureRecord PR(TypeRecordKind::Procedure); + PR.ArgumentList = ArgList; + PR.CallConv = CallingConvention::NearC; + PR.Options = FunctionOptions::None; + PR.ParameterCount = ParamCount; + PR.ReturnType = Return; + return Builder.writeLeafType(PR); +} + +static ArrayRef hash_of(ArrayRef Hashes, + TypeIndex TI) { + return Hashes[TI.toArrayIndex()].Hash; +} + +static void verifyHashUniqueness(ArrayRef Hashes) { + assert(!Hashes.empty()); + + for (size_t I = 0; I < Hashes.size() - 1; ++I) { + for (size_t J = I + 1; J < Hashes.size(); ++J) { + EXPECT_NE(Hashes[I].Hash, Hashes[J].Hash); + } + } +} + +TEST(TypeHashingTest, ContentHash) { + SimpleTypeSerializer Serializer; + + TypeIndex CharStar(SimpleTypeKind::SignedCharacter, + SimpleTypeMode::NearPointer32); + + BumpPtrAllocator Alloc; + AppendingTypeTableBuilder Ordering1(Alloc); + AppendingTypeTableBuilder Ordering2(Alloc); + + TypeIndex CharP(SimpleTypeKind::SignedCharacter, SimpleTypeMode::NearPointer); + TypeIndex IntP(SimpleTypeKind::Int32, SimpleTypeMode::NearPointer); + TypeIndex DoubleP(SimpleTypeKind::Float64, SimpleTypeMode::NearPointer); + + // We're going to the same type sequence with two different orderings, and + // then confirm all records are hashed the same. + + TypeIndex CharPP[2]; + TypeIndex IntPP[2]; + TypeIndex IntPPP[2]; + TypeIndex DoublePP[2]; + TypeIndex Args[2]; + TypeIndex Proc[2]; + + // Ordering 1 + // ---------------------------------------- + // LF_POINTER 0x1000 {char**} + // Referent = char* + // LF_POINTER 0x1001 {int**} + // Referent = int* + // LF_POINTER 0x1002 {int***} + // Referent = 0x1001 + // LF_ARGLIST 0x1003 {(char**, int***)} + // Arg[0] = 0x1000 + // Arg[1] = 0x1002 + // LF_PROCEDURE 0x1004 {int** func(char**, int***)} + // ArgList = 0x1003 + // ReturnType = 0x1001 + std::vector Ordering1Hashes; + CharPP[0] = createPointerRecord(Ordering1, CharP); + IntPP[0] = createPointerRecord(Ordering1, IntP); + IntPPP[0] = createPointerRecord(Ordering1, IntPP[0]); + Args[0] = createArgListRecord(Ordering1, CharPP[0], IntPPP[0]); + Proc[0] = createProcedureRecord(Ordering1, 2, IntPP[0], Args[0]); + + ASSERT_EQ(0x1000U, CharPP[0].getIndex()); + ASSERT_EQ(0x1001U, IntPP[0].getIndex()); + ASSERT_EQ(0x1002U, IntPPP[0].getIndex()); + ASSERT_EQ(0x1003U, Args[0].getIndex()); + ASSERT_EQ(0x1004U, Proc[0].getIndex()); + + auto Hashes1 = GloballyHashedType::hashTypes(Ordering1.records()); + + // Ordering 2 + // ---------------------------------------- + // LF_POINTER 0x1000 {int**} + // Referent = int* + // LF_POINTER 0x1001 {int***} + // Referent = 0x1000 + // LF_POINTER 0x1002 {char**} + // Referent = char* + // LF_POINTER 0x1003 {double**} + // Referent = double* + // LF_ARGLIST 0x1004 {(char**, int***)} + // Arg[0] = 0x1002 + // Arg[1] = 0x1001 + // LF_PROCEDURE 0x1005 {int** func(char**, int***)} + // ArgList = 0x1004 + // ReturnType = 0x1000 + IntPP[1] = createPointerRecord(Ordering2, IntP); + IntPPP[1] = createPointerRecord(Ordering2, IntPP[1]); + CharPP[1] = createPointerRecord(Ordering2, CharP); + DoublePP[1] = createPointerRecord(Ordering2, DoubleP); + Args[1] = createArgListRecord(Ordering2, CharPP[1], IntPPP[1]); + Proc[1] = createProcedureRecord(Ordering2, 2, IntPP[1], Args[1]); + auto Hashes2 = GloballyHashedType::hashTypes(Ordering2.records()); + + ASSERT_EQ(0x1000U, IntPP[1].getIndex()); + ASSERT_EQ(0x1001U, IntPPP[1].getIndex()); + ASSERT_EQ(0x1002U, CharPP[1].getIndex()); + ASSERT_EQ(0x1003U, DoublePP[1].getIndex()); + ASSERT_EQ(0x1004U, Args[1].getIndex()); + ASSERT_EQ(0x1005U, Proc[1].getIndex()); + + // Sanity check to make sure all same-ordering hashes are different + // from each other. + verifyHashUniqueness(Hashes1); + verifyHashUniqueness(Hashes2); + + EXPECT_EQ(hash_of(Hashes1, IntPP[0]), hash_of(Hashes2, IntPP[1])); + EXPECT_EQ(hash_of(Hashes1, IntPPP[0]), hash_of(Hashes2, IntPPP[1])); + EXPECT_EQ(hash_of(Hashes1, CharPP[0]), hash_of(Hashes2, CharPP[1])); + EXPECT_EQ(hash_of(Hashes1, Args[0]), hash_of(Hashes2, Args[1])); + EXPECT_EQ(hash_of(Hashes1, Proc[0]), hash_of(Hashes2, Proc[1])); +}