Index: llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h =================================================================== --- llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h +++ llvm/include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h @@ -16,6 +16,7 @@ #include "llvm/DebugInfo/CodeView/CodeView.h" #include "llvm/DebugInfo/CodeView/SimpleTypeSerializer.h" #include "llvm/DebugInfo/CodeView/TypeCollection.h" +#include "llvm/DebugInfo/CodeView/TypeHashing.h" #include "llvm/DebugInfo/CodeView/TypeIndex.h" #include "llvm/Support/Allocator.h" #include @@ -27,13 +28,6 @@ namespace codeview { class ContinuationRecordBuilder; -class TypeHasher; - -struct HashedType { - hash_code Hash; - ArrayRef Data; - TypeIndex Index; -}; class MergingTypeTableBuilder : public TypeCollection { /// Storage for records. These need to outlive the TypeTableBuilder. @@ -45,14 +39,11 @@ SimpleTypeSerializer SimpleSerializer; /// Hash table. - DenseSet HashedRecords; + DenseSet HashedRecords; /// Contains a list of all records indexed by TypeIndex.toArrayIndex(). SmallVector, 2> SeenRecords; - /// Contains a list of all hash codes index by TypeIndex.toArrayIndex(). - SmallVector SeenHashes; - public: explicit MergingTypeTableBuilder(BumpPtrAllocator &Storage); ~MergingTypeTableBuilder(); @@ -73,7 +64,6 @@ BumpPtrAllocator &getAllocator() { return RecordStorage; } ArrayRef> records() const; - ArrayRef hashes() const; TypeIndex insertRecordAs(hash_code Hash, ArrayRef &Record); TypeIndex insertRecordBytes(ArrayRef &Record); Index: llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h =================================================================== --- /dev/null +++ llvm/include/llvm/DebugInfo/CodeView/TypeHashing.h @@ -0,0 +1,116 @@ +//===- TypeHashing.h ---------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H +#define LLVM_DEBUGINFO_CODEVIEW_TYPEHASHING_H + +#include "llvm/DebugInfo/CodeView/CodeView.h" +#include "llvm/DebugInfo/CodeView/TypeIndex.h" + +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" + +namespace llvm { +namespace codeview { + +/// A weakly hashed type represents a straightforward hash code of a serialized +/// record. The record is simply serialized, and then the bytes are hashed by +/// a standard algorithm. This is sufficient for the case of de-duplicating +/// records within a single sequence of types, because if two records both have +/// a back-reference to the same type in the same stream, they will both have +/// the same numeric value for the TypeIndex of the back reference. +struct WeaklyHashedType { + hash_code Hash; + ArrayRef RecordData; + TypeIndex Index; + + static WeaklyHashedType hashType(TypeIndex Index, + ArrayRef RecordData, + ArrayRef PreviousTypes); +}; + +/// A content hashed type represents a hash value that is sufficient to uniquely +/// identify a record across multiple type streams or type sequences. This +/// works by, for any given record A which references B, replacing the TypeIndex +/// that refers to B with a previously-computed content hash for B. As this is +/// a recursive algorithm (e.g. the content hash of B also depends on the +/// content hashes of the types that B refers to), a content hash can uniquely +/// identify identify that A occurs in another stream that has a completely +/// different graph structure. Given that the hash itself is slower to compute, +/// it should only be preferred when cross-stream de-duplication is required. +struct ContentHashedType { + ContentHashedType() = default; + ContentHashedType(StringRef H, TypeIndex TI) + : ContentHashedType(ArrayRef(H.bytes_begin(), H.bytes_end()), + TI) {} + ContentHashedType(ArrayRef H, TypeIndex TI) : Index(TI) { + assert(H.size() == 20); + ::memcpy(Hash.data(), H.data(), 20); + } + std::array Hash; + TypeIndex Index; + + static ContentHashedType hashType(TypeIndex Index, + ArrayRef RecordData, + ArrayRef PreviousTypes); + + template + static std::vector hashTypes(Range &&Records) { + std::vector Hashes; + Hashes.reserve(std::distance(std::begin(Records), std::end(Records))); + TypeIndex TI(TypeIndex::FirstNonSimpleIndex); + for (const auto &R : Records) + Hashes.push_back(hashType(TI, R, Hashes)); + + return Hashes; + } +}; +} // namespace codeview + +template <> struct DenseMapInfo { + static codeview::WeaklyHashedType Empty; + static codeview::WeaklyHashedType Tombstone; + + static codeview::WeaklyHashedType getEmptyKey() { return Empty; } + + static codeview::WeaklyHashedType getTombstoneKey() { return Tombstone; } + + static unsigned getHashValue(codeview::WeaklyHashedType Val) { + return Val.Hash; + } + + static bool isEqual(codeview::WeaklyHashedType LHS, + codeview::WeaklyHashedType RHS) { + if (LHS.Hash != RHS.Hash) + return false; + return LHS.RecordData == RHS.RecordData; + } +}; + +template <> struct DenseMapInfo { + static codeview::ContentHashedType Empty; + static codeview::ContentHashedType Tombstone; + + static codeview::ContentHashedType getEmptyKey() { return Empty; } + + static codeview::ContentHashedType getTombstoneKey() { return Tombstone; } + + static unsigned getHashValue(codeview::ContentHashedType Val) { + return *reinterpret_cast(Val.Hash.data()); + } + + static bool isEqual(codeview::WeaklyHashedType LHS, + codeview::WeaklyHashedType RHS) { + return LHS.Hash == RHS.Hash; + } +}; + +} // namespace llvm + +#endif Index: llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h =================================================================== --- llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h +++ llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h @@ -334,6 +334,11 @@ uint32_t Attrs; Optional MemberInfo; + void setAttrs(PointerKind PK, PointerMode PM, PointerOptions PO, + uint8_t Size) { + Attrs = calcAttrs(PK, PM, PO, Size); + } + private: static uint32_t calcAttrs(PointerKind PK, PointerMode PM, PointerOptions PO, uint8_t Size) { Index: llvm/lib/DebugInfo/CodeView/CMakeLists.txt =================================================================== --- llvm/lib/DebugInfo/CodeView/CMakeLists.txt +++ llvm/lib/DebugInfo/CodeView/CMakeLists.txt @@ -32,6 +32,7 @@ TypeDumpVisitor.cpp TypeIndex.cpp TypeIndexDiscovery.cpp + TypeHashing.cpp TypeRecordMapping.cpp TypeStreamMerger.cpp TypeTableCollection.cpp Index: llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp =================================================================== --- llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp +++ llvm/lib/DebugInfo/CodeView/MergingTypeTableBuilder.cpp @@ -28,27 +28,6 @@ using namespace llvm; using namespace llvm::codeview; -static HashedType Empty{0, {}, TypeIndex::None()}; -static HashedType Tombstone{hash_code(-1), {}, TypeIndex::None()}; - -namespace llvm { - -template <> struct DenseMapInfo { - static inline HashedType getEmptyKey() { return Empty; } - - static inline HashedType getTombstoneKey() { return Tombstone; } - - static unsigned getHashValue(HashedType Val) { return Val.Hash; } - - static bool isEqual(HashedType LHS, HashedType RHS) { - if (RHS.Hash != LHS.Hash) - return false; - return RHS.Data == LHS.Data; - } -}; - -} // end namespace llvm - TypeIndex MergingTypeTableBuilder::nextTypeIndex() const { return TypeIndex::fromArrayIndex(SeenRecords.size()); } @@ -56,7 +35,6 @@ MergingTypeTableBuilder::MergingTypeTableBuilder(BumpPtrAllocator &Storage) : RecordStorage(Storage) { SeenRecords.reserve(4096); - SeenHashes.reserve(4096); } MergingTypeTableBuilder::~MergingTypeTableBuilder() = default; @@ -102,13 +80,8 @@ return SeenRecords; } -ArrayRef MergingTypeTableBuilder::hashes() const { - return SeenHashes; -} - void MergingTypeTableBuilder::reset() { HashedRecords.clear(); - SeenHashes.clear(); SeenRecords.clear(); } @@ -124,17 +97,16 @@ assert(Record.size() < UINT32_MAX && "Record too big"); assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); - HashedType TempHashedType = {Hash, Record, nextTypeIndex()}; + WeaklyHashedType TempHashedType = {Hash, Record, nextTypeIndex()}; auto Result = HashedRecords.insert(TempHashedType); if (Result.second) { - Result.first->Data = stabilize(RecordStorage, Record); - SeenRecords.push_back(Result.first->Data); - SeenHashes.push_back(Result.first->Hash); + Result.first->RecordData = stabilize(RecordStorage, Record); + SeenRecords.push_back(Result.first->RecordData); } // Update the caller's copy of Record to point a stable copy. - Record = Result.first->Data; + Record = Result.first->RecordData; return Result.first->Index; } Index: llvm/lib/DebugInfo/CodeView/TypeHashing.cpp =================================================================== --- /dev/null +++ llvm/lib/DebugInfo/CodeView/TypeHashing.cpp @@ -0,0 +1,78 @@ +//===- TypeHashing.cpp -------------------------------------------*- C++-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/TypeHashing.h" + +#include "llvm/DebugInfo/CodeView/TypeIndexDiscovery.h" +#include "llvm/Support/SHA1.h" + +using namespace llvm; +using namespace llvm::codeview; + +WeaklyHashedType DenseMapInfo::Empty{ + 0, {}, TypeIndex::None()}; +WeaklyHashedType DenseMapInfo::Tombstone{ + hash_code(-1), {}, TypeIndex::None()}; + +static std::array EmptyHash; +static std::array TombstoneHash = { + 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}; + +ContentHashedType DenseMapInfo::Empty{EmptyHash, + TypeIndex::None()}; +ContentHashedType DenseMapInfo::Tombstone{TombstoneHash, + TypeIndex::None()}; + +WeaklyHashedType +WeaklyHashedType::hashType(TypeIndex Index, ArrayRef RecordData, + ArrayRef PreviousTypes) { + return {llvm::hash_value(RecordData), RecordData, Index}; +} + +ContentHashedType +ContentHashedType::hashType(TypeIndex Index, ArrayRef RecordData, + ArrayRef PreviousTypes) { + SmallVector Refs; + discoverTypeIndices(RecordData, Refs); + SHA1 S; + S.init(); + uint32_t Off = 0; + RecordData = RecordData.drop_front(sizeof(RecordPrefix)); + for (const auto &Ref : Refs) { + // Hash any data that comes before this TiRef. + uint32_t PreLen = Ref.Offset - Off; + ArrayRef PreData = RecordData.slice(Off, PreLen); + S.update(PreData); + + auto RefData = RecordData.slice(Ref.Offset, Ref.Count * sizeof(TypeIndex)); + // For each type index referenced, add in the previously computed hash + // value of that type. + ArrayRef Indices( + reinterpret_cast(RefData.data()), Ref.Count); + for (TypeIndex TI : Indices) { + ArrayRef BytesToHash; + if (TI.isSimple() || TI.isNoneType()) { + const uint8_t *IndexBytes = reinterpret_cast(&TI); + BytesToHash = makeArrayRef(IndexBytes, sizeof(TypeIndex)); + } else { + BytesToHash = PreviousTypes[TI.toArrayIndex()].Hash; + } + S.update(BytesToHash); + } + + Off = Ref.Offset + Ref.Count * sizeof(TypeIndex); + } + + // Don't forget to add in any trailing bytes. + auto TrailingBytes = RecordData.drop_front(Off); + S.update(TrailingBytes); + + return {S.final(), Index}; +} Index: llvm/unittests/DebugInfo/CodeView/CMakeLists.txt =================================================================== --- llvm/unittests/DebugInfo/CodeView/CMakeLists.txt +++ llvm/unittests/DebugInfo/CodeView/CMakeLists.txt @@ -4,6 +4,7 @@ set(DebugInfoCodeViewSources RandomAccessVisitorTest.cpp + TypeHashingTest.cpp TypeIndexDiscoveryTest.cpp ) Index: llvm/unittests/DebugInfo/CodeView/TypeHashingTest.cpp =================================================================== --- /dev/null +++ llvm/unittests/DebugInfo/CodeView/TypeHashingTest.cpp @@ -0,0 +1,156 @@ +//===- llvm/unittest/DebugInfo/CodeView/TypeHashingTest.cpp ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/DebugInfo/CodeView/TypeHashing.h" +#include "llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h" + +#include "gtest/gtest.h" + +using namespace llvm; +using namespace llvm::codeview; + +static TypeIndex createPointerRecord(AppendingTypeTableBuilder &Builder, + TypeIndex TI) { + PointerRecord PR(TypeRecordKind::Pointer); + PR.setAttrs(PointerKind::Near32, PointerMode::Pointer, PointerOptions::None, + 4); + PR.ReferentType = TI; + return Builder.writeLeafType(PR); +} + +static TypeIndex createArgListRecord(AppendingTypeTableBuilder &Builder, + TypeIndex Q, TypeIndex R) { + ArgListRecord AR(TypeRecordKind::ArgList); + AR.ArgIndices.push_back(Q); + AR.ArgIndices.push_back(R); + return Builder.writeLeafType(AR); +} + +static TypeIndex createProcedureRecord(AppendingTypeTableBuilder &Builder, + uint32_t ParamCount, TypeIndex Return, + TypeIndex ArgList) { + ProcedureRecord PR(TypeRecordKind::Procedure); + PR.ArgumentList = ArgList; + PR.CallConv = CallingConvention::NearC; + PR.Options = FunctionOptions::None; + PR.ParameterCount = ParamCount; + PR.ReturnType = Return; + return Builder.writeLeafType(PR); +} + +static ArrayRef hash_of(ArrayRef Hashes, + TypeIndex TI) { + return Hashes[TI.toArrayIndex()].Hash; +} + +static void verifyHashUniqueness(ArrayRef Hashes) { + assert(!Hashes.empty()); + + for (size_t I = 0; I < Hashes.size() - 1; ++I) { + for (size_t J = I + 1; J < Hashes.size(); ++J) { + EXPECT_NE(Hashes[I].Hash, Hashes[J].Hash); + } + } +} + +TEST(TypeHashingTest, ContentHash) { + SimpleTypeSerializer Serializer; + + TypeIndex CharStar(SimpleTypeKind::SignedCharacter, + SimpleTypeMode::NearPointer32); + + BumpPtrAllocator Alloc; + AppendingTypeTableBuilder Ordering1(Alloc); + AppendingTypeTableBuilder Ordering2(Alloc); + + TypeIndex CharP(SimpleTypeKind::SignedCharacter, SimpleTypeMode::NearPointer); + TypeIndex IntP(SimpleTypeKind::Int32, SimpleTypeMode::NearPointer); + TypeIndex DoubleP(SimpleTypeKind::Float64, SimpleTypeMode::NearPointer); + + // We're going to the same type sequence with two different orderings, and + // then confirm all records are hashed the same. + + TypeIndex CharPP[2]; + TypeIndex IntPP[2]; + TypeIndex IntPPP[2]; + TypeIndex DoublePP[2]; + TypeIndex Args[2]; + TypeIndex Proc[2]; + + // Ordering 1 + // ---------------------------------------- + // LF_POINTER 0x1000 {char**} + // Referent = char* + // LF_POINTER 0x1001 {int**} + // Referent = int* + // LF_POINTER 0x1002 {int***} + // Referent = 0x1001 + // LF_ARGLIST 0x1003 {(char**, int***)} + // Arg[0] = 0x1000 + // Arg[1] = 0x1002 + // LF_PROCEDURE 0x1004 {int** func(char**, int***)} + // ArgList = 0x1003 + // ReturnType = 0x1001 + std::vector Ordering1Hashes; + CharPP[0] = createPointerRecord(Ordering1, CharP); + IntPP[0] = createPointerRecord(Ordering1, IntP); + IntPPP[0] = createPointerRecord(Ordering1, IntPP[0]); + Args[0] = createArgListRecord(Ordering1, CharPP[0], IntPPP[0]); + Proc[0] = createProcedureRecord(Ordering1, 2, IntPP[0], Args[0]); + + ASSERT_EQ(0x1000U, CharPP[0].getIndex()); + ASSERT_EQ(0x1001U, IntPP[0].getIndex()); + ASSERT_EQ(0x1002U, IntPPP[0].getIndex()); + ASSERT_EQ(0x1003U, Args[0].getIndex()); + ASSERT_EQ(0x1004U, Proc[0].getIndex()); + + auto Hashes1 = ContentHashedType::hashTypes(Ordering1.records()); + + // Ordering 2 + // ---------------------------------------- + // LF_POINTER 0x1000 {int**} + // Referent = int* + // LF_POINTER 0x1001 {int***} + // Referent = 0x1000 + // LF_POINTER 0x1002 {char**} + // Referent = char* + // LF_POINTER 0x1003 {double**} + // Referent = double* + // LF_ARGLIST 0x1004 {(char**, int***)} + // Arg[0] = 0x1002 + // Arg[1] = 0x1001 + // LF_PROCEDURE 0x1005 {int** func(char**, int***)} + // ArgList = 0x1004 + // ReturnType = 0x1000 + IntPP[1] = createPointerRecord(Ordering2, IntP); + IntPPP[1] = createPointerRecord(Ordering2, IntPP[1]); + CharPP[1] = createPointerRecord(Ordering2, CharP); + DoublePP[1] = createPointerRecord(Ordering2, DoubleP); + Args[1] = createArgListRecord(Ordering2, CharPP[1], IntPPP[1]); + Proc[1] = createProcedureRecord(Ordering2, 2, IntPP[1], Args[1]); + auto Hashes2 = ContentHashedType::hashTypes(Ordering2.records()); + + ASSERT_EQ(0x1000U, IntPP[1].getIndex()); + ASSERT_EQ(0x1001U, IntPPP[1].getIndex()); + ASSERT_EQ(0x1002U, CharPP[1].getIndex()); + ASSERT_EQ(0x1003U, DoublePP[1].getIndex()); + ASSERT_EQ(0x1004U, Args[1].getIndex()); + ASSERT_EQ(0x1005U, Proc[1].getIndex()); + + // Sanity check to make sure all same-ordering hashes are different + // from each other. + verifyHashUniqueness(Hashes1); + verifyHashUniqueness(Hashes2); + + EXPECT_EQ(hash_of(Hashes1, IntPP[0]), hash_of(Hashes2, IntPP[1])); + EXPECT_EQ(hash_of(Hashes1, IntPPP[0]), hash_of(Hashes2, IntPPP[1])); + EXPECT_EQ(hash_of(Hashes1, CharPP[0]), hash_of(Hashes2, CharPP[1])); + EXPECT_EQ(hash_of(Hashes1, Args[0]), hash_of(Hashes2, Args[1])); + EXPECT_EQ(hash_of(Hashes1, Proc[0]), hash_of(Hashes2, Proc[1])); +}