Index: llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h =================================================================== --- llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h +++ llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h @@ -17,7 +17,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" @@ -26,6 +25,8 @@ namespace codeview { +class TypeHasher; + class TypeSerializer : public TypeVisitorCallbacks { struct SubRecord { SubRecord(TypeLeafKind K, uint32_t S) : Kind(K), Size(S) {} @@ -46,14 +47,12 @@ }; typedef SmallVector, 2> MutableRecordList; - typedef SmallVector, 2> RecordList; static constexpr uint8_t ContinuationLength = 8; BumpPtrAllocator &RecordStorage; RecordSegment CurrentSegment; MutableRecordList FieldListSegments; - TypeIndex LastTypeIndex; Optional TypeKind; Optional MemberKind; std::vector RecordBuffer; @@ -61,25 +60,22 @@ BinaryStreamWriter Writer; TypeRecordMapping Mapping; - RecordList SeenRecords; - StringMap HashedRecords; + /// Private type record hashing implementation details are handled here. + std::unique_ptr Hasher; bool isInFieldList() const; - TypeIndex calcNextTypeIndex() const; - TypeIndex incrementTypeIndex(); MutableArrayRef getCurrentSubRecordData(); MutableArrayRef getCurrentRecordData(); Error writeRecordPrefix(TypeLeafKind Kind); - TypeIndex insertRecordBytesPrivate(ArrayRef &Record); Expected> addPadding(MutableArrayRef Record); public: explicit TypeSerializer(BumpPtrAllocator &Storage); + ~TypeSerializer(); ArrayRef> records() const; - TypeIndex getLastTypeIndex() const; TypeIndex insertRecordBytes(ArrayRef Record); Expected visitTypeEndGetIndex(CVType &Record); Index: llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp =================================================================== --- llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp +++ llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/CodeView/TypeSerializer.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/BinaryStreamWriter.h" #include @@ -16,21 +17,118 @@ using namespace llvm; using namespace llvm::codeview; -bool TypeSerializer::isInFieldList() const { - return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST; -} +namespace { +struct HashedType { + uint64_t Hash; + const uint8_t *Data; + unsigned Size; // FIXME: Go to uint16_t? + unsigned Index; +}; + +/// Wrapper around a poitner to a HashedType. Hash and equality operations are +/// based on data in the pointee. +struct HashedTypePtr { + HashedTypePtr() = default; + HashedTypePtr(HashedType *Ptr) : Ptr(Ptr) {} + HashedType *Ptr = nullptr; + bool operator==(const HashedTypePtr &O) { return Ptr == O.Ptr; } + bool operator!=(const HashedTypePtr &O) { return Ptr != O.Ptr; } +}; +} // namespace + +template <> struct DenseMapInfo { + static inline HashedTypePtr getEmptyKey() { return HashedTypePtr(nullptr); } + static inline HashedTypePtr getTombstoneKey() { + return HashedTypePtr(reinterpret_cast(1)); + } + static unsigned getHashValue(HashedTypePtr Val) { + assert(Val.Ptr != getEmptyKey().Ptr && Val.Ptr != getTombstoneKey().Ptr); + return Val.Ptr->Hash; + } + static bool isEqual(HashedTypePtr LHSP, HashedTypePtr RHSP) { + HashedType *LHS = LHSP.Ptr; + HashedType *RHS = RHSP.Ptr; + if (RHS == getEmptyKey().Ptr || RHS == getTombstoneKey().Ptr) + return LHS == RHS; + if (LHS->Hash != RHS->Hash || LHS->Size != RHS->Size) + return false; + return ::memcmp(LHS->Data, RHS->Data, LHS->Size) == 0; + } +}; + +/// Private implementation so that we don't leak our DenseMap instantiations to +/// users. +class llvm::codeview::TypeHasher { +private: + /// Storage for type record provided by the caller. Records will outlive the + /// hasher object, so they should be allocated here. + BumpPtrAllocator &RecordStorage; + + /// Storage for hash keys. These only need to live as long as the hashing + /// operation. + BumpPtrAllocator KeyStorage; + + /// Hash table. We really want a DenseMap, TypeIndex> here, + /// but DenseMap is inefficient when the keys are long (like type records) + /// because it recomputes the hash value of every key when it grows. This + /// value type stores the hash out of line in KeyStorage, so that table + /// entries are small and easy to rehash. + DenseSet HashedRecords; + + SmallVector, 2> SeenRecords; + + unsigned NextTypeIndex = TypeIndex::FirstNonSimpleIndex; + +public: + TypeHasher(BumpPtrAllocator &RecordStorage) : RecordStorage(RecordStorage) {} + + ArrayRef> records() const { return SeenRecords; } + + /// Takes the bytes of type record, inserts them into the hash table, saves + /// them, and returns a pointer to an identical stable type record along with + /// its type index in the destination stream. + TypeIndex getOrCreateRecord(ArrayRef &Record); +}; + +TypeIndex TypeHasher::getOrCreateRecord(ArrayRef &Record) { + assert(Record.size() < UINT32_MAX && "Record too big"); + assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); + + // Compute the hash up front so we can store it in the key. + HashedType TempHashedType = {hash_value(Record), Record.data(), + unsigned(Record.size()), NextTypeIndex}; + + auto Result = HashedRecords.insert(HashedTypePtr(&TempHashedType)); + HashedType *&Hashed = Result.first->Ptr; + + if (Result.second) { + // This was a new type record. We need stable storage for both the key and + // the record. The record should outlive the hashing operation. + Hashed = KeyStorage.Allocate(); + *Hashed = TempHashedType; + + uint8_t *Stable = RecordStorage.Allocate(Record.size()); + memcpy(Stable, Record.data(), Record.size()); + Hashed->Data = Stable; + assert(Hashed->Size == Record.size()); + + // This was a new record, so increment our next type index. + ++NextTypeIndex; + } -TypeIndex TypeSerializer::calcNextTypeIndex() const { - if (LastTypeIndex.isNoneType()) - return TypeIndex(TypeIndex::FirstNonSimpleIndex); - else - return TypeIndex(LastTypeIndex.getIndex() + 1); + // Update the caller's copy of Record to point a stable copy. + Record = ArrayRef(Hashed->Data, Hashed->Size); + + if (Result.second) { + // FIXME: Can we record these in a more efficient way? + SeenRecords.push_back(Record); + } + + return TypeIndex(Hashed->Index); } -TypeIndex TypeSerializer::incrementTypeIndex() { - TypeIndex Previous = LastTypeIndex; - LastTypeIndex = calcNextTypeIndex(); - return Previous; +bool TypeSerializer::isInFieldList() const { + return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST; } MutableArrayRef TypeSerializer::getCurrentSubRecordData() { @@ -51,27 +149,6 @@ return Error::success(); } -TypeIndex -TypeSerializer::insertRecordBytesPrivate(ArrayRef &Record) { - assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); - - StringRef S(reinterpret_cast(Record.data()), Record.size()); - - TypeIndex NextTypeIndex = calcNextTypeIndex(); - auto Result = HashedRecords.try_emplace(S, NextTypeIndex); - - StringRef NewData = Result.first->getKey(); - Record = ArrayRef(NewData.bytes_begin(), NewData.bytes_end()); - - if (Result.second) { - // If this triggered an insert into the map, store the bytes. - LastTypeIndex = NextTypeIndex; - SeenRecords.push_back(Record); - } - - return Result.first->getValue(); -} - Expected> TypeSerializer::addPadding(MutableArrayRef Record) { uint32_t Align = Record.size() % 4; @@ -90,26 +167,25 @@ } TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage) - : RecordStorage(Storage), LastTypeIndex(), - RecordBuffer(MaxRecordLength * 2), + : RecordStorage(Storage), RecordBuffer(MaxRecordLength * 2), Stream(RecordBuffer, llvm::support::little), Writer(Stream), - Mapping(Writer), HashedRecords(Storage) { + Mapping(Writer), Hasher(make_unique(Storage)) { // RecordBuffer needs to be able to hold enough data so that if we are 1 // byte short of MaxRecordLen, and then we try to write MaxRecordLen bytes, // we won't overflow. } +TypeSerializer::~TypeSerializer() = default; + ArrayRef> TypeSerializer::records() const { - return SeenRecords; + return Hasher->records(); } -TypeIndex TypeSerializer::getLastTypeIndex() const { return LastTypeIndex; } - TypeIndex TypeSerializer::insertRecordBytes(ArrayRef Record) { assert(!TypeKind.hasValue() && "Already in a type mapping!"); assert(Writer.getOffset() == 0 && "Stream has data already!"); - return insertRecordBytesPrivate(Record); + return Hasher->getOrCreateRecord(Record); } Error TypeSerializer::visitTypeBegin(CVType &Record) { @@ -145,7 +221,7 @@ Record.Type = *TypeKind; Record.RecordData = ThisRecordData; - TypeIndex InsertedTypeIndex = insertRecordBytesPrivate(Record.RecordData); + TypeIndex InsertedTypeIndex = Hasher->getOrCreateRecord(Record.RecordData); // Write out each additional segment in reverse order, and update each // record's continuation index to point to the previous one. @@ -155,7 +231,7 @@ reinterpret_cast(CIBytes.data()); assert(*CI == 0xB0C0B0C0 && "Invalid TypeIndex placeholder"); *CI = InsertedTypeIndex.getIndex(); - InsertedTypeIndex = insertRecordBytesPrivate(X); + InsertedTypeIndex = Hasher->getOrCreateRecord(X); } TypeKind.reset();