Index: llvm/trunk/include/llvm/DebugInfo/CodeView/TypeSerializer.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/CodeView/TypeSerializer.h +++ llvm/trunk/include/llvm/DebugInfo/CodeView/TypeSerializer.h @@ -17,7 +17,6 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Error.h" @@ -26,6 +25,8 @@ namespace codeview { +class TypeHasher; + class TypeSerializer : public TypeVisitorCallbacks { struct SubRecord { SubRecord(TypeLeafKind K, uint32_t S) : Kind(K), Size(S) {} @@ -45,14 +46,13 @@ } }; - typedef SmallVector, 2> RecordList; + typedef SmallVector, 2> MutableRecordList; static constexpr uint8_t ContinuationLength = 8; BumpPtrAllocator &RecordStorage; RecordSegment CurrentSegment; - RecordList FieldListSegments; + MutableRecordList FieldListSegments; - TypeIndex LastTypeIndex; Optional TypeKind; Optional MemberKind; std::vector RecordBuffer; @@ -60,28 +60,23 @@ BinaryStreamWriter Writer; TypeRecordMapping Mapping; - RecordList SeenRecords; - StringMap HashedRecords; + /// Private type record hashing implementation details are handled here. + std::unique_ptr Hasher; bool isInFieldList() const; - TypeIndex calcNextTypeIndex() const; - TypeIndex incrementTypeIndex(); MutableArrayRef getCurrentSubRecordData(); MutableArrayRef getCurrentRecordData(); Error writeRecordPrefix(TypeLeafKind Kind); - TypeIndex insertRecordBytesPrivate(MutableArrayRef Record); - TypeIndex insertRecordBytesWithCopy(CVType &Record, - MutableArrayRef Data); Expected> addPadding(MutableArrayRef Record); public: explicit TypeSerializer(BumpPtrAllocator &Storage); + ~TypeSerializer(); - ArrayRef> records() const; - TypeIndex getLastTypeIndex() const; - TypeIndex insertRecordBytes(MutableArrayRef Record); + ArrayRef> records() const; + TypeIndex insertRecordBytes(ArrayRef Record); Expected visitTypeEndGetIndex(CVType &Record); Error visitTypeBegin(CVType &Record) override; Index: llvm/trunk/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h +++ llvm/trunk/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h @@ -64,7 +64,7 @@ return *ExpectedIndex; } - TypeIndex writeSerializedRecord(MutableArrayRef Record) { + TypeIndex writeSerializedRecord(ArrayRef Record) { return Serializer.insertRecordBytes(Record); } @@ -77,9 +77,7 @@ } } - ArrayRef> records() const { - return Serializer.records(); - } + ArrayRef> records() const { return Serializer.records(); } }; class FieldListRecordBuilder { Index: llvm/trunk/include/llvm/DebugInfo/CodeView/TypeTableCollection.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/CodeView/TypeTableCollection.h +++ llvm/trunk/include/llvm/DebugInfo/CodeView/TypeTableCollection.h @@ -18,7 +18,7 @@ class TypeTableCollection : public TypeCollection { public: - explicit TypeTableCollection(ArrayRef> Records); + explicit TypeTableCollection(ArrayRef> Records); Optional getFirst() override; Optional getNext(TypeIndex Prev) override; @@ -33,7 +33,7 @@ bool hasCapacityFor(TypeIndex Index) const; void ensureTypeExists(TypeIndex Index); - ArrayRef> Records; + ArrayRef> Records; TypeDatabase Database; }; } Index: llvm/trunk/lib/DebugInfo/CodeView/TypeSerializer.cpp =================================================================== --- llvm/trunk/lib/DebugInfo/CodeView/TypeSerializer.cpp +++ llvm/trunk/lib/DebugInfo/CodeView/TypeSerializer.cpp @@ -9,6 +9,7 @@ #include "llvm/DebugInfo/CodeView/TypeSerializer.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/BinaryStreamWriter.h" #include @@ -16,21 +17,116 @@ using namespace llvm; using namespace llvm::codeview; -bool TypeSerializer::isInFieldList() const { - return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST; -} +namespace { +struct HashedType { + uint64_t Hash; + const uint8_t *Data; + unsigned Size; // FIXME: Go to uint16_t? + TypeIndex Index; +}; + +/// Wrapper around a poitner to a HashedType. Hash and equality operations are +/// based on data in the pointee. +struct HashedTypePtr { + HashedTypePtr() = default; + HashedTypePtr(HashedType *Ptr) : Ptr(Ptr) {} + HashedType *Ptr = nullptr; +}; +} // namespace + +template <> struct DenseMapInfo { + static inline HashedTypePtr getEmptyKey() { return HashedTypePtr(nullptr); } + static inline HashedTypePtr getTombstoneKey() { + return HashedTypePtr(reinterpret_cast(1)); + } + static unsigned getHashValue(HashedTypePtr Val) { + assert(Val.Ptr != getEmptyKey().Ptr && Val.Ptr != getTombstoneKey().Ptr); + return Val.Ptr->Hash; + } + static bool isEqual(HashedTypePtr LHSP, HashedTypePtr RHSP) { + HashedType *LHS = LHSP.Ptr; + HashedType *RHS = RHSP.Ptr; + if (RHS == getEmptyKey().Ptr || RHS == getTombstoneKey().Ptr) + return LHS == RHS; + if (LHS->Hash != RHS->Hash || LHS->Size != RHS->Size) + return false; + return ::memcmp(LHS->Data, RHS->Data, LHS->Size) == 0; + } +}; + +/// Private implementation so that we don't leak our DenseMap instantiations to +/// users. +class llvm::codeview::TypeHasher { +private: + /// Storage for type record provided by the caller. Records will outlive the + /// hasher object, so they should be allocated here. + BumpPtrAllocator &RecordStorage; + + /// Storage for hash keys. These only need to live as long as the hashing + /// operation. + BumpPtrAllocator KeyStorage; + + /// Hash table. We really want a DenseMap, TypeIndex> here, + /// but DenseMap is inefficient when the keys are long (like type records) + /// because it recomputes the hash value of every key when it grows. This + /// value type stores the hash out of line in KeyStorage, so that table + /// entries are small and easy to rehash. + DenseSet HashedRecords; + + SmallVector, 2> SeenRecords; + + TypeIndex NextTypeIndex = TypeIndex(TypeIndex::FirstNonSimpleIndex); + +public: + TypeHasher(BumpPtrAllocator &RecordStorage) : RecordStorage(RecordStorage) {} + + ArrayRef> records() const { return SeenRecords; } + + /// Takes the bytes of type record, inserts them into the hash table, saves + /// them, and returns a pointer to an identical stable type record along with + /// its type index in the destination stream. + TypeIndex getOrCreateRecord(ArrayRef &Record); +}; + +TypeIndex TypeHasher::getOrCreateRecord(ArrayRef &Record) { + assert(Record.size() < UINT32_MAX && "Record too big"); + assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); + + // Compute the hash up front so we can store it in the key. + HashedType TempHashedType = {hash_value(Record), Record.data(), + unsigned(Record.size()), NextTypeIndex}; + + auto Result = HashedRecords.insert(HashedTypePtr(&TempHashedType)); + HashedType *&Hashed = Result.first->Ptr; + + if (Result.second) { + // This was a new type record. We need stable storage for both the key and + // the record. The record should outlive the hashing operation. + Hashed = KeyStorage.Allocate(); + *Hashed = TempHashedType; + + uint8_t *Stable = RecordStorage.Allocate(Record.size()); + memcpy(Stable, Record.data(), Record.size()); + Hashed->Data = Stable; + assert(Hashed->Size == Record.size()); + + // This was a new record, so increment our next type index. + ++NextTypeIndex; + } + + // Update the caller's copy of Record to point a stable copy. + Record = ArrayRef(Hashed->Data, Hashed->Size); + + if (Result.second) { + // FIXME: Can we record these in a more efficient way? + SeenRecords.push_back(Record); + } -TypeIndex TypeSerializer::calcNextTypeIndex() const { - if (LastTypeIndex.isNoneType()) - return TypeIndex(TypeIndex::FirstNonSimpleIndex); - else - return TypeIndex(LastTypeIndex.getIndex() + 1); + return TypeIndex(Hashed->Index); } -TypeIndex TypeSerializer::incrementTypeIndex() { - TypeIndex Previous = LastTypeIndex; - LastTypeIndex = calcNextTypeIndex(); - return Previous; +bool TypeSerializer::isInFieldList() const { + return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST; } MutableArrayRef TypeSerializer::getCurrentSubRecordData() { @@ -51,46 +147,6 @@ return Error::success(); } -TypeIndex -TypeSerializer::insertRecordBytesPrivate(MutableArrayRef Record) { - assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); - - StringRef S(reinterpret_cast(Record.data()), Record.size()); - - TypeIndex NextTypeIndex = calcNextTypeIndex(); - auto Result = HashedRecords.try_emplace(S, NextTypeIndex); - if (Result.second) { - LastTypeIndex = NextTypeIndex; - SeenRecords.push_back(Record); - } - return Result.first->getValue(); -} - -TypeIndex -TypeSerializer::insertRecordBytesWithCopy(CVType &Record, - MutableArrayRef Data) { - assert(Data.size() % 4 == 0 && "Record is not aligned to 4 bytes!"); - - StringRef S(reinterpret_cast(Data.data()), Data.size()); - - // Do a two state lookup / insert so that we don't have to allocate unless - // we're going - // to do an insert. This is a big memory savings. - auto Iter = HashedRecords.find(S); - if (Iter != HashedRecords.end()) - return Iter->second; - - LastTypeIndex = calcNextTypeIndex(); - uint8_t *Copy = RecordStorage.Allocate(Data.size()); - ::memcpy(Copy, Data.data(), Data.size()); - Data = MutableArrayRef(Copy, Data.size()); - S = StringRef(reinterpret_cast(Data.data()), Data.size()); - HashedRecords.insert(std::make_pair(S, LastTypeIndex)); - SeenRecords.push_back(Data); - Record.RecordData = Data; - return LastTypeIndex; -} - Expected> TypeSerializer::addPadding(MutableArrayRef Record) { uint32_t Align = Record.size() % 4; @@ -109,26 +165,25 @@ } TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage) - : RecordStorage(Storage), LastTypeIndex(), - RecordBuffer(MaxRecordLength * 2), + : RecordStorage(Storage), RecordBuffer(MaxRecordLength * 2), Stream(RecordBuffer, llvm::support::little), Writer(Stream), - Mapping(Writer) { + Mapping(Writer), Hasher(make_unique(Storage)) { // RecordBuffer needs to be able to hold enough data so that if we are 1 // byte short of MaxRecordLen, and then we try to write MaxRecordLen bytes, // we won't overflow. } -ArrayRef> TypeSerializer::records() const { - return SeenRecords; -} +TypeSerializer::~TypeSerializer() = default; -TypeIndex TypeSerializer::getLastTypeIndex() const { return LastTypeIndex; } +ArrayRef> TypeSerializer::records() const { + return Hasher->records(); +} -TypeIndex TypeSerializer::insertRecordBytes(MutableArrayRef Record) { +TypeIndex TypeSerializer::insertRecordBytes(ArrayRef Record) { assert(!TypeKind.hasValue() && "Already in a type mapping!"); assert(Writer.getOffset() == 0 && "Stream has data already!"); - return insertRecordBytesPrivate(Record); + return Hasher->getOrCreateRecord(Record); } Error TypeSerializer::visitTypeBegin(CVType &Record) { @@ -163,8 +218,8 @@ Prefix->RecordLen = ThisRecordData.size() - sizeof(uint16_t); Record.Type = *TypeKind; - TypeIndex InsertedTypeIndex = - insertRecordBytesWithCopy(Record, ThisRecordData); + Record.RecordData = ThisRecordData; + TypeIndex InsertedTypeIndex = Hasher->getOrCreateRecord(Record.RecordData); // Write out each additional segment in reverse order, and update each // record's continuation index to point to the previous one. @@ -174,7 +229,7 @@ reinterpret_cast(CIBytes.data()); assert(*CI == 0xB0C0B0C0 && "Invalid TypeIndex placeholder"); *CI = InsertedTypeIndex.getIndex(); - InsertedTypeIndex = insertRecordBytesPrivate(X); + InsertedTypeIndex = Hasher->getOrCreateRecord(X); } TypeKind.reset(); Index: llvm/trunk/lib/DebugInfo/CodeView/TypeTableCollection.cpp =================================================================== --- llvm/trunk/lib/DebugInfo/CodeView/TypeTableCollection.cpp +++ llvm/trunk/lib/DebugInfo/CodeView/TypeTableCollection.cpp @@ -24,8 +24,7 @@ consumeError(std::move(EC)); } -TypeTableCollection::TypeTableCollection( - ArrayRef> Records) +TypeTableCollection::TypeTableCollection(ArrayRef> Records) : Records(Records), Database(Records.size()) {} Optional TypeTableCollection::getFirst() { Index: llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp =================================================================== --- llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp +++ llvm/trunk/tools/llvm-pdbdump/llvm-pdbdump.cpp @@ -877,14 +877,12 @@ auto &DestTpi = Builder.getTpiBuilder(); auto &DestIpi = Builder.getIpiBuilder(); - MergedTpi.ForEachRecord( - [&DestTpi](TypeIndex TI, MutableArrayRef Data) { - DestTpi.addTypeRecord(Data, None); - }); - MergedIpi.ForEachRecord( - [&DestIpi](TypeIndex TI, MutableArrayRef Data) { - DestIpi.addTypeRecord(Data, None); - }); + MergedTpi.ForEachRecord([&DestTpi](TypeIndex TI, ArrayRef Data) { + DestTpi.addTypeRecord(Data, None); + }); + MergedIpi.ForEachRecord([&DestIpi](TypeIndex TI, ArrayRef Data) { + DestIpi.addTypeRecord(Data, None); + }); SmallString<64> OutFile(opts::merge::PdbOutputFile); if (OutFile.empty()) {