Index: include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h =================================================================== --- include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h +++ include/llvm/DebugInfo/CodeView/AppendingTypeTableBuilder.h @@ -47,13 +47,14 @@ uint32_t size() override; uint32_t capacity() override; + ArrayRef> records() const override; + // public interface void reset(); TypeIndex nextTypeIndex() const; BumpPtrAllocator &getAllocator() { return RecordStorage; } - ArrayRef> records() const; TypeIndex insertRecordBytes(ArrayRef &Record); TypeIndex insertRecord(ContinuationRecordBuilder &Builder); Index: include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h =================================================================== --- include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h +++ include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h @@ -60,12 +60,11 @@ uint32_t capacity() override; // public interface - void reset(); TypeIndex nextTypeIndex() const; BumpPtrAllocator &getAllocator() { return RecordStorage; } - ArrayRef> records() const; + ArrayRef> records() const override; ArrayRef hashes() const; template @@ -73,14 +72,30 @@ CreateFunc Create) { auto Result = HashedRecords.try_emplace(Hash, nextTypeIndex()); - if (LLVM_UNLIKELY(Result.second)) { + if (LLVM_UNLIKELY(Result.second /*inserted*/ || + Result.first->second.isSimple())) { uint8_t *Stable = RecordStorage.Allocate(RecordSize); MutableArrayRef Data(Stable, RecordSize); - SeenRecords.push_back(Create(Data)); + ArrayRef StableRecord = Create(Data); + if (StableRecord.empty()) { + // Records with forward references into the Type stream will be deferred + // for insertion at a later time, on the second pass. + Result.first->getSecond() = TypeIndex(SimpleTypeKind::NotTranslated); + return TypeIndex(SimpleTypeKind::NotTranslated); + } + if (Result.first->second.isSimple()) { + assert(Result.first->second.getIndex() == + (uint32_t)SimpleTypeKind::NotTranslated); + // On the second pass, update with index to remapped record. The + // (initially misbehaved) record will now come *after* other records + // resolved in the first pass, with proper *back* references in the + // stream. + Result.first->second = nextTypeIndex(); + } + SeenRecords.push_back(StableRecord); SeenHashes.push_back(Hash); } - // Update the caller's copy of Record to point a stable copy. return Result.first->second; } Index: include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h =================================================================== --- include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h +++ include/llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h @@ -58,8 +58,6 @@ public: explicit LazyRandomTypeCollection(uint32_t RecordCountHint); - LazyRandomTypeCollection(StringRef Data, uint32_t RecordCountHint); - LazyRandomTypeCollection(ArrayRef Data, uint32_t RecordCountHint); LazyRandomTypeCollection(const CVTypeArray &Types, uint32_t RecordCountHint, PartialOffsetArray PartialOffsets); LazyRandomTypeCollection(const CVTypeArray &Types, uint32_t RecordCountHint); @@ -80,6 +78,8 @@ Optional getFirst() override; Optional getNext(TypeIndex Prev) override; + ArrayRef> records() const override; + private: Error ensureTypeExists(TypeIndex Index); void ensureCapacityFor(TypeIndex Index); @@ -101,6 +101,7 @@ CVTypeArray Types; std::vector Records; + std::vector> DataRecords; /// An array of index offsets for the given type stream, allowing log(N) /// lookups of a type record by index. Similar to KnownOffsets but only Index: include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h =================================================================== --- include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h +++ include/llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h @@ -62,7 +62,7 @@ BumpPtrAllocator &getAllocator() { return RecordStorage; } - ArrayRef> records() const; + ArrayRef> records() const override; TypeIndex insertRecordAs(hash_code Hash, ArrayRef &Record); TypeIndex insertRecordBytes(ArrayRef &Record); Index: include/llvm/DebugInfo/CodeView/TypeCollection.h =================================================================== --- include/llvm/DebugInfo/CodeView/TypeCollection.h +++ include/llvm/DebugInfo/CodeView/TypeCollection.h @@ -30,6 +30,7 @@ virtual bool contains(TypeIndex Index) = 0; virtual uint32_t size() = 0; virtual uint32_t capacity() = 0; + virtual ArrayRef> records() const = 0; template void ForEachRecord(TFunc Func) { Optional Next = getFirst(); Index: include/llvm/DebugInfo/CodeView/TypeHashing.h =================================================================== --- include/llvm/DebugInfo/CodeView/TypeHashing.h +++ include/llvm/DebugInfo/CodeView/TypeHashing.h @@ -84,6 +84,8 @@ } std::array Hash; + bool empty() const { return *(const uint64_t*)Hash.data() == 0; } + /// Given a sequence of bytes representing a record, compute a global hash for /// this record. Due to the nature of global hashes incorporating the hashes /// of referenced records, this function requires a list of types and ids @@ -107,8 +109,24 @@ template static std::vector hashTypes(Range &&Records) { std::vector Hashes; - for (const auto &R : Records) - Hashes.push_back(hashType(R, Hashes, Hashes)); + bool ReqSecondPass = false; + for (const auto &R : Records) { + GloballyHashedType H = hashType(R, Hashes, Hashes); + if (H.empty()) + ReqSecondPass = true; + Hashes.push_back(H); + } + if (ReqSecondPass) { + auto HashIt = Hashes.begin(); + for (const auto &R : Records) { + if (HashIt->empty()) { + GloballyHashedType H = hashType(R, Hashes, Hashes); + assert(!H.empty()); + *HashIt = H; + } + ++HashIt; + } + } return Hashes; } Index: include/llvm/DebugInfo/CodeView/TypeTableCollection.h =================================================================== --- include/llvm/DebugInfo/CodeView/TypeTableCollection.h +++ include/llvm/DebugInfo/CodeView/TypeTableCollection.h @@ -29,6 +29,7 @@ bool contains(TypeIndex Index) override; uint32_t size() override; uint32_t capacity() override; + ArrayRef> records() const override; private: BumpPtrAllocator Allocator; Index: include/llvm/Support/BinaryStreamArray.h =================================================================== --- include/llvm/Support/BinaryStreamArray.h +++ include/llvm/Support/BinaryStreamArray.h @@ -140,6 +140,12 @@ void drop_front() { Skew += begin()->length(); } + ArrayRef getBytes() const { + ArrayRef Buffer; + cantFail(Stream.readBytes(Skew, Stream.getLength() - Skew, Buffer)); + return Buffer; + } + private: BinaryStreamRef Stream; Extractor E; Index: lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp =================================================================== --- lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp +++ lib/DebugInfo/CodeView/GlobalTypeTableBuilder.cpp @@ -86,11 +86,6 @@ return SeenHashes; } -void GlobalTypeTableBuilder::reset() { - HashedRecords.clear(); - SeenRecords.clear(); -} - TypeIndex GlobalTypeTableBuilder::insertRecordBytes(ArrayRef Record) { GloballyHashedType GHT = GloballyHashedType::hashType(Record, SeenHashes, SeenHashes); Index: lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp =================================================================== --- lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp +++ lib/DebugInfo/CodeView/LazyRandomTypeCollection.cpp @@ -40,17 +40,7 @@ PartialOffsetArray PartialOffsets) : NameStorage(Allocator), Types(Types), PartialOffsets(PartialOffsets) { Records.resize(RecordCountHint); -} - -LazyRandomTypeCollection::LazyRandomTypeCollection(ArrayRef Data, - uint32_t RecordCountHint) - : LazyRandomTypeCollection(RecordCountHint) { -} - -LazyRandomTypeCollection::LazyRandomTypeCollection(StringRef Data, - uint32_t RecordCountHint) - : LazyRandomTypeCollection( - makeArrayRef(Data.bytes_begin(), Data.bytes_end()), RecordCountHint) { + DataRecords.resize(RecordCountHint); } LazyRandomTypeCollection::LazyRandomTypeCollection(const CVTypeArray &Types, @@ -67,6 +57,8 @@ // Clear and then resize, to make sure existing data gets destroyed. Records.clear(); Records.resize(RecordCountHint); + DataRecords.clear(); + DataRecords.resize(RecordCountHint); } void LazyRandomTypeCollection::reset(StringRef Data, uint32_t RecordCountHint) { @@ -147,6 +139,10 @@ uint32_t LazyRandomTypeCollection::capacity() { return Records.size(); } +ArrayRef> LazyRandomTypeCollection::records() const { + return DataRecords; +} + Error LazyRandomTypeCollection::ensureTypeExists(TypeIndex TI) { if (contains(TI)) return Error::success(); @@ -165,6 +161,7 @@ assert(NewCapacity > capacity()); Records.resize(NewCapacity); + DataRecords.resize(NewCapacity); } Error LazyRandomTypeCollection::visitRangeForType(TypeIndex TI) { @@ -185,7 +182,7 @@ // They've asked us to fetch a type index, but the entry we found in the // partial offsets array has already been visited. Since we visit an entire // block every time, that means this record should have been previously - // discovered. Ultimately, this means this is a request for a non-existant + // discovered. Ultimately, this means this is a request for a non-existent // type index. return make_error("Invalid type index"); } @@ -251,6 +248,7 @@ auto Idx = CurrentTI.toArrayIndex(); Records[Idx].Type = *Begin; Records[Idx].Offset = Begin.offset(); + DataRecords[Idx] = (*Begin).data(); ++Count; ++Begin; ++CurrentTI; @@ -272,6 +270,7 @@ auto Idx = Begin.toArrayIndex(); Records[Idx].Type = *RI; Records[Idx].Offset = RI.offset(); + DataRecords[Idx] = (*RI).data(); ++Count; ++Begin; ++RI; Index: lib/DebugInfo/CodeView/TypeHashing.cpp =================================================================== --- lib/DebugInfo/CodeView/TypeHashing.cpp +++ lib/DebugInfo/CodeView/TypeHashing.cpp @@ -54,10 +54,15 @@ reinterpret_cast(RefData.data()), Ref.Count); for (TypeIndex TI : Indices) { ArrayRef BytesToHash; - if (TI.isSimple() || TI.isNoneType() || TI.toArrayIndex() >= Prev.size()) { + if (TI.isSimple() || TI.isNoneType()) { const uint8_t *IndexBytes = reinterpret_cast(&TI); BytesToHash = makeArrayRef(IndexBytes, sizeof(TypeIndex)); } else { + if (TI.toArrayIndex() >= Prev.size()) { + // There are forward references to yet-unhashed records. Suspend + // hashing for this record until all the other records are processed. + return {}; + } BytesToHash = Prev[TI.toArrayIndex()].Hash; } S.update(BytesToHash); Index: lib/DebugInfo/CodeView/TypeStreamMerger.cpp =================================================================== --- lib/DebugInfo/CodeView/TypeStreamMerger.cpp +++ lib/DebugInfo/CodeView/TypeStreamMerger.cpp @@ -355,12 +355,8 @@ } Error TypeStreamMerger::remapAllTypes(const CVTypeArray &Types) { - BinaryStreamRef Stream = Types.getUnderlyingStream(); - ArrayRef Buffer; - cantFail(Stream.readBytes(0, Stream.getLength(), Buffer)); - return forEachCodeViewRecord( - Buffer, [this](const CVType &T) { return remapType(T); }); + Types.getBytes(), [this](const CVType &T) { return remapType(T); }); } Error TypeStreamMerger::remapType(const CVType &Type) { Index: lib/DebugInfo/CodeView/TypeTableCollection.cpp =================================================================== --- lib/DebugInfo/CodeView/TypeTableCollection.cpp +++ lib/DebugInfo/CodeView/TypeTableCollection.cpp @@ -62,3 +62,7 @@ uint32_t TypeTableCollection::size() { return Records.size(); } uint32_t TypeTableCollection::capacity() { return Records.size(); } + +ArrayRef> TypeTableCollection::records() const { + return Records; +} Index: lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp =================================================================== --- lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -152,9 +152,12 @@ if (auto EC = Writer.writeObject(*Header)) return EC; - for (auto Rec : TypeRecords) + for (auto Rec : TypeRecords) { + assert(!Rec.empty()); // An empty record will not write anything, but it + // will shift all offsets from here on. if (auto EC = Writer.writeBytes(Rec)) return EC; + } if (HashStreamIndex != kInvalidStreamIndex) { auto HVS = WritableMappedBlockStream::createIndexedStream( Index: test/tools/llvm-readobj/codeview-merging-ghash.test =================================================================== --- test/tools/llvm-readobj/codeview-merging-ghash.test +++ test/tools/llvm-readobj/codeview-merging-ghash.test @@ -0,0 +1,43 @@ +# RUN: yaml2obj %s -o=%t.obj +# RUN: llvm-readobj -codeview-merged-types %t.obj | FileCheck %s --check-prefix=MERGED +# RUN: llvm-readobj -codeview-merged-types -codeview-ghash %t.obj | FileCheck %s --check-prefix=MERGED + +# MERGED: MergedTypeStream [ +# MERGED-NEXT: ArgList (0x1000) { +# MERGED-NEXT: TypeLeafKind: LF_ARGLIST (0x1201) +# MERGED-NEXT: NumArgs: 0 +# MERGED-NEXT: Arguments [ +# MERGED-NEXT: ] +# MERGED-NEXT: } +# MERGED-NEXT: Procedure (0x1001) { +# MERGED-NEXT: TypeLeafKind: LF_PROCEDURE (0x1008) +# MERGED-NEXT: ReturnType: void (0x3) +# MERGED-NEXT: CallingConvention: NearC (0x0) +# MERGED-NEXT: FunctionOptions [ (0x0) +# MERGED-NEXT: ] +# MERGED-NEXT: NumParameters: 0 +# MERGED-NEXT: ArgListType: () (0x1000) +# MERGED-NEXT: } +# MERGED-NEXT: ] + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: '.debug$T' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + Alignment: 1 + Types: + - Kind: LF_PROCEDURE + Procedure: + ReturnType: 3 + CallConv: NearC + Options: [ None ] + ParameterCount: 0 + ArgumentList: 4097 + - Kind: LF_ARGLIST + ArgList: + ArgIndices: [ ] +symbols: +... Index: tools/llvm-readobj/COFFDumper.cpp =================================================================== --- tools/llvm-readobj/COFFDumper.cpp +++ tools/llvm-readobj/COFFDumper.cpp @@ -92,9 +92,11 @@ void printCOFFResources() override; void printCOFFLoadConfig() override; void printCodeViewDebugInfo() override; - void - mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs, - llvm::codeview::MergingTypeTableBuilder &CVTypes) override; + void mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs, + llvm::codeview::MergingTypeTableBuilder &CVTypes, + llvm::codeview::GlobalTypeTableBuilder &GlobalCVIDs, + llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes, + bool GHash) override; void printStackMap() const override; void printAddrsig() override; private: @@ -1227,7 +1229,10 @@ } void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs, - MergingTypeTableBuilder &CVTypes) { + MergingTypeTableBuilder &CVTypes, + GlobalTypeTableBuilder &GlobalCVIDs, + GlobalTypeTableBuilder &GlobalCVTypes, + bool GHash) { for (const SectionRef &S : Obj->sections()) { StringRef SectionName; error(S.getName(SectionName)); @@ -1248,9 +1253,17 @@ } SmallVector SourceToDest; Optional PCHSignature; - if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types, - PCHSignature)) - return error(std::move(EC)); + if (GHash) { + std::vector Hashes = + GloballyHashedType::hashTypes(Types); + if (auto EC = mergeTypeAndIdRecords(GlobalCVIDs, GlobalCVTypes, SourceToDest, Types, + Hashes, PCHSignature)) + return error(std::move(EC)); + } else { + if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types, + PCHSignature)) + return error(std::move(EC)); + } } } } @@ -1905,8 +1918,8 @@ } void llvm::dumpCodeViewMergedTypes( - ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable, - llvm::codeview::MergingTypeTableBuilder &CVTypes) { + ScopedPrinter &Writer, llvm::codeview::TypeCollection &IDTable, + llvm::codeview::TypeCollection &CVTypes) { // Flatten it first, then run our dumper on it. SmallString<0> TypeBuf; CVTypes.ForEachRecord([&](TypeIndex TI, const CVType &Record) { Index: tools/llvm-readobj/ObjDumper.h =================================================================== --- tools/llvm-readobj/ObjDumper.h +++ tools/llvm-readobj/ObjDumper.h @@ -22,8 +22,10 @@ class ObjectFile; } namespace codeview { +class GlobalTypeTableBuilder; class MergingTypeTableBuilder; -} +class TypeCollection; +} // namespace codeview class ScopedPrinter; @@ -88,7 +90,10 @@ virtual void printCodeViewDebugInfo() { } virtual void mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs, - llvm::codeview::MergingTypeTableBuilder &CVTypes) {} + llvm::codeview::MergingTypeTableBuilder &CVTypes, + llvm::codeview::GlobalTypeTableBuilder &GlobalCVIDs, + llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes, + bool GHash) {} // Only implemented for MachO. virtual void printMachODataInCode() { } @@ -132,9 +137,9 @@ void dumpCOFFImportFile(const object::COFFImportFile *File, ScopedPrinter &Writer); -void dumpCodeViewMergedTypes( - ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable, - llvm::codeview::MergingTypeTableBuilder &TypeTable); +void dumpCodeViewMergedTypes(ScopedPrinter &Writer, + llvm::codeview::TypeCollection &IDTable, + llvm::codeview::TypeCollection &TypeTable); } // namespace llvm Index: tools/llvm-readobj/llvm-readobj.cpp =================================================================== --- tools/llvm-readobj/llvm-readobj.cpp +++ tools/llvm-readobj/llvm-readobj.cpp @@ -22,6 +22,7 @@ #include "Error.h" #include "ObjDumper.h" #include "WindowsResourceDumper.h" +#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFFImportFile.h" @@ -218,6 +219,12 @@ CodeViewMergedTypes("codeview-merged-types", cl::desc("Display the merged CodeView type stream")); + // -codeview-ghash + cl::opt CodeViewEnableGHash( + "codeview-ghash", + cl::desc( + "Enable global hashing for CodeView type stream de-duplication")); + // -codeview-subsection-bytes cl::opt CodeViewSubsectionBytes( "codeview-subsection-bytes", @@ -416,13 +423,16 @@ namespace { struct ReadObjTypeTableBuilder { ReadObjTypeTableBuilder() - : Allocator(), IDTable(Allocator), TypeTable(Allocator) {} + : Allocator(), IDTable(Allocator), TypeTable(Allocator), + GlobalIDTable(Allocator), GlobalTypeTable(Allocator) {} llvm::BumpPtrAllocator Allocator; llvm::codeview::MergingTypeTableBuilder IDTable; llvm::codeview::MergingTypeTableBuilder TypeTable; + llvm::codeview::GlobalTypeTableBuilder GlobalIDTable; + llvm::codeview::GlobalTypeTableBuilder GlobalTypeTable; }; -} +} // namespace static ReadObjTypeTableBuilder CVTypes; /// Creates an format-specific object file dumper. @@ -542,7 +552,9 @@ if (opts::CodeView) Dumper->printCodeViewDebugInfo(); if (opts::CodeViewMergedTypes) - Dumper->mergeCodeViewTypes(CVTypes.IDTable, CVTypes.TypeTable); + Dumper->mergeCodeViewTypes(CVTypes.IDTable, CVTypes.TypeTable, + CVTypes.GlobalIDTable, CVTypes.GlobalTypeTable, + opts::CodeViewEnableGHash); } if (Obj->isMachO()) { if (opts::MachODataInCode) @@ -720,7 +732,11 @@ if (opts::CodeViewMergedTypes) { ScopedPrinter W(outs()); - dumpCodeViewMergedTypes(W, CVTypes.IDTable, CVTypes.TypeTable); + if (opts::CodeViewEnableGHash) + dumpCodeViewMergedTypes(W, CVTypes.GlobalIDTable, + CVTypes.GlobalTypeTable); + else + dumpCodeViewMergedTypes(W, CVTypes.IDTable, CVTypes.TypeTable); } return 0;