Index: llvm/trunk/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h +++ llvm/trunk/include/llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h @@ -73,14 +73,30 @@ CreateFunc Create) { auto Result = HashedRecords.try_emplace(Hash, nextTypeIndex()); - if (LLVM_UNLIKELY(Result.second)) { + if (LLVM_UNLIKELY(Result.second /*inserted*/ || + Result.first->second.isSimple())) { uint8_t *Stable = RecordStorage.Allocate(RecordSize); MutableArrayRef Data(Stable, RecordSize); - SeenRecords.push_back(Create(Data)); + ArrayRef StableRecord = Create(Data); + if (StableRecord.empty()) { + // Records with forward references into the Type stream will be deferred + // for insertion at a later time, on the second pass. + Result.first->getSecond() = TypeIndex(SimpleTypeKind::NotTranslated); + return TypeIndex(SimpleTypeKind::NotTranslated); + } + if (Result.first->second.isSimple()) { + assert(Result.first->second.getIndex() == + (uint32_t)SimpleTypeKind::NotTranslated); + // On the second pass, update with index to remapped record. The + // (initially misbehaved) record will now come *after* other records + // resolved in the first pass, with proper *back* references in the + // stream. + Result.first->second = nextTypeIndex(); + } + SeenRecords.push_back(StableRecord); SeenHashes.push_back(Hash); } - // Update the caller's copy of Record to point a stable copy. return Result.first->second; } Index: llvm/trunk/include/llvm/DebugInfo/CodeView/TypeHashing.h =================================================================== --- llvm/trunk/include/llvm/DebugInfo/CodeView/TypeHashing.h +++ llvm/trunk/include/llvm/DebugInfo/CodeView/TypeHashing.h @@ -84,6 +84,8 @@ } std::array Hash; + bool empty() const { return *(const uint64_t*)Hash.data() == 0; } + /// Given a sequence of bytes representing a record, compute a global hash for /// this record. Due to the nature of global hashes incorporating the hashes /// of referenced records, this function requires a list of types and ids @@ -107,8 +109,33 @@ template static std::vector hashTypes(Range &&Records) { std::vector Hashes; - for (const auto &R : Records) - Hashes.push_back(hashType(R, Hashes, Hashes)); + bool UnresolvedRecords = false; + for (const auto &R : Records) { + GloballyHashedType H = hashType(R, Hashes, Hashes); + if (H.empty()) + UnresolvedRecords = true; + Hashes.push_back(H); + } + + // In some rare cases, there might be records with forward references in the + // stream. Several passes might be needed to fully hash each record in the + // Type stream. However this occurs on very small OBJs generated by MASM, + // with a dozen records at most. Therefore this codepath isn't + // time-critical, as it isn't taken in 99% of cases. + while (UnresolvedRecords) { + UnresolvedRecords = false; + auto HashIt = Hashes.begin(); + for (const auto &R : Records) { + if (HashIt->empty()) { + GloballyHashedType H = hashType(R, Hashes, Hashes); + if (H.empty()) + UnresolvedRecords = true; + else + *HashIt = H; + } + ++HashIt; + } + } return Hashes; } Index: llvm/trunk/lib/DebugInfo/CodeView/TypeHashing.cpp =================================================================== --- llvm/trunk/lib/DebugInfo/CodeView/TypeHashing.cpp +++ llvm/trunk/lib/DebugInfo/CodeView/TypeHashing.cpp @@ -54,10 +54,16 @@ reinterpret_cast(RefData.data()), Ref.Count); for (TypeIndex TI : Indices) { ArrayRef BytesToHash; - if (TI.isSimple() || TI.isNoneType() || TI.toArrayIndex() >= Prev.size()) { + if (TI.isSimple() || TI.isNoneType()) { const uint8_t *IndexBytes = reinterpret_cast(&TI); BytesToHash = makeArrayRef(IndexBytes, sizeof(TypeIndex)); } else { + if (TI.toArrayIndex() >= Prev.size() || + Prev[TI.toArrayIndex()].empty()) { + // There are references to yet-unhashed records. Suspend hashing for + // this record until all the other records are processed. + return {}; + } BytesToHash = Prev[TI.toArrayIndex()].Hash; } S.update(BytesToHash); Index: llvm/trunk/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp =================================================================== --- llvm/trunk/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +++ llvm/trunk/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp @@ -152,9 +152,12 @@ if (auto EC = Writer.writeObject(*Header)) return EC; - for (auto Rec : TypeRecords) + for (auto Rec : TypeRecords) { + assert(!Rec.empty()); // An empty record will not write anything, but it + // would shift all offsets from here on. if (auto EC = Writer.writeBytes(Rec)) return EC; + } if (HashStreamIndex != kInvalidStreamIndex) { auto HVS = WritableMappedBlockStream::createIndexedStream( Index: llvm/trunk/test/tools/llvm-readobj/codeview-merging-ghash.test =================================================================== --- llvm/trunk/test/tools/llvm-readobj/codeview-merging-ghash.test +++ llvm/trunk/test/tools/llvm-readobj/codeview-merging-ghash.test @@ -0,0 +1,132 @@ +# RUN: yaml2obj %s -o=%t.obj +# RUN: llvm-readobj -codeview-merged-types %t.obj | FileCheck %s --check-prefix=MERGED +# RUN: llvm-readobj -codeview-merged-types -codeview-ghash %t.obj | FileCheck %s --check-prefix=MERGED + +# MERGED: Format: COFF-x86-64 +# MERGED-NEXT: Arch: x86_64 +# MERGED-NEXT: AddressSize: 64bit +# MERGED-NEXT: MergedTypeStream [ +# MERGED-NEXT: ArgList (0x1000) { +# MERGED-NEXT: TypeLeafKind: LF_ARGLIST (0x1201) +# MERGED-NEXT: NumArgs: 0 +# MERGED-NEXT: Arguments [ +# MERGED-NEXT: ] +# MERGED-NEXT: } +# MERGED-NEXT: Modifier (0x1001) { +# MERGED-NEXT: TypeLeafKind: LF_MODIFIER (0x1001) +# MERGED-NEXT: ModifiedType: void (0x3) +# MERGED-NEXT: Modifiers [ (0x3) +# MERGED-NEXT: Const (0x1) +# MERGED-NEXT: Volatile (0x2) +# MERGED-NEXT: ] +# MERGED-NEXT: } +# MERGED-NEXT: Procedure (0x1002) { +# MERGED-NEXT: TypeLeafKind: LF_PROCEDURE (0x1008) +# MERGED-NEXT: ReturnType: void (0x3) +# MERGED-NEXT: CallingConvention: NearC (0x0) +# MERGED-NEXT: FunctionOptions [ (0x0) +# MERGED-NEXT: ] +# MERGED-NEXT: NumParameters: 0 +# MERGED-NEXT: ArgListType: () (0x1000) +# MERGED-NEXT: } +# MERGED-NEXT: Pointer (0x1003) { +# MERGED-NEXT: TypeLeafKind: LF_POINTER (0x1002) +# MERGED-NEXT: PointeeType: const volatile void (0x1001) +# MERGED-NEXT: PtrType: Near64 (0xC) +# MERGED-NEXT: PtrMode: Pointer (0x0) +# MERGED-NEXT: IsFlat: 0 +# MERGED-NEXT: IsConst: 1 +# MERGED-NEXT: IsVolatile: 0 +# MERGED-NEXT: IsUnaligned: 0 +# MERGED-NEXT: IsRestrict: 0 +# MERGED-NEXT: IsThisPtr&: 0 +# MERGED-NEXT: IsThisPtr&&: 0 +# MERGED-NEXT: SizeOf: 8 +# MERGED-NEXT: } +# MERGED-NEXT: Pointer (0x1004) { +# MERGED-NEXT: TypeLeafKind: LF_POINTER (0x1002) +# MERGED-NEXT: PointeeType: const volatile void* const (0x1003) +# MERGED-NEXT: PtrType: Near64 (0xC) +# MERGED-NEXT: PtrMode: Pointer (0x0) +# MERGED-NEXT: IsFlat: 0 +# MERGED-NEXT: IsConst: 1 +# MERGED-NEXT: IsVolatile: 0 +# MERGED-NEXT: IsUnaligned: 0 +# MERGED-NEXT: IsRestrict: 0 +# MERGED-NEXT: IsThisPtr&: 0 +# MERGED-NEXT: IsThisPtr&&: 0 +# MERGED-NEXT: SizeOf: 8 +# MERGED-NEXT: } +# MERGED-NEXT: Pointer (0x1005) { +# MERGED-NEXT: TypeLeafKind: LF_POINTER (0x1002) +# MERGED-NEXT: PointeeType: const volatile void* const* const (0x1004) +# MERGED-NEXT: PtrType: Near64 (0xC) +# MERGED-NEXT: PtrMode: Pointer (0x0) +# MERGED-NEXT: IsFlat: 0 +# MERGED-NEXT: IsConst: 1 +# MERGED-NEXT: IsVolatile: 0 +# MERGED-NEXT: IsUnaligned: 0 +# MERGED-NEXT: IsRestrict: 0 +# MERGED-NEXT: IsThisPtr&: 0 +# MERGED-NEXT: IsThisPtr&&: 0 +# MERGED-NEXT: SizeOf: 8 +# MERGED-NEXT: } +# MERGED-NEXT: Pointer (0x1006) { +# MERGED-NEXT: TypeLeafKind: LF_POINTER (0x1002) +# MERGED-NEXT: PointeeType: const volatile void* const* const* const (0x1005) +# MERGED-NEXT: PtrType: Near64 (0xC) +# MERGED-NEXT: PtrMode: Pointer (0x0) +# MERGED-NEXT: IsFlat: 0 +# MERGED-NEXT: IsConst: 1 +# MERGED-NEXT: IsVolatile: 0 +# MERGED-NEXT: IsUnaligned: 0 +# MERGED-NEXT: IsRestrict: 0 +# MERGED-NEXT: IsThisPtr&: 0 +# MERGED-NEXT: IsThisPtr&&: 0 +# MERGED-NEXT: SizeOf: 8 +# MERGED-NEXT: } +# MERGED-NEXT: ] +# MERGED-NEXT: MergedIDStream [ +# MERGED-NEXT: ] + +--- !COFF +header: + Machine: IMAGE_FILE_MACHINE_AMD64 + Characteristics: [ ] +sections: + - Name: '.debug$T' + Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_DISCARDABLE, IMAGE_SCN_MEM_READ ] + Alignment: 1 + Types: + - Kind: LF_PROCEDURE + Procedure: + ReturnType: 3 + CallConv: NearC + Options: [ None ] + ParameterCount: 0 + ArgumentList: 4097 + - Kind: LF_ARGLIST + ArgList: + ArgIndices: [ ] + - Kind: LF_POINTER + Pointer: + ReferentType: 4099 + Attrs: 66572 + - Kind: LF_POINTER + Pointer: + ReferentType: 4100 + Attrs: 66572 + - Kind: LF_POINTER + Pointer: + ReferentType: 4101 + Attrs: 66572 + - Kind: LF_MODIFIER + Modifier: + ModifiedType: 3 + Modifiers: [ None, Const, Volatile ] + - Kind: LF_POINTER + Pointer: + ReferentType: 4098 + Attrs: 66572 +symbols: +... Index: llvm/trunk/tools/llvm-readobj/COFFDumper.cpp =================================================================== --- llvm/trunk/tools/llvm-readobj/COFFDumper.cpp +++ llvm/trunk/tools/llvm-readobj/COFFDumper.cpp @@ -92,9 +92,11 @@ void printCOFFResources() override; void printCOFFLoadConfig() override; void printCodeViewDebugInfo() override; - void - mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs, - llvm::codeview::MergingTypeTableBuilder &CVTypes) override; + void mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs, + llvm::codeview::MergingTypeTableBuilder &CVTypes, + llvm::codeview::GlobalTypeTableBuilder &GlobalCVIDs, + llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes, + bool GHash) override; void printStackMap() const override; void printAddrsig() override; private: @@ -1227,7 +1229,10 @@ } void COFFDumper::mergeCodeViewTypes(MergingTypeTableBuilder &CVIDs, - MergingTypeTableBuilder &CVTypes) { + MergingTypeTableBuilder &CVTypes, + GlobalTypeTableBuilder &GlobalCVIDs, + GlobalTypeTableBuilder &GlobalCVTypes, + bool GHash) { for (const SectionRef &S : Obj->sections()) { StringRef SectionName; error(S.getName(SectionName)); @@ -1248,9 +1253,17 @@ } SmallVector SourceToDest; Optional PCHSignature; - if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types, - PCHSignature)) - return error(std::move(EC)); + if (GHash) { + std::vector Hashes = + GloballyHashedType::hashTypes(Types); + if (auto EC = mergeTypeAndIdRecords(GlobalCVIDs, GlobalCVTypes, SourceToDest, Types, + Hashes, PCHSignature)) + return error(std::move(EC)); + } else { + if (auto EC = mergeTypeAndIdRecords(CVIDs, CVTypes, SourceToDest, Types, + PCHSignature)) + return error(std::move(EC)); + } } } } @@ -1904,16 +1917,10 @@ } } -void llvm::dumpCodeViewMergedTypes( - ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable, - llvm::codeview::MergingTypeTableBuilder &CVTypes) { - // Flatten it first, then run our dumper on it. - SmallString<0> TypeBuf; - CVTypes.ForEachRecord([&](TypeIndex TI, const CVType &Record) { - TypeBuf.append(Record.RecordData.begin(), Record.RecordData.end()); - }); - - TypeTableCollection TpiTypes(CVTypes.records()); +void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer, + ArrayRef> IpiRecords, + ArrayRef> TpiRecords) { + TypeTableCollection TpiTypes(TpiRecords); { ListScope S(Writer, "MergedTypeStream"); TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes); @@ -1923,7 +1930,7 @@ // Flatten the id stream and print it next. The ID stream refers to names from // the type stream. - TypeTableCollection IpiTypes(IDTable.records()); + TypeTableCollection IpiTypes(IpiRecords); { ListScope S(Writer, "MergedIDStream"); TypeDumpVisitor TDV(TpiTypes, &Writer, opts::CodeViewSubsectionBytes); Index: llvm/trunk/tools/llvm-readobj/ObjDumper.h =================================================================== --- llvm/trunk/tools/llvm-readobj/ObjDumper.h +++ llvm/trunk/tools/llvm-readobj/ObjDumper.h @@ -22,8 +22,9 @@ class ObjectFile; } namespace codeview { +class GlobalTypeTableBuilder; class MergingTypeTableBuilder; -} +} // namespace codeview class ScopedPrinter; @@ -88,7 +89,10 @@ virtual void printCodeViewDebugInfo() { } virtual void mergeCodeViewTypes(llvm::codeview::MergingTypeTableBuilder &CVIDs, - llvm::codeview::MergingTypeTableBuilder &CVTypes) {} + llvm::codeview::MergingTypeTableBuilder &CVTypes, + llvm::codeview::GlobalTypeTableBuilder &GlobalCVIDs, + llvm::codeview::GlobalTypeTableBuilder &GlobalCVTypes, + bool GHash) {} // Only implemented for MachO. virtual void printMachODataInCode() { } @@ -132,9 +136,9 @@ void dumpCOFFImportFile(const object::COFFImportFile *File, ScopedPrinter &Writer); -void dumpCodeViewMergedTypes( - ScopedPrinter &Writer, llvm::codeview::MergingTypeTableBuilder &IDTable, - llvm::codeview::MergingTypeTableBuilder &TypeTable); +void dumpCodeViewMergedTypes(ScopedPrinter &Writer, + ArrayRef> IpiRecords, + ArrayRef> TpiRecords); } // namespace llvm Index: llvm/trunk/tools/llvm-readobj/llvm-readobj.cpp =================================================================== --- llvm/trunk/tools/llvm-readobj/llvm-readobj.cpp +++ llvm/trunk/tools/llvm-readobj/llvm-readobj.cpp @@ -22,6 +22,7 @@ #include "Error.h" #include "ObjDumper.h" #include "WindowsResourceDumper.h" +#include "llvm/DebugInfo/CodeView/GlobalTypeTableBuilder.h" #include "llvm/DebugInfo/CodeView/MergingTypeTableBuilder.h" #include "llvm/Object/Archive.h" #include "llvm/Object/COFFImportFile.h" @@ -218,6 +219,12 @@ CodeViewMergedTypes("codeview-merged-types", cl::desc("Display the merged CodeView type stream")); + // -codeview-ghash + cl::opt CodeViewEnableGHash( + "codeview-ghash", + cl::desc( + "Enable global hashing for CodeView type stream de-duplication")); + // -codeview-subsection-bytes cl::opt CodeViewSubsectionBytes( "codeview-subsection-bytes", @@ -416,13 +423,17 @@ namespace { struct ReadObjTypeTableBuilder { ReadObjTypeTableBuilder() - : Allocator(), IDTable(Allocator), TypeTable(Allocator) {} + : Allocator(), IDTable(Allocator), TypeTable(Allocator), + GlobalIDTable(Allocator), GlobalTypeTable(Allocator) {} llvm::BumpPtrAllocator Allocator; llvm::codeview::MergingTypeTableBuilder IDTable; llvm::codeview::MergingTypeTableBuilder TypeTable; + llvm::codeview::GlobalTypeTableBuilder GlobalIDTable; + llvm::codeview::GlobalTypeTableBuilder GlobalTypeTable; + std::vector> Binaries; }; -} +} // namespace static ReadObjTypeTableBuilder CVTypes; /// Creates an format-specific object file dumper. @@ -542,7 +553,9 @@ if (opts::CodeView) Dumper->printCodeViewDebugInfo(); if (opts::CodeViewMergedTypes) - Dumper->mergeCodeViewTypes(CVTypes.IDTable, CVTypes.TypeTable); + Dumper->mergeCodeViewTypes(CVTypes.IDTable, CVTypes.TypeTable, + CVTypes.GlobalIDTable, CVTypes.GlobalTypeTable, + opts::CodeViewEnableGHash); } if (Obj->isMachO()) { if (opts::MachODataInCode) @@ -631,6 +644,8 @@ dumpWindowsResourceFile(WinRes); else reportError(File, readobj_error::unrecognized_file_format); + + CVTypes.Binaries.push_back(std::move(*BinaryOrErr)); } /// Registers aliases that should only be allowed by readobj. @@ -720,7 +735,12 @@ if (opts::CodeViewMergedTypes) { ScopedPrinter W(outs()); - dumpCodeViewMergedTypes(W, CVTypes.IDTable, CVTypes.TypeTable); + if (opts::CodeViewEnableGHash) + dumpCodeViewMergedTypes(W, CVTypes.GlobalIDTable.records(), + CVTypes.GlobalTypeTable.records()); + else + dumpCodeViewMergedTypes(W, CVTypes.IDTable.records(), + CVTypes.TypeTable.records()); } return 0;