Index: include/llvm/CodeGen/DwarfStringPoolEntry.h =================================================================== --- include/llvm/CodeGen/DwarfStringPoolEntry.h +++ include/llvm/CodeGen/DwarfStringPoolEntry.h @@ -10,6 +10,7 @@ #ifndef LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H #define LLVM_CODEGEN_DWARFSTRINGPOOLENTRY_H +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/StringMap.h" namespace llvm { @@ -18,34 +19,52 @@ /// Data for a string pool entry. struct DwarfStringPoolEntry { + static constexpr unsigned NotIndexed = -1; + MCSymbol *Symbol; unsigned Offset; unsigned Index; + + bool isIndexed() const { return Index != NotIndexed; } }; /// String pool entry reference. -struct DwarfStringPoolEntryRef { - const StringMapEntry *I = nullptr; +class DwarfStringPoolEntryRef { + PointerIntPair *, 1, bool> + MapEntryAndIndexed; + + const StringMapEntry *getMapEntry() const { + return MapEntryAndIndexed.getPointer(); + } + + bool isIndexed() const { return MapEntryAndIndexed.getInt(); } public: DwarfStringPoolEntryRef() = default; - explicit DwarfStringPoolEntryRef( - const StringMapEntry &I) - : I(&I) {} + DwarfStringPoolEntryRef(const StringMapEntry &Entry, + bool Indexed) + : MapEntryAndIndexed(&Entry, Indexed) {} - explicit operator bool() const { return I; } + explicit operator bool() const { return getMapEntry(); } MCSymbol *getSymbol() const { - assert(I->second.Symbol && "No symbol available!"); - return I->second.Symbol; + assert(getMapEntry()->second.Symbol && "No symbol available!"); + return getMapEntry()->second.Symbol; + } + unsigned getOffset() const { return getMapEntry()->second.Offset; } + unsigned getIndex() const { + assert(getMapEntry()->getValue().isIndexed()); + return getMapEntry()->second.Index; } - unsigned getOffset() const { return I->second.Offset; } - unsigned getIndex() const { return I->second.Index; } - StringRef getString() const { return I->first(); } + StringRef getString() const { return getMapEntry()->first(); } /// Return the entire string pool entry for convenience. - DwarfStringPoolEntry getEntry() const { return I->getValue(); } + DwarfStringPoolEntry getEntry() const { return getMapEntry()->getValue(); } - bool operator==(const DwarfStringPoolEntryRef &X) const { return I == X.I; } - bool operator!=(const DwarfStringPoolEntryRef &X) const { return I != X.I; } + bool operator==(const DwarfStringPoolEntryRef &X) const { + return getMapEntry() == X.getMapEntry(); + } + bool operator!=(const DwarfStringPoolEntryRef &X) const { + return getMapEntry() != X.getMapEntry(); + } }; } // end namespace llvm Index: lib/CodeGen/AsmPrinter/DwarfDebug.cpp =================================================================== --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2418,8 +2418,7 @@ return; DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - DwarfStringPoolEntryRef Ref = - Holder.getStringPool().getEntry(*Asm, Name); + DwarfStringPoolEntryRef Ref = Holder.getStringPool().getEntry(*Asm, Name); switch (getAccelTableKind()) { case AccelTableKind::Apple: Index: lib/CodeGen/AsmPrinter/DwarfFile.cpp =================================================================== --- lib/CodeGen/AsmPrinter/DwarfFile.cpp +++ lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -38,7 +38,7 @@ // table. The header consists of an entry with the contribution's // size (not including the size of the length field), the DWARF version and // 2 bytes of padding. - Asm->emitInt32(StrPool.size() * EntrySize + 4); + Asm->emitInt32(StrPool.getNumIndexedStrings() * EntrySize + 4); Asm->emitInt16(Asm->getDwarfVersion()); Asm->emitInt16(0); // Define the symbol that marks the start of the contribution. It is Index: lib/CodeGen/AsmPrinter/DwarfStringPool.h =================================================================== --- lib/CodeGen/AsmPrinter/DwarfStringPool.h +++ lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -29,8 +29,11 @@ StringMap Pool; StringRef Prefix; unsigned NumBytes = 0; + unsigned NumIndexedStrings = 0; bool ShouldCreateSymbols; + StringMapEntry &getEntryImpl(AsmPrinter &Asm, StringRef Str); + public: using EntryRef = DwarfStringPoolEntryRef; @@ -44,8 +47,14 @@ unsigned size() const { return Pool.size(); } + unsigned getNumIndexedStrings() const { return NumIndexedStrings; } + /// Get a reference to an entry in the string pool. EntryRef getEntry(AsmPrinter &Asm, StringRef Str); + + /// Same as getEntry, except that you can use EntryRef::getIndex to obtain a + /// unique ID of this entry (e.g., for use in indexed forms like DW_FORM_strx). + EntryRef getIndexedEntry(AsmPrinter &Asm, StringRef Str); }; } // end namespace llvm Index: lib/CodeGen/AsmPrinter/DwarfStringPool.cpp =================================================================== --- lib/CodeGen/AsmPrinter/DwarfStringPool.cpp +++ lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -24,19 +24,33 @@ : Pool(A), Prefix(Prefix), ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {} -DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm, - StringRef Str) { +StringMapEntry & +DwarfStringPool::getEntryImpl(AsmPrinter &Asm, StringRef Str) { auto I = Pool.insert(std::make_pair(Str, EntryTy())); + auto &Entry = I.first->second; if (I.second) { - auto &Entry = I.first->second; - Entry.Index = Pool.size() - 1; + Entry.Index = EntryTy::NotIndexed; Entry.Offset = NumBytes; Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr; NumBytes += Str.size() + 1; assert(NumBytes > Entry.Offset && "Unexpected overflow"); } - return EntryRef(*I.first); + return *I.first; +} + +DwarfStringPool::EntryRef +DwarfStringPool::getEntry(AsmPrinter &Asm, StringRef Str) { + auto &MapEntry = getEntryImpl(Asm, Str); + return EntryRef(MapEntry, false); +} + +DwarfStringPool::EntryRef +DwarfStringPool::getIndexedEntry(AsmPrinter &Asm, StringRef Str) { + auto &MapEntry = getEntryImpl(Asm, Str); + if (!MapEntry.getValue().isIndexed()) + MapEntry.getValue().Index = NumIndexedStrings++; + return EntryRef(MapEntry, true); } void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, @@ -47,12 +61,18 @@ // Start the dwarf str section. Asm.OutStreamer->SwitchSection(StrSection); - // Get all of the string pool entries and put them in an array by their ID so - // we can sort them. - SmallVector *, 64> Entries(Pool.size()); + // Get all of the string pool entries and sort them by their offset. + SmallVector *, 64> Entries; + Entries.reserve(Pool.size()); for (const auto &E : Pool) - Entries[E.getValue().Index] = &E; + Entries.push_back(&E); + + llvm::sort( + Entries.begin(), Entries.end(), + [](const StringMapEntry *A, const StringMapEntry *B) { + return A->getValue().Offset < B->getValue().Offset; + }); for (const auto &Entry : Entries) { assert(ShouldCreateSymbols == static_cast(Entry->getValue().Symbol) && @@ -71,6 +91,14 @@ // If we've got an offset section go ahead and emit that now as well. if (OffsetSection) { + // Now only take the indexed entries and put them in an array by their ID so + // we can emit them in order. + Entries.resize(NumIndexedStrings); + for (const auto &Entry : Pool) { + if (Entry.getValue().isIndexed()) + Entries[Entry.getValue().Index] = &Entry; + } + Asm.OutStreamer->SwitchSection(OffsetSection); unsigned size = 4; // FIXME: DWARF64 is 8. for (const auto &Entry : Entries) Index: lib/CodeGen/AsmPrinter/DwarfUnit.cpp =================================================================== --- lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -240,9 +240,14 @@ DIEInlineString(String, DIEValueAllocator)); return; } - auto StringPoolEntry = DU->getStringPool().getEntry(*Asm, String); dwarf::Form IxForm = isDwoUnit() ? dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp; + + auto StringPoolEntry = + useSegmentedStringOffsetsTable() || IxForm == dwarf::DW_FORM_GNU_str_index + ? DU->getStringPool().getIndexedEntry(*Asm, String) + : DU->getStringPool().getEntry(*Asm, String); + // For DWARF v5 and beyond, use the smallest strx? form possible. if (useSegmentedStringOffsetsTable()) { IxForm = dwarf::DW_FORM_strx1; Index: test/DebugInfo/X86/string-offsets-table-order.ll =================================================================== --- /dev/null +++ test/DebugInfo/X86/string-offsets-table-order.ll @@ -0,0 +1,79 @@ +; REQUIRES: object-emission +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -split-dwarf-file=foo.dwo -filetype=obj < %s \ +; RUN: | llvm-dwarfdump -v - | FileCheck %s + +; This triggers a situation where the order of entries in the .debug_str and +; .debug_str_offsets sections does not match and makes sure that all entries are +; still wired up correctly. + +; Produced with "clang -S -emit-llvm -gdwarf-5" from source "int X;", copied +; three times and modified by hand. + +; CHECK: .debug_info contents: +; CHECK: DW_TAG_compile_unit +; CHECK: DW_AT_comp_dir [DW_FORM_strx1] ( indexed (00000001) string = "X3") +; CHECK: DW_TAG_compile_unit +; CHECK: DW_AT_comp_dir [DW_FORM_strx1] ( indexed (00000002) string = "X2") +; CHECK: DW_TAG_compile_unit +; CHECK: DW_AT_comp_dir [DW_FORM_strx1] ( indexed (00000003) string = "X1") +; CHECK: .debug_info.dwo contents: + +; CHECK: .debug_str contents: +; CHECK: 0x[[X3:[0-9a-f]*]]: "X3" +; CHECK: 0x[[X1:[0-9a-f]*]]: "X1" +; CHECK: 0x[[X2:[0-9a-f]*]]: "X2" + +; CHECK: .debug_str_offsets contents: +; CHECK: Format = DWARF32, Version = 5 +; CHECK-NEXT: 00000000 "foo.dwo" +; CHECK-NEXT: [[X3]] "X3" +; CHECK-NEXT: [[X2]] "X2" +; CHECK-NEXT: [[X1]] "X1" +; CHECK-EMPTY: + + + +!llvm.dbg.cu = !{!10, !20, !30} +!llvm.module.flags = !{!0, !1, !2} +!llvm.ident = !{!3} + +!0 = !{i32 2, !"Dwarf Version", i32 5} +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = !{i32 1, !"wchar_size", i32 4} +!3 = !{!"clang version 7.0.0 (trunk 337353) (llvm/trunk 337361)"} + + +@X1 = dso_local global i32 0, align 4, !dbg !11 + +!10 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !13, producer: "clang version 7.0.0 (trunk 337353) (llvm/trunk 337361)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !14, globals: !15) +!11 = !DIGlobalVariableExpression(var: !12, expr: !DIExpression()) +!12 = distinct !DIGlobalVariable(name: "X1", scope: !10, file: !16, line: 1, type: !17, isLocal: false, isDefinition: true) +!13 = !DIFile(filename: "-", directory: "X3", checksumkind: CSK_MD5, checksum: "f2e6e10e303927a308f1645fbf6f710e") +!14 = !{} +!15 = !{!11} +!16 = !DIFile(filename: "", directory: "X3", checksumkind: CSK_MD5, checksum: "f2e6e10e303927a308f1645fbf6f710e") +!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + + +@X2 = dso_local global i32 0, align 4, !dbg !21 + +!20 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !23, producer: "clang version 7.0.0 (trunk 337353) (llvm/trunk 337361)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !24, globals: !25) +!21 = !DIGlobalVariableExpression(var: !22, expr: !DIExpression()) +!22 = distinct !DIGlobalVariable(name: "X2", scope: !20, file: !26, line: 1, type: !27, isLocal: false, isDefinition: true) +!23 = !DIFile(filename: "-", directory: "X2", checksumkind: CSK_MD5, checksum: "f2e6e10e303927a308f1645fbf6f710e") +!24 = !{} +!25 = !{!21} +!26 = !DIFile(filename: "", directory: "X2", checksumkind: CSK_MD5, checksum: "f2e6e10e303927a308f1645fbf6f710e") +!27 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + + +@X3 = dso_local global i32 0, align 4, !dbg !31 + +!30 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !33, producer: "clang version 7.0.0 (trunk 337353) (llvm/trunk 337361)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !34, globals: !35) +!31 = !DIGlobalVariableExpression(var: !32, expr: !DIExpression()) +!32 = distinct !DIGlobalVariable(name: "X3", scope: !30, file: !36, line: 1, type: !37, isLocal: false, isDefinition: true) +!33 = !DIFile(filename: "-", directory: "X1", checksumkind: CSK_MD5, checksum: "f2e6e10e303927a308f1645fbf6f710e") +!34 = !{} +!35 = !{!31} +!36 = !DIFile(filename: "", directory: "X1", checksumkind: CSK_MD5, checksum: "f2e6e10e303927a308f1645fbf6f710e") +!37 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) Index: test/DebugInfo/X86/string-offsets-table.ll =================================================================== --- test/DebugInfo/X86/string-offsets-table.ll +++ test/DebugInfo/X86/string-offsets-table.ll @@ -1,7 +1,7 @@ ; REQUIRES: object-emission ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -filetype=obj < %s | llvm-dwarfdump -v - \ ; RUN: | FileCheck --check-prefix=MONOLITHIC %s -; RUN: llc -mtriple=x86_64-unknown-linux-gnu -split-dwarf-file=%t.dwo -filetype=obj < %s \ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -split-dwarf-file=foo.dwo -filetype=obj < %s \ ; RUN: | llvm-dwarfdump -v - | FileCheck --check-prefix=SPLIT %s ; This basic test checks the emission of a DWARF v5 string offsets table in @@ -59,6 +59,8 @@ ; SPLIT: DW_TAG_compile_unit ; SPLIT-NOT: {{DW_TAG|contents:}} ; SPLIT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x00000008) +; SPLIT: DW_AT_GNU_dwo_name [DW_FORM_strx1] ( indexed (00000000) string = "foo.dwo") +; SPLIT: DW_AT_comp_dir [DW_FORM_strx1] ( indexed (00000001) string = "/home/test") ; Check for the split CU in .debug_info.dwo. ; SPLIT: .debug_info.dwo contents: @@ -79,10 +81,10 @@ ; ; Extract the string offsets referenced in the main file by the skeleton unit. ; SPLIT: .debug_str contents: -; SPLIT-NEXT: 0x00000000:{{.*}} -; SPLIT-NEXT: 0x[[STRING2SPLIT:[0-9a-f]*]]{{.*}} -; SPLIT-NEXT: 0x[[STRING3SPLIT:[0-9a-f]*]]{{.*}} -; SPLIT-NEXT: 0x[[STRING4SPLIT:[0-9a-f]*]]{{.*}} +; SPLIT-NEXT: 0x00000000: "foo.dwo" +; SPLIT-NEXT: 0x[[STRING2SPLIT:[0-9a-f]*]]: "/home/test" +; SPLIT-NEXT: 0x[[STRING3SPLIT:[0-9a-f]*]]: "E" +; SPLIT-NEXT: 0x[[STRING4SPLIT:[0-9a-f]*]]: "glob" ; ; Extract the string offsets referenced in the .dwo file by the split unit. ; SPLIT: .debug_str.dwo contents: @@ -91,13 +93,15 @@ ; SPLIT-NEXT: 0x[[STRING3DWO:[0-9a-f]*]]{{.*}} ; ; Check the string offsets sections in both the main and the .dwo files and -; verify that the extracted string offsets are referenced correctly. +; verify that the extracted string offsets are referenced correctly. The +; sections should contain only the offsets of strings that are actually +; referenced by the debug info. ; SPLIT: .debug_str_offsets contents: -; SPLIT-NEXT: 0x00000000: Contribution size = 20, Format = DWARF32, Version = 5 -; SPLIT-NEXT: 0x00000008: 00000000{{.*}} -; SPLIT-NEXT: 0x0000000c: [[STRING2SPLIT]] -; SPLIT-NEXT: 0x00000010: [[STRING3SPLIT]] -; SPLIT-NEXT: 0x00000014: [[STRING4SPLIT]] +; SPLIT-NEXT: 0x00000000: Contribution size = 12, Format = DWARF32, Version = 5 +; SPLIT-NEXT: 0x00000008: 00000000 "foo.dwo" +; SPLIT-NEXT: 0x0000000c: [[STRING2SPLIT]] "/home/test" +; SPLIT-EMPTY: + ; SPLIT: .debug_str_offsets.dwo contents: ; SPLIT-NEXT: 0x00000000: Contribution size = 36, Format = DWARF32, Version = 5 ; SPLIT-NEXT: 0x00000008: 00000000{{.*}} Index: tools/dsymutil/NonRelocatableStringpool.cpp =================================================================== --- tools/dsymutil/NonRelocatableStringpool.cpp +++ tools/dsymutil/NonRelocatableStringpool.cpp @@ -18,17 +18,17 @@ auto I = Strings.insert({S, DwarfStringPoolEntry()}); auto &Entry = I.first->second; - if (I.second || Entry.Index == -1U) { + if (I.second || !Entry.isIndexed()) { Entry.Index = NumEntries++; Entry.Offset = CurrentEndOffset; Entry.Symbol = nullptr; CurrentEndOffset += S.size() + 1; } - return DwarfStringPoolEntryRef(*I.first); + return DwarfStringPoolEntryRef(*I.first, true); } StringRef NonRelocatableStringpool::internString(StringRef S) { - DwarfStringPoolEntry Entry{nullptr, 0, -1U}; + DwarfStringPoolEntry Entry{nullptr, 0, DwarfStringPoolEntry::NotIndexed}; auto InsertResult = Strings.insert({S, Entry}); return InsertResult.first->getKey(); } @@ -38,7 +38,7 @@ std::vector Result; Result.reserve(Strings.size()); for (const auto &E : Strings) - Result.emplace_back(E); + Result.emplace_back(E, E.getValue().isIndexed()); llvm::sort( Result.begin(), Result.end(), [](const DwarfStringPoolEntryRef A, const DwarfStringPoolEntryRef B) { Index: unittests/CodeGen/DIEHashTest.cpp =================================================================== --- unittests/CodeGen/DIEHashTest.cpp +++ unittests/CodeGen/DIEHashTest.cpp @@ -31,8 +31,8 @@ public: DIEString getString(StringRef S) { DwarfStringPoolEntry Entry = {nullptr, 1, 1}; - return DIEString( - DwarfStringPoolEntryRef(*Pool.insert(std::make_pair(S, Entry)).first)); + return DIEString(DwarfStringPoolEntryRef( + *Pool.insert(std::make_pair(S, Entry)).first, Entry.isIndexed())); } };