Index: test/tools/llvm-dwp/Inputs/string_offsets/a.ll =================================================================== --- test/tools/llvm-dwp/Inputs/string_offsets/a.ll +++ test/tools/llvm-dwp/Inputs/string_offsets/a.ll @@ -0,0 +1,30 @@ +; A basic module with an enumeration type and 3 enumerators. +; Generated with clang -S -g -emit-llvm a.cpp from +; +; enum E1 {a, b, c}; +; E1 glob1; +; +; ModuleID = 'a.cpp' +source_filename = "a.cpp" + +@glob1 = global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!11, !12, !13} +!llvm.ident = !{!14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "glob1", scope: !2, file: !3, line: 2, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 7.0.0 (trunk 322295)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !10) +!3 = !DIFile(filename: "a.cpp", directory: "/home/test") +!4 = !{!5} +!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "E1", file: !3, line: 1, size: 32, elements: !6, identifier: "_ZTS2E1") +!6 = !{!7, !8, !9} +!7 = !DIEnumerator(name: "a", value: 0) +!8 = !DIEnumerator(name: "b", value: 1) +!9 = !DIEnumerator(name: "c", value: 2) +!10 = !{!0} +!11 = !{i32 2, !"Dwarf Version", i32 5} +!12 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !{i32 1, !"wchar_size", i32 4} +!14 = !{!"clang version 7.0.0 (trunk 322295)"} Index: test/tools/llvm-dwp/Inputs/string_offsets/b.ll =================================================================== --- test/tools/llvm-dwp/Inputs/string_offsets/b.ll +++ test/tools/llvm-dwp/Inputs/string_offsets/b.ll @@ -0,0 +1,30 @@ +; A basic module with an enumeration type and 3 enumerators. 
+; Generated with clang -S -g -emit-llvm b.cpp from +; +; enum E2 {d, e, f}; +; E2 glob2; +; +; ModuleID = 'b.cpp' +source_filename = "b.cpp" + +@glob2 = global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!11, !12, !13} +!llvm.ident = !{!14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "glob2", scope: !2, file: !3, line: 2, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 7.0.0 (trunk 322295)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !10) +!3 = !DIFile(filename: "b.cpp", directory: "/home/test") +!4 = !{!5} +!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "E2", file: !3, line: 1, size: 32, elements: !6, identifier: "_ZTS2E2") +!6 = !{!7, !8, !9} +!7 = !DIEnumerator(name: "d", value: 0) +!8 = !DIEnumerator(name: "e", value: 1) +!9 = !DIEnumerator(name: "f", value: 2) +!10 = !{!0} +!11 = !{i32 2, !"Dwarf Version", i32 4} +!12 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !{i32 1, !"wchar_size", i32 4} +!14 = !{!"clang version 7.0.0 (trunk 322295)"} Index: test/tools/llvm-dwp/Inputs/string_offsets/c.ll =================================================================== --- test/tools/llvm-dwp/Inputs/string_offsets/c.ll +++ test/tools/llvm-dwp/Inputs/string_offsets/c.ll @@ -0,0 +1,30 @@ +; A basic module with an enumeration type and 3 enumerators. 
+; Generated with clang -S -g -emit-llvm c.cpp from +; +; enum E3 {g, h, i}; +; E3 glob3; +; +; ModuleID = 'c.cpp' +source_filename = "c.cpp" + +@glob3 = global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!11, !12, !13} +!llvm.ident = !{!14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "glob3", scope: !2, file: !3, line: 2, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 7.0.0 (trunk 322295)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !10) +!3 = !DIFile(filename: "c.cpp", directory: "/home/wpieb") +!4 = !{!5} +!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "E3", file: !3, line: 1, size: 32, elements: !6, identifier: "_ZTS2E3") +!6 = !{!7, !8, !9} +!7 = !DIEnumerator(name: "g", value: 0) +!8 = !DIEnumerator(name: "h", value: 1) +!9 = !DIEnumerator(name: "i", value: 2) +!10 = !{!0} +!11 = !{i32 2, !"Dwarf Version", i32 5} +!12 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !{i32 1, !"wchar_size", i32 4} +!14 = !{!"clang version 7.0.0 (trunk 322295)"} Index: test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-1.s =================================================================== --- test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-1.s +++ test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-1.s @@ -0,0 +1,62 @@ +# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.dwo +# RUN: not llvm-dwp %t.dwo -o %t.dwp |& FileCheck %s + +# Test object to verify that dwp handles invalid DWARF v5 contributions +# to the string offsets table. We have one simple compile unit. 
+# + .section .debug_str.dwo,"MS",@progbits,1 +str_producer: + .asciz "Handmade DWARF producer" +str_CU1: + .asciz "Compile_Unit_1" +str_CU1_dir: + .asciz "/home/test/CU1" + + .section .debug_str_offsets.dwo,"",@progbits +# An invalid DWARF v5 contribution to the .debug_str_offsets.dwo section. +.debug_str_offsets_object_file1_start: + .long 500 # Invalid length + .short 5 # DWARF version + .short 0 # Padding +.debug_str_offsets_base_1: + .long str_producer-.debug_str.dwo + .long str_CU1-.debug_str.dwo + .long str_CU1_dir-.debug_str.dwo +.debug_str_offsets_object_file1_end: + +# A simple abbrev section. + .section .debug_abbrev.dwo,"",@progbits + .byte 0x01 # Abbrev code + .byte 0x11 # DW_TAG_compile_unit + .byte 0x00 # DW_CHILDREN_no + .byte 0x25 # DW_AT_producer + .byte 0x1a # DW_FORM_strx + .byte 0x03 # DW_AT_name + .byte 0x1a # DW_FORM_strx + .byte 0x03 # DW_AT_name + .byte 0x1a # DW_FORM_strx + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x00 # EOM(3) +abbrev_end: + + .section .debug_info.dwo,"",@progbits + +# DWARF v5 CU header. +CU1_5_start: + .long CU1_5_end-CU1_5_version # Length of Unit +CU1_5_version: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev.dwo # Offset Into Abbrev. Section +# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name +# and DW_AT_compdir. 
+ .byte 1 # Abbreviation code + .byte 0 # The index of the producer string + .byte 1 # The index of the CU name string + .byte 2 # The index of the comp dir string + .byte 0 # NULL +CU1_5_end: + +# CHECK: String offsets table contribution has invalid length Index: test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-2.s =================================================================== --- test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-2.s +++ test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-2.s @@ -0,0 +1,62 @@ +# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.dwo +# RUN: not llvm-dwp %t.dwo -o %t.dwp |& FileCheck %s + +# Test object to verify that dwp handles invalid DWARF v5 contributions +# to the string offsets table. We have one simple compile unit. +# + .section .debug_str.dwo,"MS",@progbits,1 +str_producer: + .asciz "Handmade DWARF producer" +str_CU1: + .asciz "Compile_Unit_1" +str_CU1_dir: + .asciz "/home/test/CU1" + + .section .debug_str_offsets.dwo,"",@progbits +# An invalid DWARF v5 contribution to the .debug_str_offsets.dwo section. +.debug_str_offsets_object_file1_start: + .long 0xfffffff4 # Invalid length + .short 5 # DWARF version + .short 0 # Padding +.debug_str_offsets_base_1: + .long str_producer-.debug_str.dwo + .long str_CU1-.debug_str.dwo + .long str_CU1_dir-.debug_str.dwo +.debug_str_offsets_object_file1_end: + +# A simple abbrev section. + .section .debug_abbrev.dwo,"",@progbits + .byte 0x01 # Abbrev code + .byte 0x11 # DW_TAG_compile_unit + .byte 0x00 # DW_CHILDREN_no + .byte 0x25 # DW_AT_producer + .byte 0x1a # DW_FORM_strx + .byte 0x03 # DW_AT_name + .byte 0x1a # DW_FORM_strx + .byte 0x03 # DW_AT_name + .byte 0x1a # DW_FORM_strx + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x00 # EOM(3) +abbrev_end: + + .section .debug_info.dwo,"",@progbits + +# DWARF v5 CU header. 
+CU1_5_start: + .long CU1_5_end-CU1_5_version # Length of Unit +CU1_5_version: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev.dwo # Offset Into Abbrev. Section +# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name +# and DW_AT_compdir. + .byte 1 # Abbreviation code + .byte 0 # The index of the producer string + .byte 1 # The index of the CU name string + .byte 2 # The index of the comp dir string + .byte 0 # NULL +CU1_5_end: + +# CHECK: Invalid string offsets table contribution Index: test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-3.s =================================================================== --- test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-3.s +++ test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-3.s @@ -0,0 +1,57 @@ +# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.dwo +# RUN: not llvm-dwp %t.dwo -o %t.dwp |& FileCheck %s + +# Test object to verify that dwp handles invalid DWARF v5 contributions +# to the string offsets table. We have one simple compile unit. +# + .section .debug_str.dwo,"MS",@progbits,1 +str_producer: + .asciz "Handmade DWARF producer" +str_CU1: + .asciz "Compile_Unit_1" +str_CU1_dir: + .asciz "/home/test/CU1" + + .section .debug_str_offsets.dwo,"",@progbits +# An invalid DWARF v5 contribution to the .debug_str_offsets.dwo section. +# The section is too short to contain a valid header. +.debug_str_offsets_object_file1_start: + .long 0 +.debug_str_offsets_object_file1_end: + +# A simple abbrev section. 
+ .section .debug_abbrev.dwo,"",@progbits + .byte 0x01 # Abbrev code + .byte 0x11 # DW_TAG_compile_unit + .byte 0x00 # DW_CHILDREN_no + .byte 0x25 # DW_AT_producer + .byte 0x1a # DW_FORM_strx + .byte 0x03 # DW_AT_name + .byte 0x1a # DW_FORM_strx + .byte 0x03 # DW_AT_name + .byte 0x1a # DW_FORM_strx + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x00 # EOM(3) +abbrev_end: + + .section .debug_info.dwo,"",@progbits + +# DWARF v5 CU header. +CU1_5_start: + .long CU1_5_end-CU1_5_version # Length of Unit +CU1_5_version: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev.dwo # Offset Into Abbrev. Section +# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name +# and DW_AT_compdir. + .byte 1 # Abbreviation code + .byte 0 # The index of the producer string + .byte 1 # The index of the CU name string + .byte 2 # The index of the comp dir string + .byte 0 # NULL +CU1_5_end: + +# CHECK: Invalid string offsets table contribution Index: test/tools/llvm-dwp/X86/invalid_string_form.test =================================================================== --- test/tools/llvm-dwp/X86/invalid_string_form.test +++ test/tools/llvm-dwp/X86/invalid_string_form.test @@ -1,3 +1,3 @@ RUN: not llvm-dwp %p/../Inputs/invalid_string_form.dwo -o %t 2>&1 | FileCheck %s -CHECK: error: string field encoded without DW_FORM_string or DW_FORM_GNU_str_index +CHECK: error: string field encoded with unsupported form Index: test/tools/llvm-dwp/X86/string_offsets.test =================================================================== --- test/tools/llvm-dwp/X86/string_offsets.test +++ test/tools/llvm-dwp/X86/string_offsets.test @@ -0,0 +1,50 @@ +RUN: llc -filetype=obj -split-dwarf-file=%ta.dwo %p/../Inputs/string_offsets/a.ll -o %ta.o +RUN: llvm-objcopy -split-dwo=%ta.dwo %ta.o +RUN: llc -filetype=obj -split-dwarf-file=%tb.dwo %p/../Inputs/string_offsets/b.ll -o %tb.o +RUN: llvm-objcopy -split-dwo=%tb.dwo %tb.o +RUN: llc 
-filetype=obj -split-dwarf-file=%tc.dwo %p/../Inputs/string_offsets/c.ll -o %tc.o +RUN: llvm-objcopy -split-dwo=%tc.dwo %tc.o +RUN: llvm-dwp %ta.dwo %tb.dwo -o %t1.dwp +RUN: llvm-dwp %t1.dwp %tc.dwo -o %t2.dwp +RUN: llvm-dwarfdump -v %t2.dwp | FileCheck %s + +FIXME: For some reason, piping straight from llvm-dwp to llvm-dwarfdump -v doesn't behave well - it looks like dwarfdump reads/closes its input before dwp has finished. + +We are building a DWP file first from 2 DWO files, and subsequently another DWP file from +the first DWP file and another DWO file. This exercises the relevant code paths in +llvm-dwp. + +Check that the final DWP contains 2 v5 CUs with a v4 CU sandwiched between them. We +make sure that at least one string from each CU is displayed correctly and that the string +offsets table looks correct. + +The first compile unit. +CHECK: .debug_info.dwo contents: +CHECK-NEXT: Compile Unit:{{.*}}version = 0x0005 +CHECK-NOT: Compile Unit +CHECK: DW_AT_name [DW_FORM_strx1] ( indexed{{.*}}string = "E1") + +The second compile unit. +CHECK: Compile Unit:{{.*}}version = 0x0004 +CHECK-NOT: Compile Unit +CHECK: DW_AT_name [DW_FORM_GNU_str_index] ( indexed{{.*}}string = "E2") + +The third compile unit. +CHECK: Compile Unit:{{.*}}version = 0x0005 +CHECK-NOT: Compile Unit +CHECK: DW_AT_name [DW_FORM_strx1] ( indexed{{.*}}string = "E3") + +Check that the first contribution to the string offsets table is of version 5 and +contains the string "a.dwo" +CHECK: .debug_str_offsets.dwo contents: +CHECK-NEXT: 0x00000000: Contribution size = 32, Format = DWARF32, Version = 5 +CHECK-NEXT: 0x00000008:{{.*}}a.dwo" + +Check that the second contribution is of version 4, has no header and contains +the string "b.dwo". 
+CHECK: 0x[[SECONDCONTRIBOFFSET:[0-9a-f]*]]: Contribution size = 32, Format = DWARF32, Version = 4 +CHECK-NEXT: 0x[[SECONDCONTRIBOFFSET]]:{{.*}}b.dwo" + +Check that the third contribution is of version 5 and contains the string "c.dwo" +CHECK: Contribution size = 32, Format = DWARF32, Version = 5 +CHECK-NEXT: {{.*}}c.dwo" Index: tools/llvm-dwp/llvm-dwp.cpp =================================================================== --- tools/llvm-dwp/llvm-dwp.cpp +++ tools/llvm-dwp/llvm-dwp.cpp @@ -57,18 +57,13 @@ value_desc("filename"), cat(DwpCategory)); -static void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, - MCSection *StrOffsetSection, - StringRef CurStrSection, - StringRef CurStrOffsetSection) { - // Could possibly produce an error or warning if one of these was non-null but - // the other was null. - if (CurStrSection.empty() || CurStrOffsetSection.empty()) - return; - - DenseMap OffsetRemapping; +DenseMap OffsetRemapping; - DataExtractor Data(CurStrSection, true, 0); +// String sections are concatenated by dwp. Create a map of string offsets +// that maps the original string offsets to their post-concatenation values. +static void remapStrings(DWPStringPool &Strings, StringRef StrSection) { + // FIXME: DWARF64 support requires DataExtractor to support 64 bit offsets. + DataExtractor Data(StrSection, true, 0); uint32_t LocalOffset = 0; uint32_t PrevOffset = 0; while (const char *s = Data.getCStr(&LocalOffset)) { @@ -76,18 +71,124 @@ Strings.getOffset(s, LocalOffset - PrevOffset); PrevOffset = LocalOffset; } +} - Data = DataExtractor(CurStrOffsetSection, true, 0); +// Validate and copy a single DWARF v5 string offsets table contribution +// header. 
+static Error writeDWARFv5StrOffContributionHeader(DataExtractor &Data, + MCStreamer &Out, + uint32_t &Offset, + uint64_t &Length, + unsigned &EntrySize) { + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return make_error("Invalid string offsets table contribution"); + Length = Data.getU32(&Offset); + Out.EmitIntValue(Length, 4); - Out.SwitchSection(StrOffsetSection); + // A length of 0xffffffff indicates that this is a DWARF64 contribution to the + // string offsets table and the actual length is encoded in the next 64 bits. + if (Length == 0xffffffffU) { + if (!Data.isValidOffsetForDataOfSize(Offset, 8)) + return make_error("Invalid string offsets table contribution"); + Length = Data.getU64(&Offset); + EntrySize = 8; + Out.EmitIntValue(Length, 8); + } else if (Length >= 0xfffffff0u) + return make_error("Invalid string offsets table contribution"); + + // Copy the 16-bit version number, followed by 2 bytes of padding. + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return make_error("Invalid string offsets table contribution"); + Out.EmitIntValue(Data.getU32(&Offset), 4); + return Error::success(); +} +// NOTE: In the context of DWARF v5 string offsets tables, the term +// "contribution" is overloaded. A compile or type unit's contribution +// to a section in a DWP file is described by its offset and length values +// in the index table. Conversely, a DWARF v5 string offsets table +// contribution is defined in the DWARF standard and is an independent concept. +// A non-empty DWP contribution to the string offsets table can contain a DWARF +// v5 string offsets table contribution. In fact, for v5 units and above, it +// must do so. +// +// Emit a single string offsets table (DWP) contribution to the string offsets +// section. The offset values are remapped to the location of the strings +// in the newly created package file. 
+// +// In DWARF v5 and beyond we expect the (DWP) contribution to consist of a +// single DWARF v5 contribution with a proper header. +static Error writeStringOffsets(MCStreamer &Out, MCSection *StrOffsetSection, + StringRef CurStrOffsetSection, + int16_t Version) { + if (CurStrOffsetSection.empty()) + return Error::success(); + + DataExtractor Data(CurStrOffsetSection, true, 0); uint32_t Offset = 0; - uint64_t Size = CurStrOffsetSection.size(); - while (Offset < Size) { - auto OldOffset = Data.getU32(&Offset); + unsigned EntrySize = 4; // DWARF32 + uint64_t SectionLength = CurStrOffsetSection.size(); + uint64_t ContributionLength = SectionLength; + + Out.SwitchSection(StrOffsetSection); + + // Validate and copy a DWARF v5 contribution. Its length is returned in + // ContributionLength. To be consistent, ContributionLength must equal the + // remainder of the section length after accounting for the header. + if (Version >= 5) + if (Error HeaderError = writeDWARFv5StrOffContributionHeader( + Data, Out, Offset, ContributionLength, EntrySize)) + return HeaderError; + + // Validate that the length we may have extracted from a DWARF v5 + // contribution header is a multiple of EntrySize and that it is + // consistent with the section length. + ContributionLength = alignTo(ContributionLength, EntrySize); + if (ContributionLength != SectionLength - Offset) + return make_error( + "String offsets table contribution has invalid length"); + + uint64_t ContributionEnd = Offset + ContributionLength; + while (Offset < ContributionEnd) { + auto OldOffset = Data.getUnsigned(&Offset, EntrySize); auto NewOffset = OffsetRemapping[OldOffset]; - Out.EmitIntValue(NewOffset, 4); + Out.EmitIntValue(NewOffset, EntrySize); } + return Error::success(); +} + +// Keep track of a compile unit's contribution (as specified by the index +// table) to the string offsets section along with the unit's version. 
+struct StrOffContributionDescriptor { + uint64_t Offset; + uint64_t Length; + uint8_t CUVersion; + StrOffContributionDescriptor(uint64_t Offset, uint64_t Length, + uint8_t CUVersion) + : Offset(Offset), Length(Length), CUVersion(CUVersion) {} +}; + +using StrOffContributionDescriptors = std::vector; + +// Remap and write the string offsets table contributions we extracted from +// a DWP file. The contributions are described in a vector of descriptors, which +// is cleared at the end. +static Error writeStringOffsetsDWP(MCStreamer &Out, + StringRef CurStrOffsetSection, + MCSection *StrOffsetSection, + StrOffContributionDescriptors &Descriptors) { + std::sort(Descriptors.begin(), Descriptors.end(), + [](const StrOffContributionDescriptor &L, + const StrOffContributionDescriptor &R) { + return L.Offset < R.Offset; + }); + for (auto &D : Descriptors) + if (Error WriteStringsError = writeStringOffsets( + Out, StrOffsetSection, + CurStrOffsetSection.substr(D.Offset, D.Length), D.CUVersion)) + return WriteStringsError; + Descriptors.clear(); + return Error::success(); } static uint32_t getCUAbbrev(StringRef Abbrev, uint64_t AbbrCode) { @@ -106,32 +207,58 @@ return Offset; } -struct CompileUnitIdentifiers { +struct CompileUnitProperties { uint64_t Signature = 0; const char *Name = ""; const char *DWOName = ""; + uint8_t StringOffsetsBase = 0; + uint8_t StringOffsetsEntrySize = 4; + uint8_t Version = 0; }; static Expected -getIndexedString(dwarf::Form Form, DataExtractor InfoData, - uint32_t &InfoOffset, StringRef StrOffsets, StringRef Str) { +getIndexedString(dwarf::Form Form, DataExtractor InfoData, uint32_t &InfoOffset, + StringRef StrOffsets, uint8_t StringOffsetsBase, + uint8_t StringOffsetsEntrySize, StringRef Str) { if (Form == dwarf::DW_FORM_string) return InfoData.getCStr(&InfoOffset); - if (Form != dwarf::DW_FORM_GNU_str_index) - return make_error( - "string field encoded without DW_FORM_string or DW_FORM_GNU_str_index"); - auto StrIndex = 
InfoData.getULEB128(&InfoOffset); + + uint32_t StrIndex; + switch (Form) { + case dwarf::DW_FORM_GNU_str_index: + case dwarf::DW_FORM_strx: + StrIndex = InfoData.getULEB128(&InfoOffset); + break; + case dwarf::DW_FORM_strx1: + StrIndex = InfoData.getU8(&InfoOffset); + break; + case dwarf::DW_FORM_strx2: + StrIndex = InfoData.getU16(&InfoOffset); + break; + case dwarf::DW_FORM_strx3: + StrIndex = InfoData.getU24(&InfoOffset); + break; + case dwarf::DW_FORM_strx4: + StrIndex = InfoData.getU32(&InfoOffset); + break; + default: + return make_error("string field encoded with unsupported form"); + } DataExtractor StrOffsetsData(StrOffsets, true, 0); - uint32_t StrOffsetsOffset = 4 * StrIndex; - uint32_t StrOffset = StrOffsetsData.getU32(&StrOffsetsOffset); + uint32_t StrOffsetsOffset = + StringOffsetsBase + StringOffsetsEntrySize * StrIndex; + uint64_t StrOffset = + StrOffsetsData.getUnsigned(&StrOffsetsOffset, StringOffsetsEntrySize); DataExtractor StrData(Str, true, 0); - return StrData.getCStr(&StrOffset); + // FIXME: DataExtractor does not handle 64-bit offsets. + uint32_t StrOffset32 = (uint32_t)StrOffset; + return StrData.getCStr(&StrOffset32); } -static Expected getCUIdentifiers(StringRef Abbrev, - StringRef Info, - StringRef StrOffsets, - StringRef Str) { +static Expected getCUProperties(StringRef Abbrev, + StringRef Info, + StringRef StrOffsets, + StringRef Str) { uint32_t Offset = 0; DataExtractor InfoData(Info, true, 0); dwarf::DwarfFormat Format = dwarf::DwarfFormat::DWARF32; @@ -142,9 +269,36 @@ Format = dwarf::DwarfFormat::DWARF64; Length = InfoData.getU64(&Offset); } + uint8_t OffsetEntrySize = Format == dwarf::DwarfFormat::DWARF32 ? 
4 : 8; uint16_t Version = InfoData.getU16(&Offset); - InfoData.getU32(&Offset); // Abbrev offset (should be zero) - uint8_t AddrSize = InfoData.getU8(&Offset); + uint8_t AddrSize = 0; + if (Version >= 5) { + (void)InfoData.getU8(&Offset); // UnitType + } else { + InfoData.getUnsigned(&Offset, + OffsetEntrySize); // Abbrev offset (should be zero) + } + + AddrSize = InfoData.getU8(&Offset); + if (Version >= 5) { + InfoData.getUnsigned(&Offset, + OffsetEntrySize); // Abbrev offset (should be zero) + } + + CompileUnitProperties ID; + // For DWARF v5 and later, we need to determine the start of the string + // offsets table and its entry size so we can find indexed strings. + if (Version >= 5) { + uint32_t Off = 0; + ID.StringOffsetsBase = 8; + DataExtractor StrOffsetData(StrOffsets, true, 0); + uint64_t StringOffsetsLength = StrOffsetData.getU32(&Off); + if (StringOffsetsLength == 0xffffffffU) { + ID.StringOffsetsEntrySize = 8; + ID.StringOffsetsBase = 16; + } + } + ID.Version = Version; uint32_t AbbrCode = InfoData.getULEB128(&Offset); @@ -157,22 +311,23 @@ AbbrevData.getU8(&AbbrevOffset); uint32_t Name; dwarf::Form Form; - CompileUnitIdentifiers ID; while ((Name = AbbrevData.getULEB128(&AbbrevOffset)) | (Form = static_cast(AbbrevData.getULEB128(&AbbrevOffset))) && (Name != 0 || Form != 0)) { switch (Name) { case dwarf::DW_AT_name: { - Expected EName = - getIndexedString(Form, InfoData, Offset, StrOffsets, Str); + Expected EName = getIndexedString( + Form, InfoData, Offset, StrOffsets, ID.StringOffsetsBase, + ID.StringOffsetsEntrySize, Str); if (!EName) return EName.takeError(); ID.Name = *EName; break; } case dwarf::DW_AT_GNU_dwo_name: { - Expected EName = - getIndexedString(Form, InfoData, Offset, StrOffsets, Str); + Expected EName = getIndexedString( + Form, InfoData, Offset, StrOffsets, ID.StringOffsetsBase, + ID.StringOffsetsEntrySize, Str); if (!EName) return EName.takeError(); ID.DWOName = *EName; @@ -455,7 +610,7 @@ static Error buildDuplicateError(const 
std::pair &PrevE, - const CompileUnitIdentifiers &ID, StringRef DWPName) { + const CompileUnitProperties &ID, StringRef DWPName) { return make_error( std::string("Duplicate DWO ID (") + utohexstr(PrevE.first) + ") in " + buildDWODescription(PrevE.second.Name, PrevE.second.DWPName, @@ -552,11 +707,10 @@ if (InfoSection.empty()) continue; - writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection, - CurStrOffsetSection); + remapStrings(Strings, CurStrSection); if (CurCUIndexSection.empty()) { - Expected EID = getCUIdentifiers( + Expected EID = getCUProperties( AbbrevSection, InfoSection, CurStrOffsetSection, CurStrSection); if (!EID) return EID.takeError(); @@ -568,51 +722,70 @@ P.first->second.DWOName = ID.DWOName; addAllTypes(Out, TypeIndexEntries, TypesSection, CurTypesSection, CurEntry, ContributionOffsets[DW_SECT_TYPES - DW_SECT_INFO]); - continue; - } - - DWARFUnitIndex CUIndex(DW_SECT_INFO); - DataExtractor CUIndexData(CurCUIndexSection, Obj.isLittleEndian(), 0); - if (!CUIndex.parse(CUIndexData)) - return make_error("Failed to parse cu_index"); - - for (const DWARFUnitIndex::Entry &E : CUIndex.getRows()) { - auto *I = E.getOffsets(); - if (!I) - continue; - auto P = IndexEntries.insert(std::make_pair(E.getSignature(), CurEntry)); - Expected EID = getCUIdentifiers( - getSubsection(AbbrevSection, E, DW_SECT_ABBREV), - getSubsection(InfoSection, E, DW_SECT_INFO), - getSubsection(CurStrOffsetSection, E, DW_SECT_STR_OFFSETS), - CurStrSection); - if (!EID) - return EID.takeError(); - const auto &ID = *EID; - if (!P.second) - return buildDuplicateError(*P.first, ID, Input); - auto &NewEntry = P.first->second; - NewEntry.Name = ID.Name; - NewEntry.DWOName = ID.DWOName; - NewEntry.DWPName = Input; - for (auto Kind : CUIndex.getColumnKinds()) { - auto &C = NewEntry.Contributions[Kind - DW_SECT_INFO]; - C.Offset += I->Offset; - C.Length = I->Length; - ++I; + if (Error WriteStringsError = writeStringOffsets( + Out, StrOffsetSection, CurStrOffsetSection, 
ID.Version)) + return WriteStringsError; + } else { + DWARFUnitIndex CUIndex(DW_SECT_INFO); + DataExtractor CUIndexData(CurCUIndexSection, Obj.isLittleEndian(), 0); + if (!CUIndex.parse(CUIndexData)) + return make_error("Failed to parse cu_index"); + + // In order to remap the string offsets we need to know the version of + // the compile unit that added a particular contribution to the + // string offsets table. This is because we may have DWARF v5 (or later) + // units as well as v4 (or earlier) units. V5 contributions adhere + // to the DWARF v5 standard and have a header, whereas v4 contributions + // are simply an array of string offsets. This vector keeps track of + // the individual contributions and their units' versions. + StrOffContributionDescriptors StrOffContributions; + for (const DWARFUnitIndex::Entry &E : CUIndex.getRows()) { + auto *I = E.getOffsets(); + if (!I) + continue; + auto P = + IndexEntries.insert(std::make_pair(E.getSignature(), CurEntry)); + Expected EID = getCUProperties( + getSubsection(AbbrevSection, E, DW_SECT_ABBREV), + getSubsection(InfoSection, E, DW_SECT_INFO), + getSubsection(CurStrOffsetSection, E, DW_SECT_STR_OFFSETS), + CurStrSection); + if (!EID) + return EID.takeError(); + const auto &ID = *EID; + if (!P.second) + return buildDuplicateError(*P.first, ID, Input); + auto &NewEntry = P.first->second; + NewEntry.Name = ID.Name; + NewEntry.DWOName = ID.DWOName; + NewEntry.DWPName = Input; + for (auto Kind : CUIndex.getColumnKinds()) { + auto &C = NewEntry.Contributions[Kind - DW_SECT_INFO]; + C.Offset += I->Offset; + C.Length = I->Length; + if (Kind == DW_SECT_STR_OFFSETS) { + StrOffContributions.push_back( + StrOffContributionDescriptor(I->Offset, I->Length, ID.Version)); + } + ++I; + } + } + if (Error WriteStringsError = writeStringOffsetsDWP( + Out, CurStrOffsetSection, StrOffsetSection, StrOffContributions)) + return WriteStringsError; + + if (!CurTypesSection.empty()) { + if (CurTypesSection.size() != 1) + return 
make_error( + "multiple type unit sections in .dwp file"); + DWARFUnitIndex TUIndex(DW_SECT_TYPES); + DataExtractor TUIndexData(CurTUIndexSection, Obj.isLittleEndian(), 0); + if (!TUIndex.parse(TUIndexData)) + return make_error("Failed to parse tu_index"); + addAllTypesFromDWP(Out, TypeIndexEntries, TUIndex, TypesSection, + CurTypesSection.front(), CurEntry, + ContributionOffsets[DW_SECT_TYPES - DW_SECT_INFO]); } - } - - if (!CurTypesSection.empty()) { - if (CurTypesSection.size() != 1) - return make_error("multiple type unit sections in .dwp file"); - DWARFUnitIndex TUIndex(DW_SECT_TYPES); - DataExtractor TUIndexData(CurTUIndexSection, Obj.isLittleEndian(), 0); - if (!TUIndex.parse(TUIndexData)) - return make_error("Failed to parse tu_index"); - addAllTypesFromDWP(Out, TypeIndexEntries, TUIndex, TypesSection, - CurTypesSection.front(), CurEntry, - ContributionOffsets[DW_SECT_TYPES - DW_SECT_INFO]); } }