Index: test/tools/llvm-dwp/Inputs/string_offsets/a.ll =================================================================== --- test/tools/llvm-dwp/Inputs/string_offsets/a.ll +++ test/tools/llvm-dwp/Inputs/string_offsets/a.ll @@ -0,0 +1,30 @@ +; A basic module with an enumeration type and 3 enumerators. +; Generated with clang -S -g -emit-llvm a.cpp from +; +; enum E1 {a, b, c}; +; E1 glob1; +; +; ModuleID = 'a.cpp' +source_filename = "a.cpp" + +@glob1 = global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!11, !12, !13} +!llvm.ident = !{!14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "glob1", scope: !2, file: !3, line: 2, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 7.0.0 (trunk 322295)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !10) +!3 = !DIFile(filename: "a.cpp", directory: "/home/test") +!4 = !{!5} +!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "E1", file: !3, line: 1, size: 32, elements: !6, identifier: "_ZTS2E1") +!6 = !{!7, !8, !9} +!7 = !DIEnumerator(name: "a", value: 0) +!8 = !DIEnumerator(name: "b", value: 1) +!9 = !DIEnumerator(name: "c", value: 2) +!10 = !{!0} +!11 = !{i32 2, !"Dwarf Version", i32 5} +!12 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !{i32 1, !"wchar_size", i32 4} +!14 = !{!"clang version 7.0.0 (trunk 322295)"} Index: test/tools/llvm-dwp/Inputs/string_offsets/b.ll =================================================================== --- test/tools/llvm-dwp/Inputs/string_offsets/b.ll +++ test/tools/llvm-dwp/Inputs/string_offsets/b.ll @@ -0,0 +1,30 @@ +; A basic module with an enumeration type and 3 enumerators. 
+; Generated with clang -S -g -emit-llvm b.cpp from +; +; enum E2 {d, e, f}; +; E2 glob2; +; +;ModuleID = 'b.cpp' +source_filename = "b.cpp" + +@glob2 = global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!11, !12, !13} +!llvm.ident = !{!14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "glob2", scope: !2, file: !3, line: 2, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 7.0.0 (trunk 322295)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !10) +!3 = !DIFile(filename: "b.cpp", directory: "/home/test") +!4 = !{!5} +!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "E2", file: !3, line: 1, size: 32, elements: !6, identifier: "_ZTS2E2") +!6 = !{!7, !8, !9} +!7 = !DIEnumerator(name: "d", value: 0) +!8 = !DIEnumerator(name: "e", value: 1) +!9 = !DIEnumerator(name: "f", value: 2) +!10 = !{!0} +!11 = !{i32 2, !"Dwarf Version", i32 4} +!12 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !{i32 1, !"wchar_size", i32 4} +!14 = !{!"clang version 7.0.0 (trunk 322295)"} Index: test/tools/llvm-dwp/Inputs/string_offsets/c.ll =================================================================== --- test/tools/llvm-dwp/Inputs/string_offsets/c.ll +++ test/tools/llvm-dwp/Inputs/string_offsets/c.ll @@ -0,0 +1,30 @@ +; A basic module with an enumeration type and 3 enumerators. 
+; Generated with clang -S -g -emit-llvm c.cpp from +; +; enum E3 {g, h, i}; +; E3 glob3; +; +; ModuleID = 'c.cpp' +source_filename = "c.cpp" + +@glob3 = global i32 0, align 4, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!11, !12, !13} +!llvm.ident = !{!14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "glob3", scope: !2, file: !3, line: 2, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 7.0.0 (trunk 322295)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !10) +!3 = !DIFile(filename: "c.cpp", directory: "/home/wpieb") +!4 = !{!5} +!5 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "E3", file: !3, line: 1, size: 32, elements: !6, identifier: "_ZTS2E3") +!6 = !{!7, !8, !9} +!7 = !DIEnumerator(name: "g", value: 0) +!8 = !DIEnumerator(name: "h", value: 1) +!9 = !DIEnumerator(name: "i", value: 2) +!10 = !{!0} +!11 = !{i32 2, !"Dwarf Version", i32 5} +!12 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !{i32 1, !"wchar_size", i32 4} +!14 = !{!"clang version 7.0.0 (trunk 322295)"} Index: test/tools/llvm-dwp/Inputs/string_offsets/mixed_dwp.s =================================================================== --- test/tools/llvm-dwp/Inputs/string_offsets/mixed_dwp.s +++ test/tools/llvm-dwp/Inputs/string_offsets/mixed_dwp.s @@ -0,0 +1,284 @@ +# Test object in dwp format to verify that llvm-dwp correctly handles DWARF v5 string +# offset tables. We have 2 CUs and 2 TUs, one each of version 5 and version 4. +# Note that the CUs' contributions to the string offsets table are not in the same +# order as the order in which the CUs are listed in the index table. 
+ + .section .debug_str.dwo,"MS",@progbits,1 +str_dwo_name1: + .asciz "/test/a.dwo" +str_dwo_name2: + .asciz "/test/b.dwo" +str_CU1: + .asciz "a.cpp" +str_CU2: + .asciz "b.cpp" +str_TU1: + .asciz "Type_Unit_1" +str_TU2: + .asciz "Type_Unit_2" +str_enum1: + .asciz "E1" +str_enum2: + .asciz "E2" +str_enumerator1: + .asciz "a" +str_enumerator2: + .asciz "d" + + .section .debug_str_offsets.dwo,"",@progbits +# Object file 2's portion of the .debug_str_offsets.dwo section. +# This is a pre-DWARF v5 string offsets table contribution (i.e. no header). +.debug_str_offsets_object_file2_start: +.debug_str_offsets_base_2: + .long str_dwo_name2-.debug_str.dwo + .long str_CU2-.debug_str.dwo + .long str_TU2-.debug_str.dwo + .long str_enum2-.debug_str.dwo + .long str_enumerator2-.debug_str.dwo +.debug_str_offsets_object_file2_end: + +# Object file 1's portion of the .debug_str_offsets.dwo section. +# CU1 and TU1 share a contribution to the string offsets table. +.debug_str_offsets_object_file1_start: + .long .debug_str_offsets_object_file1_end-.debug_str_offsets_base_1 + .short 5 # DWARF version + .short 0 # Padding +.debug_str_offsets_base_1: + .long str_dwo_name1-.debug_str.dwo + .long str_CU1-.debug_str.dwo + .long str_TU1-.debug_str.dwo + .long str_enum1-.debug_str.dwo + .long str_enumerator1-.debug_str.dwo +.debug_str_offsets_object_file1_end: + +# Abbrevs are shared for all compile and type units of the same version. 
+ .section .debug_abbrev.dwo,"",@progbits +V5_abbrev_start: + .byte 0x01 # Abbrev code + .byte 0x11 # DW_TAG_compile_unit + .byte 0x00 # DW_CHILDREN_no + .short 0x42b0 # DW_AT_GNU_dwo_name + .byte 0x25 # DW_FORM_strx1 + .byte 0x03 # DW_AT_name + .byte 0x25 # DW_FORM_strx1 + .short 0x42b1 # DW_AT_GNU_dwo_id + .byte 0x07 # DW_FORM_data8 + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x02 # Abbrev code + .byte 0x41 # DW_TAG_type_unit + .byte 0x01 # DW_CHILDREN_yes + .byte 0x03 # DW_AT_name + .byte 0x25 # DW_FORM_strx1 + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x03 # Abbrev code + .byte 0x04 # DW_TAG_enumeration_type + .byte 0x01 # DW_CHILDREN_yes + .byte 0x03 # DW_AT_name + .byte 0x25 # DW_FORM_strx1 + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x04 # Abbrev code + .byte 0x28 # DW_TAG_enumerator + .byte 0x00 # DW_CHILDREN_no + .byte 0x03 # DW_AT_name + .byte 0x25 # DW_FORM_strx1 + .byte 0x1c # DW_AT_const_value + .byte 0x0d # DW_FORM_sdata + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x00 # EOM(3) +V5_abbrev_end: +V4_abbrev_start: + .byte 0x01 # Abbrev code + .byte 0x11 # DW_TAG_compile_unit + .byte 0x00 # DW_CHILDREN_no + .short 0x42b0 # DW_AT_GNU_dwo_name + .short 0x3e82 # DW_FORM_GNU_str_index + .byte 0x03 # DW_AT_name + .short 0x3e82 # DW_FORM_GNU_str_index + .short 0x42b1 # DW_AT_GNU_dwo_id + .byte 0x07 # DW_FORM_data8 + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x02 # Abbrev code + .byte 0x41 # DW_TAG_type_unit + .byte 0x01 # DW_CHILDREN_yes + .byte 0x03 # DW_AT_name + .short 0x3e82 # DW_FORM_GNU_str_index + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x03 # Abbrev code + .byte 0x04 # DW_TAG_enumeration_type + .byte 0x01 # DW_CHILDREN_yes + .byte 0x03 # DW_AT_name + .short 0x3e82 # DW_FORM_GNU_str_index + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x04 # Abbrev code + .byte 0x28 # DW_TAG_enumerator + .byte 0x00 # DW_CHILDREN_no + .byte 0x03 # DW_AT_name + .short 0x3e82 # DW_FORM_GNU_str_index + .byte 0x1c # 
DW_AT_const_value + .byte 0x0d # DW_FORM_sdata + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x00 # EOM(3) +V4_abbrev_end: +abbrev_end: + + .section .debug_info.dwo,"",@progbits +# DWARF v5 CU header. +CU1_5_start: + .long CU1_5_end-CU1_5_version # Length of Unit +CU1_5_version: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. Contribution +# The compile-unit DIE, which has a DW_AT_GNU_dwo_name, DW_AT_name +# and DW_AT_GNU_dwo_id. + .byte 1 # Abbreviation code + .byte 0 # The index of the dwo name string + .byte 1 # The index of the CU name string + .quad 0xaa00bb00cc00dd00 # dwo id + .byte 0 # NULL +CU1_5_end: + +# DWARF v4 CU header. +CU2_4_start: + .long CU2_4_end-CU2_4_version # Length of Unit +CU2_4_version: + .short 4 # DWARF version number + .long 0 # Offset Into Abbrev. Contribution + .byte 8 # Address Size (in bytes) +# The compile-unit DIE, which has a DW_AT_GNU_dwo_name, DW_AT_name +# and DW_AT_GNU_dwo_id. + .byte 1 # Abbreviation code + .byte 0 # The index of the dwo name string + .byte 1 # The index of the CU name string + .quad 0xcc00dd00ee00ff00 # dwo id + .byte 0 # NULL +CU2_4_end: + + .section .debug_types.dwo,"",@progbits +# DWARF v5 Type unit header. +TU1_5_start: + .long TU1_5_end-TU1_5_version # Length of Unit +TU1_5_version: + .short 5 # DWARF version number + .byte 2 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long 0 # Offset Into Abbrev. Section + .quad 0x0011223344556677 # Type Signature + .long TU1_5_type-TU1_5_start # Type offset +# The type-unit DIE, which has a name. + .byte 2 # Abbreviation code + .byte 2 # Index of the unit type name string +# The enumeration type DIE, which has a name. +TU1_5_type: + .byte 3 # Abbreviation code + .byte 3 # Index of the enumeration type name string +# One enumerator, which has a name. 
+ .byte 4 # Abbreviation code + .byte 4 # Index of the enumerator string + .byte 0 # NULL + .byte 0 # NULL +TU1_5_end: + +# DWARF v4 Type unit header. +TU2_4_start: + .long TU2_4_end-TU2_4_version # Length of Unit +TU2_4_version: + .short 4 # DWARF version number + .long 0 # Offset Into Abbrev. Section + .byte 8 # Address Size (in bytes) + .quad 0x00aabbccddeeff99 # Type Signature + .long TU2_4_type-TU2_4_start # Type offset +# The type-unit DIE, which has a name. + .byte 2 # Abbreviation code + .byte 2 # Index of the unit type name string +# The enumeration type DIE, which has a name. +TU2_4_type: + .byte 3 # Abbreviation code + .byte 3 # Index of the enumeration type name string +# One enumerator, which has a name. + .byte 4 # Abbreviation code + .byte 4 # Index of the enumerator string + .byte 0 # NULL + .byte 0 # NULL +TU2_4_end: + + .section .debug_cu_index,"",@progbits + # The index header + .long 2 # Version + .long 3 # Columns of contribution matrix + .long 2 # number of units + .long 2 # number of hash buckets in table + + # The signatures for both CUs. + .quad 0xddeeaaddbbaabbee # signature 1 + .quad 0xff00ffeeffaaff00 # signature 2 + # The indexes for both CUs. + .long 1 # index 1 + .long 2 # index 2 + # The sections to which all CUs contribute. + .long 1 # DW_SECT_INFO + .long 3 # DW_SECT_ABBREV + .long 6 # DW_SECT_STR_OFFSETS + + # The starting offsets of all CU's contributions to info, + # abbrev and string offsets table. + .long CU1_5_start-.debug_info.dwo + .long V5_abbrev_start-.debug_abbrev.dwo + .long .debug_str_offsets_object_file1_start-.debug_str_offsets.dwo + .long CU2_4_start-.debug_info.dwo + .long V4_abbrev_start-.debug_abbrev.dwo + .long .debug_str_offsets_object_file2_start-.debug_str_offsets.dwo + + # The lengths of all CU's contributions to info, abbrev and + # string offsets table. 
+ .long CU1_5_end-CU1_5_start + .long V5_abbrev_end-V5_abbrev_start + .long .debug_str_offsets_object_file1_end-.debug_str_offsets_object_file1_start + .long CU2_4_end-CU2_4_start + .long V4_abbrev_end-V4_abbrev_start + .long .debug_str_offsets_object_file2_end-.debug_str_offsets_object_file2_start + + .section .debug_tu_index,"",@progbits + # The index header + .long 2 # Version + .long 3 # Columns of contribution matrix + .long 2 # number of units + .long 2 # number of hash buckets in table + + # The signatures for both TUs. + .quad 0xeeaaddbbaabbeedd # signature 1 + .quad 0x00ffeeffaaff00ff # signature 2 + # The indexes for both TUs. + .long 1 # index 1 + .long 2 # index 2 + # The sections to which both TUs contribute. + .long 2 # DW_SECT_TYPES + .long 3 # DW_SECT_ABBREV + .long 6 # DW_SECT_STR_OFFSETS + + # The starting offsets of both TU's contributions to info, + # abbrev and string offsets table. + .long TU1_5_start-.debug_types.dwo + .long V5_abbrev_start-.debug_abbrev.dwo + .long .debug_str_offsets_object_file1_start-.debug_str_offsets.dwo + .long TU2_4_start-.debug_types.dwo + .long V4_abbrev_start-.debug_abbrev.dwo + .long .debug_str_offsets_object_file2_start-.debug_str_offsets.dwo + + # The lengths of both TU's contributions to info, abbrev and + # string offsets table. 
+ .long TU1_5_end-TU1_5_start + .long V5_abbrev_end-V5_abbrev_start + .long .debug_str_offsets_object_file1_end-.debug_str_offsets_object_file1_start + .long TU2_4_end-TU2_4_start + .long V4_abbrev_end-V4_abbrev_start + .long .debug_str_offsets_object_file2_end-.debug_str_offsets_object_file2_start Index: test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-1.s =================================================================== --- test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-1.s +++ test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-1.s @@ -0,0 +1,62 @@ +# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.dwo +# RUN: not llvm-dwp %t.dwo -o %t.dwp 2>&1 | FileCheck %s + +# Test object to verify that dwp handles invalid DWARF v5 contributions +# to the string offsets table. We have one simple compile unit. +# + .section .debug_str.dwo,"MS",@progbits,1 +str_producer: + .asciz "Handmade DWARF producer" +str_CU1: + .asciz "Compile_Unit_1" +str_CU1_dir: + .asciz "/home/test/CU1" + + .section .debug_str_offsets.dwo,"",@progbits +# An invalid DWARF v5 contribution to the .debug_str_offsets.dwo section. +.debug_str_offsets_object_file1_start: + .long 500 # Invalid length + .short 5 # DWARF version + .short 0 # Padding +.debug_str_offsets_base_1: + .long str_producer-.debug_str.dwo + .long str_CU1-.debug_str.dwo + .long str_CU1_dir-.debug_str.dwo +.debug_str_offsets_object_file1_end: + +# A simple abbrev section. + .section .debug_abbrev.dwo,"",@progbits + .byte 0x01 # Abbrev code + .byte 0x11 # DW_TAG_compile_unit + .byte 0x00 # DW_CHILDREN_no + .byte 0x25 # DW_AT_producer + .byte 0x1a # DW_FORM_strx + .byte 0x03 # DW_AT_name + .byte 0x1a # DW_FORM_strx + .byte 0x03 # DW_AT_name + .byte 0x1a # DW_FORM_strx + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x00 # EOM(3) +abbrev_end: + + .section .debug_info.dwo,"",@progbits + +# DWARF v5 CU header. 
+CU1_5_start: + .long CU1_5_end-CU1_5_version # Length of Unit +CU1_5_version: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev.dwo # Offset Into Abbrev. Section +# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name +# and DW_AT_compdir. + .byte 1 # Abbreviation code + .byte 0 # The index of the producer string + .byte 1 # The index of the CU name string + .byte 2 # The index of the comp dir string + .byte 0 # NULL +CU1_5_end: + +# CHECK: String offsets table contribution has invalid length Index: test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-2.s =================================================================== --- test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-2.s +++ test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-2.s @@ -0,0 +1,62 @@ +# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.dwo +# RUN: not llvm-dwp %t.dwo -o %t.dwp 2>&1 | FileCheck %s + +# Test object to verify that dwp handles invalid DWARF v5 contributions +# to the string offsets table. We have one simple compile unit. +# + .section .debug_str.dwo,"MS",@progbits,1 +str_producer: + .asciz "Handmade DWARF producer" +str_CU1: + .asciz "Compile_Unit_1" +str_CU1_dir: + .asciz "/home/test/CU1" + + .section .debug_str_offsets.dwo,"",@progbits +# An invalid DWARF v5 contribution to the .debug_str_offsets.dwo section. +.debug_str_offsets_object_file1_start: + .long 0xfffffff4 # Invalid length + .short 5 # DWARF version + .short 0 # Padding +.debug_str_offsets_base_1: + .long str_producer-.debug_str.dwo + .long str_CU1-.debug_str.dwo + .long str_CU1_dir-.debug_str.dwo +.debug_str_offsets_object_file1_end: + +# A simple abbrev section. 
+ .section .debug_abbrev.dwo,"",@progbits + .byte 0x01 # Abbrev code + .byte 0x11 # DW_TAG_compile_unit + .byte 0x00 # DW_CHILDREN_no + .byte 0x25 # DW_AT_producer + .byte 0x1a # DW_FORM_strx + .byte 0x03 # DW_AT_name + .byte 0x1a # DW_FORM_strx + .byte 0x03 # DW_AT_name + .byte 0x1a # DW_FORM_strx + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x00 # EOM(3) +abbrev_end: + + .section .debug_info.dwo,"",@progbits + +# DWARF v5 CU header. +CU1_5_start: + .long CU1_5_end-CU1_5_version # Length of Unit +CU1_5_version: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev.dwo # Offset Into Abbrev. Section +# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name +# and DW_AT_compdir. + .byte 1 # Abbreviation code + .byte 0 # The index of the producer string + .byte 1 # The index of the CU name string + .byte 2 # The index of the comp dir string + .byte 0 # NULL +CU1_5_end: + +# CHECK: Invalid string offsets table contribution Index: test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-3.s =================================================================== --- test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-3.s +++ test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-3.s @@ -0,0 +1,57 @@ +# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.dwo +# RUN: not llvm-dwp %t.dwo -o %t.dwp 2>&1 | FileCheck %s + +# Test object to verify that dwp handles invalid DWARF v5 contributions +# to the string offsets table. We have one simple compile unit. +# + .section .debug_str.dwo,"MS",@progbits,1 +str_producer: + .asciz "Handmade DWARF producer" +str_CU1: + .asciz "Compile_Unit_1" +str_CU1_dir: + .asciz "/home/test/CU1" + + .section .debug_str_offsets.dwo,"",@progbits +# An invalid DWARF v5 contribution to the .debug_str_offsets.dwo section. +# The section is too short to contain a valid header. 
+.debug_str_offsets_object_file1_start: + .long 0 +.debug_str_offsets_object_file1_end: + +# A simple abbrev section. + .section .debug_abbrev.dwo,"",@progbits + .byte 0x01 # Abbrev code + .byte 0x11 # DW_TAG_compile_unit + .byte 0x00 # DW_CHILDREN_no + .byte 0x25 # DW_AT_producer + .byte 0x1a # DW_FORM_strx + .byte 0x03 # DW_AT_name + .byte 0x1a # DW_FORM_strx + .byte 0x03 # DW_AT_name + .byte 0x1a # DW_FORM_strx + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x00 # EOM(3) +abbrev_end: + + .section .debug_info.dwo,"",@progbits + +# DWARF v5 CU header. +CU1_5_start: + .long CU1_5_end-CU1_5_version # Length of Unit +CU1_5_version: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev.dwo # Offset Into Abbrev. Section +# The compile-unit DIE, which has a DW_AT_producer, DW_AT_name +# and DW_AT_compdir. + .byte 1 # Abbreviation code + .byte 0 # The index of the producer string + .byte 1 # The index of the CU name string + .byte 2 # The index of the comp dir string + .byte 0 # NULL +CU1_5_end: + +# CHECK: Invalid string offsets table contribution Index: test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-4.s =================================================================== --- test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-4.s +++ test/tools/llvm-dwp/X86/dwp-string-offsets-invalid-4.s @@ -0,0 +1,38 @@ +# RUN: llvm-mc -triple x86_64-unknown-linux %s -filetype=obj -o %t.dwo +# RUN: not llvm-dwp %t.dwo -o %t.dwp 2>&1 | FileCheck %s + +# Test object to verify that dwp rejects input files that use DW_FORM_strp. +# + .section .debug_str.dwo,"MS",@progbits,1 +str_name: + .asciz "CU1" + +# A simple abbrev section. 
+ .section .debug_abbrev.dwo,"",@progbits + .byte 0x01 # Abbrev code + .byte 0x11 # DW_TAG_compile_unit + .byte 0x00 # DW_CHILDREN_no + .byte 0x03 # DW_AT_name + .byte 0x0e # DW_FORM_strp + .byte 0x00 # EOM(1) + .byte 0x00 # EOM(2) + .byte 0x00 # EOM(3) +abbrev_end: + + .section .debug_info.dwo,"",@progbits + +# DWARF v5 CU header. +CU1_5_start: + .long CU1_5_end-CU1_5_version # Length of Unit +CU1_5_version: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev.dwo # Offset Into Abbrev. Section +# The rudimentary compile-unit DIE, which has a DW_AT_name. + .byte 1 # Abbreviation code + .long str_name-.debug_str.dwo # The offset of the name string + .byte 0 # NULL +CU1_5_end: + +# CHECK: DW_FORM_strp is not supported in dwp files Index: test/tools/llvm-dwp/X86/invalid_string_form.test =================================================================== --- test/tools/llvm-dwp/X86/invalid_string_form.test +++ test/tools/llvm-dwp/X86/invalid_string_form.test @@ -1,3 +1,3 @@ RUN: not llvm-dwp %p/../Inputs/invalid_string_form.dwo -o %t 2>&1 | FileCheck %s -CHECK: error: string field encoded without DW_FORM_string or DW_FORM_GNU_str_index +CHECK: error: string field encoded with unsupported form Index: test/tools/llvm-dwp/X86/string_offsets.test =================================================================== --- test/tools/llvm-dwp/X86/string_offsets.test +++ test/tools/llvm-dwp/X86/string_offsets.test @@ -0,0 +1,94 @@ +Case 1: Produce a dwp file from 2 dwo files. The input files are constructed from IR. +The second input file (b.ll) contains DWARF v4. 
+ +RUN: llc -filetype=obj -split-dwarf-file=%ta.dwo %p/../Inputs/string_offsets/a.ll -o %ta.o +RUN: llvm-objcopy -split-dwo=%ta.dwo %ta.o +RUN: llc -filetype=obj -split-dwarf-file=%tb.dwo %p/../Inputs/string_offsets/b.ll -o %tb.o +RUN: llvm-objcopy -split-dwo=%tb.dwo %tb.o +RUN: llvm-dwp %ta.dwo %tb.dwo -o %t1.dwp +RUN: llvm-dwarfdump -v %t1.dwp | FileCheck --check-prefixes=DWOINPUT,BOTH %s + +Case 2: Produce a dwp file from dwo and a dwp file. This ensures that both relevant +code paths in llvm-dwp's write() are exercised. The dwp input file has been hand constructed +and contains one v5 compile unit, one v4 compile unit, one v5 type unit and one v4 type unit. +The order of contributions to the string offsets table is different from the order in +which the compile units appear in the CU index table. The second compile unit's +contribution precedes the first unit's contribution. The test ensures that llvm-dwp +handles this correctly. + +RUN: llc -filetype=obj -split-dwarf-file=%tc.dwo %p/../Inputs/string_offsets/c.ll -o %tc.o +RUN: llvm-objcopy -split-dwo=%tc.dwo %tc.o +RUN: llvm-mc %p/../Inputs/string_offsets/mixed_dwp.s -filetype=obj -o %tmixed.dwp +RUN: llvm-dwp %tmixed.dwp %tc.dwo -o %t2.dwp +RUN: llvm-dwarfdump -v %t2.dwp | FileCheck --check-prefixes=MIXEDINPUT,BOTH %s + +In both cases we mix v5 and v4 units to ensure that string offsets tables are handled +correctly in a mixed scenario. In the second case, the dwp input file contains type units +in the .debug_types.dwo sections to ensure strings referenced from type units are correctly +displayed. + +In case 2 we check that the final DWP contains 2 v5 CUs with a v4 CU sandwiched between +them. We make sure that at least one string from each CU and TU is displayed correctly +and that the string offsets table looks correct. + +The first compile unit. In case 1 the type appears in the info section, in case 2 in the +types section. 
+FIXME - this will have to change when we put type units in the .debug_info* sections. + +BOTH: .debug_info.dwo contents: +BOTH-NEXT: Compile Unit:{{.*}}version = 0x0005 +BOTH-NOT: Compile Unit +BOTH: DW_AT_name [DW_FORM_strx1] ( indexed{{.*}}string = "a.cpp") +DWOINPUT-NOT: Compile Unit +DWOINPUT: DW_AT_name [DW_FORM_strx1] ( indexed{{.*}}string = "E1") + +The second compile unit. +BOTH: Compile Unit:{{.*}}version = 0x0004 +BOTH-NOT: Compile Unit +BOTH: DW_AT_name [DW_FORM_GNU_str_index] ( indexed{{.*}}string = "b.cpp") +DWOINPUT-NOT: Compile Unit +DWOINPUT: DW_AT_name [DW_FORM_GNU_str_index] ( indexed{{.*}}string = "E2") + +The third compile unit. +MIXEDINPUT: Compile Unit:{{.*}}version = 0x0005 +MIXEDINPUT-NOT: Compile Unit +MIXEDINPUT: DW_AT_name [DW_FORM_strx1] ( indexed{{.*}}string = "E3") + +The first type unit. +MIXEDINPUT: .debug_types.dwo contents: +MIXEDINPUT: Type Unit:{{.*}}version = 0x0005 +MIXEDINPUT-NOT: Type Unit +MIXEDINPUT: DW_AT_name [DW_FORM_strx1] ( indexed{{.*}}string = "a") + +The second type unit. +MIXEDINPUT: Type Unit:{{.*}}version = 0x0004 +MIXEDINPUT-NOT: Type Unit +MIXEDINPUT: DW_AT_name [DW_FORM_GNU_str_index] ( indexed{{.*}}string = "d") + +In the dwo input scenario, check that the first contribution to the string offsets +table is of version 5 and contains the string "a.dwo". In the mixed input scenario, +we expect the first contribution to be from the second compile unit (and hence of +version 4). +BOTH: .debug_str_offsets.dwo contents: +BOTH-NEXT: 0x00000000: Contribution size = + +DWOINPUT-SAME: 32, Format = DWARF32, Version = 5 +DWOINPUT-NEXT: 0x00000008:{{.*}}a.dwo" + +MIXEDINPUT-SAME: 20, Format = DWARF32, Version = 4 +MIXEDINPUT-NEXT: 0x00000000:{{.*}}b.dwo" + +In the dwo input scenario, we check that the second contribution is of version 4, +has no header and contains the string "b.dwo". In the mixed input scenario, we +expect it to be from the first compile unit, which has version 5. 
+BOTH: 0x[[SECONDCONTRIBOFFSET:[0-9a-f]*]]: Contribution size = + +DWOINPUT-SAME: 32, Format = DWARF32, Version = 4 +DWOINPUT-NEXT: 0x[[SECONDCONTRIBOFFSET]]:{{.*}}b.dwo" + +MIXEDINPUT-SAME: 20, Format = DWARF32, Version = 5 +MIXEDINPUT-NEXT: 0x{{.*}}a.dwo" + +Check that the third contribution is of version 5 and contains the string "c.dwo" +MIXEDINPUT: Contribution size = 32, Format = DWARF32, Version = 5 +MIXEDINPUT-NEXT: {{.*}}c.dwo" Index: tools/llvm-dwp/llvm-dwp.cpp =================================================================== --- tools/llvm-dwp/llvm-dwp.cpp +++ tools/llvm-dwp/llvm-dwp.cpp @@ -57,18 +57,13 @@ value_desc("filename"), cat(DwpCategory)); -static void writeStringsAndOffsets(MCStreamer &Out, DWPStringPool &Strings, - MCSection *StrOffsetSection, - StringRef CurStrSection, - StringRef CurStrOffsetSection) { - // Could possibly produce an error or warning if one of these was non-null but - // the other was null. - if (CurStrSection.empty() || CurStrOffsetSection.empty()) - return; +DenseMap OffsetRemapping; - DenseMap OffsetRemapping; - - DataExtractor Data(CurStrSection, true, 0); +// String sections are concatenated by dwp. Create a map of string offsets +// that maps the original string offsets to their post-concatenation values. +static void remapStrings(DWPStringPool &Strings, StringRef StrSection) { + // FIXME: DWARF64 support requires DataExtractor to support 64 bit offsets. + DataExtractor Data(StrSection, true, 0); uint32_t LocalOffset = 0; uint32_t PrevOffset = 0; while (const char *s = Data.getCStr(&LocalOffset)) { @@ -76,18 +71,124 @@ Strings.getOffset(s, LocalOffset - PrevOffset); PrevOffset = LocalOffset; } +} - Data = DataExtractor(CurStrOffsetSection, true, 0); +// Validate and copy a single DWARF v5 string offsets table contribution +// header. 
+static Error writeDWARFv5StrOffContributionHeader(DataExtractor &Data, + MCStreamer &Out, + uint32_t &Offset, + uint64_t &Length, + unsigned &EntrySize) { + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return make_error("Invalid string offsets table contribution"); + Length = Data.getU32(&Offset); + Out.EmitIntValue(Length, 4); - Out.SwitchSection(StrOffsetSection); + // A length of 0xffffffff indicates that this is a DWARF64 contribution to the + // string offsets table and the actual length is encoded in the next 64 bits. + if (Length == 0xffffffffU) { + if (!Data.isValidOffsetForDataOfSize(Offset, 8)) + return make_error("Invalid string offsets table contribution"); + Length = Data.getU64(&Offset); + EntrySize = 8; + Out.EmitIntValue(Length, 8); + } else if (Length >= 0xfffffff0u) + return make_error("Invalid string offsets table contribution"); + + // Copy the 16-bit version number, followed by 2 bytes of padding. + if (!Data.isValidOffsetForDataOfSize(Offset, 4)) + return make_error("Invalid string offsets table contribution"); + Out.EmitIntValue(Data.getU32(&Offset), 4); + return Error::success(); +} +// NOTE: In the context of DWARF v5 string offsets tables, the term +// "contribution" is overloaded. A compile or type unit's contribution +// to a section in a DWP file is described by its offset and length values +// in the index table. Conversely, a DWARF v5 string offsets table +// contribution is defined in the DWARF standard and is an independent concept. +// A non-empty DWP contribution to the string offsets table can contain a DWARF +// v5 string offsets table contribution. In fact, for v5 units and above, it +// must do so. +// +// Emit a single string offsets table (DWP) contribution to the string offsets +// section. The offset values are remapped to the location of the strings +// in the newly created package file. 
+// +// In DWARF v5 and beyond we expect the (DWP) contribution to consist of a +// single DWARF v5 contribution with a proper header. +static Error writeStringOffsets(MCStreamer &Out, MCSection *StrOffsetSection, + StringRef CurStrOffsetSection, + int16_t Version) { + if (CurStrOffsetSection.empty()) + return Error::success(); + + DataExtractor Data(CurStrOffsetSection, true, 0); uint32_t Offset = 0; - uint64_t Size = CurStrOffsetSection.size(); - while (Offset < Size) { - auto OldOffset = Data.getU32(&Offset); + unsigned EntrySize = 4; // DWARF32 + uint64_t SectionLength = CurStrOffsetSection.size(); + uint64_t ContributionLength = SectionLength; + + Out.SwitchSection(StrOffsetSection); + + // Validate and copy a DWARF v5 contribution. Its length is returned in + // ContributionLength. To be consistent, ContributionLength must equal the + // remainder of the section length after accounting for the header. + if (Version >= 5) + if (Error HeaderError = writeDWARFv5StrOffContributionHeader( + Data, Out, Offset, ContributionLength, EntrySize)) + return HeaderError; + + // Validate that the length we may have extracted from a DWARF v5 + // contribution header is a multiple of EntrySize and that it is + // consistent with the section length. + ContributionLength = alignTo(ContributionLength, EntrySize); + if (ContributionLength != SectionLength - Offset) + return make_error( + "String offsets table contribution has invalid length"); + + uint64_t ContributionEnd = Offset + ContributionLength; + while (Offset < ContributionEnd) { + auto OldOffset = Data.getUnsigned(&Offset, EntrySize); auto NewOffset = OffsetRemapping[OldOffset]; - Out.EmitIntValue(NewOffset, 4); + Out.EmitIntValue(NewOffset, EntrySize); } + return Error::success(); +} + +// Keep track of a compile unit's contribution (as specified by the index +// table) to the string offsets section along with the unit's version. 
+struct StrOffContributionDescriptor { + uint64_t Offset; + uint64_t Length; + uint8_t CUVersion; + StrOffContributionDescriptor(uint64_t Offset, uint64_t Length, + uint8_t CUVersion) + : Offset(Offset), Length(Length), CUVersion(CUVersion) {} +}; + +using StrOffContributionDescriptors = std::vector; + +// Remap and write the string offsets table contributions we extracted from +// a DWP file. The contributions are described in a vector of descriptors, which +// is cleared at the end. +static Error writeStringOffsetsDWP(MCStreamer &Out, + StringRef CurStrOffsetSection, + MCSection *StrOffsetSection, + StrOffContributionDescriptors &Descriptors) { + std::sort(Descriptors.begin(), Descriptors.end(), + [](const StrOffContributionDescriptor &L, + const StrOffContributionDescriptor &R) { + return L.Offset < R.Offset; + }); + for (auto &D : Descriptors) + if (Error WriteStringsError = writeStringOffsets( + Out, StrOffsetSection, + CurStrOffsetSection.substr(D.Offset, D.Length), D.CUVersion)) + return WriteStringsError; + Descriptors.clear(); + return Error::success(); } static uint32_t getCUAbbrev(StringRef Abbrev, uint64_t AbbrCode) { @@ -106,32 +207,65 @@ return Offset; } -struct CompileUnitIdentifiers { +struct CompileUnitProperties { uint64_t Signature = 0; const char *Name = ""; const char *DWOName = ""; + uint8_t StringOffsetsBase = 0; + uint8_t StringOffsetsEntrySize = 4; + uint8_t Version = 0; }; static Expected -getIndexedString(dwarf::Form Form, DataExtractor InfoData, - uint32_t &InfoOffset, StringRef StrOffsets, StringRef Str) { +getIndexedString(dwarf::Form Form, DataExtractor InfoData, uint32_t &InfoOffset, + StringRef StrOffsets, uint8_t StringOffsetsBase, + uint8_t StringOffsetsEntrySize, StringRef Str) { if (Form == dwarf::DW_FORM_string) return InfoData.getCStr(&InfoOffset); - if (Form != dwarf::DW_FORM_GNU_str_index) - return make_error( - "string field encoded without DW_FORM_string or DW_FORM_GNU_str_index"); - auto StrIndex = 
InfoData.getULEB128(&InfoOffset); + + uint32_t StrIndex; + switch (Form) { + case dwarf::DW_FORM_GNU_str_index: + case dwarf::DW_FORM_strx: + StrIndex = InfoData.getULEB128(&InfoOffset); + break; + case dwarf::DW_FORM_strx1: + StrIndex = InfoData.getU8(&InfoOffset); + break; + case dwarf::DW_FORM_strx2: + StrIndex = InfoData.getU16(&InfoOffset); + break; + case dwarf::DW_FORM_strx3: + StrIndex = InfoData.getU24(&InfoOffset); + break; + case dwarf::DW_FORM_strx4: + StrIndex = InfoData.getU32(&InfoOffset); + break; + case dwarf::DW_FORM_strp: + return make_error("DW_FORM_strp is not supported in dwp files"); + default: { + std::string msgbuf; + raw_string_ostream msg{msgbuf}; + msg << format("string field encoded with unsupported form 0x%x", + (uint32_t)Form); + return make_error(msg.str()); + } + } DataExtractor StrOffsetsData(StrOffsets, true, 0); - uint32_t StrOffsetsOffset = 4 * StrIndex; - uint32_t StrOffset = StrOffsetsData.getU32(&StrOffsetsOffset); + uint32_t StrOffsetsOffset = + StringOffsetsBase + StringOffsetsEntrySize * StrIndex; + uint64_t StrOffset = + StrOffsetsData.getUnsigned(&StrOffsetsOffset, StringOffsetsEntrySize); DataExtractor StrData(Str, true, 0); - return StrData.getCStr(&StrOffset); + // FIXME: DataExtractor does not handle 64-bit offsets. + uint32_t StrOffset32 = (uint32_t)StrOffset; + return StrData.getCStr(&StrOffset32); } -static Expected getCUIdentifiers(StringRef Abbrev, - StringRef Info, - StringRef StrOffsets, - StringRef Str) { +static Expected getCUProperties(StringRef Abbrev, + StringRef Info, + StringRef StrOffsets, + StringRef Str) { uint32_t Offset = 0; DataExtractor InfoData(Info, true, 0); dwarf::DwarfFormat Format = dwarf::DwarfFormat::DWARF32; @@ -142,9 +276,36 @@ Format = dwarf::DwarfFormat::DWARF64; Length = InfoData.getU64(&Offset); } + uint8_t OffsetEntrySize = Format == dwarf::DwarfFormat::DWARF32 ? 
4 : 8; uint16_t Version = InfoData.getU16(&Offset); - InfoData.getU32(&Offset); // Abbrev offset (should be zero) - uint8_t AddrSize = InfoData.getU8(&Offset); + uint8_t AddrSize = 0; + if (Version >= 5) { + (void)InfoData.getU8(&Offset); // UnitType + } else { + InfoData.getUnsigned(&Offset, + OffsetEntrySize); // Abbrev offset (should be zero) + } + + AddrSize = InfoData.getU8(&Offset); + if (Version >= 5) { + InfoData.getUnsigned(&Offset, + OffsetEntrySize); // Abbrev offset (should be zero) + } + + CompileUnitProperties ID; + // For DWARF v5 and later, we need to determine the start of the string + // offsets table and its entry size so we can find indexed strings. + if (Version >= 5) { + uint32_t Off = 0; + ID.StringOffsetsBase = 8; + DataExtractor StrOffsetData(StrOffsets, true, 0); + uint64_t StringOffsetsLength = StrOffsetData.getU32(&Off); + if (StringOffsetsLength == 0xffffffffU) { + ID.StringOffsetsEntrySize = 8; + ID.StringOffsetsBase = 16; + } + } + ID.Version = Version; uint32_t AbbrCode = InfoData.getULEB128(&Offset); @@ -157,22 +318,23 @@ AbbrevData.getU8(&AbbrevOffset); uint32_t Name; dwarf::Form Form; - CompileUnitIdentifiers ID; while ((Name = AbbrevData.getULEB128(&AbbrevOffset)) | (Form = static_cast(AbbrevData.getULEB128(&AbbrevOffset))) && (Name != 0 || Form != 0)) { switch (Name) { case dwarf::DW_AT_name: { - Expected EName = - getIndexedString(Form, InfoData, Offset, StrOffsets, Str); + Expected EName = getIndexedString( + Form, InfoData, Offset, StrOffsets, ID.StringOffsetsBase, + ID.StringOffsetsEntrySize, Str); if (!EName) return EName.takeError(); ID.Name = *EName; break; } case dwarf::DW_AT_GNU_dwo_name: { - Expected EName = - getIndexedString(Form, InfoData, Offset, StrOffsets, Str); + Expected EName = getIndexedString( + Form, InfoData, Offset, StrOffsets, ID.StringOffsetsBase, + ID.StringOffsetsEntrySize, Str); if (!EName) return EName.takeError(); ID.DWOName = *EName; @@ -455,7 +617,7 @@ static Error buildDuplicateError(const 
std::pair &PrevE, - const CompileUnitIdentifiers &ID, StringRef DWPName) { + const CompileUnitProperties &ID, StringRef DWPName) { return make_error( std::string("Duplicate DWO ID (") + utohexstr(PrevE.first) + ") in " + buildDWODescription(PrevE.second.Name, PrevE.second.DWPName, @@ -552,11 +714,10 @@ if (InfoSection.empty()) continue; - writeStringsAndOffsets(Out, Strings, StrOffsetSection, CurStrSection, - CurStrOffsetSection); + remapStrings(Strings, CurStrSection); if (CurCUIndexSection.empty()) { - Expected EID = getCUIdentifiers( + Expected EID = getCUProperties( AbbrevSection, InfoSection, CurStrOffsetSection, CurStrSection); if (!EID) return EID.takeError(); @@ -568,6 +729,9 @@ P.first->second.DWOName = ID.DWOName; addAllTypes(Out, TypeIndexEntries, TypesSection, CurTypesSection, CurEntry, ContributionOffsets[DW_SECT_TYPES - DW_SECT_INFO]); + if (Error WriteStringsError = writeStringOffsets( + Out, StrOffsetSection, CurStrOffsetSection, ID.Version)) + return WriteStringsError; continue; } @@ -576,12 +740,20 @@ if (!CUIndex.parse(CUIndexData)) return make_error("Failed to parse cu_index"); + // In order to remap the string offsets we need to know the version of + // the compile unit that added a particular contribution to the + // string offsets table. This is because we may have DWARF v5 (or later) + // units as well as v4 (or earlier) units. V5 contributions adhere + // to the DWARF v5 standard and have a header, whereas v4 contributions + // are simply an array of string offsets. This vector keeps track of + // the individual contributions and their units' versions. 
+ StrOffContributionDescriptors StrOffContributions; for (const DWARFUnitIndex::Entry &E : CUIndex.getRows()) { auto *I = E.getOffsets(); if (!I) continue; auto P = IndexEntries.insert(std::make_pair(E.getSignature(), CurEntry)); - Expected EID = getCUIdentifiers( + Expected EID = getCUProperties( getSubsection(AbbrevSection, E, DW_SECT_ABBREV), getSubsection(InfoSection, E, DW_SECT_INFO), getSubsection(CurStrOffsetSection, E, DW_SECT_STR_OFFSETS), @@ -599,9 +771,16 @@ auto &C = NewEntry.Contributions[Kind - DW_SECT_INFO]; C.Offset += I->Offset; C.Length = I->Length; + if (Kind == DW_SECT_STR_OFFSETS) { + StrOffContributions.push_back( + StrOffContributionDescriptor(I->Offset, I->Length, ID.Version)); + } ++I; } } + if (Error WriteStringsError = writeStringOffsetsDWP( + Out, CurStrOffsetSection, StrOffsetSection, StrOffContributions)) + return WriteStringsError; if (!CurTypesSection.empty()) { if (CurTypesSection.size() != 1)