Index: include/llvm/Support/DataExtractor.h
===================================================================
--- include/llvm/Support/DataExtractor.h
+++ include/llvm/Support/DataExtractor.h
@@ -14,6 +14,30 @@
 #include "llvm/Support/DataTypes.h"
 
 namespace llvm {
+
+/// An auxiliary type to facilitate extraction of 3-byte entities.
+struct Uint24 {
+  uint8_t Bytes[3];
+  /// Fill all three bytes with \p U.
+  Uint24(uint8_t U) : Bytes{U, U, U} {}
+  /// Store \p U0, \p U1 and \p U2 in Bytes[0], Bytes[1] and Bytes[2].
+  Uint24(uint8_t U0, uint8_t U1, uint8_t U2) : Bytes{U0, U1, U2} {}
+  /// Widen to 32 bits; Bytes[0] is the low byte iff \p IsLittleEndian.
+  uint32_t getAsUint32(bool IsLittleEndian) const {
+    const uint8_t Low = IsLittleEndian ? Bytes[0] : Bytes[2];
+    const uint8_t High = IsLittleEndian ? Bytes[2] : Bytes[0];
+    return Low + (Bytes[1] << 8) + (High << 16);
+  }
+};
+
+using uint24_t = Uint24;
+static_assert(sizeof(uint24_t) == 3, "sizeof(uint24_t) != 3");
+
+/// Needed by swapByteOrder().
+inline uint24_t getSwappedBytes(uint24_t C) {
+  return uint24_t(C.Bytes[2], C.Bytes[1], C.Bytes[0]);
+}
+
 class DataExtractor {
   StringRef Data;
   uint8_t IsLittleEndian;
@@ -236,6 +260,23 @@
   /// NULL otherise.
   uint16_t *getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const;
 
+  /// Read a 24-bit unsigned integer at \a *offset_ptr, widened to a
+  /// uint32_t.
+  ///
+  /// Three bytes are taken from the binary data starting at the offset
+  /// that \a offset_ptr points to and are assembled into a uint32_t; the
+  /// offset is moved past them when the read succeeds.
+  ///
+  /// @param[in,out] offset_ptr
+  ///     A pointer to an offset within the data. It is advanced by 3
+  ///     bytes if the value is extracted correctly. If the offset is out
+  ///     of bounds or fewer than 3 bytes remain, the offset is left
+  ///     unmodified.
+  ///
+  /// @return
+  ///     The extracted 24-bit value represented in a uint32_t.
+  uint32_t getU24(uint32_t *offset_ptr) const;
+
   /// Extract a uint32_t value from \a *offset_ptr.
   ///
   /// Extract a single uint32_t from the binary data at the offset
Index: lib/DebugInfo/DWARF/DWARFFormValue.cpp
===================================================================
--- lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -138,6 +138,9 @@
   case DW_FORM_addrx2:
     return 2;
 
+  case DW_FORM_strx3:
+    return 3;
+
   case DW_FORM_data4:
   case DW_FORM_ref4:
   case DW_FORM_ref_sup4:
@@ -302,6 +305,10 @@
   case DW_FORM_GNU_str_index:
   case DW_FORM_GNU_strp_alt:
   case DW_FORM_strx:
+  case DW_FORM_strx1:
+  case DW_FORM_strx2:
+  case DW_FORM_strx3:
+  case DW_FORM_strx4:
     return (FC == FC_String);
   case DW_FORM_implicit_const:
     return (FC == FC_Constant);
@@ -368,6 +375,9 @@
     case DW_FORM_addrx2:
       Value.uval = Data.getU16(OffsetPtr);
       break;
+    case DW_FORM_strx3:
+      Value.uval = Data.getU24(OffsetPtr);
+      break;
     case DW_FORM_data4:
     case DW_FORM_ref4:
     case DW_FORM_ref_sup4:
@@ -545,6 +555,10 @@
     dumpString(OS);
     break;
   case DW_FORM_strx:
+  case DW_FORM_strx1:
+  case DW_FORM_strx2:
+  case DW_FORM_strx3:
+  case DW_FORM_strx4:
   case DW_FORM_GNU_str_index:
     OS << format(" indexed (%8.8x) string = ", (uint32_t)UValue);
     dumpString(OS);
@@ -623,7 +637,9 @@
   if (Form == DW_FORM_GNU_strp_alt || U == nullptr)
     return None;
   uint32_t Offset = Value.uval;
-  if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx) {
+  if (Form == DW_FORM_GNU_str_index || Form == DW_FORM_strx ||
+      Form == DW_FORM_strx1 || Form == DW_FORM_strx2 || Form == DW_FORM_strx3 ||
+      Form == DW_FORM_strx4) {
     uint64_t StrOffset;
     if (!U->getStringOffsetSectionItem(Offset, StrOffset))
       return None;
Index: lib/Support/DataExtractor.cpp
===================================================================
--- lib/Support/DataExtractor.cpp
+++ lib/Support/DataExtractor.cpp
@@ -68,6 +68,13 @@
                         Data.data());
 }
 
+uint32_t DataExtractor::getU24(uint32_t *offset_ptr) const {
+  uint24_t ExtractedVal =
+      getU<uint24_t>(offset_ptr, this, IsLittleEndian, Data.data());
+  // The 3 bytes are in the correct byte order for the host.
+  return ExtractedVal.getAsUint32(sys::IsLittleEndianHost);
+}
+
 uint32_t DataExtractor::getU32(uint32_t *offset_ptr) const {
   return getU<uint32_t>(offset_ptr, this, IsLittleEndian, Data.data());
 }
Index: test/DebugInfo/Inputs/dwarfdump-str-offsets.s
===================================================================
--- test/DebugInfo/Inputs/dwarfdump-str-offsets.s
+++ test/DebugInfo/Inputs/dwarfdump-str-offsets.s
@@ -20,6 +20,14 @@
         .asciz "Type_Unit"
 str_TU_type:
         .asciz "MyStruct"
+str_Subprogram:
+        .asciz "MyFunc"
+str_Variable1:
+        .asciz "MyVar1"
+str_Variable2:
+        .asciz "MyVar2"
+str_Variable3:
+        .asciz "MyVar3"
 
 # Every unit contributes to the string_offsets table.
         .section .debug_str_offsets,"",@progbits
@@ -31,6 +39,10 @@
         .long str_producer
         .long str_CU1
         .long str_CU1_dir
+        .long str_Subprogram
+        .long str_Variable1
+        .long str_Variable2
+        .long str_Variable3
 .debug_str_offsets_segment0_end:
 # CU2's contribution
         .long .debug_str_offsets_segment1_end-.debug_str_offsets_base1
@@ -85,7 +97,7 @@
         .section .debug_abbrev,"",@progbits
         .byte 0x01  # Abbrev code
         .byte 0x11  # DW_TAG_compile_unit
-        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x01  # DW_CHILDREN_yes
         .byte 0x25  # DW_AT_producer
         .byte 0x1a  # DW_FORM_strx
         .byte 0x03  # DW_AT_name
@@ -112,9 +124,37 @@
         .byte 0x1a  # DW_FORM_strx
         .byte 0x00  # EOM(1)
         .byte 0x00  # EOM(2)
+        .byte 0x04  # Abbrev code
+        .byte 0x2e  # DW_TAG_subprogram
+        .byte 0x01  # DW_CHILDREN_yes
+        .byte 0x03  # DW_AT_name
+        .byte 0x25  # DW_FORM_strx1
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x05  # Abbrev code
+        .byte 0x34  # DW_TAG_variable
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x03  # DW_AT_name
+        .byte 0x26  # DW_FORM_strx2
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x06  # Abbrev code
+        .byte 0x34  # DW_TAG_variable
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x03  # DW_AT_name
+        .byte 0x27  # DW_FORM_strx3
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
+        .byte 0x07  # Abbrev code
+        .byte 0x34  # DW_TAG_variable
+        .byte 0x00  # DW_CHILDREN_no
+        .byte 0x03  # DW_AT_name
+        .byte 0x28  # DW_FORM_strx4
+        .byte 0x00  # EOM(1)
+        .byte 0x00  # EOM(2)
         .byte 0x00  # EOM(3)
 
-# And a .dwo copy for the .dwo sections.
+# And a .dwo copy of a subset for the .dwo sections.
         .section .debug_abbrev.dwo,"",@progbits
         .byte 0x01  # Abbrev code
         .byte 0x11  # DW_TAG_compile_unit
@@ -163,6 +203,21 @@
         .byte 1                # The index of the CU name string
         .long .debug_str_offsets_base0
         .byte 2                # The index of the comp dir string
+# A subprogram DIE with DW_AT_name, using DW_FORM_strx1.
+        .byte 4                # Abbreviation code
+        .byte 3                # Subprogram name string (DW_FORM_strx1)
+# A variable DIE with DW_AT_name, using DW_FORM_strx2.
+        .byte 5                # Abbreviation code
+        .short 0x0004          # Variable name string (DW_FORM_strx2)
+# A variable DIE with DW_AT_name, using DW_FORM_strx3.
+        .byte 6                # Abbreviation code
+        .byte 5                # Variable name string (DW_FORM_strx3), low byte
+        .short 0               # Variable name string (DW_FORM_strx3), high bytes
+# A variable DIE with DW_AT_name, using DW_FORM_strx4.
+        .byte 7                # Abbreviation code
+        .long 0x00000006       # Variable name string (DW_FORM_strx4)
+        .byte 0 # NULL
+        .byte 0 # NULL
         .byte 0 # NULL
 CU1_5_end:
 
Index: test/DebugInfo/dwarfdump-str-offsets.test
===================================================================
--- test/DebugInfo/dwarfdump-str-offsets.test
+++ test/DebugInfo/dwarfdump-str-offsets.test
@@ -19,12 +19,24 @@
 CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000001) string = "Compile_Unit_1")
 CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x00000008)
 CHECK-NEXT: DW_AT_comp_dir [DW_FORM_strx] ( indexed (00000002) string = "/home/test/CU1")
+CHECK-NOT: NULL
+CHECK: DW_TAG_subprogram
+CHECK-NEXT: DW_AT_name [DW_FORM_strx1] ( indexed (00000003) string = "MyFunc")
+CHECK-NOT: NULL
+CHECK: DW_TAG_variable
+CHECK-NEXT: DW_AT_name [DW_FORM_strx2] ( indexed (00000004) string = "MyVar1")
+CHECK-NOT: NULL
+CHECK: DW_TAG_variable
+CHECK-NEXT: DW_AT_name [DW_FORM_strx3] ( indexed (00000005) string = "MyVar2")
+CHECK-NOT: NULL
+CHECK: DW_TAG_variable
+CHECK-NEXT: DW_AT_name [DW_FORM_strx4] ( indexed (00000006) string = "MyVar3") ; Second compile unit (b.cpp) CHECK: DW_TAG_compile_unit CHECK-NEXT: DW_AT_producer [DW_FORM_strx] ( indexed (00000000) string = "Handmade DWARF producer") CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000001) string = "Compile_Unit_2") -CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x0000001c) +CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x0000002c) CHECK-NEXT: DW_AT_comp_dir [DW_FORM_strx] ( indexed (00000002) string = "/home/test/CU2") ; The split CU @@ -40,7 +52,7 @@ CHECK: .debug_types contents: CHECK: DW_TAG_type_unit CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000000) string = "Type_Unit") -CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x00000030) +CHECK-NEXT: DW_AT_str_offsets_base [DW_FORM_sec_offset] (0x00000040) CHECK: DW_TAG_structure_type CHECK-NEXT: DW_AT_name [DW_FORM_strx] ( indexed (00000001) string = "MyStruct") @@ -54,17 +66,21 @@ ; The .debug_str_offsets section CHECK: .debug_str_offsets contents: -CHECK-NEXT: 0x00000000: Contribution size = 12, Version = 5 +CHECK-NEXT: 0x00000000: Contribution size = 28, Version = 5 CHECK-NEXT: 0x00000008: 00000000 "Handmade DWARF producer" CHECK-NEXT: 0x0000000c: 00000018 "Compile_Unit_1" CHECK-NEXT: 0x00000010: 00000027 "/home/test/CU1" -CHECK-NEXT: 0x00000014: Contribution size = 12, Version = 5 -CHECK-NEXT: 0x0000001c: 00000000 "Handmade DWARF producer" -CHECK-NEXT: 0x00000020: 00000036 "Compile_Unit_2" -CHECK-NEXT: 0x00000024: 00000045 "/home/test/CU2" -CHECK-NEXT: 0x00000028: Contribution size = 8, Version = 5 -CHECK-NEXT: 0x00000030: 00000054 "Type_Unit" -CHECK-NEXT: 0x00000034: 0000005e "MyStruct" +CHECK-NEXT: 0x00000014: 00000067 "MyFunc" +CHECK-NEXT: 0x00000018: 0000006e "MyVar1" +CHECK-NEXT: 0x0000001c: 00000075 "MyVar2" +CHECK-NEXT: 0x00000020: 0000007c "MyVar3" +CHECK-NEXT: 0x00000024: Contribution size = 12, Version = 5 +CHECK-NEXT: 0x0000002c: 00000000 "Handmade DWARF 
producer" +CHECK-NEXT: 0x00000030: 00000036 "Compile_Unit_2" +CHECK-NEXT: 0x00000034: 00000045 "/home/test/CU2" +CHECK-NEXT: 0x00000038: Contribution size = 8, Version = 5 +CHECK-NEXT: 0x00000040: 00000054 "Type_Unit" +CHECK-NEXT: 0x00000044: 0000005e "MyStruct" CHECK: .debug_str_offsets.dwo contents: CHECK-NEXT: 0x00000000: Contribution size = 12, Version = 5