diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h
--- a/bolt/include/bolt/Core/DebugData.h
+++ b/bolt/include/bolt/Core/DebugData.h
@@ -431,7 +431,9 @@
   void updateAddressMap(uint32_t Index, uint32_t Address);
 
-  /// Writes out current sections entry into .debug_str_offsets.
-  void finalizeSection();
+  /// Writes out the current section's entries into .debug_str_offsets, unless
+  /// a contribution with the same DW_AT_str_offsets_base as \p Unit has
+  /// already been written out.
+  void finalizeSection(DWARFUnit &Unit);
 
   /// Returns False if no strings were added to .debug_str.
   bool isFinalized() const { return !StrOffsetsBuffer->empty(); }
@@ -445,8 +447,12 @@
   std::unique_ptr<DebugStrOffsetsBufferVector> StrOffsetsBuffer;
   std::unique_ptr<raw_svector_ostream> StrOffsetsStream;
   std::map<uint32_t, uint32_t> IndexToAddressMap;
+  // DW_AT_str_offsets_base values already handled by finalizeSection().
+  DenseSet<uint64_t> ProcessedBaseOffsets;
   // Section size not including header.
   uint32_t CurrentSectionSize{0};
+  // Set by updateAddressMap(); reset by finalizeSection().
+  bool StrOffsetSectionWasModified = false;
 };
 
 using DebugStrBufferVector = SmallVector<char, 16>;
diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp
--- a/bolt/lib/Core/DebugData.cpp
+++ b/bolt/lib/Core/DebugData.cpp
@@ -1083,20 +1083,37 @@
 void DebugStrOffsetsWriter::updateAddressMap(uint32_t Index, uint32_t Address) {
   assert(IndexToAddressMap.count(Index) > 0 && "Index is not found.");
   IndexToAddressMap[Index] = Address;
+  StrOffsetSectionWasModified = true;
 }
 
-void DebugStrOffsetsWriter::finalizeSection() {
+void DebugStrOffsetsWriter::finalizeSection(DWARFUnit &Unit) {
   if (IndexToAddressMap.empty())
     return;
-  // Writing out the header for each section.
-  support::endian::write(*StrOffsetsStream, CurrentSectionSize + 4,
-                         support::little);
-  support::endian::write(*StrOffsetsStream, static_cast<uint16_t>(5),
-                         support::little);
-  support::endian::write(*StrOffsetsStream, static_cast<uint16_t>(0),
-                         support::little);
-  for (const auto &Entry : IndexToAddressMap)
-    support::endian::write(*StrOffsetsStream, Entry.second, support::little);
+
+  std::optional<AttrInfo> AttrVal =
+      findAttributeInfo(Unit.getUnitDIE(), dwarf::DW_AT_str_offsets_base);
+  assert(AttrVal && "DW_AT_str_offsets_base not present.");
+  std::optional<uint64_t> Val = AttrVal->V.getAsSectionOffset();
+  assert(Val && "DW_AT_str_offsets_base Value not present.");
+  auto RetVal = ProcessedBaseOffsets.insert(*Val);
+  if (RetVal.second) {
+    // Writing out the header for each section.
+    support::endian::write(*StrOffsetsStream, CurrentSectionSize + 4,
+                           support::little);
+    support::endian::write(*StrOffsetsStream, static_cast<uint16_t>(5),
+                           support::little);
+    support::endian::write(*StrOffsetsStream, static_cast<uint16_t>(0),
+                           support::little);
+    for (const auto &Entry : IndexToAddressMap)
+      support::endian::write(*StrOffsetsStream, Entry.second, support::little);
+  }
+  // Warn if this contribution was already written out (and is therefore
+  // skipped now) even though it has been modified since.
+  if (!RetVal.second && StrOffsetSectionWasModified)
+    errs() << "BOLT-WARNING: skipping string offsets section for CU at offset "
+           << Twine::utohexstr(Unit.getOffset()) << ", but it was modified\n";
+
+  StrOffsetSectionWasModified = false;
   IndexToAddressMap.clear();
 }
 
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -317,7 +317,7 @@
       RangesBase = RangesSectionWriter->getSectionOffset() +
                    getDWARF5RngListLocListHeaderSize();
       RangesSectionWriter->initSection(*Unit);
-      StrOffstsWriter->finalizeSection();
+      StrOffstsWriter->finalizeSection(*Unit);
     }
 
     DebugInfoPatcher->addUnitBaseOffsetLabel(Unit->getOffset());
diff --git a/bolt/test/X86/dwarf5-shared-str-offset-base.s b/bolt/test/X86/dwarf5-shared-str-offset-base.s
new file mode 100644
--- /dev/null
+++ b/bolt/test/X86/dwarf5-shared-str-offset-base.s
@@ -0,0 +1,304 @@
+# REQUIRES: system-linux
+
+# RUN: llvm-mc --filetype=obj --triple x86_64 %s -o %tmain.o --defsym MAIN=0
+# RUN: llvm-mc --filetype=obj --triple x86_64 %s -o %thelper.o
+# RUN: %clang %cflags %tmain.o %thelper.o -o %tmain.exe
+# RUN: llvm-bolt %tmain.exe -o %tmain.exe.bolt --update-debug-sections
+# RUN: llvm-dwarfdump --debug-info %tmain.exe.bolt > %tout.text
+# RUN: llvm-dwarfdump --show-section-sizes %tmain.exe >> %tout.text
+# RUN: llvm-dwarfdump --show-section-sizes %tmain.exe.bolt >> %tout.text
+# RUN: cat %tout.text | FileCheck %s
+
+# This test checks that with DWARF5 when two CUs share the same .debug_str_offsets
+# entry BOLT does not create a duplicate.
+
+# CHECK: DW_AT_str_offsets_base (0x[[#%.8x,ADDR:]]
+# CHECK: DW_AT_str_offsets_base (0x[[#ADDR]]
+
+# CHECK: .debug_str_offsets [[#ADDR2:]]
+# CHECK: .debug_str_offsets [[#ADDR2]]
+
+# main.cpp
+# int main(){
+#   return 0;
+# }
+
+# helper.cpp
+# void foo(){}
+
+## Create two CUs: main.cpp when MAIN is defined, helper.cpp otherwise.
+.ifdef MAIN
+.text
+        .file "main.cpp"
+        .globl main # -- Begin function main
+        .p2align 4, 0x90
+        .type main,@function
+main: # @main
+.Lfunc_begin0:
+        .file 0 "." "main.cpp" md5 0x32c197b0a8b855eb3d7573c993ada862
+        .loc 0 1 0 # main.cpp:1:0
+        .cfi_startproc
+# %bb.0:
+        pushq %rbp
+        .cfi_def_cfa_offset 16
+        .cfi_offset %rbp, -16
+        movq %rsp, %rbp
+        .cfi_def_cfa_register %rbp
+        movl $0, -4(%rbp)
+.Ltmp0:
+        .loc 0 2 1 prologue_end # main.cpp:2:1
+        xorl %eax, %eax
+        popq %rbp
+        .cfi_def_cfa %rsp, 8
+        retq
+.Ltmp1:
+.Lfunc_end0:
+        .size main, .Lfunc_end0-main
+        .cfi_endproc
+        # -- End function
+        .section .debug_abbrev,"",@progbits
+        .byte 1 # Abbreviation Code
+        .byte 17 # DW_TAG_compile_unit
+        .byte 1 # DW_CHILDREN_yes
+        .byte 37 # DW_AT_producer
+        .byte 37 # DW_FORM_strx1
+        .byte 19 # DW_AT_language
+        .byte 5 # DW_FORM_data2
+        .byte 3 # DW_AT_name
+        .byte 37 # DW_FORM_strx1
+        .byte 114 # DW_AT_str_offsets_base
+        .byte 23 # DW_FORM_sec_offset
+        .byte 16 # DW_AT_stmt_list
+        .byte 23 # DW_FORM_sec_offset
+        .byte 27 # DW_AT_comp_dir
+        .byte 37 # DW_FORM_strx1
+        .byte 17 # DW_AT_low_pc
+        .byte 27 # DW_FORM_addrx
+        .byte 18 # DW_AT_high_pc
+        .byte 6 # DW_FORM_data4
+        .byte 115 # DW_AT_addr_base
+        .byte 23 # DW_FORM_sec_offset
+        .byte 0 # EOM(1)
+        .byte 0 # EOM(2)
+        .byte 2 # Abbreviation Code
+        .byte 46 # DW_TAG_subprogram
+        .byte 0 # DW_CHILDREN_no
+        .byte 17 # DW_AT_low_pc
+        .byte 27 # DW_FORM_addrx
+        .byte 18 # DW_AT_high_pc
+        .byte 6 # DW_FORM_data4
+        .byte 64 # DW_AT_frame_base
+        .byte 24 # DW_FORM_exprloc
+        .byte 3 # DW_AT_name
+        .byte 37 # DW_FORM_strx1
+        .byte 58 # DW_AT_decl_file
+        .byte 11 # DW_FORM_data1
+        .byte 59 # DW_AT_decl_line
+        .byte 11 # DW_FORM_data1
+        .byte 73 # DW_AT_type
+        .byte 19 # DW_FORM_ref4
+        .byte 63 # DW_AT_external
+        .byte 25 # DW_FORM_flag_present
+        .byte 0 # EOM(1)
+        .byte 0 # EOM(2)
+        .byte 3 # Abbreviation Code
+        .byte 36 # DW_TAG_base_type
+        .byte 0 # DW_CHILDREN_no
+        .byte 3 # DW_AT_name
+        .byte 37 # DW_FORM_strx1
+        .byte 62 # DW_AT_encoding
+        .byte 11 # DW_FORM_data1
+        .byte 11 # DW_AT_byte_size
+        .byte 11 # DW_FORM_data1
+        .byte 0 # EOM(1)
+        .byte 0 # EOM(2)
+        .byte 0 # EOM(3)
+        .section .debug_info,"",@progbits
+.Lcu_begin0:
+        .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+        .short 5 # DWARF version number
+        .byte 1 # DWARF Unit Type
+        .byte 8 # Address Size (in bytes)
+        .long .debug_abbrev # Offset Into Abbrev. Section
+        .byte 1 # Abbrev [1] 0xc:0x2b DW_TAG_compile_unit
+        .byte 0 # DW_AT_producer
+        .short 33 # DW_AT_language
+        .byte 1 # DW_AT_name
+        .long .Lstr_offsets_base0 # DW_AT_str_offsets_base
+        .long .Lline_table_start0 # DW_AT_stmt_list
+        .byte 2 # DW_AT_comp_dir
+        .byte 0 # DW_AT_low_pc
+        .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+        .long .Laddr_table_base0 # DW_AT_addr_base
+        .byte 2 # Abbrev [2] 0x23:0xf DW_TAG_subprogram
+        .byte 0 # DW_AT_low_pc
+        .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+        .byte 1 # DW_AT_frame_base
+        .byte 86 # DW_OP_reg6
+        .byte 3 # DW_AT_name
+        .byte 0 # DW_AT_decl_file
+        .byte 1 # DW_AT_decl_line
+        .long 50 # DW_AT_type (0x32, the DW_TAG_base_type DIE below)
+        # DW_AT_external
+        .byte 3 # Abbrev [3] 0x32:0x4 DW_TAG_base_type
+        .byte 4 # DW_AT_name
+        .byte 5 # DW_AT_encoding
+        .byte 4 # DW_AT_byte_size
+        .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+        .section .debug_str_offsets,"",@progbits
+        .long 24 # Length of String Offsets Set (version + padding + 5 offsets)
+        .short 5 # Version
+        .short 0 # Padding
+.Lstr_offsets_base0:
+        .section .debug_str,"MS",@progbits,1
+.Linfo_string0:
+        .asciz "clang version 15.0.0" # string offset=0
+.Linfo_string1:
+        .asciz "main.cpp" # string offset=21
+.Linfo_string2:
+        .asciz "." # string offset=30
+.Linfo_string3:
+        .asciz "main" # string offset=32
+.Linfo_string4:
+        .asciz "int" # string offset=37
+        .section .debug_str_offsets,"",@progbits
+        .long .Linfo_string0
+        .long .Linfo_string1
+        .long .Linfo_string2
+        .long .Linfo_string3
+        .long .Linfo_string4
+        .section .debug_addr,"",@progbits
+        .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+        .short 5 # DWARF version number
+        .byte 8 # Address size
+        .byte 0 # Segment selector size
+.Laddr_table_base0:
+        .quad .Lfunc_begin0
+.Ldebug_addr_end0:
+        .ident "clang version 15.0.0"
+        .section ".note.GNU-stack","",@progbits
+        .addrsig
+        .section .debug_line,"",@progbits
+.Lline_table_start0:
+.else
+.text
+        .file "helper.cpp"
+        .globl _Z3foov # -- Begin function _Z3foov
+        .p2align 4, 0x90
+        .type _Z3foov,@function
+_Z3foov: # @_Z3foov
+.Lfunc_begin0:
+        .file 0 "." "helper.cpp" md5 0x5f98e4807e4f8781c26a82faf819f8a7
+        .loc 0 1 0 # helper.cpp:1:0
+        .cfi_startproc
+# %bb.0:
+        pushq %rbp
+        .cfi_def_cfa_offset 16
+        .cfi_offset %rbp, -16
+        movq %rsp, %rbp
+        .cfi_def_cfa_register %rbp
+.Ltmp0:
+        .loc 0 1 12 prologue_end # helper.cpp:1:12
+        popq %rbp
+        .cfi_def_cfa %rsp, 8
+        retq
+.Ltmp1:
+.Lfunc_end0:
+        .size _Z3foov, .Lfunc_end0-_Z3foov
+        .cfi_endproc
+        # -- End function
+        .section .debug_abbrev,"",@progbits
+        .byte 1 # Abbreviation Code
+        .byte 17 # DW_TAG_compile_unit
+        .byte 1 # DW_CHILDREN_yes
+        .byte 37 # DW_AT_producer
+        .byte 37 # DW_FORM_strx1
+        .byte 19 # DW_AT_language
+        .byte 5 # DW_FORM_data2
+        .byte 3 # DW_AT_name
+        .byte 37 # DW_FORM_strx1
+        .byte 114 # DW_AT_str_offsets_base
+        .byte 23 # DW_FORM_sec_offset
+        .byte 16 # DW_AT_stmt_list
+        .byte 23 # DW_FORM_sec_offset
+        .byte 27 # DW_AT_comp_dir
+        .byte 37 # DW_FORM_strx1
+        .byte 17 # DW_AT_low_pc
+        .byte 27 # DW_FORM_addrx
+        .byte 18 # DW_AT_high_pc
+        .byte 6 # DW_FORM_data4
+        .byte 115 # DW_AT_addr_base
+        .byte 23 # DW_FORM_sec_offset
+        .byte 0 # EOM(1)
+        .byte 0 # EOM(2)
+        .byte 2 # Abbreviation Code
+        .byte 46 # DW_TAG_subprogram
+        .byte 0 # DW_CHILDREN_no
+        .byte 17 # DW_AT_low_pc
+        .byte 27 # DW_FORM_addrx
+        .byte 18 # DW_AT_high_pc
+        .byte 6 # DW_FORM_data4
+        .byte 64 # DW_AT_frame_base
+        .byte 24 # DW_FORM_exprloc
+        .byte 110 # DW_AT_linkage_name
+        .byte 37 # DW_FORM_strx1
+        .byte 3 # DW_AT_name
+        .byte 37 # DW_FORM_strx1
+        .byte 58 # DW_AT_decl_file
+        .byte 11 # DW_FORM_data1
+        .byte 59 # DW_AT_decl_line
+        .byte 11 # DW_FORM_data1
+        .byte 63 # DW_AT_external
+        .byte 25 # DW_FORM_flag_present
+        .byte 0 # EOM(1)
+        .byte 0 # EOM(2)
+        .byte 0 # EOM(3)
+        .section .debug_info,"",@progbits
+.Lcu_begin0:
+        .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit
+.Ldebug_info_start0:
+        .short 5 # DWARF version number
+        .byte 1 # DWARF Unit Type
+        .byte 8 # Address Size (in bytes)
+        .long .debug_abbrev # Offset Into Abbrev. Section
+        .byte 1 # Abbrev [1] 0xc:0x24 DW_TAG_compile_unit
+        .byte 0 # DW_AT_producer
+        .short 33 # DW_AT_language
+        .byte 1 # DW_AT_name
+        .long 0x8 # DW_AT_str_offsets_base Manually modified to be the same as first CU (0x8 = just past that contribution's 8-byte header)
+        .long .Lline_table_start0 # DW_AT_stmt_list
+        .byte 2 # DW_AT_comp_dir
+        .byte 0 # DW_AT_low_pc
+        .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+        .long .Laddr_table_base0 # DW_AT_addr_base
+        .byte 2 # Abbrev [2] 0x23:0xc DW_TAG_subprogram
+        .byte 0 # DW_AT_low_pc
+        .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc
+        .byte 1 # DW_AT_frame_base
+        .byte 86 # DW_OP_reg6
+        .byte 3 # DW_AT_linkage_name
+        .byte 4 # DW_AT_name
+        .byte 0 # DW_AT_decl_file
+        .byte 1 # DW_AT_decl_line
+        # DW_AT_external
+        .byte 0 # End Of Children Mark
+.Ldebug_info_end0:
+# Manually removed .debug_str_offsets and .debug_str; the strx indices above go through the str_offsets_base 0x8 set by hand.
+        .section .debug_addr,"",@progbits
+        .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution
+.Ldebug_addr_start0:
+        .short 5 # DWARF version number
+        .byte 8 # Address size
+        .byte 0 # Segment selector size
+.Laddr_table_base0:
+        .quad .Lfunc_begin0
+.Ldebug_addr_end0:
+        .ident "clang version 15.0.0"
+        .section ".note.GNU-stack","",@progbits
+        .addrsig
+        .section .debug_line,"",@progbits
+.Lline_table_start0:
+.endif