diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst --- a/llvm/docs/CommandGuide/llvm-symbolizer.rst +++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst @@ -202,6 +202,13 @@ Add the specified offset to object file addresses when performing lookups. This can be used to perform lookups as if the object were relocated by the offset. + +.. option:: --approximate-missing-line-numbers + + Attempt to find an approximate line number for addresses that have no line + number, for example when the compiler emitted a line 0 entry because + optimization made the source location ambiguous. The line number of the + previous address is output instead. .. option:: --basenames, -s diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h --- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -58,6 +58,7 @@ bool RelativeAddresses = false; bool UntagAddresses = false; bool UseDIA = false; + bool ApproximateLineNumbers = false; std::string DefaultArch; std::vector DsymHints; std::string FallbackDebugPath; @@ -140,6 +141,12 @@ symbolizeFrameCommon(const T &ModuleSpecifier, object::SectionedAddress ModuleOffset); + /// If \p LineInfo has no line number (Line == 0), replaces it with the line + /// number found for the address immediately preceding \p ModuleOffset. + void ApproximateMissingLineNumber(SymbolizableModule *Info, + object::SectionedAddress ModuleOffset, + DILineInfo *LineInfo); + /// Returns a SymbolizableModule or an error if loading debug info failed. /// Only one attempt is made to load a module, and errors during loading are /// only reported once. 
Subsequent calls to get module info for a module that diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp --- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -71,6 +71,10 @@ Opts.UseSymbolTable); if (Opts.Demangle) LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); + + if (Opts.ApproximateLineNumbers) + ApproximateMissingLineNumber(Info, ModuleOffset, &LineInfo); + return LineInfo; } @@ -120,6 +124,10 @@ Frame->FunctionName = DemangleName(Frame->FunctionName, Info); } } + if (Opts.ApproximateLineNumbers) + ApproximateMissingLineNumber(Info, ModuleOffset, + InlinedContext.getMutableFrame(0)); + return InlinedContext; } @@ -141,6 +149,21 @@ return symbolizeInlinedCodeCommon(BuildID, ModuleOffset); } +// If LineInfo has line 0 (no line matched the address, for example due to +// compiler optimization), substitute the line found one byte earlier. +void LLVMSymbolizer::ApproximateMissingLineNumber( + SymbolizableModule *Info, object::SectionedAddress ModuleOffset, + DILineInfo *LineInfo) { + if (LineInfo->Line != 0 || ModuleOffset.Address == 0) + return; + + --ModuleOffset.Address; + DILineInfo ApproxLineInfo = Info->symbolizeCode( + ModuleOffset, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), + Opts.UseSymbolTable); + LineInfo->Line = ApproxLineInfo.Line; +} + template Expected LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier, diff --git a/llvm/test/tools/llvm-symbolizer/approximate-missing-line-numbers-inline.s b/llvm/test/tools/llvm-symbolizer/approximate-missing-line-numbers-inline.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/approximate-missing-line-numbers-inline.s @@ -0,0 +1,207 @@ +# REQUIRES: x86-registered-target + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -g + +# RUN: llvm-symbolizer --obj=%t.o 0x0000000000000000 | FileCheck %s -DLINE=0 +# RUN: llvm-symbolizer --obj=%t.o 
0x0000000000000000 --approximate-missing-line-numbers | FileCheck %s -DLINE=0 +# RUN: llvm-symbolizer --obj=%t.o 0x0000000000000006 | FileCheck %s -DLINE=4 +# RUN: llvm-symbolizer --obj=%t.o 0x0000000000000006 --approximate-missing-line-numbers | FileCheck %s -DLINE=4 +# RUN: llvm-symbolizer --obj=%t.o 0x0000000000000010 | FileCheck %s -DLINE1=0 -DLINE2=8 --check-prefix=INLINED +# RUN: llvm-symbolizer --obj=%t.o 0x0000000000000010 --approximate-missing-line-numbers | FileCheck %s -DLINE1=4 -DLINE2=8 --check-prefix=INLINED +# RUN: llvm-symbolizer --obj=%t.o 0x0000000000000016 | FileCheck %s -DLINE=8 +# RUN: llvm-symbolizer --obj=%t.o 0x0000000000000016 --approximate-missing-line-numbers | FileCheck %s -DLINE=8 + +# CHECK: location:[[LINE]] +# INLINED: location:[[LINE1]] +# INLINED: location:[[LINE2]] + +## Built from the following source with +## clang -target x86_64-pc-linux -O3 -g -S -gline-tables-only +## and editing the marked .loc instructions +## int foo = 0; +## +## int bar () { +## return foo; +## } +## +## int main() { +## return bar(); +## } + + .text + .file "test.c" + .file 0 "location" + .globl bar + .p2align 4, 0x90 + .type bar,@function +bar: +.Lfunc_begin0: + .loc 0 3 0 + .cfi_startproc + .loc 0 0 11 prologue_end # Set line to 0 + movl foo(%rip), %eax + .loc 0 4 4 is_stmt 0 + retq +.Ltmp0: +.Lfunc_end0: + .size bar, .Lfunc_end0-bar + .cfi_endproc + .globl main + .p2align 4, 0x90 + .type main,@function +main: +.Lfunc_begin1: + .loc 0 7 0 is_stmt 1 + .cfi_startproc + .loc 0 0 11 prologue_end # Set line to 0 + movl foo(%rip), %eax +.Ltmp1: + .loc 0 8 3 + retq +.Ltmp2: +.Lfunc_end1: + .size main, .Lfunc_end1-main + .cfi_endproc + .type foo,@object + .bss + .globl foo + .p2align 2 +foo: + .long 0 + .size foo, 4 + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 1 # DW_CHILDREN_yes + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + .byte 
3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 2 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 0 # DW_CHILDREN_no + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 32 # DW_AT_inline + .byte 33 # DW_FORM_implicit_const + .byte 1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 3 # Abbreviation Code + .byte 46 # DW_TAG_subprogram + .byte 1 # DW_CHILDREN_yes + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 122 # DW_AT_call_all_calls + .byte 25 # DW_FORM_flag_present + .byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 4 # Abbreviation Code + .byte 29 # DW_TAG_inlined_subroutine + .byte 0 # DW_CHILDREN_no + .byte 49 # DW_AT_abstract_origin + .byte 19 # DW_FORM_ref4 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 88 # DW_AT_call_file + .byte 11 # DW_FORM_data1 + .byte 89 # DW_AT_call_line + .byte 11 # DW_FORM_data1 + .byte 87 # DW_AT_call_column + .byte 11 # DW_FORM_data1 + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. 
Section + .byte 1 # Abbrev [1] 0xc:0x2f DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 12 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .byte 0 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base + .byte 2 # Abbrev [2] 0x23:0x2 DW_TAG_subprogram + .byte 3 # DW_AT_name + # DW_AT_inline + .byte 3 # Abbrev [3] 0x25:0x15 DW_TAG_subprogram + .byte 1 # DW_AT_low_pc + .long .Lfunc_end1-.Lfunc_begin1 # DW_AT_high_pc + # DW_AT_call_all_calls + .byte 4 # DW_AT_name + .byte 4 # Abbrev [4] 0x2c:0xd DW_TAG_inlined_subroutine + .long 35 # DW_AT_abstract_origin + .byte 1 # DW_AT_low_pc + .long .Ltmp1-.Lfunc_begin1 # DW_AT_high_pc + .byte 0 # DW_AT_call_file + .byte 8 # DW_AT_call_line + .byte 10 # DW_AT_call_column + .byte 0 # End Of Children Mark + .byte 0 # End Of Children Mark +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 24 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 7dce12de68880fe7fb124afaf5bcf7671229cfc0)" +.Linfo_string1: + .asciz "temp.c" +.Linfo_string2: + .asciz "location" +.Linfo_string3: + .asciz "bar" +.Linfo_string4: + .asciz "main" + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .long .Linfo_string3 + .long .Linfo_string4 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 + .quad .Lfunc_begin1 +.Ldebug_addr_end0: + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git 
a/llvm/test/tools/llvm-symbolizer/approximate-missing-line-numbers.s b/llvm/test/tools/llvm-symbolizer/approximate-missing-line-numbers.s new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/approximate-missing-line-numbers.s @@ -0,0 +1,121 @@ +# REQUIRES: x86-registered-target + +# RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -g + +# RUN: llvm-symbolizer --obj=%t.o 0x0000000000000006 | FileCheck %s -DLINE=0 +# RUN: llvm-symbolizer --approximate-missing-line-numbers --obj=%t.o 0x0000000000000006 | FileCheck %s -DLINE=4 + +# RUN: llvm-symbolizer --obj=%t.o 0x0000000000000006 --output-style=GNU --no-inlines | FileCheck %s -DLINE=0 +# RUN: llvm-symbolizer --approximate-missing-line-numbers --obj=%t.o 0x0000000000000006 --output-style=GNU --no-inlines | FileCheck %s -DLINE=4 + +# RUN: llvm-symbolizer --obj=%t.o 0x0000000000000006 --output-style=JSON --no-inlines | FileCheck %s --check-prefix=JSON -DLINE=0 +# RUN: llvm-symbolizer --approximate-missing-line-numbers --obj=%t.o 0x0000000000000006 --output-style=JSON --no-inlines | FileCheck %s --check-prefix=JSON -DLINE=4 + +# CHECK: location:[[LINE]] +# JSON: "Line":[[LINE]] + +## Built from the following source with +## clang -target x86_64-pc-linux -O3 -g -S +## and editing the marked .loc instructions +## int foo = 0; +## +## int main() { +## return foo; +## } + + .text + .file 0 "location" + .globl main + .p2align 4, 0x90 + .type main,@function +main: +.Lfunc_begin0: + .loc 0 3 0 + .cfi_startproc + .loc 0 4 10 prologue_end + movl foo(%rip), %eax + .loc 0 0 3 is_stmt 0 # Set line to 0 + retq +.Ltmp0: +.Lfunc_end0: + .size main, .Lfunc_end0-main + .cfi_endproc + .type foo,@object + .bss + .globl foo + .p2align 2 +foo: + .long 0 + .size foo, 4 + + .section .debug_abbrev,"",@progbits + .byte 1 # Abbreviation Code + .byte 17 # DW_TAG_compile_unit + .byte 0 # DW_CHILDREN_no + .byte 37 # DW_AT_producer + .byte 37 # DW_FORM_strx1 + .byte 19 # DW_AT_language + .byte 5 # DW_FORM_data2 + 
.byte 3 # DW_AT_name + .byte 37 # DW_FORM_strx1 + .byte 114 # DW_AT_str_offsets_base + .byte 23 # DW_FORM_sec_offset + .byte 16 # DW_AT_stmt_list + .byte 23 # DW_FORM_sec_offset + .byte 27 # DW_AT_comp_dir + .byte 37 # DW_FORM_strx1 + .byte 17 # DW_AT_low_pc + .byte 27 # DW_FORM_addrx + .byte 18 # DW_AT_high_pc + .byte 6 # DW_FORM_data4 + .byte 115 # DW_AT_addr_base + .byte 23 # DW_FORM_sec_offset + .byte 0 # EOM(1) + .byte 0 # EOM(2) + .byte 0 # EOM(3) + .section .debug_info,"",@progbits +.Lcu_begin0: + .long .Ldebug_info_end0-.Ldebug_info_start0 # Length of Unit +.Ldebug_info_start0: + .short 5 # DWARF version number + .byte 1 # DWARF Unit Type + .byte 8 # Address Size (in bytes) + .long .debug_abbrev # Offset Into Abbrev. Section + .byte 1 # Abbrev [1] 0xc:0x17 DW_TAG_compile_unit + .byte 0 # DW_AT_producer + .short 12 # DW_AT_language + .byte 1 # DW_AT_name + .long .Lstr_offsets_base0 # DW_AT_str_offsets_base + .long .Lline_table_start0 # DW_AT_stmt_list + .byte 2 # DW_AT_comp_dir + .byte 0 # DW_AT_low_pc + .long .Lfunc_end0-.Lfunc_begin0 # DW_AT_high_pc + .long .Laddr_table_base0 # DW_AT_addr_base +.Ldebug_info_end0: + .section .debug_str_offsets,"",@progbits + .long 16 # Length of String Offsets Set + .short 5 + .short 0 +.Lstr_offsets_base0: + .section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 7dce12de68880fe7fb124afaf5bcf7671229cfc0)" +.Linfo_string1: + .asciz "temp.c" +.Linfo_string2: + .asciz "location" + .section .debug_str_offsets,"",@progbits + .long .Linfo_string0 + .long .Linfo_string1 + .long .Linfo_string2 + .section .debug_addr,"",@progbits + .long .Ldebug_addr_end0-.Ldebug_addr_start0 # Length of contribution +.Ldebug_addr_start0: + .short 5 # DWARF version number + .byte 8 # Address size + .byte 0 # Segment selector size +.Laddr_table_base0: + .quad .Lfunc_begin0 +.Ldebug_addr_end0: + .section .debug_line,"",@progbits +.Lline_table_start0: diff --git 
a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td --- a/llvm/tools/llvm-symbolizer/Opts.td +++ b/llvm/tools/llvm-symbolizer/Opts.td @@ -20,6 +20,7 @@ defm adjust_vma : Eq<"adjust-vma", "Add specified offset to object file addresses">, MetaVarName<"">; +def approximate_missing_line_numbers : F<"approximate-missing-line-numbers", "Find an approximate line number in cases with no line number">; def basenames : Flag<["--"], "basenames">, HelpText<"Strip directory names from paths">; defm build_id : Eq<"build-id", "Build ID used to look up the object file">; defm cache_size : Eq<"cache-size", "Max size in bytes of the in-memory binary cache.">; diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -368,6 +368,7 @@ uint64_t AdjustVMA; PrinterConfig Config; parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA); + Opts.ApproximateLineNumbers = Args.hasArg(OPT_approximate_missing_line_numbers); if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) { Opts.PathStyle = A->getOption().matches(OPT_basenames)