diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -149,16 +149,27 @@ DataExtractor *OpdExtractor, uint64_t OpdAddress) { // Avoid adding symbols from an unknown/undefined section. - const ObjectFile *Obj = Symbol.getObject(); + const ObjectFile &Obj = *Symbol.getObject(); Expected Sec = Symbol.getSection(); - if (!Sec || (Obj && Obj->section_end() == *Sec)) + if (!Sec || Obj.section_end() == *Sec) return Error::success(); + Expected SymbolTypeOrErr = Symbol.getType(); if (!SymbolTypeOrErr) return SymbolTypeOrErr.takeError(); SymbolRef::Type SymbolType = *SymbolTypeOrErr; - if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data) + if (Obj.isELF()) { + // Allow function and data symbols. Additionally allow STT_NOTYPE, which is + // common for functions defined in assembly. + uint8_t Type = ELFSymbolRef(Symbol).getELFType(); + if (Type != ELF::STT_NOTYPE && Type != ELF::STT_FUNC && + Type != ELF::STT_OBJECT && Type != ELF::STT_GNU_IFUNC) + return Error::success(); + } else if (SymbolType != SymbolRef::ST_Function && + SymbolType != SymbolRef::ST_Data) { return Error::success(); + } + Expected SymbolAddressOrErr = Symbol.getAddress(); if (!SymbolAddressOrErr) return SymbolAddressOrErr.takeError(); @@ -186,11 +197,17 @@ // Mach-O symbol table names have leading underscore, skip it. if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_') SymbolName = SymbolName.drop_front(); - // FIXME: If a function has alias, there are two entries in symbol table - // with same address size. Make sure we choose the correct one. - auto &M = SymbolType == SymbolRef::ST_Function ? 
Functions : Objects; - SymbolDesc SD = { SymbolAddress, SymbolSize }; - M.emplace_back(SD, SymbolName); + + SymbolDesc SD = {SymbolAddress, SymbolSize}; + + // DATA command symbolizes just ST_Data (ELF STT_OBJECT) symbols as an + // optimization. Treat everything else (e.g. ELF STT_NOTYPE, STT_FUNC and + // STT_GNU_IFUNC) as function symbols which can be used to symbolize + // addresses. + if (SymbolType == SymbolRef::ST_Data) + Objects.emplace_back(SD, SymbolName); + else + Functions.emplace_back(SD, SymbolName); return Error::success(); } diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s new file mode 100644 --- /dev/null +++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s @@ -0,0 +1,27 @@ +## When locating a local symbol, we can obtain the filename according to the +## preceding STT_FILE symbol. +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-symbolizer --obj=%t 0 1 2 | FileCheck %s + +## TODO Find the preceding STT_FILE symbol as the filename of a local symbol. +# CHECK: local1 +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: local2 +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: local3 +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +.file "1.c" +local1: + nop + +.file "2.c" +local2: + nop + +.file "3.c" +local3: + nop diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-ifunc.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-ifunc.s new file mode 100644 --- /dev/null +++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-ifunc.s @@ -0,0 +1,28 @@ +## Test we can symbolize STT_GNU_IFUNC symbols. +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-symbolizer --obj=%t 0 1 | FileCheck %s + +# CHECK: g_ifunc +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: l_ifunc +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +## TODO Find the preceding STT_FILE symbol as the filename of l_ifunc. 
+.file "symtab-ifunc.s" + +.Lg_resolver: + ret +.size .Lg_resolver, 1 + +.globl g_ifunc +.set g_ifunc, .Lg_resolver +.type g_ifunc, @gnu_indirect_function + +.Ll_resolver: + ret +.size .Ll_resolver, 1 + +.set l_ifunc, .Ll_resolver +.type l_ifunc, @gnu_indirect_function diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s new file mode 100644 --- /dev/null +++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s @@ -0,0 +1,19 @@ +## Ignore STT_SECTION and STT_TLS symbols for .symtab symbolization. +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-symbolizer --obj=%t 0 | FileCheck %s + +# CHECK: b +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +.file "1.c" + +.section a,"a",@progbits +b: + .reloc ., R_X86_64_NONE, a +.section c,"a",@progbits + .reloc ., R_X86_64_NONE, c + +.section .tbss,"awT",@nobits +.globl tls +tls: diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s new file mode 100644 --- /dev/null +++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s @@ -0,0 +1,55 @@ +## STT_NOTYPE symbols are common in assembly files. Test we can symbolize them. +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-symbolizer --obj=%t --inlines 0 1 2 3 4 5 6 7 | FileCheck %s +# RUN: llvm-symbolizer --obj=%t --no-inlines 0 1 2 3 4 5 6 7 | FileCheck %s + +# CHECK: _start +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: g_notype +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: g_notype +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +## This is a gap. +# CHECK-NEXT: ?? +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +# CHECK-NEXT: l_notype +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +## TODO addr2line does not symbolize the last two out-of-bounds addresses. 
+# CHECK-NEXT: l_notype_nosize +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: l_notype_nosize +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: l_notype_nosize +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +## TODO Find the preceding STT_FILE symbol as the filename of a local symbol. +.file "symtab-notype.s" + +.globl _start, g_notype +_start: + retq + +g_notype: + nop + nop +.size g_notype, . - g_notype + + nop + +l_notype: + nop +.size l_notype, . - l_notype + +l_notype_nosize: + nop