diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h @@ -56,8 +56,8 @@ bool UseSymbolTable) const; bool getNameFromSymbolTable(object::SymbolRef::Type Type, uint64_t Address, - std::string &Name, uint64_t &Addr, - uint64_t &Size) const; + std::string &Name, uint64_t &Addr, uint64_t &Size, + std::string &FileName) const; // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd // (function descriptor) section and OpdExtractor refers to its contents. Error addSymbol(const object::SymbolRef &Symbol, uint64_t SymbolSize, @@ -78,12 +78,19 @@ // the following symbol. uint64_t Size; + StringRef Name; + // Non-zero if this is an ELF local symbol. See the comment in + // getNameFromSymbolTable. + uint32_t ELFLocalSymIdx; + bool operator<(const SymbolDesc &RHS) const { return Addr != RHS.Addr ? Addr < RHS.Addr : Size < RHS.Size; } }; - std::vector> Functions; - std::vector> Objects; + std::vector Functions; + std::vector Objects; + // (index, filename) pairs of ELF STT_FILE symbols. + std::vector> FileSymbols; SymbolizableObjectFile(const object::ObjectFile *Obj, std::unique_ptr DICtx, diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -16,6 +16,7 @@ #include "llvm/BinaryFormat/COFF.h" #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Object/SymbolSize.h" #include "llvm/Support/Casting.h" @@ -69,9 +70,8 @@ return std::move(E); } - std::vector> &Fs = res->Functions, - &Os = res->Objects; - auto Uniquify = [](std::vector> &S) { + std::vector &Fs = res->Functions, &Os = res->Objects; + auto Uniquify = [](std::vector &S) { // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr, // pick the one with the largest Size. This helps us avoid symbols with no // size information (Size=0). @@ -79,7 +79,7 @@ auto I = S.begin(), E = S.end(), J = S.begin(); while (I != E) { auto OI = I; - while (++I != E && OI->first.Addr == I->first.Addr) { + while (++I != E && OI->Addr == I->Addr) { } *J++ = I[-1]; } @@ -138,8 +138,7 @@ uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1; uint64_t SymbolStart = ImageBase + Export.Offset; uint64_t SymbolSize = NextOffset - Export.Offset; - SymbolDesc SD = {SymbolStart, SymbolSize}; - Functions.emplace_back(SD, Export.Name); + Functions.push_back({SymbolStart, SymbolSize, Export.Name, 0}); } return Error::success(); } @@ -150,9 +149,23 @@ uint64_t OpdAddress) { // Avoid adding symbols from an unknown/undefined section. const ObjectFile &Obj = *Symbol.getObject(); + Expected SymbolNameOrErr = Symbol.getName(); + if (!SymbolNameOrErr) + return SymbolNameOrErr.takeError(); + StringRef SymbolName = *SymbolNameOrErr; + + uint32_t ELFSymIdx = + Obj.isELF() ? ELFSymbolRef(Symbol).getRawDataRefImpl().d.b : 0; Expected Sec = Symbol.getSection(); - if (!Sec || Obj.section_end() == *Sec) + if (!Sec || Obj.section_end() == *Sec) { + if (Obj.isELF()) { + // Store the (index, filename) pair for a file symbol. + ELFSymbolRef ESym(Symbol); + if (ESym.getELFType() == ELF::STT_FILE) + FileSymbols.emplace_back(ELFSymIdx, SymbolName); + } return Error::success(); + } Expected SymbolTypeOrErr = Symbol.getType(); if (!SymbolTypeOrErr) @@ -190,24 +203,21 @@ if (OpdExtractor->isValidOffsetForAddress(OpdOffset)) SymbolAddress = OpdExtractor->getAddress(&OpdOffset); } - Expected SymbolNameOrErr = Symbol.getName(); - if (!SymbolNameOrErr) - return SymbolNameOrErr.takeError(); - StringRef SymbolName = *SymbolNameOrErr; // Mach-O symbol table names have leading underscore, skip it. if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_') SymbolName = SymbolName.drop_front(); - SymbolDesc SD = {SymbolAddress, SymbolSize}; - + if (Obj.isELF() && ELFSymbolRef(Symbol).getBinding() != ELF::STB_LOCAL) + ELFSymIdx = 0; + SymbolDesc SD = {SymbolAddress, SymbolSize, SymbolName, ELFSymIdx}; // DATA command symbolizes just ST_Data (ELF STT_OBJECT) symbols as an // optimization. Treat everything else (e.g. ELF STT_NOTYPE, STT_FUNC and // STT_GNU_IFUNC) as function symbols which can be used to symbolize // addresses. if (SymbolType == SymbolRef::ST_Data) - Objects.emplace_back(SD, SymbolName); + Objects.push_back(SD); else - Functions.emplace_back(SD, SymbolName); + Functions.push_back(SD); return Error::success(); } @@ -223,23 +233,33 @@ return 0; } -bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type, - uint64_t Address, - std::string &Name, - uint64_t &Addr, - uint64_t &Size) const { +bool SymbolizableObjectFile::getNameFromSymbolTable( + SymbolRef::Type Type, uint64_t Address, std::string &Name, uint64_t &Addr, + uint64_t &Size, std::string &FileName) const { const auto &Symbols = Type == SymbolRef::ST_Function ? Functions : Objects; - std::pair SD{{Address, UINT64_C(-1)}, StringRef()}; + SymbolDesc SD{Address, UINT64_C(-1), StringRef(), 0}; auto SymbolIterator = llvm::upper_bound(Symbols, SD); if (SymbolIterator == Symbols.begin()) return false; --SymbolIterator; - if (SymbolIterator->first.Size != 0 && - SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address) + if (SymbolIterator->Size != 0 && + SymbolIterator->Addr + SymbolIterator->Size <= Address) return false; - Name = SymbolIterator->second.str(); - Addr = SymbolIterator->first.Addr; - Size = SymbolIterator->first.Size; + Name = SymbolIterator->Name.str(); + Addr = SymbolIterator->Addr; + Size = SymbolIterator->Size; + + if (SymbolIterator->ELFLocalSymIdx != 0) { + // If this is an ELF local symbol, find the STT_FILE symbol preceding + // SymbolIterator to get the filename. The ELF spec requires the STT_FILE + // symbol (if present) precedes the other STB_LOCAL symbols for the file. + assert(Module->isELF()); + auto It = llvm::upper_bound( + FileSymbols, + std::make_pair(SymbolIterator->ELFLocalSymIdx, StringRef())); + if (It != FileSymbols.begin()) + FileName = It[-1].second.str(); + } return true; } @@ -265,11 +285,13 @@ // Override function name from symbol table if necessary. if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) { - std::string FunctionName; + std::string FunctionName, FileName; uint64_t Start, Size; if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address, - FunctionName, Start, Size)) { + FunctionName, Start, Size, FileName)) { LineInfo.FunctionName = FunctionName; + if (LineInfo.FileName == DILineInfo::BadString && !FileName.empty()) + LineInfo.FileName = FileName; } } return LineInfo; @@ -290,12 +312,15 @@ // Override the function name in lower frame with name from symbol table. if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) { - std::string FunctionName; + std::string FunctionName, FileName; uint64_t Start, Size; if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address, - FunctionName, Start, Size)) { - InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1) - ->FunctionName = FunctionName; + FunctionName, Start, Size, FileName)) { + DILineInfo *LI = InlinedContext.getMutableFrame( + InlinedContext.getNumberOfFrames() - 1); + LI->FunctionName = FunctionName; + if (LI->FileName == DILineInfo::BadString && !FileName.empty()) + LI->FileName = FileName; } } @@ -305,8 +330,9 @@ DIGlobal SymbolizableObjectFile::symbolizeData( object::SectionedAddress ModuleOffset) const { DIGlobal Res; + std::string FileName; getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name, - Res.Start, Res.Size); + Res.Start, Res.Size, FileName); return Res; } diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-file-conflict.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file-conflict.s new file mode 100644 --- /dev/null +++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file-conflict.s @@ -0,0 +1,14 @@ +# REQUIRES: x86-registered-target +## If the filename from the preceding STT_FILE does not match .debug_line, +## STT_FILE wins. Compilers should not emit such bogus information. +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t +# RUN: llvm-symbolizer --obj=%t 0 | FileCheck %s + +# CHECK: foo +# CHECK-NEXT: 1.c:0:0 + +.file "1.c" +.file 0 "/tmp" "0.c" +foo: + .loc 0 1 0 + nop diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s --- a/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s +++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file.s @@ -4,15 +4,14 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t # RUN: llvm-symbolizer --obj=%t 0 1 2 | FileCheck %s -## TODO Find the preceding STT_FILE symbol as the filename of a local symbol. # CHECK: local1 -# CHECK-NEXT: ??:0:0 +# CHECK-NEXT: 1.c:0:0 # CHECK-EMPTY: # CHECK-NEXT: local2 -# CHECK-NEXT: ??:0:0 +# CHECK-NEXT: 2.c:0:0 # CHECK-EMPTY: # CHECK-NEXT: local3 -# CHECK-NEXT: ??:0:0 +# CHECK-NEXT: 3.c:0:0 # CHECK-EMPTY: .file "1.c" diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-file2.yaml b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file2.yaml new file mode 100644 --- /dev/null +++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-file2.yaml @@ -0,0 +1,75 @@ +# RUN: yaml2obj --docnum=1 %s -o %t1 +# RUN: llvm-symbolizer --obj=%t1 0 1 2 | FileCheck %s + +## The local symbol has no preceding STT_FILE. Its filename is unavailable. +# CHECK: local +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +## All local symbols precede all non-local symbols. When there are multiple +## STT_FILE symbols, we cannot tell which file defines the non-local symbol in +## question. We could tell if there is only one STT_FILE but in reality there +## are always more than one file, so implementing the special case is not useful. +# CHECK-NEXT: global +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: +# CHECK-NEXT: weak +# CHECK-NEXT: ??:0:0 +# CHECK-EMPTY: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Size: 3 +Symbols: + - Name: local + Section: .text + Value: 0 + - Name: 1.c + Type: STT_FILE + Index: SHN_ABS + - Name: global + Binding: STB_GLOBAL + Section: .text + Value: 1 + - Name: weak + Binding: STB_WEAK + Section: .text + Value: 2 + +## If st_name of the STT_FILE symbols is invalid, the symbol name is lost as well. +## TODO Keep the symbol name. +# RUN: yaml2obj --docnum=2 %s -o %t2 +# RUN: llvm-symbolizer --obj=%t2 0 0 2>&1 | FileCheck %s --check-prefix=CHECK2 + +# CHECK2: error reading file: st_name (0xffff) is past the end of the string table of size +# CHECK2-NEXT: ?? +# CHECK2-NEXT: ??:0:0 +# CHECK2-EMPTY: +# CHECK2-NEXT: ?? +# CHECK2-NEXT: ??:0:0 + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Size: 1 +Symbols: + - StName: 0xffff + Type: STT_FILE + Index: SHN_ABS + - Name: local + Section: .text diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s --- a/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s +++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-ignored.s @@ -4,7 +4,7 @@ # RUN: llvm-symbolizer --obj=%t 0 | FileCheck %s # CHECK: b -# CHECK-NEXT: ??:0:0 +# CHECK-NEXT: 1.c:0:0 # CHECK-EMPTY: .file "1.c" diff --git a/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s b/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s --- a/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s +++ b/llvm/test/DebugInfo/Symbolize/ELF/symtab-notype.s @@ -20,21 +20,20 @@ # CHECK-EMPTY: # CHECK-NEXT: l_notype -# CHECK-NEXT: ??:0:0 +# CHECK-NEXT: symtab-notype.s:0:0 # CHECK-EMPTY: ## TODO addr2line does not symbolize the last two out-of-bounds addresses. # CHECK-NEXT: l_notype_nosize -# CHECK-NEXT: ??:0:0 +# CHECK-NEXT: symtab-notype.s:0:0 # CHECK-EMPTY: # CHECK-NEXT: l_notype_nosize -# CHECK-NEXT: ??:0:0 +# CHECK-NEXT: symtab-notype.s:0:0 # CHECK-EMPTY: # CHECK-NEXT: l_notype_nosize -# CHECK-NEXT: ??:0:0 +# CHECK-NEXT: symtab-notype.s:0:0 # CHECK-EMPTY: -## TODO Find the preceding STT_FILE symbol as the filename of a local symbol. .file "symtab-notype.s" .globl _start, g_notype