diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h @@ -55,8 +55,8 @@ bool shouldOverrideWithSymbolTable(FunctionNameKind FNKind, bool UseSymbolTable) const; - bool getNameFromSymbolTable(object::SymbolRef::Type Type, uint64_t Address, - std::string &Name, uint64_t &Addr, uint64_t &Size, + bool getNameFromSymbolTable(uint64_t Address, std::string &Name, + uint64_t &Addr, uint64_t &Size, std::string &FileName) const; // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd // (function descriptor) section and OpdExtractor refers to its contents. @@ -87,8 +87,7 @@ return Addr != RHS.Addr ? Addr < RHS.Addr : Size < RHS.Size; } }; - std::vector Functions; - std::vector Objects; + std::vector Symbols; // (index, filename) pairs of ELF STT_FILE symbols. std::vector> FileSymbols; diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -70,23 +70,19 @@ return std::move(E); } - std::vector &Fs = res->Functions, &Os = res->Objects; - auto Uniquify = [](std::vector &S) { - // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr, - // pick the one with the largest Size. This helps us avoid symbols with no - // size information (Size=0). - llvm::sort(S); - auto I = S.begin(), E = S.end(), J = S.begin(); - while (I != E) { - auto OI = I; - while (++I != E && OI->Addr == I->Addr) { - } - *J++ = I[-1]; + std::vector &SS = res->Symbols; + // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr, + // pick the one with the largest Size. This helps us avoid symbols with no + // size information (Size=0). 
+ llvm::sort(SS); + auto I = SS.begin(), E = SS.end(), J = SS.begin(); + while (I != E) { + auto OI = I; + while (++I != E && OI->Addr == I->Addr) { } - S.erase(J, S.end()); - }; - Uniquify(Fs); - Uniquify(Os); + *J++ = I[-1]; + } + SS.erase(J, SS.end()); return std::move(res); } @@ -138,7 +134,7 @@ uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1; uint64_t SymbolStart = ImageBase + Export.Offset; uint64_t SymbolSize = NextOffset - Export.Offset; - Functions.push_back({SymbolStart, SymbolSize, Export.Name, 0}); + Symbols.push_back({SymbolStart, SymbolSize, Export.Name, 0}); } return Error::success(); } @@ -209,15 +205,7 @@ if (Obj.isELF() && ELFSymbolRef(Symbol).getBinding() != ELF::STB_LOCAL) ELFSymIdx = 0; - SymbolDesc SD = {SymbolAddress, SymbolSize, SymbolName, ELFSymIdx}; - // DATA command symbolizes just ST_Data (ELF STT_OBJECT) symbols as an - // optimization. Treat everything else (e.g. ELF STT_NOTYPE, STT_FUNC and - // STT_GNU_IFUNC) as function symbols which can be used to symbolize - // addresses. - if (SymbolType == SymbolRef::ST_Data) - Objects.push_back(SD); - else - Functions.push_back(SD); + Symbols.push_back({SymbolAddress, SymbolSize, SymbolName, ELFSymIdx}); return Error::success(); } @@ -234,9 +222,8 @@ } bool SymbolizableObjectFile::getNameFromSymbolTable( - SymbolRef::Type Type, uint64_t Address, std::string &Name, uint64_t &Addr, - uint64_t &Size, std::string &FileName) const { - const auto &Symbols = Type == SymbolRef::ST_Function ? 
Functions : Objects; + uint64_t Address, std::string &Name, uint64_t &Addr, uint64_t &Size, + std::string &FileName) const { SymbolDesc SD{Address, UINT64_C(-1), StringRef(), 0}; auto SymbolIterator = llvm::upper_bound(Symbols, SD); if (SymbolIterator == Symbols.begin()) @@ -287,8 +274,8 @@ if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) { std::string FunctionName, FileName; uint64_t Start, Size; - if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address, - FunctionName, Start, Size, FileName)) { + if (getNameFromSymbolTable(ModuleOffset.Address, FunctionName, Start, Size, + FileName)) { LineInfo.FunctionName = FunctionName; if (LineInfo.FileName == DILineInfo::BadString && !FileName.empty()) LineInfo.FileName = FileName; @@ -314,8 +301,8 @@ if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) { std::string FunctionName, FileName; uint64_t Start, Size; - if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address, - FunctionName, Start, Size, FileName)) { + if (getNameFromSymbolTable(ModuleOffset.Address, FunctionName, Start, Size, + FileName)) { DILineInfo *LI = InlinedContext.getMutableFrame( InlinedContext.getNumberOfFrames() - 1); LI->FunctionName = FunctionName; @@ -331,8 +318,8 @@ object::SectionedAddress ModuleOffset) const { DIGlobal Res; std::string FileName; - getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name, - Res.Start, Res.Size, FileName); + getNameFromSymbolTable(ModuleOffset.Address, Res.Name, Res.Start, Res.Size, + FileName); return Res; } diff --git a/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml new file mode 100644 --- /dev/null +++ b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml @@ -0,0 +1,52 @@ +## Function and data symbols have different addresses so internally we treat +## STT_NOTYPE/STT_FUNC/STT_OBJECT/etc the same. 
The DATA command may get a function +## as the result. For regular applications, the input addresses are guaranteed to be +## related to data symbols. +# RUN: yaml2obj %s -o %t +# RUN: llvm-symbolizer --obj=%t 'DATA 0x1000' 'DATA 0x2000' 'DATA 0x2002' | FileCheck %s + +# CHECK: func +# CHECK-NEXT: 4096 1 +# CHECK-EMPTY: +# CHECK-NEXT: data +# CHECK-NEXT: 8192 2 +# CHECK-EMPTY: +# CHECK-NEXT: notype +# CHECK-NEXT: 8194 3 +# CHECK-EMPTY: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x1000 + Size: 1 + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x2000 + Size: 5 +Symbols: + - Name: func + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x1000 + Size: 1 + - Name: data + Type: STT_OBJECT + Section: .data + Binding: STB_GLOBAL + Value: 0x2000 + Size: 2 + - Name: notype + Section: .data + Binding: STB_GLOBAL + Value: 0x2002 + Size: 3