diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.h @@ -55,9 +55,8 @@ bool shouldOverrideWithSymbolTable(FunctionNameKind FNKind, bool UseSymbolTable) const; - bool getNameFromSymbolTable(object::SymbolRef::Type Type, uint64_t Address, - std::string &Name, uint64_t &Addr, - uint64_t &Size) const; + bool getNameFromSymbolTable(uint64_t Address, std::string &Name, + uint64_t &Addr, uint64_t &Size) const; // For big-endian PowerPC64 ELF, OpdAddress is the address of the .opd // (function descriptor) section and OpdExtractor refers to its contents. Error addSymbol(const object::SymbolRef &Symbol, uint64_t SymbolSize, @@ -82,8 +81,7 @@ return Addr != RHS.Addr ? Addr < RHS.Addr : Size < RHS.Size; } }; - std::vector<std::pair<SymbolDesc, StringRef>> Functions; - std::vector<std::pair<SymbolDesc, StringRef>> Objects; + std::vector<std::pair<SymbolDesc, StringRef>> Symbols; SymbolizableObjectFile(const object::ObjectFile *Obj, std::unique_ptr<DIContext> DICtx, diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -69,24 +69,19 @@ return std::move(E); } - std::vector<std::pair<SymbolDesc, StringRef>> &Fs = res->Functions, - &Os = res->Objects; - auto Uniquify = [](std::vector<std::pair<SymbolDesc, StringRef>> &S) { - // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr, - // pick the one with the largest Size. This helps us avoid symbols with no - // size information (Size=0). - llvm::sort(S); - auto I = S.begin(), E = S.end(), J = S.begin(); - while (I != E) { - auto OI = I; - while (++I != E && OI->first.Addr == I->first.Addr) { - } - *J++ = I[-1]; + std::vector<std::pair<SymbolDesc, StringRef>> &SS = res->Symbols; + // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr, + // pick the one with the largest Size. 
This helps us avoid symbols with no + // size information (Size=0). + llvm::sort(SS); + auto I = SS.begin(), E = SS.end(), J = SS.begin(); + while (I != E) { + auto OI = I; + while (++I != E && OI->first.Addr == I->first.Addr) { } - S.erase(J, S.end()); - }; - Uniquify(Fs); - Uniquify(Os); + *J++ = I[-1]; + } + SS.erase(J, SS.end()); return std::move(res); } @@ -139,7 +134,7 @@ uint64_t SymbolStart = ImageBase + Export.Offset; uint64_t SymbolSize = NextOffset - Export.Offset; SymbolDesc SD = {SymbolStart, SymbolSize}; - Functions.emplace_back(SD, Export.Name); + Symbols.emplace_back(SD, Export.Name); } return Error::success(); } @@ -198,16 +193,7 @@ if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_') SymbolName = SymbolName.drop_front(); - SymbolDesc SD = {SymbolAddress, SymbolSize}; - - // DATA command symbolizes just ST_Data (ELF STT_OBJECT) symbols as an - // optimization. Treat everything else (e.g. ELF STT_NOTYPE, STT_FUNC and - // STT_GNU_IFUNC) as function symbols which can be used to symbolize - // addresses. - if (SymbolType == SymbolRef::ST_Data) - Objects.emplace_back(SD, SymbolName); - else - Functions.emplace_back(SD, SymbolName); + Symbols.emplace_back(SymbolDesc{SymbolAddress, SymbolSize}, SymbolName); return Error::success(); } @@ -223,12 +209,10 @@ return 0; } -bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type, - uint64_t Address, +bool SymbolizableObjectFile::getNameFromSymbolTable(uint64_t Address, std::string &Name, uint64_t &Addr, uint64_t &Size) const { - const auto &Symbols = Type == SymbolRef::ST_Function ? 
Functions : Objects; std::pair<SymbolDesc, StringRef> SD{{Address, UINT64_C(-1)}, StringRef()}; auto SymbolIterator = llvm::upper_bound(Symbols, SD); if (SymbolIterator == Symbols.begin()) @@ -267,8 +251,8 @@ if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) { std::string FunctionName; uint64_t Start, Size; - if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address, - FunctionName, Start, Size)) { + if (getNameFromSymbolTable(ModuleOffset.Address, FunctionName, Start, + Size)) { LineInfo.FunctionName = FunctionName; } } @@ -292,8 +276,8 @@ if (shouldOverrideWithSymbolTable(LineInfoSpecifier.FNKind, UseSymbolTable)) { std::string FunctionName; uint64_t Start, Size; - if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address, - FunctionName, Start, Size)) { + if (getNameFromSymbolTable(ModuleOffset.Address, FunctionName, Start, + Size)) { InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1) ->FunctionName = FunctionName; } @@ -305,8 +289,7 @@ DIGlobal SymbolizableObjectFile::symbolizeData( object::SectionedAddress ModuleOffset) const { DIGlobal Res; - getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name, - Res.Start, Res.Size); + getNameFromSymbolTable(ModuleOffset.Address, Res.Name, Res.Start, Res.Size); return Res; } diff --git a/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml new file mode 100644 --- /dev/null +++ b/llvm/test/DebugInfo/Symbolize/ELF/data-command-symtab.yaml @@ -0,0 +1,52 @@ +## Function and data symbols have different addresses so internally we treat +## STT_NOTYPE/STT_FUNC/STT_OBJECT/etc the same. The DATA command may get a function +## as a result. For regular applications, the input addresses are guaranteed to be +## related to data symbols. 
+# RUN: yaml2obj %s -o %t +# RUN: llvm-symbolizer --obj=%t 'DATA 0x1000' 'DATA 0x2000' 'DATA 0x2002' | FileCheck %s + +# CHECK: func +# CHECK-NEXT: 4096 1 +# CHECK-EMPTY: +# CHECK-NEXT: data +# CHECK-NEXT: 8192 2 +# CHECK-EMPTY: +# CHECK-NEXT: notype +# CHECK-NEXT: 8194 3 +# CHECK-EMPTY: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x1000 + Size: 1 + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_WRITE, SHF_ALLOC ] + Address: 0x2000 + Size: 5 +Symbols: + - Name: func + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x1000 + Size: 1 + - Name: data + Type: STT_OBJECT + Section: .data + Binding: STB_GLOBAL + Value: 0x2000 + Size: 2 + - Name: notype + Section: .data + Binding: STB_GLOBAL + Value: 0x2002 + Size: 3