diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst --- a/llvm/docs/CommandGuide/llvm-symbolizer.rst +++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst @@ -14,7 +14,7 @@ :program:`llvm-symbolizer` reads input names and addresses from the command-line and prints corresponding source code locations to standard output. It can also symbolize logs containing :doc:`Symbolizer Markup </SymbolizerMarkupFormat>` via -:option:`--filter-markup`. +:option:`--filter-markup`. Addresses may be specified as numbers or symbol names. If no address is specified on the command-line, it reads the addresses from standard input. If no input name is specified on the command-line, but addresses @@ -196,6 +196,17 @@ main foo/test.cpp:15:0 +Example 7 - Addresses as symbol names: + +.. code-block:: console + + $ llvm-symbolizer --obj=test.elf main + main + /tmp/test.cpp:14:0 + $ llvm-symbolizer --obj=test.elf "CODE foz" + foz + /tmp/test.h:1:0 + OPTIONS ------- diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -323,6 +323,8 @@ * Made significant changes to JSON output format of `llvm-readobj`/`llvm-readelf` to improve correctness and clarity. + +* `llvm-symbolizer` and `llvm-addr2line` now support addresses specified as symbol names. 
Changes to LLDB --------------------------------- diff --git a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h --- a/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h @@ -34,6 +34,7 @@ struct Request { StringRef ModuleName; std::optional Address; + StringRef Symbol; }; class DIPrinter { @@ -46,6 +47,8 @@ virtual void print(const Request &Request, const DIGlobal &Global) = 0; virtual void print(const Request &Request, const std::vector &Locals) = 0; + virtual void print(const Request &Request, + const std::vector &Locations) = 0; virtual void printInvalidCommand(const Request &Request, StringRef Command) = 0; @@ -63,6 +66,7 @@ bool Pretty; bool Verbose; int SourceContextLines; + bool IsGNUStyle; }; using ErrorHandler = function_ref; @@ -94,6 +98,8 @@ void print(const Request &Request, const DIGlobal &Global) override; void print(const Request &Request, const std::vector &Locals) override; + void print(const Request &Request, + const std::vector &Locations) override; void printInvalidCommand(const Request &Request, StringRef Command) override; @@ -146,6 +152,8 @@ void print(const Request &Request, const DIGlobal &Global) override; void print(const Request &Request, const std::vector &Locals) override; + void print(const Request &Request, + const std::vector &Locations) override; void printInvalidCommand(const Request &Request, StringRef Command) override; diff --git a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h --- a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h @@ -36,6 +36,9 @@ virtual std::vector symbolizeFrame(object::SectionedAddress ModuleOffset) const = 0; + virtual std::vector + findSymbol(StringRef Symbol) const = 0; + // Return true if this is a 32-bit x86 PE COFF module. 
virtual bool isWin32Module() const = 0; diff --git a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h --- a/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h @@ -43,6 +43,8 @@ DIGlobal symbolizeData(object::SectionedAddress ModuleOffset) const override; std::vector symbolizeFrame(object::SectionedAddress ModuleOffset) const override; + std::vector + findSymbol(StringRef Symbol) const override; // Return true if this is a 32-bit x86 PE COFF module. bool isWin32Module() const override; diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h --- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -104,6 +104,14 @@ Expected> symbolizeFrame(ArrayRef BuildID, object::SectionedAddress ModuleOffset); + + Expected> findSymbol(const ObjectFile &Obj, + StringRef Symbol); + Expected> findSymbol(const std::string &ModuleName, + StringRef Symbol); + Expected> findSymbol(ArrayRef BuildID, + StringRef Symbol); + void flush(); // Evict entries from the binary cache until it is under the maximum size @@ -146,6 +154,9 @@ Expected> symbolizeFrameCommon(const T &ModuleSpecifier, object::SectionedAddress ModuleOffset); + template + Expected> findSymbolCommon(const T &ModuleSpecifier, + StringRef Symbol); Expected getOrCreateModuleInfo(const ObjectFile &Obj); diff --git a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp --- a/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp @@ -260,6 +260,20 @@ printFooter(); } +void PlainPrinterBase::print(const Request &Request, + const std::vector &Locations) { + if (Locations.empty()) { + if (Config.IsGNUStyle || Request.Symbol.empty()) + OS << DILineInfo::Addr2LineBadString << ":0\n"; + else + 
OS << Request.Symbol << '\n'; + } else { + for (const DILineInfo &L : Locations) + print(L, false); + } + printFooter(); +} + void PlainPrinterBase::printInvalidCommand(const Request &Request, StringRef Command) { OS << Command << '\n'; @@ -278,6 +292,8 @@ static json::Object toJSON(const Request &Request, StringRef ErrorMsg = "") { json::Object Json({{"ModuleName", Request.ModuleName.str()}}); + if (!Request.Symbol.empty()) + Json["SymName"] = Request.Symbol.str(); if (Request.Address) Json["Address"] = toHex(*Request.Address); if (!ErrorMsg.empty()) @@ -367,6 +383,19 @@ printJSON(std::move(Json)); } +void JSONPrinter::print(const Request &Request, + const std::vector &Locations) { + json::Array Definitions; + for (const DILineInfo &L : Locations) + Definitions.push_back(toJSON(L)); + json::Object Json = toJSON(Request); + Json["Loc"] = std::move(Definitions); + if (ObjectList) + ObjectList->push_back(std::move(Json)); + else + printJSON(std::move(Json)); +} + void JSONPrinter::printInvalidCommand(const Request &Request, StringRef Command) { printError(Request, diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -351,6 +351,19 @@ return DebugInfoContext->getLocalsForAddress(ModuleOffset); } +std::vector +SymbolizableObjectFile::findSymbol(StringRef Symbol) const { + std::vector Result; + for (const SymbolDesc &Sym : Symbols) { + if (Sym.Name.equals(Symbol)) { + object::SectionedAddress A{Sym.Addr, + getModuleSectionIndexForAddress(Sym.Addr)}; + Result.push_back(A); + } + } + return Result; +} + /// Search for the first occurence of specified Address in ObjectFile. 
uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress( uint64_t Address) const { diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp --- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -230,6 +230,52 @@ return symbolizeFrameCommon(BuildID, ModuleOffset); } +/// Finds every symbol named \p Symbol in the module and returns the source +/// location of each match; matches whose FileName is BadString are skipped. +template <typename T> +Expected<std::vector<DILineInfo>> +LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol) { + auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier); + if (!InfoOrErr) + return InfoOrErr.takeError(); + + SymbolizableModule *Info = *InfoOrErr; + std::vector<DILineInfo> Result; + + // A null module means an error has already been reported. Return an empty + // result. + if (!Info) + return Result; + + for (object::SectionedAddress A : Info->findSymbol(Symbol)) { + DILineInfo LineInfo = Info->symbolizeCode( + A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions), + Opts.UseSymbolTable); + if (LineInfo.FileName != DILineInfo::BadString) { + if (Opts.Demangle) + LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info); + Result.push_back(LineInfo); + } + } + + return Result; +} + +Expected<std::vector<DILineInfo>> +LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol) { + return findSymbolCommon(Obj, Symbol); +} + +Expected<std::vector<DILineInfo>> +LLVMSymbolizer::findSymbol(const std::string &ModuleName, StringRef Symbol) { + return findSymbolCommon(ModuleName, Symbol); +} + +Expected<std::vector<DILineInfo>> +LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol) { + return findSymbolCommon(BuildID, Symbol); +} + void LLVMSymbolizer::flush() { ObjectForUBPathAndArch.clear(); LRUBinaries.clear(); diff --git a/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp b/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp --- a/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp +++ b/llvm/test/tools/llvm-symbolizer/Inputs/addr.inp @@ -1,3 +1,3 @@ -some text +something not a valid address 0x40054d -some text2 +some text possibly a symbol diff --git 
a/llvm/test/tools/llvm-symbolizer/flag-grouping.test b/llvm/test/tools/llvm-symbolizer/flag-grouping.test --- a/llvm/test/tools/llvm-symbolizer/flag-grouping.test +++ b/llvm/test/tools/llvm-symbolizer/flag-grouping.test @@ -3,8 +3,8 @@ RUN: llvm-symbolizer -apCie=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck %s RUN: llvm-symbolizer -apCie%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck %s -CHECK: some text +CHECK: something not a valid address CHECK: 0x40054d: inctwo CHECK: (inlined by) inc CHECK (inlined by) main -CHECK: some text2 +CHECK: some text possibly a symbol diff --git a/llvm/test/tools/llvm-symbolizer/flush-output.s b/llvm/test/tools/llvm-symbolizer/flush-output.s --- a/llvm/test/tools/llvm-symbolizer/flush-output.s +++ b/llvm/test/tools/llvm-symbolizer/flush-output.s @@ -14,4 +14,4 @@ # RUN: | FileCheck %s # CHECK: flush-output.s:10 -# CHECK: bad +# CHECK: ??:0 diff --git a/llvm/test/tools/llvm-symbolizer/invalid-input-address.test b/llvm/test/tools/llvm-symbolizer/invalid-input-address.test --- a/llvm/test/tools/llvm-symbolizer/invalid-input-address.test +++ b/llvm/test/tools/llvm-symbolizer/invalid-input-address.test @@ -15,9 +15,12 @@ RUN: llvm-symbolizer --obj=%p/Inputs/addr.exe @%t.rsp | FileCheck --check-prefix=BAD-INPUT %s # Test bad input address values for the GNU-compatible version. 
-RUN: llvm-addr2line --obj=%p/Inputs/addr.exe < %t.inp | FileCheck --check-prefix=BAD-INPUT %s -RUN: llvm-addr2line --obj=%p/Inputs/addr.exe "some text" "some text2" | FileCheck --check-prefix=BAD-INPUT %s -RUN: llvm-addr2line --obj=%p/Inputs/addr.exe @%t.rsp | FileCheck --check-prefix=BAD-INPUT %s +RUN: llvm-addr2line --obj=%p/Inputs/addr.exe < %t.inp | FileCheck --check-prefix=BAD-INPUT-GNU %s +RUN: llvm-addr2line --obj=%p/Inputs/addr.exe "some text" "some text2" | FileCheck --check-prefix=BAD-INPUT-GNU %s +RUN: llvm-addr2line --obj=%p/Inputs/addr.exe @%t.rsp | FileCheck --check-prefix=BAD-INPUT-GNU %s BAD-INPUT: some text BAD-INPUT-NEXT: some text2 + +BAD-INPUT-GNU: ??:0 +BAD-INPUT-GNU-NEXT: ??:0 diff --git a/llvm/test/tools/llvm-symbolizer/output-style-empty-line.test b/llvm/test/tools/llvm-symbolizer/output-style-empty-line.test --- a/llvm/test/tools/llvm-symbolizer/output-style-empty-line.test +++ b/llvm/test/tools/llvm-symbolizer/output-style-empty-line.test @@ -3,26 +3,33 @@ --output-style=LLVM or if the option is omitted. 
RUN: llvm-symbolizer -e %p/Inputs/addr.exe < %p/Inputs/addr.inp \ -RUN: | FileCheck %s --check-prefix=LLVM +RUN: | FileCheck %s --check-prefix=SYMB-LLVM RUN: llvm-symbolizer --output-style=LLVM -e %p/Inputs/addr.exe < %p/Inputs/addr.inp \ -RUN: | FileCheck %s --check-prefix=LLVM +RUN: | FileCheck %s --check-prefix=SYMB-LLVM + +SYMB-LLVM: x.c:14:0 +SYMB-LLVM-EMPTY: +SYMB-LLVM-NEXT: some text possibly a symbol RUN: llvm-symbolizer --output-style=GNU -e %p/Inputs/addr.exe < %p/Inputs/addr.inp \ -RUN: | FileCheck %s --check-prefix=GNU +RUN: | FileCheck %s --check-prefix=SYMB-GNU + +SYMB-GNU: x.c:14 +SYMB-GNU-NEXT: some text possibly a symbol RUN: llvm-addr2line -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp \ -RUN: | FileCheck %s --check-prefix=GNU +RUN: | FileCheck %s --check-prefix=ADDR-GNU RUN: llvm-addr2line --output-style=GNU -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp \ -RUN: | FileCheck %s --check-prefix=GNU +RUN: | FileCheck %s --check-prefix=ADDR-GNU -RUN: llvm-addr2line --output-style=LLVM -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp \ -RUN: | FileCheck %s --check-prefix=LLVM +ADDR-GNU: x.c:14 +ADDR-GNU-NEXT: ??:0 -LLVM: x.c:14:0 -LLVM-EMPTY: -LLVM-NEXT: some text2 +RUN: llvm-addr2line --output-style=LLVM -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp \ +RUN: | FileCheck %s --check-prefix=ADDR-LLVM -GNU: x.c:14 -GNU-NEXT: some text2 +ADDR-LLVM: x.c:14:0 +ADDR-LLVM-EMPTY: +ADDR-LLVM-NEXT: some diff --git a/llvm/test/tools/llvm-symbolizer/output-style-json-code.test b/llvm/test/tools/llvm-symbolizer/output-style-json-code.test --- a/llvm/test/tools/llvm-symbolizer/output-style-json-code.test +++ b/llvm/test/tools/llvm-symbolizer/output-style-json-code.test @@ -25,39 +25,45 @@ # RUN: llvm-symbolizer --output-style=JSON --no-inlines -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \ # RUN: FileCheck %s --check-prefix=NO-INLINES --strict-whitespace --match-full-lines --implicit-check-not={{.}} ## Invalid first argument before any valid one. 
-# NO-INLINES:{"Error":{"Message":"unable to parse arguments: some text"},"ModuleName":"{{.*}}/Inputs/addr.exe"} +# NO-INLINES:{"Error":{"Message":"unable to parse arguments: something not a valid address"},"ModuleName":"{{.*}}/Inputs/addr.exe"} ## Resolve valid address. # NO-INLINES-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2}]} ## Invalid argument after a valid one. -# NO-INLINES-NEXT:{"Error":{"Message":"unable to parse arguments: some text2"},"ModuleName":"{{.*}}/Inputs/addr.exe"} +# NO-INLINES-NEXT:{"Error":{"Message":"unable to parse arguments: some text possibly a symbol"},"ModuleName":"{{.*}}/Inputs/addr.exe"} ## This test case is testing stdin input, inlines by default. # RUN: llvm-symbolizer --output-style=JSON -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \ # RUN: FileCheck %s --check-prefix=INLINE --strict-whitespace --match-full-lines --implicit-check-not={{.}} ## Invalid first argument before any valid one. -# INLINE:{"Error":{"Message":"unable to parse arguments: some text"},"ModuleName":"{{.*}}/Inputs/addr.exe"} +# INLINE:{"Error":{"Message":"unable to parse arguments: something not a valid address"},"ModuleName":"{{.*}}/Inputs/addr.exe"} ## Resolve valid address. 
# INLINE-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inctwo","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inc","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]} ## Invalid argument after a valid one. -# INLINE-NEXT:{"Error":{"Message":"unable to parse arguments: some text2"},"ModuleName":"{{.*}}/Inputs/addr.exe"} +# INLINE-NEXT:{"Error":{"Message":"unable to parse arguments: some text possibly a symbol"},"ModuleName":"{{.*}}/Inputs/addr.exe"} ## Also check the last test case with llvm-adr2line. -## The expected result is the same with -f -i. -# RUN: llvm-addr2line --output-style=JSON -f -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \ -# RUN: FileCheck %s --check-prefix=INLINE-A2L --strict-whitespace --match-full-lines --implicit-check-not={{.}} +## The expected result is the same with -f -i for valid addresses. +## If input is not a valid address specification, like "some text", llvm-addr2line +## behaves differently than llvm-symbolizer. Part of the input starting from space +## is ignored and the remaining starting part ("some") is treated as a symbol name. +## So the utility reports that symbol is not found rather than unable to parse the +## input. +## +# RUN: llvm-addr2line --output-style=JSON -f -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \ +# RUN: FileCheck %s --check-prefix=INLINE-A2L --strict-whitespace --match-full-lines --implicit-check-not={{.}} ## Invalid first argument before any valid one. 
-# INLINE-A2L:{"Error":{"Message":"unable to parse arguments: some text"},"ModuleName":"{{.*}}/Inputs/addr.exe"} +# INLINE-A2L:{"Address":"0x0","Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"} ## Resolve valid address. -# INLINE-A2L-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inctwo","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inc","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]} +# INLINE-A2L-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inctwo","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"inc","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"main","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]} ## Invalid argument after a valid one. -# INLINE-A2L-NEXT:{"Error":{"Message":"unable to parse arguments: some text2"},"ModuleName":"{{.*}}/Inputs/addr.exe"} +# INLINE-A2L:{"Address":"0x0","Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"} ## Note llvm-addr2line without -f does not print the function name in JSON too. 
# RUN: llvm-addr2line --output-style=JSON -i -e %p/Inputs/addr.exe < %p/Inputs/addr.inp | \ # RUN: FileCheck %s --check-prefix=NO-FUNC-A2L --strict-whitespace --match-full-lines --implicit-check-not={{.}} ## Invalid first argument before any valid one. -# NO-FUNC-A2L:{"Error":{"Message":"unable to parse arguments: some text"},"ModuleName":"{{.*}}/Inputs/addr.exe"} +# NO-FUNC-A2L:{"Address":"0x0","Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"something"} ## Resolve valid address. # NO-FUNC-A2L-NEXT:{"Address":"0x40054d","ModuleName":"{{.*}}/Inputs/addr.exe","Symbol":[{"Column":3,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":3,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":2},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":7,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":6},{"Column":0,"Discriminator":0,"FileName":"/tmp{{/|\\\\}}x.c","FunctionName":"","Line":14,"StartAddress":"0x400540","StartFileName":"/tmp{{/|\\\\}}x.c","StartLine":12}]} ## Invalid argument after a valid one. 
-# NO-FUNC-A2L-NEXT:{"Error":{"Message":"unable to parse arguments: some text2"},"ModuleName":"{{.*}}/Inputs/addr.exe"} +# NO-FUNC-A2L-NEXT:{"Address":"0x0","Loc":[],"ModuleName":"{{.*}}/Inputs/addr.exe","SymName":"some"} diff --git a/llvm/test/tools/llvm-symbolizer/sym.test b/llvm/test/tools/llvm-symbolizer/sym.test --- a/llvm/test/tools/llvm-symbolizer/sym.test +++ b/llvm/test/tools/llvm-symbolizer/sym.test @@ -1,52 +1,27 @@ -#Source: -##include -#static inline int inctwo (int *a) { -# printf ("%d\n",(*a)++); -# return (*a)++; -#} -#static inline int inc (int *a) { -# printf ("%d\n",inctwo(a)); -# return (*a)++; -#} -# -# -#int main () { -# int x = 1; -# return inc(&x); -#} -# -#Build as : clang -g -O2 addr.c - -RUN: llvm-symbolizer --print-address --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck %s -RUN: llvm-symbolizer --addresses --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck %s -RUN: llvm-symbolizer -a --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck %s -RUN: llvm-symbolizer --inlining --print-address --pretty-print --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s -RUN: llvm-symbolizer --inlining --print-address -p --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s -RUN: llvm-symbolizer --inlines --print-address --pretty-print --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s -RUN: llvm-symbolizer --inlines --print-address -p --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s -RUN: llvm-symbolizer -i --print-address --pretty-print --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s -RUN: llvm-symbolizer -i --print-address -p --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s -## Before 2020-08-04, asan_symbolize.py passed --inlining=true. -## Support this compatibility alias for a while. 
-RUN: llvm-symbolizer --inlining=true --print-address -p --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s - -RUN: echo "0x1" > %t.input -RUN: llvm-symbolizer --obj=%p/Inputs/zero < %t.input | FileCheck -check-prefix="ZERO" %s - -RUN: llvm-addr2line --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix=A2L %s -RUN: llvm-addr2line -a --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2L,A2L_A %s -RUN: llvm-addr2line -f --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2L,A2L_F %s -RUN: llvm-addr2line -i --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2L,A2L_I %s -RUN: llvm-addr2line -fi --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2L,A2L_F,A2L_I,A2L_FI %s - -RUN: llvm-addr2line -pa --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2LP,A2LP_A %s -RUN: llvm-addr2line -pf --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2LP,A2LP_F %s -RUN: llvm-addr2line -paf --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2LP,A2LP_AF %s -RUN: llvm-addr2line -pai --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2LP,A2LP_A,A2LP_I %s -RUN: llvm-addr2line -pfi --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2LP,A2LP_F,A2LP_FI %s -RUN: llvm-addr2line -pafi --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2LP,A2LP_AF,A2LP_FI %s +## Source: +## #include +## static inline int inctwo (int *a) { +## printf ("%d\n",(*a)++); +## return (*a)++; +## } +## static inline int inc (int *a) { +## printf ("%d\n",inctwo(a)); +## return (*a)++; +## } +## +## +## int main () { +## int x = 1; +## return inc(&x); +## } +## +## Build as : clang -g -O2 addr.c -# CHECK: some text +# RUN: llvm-symbolizer --print-address --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck %s +# RUN: llvm-symbolizer 
--addresses --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck %s +# RUN: llvm-symbolizer -a --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck %s +# +# CHECK: not a valid address # CHECK-NEXT: 0x40054d # CHECK-NEXT: inctwo # CHECK-NEXT: {{[/\]+}}tmp{{[/\]+}}x.c:3:3 @@ -55,33 +30,61 @@ # CHECK-NEXT: main # CHECK-NEXT: {{[/\]+}}tmp{{[/\]+}}x.c:14:0 # CHECK-EMPTY: -# CHECK-NEXT: some text2 -# -#PRETTY: some text -#PRETTY: {{[0x]+}}40054d: inctwo at {{[/\]+}}tmp{{[/\]+}}x.c:3:3 -#PRETTY: (inlined by) inc at {{[/\]+}}tmp{{[/\]+}}x.c:7:0 -#PRETTY: (inlined by) main at {{[/\]+}}tmp{{[/\]+}}x.c:14:0 -#PRETTY: some text2 +# CHECK-NEXT: some text possibly a symbol + +## Before 2020-08-04, asan_symbolize.py passed --inlining=true. +## Support this compatibility alias for a while. +# RUN: llvm-symbolizer --inlining=true --print-address -p --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s + +# RUN: llvm-symbolizer --inlining --print-address --pretty-print --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s +# RUN: llvm-symbolizer --inlining --print-address -p --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s +# RUN: llvm-symbolizer --inlines --print-address --pretty-print --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s +# RUN: llvm-symbolizer --inlines --print-address -p --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s +# RUN: llvm-symbolizer -i --print-address --pretty-print --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s +# RUN: llvm-symbolizer -i --print-address -p --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix="PRETTY" %s # -#ZERO: ?? 
-#ZERO: ??:0:0 +# PRETTY: something not a valid address +# PRETTY: {{[0x]+}}40054d: inctwo at {{[/\]+}}tmp{{[/\]+}}x.c:3:3 +# PRETTY: (inlined by) inc at {{[/\]+}}tmp{{[/\]+}}x.c:7:0 +# PRETTY: (inlined by) main at {{[/\]+}}tmp{{[/\]+}}x.c:14:0 +# PRETTY: some text possibly a symbol + +# RUN: echo "0x1" > %t.input +# RUN: llvm-symbolizer --obj=%p/Inputs/zero < %t.input | FileCheck -check-prefix="ZERO" %s # -#A2L: some text -#A2L_A-NEXT: 0x40054d -#A2L_F-NEXT: inctwo -#A2L-NEXT: {{[/\]+}}tmp{{[/\]+}}x.c:3{{$}} -#A2L_FI-NEXT: inc{{$}} -#A2L_I-NEXT: {{[/\]+}}tmp{{[/\]+}}x.c:7{{$}} -#A2L_FI-NEXT: main -#A2L_I-NEXT: {{[/\]+}}tmp{{[/\]+}}x.c:14{{$}} -#A2L-NEXT: some text2 +# ZERO: ?? +# ZERO: ??:0:0 -#A2LP: some text -#A2LP_A-NEXT: 0x40054d: {{[/\]+}}tmp{{[/\]+}}x.c:3{{$}} -#A2LP_F-NEXT: inctwo at {{[/\]+}}tmp{{[/\]+}}x.c:3{{$}} -#A2LP_AF-NEXT: 0x40054d: inctwo at {{[/\]+}}tmp{{[/\]+}}x.c:3{{$}} -#A2LP_I-NEXT: {{[/\]+}}tmp{{[/\]+}}x.c:7{{$}} -#A2LP_I-NEXT: {{[/\]+}}tmp{{[/\]+}}x.c:14{{$}} -#A2LP_FI-NEXT: (inlined by) inc at {{[/\]+}}tmp{{[/\]+}}x.c:7{{$}} -#A2LP_FI-NEXT: (inlined by) main at {{[/\]+}}tmp{{[/\]+}}x.c:14{{$}} -#A2LP-NEXT: some text2 +# RUN: llvm-addr2line --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefix=A2L %s +# RUN: llvm-addr2line -a --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2L,A2L_A %s +# RUN: llvm-addr2line -f --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2L,A2L_F %s +# RUN: llvm-addr2line -i --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2L,A2L_I %s +# RUN: llvm-addr2line -fi --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2L,A2L_F,A2L_I,A2L_FI %s +# +# RUN: llvm-addr2line -pa --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2LP,A2LP_A %s +# RUN: llvm-addr2line -pf --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2LP,A2LP_F %s +# RUN: llvm-addr2line -paf 
--obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2LP,A2LP_AF %s +# RUN: llvm-addr2line -pai --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2LP,A2LP_A,A2LP_I %s +# RUN: llvm-addr2line -pfi --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2LP,A2LP_F,A2LP_FI %s +# RUN: llvm-addr2line -pafi --obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck -check-prefixes=A2LP,A2LP_AF,A2LP_FI %s +# +# A2L: ??:0 +# A2L_A-NEXT: 0x40054d +# A2L_F-NEXT: inctwo +# A2L-NEXT: {{[/\]+}}tmp{{[/\]+}}x.c:3{{$}} +# A2L_FI-NEXT: inc{{$}} +# A2L_I-NEXT: {{[/\]+}}tmp{{[/\]+}}x.c:7{{$}} +# A2L_FI-NEXT: main +# A2L_I-NEXT: {{[/\]+}}tmp{{[/\]+}}x.c:14{{$}} +# A2L-NEXT: ??:0 +# +# A2LP: ??:0 +# A2LP_A-NEXT: 0x40054d: {{[/\]+}}tmp{{[/\]+}}x.c:3{{$}} +# A2LP_F-NEXT: inctwo at {{[/\]+}}tmp{{[/\]+}}x.c:3{{$}} +# A2LP_AF-NEXT: 0x40054d: inctwo at {{[/\]+}}tmp{{[/\]+}}x.c:3{{$}} +# A2LP_I-NEXT: {{[/\]+}}tmp{{[/\]+}}x.c:7{{$}} +# A2LP_I-NEXT: {{[/\]+}}tmp{{[/\]+}}x.c:14{{$}} +# A2LP_FI-NEXT: (inlined by) inc at {{[/\]+}}tmp{{[/\]+}}x.c:7{{$}} +# A2LP_FI-NEXT: (inlined by) main at {{[/\]+}}tmp{{[/\]+}}x.c:14{{$}} +# A2LP-NEXT: ??:0 + \ No newline at end of file diff --git a/llvm/test/tools/llvm-symbolizer/symbol-search.test b/llvm/test/tools/llvm-symbolizer/symbol-search.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/symbol-search.test @@ -0,0 +1,33 @@ +## This test checks the case when address is specified by a symbol name rather +## than a number. +## +## It uses ELF shared object `Inputs/symbols.so` built for x86_64 using +## the instructions from `Inputs/symbols.h`. 
+ +# RUN: llvm-addr2line --obj=%p/Inputs/symbols.so "CODE func_01" | FileCheck --check-prefix=CODE_CMD %s +# RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so "CODE func_01" | FileCheck --check-prefix=CODE_CMD %s +# CODE_CMD: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12 + +# RUN: llvm-addr2line -e %p/Inputs/symbols.so func_01 | FileCheck --check-prefix=SYMB %s +# RUN: llvm-symbolizer -e %p/Inputs/symbols.so func_01 | FileCheck --check-prefix=SYMB %s +# SYMB: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12 + +# RUN: llvm-addr2line -e %p/Inputs/symbols.so static_func | FileCheck --check-prefix=SYMB_MULTI %s +# SYMB_MULTI: /tmp/dbginfo{{[/\]+}}symbols.part3.c:4 +# SYMB_MULTI-NEXT: /tmp/dbginfo{{[/\]+}}symbols.part4.c:4 + +# RUN: llvm-addr2line --obj=%p/Inputs/symbols.so func_666 | FileCheck --check-prefix=NONEXISTENT_ADDR %s +# NONEXISTENT_ADDR: ?? + +# RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_666 | FileCheck --check-prefix=NONEXISTENT_LLVM %s +# NONEXISTENT_LLVM: func_666 + +# RUN: llvm-addr2line --obj=%p/Inputs/symbols.so func_01 func_02 | FileCheck --check-prefix=FUNCS %s +# RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01 func_02 | FileCheck --check-prefix=FUNCS %s +# FUNCS: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12 +# FUNCS: /tmp/dbginfo{{[/\]+}}symbols.part2.cpp:10 + +# RUN: llvm-addr2line --obj=%p/Inputs/symbols.so _ZL14static_func_01i | FileCheck --check-prefix=MULTI_CXX %s +# RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so _ZL14static_func_01i | FileCheck --check-prefix=MULTI_CXX %s +# MULTI_CXX: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:7 +# MULTI_CXX: /tmp/dbginfo{{[/\]+}}symbols.part2.cpp:5 diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -138,7 +138,7 @@ static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, StringRef InputString, Command &Cmd, std::string &ModuleName, 
object::BuildID &BuildID, - uint64_t &ModuleOffset) { + StringRef &Symbol, uint64_t &ModuleOffset) { const char kDelimiters[] = " \n\r"; ModuleName = ""; if (InputString.consume_front("CODE ")) { @@ -200,26 +200,59 @@ Pos = InputString.data(); ModuleName = BinaryName.str(); } - // Skip delimiters and parse module offset. + + // Parse address, which can be specified as a module offset or as a + // symbol. Pos += strspn(Pos, kDelimiters); int OffsetLength = strcspn(Pos, kDelimiters); StringRef Offset(Pos, OffsetLength); + // GNU addr2line assumes the offset is hexadecimal and allows a redundant // "0x" or "0X" prefix; do the same for compatibility. + bool StartsWithHexPrefix = false; if (IsAddr2Line) - Offset.consume_front("0x") || Offset.consume_front("0X"); - return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset); + StartsWithHexPrefix = + Offset.consume_front("0x") || Offset.consume_front("0X"); + if (!Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset)) { + // Address specification is a module offset. + Symbol = StringRef(); + return true; + } + + // Recognize the cases when address specification is absent or invalid. + if (Offset.empty() || StartsWithHexPrefix || + std::isdigit(static_cast<unsigned char>(Offset.front()))) + return false; + + // If address specification contains a space, GNU addr2line ignores everything + // starting from it. If not in addr2line compatibility mode, such input is an + // invalid specification. + Pos += OffsetLength; + Pos += strspn(Pos, kDelimiters); + if (!IsAddr2Line && *Pos) + return false; + + // Address in executable code may be specified as a symbol name. + if (Cmd != Command::Code) + return false; + + // The remaining token is not a number: treat it as a symbol name. The offset + // is zeroed because the address is not known until the symbol is looked up + // in the module. 
+ Symbol = Offset; + ModuleOffset = 0; + return true; } template void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd, - uint64_t Offset, uint64_t AdjustVMA, bool ShouldInline, - OutputStyle Style, LLVMSymbolizer &Symbolizer, - DIPrinter &Printer) { + StringRef Symbol, uint64_t Offset, uint64_t AdjustVMA, + bool ShouldInline, OutputStyle Style, + LLVMSymbolizer &Symbolizer, DIPrinter &Printer) { uint64_t AdjustedOffset = Offset - AdjustVMA; object::SectionedAddress Address = {AdjustedOffset, object::SectionedAddress::UndefSection}; - Request SymRequest = {ModuleName, Offset}; + Request SymRequest = {ModuleName, Offset, Symbol}; if (Cmd == Command::Data) { Expected ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address); print(SymRequest, ResOrErr, Printer); @@ -227,6 +259,10 @@ Expected> ResOrErr = Symbolizer.symbolizeFrame(ModuleSpec, Address); print(SymRequest, ResOrErr, Printer); + } else if (!Symbol.empty()) { + Expected> ResOrErr = + Symbolizer.findSymbol(ModuleSpec, Symbol); + print(SymRequest, ResOrErr, Printer); } else if (ShouldInline) { Expected ResOrErr = Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); @@ -263,9 +299,12 @@ std::string ModuleName; object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end()); uint64_t Offset = 0; + StringRef Symbol; if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line, - StringRef(InputString), Cmd, ModuleName, BuildID, Offset)) { - Printer.printInvalidCommand({ModuleName, std::nullopt}, InputString); + StringRef(InputString), Cmd, ModuleName, BuildID, Symbol, + Offset)) { + Printer.printInvalidCommand({ModuleName, std::nullopt, Symbol}, + InputString); return; } bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line); @@ -274,11 +313,11 @@ if (!Args.hasArg(OPT_no_debuginfod)) enableDebuginfod(Symbolizer, Args); std::string BuildIDStr = toHex(BuildID); - executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline, - Style, Symbolizer, 
Printer); + executeCommand(BuildIDStr, BuildID, Cmd, Symbol, Offset, AdjustVMA, + ShouldInline, Style, Symbolizer, Printer); } else { - executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline, - Style, Symbolizer, Printer); + executeCommand(ModuleName, ModuleName, Cmd, Symbol, Offset, AdjustVMA, + ShouldInline, Style, Symbolizer, Printer); } } @@ -463,6 +502,7 @@ else Style = OutputStyle::LLVM; } + Config.IsGNUStyle = Style == OutputStyle::GNU; if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) { errs() << "error: cannot specify both --build-id and --obj\n"; @@ -484,7 +524,7 @@ if (auto *Arg = Args.getLastArg(OPT_obj_EQ); Arg && IsAddr2Line) { auto Status = Symbolizer.getOrCreateModuleInfo(Arg->getValue()); if (!Status) { - Request SymRequest = {Arg->getValue(), 0}; + Request SymRequest = {Arg->getValue(), 0, StringRef()}; handleAllErrors(Status.takeError(), [&](const ErrorInfoBase &EI) { Printer->printError(SymRequest, EI); }); diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -24,6 +24,7 @@ using ::llvm::DILineInfo; using ::llvm::DILineInfoSpecifier; using ::llvm::DILocal; +using ::llvm::StringRef; using ::llvm::memprof::CallStackMap; using ::llvm::memprof::Frame; using ::llvm::memprof::FrameId; @@ -56,6 +57,9 @@ virtual std::vector symbolizeFrame(SectionedAddress) const { llvm_unreachable("unused"); } + virtual std::vector findSymbol(StringRef Symbol) const { + llvm_unreachable("unused"); + } virtual bool isWin32Module() const { llvm_unreachable("unused"); } virtual uint64_t getModulePreferredBase() const { llvm_unreachable("unused");