diff --git a/llvm/docs/CommandGuide/llvm-addr2line.rst b/llvm/docs/CommandGuide/llvm-addr2line.rst --- a/llvm/docs/CommandGuide/llvm-addr2line.rst +++ b/llvm/docs/CommandGuide/llvm-addr2line.rst @@ -17,6 +17,11 @@ Here are some of those differences: +- ``llvm-addr2line`` interprets all addresses as hexadecimal and ignores an + optional ``0x`` prefix, whereas ``llvm-symbolizer`` attempts to determine + the base from the literal's prefix and defaults to decimal if there is no + prefix. + - ``llvm-addr2line`` defaults not to print function names. Use `-f`_ to enable that. diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -146,10 +146,10 @@ # FIXME: Why do we have both `lli` and `%lli` that do slightly different things? tools.extend([ 'dsymutil', 'lli', 'lli-child-target', 'llvm-ar', 'llvm-as', - 'llvm-bcanalyzer', 'llvm-config', 'llvm-cov', 'llvm-cxxdump', 'llvm-cvtres', - 'llvm-diff', 'llvm-dis', 'llvm-dwarfdump', 'llvm-exegesis', 'llvm-extract', - 'llvm-isel-fuzzer', 'llvm-ifs', 'llvm-install-name-tool', - 'llvm-jitlink', 'llvm-opt-fuzzer', 'llvm-lib', + 'llvm-addr2line', 'llvm-bcanalyzer', 'llvm-config', 'llvm-cov', + 'llvm-cxxdump', 'llvm-cvtres', 'llvm-diff', 'llvm-dis', 'llvm-dwarfdump', + 'llvm-exegesis', 'llvm-extract', 'llvm-isel-fuzzer', 'llvm-ifs', + 'llvm-install-name-tool', 'llvm-jitlink', 'llvm-opt-fuzzer', 'llvm-lib', 'llvm-link', 'llvm-lto', 'llvm-lto2', 'llvm-mc', 'llvm-mca', 'llvm-modextract', 'llvm-nm', 'llvm-objcopy', 'llvm-objdump', 'llvm-pdbutil', 'llvm-profdata', 'llvm-ranlib', 'llvm-rc', 'llvm-readelf', diff --git a/llvm/test/tools/llvm-symbolizer/input-base.test b/llvm/test/tools/llvm-symbolizer/input-base.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/input-base.test @@ -0,0 +1,33 @@ +# llvm-symbolizer infers the number base from the form of the address. 
+RUN: llvm-symbolizer -e /dev/null -a 0x1234 | FileCheck %s +RUN: llvm-symbolizer -e /dev/null -a 0X1234 | FileCheck %s +RUN: llvm-symbolizer -e /dev/null -a 4660 | FileCheck %s +RUN: llvm-symbolizer -e /dev/null -a 011064 | FileCheck %s +RUN: llvm-symbolizer -e /dev/null -a 0b1001000110100 | FileCheck %s +RUN: llvm-symbolizer -e /dev/null -a 0B1001000110100 | FileCheck %s +RUN: llvm-symbolizer -e /dev/null -a 0o11064 | FileCheck %s + +# llvm-symbolizer parses addresses with StringRef::getAsInteger, which only accepts the octal 0o prefix in lowercase, so 0O1234 is rejected. +RUN: llvm-symbolizer -e /dev/null -a 0O1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-UPPER + +# llvm-addr2line always parses addresses as hexadecimal and accepts an optional 0x or 0X prefix, so 0b1010 is read as hex b1010 rather than as binary. +RUN: llvm-addr2line -e /dev/null -a 0x1234 | FileCheck %s +RUN: llvm-addr2line -e /dev/null -a 0X1234 | FileCheck %s +RUN: llvm-addr2line -e /dev/null -a 1234 | FileCheck %s +RUN: llvm-addr2line -e /dev/null -a 01234 | FileCheck %s +RUN: llvm-addr2line -e /dev/null -a 0b1010 | FileCheck %s --check-prefix=HEXADECIMAL-NOT-BINARY +RUN: llvm-addr2line -e /dev/null -a 0B1010 | FileCheck %s --check-prefix=HEXADECIMAL-NOT-BINARY +RUN: llvm-addr2line -e /dev/null -a 0o1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-LOWER +RUN: llvm-addr2line -e /dev/null -a 0O1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-UPPER + +CHECK: 0x1234 +CHECK-NEXT: ?? + +HEXADECIMAL-NOT-BINARY: 0xb1010 +HEXADECIMAL-NOT-BINARY: ?? + +INVALID-NOT-OCTAL-LOWER: 0o1234 +INVALID-NOT-OCTAL-LOWER-NOT: ?? + +INVALID-NOT-OCTAL-UPPER: 0O1234 +INVALID-NOT-OCTAL-UPPER-NOT: ?? 
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -181,7 +181,7 @@ Frame, }; -static bool parseCommand(StringRef InputString, Command &Cmd, +static bool parseCommand(bool IsAddr2Line, StringRef InputString, Command &Cmd, std::string &ModuleName, uint64_t &ModuleOffset) { const char kDelimiters[] = " \n\r"; ModuleName = ""; @@ -218,15 +218,21 @@ // Skip delimiters and parse module offset. Pos += strspn(Pos, kDelimiters); int OffsetLength = strcspn(Pos, kDelimiters); - return !StringRef(Pos, OffsetLength).getAsInteger(0, ModuleOffset); + StringRef Offset(Pos, OffsetLength); + // GNU addr2line assumes the offset is hexadecimal and allows a redundant + // "0x" or "0X" prefix; do the same for compatibility. + if (IsAddr2Line) + Offset.consume_front("0x") || Offset.consume_front("0X"); + return !Offset.getAsInteger(IsAddr2Line ? 
16 : 0, ModuleOffset); } -static void symbolizeInput(StringRef InputString, LLVMSymbolizer &Symbolizer, - DIPrinter &Printer) { +static void symbolizeInput(bool IsAddr2Line, StringRef InputString, + LLVMSymbolizer &Symbolizer, DIPrinter &Printer) { Command Cmd; std::string ModuleName; uint64_t Offset = 0; - if (!parseCommand(StringRef(InputString), Cmd, ModuleName, Offset)) { + if (!parseCommand(IsAddr2Line, StringRef(InputString), Cmd, ModuleName, + Offset)) { outs() << InputString << "\n"; return; } @@ -340,12 +346,12 @@ std::remove_if(StrippedInputString.begin(), StrippedInputString.end(), [](char c) { return c == '\r' || c == '\n'; }), StrippedInputString.end()); - symbolizeInput(StrippedInputString, Symbolizer, Printer); + symbolizeInput(IsAddr2Line, StrippedInputString, Symbolizer, Printer); outs().flush(); } } else { for (StringRef Address : ClInputAddresses) - symbolizeInput(Address, Symbolizer, Printer); + symbolizeInput(IsAddr2Line, Address, Symbolizer, Printer); } return 0;