diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst --- a/llvm/docs/CommandGuide/llvm-symbolizer.rst +++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst @@ -183,6 +183,11 @@ Print just the file's name without any directories, instead of the absolute path. +.. option:: --build-id + + Look up the object using the given build ID, specified as a hexadecimal + string. Mutually exclusive with :option:`--obj`. + .. _llvm-symbolizer-opt-C: .. option:: --demangle, -C @@ -232,7 +237,8 @@ .. option:: --obj , --exe, -e Path to object file to be symbolized. If ``-`` is specified, read the object - directly from the standard input stream. + directly from the standard input stream. Mutually exclusive with + :option:`--build-id`. .. _llvm-symbolizer-opt-output-style: diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h --- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -13,6 +13,7 @@ #ifndef LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H #define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H +#include "llvm/ADT/StringMap.h" #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h" #include "llvm/Object/Binary.h" #include "llvm/Object/ELFObjectFile.h" @@ -61,22 +62,32 @@ object::SectionedAddress ModuleOffset); Expected symbolizeCode(const std::string &ModuleName, object::SectionedAddress ModuleOffset); + Expected symbolizeCode(ArrayRef BuildID, + object::SectionedAddress ModuleOffset); Expected symbolizeInlinedCode(const ObjectFile &Obj, object::SectionedAddress ModuleOffset); Expected symbolizeInlinedCode(const std::string &ModuleName, object::SectionedAddress ModuleOffset); + Expected + symbolizeInlinedCode(ArrayRef BuildID, + object::SectionedAddress ModuleOffset); Expected symbolizeData(const ObjectFile &Obj, object::SectionedAddress ModuleOffset); Expected symbolizeData(const std::string &ModuleName, object::SectionedAddress 
ModuleOffset); + Expected symbolizeData(ArrayRef BuildID, + object::SectionedAddress ModuleOffset); Expected> symbolizeFrame(const ObjectFile &Obj, object::SectionedAddress ModuleOffset); Expected> symbolizeFrame(const std::string &ModuleName, object::SectionedAddress ModuleOffset); + Expected> + symbolizeFrame(ArrayRef BuildID, + object::SectionedAddress ModuleOffset); void flush(); static std::string @@ -84,6 +95,12 @@ const SymbolizableModule *DbiModuleDescriptor); private: + /// Finds the debug binary for \p BuildID, consulting the BuildIDPaths cache + /// first. Returns true and stores the path in \p Result on success. A failed + /// lookup returns false and is cached as an empty path, but is still retried. + bool getOrFindDebugBinary(const ArrayRef BuildID, + std::string &Result); + // Bundles together object file with code/data and object file with // corresponding debug info. These objects can be the same. using ObjectPair = std::pair; @@ -111,6 +128,8 @@ Expected getOrCreateModuleInfo(const std::string &ModuleName); Expected getOrCreateModuleInfo(const ObjectFile &Obj); + Expected + getOrCreateModuleInfo(ArrayRef BuildID); Expected createModuleInfo(const ObjectFile *Obj, std::unique_ptr Context, @@ -138,6 +157,9 @@ std::map, std::less<>> Modules; + /// Caches debug binary paths by build ID. An empty path marks a lookup that + /// previously failed. + StringMap BuildIDPaths; /// Contains cached results of getOrCreateObjectPair(). 
std::map, ObjectPair> diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp --- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -81,6 +81,12 @@ return symbolizeCodeCommon(ModuleName, ModuleOffset); } +Expected +LLVMSymbolizer::symbolizeCode(ArrayRef BuildID, + object::SectionedAddress ModuleOffset) { + return symbolizeCodeCommon(BuildID, ModuleOffset); +} + template Expected LLVMSymbolizer::symbolizeInlinedCodeCommon( const T &ModuleSpecifier, object::SectionedAddress ModuleOffset) { @@ -124,6 +130,12 @@ return symbolizeInlinedCodeCommon(ModuleName, ModuleOffset); } +Expected +LLVMSymbolizer::symbolizeInlinedCode(ArrayRef BuildID, + object::SectionedAddress ModuleOffset) { + return symbolizeInlinedCodeCommon(BuildID, ModuleOffset); +} + template Expected LLVMSymbolizer::symbolizeDataCommon(const T &ModuleSpecifier, @@ -163,6 +175,12 @@ return symbolizeDataCommon(ModuleName, ModuleOffset); } +Expected +LLVMSymbolizer::symbolizeData(ArrayRef BuildID, + object::SectionedAddress ModuleOffset) { + return symbolizeDataCommon(BuildID, ModuleOffset); +} + template Expected> LLVMSymbolizer::symbolizeFrameCommon(const T &ModuleSpecifier, @@ -198,15 +216,21 @@ return symbolizeFrameCommon(ModuleName, ModuleOffset); } +Expected> +LLVMSymbolizer::symbolizeFrame(ArrayRef BuildID, + object::SectionedAddress ModuleOffset) { + return symbolizeFrameCommon(BuildID, ModuleOffset); +} + void LLVMSymbolizer::flush() { ObjectForUBPathAndArch.clear(); BinaryForPath.clear(); ObjectPairForPathArch.clear(); Modules.clear(); + BuildIDPaths.clear(); } namespace { - // For Path="/path/to/foo" and Basename="foo" assume that debug info is in // /path/to/foo.dSYM/Contents/Resources/DWARF/foo. 
// For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in @@ -350,9 +374,24 @@ llvm_unreachable("unsupported file format"); return BuildID; } +} // end anonymous namespace + +static StringRef getBuildIDStr(ArrayRef BuildID) { + return StringRef(reinterpret_cast(BuildID.data()), + BuildID.size()); +} + +bool LLVMSymbolizer::getOrFindDebugBinary(const ArrayRef BuildID, + std::string &Result) { + StringRef BuildIDStr = getBuildIDStr(BuildID); + auto I = BuildIDPaths.find(BuildIDStr); + // A cached empty string means the lookup previously failed. Retry in that + // case, since the failure may be transient. + if (I != BuildIDPaths.end() && !I->second.empty()) { + Result = I->second; + return true; + } -bool findDebugBinary(const std::vector &DebugFileDirectory, - const ArrayRef BuildID, std::string &Result) { auto getDebugPath = [&](StringRef Directory) { SmallString<128> Path{Directory}; sys::path::append(Path, ".build-id", @@ -361,7 +400,13 @@ Path += ".debug"; return Path; }; - if (DebugFileDirectory.empty()) { + auto recordPath = [&](StringRef Path) { + Result = Path.str(); + // Overwrite rather than insert: a retry may replace a cached failure (""). + BuildIDPaths[BuildIDStr] = Result; + }; + + if (Opts.DebugFileDirectory.empty()) { SmallString<128> Path = getDebugPath( #if defined(__NetBSD__) // Try /usr/libdata/debug/.build-id/../... @@ -372,15 +417,15 @@ #endif ); if (llvm::sys::fs::exists(Path)) { - Result = std::string(Path.str()); + recordPath(Path); return true; } } else { - for (const auto &Directory : DebugFileDirectory) { + for (const auto &Directory : Opts.DebugFileDirectory) { // Try /.build-id/../... SmallString<128> Path = getDebugPath(Directory); if (llvm::sys::fs::exists(Path)) { - Result = std::string(Path.str()); + recordPath(Path); return true; } } @@ -389,14 +434,16 @@ Expected PathOrErr = getCachedOrDownloadDebuginfo(BuildID); if (!PathOrErr) { consumeError(PathOrErr.takeError()); + // Record that an error occurred. 
This silences error reporting, but the + // lookup may still be retried. + if (I == BuildIDPaths.end()) + recordPath(""); return false; } - Result = *PathOrErr; + recordPath(*PathOrErr); return true; } -} // end anonymous namespace - ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, const MachOObjectFile *MachExeObj, const std::string &ArchName) { @@ -458,7 +505,7 @@ if (BuildID->size() < 2) return nullptr; std::string DebugBinaryPath; - if (!findDebugBinary(Opts.DebugFileDirectory, *BuildID, DebugBinaryPath)) + if (!getOrFindDebugBinary(*BuildID, DebugBinaryPath)) return nullptr; auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName); if (!DbgObjOrErr) { @@ -623,6 +670,23 @@ return createModuleInfo(&Obj, std::move(Context), ObjName); } +Expected +LLVMSymbolizer::getOrCreateModuleInfo(ArrayRef BuildID) { + StringRef BuildIDStr = getBuildIDStr(BuildID); + auto I = BuildIDPaths.find(BuildIDStr); + bool ErrorAlreadyReported = I != BuildIDPaths.end() && I->second.empty(); + + std::string Path; + if (!getOrFindDebugBinary(BuildID, Path)) { + if (!ErrorAlreadyReported) + return createStringError(errc::no_such_file_or_directory, + Twine("could not find build ID '") + + toHex(BuildID) + "'"); + return nullptr; + } + return getOrCreateModuleInfo(Path); +} + namespace { // Undo these various manglings for Win32 extern "C" functions: diff --git a/llvm/test/tools/llvm-symbolizer/debuginfod-bad-build-id.test b/llvm/test/tools/llvm-symbolizer/debuginfod-bad-build-id.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/debuginfod-bad-build-id.test @@ -0,0 +1,3 @@ +RUN: not llvm-symbolizer --build-id=not_a_hex_string 0x1234 2>&1 | FileCheck %s + +CHECK: --build-id=: expected a build ID, but got 'not_a_hex_string' diff --git a/llvm/test/tools/llvm-symbolizer/debuginfod-build-id-and-obj.test b/llvm/test/tools/llvm-symbolizer/debuginfod-build-id-and-obj.test new file mode 100644 --- /dev/null +++ 
b/llvm/test/tools/llvm-symbolizer/debuginfod-build-id-and-obj.test @@ -0,0 +1,3 @@ +RUN: not llvm-symbolizer --build-id=abc --obj=bad 0x1234 2>&1 | FileCheck %s + +CHECK: error: cannot specify both --build-id and --obj diff --git a/llvm/test/tools/llvm-symbolizer/debuginfod-missing-build-id.test b/llvm/test/tools/llvm-symbolizer/debuginfod-missing-build-id.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/debuginfod-missing-build-id.test @@ -0,0 +1,11 @@ +RUN: llvm-symbolizer --build-id=abad 0x1234 0x5678 > %t.stdout 2> %t.stderr +RUN: FileCheck %s --check-prefix=STDOUT < %t.stdout +RUN: FileCheck %s --check-prefix=STDERR < %t.stderr + +STDOUT: ?? +STDOUT: ??:0:0 +STDOUT: ?? +STDOUT: ??:0:0 + +STDERR: LLVMSymbolizer: error reading file: could not find build ID 'ABAD' +STDERR-NOT: LLVMSymbolizer diff --git a/llvm/test/tools/llvm-symbolizer/debuginfod.test b/llvm/test/tools/llvm-symbolizer/debuginfod.test --- a/llvm/test/tools/llvm-symbolizer/debuginfod.test +++ b/llvm/test/tools/llvm-symbolizer/debuginfod.test @@ -25,3 +25,8 @@ RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \ RUN: --obj=%t/addr.exe 0x40054d | FileCheck %s --check-prefix=FOUND FOUND: {{[/\]+}}tmp{{[/\]+}}x.c:14:0 + +# This should also work if the build ID is provided. 
+RUN: env DEBUGINFOD_CACHE_PATH=%t llvm-symbolizer \ +RUN: --build-id=127da749021c1fc1a58cba734a1f542cbe2b7ce4 0x40054d | \ +RUN: FileCheck %s --check-prefix=FOUND diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td --- a/llvm/tools/llvm-symbolizer/Opts.td +++ b/llvm/tools/llvm-symbolizer/Opts.td @@ -21,6 +21,7 @@ : Eq<"adjust-vma", "Add specified offset to object file addresses">, MetaVarName<"">; def basenames : Flag<["--"], "basenames">, HelpText<"Strip directory names from paths">; +defm build_id : Eq<"build-id", "Build ID used to look up the object file">; defm debug_file_directory : Eq<"debug-file-directory", "Path to directory where to look for debug files">, MetaVarName<"">; defm default_arch : Eq<"default-arch", "Default architecture (for multi-arch objects)">, diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "Opts.inc" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" #include "llvm/DebugInfo/Symbolize/DIPrinter.h" @@ -102,8 +103,8 @@ Frame, }; -static bool parseCommand(StringRef BinaryName, bool IsAddr2Line, - StringRef InputString, Command &Cmd, +static bool parseCommand(StringRef BinaryName, ArrayRef BuildID, + bool IsAddr2Line, StringRef InputString, Command &Cmd, std::string &ModuleName, uint64_t &ModuleOffset) { const char kDelimiters[] = " \n\r"; ModuleName = ""; @@ -119,7 +120,7 @@ } const char *Pos = InputString.data(); // Skip delimiters and parse input filename (if needed). 
- if (BinaryName.empty()) { + if (BinaryName.empty() && BuildID.empty()) { Pos += strspn(Pos, kDelimiters); if (*Pos == '"' || *Pos == '\'') { char Quote = *Pos; @@ -148,31 +149,26 @@ return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset); } -static void symbolizeInput(const opt::InputArgList &Args, uint64_t AdjustVMA, - bool IsAddr2Line, OutputStyle Style, - StringRef InputString, LLVMSymbolizer &Symbolizer, - DIPrinter &Printer) { - Command Cmd; - std::string ModuleName; - uint64_t Offset = 0; - if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line, - StringRef(InputString), Cmd, ModuleName, Offset)) { - Printer.printInvalidCommand({ModuleName, None}, InputString); - return; - } - +// Symbolizes Offset in the module identified by ModuleSpec and prints the +// result; ModuleName is only passed to print() to label the output. +template +static void executeCommand(StringRef ModuleName, const T &ModuleSpec, + Command Cmd, uint64_t Offset, uint64_t AdjustVMA, + bool ShouldInline, OutputStyle Style, + LLVMSymbolizer &Symbolizer, DIPrinter &Printer) { uint64_t AdjustedOffset = Offset - AdjustVMA; + object::SectionedAddress Address = {AdjustedOffset, + object::SectionedAddress::UndefSection}; if (Cmd == Command::Data) { - Expected ResOrErr = Symbolizer.symbolizeData( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + Expected ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); } else if (Cmd == Command::Frame) { - Expected> ResOrErr = Symbolizer.symbolizeFrame( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + Expected> ResOrErr = + Symbolizer.symbolizeFrame(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); - } else if (Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line)) { - Expected ResOrErr = Symbolizer.symbolizeInlinedCode( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + } else if (ShouldInline) { + Expected ResOrErr = + Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); } else if (Style == 
OutputStyle::GNU) { // With PrintFunctions == FunctionNameKind::LinkageName (default) @@ -181,8 +177,8 @@ // caller function in the inlining chain. This contradicts the existing // behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only // the topmost function, which suits our needs better. - Expected ResOrErr = Symbolizer.symbolizeInlinedCode( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + Expected ResOrErr = + Symbolizer.symbolizeInlinedCode(ModuleSpec, Address); Expected Res0OrErr = !ResOrErr ? Expected(ResOrErr.takeError()) @@ -190,12 +186,37 @@ : ResOrErr->getFrame(0)); print({ModuleName, Offset}, Res0OrErr, Printer); } else { - Expected ResOrErr = Symbolizer.symbolizeCode( - ModuleName, {AdjustedOffset, object::SectionedAddress::UndefSection}); + Expected ResOrErr = + Symbolizer.symbolizeCode(ModuleSpec, Address); print({ModuleName, Offset}, ResOrErr, Printer); } } +static void symbolizeInput(const opt::InputArgList &Args, + ArrayRef BuildID, uint64_t AdjustVMA, + bool IsAddr2Line, OutputStyle Style, + StringRef InputString, LLVMSymbolizer &Symbolizer, + DIPrinter &Printer) { + Command Cmd; + std::string ModuleName; + uint64_t Offset = 0; + if (!parseCommand(Args.getLastArgValue(OPT_obj_EQ), BuildID, IsAddr2Line, + StringRef(InputString), Cmd, ModuleName, Offset)) { + Printer.printInvalidCommand({ModuleName, None}, InputString); + return; + } + bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line); + if (!BuildID.empty()) { + assert(ModuleName.empty()); + std::string BuildIDStr = toHex(BuildID); + executeCommand(BuildIDStr, BuildID, Cmd, Offset, AdjustVMA, ShouldInline, + Style, Symbolizer, Printer); + } else { + executeCommand(ModuleName, ModuleName, Cmd, Offset, AdjustVMA, ShouldInline, + Style, Symbolizer, Printer); + } +} + static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl, raw_ostream &OS) { const char HelpText[] = " [options] addresses..."; @@ -260,6 +281,25 @@ 
return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName; } +// Parses the hex string given for option ID into build ID bytes. Prints an +// error and exits if the value is not valid hex; returns {} if ID is absent. +static SmallVector parseBuildIDArg(const opt::InputArgList &Args, + int ID) { + if (const opt::Arg *A = Args.getLastArg(ID)) { + StringRef V(A->getValue()); + std::string Bytes; + if (!tryGetFromHex(V, Bytes)) { + errs() << A->getSpelling() + ": expected a build ID, but got '" + V + + "'\n"; + exit(1); + } + ArrayRef BuildID(reinterpret_cast(Bytes.data()), + Bytes.size()); + return SmallVector(BuildID.begin(), BuildID.end()); + } + return {}; +} + int main(int argc, char **argv) { InitLLVM X(argc, argv); // The HTTPClient must be initialized for use by the debuginfod client. @@ -329,6 +369,13 @@ Style = OutputStyle::LLVM; } + if (!Args.getLastArgValue(OPT_build_id_EQ).empty() && + !Args.getLastArgValue(OPT_obj_EQ).empty()) { + errs() << "error: cannot specify both --build-id and --obj\n"; + return EXIT_FAILURE; + } + SmallVector BuildID = parseBuildIDArg(Args, OPT_build_id_EQ); + LLVMSymbolizer Symbolizer(Opts); std::unique_ptr Printer; if (Style == OutputStyle::GNU) @@ -348,15 +395,15 @@ std::string StrippedInputString(InputString); llvm::erase_if(StrippedInputString, [](char c) { return c == '\r' || c == '\n'; }); - symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, StrippedInputString, - Symbolizer, *Printer); + symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, + StrippedInputString, Symbolizer, *Printer); outs().flush(); } } else { Printer->listBegin(); for (StringRef Address : InputAddresses) - symbolizeInput(Args, AdjustVMA, IsAddr2Line, Style, Address, Symbolizer, - *Printer); + symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address, + Symbolizer, *Printer); Printer->listEnd(); }