Index: llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
===================================================================
--- llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -154,6 +154,11 @@
   Options Opts;
 };
 
+/// Returns the build ID of the ELF object \p Obj, dispatching on its concrete
+/// ELFObjectFile instantiation. Declared here so that code outside the
+/// symbolizer, such as the Debuginfod library, can call it.
+Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj);
+
 } // end namespace symbolize
 
 } // end namespace llvm
Index: llvm/include/llvm/Debuginfod/Debuginfod.h
===================================================================
--- llvm/include/llvm/Debuginfod/Debuginfod.h
+++ llvm/include/llvm/Debuginfod/Debuginfod.h
@@ -19,9 +19,13 @@
 #ifndef LLVM_DEBUGINFOD_DEBUGINFOD_H
 #define LLVM_DEBUGINFOD_DEBUGINFOD_H
 
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Debuginfod/HTTPServer.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Mutex.h"
+#include <chrono>
 
 namespace llvm {
 
@@ -66,6 +70,49 @@
     StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,
     ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout);
 
+class ThreadPool;
+
+/// Tracks a collection of debuginfod artifacts on the local filesystem.
+///
+/// update() recursively scans each configured path and indexes, by build ID,
+/// the ELF object files found there that have one. Objects that contain DWARF
+/// info sections are indexed as debug binaries, all others as plain binaries.
+class DebuginfodCollection {
+  SmallVector<std::string, 1> Paths;
+  /// Guards Binaries.
+  sys::Mutex BinariesMutex;
+  StringMap<std::string> Binaries;
+  /// Guards DebugBinaries.
+  sys::Mutex DebugBinariesMutex;
+  StringMap<std::string> DebugBinaries;
+  Error findBinaries(StringRef Path);
+  ThreadPool &Pool;
+  size_t Concurrency = 1;
+
+public:
+  DebuginfodCollection(ArrayRef<StringRef> Paths, ThreadPool &Pool,
+                       size_t Concurrency);
+  /// Scans every path once, stopping at the first error.
+  Error update();
+  /// Calls update() in a loop, sleeping \p Interval between scans. Only
+  /// returns if a scan fails.
+  Error updateForever(std::chrono::milliseconds Interval);
+  /// Returns the path of the debug binary indexed under the given build ID.
+  Expected<std::string> getDebugBinaryPath(BuildIDRef);
+  /// Returns the path of the plain binary indexed under the given build ID,
+  /// falling back to the debug binary if there is none.
+  Expected<std::string> getBinaryPath(BuildIDRef);
+};
+
+/// HTTP front end for a DebuginfodCollection; the constructor registers the
+/// /buildid/<id>/debuginfo and /buildid/<id>/executable handlers on Server.
+class DebuginfodServer {
+public:
+  HTTPServer Server;
+  DebuginfodCollection &Collection;
+  DebuginfodServer(DebuginfodCollection &Collection);
+};
+
 } // end namespace llvm
 
 #endif
Index: llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
===================================================================
--- llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -336,21 +336,6 @@
   return {};
 }
 
-Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) {
-  Optional<ArrayRef<uint8_t>> BuildID;
-  if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Obj))
-    BuildID = getBuildID(O->getELFFile());
-  else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Obj))
-    BuildID = getBuildID(O->getELFFile());
-  else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Obj))
-    BuildID = getBuildID(O->getELFFile());
-  else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Obj))
-    BuildID = getBuildID(O->getELFFile());
-  else
-    llvm_unreachable("unsupported file format");
-  return BuildID;
-}
-
 bool findDebugBinary(const std::vector<std::string> &DebugFileDirectory,
                      const ArrayRef<uint8_t> BuildID, std::string &Result) {
   auto getDebugPath = [&](StringRef Directory) {
@@ -397,6 +382,21 @@
 
 } // end anonymous namespace
 
+Optional<ArrayRef<uint8_t>> getBuildID(const ELFObjectFileBase *Obj) {
+  Optional<ArrayRef<uint8_t>> BuildID;
+  if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Obj))
+    BuildID = getBuildID(O->getELFFile());
+  else if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Obj))
+    BuildID = getBuildID(O->getELFFile());
+  else if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Obj))
+    BuildID = getBuildID(O->getELFFile());
+  else if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Obj))
+    BuildID = getBuildID(O->getELFFile());
+  else
+    llvm_unreachable("unsupported file format");
+  return BuildID;
+}
+
 ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
                                            const MachOObjectFile *MachExeObj,
                                            const std::string &ArchName) {
Index: llvm/lib/Debuginfod/CMakeLists.txt
===================================================================
--- llvm/lib/Debuginfod/CMakeLists.txt
+++ llvm/lib/Debuginfod/CMakeLists.txt
@@ -21,6 +21,8 @@
 
   LINK_COMPONENTS
   Support
+  Symbolize
+  DebugInfoDWARF
   )
 
 # This block is only needed for llvm-config. When we deprecate llvm-config and
Index: llvm/lib/Debuginfod/Debuginfod.cpp
===================================================================
--- llvm/lib/Debuginfod/Debuginfod.cpp
+++ llvm/lib/Debuginfod/Debuginfod.cpp
@@ -17,14 +17,26 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Debuginfod/Debuginfod.h"
-#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/Magic.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
 #include "llvm/Debuginfod/HTTPClient.h"
-#include "llvm/Support/CachePruning.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Caching.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/Mutex.h"
+#include "llvm/Support/TaskQueue.h"
+#include "llvm/Support/ThreadPool.h"
 #include "llvm/Support/xxhash.h"
 
+#include <atomic>
+#include <thread>
+
+#define DEBUG_TYPE "Debuginfod"
+
 namespace llvm {
 static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); }
 
@@ -180,4 +192,204 @@
 
   return createStringError(errc::argument_out_of_domain, "build id not found");
 }
+
+DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,
+                                           ThreadPool &Pool, size_t Concurrency)
+    : Pool(Pool), Concurrency(Concurrency) {
+  for (StringRef Path : PathsRef)
+    Paths.push_back(Path.str());
+}
+
+Error DebuginfodCollection::update() {
+  for (const std::string &Path : Paths) {
+    LLVM_DEBUG(dbgs() << "Updating binaries at path " << Path << "\n");
+    if (Error Err = findBinaries(Path))
+      return Err;
+  }
+  return Error::success();
+}
+
+Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {
+  while (true) {
+    if (Error Err = update())
+      return Err;
+    std::this_thread::sleep_for(Interval);
+  }
+  return Error::success();
+}
+
+/// Returns true if \p Object contains at least one DWARF info section.
+static bool isDebugBinary(object::ObjectFile *Object) {
+  // TODO: handle PDB debuginfo
+  std::unique_ptr<DWARFContext> Context = DWARFContext::create(
+      *Object, DWARFContext::ProcessDebugRelocations::Process);
+  const DWARFObject &DObj = Context->getDWARFObj();
+  bool HasInfo = false;
+  DObj.forEachInfoSections([&](const DWARFSection &) { HasInfo = true; });
+  return HasInfo;
+}
+
+/// Returns true if identify_magic() classifies \p FilePath as any ELF file
+/// kind, and false otherwise (including when the file cannot be identified).
+static bool hasELFMagic(StringRef FilePath) {
+  file_magic Type;
+  std::error_code EC = identify_magic(FilePath, Type);
+  if (EC)
+    return false;
+  switch (Type) {
+  case file_magic::elf:
+  case file_magic::elf_relocatable:
+  case file_magic::elf_executable:
+  case file_magic::elf_shared_object:
+  case file_magic::elf_core:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Recursively scans \p Path and indexes every ELF object that has a build ID.
+/// Candidate files are parsed on the thread pool; the scan throttles itself
+/// based on Concurrency and returns only after every task has finished.
+Error DebuginfodCollection::findBinaries(StringRef Path) {
+  std::error_code EC;
+  std::vector<std::string> FilePaths;
+  for (sys::fs::recursive_directory_iterator I(Twine(Path), EC), E; I != E;
+       I.increment(EC)) {
+    if (EC)
+      return errorCodeToError(EC);
+    FilePaths.push_back(I->path());
+  }
+  // An error that also ends the iteration (for example a root that cannot be
+  // opened) never reaches the check inside the loop body, so check again.
+  if (EC)
+    return errorCodeToError(EC);
+
+  std::atomic<size_t> NumTasksRemaining(FilePaths.size());
+
+  for (size_t i = 0; i < FilePaths.size(); i++) {
+    const std::string &FilePath = FilePaths[i];
+
+    if (!hasELFMagic(FilePath)) {
+      NumTasksRemaining--;
+      continue;
+    }
+
+    // Capture FilePath by value so the task owns its own copy of the string
+    // and cannot observe a name that has gone out of scope.
+    Pool.async([&NumTasksRemaining, FilePath, this]() -> size_t {
+      Expected<object::OwningBinary<object::Binary>> BinOrErr =
+          object::createBinary(FilePath);
+      if (!BinOrErr) {
+        consumeError(BinOrErr.takeError());
+        return NumTasksRemaining--;
+      }
+      object::Binary *Bin = BinOrErr.get().getBinary();
+      if (!Bin->isObject())
+        return NumTasksRemaining--;
+
+      // TODO: Support non-ELF binaries
+      object::ELFObjectFileBase *Object =
+          dyn_cast<object::ELFObjectFileBase>(Bin);
+      if (!Object)
+        return NumTasksRemaining--;
+
+      Optional<BuildIDRef> ID = symbolize::getBuildID(Object);
+      if (!ID)
+        return NumTasksRemaining--;
+
+      std::string IDString = buildIDToString(ID.getValue());
+      LLVM_DEBUG(dbgs() << FilePath << " is an object file with BuildID "
+                        << IDString << "\n";);
+
+      if (isDebugBinary(Object)) {
+        LLVM_DEBUG(dbgs() << FilePath << " is a debug binary.\n";);
+        std::lock_guard<sys::Mutex> Guard(DebugBinariesMutex);
+        DebugBinaries[IDString] = FilePath;
+      } else {
+        LLVM_DEBUG(dbgs() << FilePath << " is not a debug binary.\n";);
+        std::lock_guard<sys::Mutex> Guard(BinariesMutex);
+        Binaries[IDString] = FilePath;
+      }
+      return NumTasksRemaining--;
+    });
+    // Wait for the number of concurrent jobs to go down
+    while (NumTasksRemaining + i > FilePaths.size() + Concurrency) {
+      LLVM_DEBUG(dbgs() << NumTasksRemaining << " tasks remaining\n";);
+      std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    }
+  }
+  // Wait for all jobs to finish
+  while (NumTasksRemaining) {
+    LLVM_DEBUG(dbgs() << NumTasksRemaining << " tasks remaining\n";);
+    std::this_thread::sleep_for(std::chrono::milliseconds(10));
+  }
+  return Error::success();
+}
+
+Expected<std::string> DebuginfodCollection::getBinaryPath(BuildIDRef ID) {
+  LLVM_DEBUG(dbgs() << "getting binary path of ID " << buildIDToString(ID)
+                    << "\n";);
+  {
+    // Scanner tasks insert into Binaries while holding this mutex, so the
+    // lookup takes it too. Released before falling back to the debug map.
+    std::lock_guard<sys::Mutex> Guard(BinariesMutex);
+    auto Loc = Binaries.find(buildIDToString(ID));
+    if (Loc != Binaries.end())
+      return Loc->getValue();
+  }
+  LLVM_DEBUG(dbgs() << "falling back to debug binary\n";);
+  return getDebugBinaryPath(ID);
+}
+
+Expected<std::string> DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {
+  LLVM_DEBUG(dbgs() << "getting debug binary path of ID " << buildIDToString(ID)
+                    << "\n";);
+  // Scanner tasks insert into DebugBinaries while holding this mutex.
+  std::lock_guard<sys::Mutex> Guard(DebugBinariesMutex);
+  auto Loc = DebugBinaries.find(buildIDToString(ID));
+  if (Loc != DebugBinaries.end())
+    return Loc->getValue();
+  return createStringError(errc::no_such_file_or_directory,
+                           "No matching binary available");
+}
+
+DebuginfodServer::DebuginfodServer(DebuginfodCollection &Collection)
+    : Collection(Collection) {
+  cantFail(
+      Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {
+        std::string IDString; // Build ID bytes decoded from the hex URL part.
+        if (!tryGetFromHex(Request.UrlPathMatches[1], IDString)) {
+          Request.setResponse(
+              {404, "text/plain", "Build ID is not a hex string\n"});
+          return;
+        }
+        BuildID ID(IDString.begin(), IDString.end());
+        Expected<std::string> PathOrErr = Collection.getDebugBinaryPath(ID);
+        if (Error Err = PathOrErr.takeError()) {
+          consumeError(std::move(Err));
+          Request.setResponse({404, "text/plain", "Build ID not found\n"});
+          return; // Nothing to stream; *PathOrErr holds no value here.
+        }
+        streamFile(Request, *PathOrErr);
+      }));
+  cantFail(
+      Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {
+        std::string IDString; // Build ID bytes decoded from the hex URL part.
+        if (!tryGetFromHex(Request.UrlPathMatches[1], IDString)) {
+          Request.setResponse(
+              {404, "text/plain", "Build ID is not a hex string\n"});
+          return;
+        }
+        BuildID ID(IDString.begin(), IDString.end());
+        Expected<std::string> PathOrErr = Collection.getBinaryPath(ID);
+        if (Error Err = PathOrErr.takeError()) {
+          consumeError(std::move(Err));
+          Request.setResponse({404, "text/plain", "Build ID not found\n"});
+          return; // Nothing to stream; *PathOrErr holds no value here.
+        }
+        streamFile(Request, *PathOrErr);
+      }));
+}
+
 } // namespace llvm