Index: llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h =================================================================== --- llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -243,6 +243,8 @@ std::function Evictor; }; +Optional> getBuildID(const ELFObjectFileBase *Obj); + } // end namespace symbolize } // end namespace llvm Index: llvm/include/llvm/Debuginfod/Debuginfod.h =================================================================== --- llvm/include/llvm/Debuginfod/Debuginfod.h +++ llvm/include/llvm/Debuginfod/Debuginfod.h @@ -7,23 +7,31 @@ //===----------------------------------------------------------------------===// /// /// \file -/// This file contains the declarations of getCachedOrDownloadArtifact and -/// several convenience functions for specific artifact types: +/// This file contains several declarations for the debuginfod client and +/// server. The client functions are getCachedOrDownloadArtifact and several +/// convenience functions for specific artifact types: /// getCachedOrDownloadSource, getCachedOrDownloadExecutable, and -/// getCachedOrDownloadDebuginfo. This file also declares -/// getDefaultDebuginfodUrls and getDefaultDebuginfodCacheDirectory. -/// +/// getCachedOrDownloadDebuginfo. For the server, this file declares the +/// DebuginfodLogEntry and DebuginfodServer structs, as well as the +/// DebuginfodLog, DebuginfodCollection classes. /// //===----------------------------------------------------------------------===// #ifndef LLVM_DEBUGINFOD_DEBUGINFOD_H #define LLVM_DEBUGINFOD_DEBUGINFOD_H +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Debuginfod/HTTPServer.h" #include "llvm/Support/Error.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/RWMutex.h" +#include "llvm/Support/Timer.h" #include +#include namespace llvm { @@ -31,14 +39,6 @@ typedef SmallVector BuildID; -/// Finds default array of Debuginfod server URLs by checking DEBUGINFOD_URLS -/// environment variable. -Expected> getDefaultDebuginfodUrls(); - -/// Finds a default local file caching directory for the debuginfod client, -/// first checking DEBUGINFOD_CACHE_PATH. -Expected getDefaultDebuginfodCacheDirectory(); - /// Finds a default timeout for debuginfod HTTP requests. Checks /// DEBUGINFOD_TIMEOUT environment variable, default is 90 seconds (90000 ms). std::chrono::milliseconds getDefaultDebuginfodTimeout(); @@ -68,6 +68,67 @@ StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath, ArrayRef DebuginfodUrls, std::chrono::milliseconds Timeout); +class ThreadPool; + +struct DebuginfodLogEntry { + std::string Message; + DebuginfodLogEntry() = default; + DebuginfodLogEntry(StringRef Message); +}; + +class DebuginfodLog { + std::mutex QueueMutex; + std::condition_variable QueueCondition; + std::queue LogEntryQueue; + +public: + // Adds a log entry to end of the queue. + void push(DebuginfodLogEntry Entry); + // Adds a log entry to end of the queue. + void push(StringRef Message); + // If there are log entries in the queue, pops and returns the first one. + // Otherwise returns None. + Optional pop(); + /// Blocking wait for the log entry queue to have a message. + void wait(); +}; + +/// Tracks a collection of debuginfod artifacts on the local filesystem. +class DebuginfodCollection { + SmallVector Paths; + sys::RWMutex BinariesMutex; + StringMap Binaries; + sys::RWMutex DebugBinariesMutex; + StringMap DebugBinaries; + Error findBinaries(StringRef Path); + Expected> getDebugBinaryPath(BuildIDRef); + Expected> getBinaryPath(BuildIDRef); + Error updateIfStale(); + DebuginfodLog &Log; + ThreadPool &Pool; + Timer UpdateTimer; + sys::Mutex UpdateMutex; + + // Minimum update interval, in seconds, for on-demand updates triggered when a + // build-id is not found. + double MinInterval; + +public: + DebuginfodCollection(ArrayRef Paths, DebuginfodLog &Log, + ThreadPool &Pool, double MinInterval); + Error update(); + Error updateForever(std::chrono::milliseconds Interval); + Expected findDebugBinaryPath(BuildIDRef); + Expected findBinaryPath(BuildIDRef); +}; + +struct DebuginfodServer { + HTTPServer Server; + DebuginfodLog &Log; + DebuginfodCollection &Collection; + DebuginfodServer(DebuginfodLog &Log, DebuginfodCollection &Collection); +}; + } // end namespace llvm #endif Index: llvm/include/llvm/Support/ThreadPool.h =================================================================== --- llvm/include/llvm/Support/ThreadPool.h +++ llvm/include/llvm/Support/ThreadPool.h @@ -64,6 +64,9 @@ /// It is an error to try to add new tasks while blocking on this call. void wait(); + /// Blocking wait for the queue to have size at most Size. + void waitQueueSize(size_t Size = 0); + // TODO: misleading legacy name warning! // Returns the maximum number of worker threads in the pool, not the current // number of threads! @@ -156,6 +159,9 @@ /// Signaling for job completion std::condition_variable CompletionCondition; + /// Signaling for queue size decreases + std::condition_variable QueueSizeDecreaseCondition; + /// Keep track of the number of thread actually busy unsigned ActiveThreads = 0; Index: llvm/lib/DebugInfo/Symbolize/Symbolize.cpp =================================================================== --- llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -323,6 +323,8 @@ return {}; } +} // end anonymous namespace + Optional> getBuildID(const ELFObjectFileBase *Obj) { Optional> BuildID; if (auto *O = dyn_cast>(Obj)) @@ -338,8 +340,6 @@ return BuildID; } -} // end anonymous namespace - ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath, const MachOObjectFile *MachExeObj, const std::string &ArchName) { Index: llvm/lib/Debuginfod/CMakeLists.txt =================================================================== --- llvm/lib/Debuginfod/CMakeLists.txt +++ llvm/lib/Debuginfod/CMakeLists.txt @@ -25,4 +25,5 @@ LINK_COMPONENTS Support Symbolize + DebugInfoDWARF ) Index: llvm/lib/Debuginfod/Debuginfod.cpp =================================================================== --- llvm/lib/Debuginfod/Debuginfod.cpp +++ llvm/lib/Debuginfod/Debuginfod.cpp @@ -8,25 +8,41 @@ /// /// \file /// -/// This file defines the fetchInfo function, which retrieves -/// any of the three supported artifact types: (executable, debuginfo, source -/// file) associated with a build-id from debuginfod servers. If a source file -/// is to be fetched, its absolute path must be specified in the Description -/// argument to fetchInfo. +/// This file contains several definitions for the debuginfod client and server. +/// For the client, this file defines the fetchInfo function. For the server, +/// this file defines the DebuginfodLogEntry and DebuginfodServer structs, as +/// well as the DebuginfodLog, DebuginfodCollection classes. The fetchInfo +/// function retrieves any of the three supported artifact types: (executable, +/// debuginfo, source file) associated with a build-id from debuginfod servers. +/// If a source file is to be fetched, its absolute path must be specified in +/// the Description argument to fetchInfo. The DebuginfodLogEntry, +/// DebuginfodLog, and DebuginfodCollection are used by the DebuginfodServer to +/// scan the local filesystem for binaries and serve the debuginfod protocol. /// //===----------------------------------------------------------------------===// #include "llvm/Debuginfod/Debuginfod.h" #include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/Debuginfod/HTTPClient.h" +#include "llvm/Object/Binary.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/CachePruning.h" #include "llvm/Support/Caching.h" #include "llvm/Support/Errc.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Path.h" +#include "llvm/Support/ThreadPool.h" #include "llvm/Support/xxhash.h" +#include + +#define DEBUG_TYPE "Debuginfod" + namespace llvm { static std::string uniqueKey(llvm::StringRef S) { return utostr(xxHash64(S)); } @@ -36,6 +52,8 @@ return llvm::toHex(ID, /*LowerCase=*/true); } +/// Finds default array of Debuginfod server URLs by checking DEBUGINFOD_URLS +/// environment variable. Expected> getDefaultDebuginfodUrls() { const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS"); if (DebuginfodUrlsEnv == nullptr) @@ -46,6 +64,10 @@ return DebuginfodUrls; } +Expected> getDefaultDebuginfodUrls(); + +/// Finds a default local file caching directory for the debuginfod client, +/// first checking DEBUGINFOD_CACHE_PATH. Expected getDefaultDebuginfodCacheDirectory() { if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH")) return CacheDirectoryEnv; @@ -183,4 +205,295 @@ return createStringError(errc::argument_out_of_domain, "build id not found"); } + +DebuginfodLogEntry::DebuginfodLogEntry(StringRef Message) : Message(Message) {} + +void DebuginfodLog::push(StringRef Message) { + push(DebuginfodLogEntry(Message)); +} + +void DebuginfodLog::push(DebuginfodLogEntry Entry) { + { + std::lock_guard Guard(QueueMutex); + LogEntryQueue.push(Entry); + } + QueueCondition.notify_one(); +} + +void DebuginfodLog::wait() { + std::unique_lock Guard(QueueMutex); + // Wait for messages to be pushed into the queue + QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); }); +} + +Optional DebuginfodLog::pop() { + std::lock_guard Guard(QueueMutex); + if (!LogEntryQueue.size()) + return None; + DebuginfodLogEntry Entry = LogEntryQueue.front(); + LogEntryQueue.pop(); + return Entry; +} + +DebuginfodCollection::DebuginfodCollection(ArrayRef PathsRef, + DebuginfodLog &Log, ThreadPool &Pool, + double MinInterval) + : Log(Log), Pool(Pool), MinInterval(MinInterval) { + for (auto Path : PathsRef) + Paths.push_back(Path.str()); +} + +Error DebuginfodCollection::update() { + std::lock_guard Guard(UpdateMutex); + + for (auto Path : Paths) { + Log.push("Updating binaries at path " + Path); + if (Error Err = findBinaries(Path)) + return Err; + } + Log.push("Updated collection"); + return Error::success(); +} + +Error DebuginfodCollection::updateIfStale() { + if (!UpdateTimer.isRunning() || + UpdateTimer.getTotalTime().getWallTime() < MinInterval) + return createStringError(errc::device_or_resource_busy, + "the collection is not stale"); + if (Error Err = update()) + return Err; + return Error::success(); +} + +Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) { + while (true) { + if (Error Err = update()) + return Err; + std::this_thread::sleep_for(Interval); + } + llvm_unreachable("updateForever loop should never end"); +} + +static bool isDebugBinary(object::ObjectFile *Object) { + // TODO: handle PDB debuginfo + std::unique_ptr Context = DWARFContext::create( + *Object, DWARFContext::ProcessDebugRelocations::Process); + const DWARFObject &DObj = Context->getDWARFObj(); + unsigned NumSections = 0; + DObj.forEachInfoSections([&](const DWARFSection &S) { NumSections++; }); + return NumSections; +} + +static bool hasELFMagic(StringRef FilePath) { + file_magic Type; + std::error_code EC = identify_magic(FilePath, Type); + if (EC) + return false; + switch (Type) { + case file_magic::elf: + case file_magic::elf_relocatable: + case file_magic::elf_executable: + case file_magic::elf_shared_object: + case file_magic::elf_core: + return true; + default: + return false; + } +} + +Error DebuginfodCollection::findBinaries(StringRef Path) { + std::error_code ec; + + for (sys::fs::recursive_directory_iterator i(Twine(Path), ec), e; i != e; + i.increment(ec)) { + if (ec) + return errorCodeToError(ec); + + std::string FilePath = i->path(); + + Pool.async([FilePath, this]() -> void { + if (!hasELFMagic(FilePath)) + return; + + Expected> BinOrErr = + object::createBinary(FilePath); + + if (!BinOrErr) { + consumeError(BinOrErr.takeError()); + return; + } + object::Binary *Bin = std::move(BinOrErr.get().getBinary()); + if (!Bin->isObject()) + return; + + // TODO: Support non-ELF binaries + object::ELFObjectFileBase *Object = + dyn_cast(Bin); + if (!Object) + return; + + Optional ID = symbolize::getBuildID(Object); + if (!ID) + return; + + std::string IDString = buildIDToString(ID.getValue()); + if (isDebugBinary(Object)) { + bool LockSucceeded = DebugBinariesMutex.lock(); + assert(LockSucceeded && "Failed to acquire writer lock."); + DebugBinaries[IDString] = FilePath; + LockSucceeded = DebugBinariesMutex.unlock(); + assert(LockSucceeded && "Failed to release writer lock."); + } else { + bool LockSucceeded = BinariesMutex.lock(); + assert(LockSucceeded && "Failed to acquire writer lock."); + Binaries[IDString] = FilePath; + LockSucceeded = BinariesMutex.unlock(); + assert(LockSucceeded && "Failed to release writer lock."); + } + return; + }); + // Wait for empty queue before proceeding to the next file to avoid + // unbounded memory usage + Pool.waitQueueSize(); + } + return Error::success(); +} + +Expected> +DebuginfodCollection::getBinaryPath(BuildIDRef ID) { + + Log.push("getting binary path of ID " + buildIDToString(ID)); + if (!BinariesMutex.lock_shared()) + return createStringError(errc::resource_deadlock_would_occur, + "Failed to acquire reader lock."); + auto Loc = Binaries.find(buildIDToString(ID)); + if (Loc != Binaries.end()) { + std::string Path = Loc->getValue(); + if (!BinariesMutex.unlock_shared()) + return createStringError(errc::resource_deadlock_would_occur, + "Failed to release reader lock."); + return Path; + } + if (!BinariesMutex.unlock_shared()) + return createStringError(errc::resource_deadlock_would_occur, + "Failed to release reader lock."); + return None; +} + +Expected> +DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) { + + Log.push("getting debug binary path of ID " + buildIDToString(ID)); + if (!DebugBinariesMutex.lock_shared()) + return createStringError(errc::resource_deadlock_would_occur, + "Failed to acquire reader lock."); + auto Loc = DebugBinaries.find(buildIDToString(ID)); + if (Loc != DebugBinaries.end()) { + std::string Path = Loc->getValue(); + if (!DebugBinariesMutex.unlock_shared()) + return createStringError(errc::resource_deadlock_would_occur, + "Failed to release reader lock."); + return Path; + } + if (!DebugBinariesMutex.unlock_shared()) + return createStringError(errc::resource_deadlock_would_occur, + "Failed to release reader lock."); + return None; +} + +Expected DebuginfodCollection::findBinaryPath(BuildIDRef ID) { + { + // check collection, perform on-demand update if stale + Expected> PathOrErr = getBinaryPath(ID); + if (!PathOrErr) + return PathOrErr.takeError(); + Optional Path = *PathOrErr; + if (!Path) { + if (Error Err = updateIfStale()) + return std::move(Err); + // try once more + PathOrErr = getBinaryPath(ID); + if (!PathOrErr) + return PathOrErr.takeError(); + Path = *PathOrErr; + } + if (Path) + return Path.getValue(); + } + + // federation + Expected PathOrErr = getCachedOrDownloadExecutable(ID); + if (!PathOrErr) + consumeError(PathOrErr.takeError()); + + // fall-back to debug binary + return findDebugBinaryPath(ID); +} + +Expected DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) { + { + // check collection, perform on-demand update if stale + Expected> PathOrErr = getDebugBinaryPath(ID); + if (!PathOrErr) + return PathOrErr.takeError(); + Optional Path = *PathOrErr; + if (!Path) { + if (Error Err = updateIfStale()) + return std::move(Err); + // try once more + PathOrErr = getDebugBinaryPath(ID); + if (!PathOrErr) + return PathOrErr.takeError(); + Path = *PathOrErr; + } + if (Path) + return Path.getValue(); + } + + // federation + return getCachedOrDownloadDebuginfo(ID); +} + +DebuginfodServer::DebuginfodServer(DebuginfodLog &Log, + DebuginfodCollection &Collection) + : Log(Log), Collection(Collection) { + cantFail( + Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) { + Log.push("GET " + Request.UrlPath); + std::string IDString; + if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) { + Request.setResponse( + {404, "text/plain", "Build ID is not a hex string\n"}); + return; + } + BuildID ID(IDString.begin(), IDString.end()); + Expected PathOrErr = Collection.findDebugBinaryPath(ID); + if (Error Err = PathOrErr.takeError()) { + consumeError(std::move(Err)); + Request.setResponse({404, "text/plain", "Build ID not found\n"}); + return; + } + streamFile(Request, *PathOrErr); + return; + })); + cantFail( + Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) { + Log.push("GET " + Request.UrlPath); + std::string IDString; + if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) { + Request.setResponse( + {404, "text/plain", "Build ID is not a hex string\n"}); + return; + } + BuildID ID(IDString.begin(), IDString.end()); + Expected PathOrErr = Collection.findBinaryPath(ID); + if (Error Err = PathOrErr.takeError()) { + consumeError(std::move(Err)); + Request.setResponse({404, "text/plain", "Build ID not found\n"}); + return; + } + streamFile(Request, *PathOrErr); + })); +} + } // namespace llvm Index: llvm/lib/Support/ThreadPool.cpp =================================================================== --- llvm/lib/Support/ThreadPool.cpp +++ llvm/lib/Support/ThreadPool.cpp @@ -54,6 +54,9 @@ ++ActiveThreads; Task = std::move(Tasks.front()); Tasks.pop(); + + // Notify the condition variable that the queue size has decreased. + QueueSizeDecreaseCondition.notify_one(); } // Run the task we just grabbed Task(); @@ -80,6 +83,13 @@ CompletionCondition.wait(LockGuard, [&] { return workCompletedUnlocked(); }); } +void ThreadPool::waitQueueSize(size_t Size) { + // Wait for the queue to have at most Size elements + std::unique_lock LockGuard(QueueLock); + QueueSizeDecreaseCondition.wait(LockGuard, + [&] { return Tasks.size() <= Size; }); +} + bool ThreadPool::isWorkerThread() const { std::unique_lock LockGuard(ThreadsLock); llvm::thread::id CurrentThreadId = llvm::this_thread::get_id();