diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
--- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h
@@ -14,6 +14,8 @@
 #define LLVM_DEBUGINFO_SYMBOLIZE_SYMBOLIZE_H
 
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/simple_ilist.h"
 #include "llvm/DebugInfo/Symbolize/DIFetcher.h"
 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
 #include "llvm/Object/Binary.h"
@@ -22,6 +24,7 @@
 #include "llvm/Support/Error.h"
 #include
 #include
+#include <functional>
 #include
 #include
 #include
@@ -36,6 +39,8 @@
 using FunctionNameKind = DILineInfoSpecifier::FunctionNameKind;
 using FileLineInfoKind = DILineInfoSpecifier::FileLineInfoKind;
 
+class CachedBinary;
+
 class LLVMSymbolizer {
 public:
   struct Options {
@@ -51,6 +56,11 @@
     std::string FallbackDebugPath;
     std::string DWPName;
     std::vector DebugFileDirectory;
+    /// Size in bytes that pruneCache() trims the binary cache down to.
+    size_t MaxCacheSize =
+        sizeof(size_t) == 4
+            ? 512 * 1024 * 1024 /* 512 MiB */
+            : static_cast<size_t>(4ULL * 1024 * 1024 * 1024) /* 4 GiB */;
   };
 
   LLVMSymbolizer() = default;
@@ -91,6 +101,11 @@
                  object::SectionedAddress ModuleOffset);
   void flush();
 
+  // Evict entries from the binary cache until it is under the maximum size
+  // given in the options. Calling this invalidates references in the DI...
+  // objects returned by the methods above.
+  void pruneCache();
+
   static std::string
   DemangleName(const std::string &Name,
                const SymbolizableModule *DbiModuleDescriptor);
@@ -165,6 +180,9 @@
   Expected getOrCreateObject(const std::string &Path,
                                         const std::string &ArchName);
 
+  /// Update the LRU cache order when a binary is accessed.
+  void recordAccess(CachedBinary &Bin);
+
   std::map, std::less<>>
       Modules;
   StringMap BuildIDPaths;
@@ -174,7 +192,12 @@
       ObjectPairForPathArch;
 
   /// Contains parsed binary for each path, or parsing error.
-  std::map> BinaryForPath;
+  std::map<std::string, CachedBinary> BinaryForPath;
+
+  /// A list of cached binaries in LRU order.
+  simple_ilist<CachedBinary> LRUBinaries;
+  /// Sum of the sizes of the cached binaries.
+  size_t CacheSize = 0;
 
   /// Parsed object file for path/architecture pair, where "path" refers
   /// to Mach-O universal binary.
@@ -186,6 +209,46 @@
   SmallVector> DIFetchers;
 };
 
+// A binary intrusively linked into a LRU cache list. If the binary is empty,
+// then the entry marks that an error occurred, and it is not part of the LRU
+// list.
+class CachedBinary : public ilist_node<CachedBinary> {
+public:
+  CachedBinary() = default;
+  // Deliberately implicit so that a loaded OwningBinary can be assigned
+  // straight into an existing cache entry.
+  CachedBinary(OwningBinary<Binary> Bin) : Bin(std::move(Bin)) {}
+
+  OwningBinary<Binary> &operator*() { return Bin; }
+  OwningBinary<Binary> *operator->() { return &Bin; }
+
+  // Add an action to be performed when the binary is evicted, before all
+  // previously registered evictors.
+  void pushEvictor(std::function<void()> NewEvictor);
+
+  // Run all registered evictors in the reverse of the order in which they were
+  // added.
+  void evict() {
+    if (!Evictor)
+      return;
+    // Run the chain from a local: one of the evictors erases this entry from
+    // BinaryForPath, destroying *this and therefore the Evictor member, while
+    // the chain is still executing.
+    std::function<void()> Chain = std::move(Evictor);
+    Chain();
+  }
+
+  // Size in bytes of the binary's data; 0 for an empty (error) entry.
+  size_t size() {
+    Binary *B = Bin.getBinary();
+    return B ? B->getData().size() : 0;
+  }
+
+private:
+  OwningBinary<Binary> Bin;
+  std::function<void()> Evictor;
+};
+
 } // end namespace symbolize
 } // end namespace llvm
 
diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -223,6 +223,8 @@
 
 void LLVMSymbolizer::flush() {
   ObjectForUBPathAndArch.clear();
+  LRUBinaries.clear();
+  CacheSize = 0;
   BinaryForPath.clear();
   ObjectPairForPathArch.clear();
   Modules.clear();
@@ -489,8 +491,13 @@
 LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
                                       const std::string &ArchName) {
   auto I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
-  if (I != ObjectPairForPathArch.end())
+  if (I != ObjectPairForPathArch.end()) {
+    // Use find(): operator[] would insert a spurious empty (error) entry.
+    auto BinIt = BinaryForPath.find(Path);
+    if (BinIt != BinaryForPath.end())
+      recordAccess(BinIt->second);
     return I->second;
+  }
 
   auto ObjOrErr = getOrCreateObject(Path, ArchName);
   if (!ObjOrErr) {
@@ -512,7 +519,14 @@
   if (!DbgObj)
     DbgObj = Obj;
   ObjectPair Res = std::make_pair(Obj, DbgObj);
-  ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
+  std::string DbgObjPath = DbgObj->getFileName().str();
+  auto Pair =
+      ObjectPairForPathArch.emplace(std::make_pair(Path, ArchName), Res);
+  // Use find(): operator[] would insert a spurious empty (error) entry.
+  auto BinIt = BinaryForPath.find(DbgObjPath);
+  if (BinIt != BinaryForPath.end())
+    BinIt->second.pushEvictor(
+        [this, I = Pair.first]() { ObjectPairForPathArch.erase(I); });
   return Res;
 }
 
@@ -522,13 +536,20 @@
   Binary *Bin;
   auto Pair = BinaryForPath.emplace(Path, OwningBinary());
   if (!Pair.second) {
-    Bin = Pair.first->second.getBinary();
+    Bin = Pair.first->second->getBinary();
+    if (Bin)
+      recordAccess(Pair.first->second);
   } else {
     Expected> BinOrErr = createBinary(Path);
     if (!BinOrErr)
       return BinOrErr.takeError();
-    Pair.first->second = std::move(BinOrErr.get());
-    Bin = Pair.first->second.getBinary();
+
+    CachedBinary &CachedBin = Pair.first->second;
+    CachedBin = std::move(BinOrErr.get());
+    CachedBin.pushEvictor([this, I = Pair.first]() { BinaryForPath.erase(I); });
+    LRUBinaries.push_back(CachedBin);
+    CacheSize += CachedBin.size();
+    Bin = CachedBin->getBinary();
   }
 
   if (!Bin)
@@ -547,8 +568,13 @@
       return ObjOrErr.takeError();
     }
     ObjectFile *Res = ObjOrErr->get();
-    ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
-                                   std::move(ObjOrErr.get()));
+    auto Pair = ObjectForUBPathAndArch.emplace(std::make_pair(Path, ArchName),
+                                               std::move(ObjOrErr.get()));
+    // Use find(): operator[] would insert a spurious empty (error) entry.
+    auto BinIt = BinaryForPath.find(Path);
+    if (BinIt != BinaryForPath.end())
+      BinIt->second.pushEvictor(
+          [this, Iter = Pair.first]() { ObjectForUBPathAndArch.erase(Iter); });
     return Res;
   }
   if (Bin->isObject()) {
@@ -576,10 +602,6 @@
 
 Expected
 LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
-  auto I = Modules.find(ModuleName);
-  if (I != Modules.end())
-    return I->second.get();
-
   std::string BinaryName = ModuleName;
   std::string ArchName = Opts.DefaultArch;
   size_t ColonPos = ModuleName.find_last_of(':');
@@ -591,6 +613,16 @@
       ArchName = ArchStr;
     }
   }
+
+  auto I = Modules.find(ModuleName);
+  if (I != Modules.end()) {
+    // Use find(): operator[] would insert a spurious empty (error) entry.
+    auto BinIt = BinaryForPath.find(BinaryName);
+    if (BinIt != BinaryForPath.end())
+      recordAccess(BinIt->second);
+    return I->second.get();
+  }
+
   auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName);
   if (!ObjectsOrErr) {
     // Failed to find valid object file.
@@ -625,7 +657,16 @@
     Context = DWARFContext::create(
         *Objects.second, DWARFContext::ProcessDebugRelocations::Process,
         nullptr, Opts.DWPName);
-  return createModuleInfo(Objects.first, std::move(Context), ModuleName);
+  auto ModuleOrErr =
+      createModuleInfo(Objects.first, std::move(Context), ModuleName);
+  if (ModuleOrErr) {
+    auto I = Modules.find(ModuleName);
+    // Use find(): operator[] would insert a spurious empty (error) entry.
+    auto BinIt = BinaryForPath.find(BinaryName);
+    if (BinIt != BinaryForPath.end())
+      BinIt->second.pushEvictor([this, I]() { Modules.erase(I); });
+  }
+  return ModuleOrErr;
 }
 
 Expected
@@ -708,5 +749,39 @@
   return Name;
 }
 
+void LLVMSymbolizer::recordAccess(CachedBinary &Bin) {
+  // Empty entries are error markers and are never linked into the LRU list,
+  // so only a loaded binary is moved to the most-recently-used end.
+  if (Bin->getBinary())
+    LRUBinaries.splice(LRUBinaries.end(), LRUBinaries, Bin.getIterator());
+}
+
+void LLVMSymbolizer::pruneCache() {
+  // Evict the LRU binary until the max cache size is reached or there's <= 1
+  // item in the cache. The MRU binary is always kept to avoid thrashing if it's
+  // larger than the cache size.
+  while (CacheSize > Opts.MaxCacheSize && !LRUBinaries.empty() &&
+         std::next(LRUBinaries.begin()) != LRUBinaries.end()) {
+    CachedBinary &Bin = LRUBinaries.front();
+    CacheSize -= Bin.size();
+    LRUBinaries.pop_front();
+    // Evict last: the evictors erase the map entry that owns Bin.
+    Bin.evict();
+  }
+}
+
+void CachedBinary::pushEvictor(std::function<void()> NewEvictor) {
+  if (Evictor) {
+    // Chain the new action in front of the ones already registered.
+    this->Evictor = [OldEvictor = std::move(this->Evictor),
+                     NewEvictor = std::move(NewEvictor)]() {
+      NewEvictor();
+      OldEvictor();
+    };
+  } else {
+    this->Evictor = std::move(NewEvictor);
+  }
+}
+
 } // namespace symbolize
 } // namespace llvm
diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td
--- a/llvm/tools/llvm-symbolizer/Opts.td
+++ b/llvm/tools/llvm-symbolizer/Opts.td
@@ -22,6 +22,7 @@
                     MetaVarName<"">;
 def basenames : Flag<["--"], "basenames">, HelpText<"Strip directory names from paths">;
 defm build_id : Eq<"build-id", "Build ID used to look up the object file">;
+defm cache_size : Eq<"cache-size", "Max size in bytes of the in-memory binary cache.">;
 defm debug_file_directory : Eq<"debug-file-directory", "Path to directory where to look for debug files">, MetaVarName<"">;
 defm debuginfod : B<"debuginfod", "Use debuginfod to find debug binaries", "Don't use debuginfod to find debug binaries">;
 defm default_arch
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -190,6 +190,7 @@
         Symbolizer.symbolizeCode(ModuleSpec, Address);
     print({ModuleName, Offset}, ResOrErr, Printer);
   }
+  Symbolizer.pruneCache();
 }
 
 static void symbolizeInput(const opt::InputArgList &Args,
@@ -360,6 +361,8 @@
   }
 #endif
   Opts.UseSymbolTable = true;
+  if (Args.hasArg(OPT_cache_size_EQ))
+    parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize);
   Config.PrintAddress = Args.hasArg(OPT_addresses);
   Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None;
   Config.Pretty = Args.hasArg(OPT_pretty_print);