diff --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h --- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/DebugInfo/Symbolize/Markup.h" +#include "llvm/DebugInfo/Symbolize/ProcessContext.h" #include "llvm/Object/BuildID.h" #include "llvm/Support/JSON.h" #include "llvm/Support/WithColor.h" @@ -49,29 +50,12 @@ void finish(); private: - struct Module { - uint64_t ID; - std::string Name; - SmallVector BuildID; - }; - - struct MMap { - uint64_t Addr; - uint64_t Size; - const Module *Mod; - std::string Mode; // Lowercase - uint64_t ModuleRelativeAddr; - - bool contains(uint64_t Addr) const; - uint64_t getModuleRelativeAddr(uint64_t Addr) const; - }; - // An informational module line currently being constructed. As many mmap // elements as possible are folded into one ModuleInfo line. struct ModuleInfoLine { - const Module *Mod; + const ProcessContext::Module *Mod; - SmallVector MMaps = {}; + SmallVector MMaps = {}; }; // The semantics of a possible program counter value. 
@@ -92,7 +76,7 @@ bool tryModule(const MarkupNode &Element, const SmallVector &DeferredNodes); - void beginModuleInfoLine(const Module *M); + void beginModuleInfoLine(const ProcessContext::Module *M); void endAnyModuleInfoLine(); void filterNode(const MarkupNode &Node); @@ -115,10 +99,10 @@ void printRawElement(const MarkupNode &Element); void printValue(Twine Value); - void dumpProcessContext(); - - std::optional parseModule(const MarkupNode &Element) const; - std::optional parseMMap(const MarkupNode &Element) const; + std::optional + parseModule(const MarkupNode &Element) const; + std::optional + parseMMap(const MarkupNode &Element) const; std::optional parseAddr(StringRef Str) const; std::optional parseModuleID(StringRef Str) const; @@ -136,9 +120,6 @@ void reportTypeError(StringRef Str, StringRef TypeName) const; void reportLocation(StringRef::iterator Loc) const; - const MMap *getOverlappingMMap(const MMap &Map) const; - const MMap *getContainingMMap(uint64_t Addr) const; - uint64_t adjustAddr(uint64_t Addr, PCType Type) const; StringRef lineEnding() const; @@ -161,11 +142,7 @@ std::optional Color; bool Bold = false; - // Map from Module ID to Module. - DenseMap> Modules; - - // Ordered map from starting address to mmap. - std::map MMaps; + ProcessContext Context; }; } // end namespace symbolize diff --git a/llvm/include/llvm/DebugInfo/Symbolize/ProcessContext.h b/llvm/include/llvm/DebugInfo/Symbolize/ProcessContext.h new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/DebugInfo/Symbolize/ProcessContext.h @@ -0,0 +1,86 @@ +//===- ProcessContext.h -----------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file declares a process context that relates runtime addresses to
+/// module-relative addresses. This allows undoing the effects of ASLR and
+/// dynamic linking when symbolizing addresses, e.g., from a log bearing
+/// symbolizer markup.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_SYMBOLIZE_PROCESSCONTEXT_H
+#define LLVM_DEBUGINFO_SYMBOLIZE_PROCESSCONTEXT_H
+
+#include "Markup.h"
+
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Object/BuildID.h"
+
+namespace llvm {
+
+namespace json {
+class OStream;
+} // namespace json
+
+namespace symbolize {
+
+/// A process context that allows converting between virtual addresses and
+/// module-relative addresses.
+///
+/// The context owns the modules and mmaps inserted into it. Pointers returned
+/// by the member functions refer to that storage and stay valid until clear()
+/// is called or the context is destroyed.
+class ProcessContext {
+public:
+  /// A module of the process, identified by ID.
+  struct Module {
+    uint64_t ID;
+    std::string Name;
+    object::BuildID BuildID;
+  };
+
+  /// A range of virtual addresses that is backed by a module.
+  struct MMap {
+    uint64_t Addr; // First virtual address of the range.
+    uint64_t Size; // Length of the range.
+    const Module *Mod;
+    std::string Mode; // Lowercase
+    uint64_t ModuleRelativeAddr; // Module-relative address of Addr.
+
+    /// Returns true if \p Addr lies within [this->Addr, this->Addr + Size).
+    bool contains(uint64_t Addr) const;
+
+    /// Returns the module-relative address for the virtual address \p Addr.
+    /// No check is made that this mmap actually contains \p Addr.
+    uint64_t getModuleRelativeAddr(uint64_t Addr) const;
+  };
+
+private:
+  // Map from Module ID to Module.
+  DenseMap<uint64_t, std::unique_ptr<Module>> Modules;
+
+  // Ordered map from starting address to mmap.
+  std::map<uint64_t, MMap> MMaps;
+
+public:
+  /// Returns true if the context holds neither modules nor mmaps.
+  bool empty() const;
+
+  /// Removes all modules and mmaps.
+  void clear();
+
+  /// Writes the modules and mmaps to \p JOS as a single JSON object.
+  void dump(json::OStream &JOS) const;
+
+  /// Inserts a module unless one with the same ID exists. Returns the stored
+  /// (or already existing) module and whether the insertion took place.
+  std::pair<const Module *, bool> insertModule(Module M);
+
+  /// Returns the module with the given ID or nullptr if none.
+  const Module *getModule(uint64_t ID) const;
+
+  /// Inserts an mmap unless an existing mmap overlaps it. Returns the stored
+  /// (or an overlapping) mmap and whether the insertion took place.
+  std::pair<const MMap *, bool> insertMMap(MMap MM);
+
+  /// Returns the MMap that contains the given address or nullptr if none.
+  const MMap *getContainingMMap(uint64_t Addr) const;
+
+private:
+  /// Returns a stored mmap that overlaps \p Map or nullptr if none.
+  const MMap *getOverlappingMMap(const MMap &Map) const;
+};
+
+} // end namespace symbolize
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_SYMBOLIZE_PROCESSCONTEXT_H
diff --git a/llvm/lib/DebugInfo/Symbolize/CMakeLists.txt b/llvm/lib/DebugInfo/Symbolize/CMakeLists.txt --- a/llvm/lib/DebugInfo/Symbolize/CMakeLists.txt +++ b/llvm/lib/DebugInfo/Symbolize/CMakeLists.txt @@ -2,6 +2,7 @@ DIPrinter.cpp Markup.cpp MarkupFilter.cpp + ProcessContext.cpp SymbolizableObjectFile.cpp Symbolize.cpp diff --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp --- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp +++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp @@ -80,12 +80,11 @@ endAnyModuleInfoLine(); resetColor(); if (JOS) { - dumpProcessContext(); + Context.dump(*JOS); JOS->arrayEnd(); OS << '\n'; } - Modules.clear(); - MMaps.clear(); + Context.clear(); } // See if the given node is a contextual element and handle it if so. 
This may @@ -109,22 +108,20 @@ const SmallVector &DeferredNodes) { if (Node.Tag != "mmap") return false; - std::optional ParsedMMap = parseMMap(Node); + std::optional ParsedMMap = parseMMap(Node); if (!ParsedMMap) return true; - if (const MMap *M = getOverlappingMMap(*ParsedMMap)) { + auto Res = Context.insertMMap(std::move(*ParsedMMap)); + const ProcessContext::MMap &MMap = *Res.first; + if (!Res.second) { WithColor::error(errs()) - << formatv("overlapping mmap: #{0:x} [{1:x}-{2:x}]\n", M->Mod->ID, - M->Addr, M->Addr + M->Size - 1); + << formatv("overlapping mmap: #{0:x} [{1:x}-{2:x}]\n", MMap.Mod->ID, + MMap.Addr, MMap.Addr + MMap.Size - 1); reportLocation(Node.Fields[0].begin()); return true; } - auto Res = MMaps.emplace(ParsedMMap->Addr, std::move(*ParsedMMap)); - assert(Res.second && "Overlap check should ensure emplace succeeds."); - MMap &MMap = Res.first->second; - if (!MIL || MIL->Mod != MMap.Mod) { endAnyModuleInfoLine(); for (const MarkupNode &Node : DeferredNodes) @@ -145,9 +142,9 @@ return true; if (JOS) - dumpProcessContext(); + Context.dump(*JOS); - if (!Modules.empty() || !MMaps.empty()) { + if (!Context.empty()) { endAnyModuleInfoLine(); for (const MarkupNode &Node : DeferredNodes) filterNode(Node); @@ -158,8 +155,7 @@ restoreColor(); } - Modules.clear(); - MMaps.clear(); + Context.clear(); } return true; } @@ -168,18 +164,17 @@ const SmallVector &DeferredNodes) { if (Node.Tag != "module") return false; - std::optional ParsedModule = parseModule(Node); + std::optional ParsedModule = parseModule(Node); if (!ParsedModule) return true; - auto Res = Modules.try_emplace( - ParsedModule->ID, std::make_unique(std::move(*ParsedModule))); + auto Res = Context.insertModule(std::move(*ParsedModule)); if (!Res.second) { WithColor::error(errs()) << "duplicate module ID\n"; reportLocation(Node.Fields[0].begin()); return true; } - Module &Module = *Res.first->second; + const ProcessContext::Module &Module = *Res.first; endAnyModuleInfoLine(); for (const 
MarkupNode &Node : DeferredNodes) @@ -192,7 +187,7 @@ return true; } -void MarkupFilter::beginModuleInfoLine(const Module *M) { +void MarkupFilter::beginModuleInfoLine(const ProcessContext::Module *M) { if (!JOS) { highlight(); OS << "[[[ELF module"; @@ -208,10 +203,11 @@ if (!MIL) return; if (!JOS) { - llvm::stable_sort(MIL->MMaps, [](const MMap *A, const MMap *B) { + llvm::stable_sort(MIL->MMaps, [](const ProcessContext::MMap *A, + const ProcessContext::MMap *B) { return A->Addr < B->Addr; }); - for (const MMap *M : MIL->MMaps) { + for (const ProcessContext::MMap *M : MIL->MMaps) { OS << (M == MIL->MMaps.front() ? ' ' : ','); OS << '['; printValue(formatv("{0:x}", M->Addr)); @@ -277,7 +273,7 @@ // PC addresses that aren't part of a backtrace are assumed to be precise code // locations. - PCType Type = PCType::PreciseCode; + auto Type = PCType::PreciseCode; if (Node.Fields.size() == 2) { std::optional ParsedType = parsePCType(Node.Fields[1]); if (!ParsedType) @@ -286,7 +282,7 @@ } *Addr = adjustAddr(*Addr, Type); - const MMap *MMap = getContainingMMap(*Addr); + const ProcessContext::MMap *MMap = Context.getContainingMMap(*Addr); if (!MMap) { WithColor::error() << "no mmap covers address\n"; reportLocation(Node.Fields[0].begin()); @@ -334,7 +330,7 @@ return true; // Backtrace addresses are assumed to be return addresses by default. 
- PCType Type = PCType::ReturnAddress; + auto Type = PCType::ReturnAddress; if (Node.Fields.size() == 3) { std::optional ParsedType = parsePCType(Node.Fields[2]); if (!ParsedType) @@ -343,7 +339,7 @@ } *Addr = adjustAddr(*Addr, Type); - const MMap *MMap = getContainingMMap(*Addr); + const ProcessContext::MMap *MMap = Context.getContainingMMap(*Addr); if (!MMap) { WithColor::error() << "no mmap covers address\n"; reportLocation(Node.Fields[0].begin()); @@ -407,7 +403,7 @@ if (!Addr) return true; - const MMap *MMap = getContainingMMap(*Addr); + const ProcessContext::MMap *MMap = Context.getContainingMMap(*Addr); if (!MMap) { WithColor::error() << "no mmap covers address\n"; reportLocation(Node.Fields[0].begin()); @@ -429,33 +425,6 @@ return true; } -void MarkupFilter::dumpProcessContext() { - JOS->object([&] { - JOS->attributeArray("modules", [&] { - for (const auto &[_, Module] : Modules) { - JOS->objectBegin(); - JOS->attribute("id", Module->ID); - JOS->attribute("name", Module->Name); - JOS->attribute("type", "elf"); - JOS->attribute("buildID", toHex(Module->BuildID, /*LowerCase=*/true)); - JOS->objectEnd(); - } - }); - JOS->attributeArray("mmaps", [&] { - for (const auto &[_, Map] : MMaps) { - JOS->objectBegin(); - JOS->attribute("address", Map.Addr); - JOS->attribute("size", Map.Size); - JOS->attribute("type", "load"); - JOS->attribute("moduleID", Map.Mod->ID); - JOS->attribute("mode", Map.Mode); - JOS->attribute("moduleRelativeAddress", Map.ModuleRelativeAddr); - JOS->objectEnd(); - } - }); - }); -} - bool MarkupFilter::trySGR(const MarkupNode &Node) { if (Node.Text == "\033[0m") { resetColor(); @@ -555,7 +524,7 @@ return std::nullopt; \ TYPE NAME = std::move(*NAME##Opt) -std::optional +std::optional MarkupFilter::parseModule(const MarkupNode &Element) const { if (!checkNumFieldsAtLeast(Element, 3)) return std::nullopt; @@ -572,10 +541,10 @@ SmallVector BuildID = parseBuildID(Element.Fields[3]); if (BuildID.empty()) return std::nullopt; - return Module{ID, 
Name.str(), std::move(BuildID)}; + return ProcessContext::Module{ID, Name.str(), std::move(BuildID)}; } -std::optional +std::optional MarkupFilter::parseMMap(const MarkupNode &Element) const { if (!checkNumFieldsAtLeast(Element, 3)) return std::nullopt; @@ -591,16 +560,16 @@ return std::nullopt; ASSIGN_OR_RETURN_NONE(uint64_t, ID, parseModuleID(Element.Fields[3])); ASSIGN_OR_RETURN_NONE(std::string, Mode, parseMode(Element.Fields[4])); - auto It = Modules.find(ID); - if (It == Modules.end()) { + const ProcessContext::Module *M = Context.getModule(ID); + if (!M) { WithColor::error() << "unknown module ID\n"; reportLocation(Element.Fields[3].begin()); return std::nullopt; } ASSIGN_OR_RETURN_NONE(uint64_t, ModuleRelativeAddr, parseAddr(Element.Fields[5])); - return MMap{Addr, Size, It->second.get(), std::move(Mode), - ModuleRelativeAddr}; + return ProcessContext::MMap{Addr, Size, M, std::move(Mode), + ModuleRelativeAddr}; } // Parse an address (%p in the spec). @@ -689,11 +658,10 @@ std::optional MarkupFilter::parsePCType(StringRef Str) const { - std::optional Type = - StringSwitch>(Str) - .Case("ra", MarkupFilter::PCType::ReturnAddress) - .Case("pc", MarkupFilter::PCType::PreciseCode) - .Default(std::nullopt); + auto Type = StringSwitch>(Str) + .Case("ra", PCType::ReturnAddress) + .Case("pc", PCType::PreciseCode) + .Default(std::nullopt); if (!Type) reportTypeError(Str, "PC type"); return Type; @@ -758,56 +726,14 @@ errs() << '\n'; } -// Checks for an existing mmap that overlaps the given one and returns a -// pointer to one of them. -const MarkupFilter::MMap * -MarkupFilter::getOverlappingMMap(const MMap &Map) const { - // If the given map contains the start of another mmap, they overlap. - auto I = MMaps.upper_bound(Map.Addr); - if (I != MMaps.end() && Map.contains(I->second.Addr)) - return &I->second; - - // If no element starts inside the given mmap, the only possible overlap would - // be if the preceding mmap contains the start point of the given mmap. 
- if (I != MMaps.begin()) { - --I; - if (I->second.contains(Map.Addr)) - return &I->second; - } - return nullptr; -} - -// Returns the MMap that contains the given address or nullptr if none. -const MarkupFilter::MMap *MarkupFilter::getContainingMMap(uint64_t Addr) const { - // Find the first mmap starting >= Addr. - auto I = MMaps.lower_bound(Addr); - if (I != MMaps.end() && I->second.contains(Addr)) - return &I->second; - - // The previous mmap is the last one starting < Addr. - if (I == MMaps.begin()) - return nullptr; - --I; - return I->second.contains(Addr) ? &I->second : nullptr; -} - uint64_t MarkupFilter::adjustAddr(uint64_t Addr, PCType Type) const { // Decrementing return addresses by one moves them into the call instruction. // The address doesn't have to be the start of the call instruction, just some // byte on the inside. Subtracting one avoids needing detailed instruction // length information here. - return Type == MarkupFilter::PCType::ReturnAddress ? Addr - 1 : Addr; + return Type == PCType::ReturnAddress ? Addr - 1 : Addr; } StringRef MarkupFilter::lineEnding() const { return Line.endswith("\r\n") ? "\r\n" : "\n"; } - -bool MarkupFilter::MMap::contains(uint64_t Addr) const { - return this->Addr <= Addr && Addr < this->Addr + Size; -} - -// Returns the module-relative address for a given virtual address. -uint64_t MarkupFilter::MMap::getModuleRelativeAddr(uint64_t Addr) const { - return Addr - this->Addr + ModuleRelativeAddr; -} diff --git a/llvm/lib/DebugInfo/Symbolize/ProcessContext.cpp b/llvm/lib/DebugInfo/Symbolize/ProcessContext.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/DebugInfo/Symbolize/ProcessContext.cpp @@ -0,0 +1,135 @@ +//===-- lib/DebugInfo/Symbolize/ProcessContext.cpp ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements a process context that relates virtual addresses to
+/// module-relative addresses.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/Symbolize/ProcessContext.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/Debuginfod/Debuginfod.h"
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/JSON.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+using namespace llvm::symbolize;
+
+// Returns true if the context holds neither modules nor mmaps.
+bool ProcessContext::empty() const { return Modules.empty() && MMaps.empty(); }
+
+// Removes all modules and mmaps. The context owns both, so pointers returned
+// by earlier insert/get calls must not be used afterwards.
+void ProcessContext::clear() {
+  Modules.clear();
+  MMaps.clear();
+}
+
+// Writes the context to JOS as one JSON object with a "modules" array and an
+// "mmaps" array. The "type" attributes are fixed strings ("elf" and "load").
+void ProcessContext::dump(json::OStream &JOS) const {
+  JOS.object([&] {
+    JOS.attributeArray("modules", [&] {
+      for (const auto &[_, Module] : Modules) {
+        JOS.objectBegin();
+        JOS.attribute("id", Module->ID);
+        JOS.attribute("name", Module->Name);
+        JOS.attribute("type", "elf");
+        JOS.attribute("buildID", toHex(Module->BuildID, /*LowerCase=*/true));
+        JOS.objectEnd();
+      }
+    });
+    JOS.attributeArray("mmaps", [&] {
+      for (const auto &[_, Map] : MMaps) {
+        JOS.objectBegin();
+        JOS.attribute("address", Map.Addr);
+        JOS.attribute("size", Map.Size);
+        JOS.attribute("type", "load");
+        JOS.attribute("moduleID", Map.Mod->ID);
+        JOS.attribute("mode", Map.Mode);
+        JOS.attribute("moduleRelativeAddress", Map.ModuleRelativeAddr);
+        JOS.objectEnd();
+      }
+    });
+  });
+}
+
+// Inserts a module unless one with the same ID exists. Returns the stored (or
+// already existing) module and whether the insertion took place.
+std::pair<const ProcessContext::Module *, bool>
+ProcessContext::insertModule(Module M) {
+  // Read the key before M is moved from; the order in which the arguments of
+  // the call below are evaluated is unspecified.
+  const uint64_t ID = M.ID;
+  const auto Res =
+      Modules.try_emplace(ID, std::make_unique<Module>(std::move(M)));
+  return {Res.first->second.get(), Res.second};
+}
+
+// Returns the module with the given ID or nullptr if none.
+const ProcessContext::Module *ProcessContext::getModule(uint64_t ID) const {
+  const auto It = Modules.find(ID);
+  return It == Modules.end() ? nullptr : It->second.get();
+}
+
+// Inserts an mmap unless an existing mmap overlaps it. Returns the stored (or
+// an overlapping) mmap and whether the insertion took place.
+std::pair<const ProcessContext::MMap *, bool>
+ProcessContext::insertMMap(MMap MM) {
+  if (const MMap *Overlap = getOverlappingMMap(MM))
+    return {Overlap, false};
+  const auto Res = MMaps.try_emplace(MM.Addr, std::move(MM));
+  assert(Res.second &&
+         "Overlap check should have ensured unique starting address.");
+  return {&Res.first->second, true};
+}
+
+// Returns the MMap that contains the given address or nullptr if none.
+const ProcessContext::MMap *
+ProcessContext::getContainingMMap(uint64_t Addr) const {
+  // Find the first mmap starting >= Addr.
+  auto I = MMaps.lower_bound(Addr);
+  if (I != MMaps.end() && I->second.contains(Addr))
+    return &I->second;
+
+  // The previous mmap is the last one starting < Addr.
+  if (I == MMaps.begin())
+    return nullptr;
+  --I;
+  return I->second.contains(Addr) ? &I->second : nullptr;
+}
+
+// Checks for an existing mmap that overlaps the given one and returns a pointer
+// to one such mmap, or nullptr if there is none. The given mmap may overlap
+// more than one stored mmap; only one of them is reported.
+const ProcessContext::MMap *
+ProcessContext::getOverlappingMMap(const MMap &Map) const {
+  // If the given map contains the start of another mmap, they overlap.
+  auto I = MMaps.upper_bound(Map.Addr);
+  if (I != MMaps.end() && Map.contains(I->second.Addr))
+    return &I->second;
+
+  // If no element starts inside the given mmap, the only possible overlap would
+  // be if the preceding mmap contains the start point of the given mmap.
+  if (I != MMaps.begin()) {
+    --I;
+    if (I->second.contains(Map.Addr))
+      return &I->second;
+  }
+  return nullptr;
+}
+
+// Returns true if Addr falls inside [this->Addr, this->Addr + Size). The upper
+// bound is checked on the offset because this->Addr + Size wraps around to a
+// small value for a mapping that reaches the top of the 64-bit address space.
+bool ProcessContext::MMap::contains(uint64_t Addr) const {
+  return this->Addr <= Addr && Addr - this->Addr < Size;
+}
+
+// Returns the module-relative address for a given virtual address.
+uint64_t ProcessContext::MMap::getModuleRelativeAddr(uint64_t Addr) const {
+  return Addr - this->Addr + ModuleRelativeAddr;
+}