diff --git a/lldb/include/lldb/Target/TraceHTR.h b/lldb/include/lldb/Target/TraceHTR.h new file mode 100644 --- /dev/null +++ b/lldb/include/lldb/Target/TraceHTR.h @@ -0,0 +1,399 @@ +//===-- TraceHTR.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLDB_TARGET_TRACE_HTR_H +#define LLDB_TARGET_TRACE_HTR_H + +#include "lldb/lldb-types.h" + +#include +#include +#include +#include + +/// Hierarchical Trace Representation (HTR): +/// +/// A high level tracing tool might need to represent a trace in a hierarchical +/// fashion respecting the chronological structure of the instructions. For +/// example, a call tree inspector will need to organize the trace instructions +/// as a tree of blocks, each one representing sequential instructions that +/// start at a function call and end when that call returns to its parent. +/// Another tool might just want to display the hierarchy of calls between +/// modules to understand the flow of data. In summary, these applications need +/// a way to represent blocks (sequential instructions) that might also have +/// child blocks. +/// +/// As the number of instructions in a trace can be in the order of hundreds of +/// millions, creating copies of the instructions should be avoided, and thus +/// trace blocks should use instruction identifiers (see \a +/// TraceCursor::GetId()) to point to some key instructions and then use a +/// TraceCursor to traverse the trace sequentially from that point. +/// +/// Creating an HTR of a trace can be a very expensive operation, so the data +/// storage of the HTR blocks must be efficient and easy to save and load to and +/// from disk. An additional design consideration is that the underlying data +/// storage should be replaceable, so that different storage strategies can be +/// used without having to change the API. +/// +/// Another design consideration is that this structure should be efficient when +/// traversing in a recursive fashion, because it might be the most common way +/// to gather data from it. +/// +/// Finally, analyzers will be needed. They will compute metrics or indices on +/// the HTR structures. And not just that, they should be able to be serialized +/// so that they don't need to be recomputed in future sessions. + +namespace lldb_private { + +/// This structure contains the underlying metadata of each trace block in +/// memory. Each block can be referred to with an array index. In order to +/// facilitate the communication between the analyzers and the storages without +/// exposing pointers to the underlying data structures, we'll be using these +/// block indices ubiquitously. +/// +/// Below this class is another variant of this class when the storage is on +/// disk. +class TraceHTRInMemoryStorage { + // A block represents all the instructions from GetFirstInstructionId() to + // GetLastInstructionId(). A child of a block must be a sequence of + // instructions within the above range. Exceptions exist: + // - the first child can end before GetFirstInstructionId(), e.g. a function + // returning + // to its parent because tracing started late, or because there are gaps in + // the trace + // - the last child can start after GetLastInstructionId(), e.g. the trace + // ended abruptly before + // returning to its parent because tracing stopped or there are gaps in the + // trace + struct HTRInMemoryBlock { + + // This is the first instruction that strictly belongs to this block + // and not to a child. + lldb::user_id_t GetFirstInstructionId() { return m_first_insn_id; } + + // This is the last instruction that strictly belongs to this block + // and not to a child. + lldb::user_id_t GetLastInstructionId() { return m_last_insn_id; } + + size_t GetChildrenCount() { return m_children; } + + // Return the index of the child in the m_blocks array. + // We could also use uint32_t, saving 4 bytes, if we force HTR to have at + // most 2^32 elements, which seems reasonable. + size_t &GetChildAtIndex(size_t index) { return m_children_indices[index]; } + + llvm::Optional GetParent() { return m_parent_index; } + + private: + HTRInMemoryBlock(const HTRInMemoryBlock &) = delete; + HTRInMemoryBlock &operator=(const HTRInMemoryBlock &) = delete; + + std::vector + m_children_indices; // this could later be represented in a different + // way to use maybe 2 pointers instead of 3, but + // that's for later. + llvm::Optional + m_parent_index; // we could use -1 as a wildcard for "no parent", thus + // saving some memory. + lldb::user_id_t m_first_insn_id; // we can't get rid of these two, as they + // are most basic identifiers + lldb::user_id_t m_last_insn_id; // for this trace bloc + } + + // Used to compute the memory cost of HTR to do optimizations. + size_t + GetByteSize(); + + // We need this accessor because each block doesn't know its own index + HTRInMemoryBlock &GetBlockAtIndex(size_t index) { return m_blocks[index]; } + +private: + // might be worth later trying with deque, which doesn't copy elements when + // growing + std::vector m_blocks; +}; + +// This would be the variant when the underlying data is stored on disk and is +// mmapped. It should have the same interfaces as the in-memory one. +// +// A posibility is to use a base abstract class, but that's not the purpose of +// this diff. +class TraceHTROnDiskStorage { + struct HTROnDiskBlock { + lldb::user_id_t GetFirstInstructionId(); + + lldb::user_id_t GetLastInstructionId(); + + size_t GetChildrenCount(); + + size_t GetChildAtIndex(size_t index); + + llvm::Optional GetParent(); + }; + + HTROnDiskBlock &GetBlockAtIndex(size_t index); + +private: + HTROnDiskBlock *m_blocks; + size_t m_num_blocks; +}; + +/// This class is a presentation object for the underlying block storage. +/// It can reroute its API to the actual storage being used. +class TraceHTRBlock { + lldb::user_id_t GetFirstInstructionId() { + return m_in_memory ? m_in_memory_block->GetFirstInstructionId() + : m_on_disk_block->GetFirstInstructionId(); + } + + lldb::user_id_t GetLastInstructionId() { + return m_in_memory ? m_in_memory_block->GetLastInstructionId() + : m_on_disk_block->GetLastInstructionId(); + } + + TraceHTRBlock GetParent() { + if (m_in_memory) + return TraceHTRBlock(m_in_memory_storage, + m_in_memory_block->GetParentIndex()); + else + return TraceHTRBlock(m_on_disk_storage, + m_on_disk_block->GetParentIndex()); + } + + TraceHTRBlock GetChildAtIndex(size_t index) { + if (m_in_memory) + return TraceHTRBlock(m_in_memory_storage, + m_in_memory_block->GetChildAtIndex(index)); + else + return TraceHTRBlock(m_on_disk_storage, + m_on_disk_block->GetChildAtIndex(index)); + } + + /// A number between 0 and `total number of instruction` - 1. + /// Only used to implement extensions/analyzers to the main HTR. + size_t GetIndex() { return m_index; } + + TraceHTRBlock(TraceHTRInMemoryStorage *in_memory_storage, size_t index) + : m_in_memory(true), m_in_memory_storage(in_memory_storage), + m_in_memory_block(in_memory_storage->m_blocks[index]), m_index(index) {} + +private: + union { + TraceHTRInMemoryStorage *m_in_memory_storage; + TraceHTROnDiskStore *m_on_disk_storage; + }; + union { + HTRInMemoryBlock *m_in_memory_block; + HTRInMemoryBlock *m_on_disk_block; + }; + bool m_in_memory; + size_t m_index; +}; + +/// This is an example of an analyzer or extension to HTR. This computes and +/// stores a metric based on a cummulative counter in the trace. It can be used +/// for TSCs, computing for each block the amount of CPU ticks spent locally and +/// within its children. Not only that, it knows how to serialize and +/// deserialize itself onto and from disk. +class TraceHTRCumulativeCounterMetric { +public: + // E.g. the amount of CPU ticks spent in a specific function excluding its + // children + uint64_t GetLocalCount(const TraceHTRBlock &block) { + return m_accumulations[block.GetId()].GetLocalCount(); + } + + // E.g. the amount of CPU ticks spent in a function including recursively all + // its children + uint64_t GetTotalCount(const TraceHTRBlock &block) { + return m_accumulations[block.GetId()].total; + } + +private: + friend class HTR; + + TraceHTRCumulativeCounterMetric(lldb::TraceCounter counter_type) + : m_counter_type(counter_type) {} + + void Compute(TraceHTR &trace_htr) { + // I went ahead and fully implemented this analyzer to make sure the + // interfaces are enough for this kind of work. + + m_accumulations.resize(trace_htr.GetNumBlocks()); + + int last_level = -1; + // We'll use these vectors to compute the first and last absolute counters + // in a subtree. One example in which this is useful is when the first + // instruction is a return from `foo` to `main`. We started tracing late, + // and in the subtree of `main`, the first absolute counter might be in + // `foo` instead of main, but we need to know this when we are processing + // main. + std::vector first_counter_in_subtree( + trace_htr.GetMaxDepth(), std::numeric_limits::max()); + std::vector last_counter_in_subtree(trace_htr.GetMaxDepth(), 0); + + trace_htr.Recurse( + TraceHTR::RecurseOrder::PostOrder, // We use post order so that we + // process a block after all its + // children have been visited + [&](const TraceHTRBlock &block, const TraceHTRBlock *parent, + TraceCursor &cursor, int level) { + // We first get the first counter in this block or its subtree + cursor.GoToId(block.GetFirstInstructionId()); + uint64_t &first_counter = first_counter_in_subtree[level]; + first_counter = min(first_counter, cursor.GetCounter(m_counter_type)); + + // We do the same for the last counter + cursor.GoToId(block.GetLastInstructionId()); + uint64_t &last_counter = last_counter_in_subtree[level]; + last_counter = max(last_counter, cursor.GetCounter(m_counter_type)); + + // We can now compute the total cumulative counter for the subtree + m_accumulations[block.GetIndex()].total = + last_counter - first_counter; + + // We now update the children information for the parent + if (parent) { + m_accumulations[parent->GetIndex()].children += m_counters.total; + first_counter_in_subtree[level - 1] = + min(first_counter, first_counter_in_subtree[level - 1]); + last_counter_in_subtree[level - 1] = + min(last_counter, last_counter_in_subtree[level - 1]); + } + }); + } + + // Serialization. We can figure out the parameters later + size_t SaveToDisk(); + + static TraceHTRCumulativeCounterMetric LoadFromDisk(); + + // This is the actual storage for the counts, we might need to change this to + // allow mmapings. + struct BlockCounters { + uint64_t total = 0; + uint64_t children = 0; + + uint64_t GetLocalCount() { return total - children; } + } lldb::TraceCounter m_counter_type; + // The fact that we use indices to refer to HTRBlocks allows each analyzer to + // use a simple vector to organize its data. + std::vector m_accumulations; +} + +// This is a hypothetical extension that would compute a mapping between Symbols +// and Blocks, i.e. given a symbol retrieve the list of blocks whose first +// instruction is that symbol. This could be used alongside the +// TraceHTRCumulativeCounterMetric to produce a report of the total aggregated +// cost of each function along with some pointers to the most expensive runs of +// each function. +class TraceHTRSymbolIndex { + void Compute(TraceHTR &trace_htr) {} + + /// This returns block ids, which can later be converted into TraceBlocks by + /// TraceHTR. Or this can just return a vector of HTRBlocks. + llvm::ArrayRef GetBlocks(lldb::SymbolContext sc); + + // Serialization + size_t SaveToDisk(); + + static TraceHTRSymbolIndex LoadFromDisk(); + +private: + std::map> m_mapping; // symbol id -> block index +}; + +// This is main entry point and owner of all storages for blocks and analyzers. +// It's easier to handle serialization and deserialization if this owns +// everything. Also it's easier to avoid computing the same analyzer twice if +// this object owns it. +class TraceHTR { + enum RecurseOrder { PreOrder = 0; PostOrder; }; + + size_t GetNumBlocks(); + + size_t GetMaxDepth(); + + void Recurse(RecurseOrder order, + std::function + visitor); + + TraceHTRCumulativeCounterMetric & + GetOrCreateCumulativeCounterMetric(lldb::TraceCounter counter_type) { + auto it = m_metrics.find(counter); + if (it == m_metrics.end()) + it = m_metrics.try_emplace(counter_type, counter_type).first; + return *it->second.get(); + } + + TraceHTRSymbolIndex &GetOrCreateSymbolIndex(); + + TraceHTRBlock GetBlockForInstructionId(lldb::user_id_t id) { + // Each trace plug-in knows exactly what ids really mean and can use this + // knowledge to traverse efficiently the tree of HTR blocks to quickly find + // this block. This can be used, for example, for implementing reverse + // stepping starting at an arbitrary point, because once you know in which + // block your current instruction is, you can start moving up and down in + // the tree efficiently. In other words, TraceIntelPT will need to implement + // GetHTRBlockForInstructionId. In this specific case, TraceIntelPT knows + // that the IDs are chronologically incremental, so it can traverse the tree + // efficiently using this knowledge. + m_trace.lock()->GetHTRBlockForInstructionId(*this, id); + } + +private: + // Only of them active at a given time. An abstract class might also come + // handy. + std::unique_ptr m_in_memory_storage; + std::unique_ptr m_in_disk_storage; + + // reference to the owning trace + std::weak_ptr m_trace; + + std::map> + m_metrics; +}; + +// This is an example of a consumer of HTR and a TSC analyzer. With the +// composable design it's relatively simple to traverse de functions, print them +// with indentation and include time information +void DumpFunctionsWithTime(ThreadSP &thread_sp, lldb::Stream &s) { + TargetSP &target_sp = thread_sp->GetTarget(); + // We first compute all the expensive data that we need around HTR. + // We ask the trace for a call tree, but we can later create other different + // hierarchical structures with different names + TraceHTR &htr = target_sp->GetTrace().GetOrCreateCallTree(thread_sp); + TraceHTRCumulativeCounterMetric &tscs = + htr.GetOrCreateCumulativeCounterMetric(eTraceCounterTSC); + + // Now we can use the tscs to print the functions + trace_htr.Recurse( + TraceHTR::RecurseOrder::PreOrder, + [&](const TraceHTRBlock &block, const TraceHTRBlock *parent, + TraceCursor &cursor, int level) { + cursor.GoToId(block.GetFirstInstructionId()); + Address address; + address.SetLoadAddress(cursor.GetInstructionAddress(), &target); + + SymbolContext sc; + address.CalculateSymbolContext(&sc, eSymbolContextEverything); + + s.SetIndentLevel(level); + s.Indent(); + // this is an oversimplification of all the cases + s << sc.function->GetDisplayName() + << " [tsc: total=" << tscs.GetTotalCount(block) + << " local=" << tscs.GetLocalCount(block) << "]\n"; + }); +} + +} // namespace lldb_private + +#endif // LLDB_TARGET_TRACE_HTR_H