Index: llvm/trunk/tools/llvm-xray/trie-node.h =================================================================== --- llvm/trunk/tools/llvm-xray/trie-node.h +++ llvm/trunk/tools/llvm-xray/trie-node.h @@ -0,0 +1,92 @@ +//===- trie-node.h - XRay Call Stack Data Structure -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides a data structure and routines for working with call stacks +// of instrumented functions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TOOLS_LLVM_XRAY_STACK_TRIE_H +#define LLVM_TOOLS_LLVM_XRAY_STACK_TRIE_H + +#include +#include + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" + +/// A type to represent a trie of invocations. It is useful to construct a +/// graph of these nodes from reading an XRay trace, such that each function +/// call can be placed in a larger context. +/// +/// The template parameter allows users of the template to attach their own +/// data elements to each node in the invocation graph. +template struct TrieNode { + /// The function ID. + int32_t FuncId; + + /// The caller of this function. + TrieNode *Parent; + + /// The callees from this function. + llvm::SmallVector *, 4> Callees; + + /// Additional parameterized data on each node. + AssociatedData ExtraData; +}; + +/// Merges together two TrieNodes with like function ids, aggregating their +/// callee lists and durations. The caller must provide storage where new merged +/// nodes can be allocated in the form of a linked list. +template +TrieNode * +mergeTrieNodes(const TrieNode &Left, const TrieNode &Right, + /*Non-deduced pointer type for nullptr compatibility*/ + typename std::remove_reference *>::type NewParent, + std::forward_list> &NodeStore, + Callable &&MergeCallable) { + llvm::function_ref MergeFn( + std::forward(MergeCallable)); + assert(Left.FuncId == Right.FuncId); + NodeStore.push_front(TrieNode{ + Left.FuncId, NewParent, {}, MergeFn(Left.ExtraData, Right.ExtraData)}); + auto I = NodeStore.begin(); + auto *Node = &*I; + + // Build a map of callees from the left side. + llvm::DenseMap *> LeftCalleesByFuncId; + for (auto *Callee : Left.Callees) { + LeftCalleesByFuncId[Callee->FuncId] = Callee; + } + + // Iterate through the right side, either merging with the map values or + // directly adding to the Callees vector. The iteration also removes any + // merged values from the left side map. + // TODO: Unroll into iterative and explicit stack for efficiency. + for (auto *Callee : Right.Callees) { + auto iter = LeftCalleesByFuncId.find(Callee->FuncId); + if (iter != LeftCalleesByFuncId.end()) { + Node->Callees.push_back( + mergeTrieNodes(*(iter->second), *Callee, Node, NodeStore, MergeFn)); + LeftCalleesByFuncId.erase(iter); + } else { + Node->Callees.push_back(Callee); + } + } + + // Add any callees that weren't found in the right side. + for (auto MapPairIter : LeftCalleesByFuncId) { + Node->Callees.push_back(MapPairIter.second); + } + + return Node; +} + +#endif // LLVM_TOOLS_LLVM_XRAY_STACK_TRIE_H Index: llvm/trunk/tools/llvm-xray/xray-converter.h =================================================================== --- llvm/trunk/tools/llvm-xray/xray-converter.h +++ llvm/trunk/tools/llvm-xray/xray-converter.h @@ -15,8 +15,8 @@ #define LLVM_TOOLS_LLVM_XRAY_XRAY_CONVERTER_H #include "func-id-helper.h" -#include "llvm/XRay/XRayRecord.h" #include "llvm/XRay/Trace.h" +#include "llvm/XRay/XRayRecord.h" namespace llvm { namespace xray { @@ -31,6 +31,11 @@ void exportAsYAML(const Trace &Records, raw_ostream &OS); void exportAsRAWv1(const Trace &Records, raw_ostream &OS); + + /// For this conversion, the Function records within each thread are expected + /// to be in sorted TSC order. The trace event format encodes stack traces, so + /// the linear history is essential for correct output. + void exportAsChromeTraceEventFormat(const Trace &Records, raw_ostream &OS); }; } // namespace xray Index: llvm/trunk/tools/llvm-xray/xray-converter.cc =================================================================== --- llvm/trunk/tools/llvm-xray/xray-converter.cc +++ llvm/trunk/tools/llvm-xray/xray-converter.cc @@ -12,10 +12,12 @@ //===----------------------------------------------------------------------===// #include "xray-converter.h" +#include "trie-node.h" #include "xray-registry.h" #include "llvm/DebugInfo/Symbolize/Symbolize.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/Support/raw_ostream.h" @@ -32,11 +34,14 @@ static cl::opt ConvertInput(cl::Positional, cl::desc(""), cl::Required, cl::sub(Convert)); -enum class ConvertFormats { BINARY, YAML }; +enum class ConvertFormats { BINARY, YAML, CHROME_TRACE_EVENT }; static cl::opt ConvertOutputFormat( "output-format", cl::desc("output format"), cl::values(clEnumValN(ConvertFormats::BINARY, "raw", "output in binary"), - clEnumValN(ConvertFormats::YAML, "yaml", "output in yaml")), + clEnumValN(ConvertFormats::YAML, "yaml", "output in yaml"), + clEnumValN(ConvertFormats::CHROME_TRACE_EVENT, "trace_event", + "Output in chrome's trace event format. " + "May be visualized with the Catapult trace viewer.")), cl::sub(Convert)); static cl::alias ConvertOutputFormat2("f", cl::aliasopt(ConvertOutputFormat), cl::desc("Alias for -output-format"), @@ -142,6 +147,192 @@ } } +namespace { + +// A structure that allows building a dictionary of stack ids for the Chrome +// trace event format. +struct StackIdData { + // Each Stack of function calls has a unique ID. + unsigned id; + + // Bookkeeping so that IDs can be maintained uniquely across threads. + // Traversal keeps sibling pointers to other threads stacks. This is helpful + // to determine when a thread encounters a new stack and should assign a new + // unique ID. + SmallVector *, 4> siblings; +}; + +using StackTrieNode = TrieNode; + +// A helper function to find the sibling nodes for an encountered function in a +// thread of execution. Relies on the invariant that each time a new node is +// traversed in a thread, sibling bidirectional pointers are maintained. +SmallVector +findSiblings(StackTrieNode *parent, int32_t FnId, uint32_t TId, + const DenseMap> + &StackRootsByThreadId) { + + SmallVector Siblings{}; + + if (parent == nullptr) { + for (auto map_iter : StackRootsByThreadId) { + // Only look for siblings in other threads. + if (map_iter.first != TId) + for (auto node_iter : map_iter.second) { + if (node_iter->FuncId == FnId) + Siblings.push_back(node_iter); + } + } + return Siblings; + } + + for (auto *ParentSibling : parent->ExtraData.siblings) + for (auto node_iter : ParentSibling->Callees) + if (node_iter->FuncId == FnId) + Siblings.push_back(node_iter); + + return Siblings; +} + +// Given a function being invoked in a thread with id TId, finds and returns the +// StackTrie representing the function call stack. If no node exists, creates +// the node. Assigns unique IDs to stacks newly encountered among all threads +// and keeps sibling links up to when creating new nodes. +StackTrieNode *findOrCreateStackNode( + StackTrieNode *Parent, int32_t FuncId, uint32_t TId, + DenseMap> &StackRootsByThreadId, + DenseMap &StacksByStackId, unsigned *id_counter, + std::forward_list &NodeStore) { + SmallVector &ParentCallees = + Parent == nullptr ? StackRootsByThreadId[TId] : Parent->Callees; + auto match = find_if(ParentCallees, [FuncId](StackTrieNode *ParentCallee) { + return FuncId == ParentCallee->FuncId; + }); + if (match != ParentCallees.end()) + return *match; + + SmallVector siblings = + findSiblings(Parent, FuncId, TId, StackRootsByThreadId); + if (siblings.empty()) { + NodeStore.push_front({FuncId, Parent, {}, {(*id_counter)++, {}}}); + StackTrieNode *CurrentStack = &NodeStore.front(); + StacksByStackId[*id_counter - 1] = CurrentStack; + ParentCallees.push_back(CurrentStack); + return CurrentStack; + } + unsigned stack_id = siblings[0]->ExtraData.id; + NodeStore.push_front({FuncId, Parent, {}, {stack_id, std::move(siblings)}}); + StackTrieNode *CurrentStack = &NodeStore.front(); + for (auto *sibling : CurrentStack->ExtraData.siblings) + sibling->ExtraData.siblings.push_back(CurrentStack); + ParentCallees.push_back(CurrentStack); + return CurrentStack; +} + +void writeTraceViewerRecord(raw_ostream &OS, int32_t FuncId, uint32_t TId, + bool Symbolize, + const FuncIdConversionHelper &FuncIdHelper, + double EventTimestampUs, + const StackTrieNode &StackCursor, + StringRef FunctionPhenotype) { + OS << " "; + OS << llvm::formatv( + R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )" + R"("ts" : "{3:f3}", "sf" : "{4}" })", + (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId) + : llvm::to_string(FuncId)), + FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id); +} + +} // namespace + +void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records, + raw_ostream &OS) { + const auto &FH = Records.getFileHeader(); + auto CycleFreq = FH.CycleFrequency; + + unsigned id_counter = 0; + + OS << "{\n \"traceEvents\": ["; + DenseMap StackCursorByThreadId{}; + DenseMap> StackRootsByThreadId{}; + DenseMap StacksByStackId{}; + std::forward_list NodeStore{}; + int loop_count = 0; + for (const auto &R : Records) { + if (loop_count++ == 0) + OS << "\n"; + else + OS << ",\n"; + + // Chrome trace event format always wants data in micros. + // CyclesPerMicro = CycleHertz / 10^6 + // TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp + // Could lose some precision here by converting the TSC to a double to + // multiply by the period in micros. 52 bit mantissa is a good start though. + // TODO: Make feature request to Chrome Trace viewer to accept ticks and a + // frequency or do some more involved calculation to avoid dangers of + // conversion. + double EventTimestampUs = double(1000000) / CycleFreq * double(R.TSC); + StackTrieNode *&StackCursor = StackCursorByThreadId[R.TId]; + switch (R.Type) { + case RecordTypes::ENTER: + case RecordTypes::ENTER_ARG: + StackCursor = findOrCreateStackNode(StackCursor, R.FuncId, R.TId, + StackRootsByThreadId, StacksByStackId, + &id_counter, NodeStore); + // Each record is represented as a json dictionary with function name, + // type of B for begin or E for end, thread id, process id (faked), + // timestamp in microseconds, and a stack frame id. The ids are logged + // in an id dictionary after the events. + writeTraceViewerRecord(OS, R.FuncId, R.TId, Symbolize, FuncIdHelper, + EventTimestampUs, *StackCursor, "B"); + break; + case RecordTypes::EXIT: + case RecordTypes::TAIL_EXIT: + // No entries to record end for. + if (StackCursor == nullptr) + break; + // Should we emit an END record anyway or account this condition? + // (And/Or in loop termination below) + StackTrieNode *PreviousCursor = nullptr; + do { + writeTraceViewerRecord(OS, StackCursor->FuncId, R.TId, Symbolize, + FuncIdHelper, EventTimestampUs, *StackCursor, + "E"); + PreviousCursor = StackCursor; + StackCursor = StackCursor->Parent; + } while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr); + break; + } + } + OS << "\n ],\n"; // Close the Trace Events array. + OS << " " + << "\"displayTimeUnit\": \"ns\",\n"; + + // The stackFrames dictionary substantially reduces size of the output file by + // avoiding repeating the entire call stack of function names for each entry. + OS << R"( "stackFrames": {)"; + int stack_frame_count = 0; + for (auto map_iter : StacksByStackId) { + if (stack_frame_count++ == 0) + OS << "\n"; + else + OS << ",\n"; + OS << " "; + OS << llvm::formatv( + R"("{0}" : { "name" : "{1}")", map_iter.first, + (Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId) + : llvm::to_string(map_iter.second->FuncId))); + if (map_iter.second->Parent != nullptr) + OS << llvm::formatv(R"(, "parent": "{0}")", + map_iter.second->Parent->ExtraData.id); + OS << " }"; + } + OS << "\n }\n"; // Close the stack frames map. + OS << "}\n"; // Close the JSON entry. +} + namespace llvm { namespace xray { @@ -191,6 +382,9 @@ case ConvertFormats::BINARY: TC.exportAsRAWv1(T, OS); break; + case ConvertFormats::CHROME_TRACE_EVENT: + TC.exportAsChromeTraceEventFormat(T, OS); + break; } return Error::success(); }); Index: llvm/trunk/tools/llvm-xray/xray-stacks.cc =================================================================== --- llvm/trunk/tools/llvm-xray/xray-stacks.cc +++ llvm/trunk/tools/llvm-xray/xray-stacks.cc @@ -19,6 +19,7 @@ #include #include "func-id-helper.h" +#include "trie-node.h" #include "xray-registry.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" @@ -255,96 +256,61 @@ /// maintain an index of unique functions, and provide a means of iterating /// through all the instrumented call stacks which we know about. -struct TrieNode { - int32_t FuncId; - TrieNode *Parent; - SmallVector Callees; - // Separate durations depending on whether the node is the deepest node in the - // stack. - SmallVector TerminalDurations; - SmallVector IntermediateDurations; +struct StackDuration { + llvm::SmallVector TerminalDurations; + llvm::SmallVector IntermediateDurations; }; -/// Merges together two TrieNodes with like function ids, aggregating their -/// callee lists and durations. The caller must provide storage where new merged -/// nodes can be allocated in the form of a linked list. -TrieNode *mergeTrieNodes(const TrieNode &Left, const TrieNode &Right, - TrieNode *NewParent, - std::forward_list &NodeStore) { - assert(Left.FuncId == Right.FuncId); - NodeStore.push_front(TrieNode{Left.FuncId, NewParent, {}, {}, {}}); - auto I = NodeStore.begin(); - auto *Node = &*I; - - // Build a map of callees from the left side. - DenseMap LeftCalleesByFuncId; - for (auto *Callee : Left.Callees) { - LeftCalleesByFuncId[Callee->FuncId] = Callee; - } - - // Iterate through the right side, either merging with the map values or - // directly adding to the Callees vector. The iteration also removes any - // merged values from the left side map. - for (auto *Callee : Right.Callees) { - auto iter = LeftCalleesByFuncId.find(Callee->FuncId); - if (iter != LeftCalleesByFuncId.end()) { - Node->Callees.push_back( - mergeTrieNodes(*(iter->second), *Callee, Node, NodeStore)); - LeftCalleesByFuncId.erase(iter); - } else { - Node->Callees.push_back(Callee); - } - } - - // Add any callees that weren't found in the right side. - for (auto MapPairIter : LeftCalleesByFuncId) { - Node->Callees.push_back(MapPairIter.second); - } - +StackDuration mergeStackDuration(const StackDuration &Left, + const StackDuration &Right) { + StackDuration Data{}; + Data.TerminalDurations.reserve(Left.TerminalDurations.size() + + Right.TerminalDurations.size()); + Data.IntermediateDurations.reserve(Left.IntermediateDurations.size() + + Right.IntermediateDurations.size()); // Aggregate the durations. - for (auto duration : Left.TerminalDurations) { - Node->TerminalDurations.push_back(duration); - } - for (auto duration : Right.TerminalDurations) { - Node->TerminalDurations.push_back(duration); - } - for (auto duration : Left.IntermediateDurations) { - Node->IntermediateDurations.push_back(duration); - } - for (auto duration : Right.IntermediateDurations) { - Node->IntermediateDurations.push_back(duration); - } - - return Node; + for (auto duration : Left.TerminalDurations) + Data.TerminalDurations.push_back(duration); + for (auto duration : Right.TerminalDurations) + Data.TerminalDurations.push_back(duration); + + for (auto duration : Left.IntermediateDurations) + Data.IntermediateDurations.push_back(duration); + for (auto duration : Right.IntermediateDurations) + Data.IntermediateDurations.push_back(duration); + return Data; } +using StackTrieNode = TrieNode; + template -std::size_t GetValueForStack(const TrieNode *Node); +std::size_t GetValueForStack(const StackTrieNode *Node); // When computing total time spent in a stack, we're adding the timings from // its callees and the timings from when it was a leaf. template <> std::size_t -GetValueForStack(const TrieNode *Node) { - auto TopSum = std::accumulate(Node->TerminalDurations.begin(), - Node->TerminalDurations.end(), 0uLL); - return std::accumulate(Node->IntermediateDurations.begin(), - Node->IntermediateDurations.end(), TopSum); +GetValueForStack(const StackTrieNode *Node) { + auto TopSum = std::accumulate(Node->ExtraData.TerminalDurations.begin(), + Node->ExtraData.TerminalDurations.end(), 0uLL); + return std::accumulate(Node->ExtraData.IntermediateDurations.begin(), + Node->ExtraData.IntermediateDurations.end(), TopSum); } // Calculates how many times a function was invoked. // TODO: Hook up option to produce stacks template <> std::size_t -GetValueForStack(const TrieNode *Node) { - return Node->TerminalDurations.size() + Node->IntermediateDurations.size(); +GetValueForStack(const StackTrieNode *Node) { + return Node->ExtraData.TerminalDurations.size() + + Node->ExtraData.IntermediateDurations.size(); } // Make sure there are implementations for each enum value. template struct DependentFalseType : std::false_type {}; template -std::size_t GetValueForStack(const TrieNode *Node) { +std::size_t GetValueForStack(const StackTrieNode *Node) { static_assert(DependentFalseType::value, "No implementation found for aggregation type provided."); return 0; @@ -353,21 +319,21 @@ class StackTrie { // Avoid the magic number of 4 propagated through the code with an alias. // We use this SmallVector to track the root nodes in a call graph. - using RootVector = SmallVector; + using RootVector = SmallVector; // We maintain pointers to the roots of the tries we see. DenseMap Roots; // We make sure all the nodes are accounted for in this list. - std::forward_list NodeStore; + std::forward_list NodeStore; // A map of thread ids to pairs call stack trie nodes and their start times. - DenseMap, 8>> + DenseMap, 8>> ThreadStackMap; - TrieNode *createTrieNode(uint32_t ThreadId, int32_t FuncId, - TrieNode *Parent) { - NodeStore.push_front(TrieNode{FuncId, Parent, {}, {}, {}}); + StackTrieNode *createTrieNode(uint32_t ThreadId, int32_t FuncId, + StackTrieNode *Parent) { + NodeStore.push_front(StackTrieNode{FuncId, Parent, {}, {{}, {}}}); auto I = NodeStore.begin(); auto *Node = &*I; if (!Parent) @@ -375,10 +341,10 @@ return Node; } - TrieNode *findRootNode(uint32_t ThreadId, int32_t FuncId) { + StackTrieNode *findRootNode(uint32_t ThreadId, int32_t FuncId) { const auto &RootsByThread = Roots[ThreadId]; auto I = find_if(RootsByThread, - [&](TrieNode *N) { return N->FuncId == FuncId; }); + [&](StackTrieNode *N) { return N->FuncId == FuncId; }); return (I == RootsByThread.end()) ? nullptr : *I; } @@ -416,7 +382,7 @@ auto &Top = TS.back(); auto I = find_if(Top.first->Callees, - [&](TrieNode *N) { return N->FuncId == R.FuncId; }); + [&](StackTrieNode *N) { return N->FuncId == R.FuncId; }); if (I == Top.first->Callees.end()) { // We didn't find the callee in the stack trie, so we're going to // add to the stack then set up the pointers properly. @@ -447,8 +413,8 @@ return AccountRecordStatus::ENTRY_NOT_FOUND; } - auto FunctionEntryMatch = - find_if(reverse(TS), [&](const std::pair &E) { + auto FunctionEntryMatch = find_if( + reverse(TS), [&](const std::pair &E) { return E.first->FuncId == R.FuncId; }); auto status = AccountRecordStatus::OK; @@ -461,14 +427,14 @@ } auto I = FunctionEntryMatch.base(); for (auto &E : make_range(I, TS.end() - 1)) - E.first->IntermediateDurations.push_back(std::max(E.second, R.TSC) - - std::min(E.second, R.TSC)); + E.first->ExtraData.IntermediateDurations.push_back( + std::max(E.second, R.TSC) - std::min(E.second, R.TSC)); auto &Deepest = TS.back(); if (wasLastRecordExit) - Deepest.first->IntermediateDurations.push_back( + Deepest.first->ExtraData.IntermediateDurations.push_back( std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC)); else - Deepest.first->TerminalDurations.push_back( + Deepest.first->ExtraData.TerminalDurations.push_back( std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC)); TS.erase(I, TS.end()); return status; @@ -479,11 +445,11 @@ bool isEmpty() const { return Roots.empty(); } - void printStack(raw_ostream &OS, const TrieNode *Top, + void printStack(raw_ostream &OS, const StackTrieNode *Top, FuncIdConversionHelper &FN) { // Traverse the pointers up to the parent, noting the sums, then print // in reverse order (callers at top, callees down bottom). - SmallVector CurrentStack; + SmallVector CurrentStack; for (auto *F = Top; F != nullptr; F = F->Parent) CurrentStack.push_back(F); int Level = 0; @@ -491,21 +457,22 @@ "count", "sum"); for (auto *F : reverse(make_range(CurrentStack.begin() + 1, CurrentStack.end()))) { - auto Sum = std::accumulate(F->IntermediateDurations.begin(), - F->IntermediateDurations.end(), 0LL); + auto Sum = std::accumulate(F->ExtraData.IntermediateDurations.begin(), + F->ExtraData.IntermediateDurations.end(), 0LL); auto FuncId = FN.SymbolOrNumber(F->FuncId); OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++, FuncId.size() > 60 ? FuncId.substr(0, 57) + "..." : FuncId, - F->IntermediateDurations.size(), Sum); + F->ExtraData.IntermediateDurations.size(), Sum); } auto *Leaf = *CurrentStack.begin(); - auto LeafSum = std::accumulate(Leaf->TerminalDurations.begin(), - Leaf->TerminalDurations.end(), 0LL); + auto LeafSum = + std::accumulate(Leaf->ExtraData.TerminalDurations.begin(), + Leaf->ExtraData.TerminalDurations.end(), 0LL); auto LeafFuncId = FN.SymbolOrNumber(Leaf->FuncId); OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++, LeafFuncId.size() > 60 ? LeafFuncId.substr(0, 57) + "..." : LeafFuncId, - Leaf->TerminalDurations.size(), LeafSum); + Leaf->ExtraData.TerminalDurations.size(), LeafSum); OS << "\n"; } @@ -552,20 +519,20 @@ /// Creates a merged list of Tries for unique stacks that disregards their /// thread IDs. - RootVector mergeAcrossThreads(std::forward_list &NodeStore) { + RootVector mergeAcrossThreads(std::forward_list &NodeStore) { RootVector MergedByThreadRoots; for (auto MapIter : Roots) { const auto &RootNodeVector = MapIter.second; for (auto *Node : RootNodeVector) { auto MaybeFoundIter = - find_if(MergedByThreadRoots, [Node](TrieNode *elem) { + find_if(MergedByThreadRoots, [Node](StackTrieNode *elem) { return Node->FuncId == elem->FuncId; }); if (MaybeFoundIter == MergedByThreadRoots.end()) { MergedByThreadRoots.push_back(Node); } else { - MergedByThreadRoots.push_back( - mergeTrieNodes(**MaybeFoundIter, *Node, nullptr, NodeStore)); + MergedByThreadRoots.push_back(mergeTrieNodes( + **MaybeFoundIter, *Node, nullptr, NodeStore, mergeStackDuration)); MergedByThreadRoots.erase(MaybeFoundIter); } } @@ -577,7 +544,7 @@ template void printAllAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN, StackOutputFormat format) { - std::forward_list AggregatedNodeStore; + std::forward_list AggregatedNodeStore; RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore); bool reportThreadId = false; printAll(OS, FN, MergedByThreadRoots, @@ -586,7 +553,7 @@ /// Merges the trie by thread id before printing top stacks. void printAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN) { - std::forward_list AggregatedNodeStore; + std::forward_list AggregatedNodeStore; RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore); print(OS, FN, MergedByThreadRoots); } @@ -595,7 +562,7 @@ template void printAll(raw_ostream &OS, FuncIdConversionHelper &FN, RootVector RootValues, uint32_t ThreadId, bool ReportThread) { - SmallVector S; + SmallVector S; for (const auto *N : RootValues) { S.clear(); S.push_back(N); @@ -616,10 +583,10 @@ template void printSingleStack(raw_ostream &OS, FuncIdConversionHelper &Converter, bool ReportThread, uint32_t ThreadId, - const TrieNode *Node) { + const StackTrieNode *Node) { if (ReportThread) OS << "thread_" << ThreadId << ";"; - SmallVector lineage{}; + SmallVector lineage{}; lineage.push_back(Node); while (lineage.back()->Parent != nullptr) lineage.push_back(lineage.back()->Parent); @@ -639,15 +606,17 @@ // - Total number of unique stacks // - Top 10 stacks by count // - Top 10 stacks by aggregate duration - SmallVector, 11> TopStacksByCount; - SmallVector, 11> TopStacksBySum; - auto greater_second = [](const std::pair &A, - const std::pair &B) { - return A.second > B.second; - }; + SmallVector, 11> + TopStacksByCount; + SmallVector, 11> TopStacksBySum; + auto greater_second = + [](const std::pair &A, + const std::pair &B) { + return A.second > B.second; + }; uint64_t UniqueStacks = 0; for (const auto *N : RootValues) { - SmallVector S; + SmallVector S; S.emplace_back(N); while (!S.empty()) { @@ -655,10 +624,11 @@ // We only start printing the stack (by walking up the parent pointers) // when we get to a leaf function. - if (!Top->TerminalDurations.empty()) { + if (!Top->ExtraData.TerminalDurations.empty()) { ++UniqueStacks; - auto TopSum = std::accumulate(Top->TerminalDurations.begin(), - Top->TerminalDurations.end(), 0uLL); + auto TopSum = + std::accumulate(Top->ExtraData.TerminalDurations.begin(), + Top->ExtraData.TerminalDurations.end(), 0uLL); { auto E = std::make_pair(Top, TopSum); TopStacksBySum.insert(std::lower_bound(TopStacksBySum.begin(), @@ -669,7 +639,8 @@ TopStacksBySum.pop_back(); } { - auto E = std::make_pair(Top, Top->TerminalDurations.size()); + auto E = + std::make_pair(Top, Top->ExtraData.TerminalDurations.size()); TopStacksByCount.insert(std::lower_bound(TopStacksByCount.begin(), TopStacksByCount.end(), E, greater_second),