Index: test/tools/llvm-xray/X86/graph-deduce-tail-call.yaml =================================================================== --- /dev/null +++ test/tools/llvm-xray/X86/graph-deduce-tail-call.yaml @@ -0,0 +1,65 @@ +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d \ +#RUN: | FileCheck %s -check-prefix=EMPTY +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e count \ +#RUN: | FileCheck %s -check-prefix=EMPTY +# +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e min \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e med \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e 90p \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e 99p \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e max \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e sum \ +#RUN: | FileCheck %s -check-prefix=TIME +# +--- +header: + version: 1 + type: 0 + constant-tsc: true + nonstop-tsc: true + cycle-frequency: 0 +records: +# Here we reconstruct the following call trace: +# +# f1() +# f2() +# f3() +# +# But we find that we're missing an exit record for f2() because it's +# tail-called f3(). We make sure that if we see a trace like this that we can +# deduce tail calls, and account the time (potentially wrongly) to f2() when +# f1() exits. That is because we don't go back to f3()'s entry record to +# properly do the math on the timing of f2(). +# +# Note that by default, tail/sibling call deduction is disabled, and is enabled +# with a flag "-d" or "-deduce-sibling-calls". +# + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-enter, tsc: 10000 } + - { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-enter, tsc: 10001 } + - { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-enter, tsc: 10002 } + - { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-exit, tsc: 10003 } + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-exit, tsc: 10004 } +... + +#EMPTY: digraph xray { +#EMPTY-DAG: F0 -> F1 [label="1"]; +#EMPTY-DAG: F1 -> F2 [label="1"]; +#EMPTY-DAG: F2 -> F3 [label="1"]; +#EMPTY-DAG: F1 [label="@(1)"]; +#EMPTY-DAG: F2 [label="@(2)"]; +#EMPTY-DAG: F3 [label="@(3)"]; +#EMPTY: } + +#TIME: digraph xray { +#TIME-DAG: F0 -> F1 [label="4.{{.*}}"]; +#TIME-DAG: F1 -> F2 [label="3.{{.*}}"]; +#TIME-DAG: F2 -> F3 [label="1.{{.*}}"]; +#TIME-DAG: F1 [label="@(1)"]; +#TIME-DAG: F2 [label="@(2)"]; +#TIME-DAG: F3 [label="@(3)"]; +#TIME: } Index: test/tools/llvm-xray/X86/graph-simple-case.yaml =================================================================== --- /dev/null +++ test/tools/llvm-xray/X86/graph-simple-case.yaml @@ -0,0 +1,41 @@ +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml \ +#RUN: | FileCheck %s -check-prefix=EMPTY +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e count \ +#RUN: | FileCheck %s -check-prefix=EMPTY +# +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e min \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e med \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e 90p \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e 99p \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e max \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -i=yaml -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e sum \ +#RUN: | FileCheck %s -check-prefix=TIME +--- +header: + version: 1 + type: 0 + constant-tsc: true + nonstop-tsc: true + cycle-frequency: 2601000000 +records: + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-enter, + tsc: 10001 } + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-exit, + tsc: 10100 } +... + + +#EMPTY: digraph xray { +#EMPTY: F0 -> F1 [label="1"]; +#EMPTY: F1 [label="@(1)"]; +#EMPTY: } + +#TIME: digraph xray { +#TIME: F0 -> F1 [label="3.8{{.*}}e-08"]; +#TIME: F1 [label="@(1)"]; +#TIME: } Index: tools/llvm-xray/CMakeLists.txt =================================================================== --- tools/llvm-xray/CMakeLists.txt +++ tools/llvm-xray/CMakeLists.txt @@ -11,6 +11,7 @@ xray-converter.cc xray-extract.cc xray-extract.cc + xray-graph.cc xray-log-reader.cc xray-registry.cc) Index: tools/llvm-xray/xray-graph.h =================================================================== --- /dev/null +++ tools/llvm-xray/xray-graph.h @@ -0,0 +1,132 @@ +//===-- xray-graph.h - XRay Function Call Graph Renderer --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Generate a DOT file to represent the function call graph encountered in +// the trace. +// +//===----------------------------------------------------------------------===// + +#ifndef XRAY_GRAPH_H +#define XRAY_GRAPH_H + +#include +#include + +#include "func-id-helper.h" +#include "xray-record.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { +namespace xray { +/// A class encapsulating the logic related to analyzing XRay traces, producting +/// Graphs from them and then exporting those graphs for review. +class GraphRenderer { + /// An inner struct for common timing statistics information + struct TimeStat { + uint64_t Count; + double Min; + double Median; + double Pct90; + double Pct99; + double Max; + double Sum; + }; + /// An inner struct for storing edge attributes for our graph. Here the + /// attributes are mainly function call statistics. + /// + /// FIXME: expand to contain more information eg call latencies. + struct EdgeAttribute { + TimeStat S; + std::vector Timings; + }; + + /// The Graph stored in an edge-list like format, with the edges also having + /// An attached set of attributes. + DenseMap> Graph; + + /// An Inner Struct for storing vertex attributes, at the moment just + /// SymbolNames, however in future we could store bulk function statistics. + /// + /// FIXME: Store more attributes based on instrumentation map. + struct VertexAttribute { + std::string SymbolName; + TimeStat S; + }; + + /// Graph Vertex Attributes. These are presently stored seperate from the + /// main graph. + DenseMap VertexAttrs; + + struct FunctionAttr{ + int32_t FuncId; + uint64_t TSC; + }; + /// Use a Map to store the Function stack for each thread whilst building the + /// graph. + /// + /// FIXME: Perhaps we can Build this into LatencyAccountant? or vise versa? + DenseMap> PerThreadFunctionStack; + + /// Usefull object for getting human readable Symbol Names. + FuncIdConversionHelper &FuncIdHelper; + bool DeduceSiblingCalls = false; + uint64_t CurrentMaxTSC = 0; + + /// A private function to help implement the statistic generation functions; + template + void getStats(U begin, V end, GraphRenderer::TimeStat &S); +public: + /// Takes in a reference to a FuncIdHelper in order to have ready access to + /// Symbol names. + explicit GraphRenderer(FuncIdConversionHelper &FuncIdHelper, bool DSC) + : FuncIdHelper(FuncIdHelper), DeduceSiblingCalls(DSC){} + + /// Process an Xray record and expand the graph. + /// + /// This Function will return true on success, or false if records are not + /// presented in per-thread call-tree DFS order. (That is for each thread the + /// Records should be in order runtime on an ideal system.) + /// + /// FIXME: Make this more robust against small irregularities. + bool accountRecord(const XRayRecord &Record); + + /// An enum for enumerating the various statistics gathered on latencies + enum class StatType{ + COUNT, + MIN, + MED, + PCT90, + PCT99, + MAX, + SUM + }; + + /// Output the Embedded graph in DOT format on \p OS, labeling the edges by + /// \p T + void exportGraphAsDOT(raw_ostream &OS, StatType T = StatType::COUNT) const; + + /// Calculates latency statistics for each edge and stores the data in the + /// Graph + void calculateEdgeStatistics(void); + + ///Calculates latency statistics for each vertex and stores the data in the + /// Graph + void calculateVertexStatistics(void); + + /// Normalises latency statistics for each edge and vertex by CycleFrequency; + void normaliseStatistics(double CycleFrequency); +private: + /// A private function to facilitate the output of edge labels; + void outputEdgeInfo(const TimeStat &S, StatType T, raw_ostream &OS) const; +}; +} +} + +#endif // XRAY_GRAPH_H Index: tools/llvm-xray/xray-graph.cc =================================================================== --- /dev/null +++ tools/llvm-xray/xray-graph.cc @@ -0,0 +1,341 @@ +//===-- xray-graph.c - XRay Function Call Graph Renderer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Generate a DOT file to represent the function call graph encountered in +// the trace. +// +//===----------------------------------------------------------------------===// +#include +#include +#include +#include + +#include "xray-extract.h" +#include "xray-graph.h" +#include "xray-log-reader.h" +#include "xray-registry.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" + +using namespace llvm; +using namespace xray; + +// Setup llvm-xray graph subcommand and its options. +static cl::SubCommand Graph("graph", "Generate function-call graph"); +static cl::opt GraphInput(cl::Positional, + cl::desc(""), + cl::Required, cl::sub(Graph)); + +static cl::opt GraphInputFormat( + "input-format", cl::desc("input format"), + cl::values(clEnumValN(LogInputFormats::RAW, "binary", "input in binary"), + clEnumValN(LogInputFormats::YAML, "yaml", "input in yaml")), + cl::sub(Graph)); +static cl::alias GraphInputFormat2("i", cl::desc("Alias for -input-format"), + cl::aliasopt(GraphInputFormat), + cl::sub(Graph)); + +static cl::opt + GraphOutput("output", cl::value_desc("Output file"), cl::init("-"), + cl::desc("output file; use '-' for stdout"), cl::sub(Graph)); +static cl::alias GraphOutput2("o", cl::aliasopt(GraphOutput), + cl::desc("Alias for -output"), cl::sub(Graph)); + +static cl::opt GraphInstrMap( + "instr_map", cl::desc("binary with the instrumrntation map, or " + "a separate instrumentation map"), + cl::value_desc("binary with xray_instr_map"), cl::sub(Graph), cl::init("")); +static cl::alias GraphInstrMap2("m", cl::aliasopt(GraphInstrMap), + cl::desc("alias for -instr_map"), + cl::sub(Graph)); + +static cl::opt InstrMapFormat( + "instr-map-format", cl::desc("format of instrumentation map"), + cl::values(clEnumValN(InstrumentationMapExtractor::InputFormats::ELF, "elf", + "instrumentation map in an ELF header"), + clEnumValN(InstrumentationMapExtractor::InputFormats::YAML, + "yaml", "instrumentation map in YAML")), + cl::sub(Graph), cl::init(InstrumentationMapExtractor::InputFormats::ELF)); +static cl::alias InstrMapFormat2("t", cl::aliasopt(InstrMapFormat), + cl::desc("Alias for -instr-map-format"), + cl::sub(Graph)); + +static cl::opt GraphDeduceSiblingCalls( + "deduce-sibling-calls", + cl::desc("Deduce sibling calls when unrolling function call stacks"), + cl::sub(Graph), cl::init(false)); +static cl::alias + GraphDeduceSiblingCalls2("d", cl::aliasopt(GraphDeduceSiblingCalls), + cl::desc("Alias for -deduce-sibling-calls"), + cl::sub(Graph)); + +static cl::opt GraphEdgeLabel( + "edge-label", cl::desc("Output graphs with edges labeled with this field"), + cl::value_desc("field"), cl::sub(Graph), cl::init(GraphRenderer::StatType::COUNT), + cl::values(clEnumValN(GraphRenderer::StatType::COUNT, "count", "function call counts"), + clEnumValN(GraphRenderer::StatType::MIN, "min", "minimum function durations"), + clEnumValN(GraphRenderer::StatType::MED, "med", "median function durations"), + clEnumValN(GraphRenderer::StatType::PCT90, "90p", "90th percentile durations"), + clEnumValN(GraphRenderer::StatType::PCT99, "99p", "99th percentile durations"), + clEnumValN(GraphRenderer::StatType::MAX, "max", "maximum function durations"), + clEnumValN(GraphRenderer::StatType::SUM, "sum", "sum of call durations"))); +static cl::alias GraphEdgeLabel2("e", cl::aliasopt(GraphEdgeLabel), + cl::desc("Alias for -edge-label"), + cl::sub(Graph)); + +namespace { + +template T diff(T L, T R) { return std::max(L, R) - std::min(L, R); } + +} + +// Evaluates an XRay record and performs accounting on it, creating and +// decorating a function call graph as it does so. It does this by maintaining +// a call stack on a per-thread basis and adding edges and verticies to the +// graph as they are seen for the first time. +// +// There is an immaginary root for functions at the top of their stack with +// FuncId 0. +// +// FIXME: make more robust to errors and +// Decorate Graph More Heavily. +bool GraphRenderer::accountRecord(const XRayRecord &Record) { + + if (CurrentMaxTSC == 0) + CurrentMaxTSC = Record.TSC; + + if (Record.TSC < CurrentMaxTSC) + return false; + + auto &ThreadStack = PerThreadFunctionStack[Record.TId]; + switch (Record.Type) { + case RecordTypes::ENTER: { + if (VertexAttrs.count(Record.FuncId) == 0) + VertexAttrs[Record.FuncId].SymbolName = FuncIdHelper.SymbolOrNumber(Record.FuncId); + ThreadStack.push_back({Record.FuncId, Record.TSC}); + break; + } + case RecordTypes::EXIT: { + if (ThreadStack.size() == 0 || ThreadStack.back().FuncId != Record.FuncId){ + if(!DeduceSiblingCalls) + return false; + auto Parent = + std::find_if(ThreadStack.rbegin(), ThreadStack.rend(), + [&](const FunctionAttr &A){ + return A.FuncId == Record.FuncId; + }); + if (Parent == ThreadStack.rend()) + return false; //There is no matching Function for this exit. + while (ThreadStack.back().FuncId != Record.FuncId){ + uint64_t T = diff(ThreadStack.back().TSC, Record.TSC); + int32_t TopFuncId = ThreadStack.back().FuncId; + ThreadStack.pop_back(); + assert(ThreadStack.size() != 0); + Graph[ThreadStack.back().FuncId][TopFuncId].Timings.push_back(T); + } + } + uint64_t T = diff(ThreadStack.back().TSC, Record.TSC); + ThreadStack.pop_back(); + if(ThreadStack.size() == 0){ + Graph[0][Record.FuncId].Timings.push_back(T); + } else { + Graph[ThreadStack.back().FuncId][Record.FuncId].Timings.push_back(T); + } + break; + } + } + + return true; +} +template +void GraphRenderer::getStats(U begin, V end, GraphRenderer::TimeStat &S){ + assert(begin != end); + S.Sum = std::accumulate(begin, end, 0.0); + auto MinMax = std::minmax_element(begin, end); + S.Min = *MinMax.first; + S.Max = *MinMax.second; + S.Count = end - begin; + std::ptrdiff_t MedianOff = S.Count / 2; + std::nth_element(begin, begin + MedianOff, end); + S.Median = *(begin + MedianOff); + std::ptrdiff_t Pct90Off = (S.Count * 9)/10; + std::nth_element(begin, begin + Pct90Off, end); + S.Pct90 = *(begin + Pct90Off); + std::ptrdiff_t Pct99Off = (S.Count * 99)/100; + std::nth_element(begin, begin + Pct99Off, end); + S.Pct99 = *(begin + Pct99Off); +} +void GraphRenderer::normaliseStatistics(double CycleFrequency) { + for (auto &V : Graph) { + for (auto &E : V.second) { + auto &S = E.second.S; + S.Min /= CycleFrequency; + S.Median /= CycleFrequency; + S.Max /= CycleFrequency; + S.Sum /= CycleFrequency; + S.Pct90 /= CycleFrequency; + S.Pct99 /= CycleFrequency; + } + } + for (auto &V : VertexAttrs){ + auto &S = V.second.S; + + S.Min /= CycleFrequency; + S.Median /= CycleFrequency; + S.Max /= CycleFrequency; + S.Sum /= CycleFrequency; + S.Pct90 /= CycleFrequency; + S.Pct99 /= CycleFrequency; + } +} +void GraphRenderer::calculateEdgeStatistics(void) { + for (auto &V : Graph) { + for (auto &E : V.second) { + auto &A = E.second; + getStats(A.Timings.begin(), A.Timings.end(), A.S); + } + } +} + +void GraphRenderer::calculateVertexStatistics(void) { + DenseMap> IncommingEdges; + for (auto &V : Graph){ + for (auto &E : V.second) { + IncommingEdges[E.first].push_back(&E.second); + } + } + std::vector Timings; + for (auto &V : IncommingEdges){ + for (auto &P : V.second){ + Timings.insert(Timings.begin(), P->Timings.begin(), P->Timings.end()); + } + getStats(Timings.begin(), Timings.end(), VertexAttrs[V.first].S); + Timings.clear(); + } +} + +void GraphRenderer::outputEdgeInfo(const TimeStat &S, StatType T, raw_ostream &OS) const{ + switch (T){ + case StatType::COUNT: + OS << S.Count; + break; + case StatType::MIN: + OS << S.Min; + break; + case StatType::MED: + OS << S.Median; + break; + case StatType::PCT90: + OS << S.Pct90; + break; + case StatType::PCT99: + OS << S.Pct99; + break; + case StatType::MAX: + OS << S.Max; + break; + case StatType::SUM: + OS << S.Sum; + break; + } +} + +// Does what the name suggests, it creates a DOT file from the Graph embedded in +// the GraphRenderer object. It does this in the expected way by itterating +// through all edges then vertices and then outputting them and their +// annotations. +// +// FIXME: output more information, better presented. +void GraphRenderer::exportGraphAsDOT(raw_ostream &OS,StatType T) const { + OS << "digraph xray {\n"; + + for (const auto &V : Graph) + for (const auto &E : V.second) { + OS << "F" << V.first << " -> " + << "F" << E.first << " [label=\""; + outputEdgeInfo(E.second.S, T, OS); + OS << "\"];\n"; + } + + for (const auto &V : VertexAttrs) + OS << "F" << V.first << " [label=\"" + << (V.second.SymbolName.size() > 40 + ? V.second.SymbolName.substr(0, 40) + "..." + : V.second.SymbolName) + << "\"];\n"; + + OS << "}\n"; +} + +// Here we register and implement the llvm-xray graph subcommand. +// The bulk of this code reads in the options, opens the required files, uses +// those files to create a context for analysing the xray trace, then there is a +// short loop which actually analyses the trace, generates the graph and then +// outputs it as a DOT. +// +// FIXME: include additional filtering and annalysis passes to provide more +// specific useful information. +static CommandRegistration Unused(&Graph, []() -> Error { + int Fd; + auto EC = sys::fs::openFileForRead(GraphInput, Fd); + if (EC) + return make_error( + Twine("Cannot open file '") + GraphInput + "'", EC); + + Error Err = Error::success(); + xray::InstrumentationMapExtractor Extractor(GraphInstrMap, InstrMapFormat, + Err); + if (auto E = handleErrors( + std::move(Err), [&](std::unique_ptr E) -> Error { + if (E->convertToErrorCode() == std::errc::no_such_file_or_directory) + return Error::success(); + return Error(std::move(E)); + })) + return E; + + raw_fd_ostream OS(GraphOutput, EC, sys::fs::OpenFlags::F_Text); + if (EC) + return make_error( + Twine("Cannot open file '") + GraphOutput + "' for writing.", EC); + + const auto &FunctionAddresses = Extractor.getFunctionAddresses(); + symbolize::LLVMSymbolizer::Options Opts( + symbolize::FunctionNameKind::LinkageName, true, true, false, ""); + symbolize::LLVMSymbolizer Symbolizer(Opts); + llvm::xray::FuncIdConversionHelper FuncIdHelper(GraphInstrMap, Symbolizer, + FunctionAddresses); + xray::GraphRenderer GR(FuncIdHelper, GraphDeduceSiblingCalls); + const llvm::xray::XRayFileHeader *Header = nullptr; + LogReader Reader(GraphInput, Err, true, getSupportedLoader(GraphInputFormat)); + if (Err) + return joinErrors( + make_error( + Twine("Failed loading input file '") + GraphInput + "'", + std::make_error_code(std::errc::protocol_error)), + std::move(Err)); + + Header = &Reader.getFileHeader(); + for (const auto &Record : Reader) { + // Generate graph, FIXME: better error recovery. + if (!GR.accountRecord(Record)) { + return make_error( + Twine("Failed accounting function calls in file '") + GraphInput + + "'.", + std::make_error_code(std::errc::bad_message)); + } + } + + GR.calculateEdgeStatistics(); + GR.calculateVertexStatistics(); + if(Header->CycleFrequency) + GR.normaliseStatistics(Header->CycleFrequency); + + GR.exportGraphAsDOT(OS, GraphEdgeLabel); + return Error::success(); +});