Index: test/tools/llvm-xray/X86/Inputs/simple-instrmap.yaml =================================================================== --- test/tools/llvm-xray/X86/Inputs/simple-instrmap.yaml +++ test/tools/llvm-xray/X86/Inputs/simple-instrmap.yaml @@ -7,4 +7,16 @@ - { id: 2, address: 0x3, function: 0x2, kind: function-exit, always-instrument: true} - { id: 3, address: 0x3, function: 0x3, kind: function-enter, always-instrument: true} - { id: 3, address: 0x4, function: 0x3, kind: function-exit, always-instrument: true} +- { id: 4, address: 0x4, function: 0x4, kind: function-enter, always-instrument: true} +- { id: 4, address: 0x5, function: 0x4, kind: function-exit, always-instrument: true} +- { id: 5, address: 0x5, function: 0x5, kind: function-enter, always-instrument: true} +- { id: 5, address: 0x6, function: 0x5, kind: function-exit, always-instrument: true} +- { id: 6, address: 0x6, function: 0x6, kind: function-enter, always-instrument: true} +- { id: 6, address: 0x7, function: 0x6, kind: function-exit, always-instrument: true} +- { id: 7, address: 0x7, function: 0x7, kind: function-enter, always-instrument: true} +- { id: 7, address: 0x8, function: 0x7, kind: function-exit, always-instrument: true} +- { id: 8, address: 0x8, function: 0x8, kind: function-enter, always-instrument: true} +- { id: 8, address: 0x9, function: 0x8, kind: function-exit, always-instrument: true} +- { id: 9, address: 0x9, function: 0x9, kind: function-enter, always-instrument: true} +- { id: 9, address: 0xA, function: 0x9, kind: function-exit, always-instrument: true} ... Index: test/tools/llvm-xray/X86/graph-color-simple-case.yaml =================================================================== --- /dev/null +++ test/tools/llvm-xray/X86/graph-color-simple-case.yaml @@ -0,0 +1,75 @@ +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e sum -c sum \ +#RUN: | FileCheck %s -check-prefix=EDGE +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -v sum -b sum \ +#RUN: | FileCheck %s -check-prefix=VERTEX +--- +header: + version: 1 + type: 0 + constant-tsc: true + nonstop-tsc: true + cycle-frequency: 1 +records: + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-enter, tsc: 10000 } + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-exit, tsc: 10010 } + - { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-enter, tsc: 10100 } + - { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-exit, tsc: 10120 } + - { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-enter, tsc: 10200 } + - { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-exit, tsc: 10230 } + - { type: 0, func-id: 4, cpu: 1, thread: 111, kind: function-enter, tsc: 10300 } + - { type: 0, func-id: 4, cpu: 1, thread: 111, kind: function-exit, tsc: 10340 } + - { type: 0, func-id: 5, cpu: 1, thread: 111, kind: function-enter, tsc: 10400 } + - { type: 0, func-id: 5, cpu: 1, thread: 111, kind: function-exit, tsc: 10450 } + - { type: 0, func-id: 6, cpu: 1, thread: 111, kind: function-enter, tsc: 10500 } + - { type: 0, func-id: 6, cpu: 1, thread: 111, kind: function-exit, tsc: 10560 } + - { type: 0, func-id: 7, cpu: 1, thread: 111, kind: function-enter, tsc: 10600 } + - { type: 0, func-id: 7, cpu: 1, thread: 111, kind: function-exit, tsc: 10670 } + - { type: 0, func-id: 8, cpu: 1, thread: 111, kind: function-enter, tsc: 10700 } + - { type: 0, func-id: 8, cpu: 1, thread: 111, kind: function-exit, tsc: 10780 } + - { type: 0, func-id: 9, cpu: 1, thread: 111, kind: function-enter, tsc: 10800 } + - { type: 0, func-id: 9, cpu: 1, thread: 111, kind: function-exit, tsc: 10890 } +--- + + +#EDGE: digraph xray { +#EDGE-DAG: F0 -> F7 [label="7.{{[0-9]*}}e+01" color="#B00100"]; +#EDGE-DAG: F0 -> F2 [label="2.{{[0-9]*}}e+01" color="#FD9965"]; +#EDGE-DAG: F0 -> F9 [label="9.{{[0-9]*}}e+01" color="#7F0000"]; +#EDGE-DAG: F0 -> F4 [label="4.{{[0-9]*}}e+01" color="#E8543b"]; +#EDGE-DAG: F0 -> F6 [label="6.{{[0-9]*}}e+01" color="#C5140a"]; +#EDGE-DAG: F0 -> F1 [label="1.{{[0-9]*}}e+01" color="#FDC58c"]; +#EDGE-DAG: F0 -> F8 [label="8.{{[0-9]*}}e+01" color="#990101"]; +#EDGE-DAG: F0 -> F3 [label="3.{{[0-9]*}}e+01" color="#F5744d"]; +#EDGE-DAG: F0 -> F5 [label="5.{{[0-9]*}}e+01" color="#D83323"]; +#EDGE-DAG: F7 [label="@(7)"]; +#EDGE-DAG: F2 [label="@(2)"]; +#EDGE-DAG: F9 [label="@(9)"]; +#EDGE-DAG: F4 [label="@(4)"]; +#EDGE-DAG: F6 [label="@(6)"]; +#EDGE-DAG: F1 [label="@(1)"]; +#EDGE-DAG: F8 [label="@(8)"]; +#EDGE-DAG: F3 [label="@(3)"]; +#EDGE-DAG: F5 [label="@(5)"]; +#EDGE-NEXT: } +# +#VERTEX: digraph xray { +#VERTEX-DAG: node [shape=record]; +#VERTEX-DAG: F0 -> F7 [label=""]; +#VERTEX-DAG: F0 -> F2 [label=""]; +#VERTEX-DAG: F0 -> F9 [label=""]; +#VERTEX-DAG: F0 -> F4 [label=""]; +#VERTEX-DAG: F0 -> F6 [label=""]; +#VERTEX-DAG: F0 -> F1 [label=""]; +#VERTEX-DAG: F0 -> F8 [label=""]; +#VERTEX-DAG: F0 -> F3 [label=""]; +#VERTEX-DAG: F0 -> F5 [label=""]; +#VERTEX-DAG: F7 [label="{@(7)|7.{{[0-9]*}}e+01}" color="#B00100"]; +#VERTEX-DAG: F2 [label="{@(2)|2.{{[0-9]*}}e+01}" color="#FD9965"]; +#VERTEX-DAG: F9 [label="{@(9)|9.{{[0-9]*}}e+01}" color="#7F0000"]; +#VERTEX-DAG: F4 [label="{@(4)|4.{{[0-9]*}}e+01}" color="#E8543b"]; +#VERTEX-DAG: F6 [label="{@(6)|6.{{[0-9]*}}e+01}" color="#C5140a"]; +#VERTEX-DAG: F1 [label="{@(1)|1.{{[0-9]*}}e+01}" color="#FDC58c"]; +#VERTEX-DAG: F8 [label="{@(8)|8.{{[0-9]*}}e+01}" color="#990101"]; +#VERTEX-DAG: F3 [label="{@(3)|3.{{[0-9]*}}e+01}" color="#F5744d"]; +#VERTEX-DAG: F5 [label="{@(5)|5.{{[0-9]*}}e+01}" color="#D83323"]; +#VERTEX-NEXT: } Index: test/tools/llvm-xray/X86/graph-deduce-tail-call.yaml =================================================================== --- /dev/null +++ test/tools/llvm-xray/X86/graph-deduce-tail-call.yaml @@ -0,0 +1,75 @@ +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d \ +#RUN: | FileCheck %s -check-prefix=EMPTY +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e count \ +#RUN: | FileCheck %s -check-prefix=COUNT +# +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e min \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e med \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e 90p \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e 99p \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e max \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -d -e sum \ +#RUN: | FileCheck %s -check-prefix=TIME +# +--- +header: + version: 1 + type: 0 + constant-tsc: true + nonstop-tsc: true + cycle-frequency: 0 +records: +# Here we reconstruct the following call trace: +# +# f1() +# f2() +# f3() +# +# But we find that we're missing an exit record for f2() because it's +# tail-called f3(). We make sure that if we see a trace like this that we can +# deduce tail calls, and account the time (potentially wrongly) to f2() when +# f1() exits. That is because we don't go back to f3()'s entry record to +# properly do the math on the timing of f2(). +# +# Note that by default, tail/sibling call deduction is disabled, and is enabled +# with a flag "-d" or "-deduce-sibling-calls". +# + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-enter, tsc: 10000 } + - { type: 0, func-id: 2, cpu: 1, thread: 111, kind: function-enter, tsc: 10001 } + - { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-enter, tsc: 10002 } + - { type: 0, func-id: 3, cpu: 1, thread: 111, kind: function-exit, tsc: 10003 } + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-exit, tsc: 10004 } +... + +#EMPTY: digraph xray { +#EMPTY-DAG: F0 -> F1 [label=""]; +#EMPTY-DAG: F1 -> F2 [label=""]; +#EMPTY-DAG: F2 -> F3 [label=""]; +#EMPTY-DAG: F1 [label="@(1)"]; +#EMPTY-DAG: F2 [label="@(2)"]; +#EMPTY-DAG: F3 [label="@(3)"]; +#EMPTY-NEXT: } + +#COUNT: digraph xray { +#COUNT-DAG: F0 -> F1 [label="1"]; +#COUNT-DAG: F1 -> F2 [label="1"]; +#COUNT-DAG: F2 -> F3 [label="1"]; +#COUNT-DAG: F1 [label="@(1)"]; +#COUNT-DAG: F2 [label="@(2)"]; +#COUNT-DAG: F3 [label="@(3)"]; +#COUNT-NEXT: } + + +#TIME: digraph xray { +#TIME-DAG: F0 -> F1 [label="4.{{.*}}"]; +#TIME-DAG: F1 -> F2 [label="3.{{.*}}"]; +#TIME-DAG: F2 -> F3 [label="1.{{.*}}"]; +#TIME-DAG: F1 [label="@(1)"]; +#TIME-DAG: F2 [label="@(2)"]; +#TIME-DAG: F3 [label="@(3)"]; +#TIME-NEXT: } Index: test/tools/llvm-xray/X86/graph-simple-case.yaml =================================================================== --- /dev/null +++ test/tools/llvm-xray/X86/graph-simple-case.yaml @@ -0,0 +1,46 @@ +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml \ +#RUN: | FileCheck %s -check-prefix=EMPTY +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e count \ +#RUN: | FileCheck %s -check-prefix=COUNT +# +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e min \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e med \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e 90p \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e 99p \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e max \ +#RUN: | FileCheck %s -check-prefix=TIME +#RUN: llvm-xray graph %s -o - -m %S/Inputs/simple-instrmap.yaml -t yaml -e sum \ +#RUN: | FileCheck %s -check-prefix=TIME +--- +header: + version: 1 + type: 0 + constant-tsc: true + nonstop-tsc: true + cycle-frequency: 2601000000 +records: + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-enter, + tsc: 10001 } + - { type: 0, func-id: 1, cpu: 1, thread: 111, kind: function-exit, + tsc: 10100 } +... + + +#EMPTY: digraph xray { +#EMPTY-NEXT: F0 -> F1 [label=""]; +#EMPTY-NEXT: F1 [label="@(1)"]; +#EMPTY-NEXT: } + +#COUNT: digraph xray { +#COUNT-NEXT: F0 -> F1 [label="1"]; +#COUNT-NEXT: F1 [label="@(1)"]; +#COUNT-NEXT: } + +#TIME: digraph xray { +#TIME-NEXT: F0 -> F1 [label="3.8{{.*}}e-08"]; +#TIME-NEXT: F1 [label="@(1)"]; +#TIME-NEXT: } Index: tools/llvm-xray/CMakeLists.txt =================================================================== --- tools/llvm-xray/CMakeLists.txt +++ tools/llvm-xray/CMakeLists.txt @@ -12,6 +12,7 @@ xray-converter.cc xray-extract.cc xray-extract.cc + xray-graph.cc xray-registry.cc) add_llvm_tool(llvm-xray llvm-xray.cc ${LLVM_XRAY_TOOLS}) Index: tools/llvm-xray/xray-graph.h =================================================================== --- /dev/null +++ tools/llvm-xray/xray-graph.h @@ -0,0 +1,139 @@ +//===-- xray-graph.h - XRay Function Call Graph Renderer --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Generate a DOT file to represent the function call graph encountered in +// the trace. +// +//===----------------------------------------------------------------------===// + +#ifndef XRAY_GRAPH_H +#define XRAY_GRAPH_H + +#include +#include + +#include "func-id-helper.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/XRay/Trace.h" +#include "llvm/XRay/XRayRecord.h" + +namespace llvm { +namespace xray { + +/// A class encapsulating the logic related to analyzing XRay traces, producting +/// Graphs from them and then exporting those graphs for review. +class GraphRenderer { +public: + /// An enum for enumerating the various statistics gathered on latencies + enum class StatType { NONE, COUNT, MIN, MED, PCT90, PCT99, MAX, SUM }; + + /// An inner struct for common timing statistics information + struct TimeStat { + uint64_t Count = 0; + double Min = 0; + double Median = 0; + double Pct90 = 0; + double Pct99 = 0; + double Max = 0; + double Sum = 0; + std::string getAsString(StatType T) const; + double compare(StatType T, const TimeStat &Other) const; + }; + + /// An inner struct for storing edge attributes for our graph. Here the + /// attributes are mainly function call statistics. + /// + /// FIXME: expand to contain more information eg call latencies. + struct EdgeAttribute { + TimeStat S; + std::vector Timings; + }; + + /// An Inner Struct for storing vertex attributes, at the moment just + /// SymbolNames, however in future we could store bulk function statistics. + /// + /// FIXME: Store more attributes based on instrumentation map. + struct VertexAttribute { + std::string SymbolName; + TimeStat S; + }; + +private: + /// The Graph stored in an edge-list like format, with the edges also having + /// An attached set of attributes. + DenseMap> Graph; + + /// Graph Vertex Attributes. These are presently stored seperate from the + /// main graph. + DenseMap VertexAttrs; + + TimeStat GraphEdgeMax; + TimeStat GraphVertexMax; + + struct FunctionAttr { + int32_t FuncId; + uint64_t TSC; + }; + + /// Use a Map to store the Function stack for each thread whilst building the + /// graph. + /// + /// FIXME: Perhaps we can Build this into LatencyAccountant? or vise versa? + DenseMap> PerThreadFunctionStack; + + /// Usefull object for getting human readable Symbol Names. + FuncIdConversionHelper &FuncIdHelper; + bool DeduceSiblingCalls = false; + uint64_t CurrentMaxTSC = 0; + + /// A private function to help implement the statistic generation functions; + template + void getStats(U begin, U end, GraphRenderer::TimeStat &S); + void updateMaxStats(const TimeStat &S, TimeStat &M); + + /// Calculates latency statistics for each edge and stores the data in the + /// Graph + void calculateEdgeStatistics(); + + /// Calculates latency statistics for each vertex and stores the data in the + /// Graph + void calculateVertexStatistics(); + + /// Normalises latency statistics for each edge and vertex by CycleFrequency; + void normalizeStatistics(double CycleFrequency); + +public: + /// Takes in a reference to a FuncIdHelper in order to have ready access to + /// Symbol names. + explicit GraphRenderer(FuncIdConversionHelper &FuncIdHelper, bool DSC) + : FuncIdHelper(FuncIdHelper), DeduceSiblingCalls(DSC) {} + + /// Process an Xray record and expand the graph. + /// + /// This Function will return true on success, or false if records are not + /// presented in per-thread call-tree DFS order. (That is for each thread the + /// Records should be in order runtime on an ideal system.) + /// + /// FIXME: Make this more robust against small irregularities. + bool accountRecord(const XRayRecord &Record); + + /// Output the Embedded graph in DOT format on \p OS, labeling the edges by + /// \p T + void exportGraphAsDOT(raw_ostream &OS, const XRayFileHeader &H, + StatType EdgeLabel = StatType::NONE, + StatType EdgeColor = StatType::NONE, + StatType VertexLabel = StatType::NONE, + StatType VertexColor = StatType::NONE); +}; +} +} + +#endif // XRAY_GRAPH_H Index: tools/llvm-xray/xray-graph.cc =================================================================== --- /dev/null +++ tools/llvm-xray/xray-graph.cc @@ -0,0 +1,570 @@ +//===-- xray-graph.c - XRay Function Call Graph Renderer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Generate a DOT file to represent the function call graph encountered in +// the trace. +// +//===----------------------------------------------------------------------===// +#include +#include +#include +#include +#include + +#include "xray-extract.h" +#include "xray-graph.h" +#include "xray-registry.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/XRay/Trace.h" +#include "llvm/XRay/YAMLXRayRecord.h" + +using namespace llvm; +using namespace xray; + +// Setup llvm-xray graph subcommand and its options. +static cl::SubCommand Graph("graph", "Generate function-call graph"); +static cl::opt GraphInput(cl::Positional, + cl::desc(""), + cl::Required, cl::sub(Graph)); + +static cl::opt + GraphOutput("output", cl::value_desc("Output file"), cl::init("-"), + cl::desc("output file; use '-' for stdout"), cl::sub(Graph)); +static cl::alias GraphOutput2("o", cl::aliasopt(GraphOutput), + cl::desc("Alias for -output"), cl::sub(Graph)); + +static cl::opt GraphInstrMap( + "instr_map", cl::desc("binary with the instrumrntation map, or " + "a separate instrumentation map"), + cl::value_desc("binary with xray_instr_map"), cl::sub(Graph), cl::init("")); +static cl::alias GraphInstrMap2("m", cl::aliasopt(GraphInstrMap), + cl::desc("alias for -instr_map"), + cl::sub(Graph)); + +static cl::opt InstrMapFormat( + "instr-map-format", cl::desc("format of instrumentation map"), + cl::values(clEnumValN(InstrumentationMapExtractor::InputFormats::ELF, "elf", + "instrumentation map in an ELF header"), + clEnumValN(InstrumentationMapExtractor::InputFormats::YAML, + "yaml", "instrumentation map in YAML")), + cl::sub(Graph), cl::init(InstrumentationMapExtractor::InputFormats::ELF)); +static cl::alias InstrMapFormat2("t", cl::aliasopt(InstrMapFormat), + cl::desc("Alias for -instr-map-format"), + cl::sub(Graph)); + +static cl::opt GraphDeduceSiblingCalls( + "deduce-sibling-calls", + cl::desc("Deduce sibling calls when unrolling function call stacks"), + cl::sub(Graph), cl::init(false)); +static cl::alias + GraphDeduceSiblingCalls2("d", cl::aliasopt(GraphDeduceSiblingCalls), + cl::desc("Alias for -deduce-sibling-calls"), + cl::sub(Graph)); + +static cl::opt + GraphEdgeLabel("edge-label", + cl::desc("Output graphs with edges labeled with this field"), + cl::value_desc("field"), cl::sub(Graph), + cl::init(GraphRenderer::StatType::NONE), + cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", + "Do not label Edges"), + clEnumValN(GraphRenderer::StatType::COUNT, + "count", "function call counts"), + clEnumValN(GraphRenderer::StatType::MIN, "min", + "minimum function durations"), + clEnumValN(GraphRenderer::StatType::MED, "med", + "median function durations"), + clEnumValN(GraphRenderer::StatType::PCT90, "90p", + "90th percentile durations"), + clEnumValN(GraphRenderer::StatType::PCT99, "99p", + "99th percentile durations"), + clEnumValN(GraphRenderer::StatType::MAX, "max", + "maximum function durations"), + clEnumValN(GraphRenderer::StatType::SUM, "sum", + "sum of call durations"))); +static cl::alias GraphEdgeLabel2("e", cl::aliasopt(GraphEdgeLabel), + cl::desc("Alias for -edge-label"), + cl::sub(Graph)); + +static cl::opt GraphVertexLabel( + "vertex-label", + cl::desc("Output graphs with vertices labeled with this field"), + cl::value_desc("field"), cl::sub(Graph), + cl::init(GraphRenderer::StatType::NONE), + cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", + "Do not label Edges"), + clEnumValN(GraphRenderer::StatType::COUNT, "count", + "function call counts"), + clEnumValN(GraphRenderer::StatType::MIN, "min", + "minimum function durations"), + clEnumValN(GraphRenderer::StatType::MED, "med", + "median function durations"), + clEnumValN(GraphRenderer::StatType::PCT90, "90p", + "90th percentile durations"), + clEnumValN(GraphRenderer::StatType::PCT99, "99p", + "99th percentile durations"), + clEnumValN(GraphRenderer::StatType::MAX, "max", + "maximum function durations"), + clEnumValN(GraphRenderer::StatType::SUM, "sum", + "sum of call durations"))); +static cl::alias GraphVertexLabel2("v", cl::aliasopt(GraphVertexLabel), + cl::desc("Alias for -edge-label"), + cl::sub(Graph)); + +static cl::opt GraphEdgeColorType( + "color-edges", + cl::desc("Output graphs with edge colors determined by this field"), + cl::value_desc("field"), cl::sub(Graph), + cl::init(GraphRenderer::StatType::NONE), + cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", + "Do not label Edges"), + clEnumValN(GraphRenderer::StatType::COUNT, "count", + "function call counts"), + clEnumValN(GraphRenderer::StatType::MIN, "min", + "minimum function durations"), + clEnumValN(GraphRenderer::StatType::MED, "med", + "median function durations"), + clEnumValN(GraphRenderer::StatType::PCT90, "90p", + "90th percentile durations"), + clEnumValN(GraphRenderer::StatType::PCT99, "99p", + "99th percentile durations"), + clEnumValN(GraphRenderer::StatType::MAX, "max", + "maximum function durations"), + clEnumValN(GraphRenderer::StatType::SUM, "sum", + "sum of call durations"))); +static cl::alias GraphEdgeColorType2("c", cl::aliasopt(GraphEdgeColorType), + cl::desc("Alias for -color-edges"), + cl::sub(Graph)); + +static cl::opt GraphVertexColorType( + "color-vertices", + cl::desc("Output graphs with vertex colors determined by this field"), + cl::value_desc("field"), cl::sub(Graph), + cl::init(GraphRenderer::StatType::NONE), + cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", + "Do not label Edges"), + clEnumValN(GraphRenderer::StatType::COUNT, "count", + "function call counts"), + clEnumValN(GraphRenderer::StatType::MIN, "min", + "minimum function durations"), + clEnumValN(GraphRenderer::StatType::MED, "med", + "median function durations"), + clEnumValN(GraphRenderer::StatType::PCT90, "90p", + "90th percentile durations"), + clEnumValN(GraphRenderer::StatType::PCT99, "99p", + "99th percentile durations"), + clEnumValN(GraphRenderer::StatType::MAX, "max", + "maximum function durations"), + clEnumValN(GraphRenderer::StatType::SUM, "sum", + "sum of call durations"))); +static cl::alias GraphVertexColorType2("b", cl::aliasopt(GraphVertexColorType), + cl::desc("Alias for -edge-label"), + cl::sub(Graph)); +template static T diff(T L, T R) { + return std::max(L, R) - std::min(L, R); +} + +static void updateStat(GraphRenderer::TimeStat &S, int64_t lat) { + S.Count++; + if (S.Min > lat || S.Min == 0) + S.Min = lat; + if (S.Max < lat) + S.Max = lat; + S.Sum += lat; +} + +// Evaluates an XRay record and performs accounting on it, creating and +// decorating a function call graph as it does so. It does this by maintaining +// a call stack on a per-thread basis and adding edges and verticies to the +// graph as they are seen for the first time. +// +// There is an immaginary root for functions at the top of their stack with +// FuncId 0. +// +// FIXME: make more robust to errors and +// Decorate Graph More Heavily. +// FIXME: Refactor this and account subcommand to reduce code duplication. +bool GraphRenderer::accountRecord(const XRayRecord &Record) { + if (CurrentMaxTSC == 0) + CurrentMaxTSC = Record.TSC; + + if (Record.TSC < CurrentMaxTSC) + return false; + + auto &ThreadStack = PerThreadFunctionStack[Record.TId]; + switch (Record.Type) { + case RecordTypes::ENTER: { + if (VertexAttrs.count(Record.FuncId) == 0) + VertexAttrs[Record.FuncId].SymbolName = + FuncIdHelper.SymbolOrNumber(Record.FuncId); + ThreadStack.push_back({Record.FuncId, Record.TSC}); + break; + } + case RecordTypes::EXIT: { + // FIXME: Refactor this and the account subcommand to reducr code + // duplication + if (ThreadStack.size() == 0 || ThreadStack.back().FuncId != Record.FuncId) { + if (!DeduceSiblingCalls) + return false; + auto Parent = std::find_if( + ThreadStack.rbegin(), ThreadStack.rend(), + [&](const FunctionAttr &A) { return A.FuncId == Record.FuncId; }); + if (Parent == ThreadStack.rend()) + return false; // There is no matching Function for this exit. + while (ThreadStack.back().FuncId != Record.FuncId) { + uint64_t D = diff(ThreadStack.back().TSC, Record.TSC); + int32_t TopFuncId = ThreadStack.back().FuncId; + ThreadStack.pop_back(); + assert(ThreadStack.size() != 0); + auto &EA = Graph[ThreadStack.back().FuncId][TopFuncId]; + EA.Timings.push_back(D); + updateStat(EA.S, D); + updateStat(VertexAttrs[TopFuncId].S, D); + } + } + uint64_t D = diff(ThreadStack.back().TSC, Record.TSC); + ThreadStack.pop_back(); + auto &V = Graph[ThreadStack.empty() ? 0 : ThreadStack.back().FuncId]; + auto &EA = V[Record.FuncId]; + EA.Timings.push_back(D); + updateStat(EA.S, D); + updateStat(VertexAttrs[Record.FuncId].S, D); + break; + } + } + + return true; +} + +template +void GraphRenderer::getStats(U begin, U end, GraphRenderer::TimeStat &S) { + assert(begin != end); + std::ptrdiff_t MedianOff = S.Count / 2; + std::nth_element(begin, begin + MedianOff, end); + S.Median = *(begin + MedianOff); + std::ptrdiff_t Pct90Off = (S.Count * 9) / 10; + std::nth_element(begin, begin + Pct90Off, end); + S.Pct90 = *(begin + Pct90Off); + std::ptrdiff_t Pct99Off = (S.Count * 99) / 100; + std::nth_element(begin, begin + Pct99Off, end); + S.Pct99 = *(begin + Pct99Off); +} + +void GraphRenderer::updateMaxStats(const GraphRenderer::TimeStat &S, + GraphRenderer::TimeStat &M) { + M.Count = std::max(M.Count, S.Count); + M.Min = std::max(M.Min, S.Min); + M.Median = std::max(M.Median, S.Median); + M.Pct90 = std::max(M.Pct90, S.Pct90); + M.Pct99 = std::max(M.Pct99, S.Pct99); + M.Max = std::max(M.Max, S.Max); + M.Sum = std::max(M.Sum, S.Sum); +} + +void GraphRenderer::calculateEdgeStatistics() { + for (auto &V : Graph) { + for (auto &E : V.second) { + auto &A = E.second; + getStats(A.Timings.begin(), A.Timings.end(), A.S); + updateMaxStats(A.S, GraphEdgeMax); + } + } +} + +void GraphRenderer::calculateVertexStatistics() { + DenseMap>> + IncommingEdges; + uint64_t MaxCount = 0; + for (auto &V : Graph) { + for (auto &E : V.second) { + auto &IEV = IncommingEdges[E.first]; + IEV.second.push_back(&E.second); + IEV.first += E.second.S.Count; + if (IEV.first > MaxCount) + MaxCount = IEV.first; + } + } + std::vector TempTimings; + TempTimings.reserve(MaxCount); + for (auto &V : IncommingEdges) { + for (auto &P : V.second.second) { + TempTimings.insert(TempTimings.end(), P->Timings.begin(), + P->Timings.end()); + } + getStats(TempTimings.begin(), TempTimings.end(), VertexAttrs[V.first].S); + updateMaxStats(VertexAttrs[V.first].S, GraphVertexMax); + TempTimings.clear(); + } +} + +// A Helper function for normalizeStatistics which normalises a single +// TimeStat element. +static void normalizeTimeStat(GraphRenderer::TimeStat &S, + double CycleFrequency) { + S.Min /= CycleFrequency; + S.Median /= CycleFrequency; + S.Max /= CycleFrequency; + S.Sum /= CycleFrequency; + S.Pct90 /= CycleFrequency; + S.Pct99 /= CycleFrequency; +} + +// Normalises the statistics in the graph for a given TSC frequency. +void GraphRenderer::normalizeStatistics(double CycleFrequency) { + for (auto &V : Graph) { + for (auto &E : V.second) { + auto &S = E.second.S; + normalizeTimeStat(S, CycleFrequency); + } + } + for (auto &V : VertexAttrs) { + auto &S = V.second.S; + normalizeTimeStat(S, CycleFrequency); + } + + normalizeTimeStat(GraphEdgeMax, CycleFrequency); + normalizeTimeStat(GraphVertexMax, CycleFrequency); +} + +// Returns a string containing the value of statistic field T +std::string +GraphRenderer::TimeStat::getAsString(GraphRenderer::StatType T) const { + std::string St; + raw_string_ostream S{St}; + switch (T) { + case GraphRenderer::StatType::COUNT: + S << Count; + break; + case GraphRenderer::StatType::MIN: + S << Min; + break; + case GraphRenderer::StatType::MED: + S << Median; + break; + case GraphRenderer::StatType::PCT90: + S << Pct90; + break; + case GraphRenderer::StatType::PCT99: + S << Pct99; + break; + case GraphRenderer::StatType::MAX: + S << Max; + break; + case GraphRenderer::StatType::SUM: + S << Sum; + break; + case GraphRenderer::StatType::NONE: + break; + } + return S.str(); +} + +// Evaluates a polynomial given the coefficints provided in an ArrayRef +// evaluating: +// +// p(x) = a[n-0]*x^0 + a[n-1]*x^1 + ... a[n-n]*x^n +// +// at x_0 using Horner's Method for both performance and stability reasons. +static double polyEval(ArrayRef a, double x_0) { + double B = 0; + for (const auto &c : a) { + B = c + B * x_0; + } + return B; +} +// Takes a double precision number, clips it between 0 and 1 and then converts +// that to an integer between 0x00 and 0xFF with propper rounding. +static uint8_t uintIntervalTo8bitChar(double B) { + double n = std::max(std::min(B, 1.0), 0.0); + return static_cast(255 * n + 0.5); +} + +// Gets a color in a gradient given a number in the interval [0,1], it does this +// by evaluating a polynomial which maps [0, 1] -> [0, 1] for each of the R G +// and B values in the color. It then converts this [0,1] colors to a 24 bit +// color. +// +// In order to calculate these polynomials, I used Mathematica to do the +// following: +// 1. Convert the OrRed9 color scheme from http://colorbrewer2.org/ from sRGB +// to LAB color space. +// 2. Interpolate between the descrete colors in LAB space using a cubic +// spline interpolation. +// 3. Sample this interpolation at 100 points and convert to sRGB. +// 4. Calculate a polynomial fit for these 100 points for each of R G and B. +// using a degree 9 polynominal subjectively provided a good result. +// 5. Extract these polynomial coefficients from matlab as a set of constants. +static std::string getColor(double point) { + assert(point >= 0.0 && point <= 1); + const static double RedPoly[] = {-38.4295, 239.239, -600.108, 790.544, + -591.26, 251.304, -58.0983, 6.62999, + -0.325899, 1.00173}; + const static double GreenPoly[] = {-603.634, 2338.15, -3606.74, 2786.16, + -1085.19, 165.15, 11.2584, -6.11338, + -0.0091078, 0.965469}; + const static double BluePoly[] = {-325.686, 947.415, -699.079, -513.75, + 1127.78, -732.617, 228.092, -33.8202, + 0.732108, 0.913916}; + using std::begin; + using std::end; + uint8_t r = uintIntervalTo8bitChar(polyEval(RedPoly, point)); + uint8_t g = uintIntervalTo8bitChar(polyEval(GreenPoly, point)); + uint8_t b = uintIntervalTo8bitChar(polyEval(BluePoly, point)); + + return llvm::formatv("#{0:X-2}{1:X-2}{2:x-2}", r, g, b); +} + +// Returns the quotient between the property T of this and annother TimeStat as +// a double +double GraphRenderer::TimeStat::compare(StatType T, const TimeStat &O) const { + double retval = 0; + switch (T) { + case GraphRenderer::StatType::COUNT: + retval = static_cast(Count) / static_cast(O.Count); + break; + case GraphRenderer::StatType::MIN: + retval = Min / O.Min; + break; + case GraphRenderer::StatType::MED: + retval = Median / O.Median; + break; + case GraphRenderer::StatType::PCT90: + retval = Pct90 / O.Pct90; + break; + case GraphRenderer::StatType::PCT99: + retval = Pct99 / O.Pct99; + break; + case GraphRenderer::StatType::MAX: + retval = Max / O.Max; + break; + case GraphRenderer::StatType::SUM: + retval = Sum / O.Sum; + break; + case GraphRenderer::StatType::NONE: + retval = 0.0; + break; + } + return std::sqrt( + retval); // the square root here provides more dynamic contrast for + // low runtime edges, giving better separation and + // coloring lower down the call stack. +} + +// Outputs a DOT format version of the Graph embedded in the GraphRenderer +// object on OS. It does this in the expected way by itterating +// through all edges then vertices and then outputting them and their +// annotations. +// +// FIXME: output more information, better presented. +void GraphRenderer::exportGraphAsDOT(raw_ostream &OS, const XRayFileHeader &H, + StatType ET, StatType EC, StatType VT, + StatType VC) { + calculateEdgeStatistics(); + calculateVertexStatistics(); + if (H.CycleFrequency) + normalizeStatistics(H.CycleFrequency); + + OS << "digraph xray {\n"; + + if (VT != StatType::NONE) + OS << "node [shape=record];\n"; + + for (const auto &V : Graph) + for (const auto &E : V.second) { + const auto &S = E.second.S; + OS << "F" << V.first << " -> " + << "F" << E.first << " [label=\"" << S.getAsString(ET) << "\""; + if (EC != StatType::NONE) + OS << " color=\"" << getColor(S.compare(EC, GraphEdgeMax)) << "\""; + OS << "];\n"; + } + + for (const auto &V : VertexAttrs) { + const auto &VA = V.second; + OS << "F" << V.first << " [label=\"" << (VT != StatType::NONE ? "{" : "") + << (VA.SymbolName.size() > 40 ? VA.SymbolName.substr(0, 40) + "..." + : VA.SymbolName); + if (VT != StatType::NONE) + OS << "|" << VA.S.getAsString(VT) << "}\""; + else + OS << "\""; + if (VC != StatType::NONE) + OS << " color=\"" << getColor(VA.S.compare(VC, GraphVertexMax)) << "\""; + OS << "];\n"; + } + OS << "}\n"; +} + +// Here we register and implement the llvm-xray graph subcommand. +// The bulk of this code reads in the options, opens the required files, uses +// those files to create a context for analysing the xray trace, then there is a +// short loop which actually analyses the trace, generates the graph and then +// outputs it as a DOT. +// +// FIXME: include additional filtering and annalysis passes to provide more +// specific useful information. +static CommandRegistration Unused(&Graph, []() -> Error { + int Fd; + auto EC = sys::fs::openFileForRead(GraphInput, Fd); + if (EC) + return make_error( + Twine("Cannot open file '") + GraphInput + "'", EC); + + Error Err = Error::success(); + xray::InstrumentationMapExtractor Extractor(GraphInstrMap, InstrMapFormat, + Err); + handleAllErrors(std::move(Err), + [&](const ErrorInfoBase &E) { E.log(errs()); }); + + const auto &FunctionAddresses = Extractor.getFunctionAddresses(); + + symbolize::LLVMSymbolizer::Options Opts( + symbolize::FunctionNameKind::LinkageName, true, true, false, ""); + + symbolize::LLVMSymbolizer Symbolizer(Opts); + + llvm::xray::FuncIdConversionHelper FuncIdHelper(GraphInstrMap, Symbolizer, + FunctionAddresses); + + xray::GraphRenderer GR(FuncIdHelper, GraphDeduceSiblingCalls); + + raw_fd_ostream OS(GraphOutput, EC, sys::fs::OpenFlags::F_Text); + + if (EC) + return make_error( + Twine("Cannot open file '") + GraphOutput + "' for writing.", EC); + + auto TraceOrErr = loadTraceFile(GraphInput, true); + + if (!TraceOrErr) { + return joinErrors( + make_error( + Twine("Failed loading input file '") + GraphInput + "'", + std::make_error_code(std::errc::protocol_error)), + std::move(Err)); + } + + auto &Trace = *TraceOrErr; + const auto &Header = Trace.getFileHeader(); + for (const auto &Record : Trace) { + // Generate graph, FIXME: better error recovery. + if (!GR.accountRecord(Record)) { + return make_error( + Twine("Failed accounting function calls in file '") + GraphInput + + "'.", + std::make_error_code(std::errc::bad_message)); + } + } + + GR.exportGraphAsDOT(OS, Header, GraphEdgeLabel, GraphEdgeColorType, + GraphVertexLabel, GraphVertexColorType); + return Error::success(); +});