Index: llvm/trunk/tools/llvm-xray/CMakeLists.txt =================================================================== --- llvm/trunk/tools/llvm-xray/CMakeLists.txt +++ llvm/trunk/tools/llvm-xray/CMakeLists.txt @@ -7,14 +7,14 @@ XRay) set(LLVM_XRAY_TOOLS - func-id-helper.cc - xray-account.cc - xray-color-helper.cc - xray-converter.cc - xray-extract.cc - xray-graph.cc - xray-graph-diff.cc - xray-stacks.cc - xray-registry.cc) + func-id-helper.cpp + xray-account.cpp + xray-color-helper.cpp + xray-converter.cpp + xray-extract.cpp + xray-graph.cpp + xray-graph-diff.cpp + xray-stacks.cpp + xray-registry.cpp) -add_llvm_tool(llvm-xray llvm-xray.cc ${LLVM_XRAY_TOOLS}) +add_llvm_tool(llvm-xray llvm-xray.cpp ${LLVM_XRAY_TOOLS}) Index: llvm/trunk/tools/llvm-xray/func-id-helper.cc =================================================================== --- llvm/trunk/tools/llvm-xray/func-id-helper.cc +++ llvm/trunk/tools/llvm-xray/func-id-helper.cc @@ -1,66 +0,0 @@ -//===- xray-fc-account.cc - XRay Function Call Accounting Tool ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implementation of the helper tools dealing with XRay-generated function ids. -// -//===----------------------------------------------------------------------===// - -#include "func-id-helper.h" -#include "llvm/Support/Path.h" -#include - -using namespace llvm; -using namespace xray; - -std::string FuncIdConversionHelper::SymbolOrNumber(int32_t FuncId) const { - auto CacheIt = CachedNames.find(FuncId); - if (CacheIt != CachedNames.end()) - return CacheIt->second; - - std::ostringstream F; - auto It = FunctionAddresses.find(FuncId); - if (It == FunctionAddresses.end()) { - F << "#" << FuncId; - return F.str(); - } - - if (auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, It->second)) { - auto &DI = *ResOrErr; - if (DI.FunctionName == "") - F << "@(" << std::hex << It->second << ")"; - else - F << DI.FunctionName; - } else - handleAllErrors(ResOrErr.takeError(), [&](const ErrorInfoBase &) { - F << "@(" << std::hex << It->second << ")"; - }); - - auto S = F.str(); - CachedNames[FuncId] = S; - return S; -} - -std::string FuncIdConversionHelper::FileLineAndColumn(int32_t FuncId) const { - auto It = FunctionAddresses.find(FuncId); - if (It == FunctionAddresses.end()) - return "(unknown)"; - - std::ostringstream F; - auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, It->second); - if (!ResOrErr) { - consumeError(ResOrErr.takeError()); - return "(unknown)"; - } - - auto &DI = *ResOrErr; - F << sys::path::filename(DI.FileName).str() << ":" << DI.Line << ":" - << DI.Column; - - return F.str(); -} Index: llvm/trunk/tools/llvm-xray/func-id-helper.cpp =================================================================== --- llvm/trunk/tools/llvm-xray/func-id-helper.cpp +++ llvm/trunk/tools/llvm-xray/func-id-helper.cpp @@ -0,0 +1,66 @@ +//===- xray-fc-account.cpp: XRay Function Call Accounting Tool ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the helper tools dealing with XRay-generated function ids. +// +//===----------------------------------------------------------------------===// + +#include "func-id-helper.h" +#include "llvm/Support/Path.h" +#include + +using namespace llvm; +using namespace xray; + +std::string FuncIdConversionHelper::SymbolOrNumber(int32_t FuncId) const { + auto CacheIt = CachedNames.find(FuncId); + if (CacheIt != CachedNames.end()) + return CacheIt->second; + + std::ostringstream F; + auto It = FunctionAddresses.find(FuncId); + if (It == FunctionAddresses.end()) { + F << "#" << FuncId; + return F.str(); + } + + if (auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, It->second)) { + auto &DI = *ResOrErr; + if (DI.FunctionName == "") + F << "@(" << std::hex << It->second << ")"; + else + F << DI.FunctionName; + } else + handleAllErrors(ResOrErr.takeError(), [&](const ErrorInfoBase &) { + F << "@(" << std::hex << It->second << ")"; + }); + + auto S = F.str(); + CachedNames[FuncId] = S; + return S; +} + +std::string FuncIdConversionHelper::FileLineAndColumn(int32_t FuncId) const { + auto It = FunctionAddresses.find(FuncId); + if (It == FunctionAddresses.end()) + return "(unknown)"; + + std::ostringstream F; + auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, It->second); + if (!ResOrErr) { + consumeError(ResOrErr.takeError()); + return "(unknown)"; + } + + auto &DI = *ResOrErr; + F << sys::path::filename(DI.FileName).str() << ":" << DI.Line << ":" + << DI.Column; + + return F.str(); +} Index: llvm/trunk/tools/llvm-xray/llvm-xray.cc =================================================================== --- llvm/trunk/tools/llvm-xray/llvm-xray.cc +++ llvm/trunk/tools/llvm-xray/llvm-xray.cc @@ -1,49 +0,0 @@ -//===- llvm-xray.cc - XRay Tool Main Program ------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the main entry point for the suite of XRay tools. All -// additional functionality are implemented as subcommands. -// -//===----------------------------------------------------------------------===// -// -// Basic usage: -// -// llvm-xray [options] [subcommand-specific options] -// -#include "xray-registry.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; -using namespace llvm::xray; - -int main(int argc, char *argv[]) { - cl::ParseCommandLineOptions(argc, argv, - "XRay Tools\n\n" - " This program consolidates multiple XRay trace " - "processing tools for convenient access.\n"); - for (auto *SC : cl::getRegisteredSubcommands()) { - if (*SC) { - // If no subcommand was provided, we need to explicitly check if this is - // the top-level subcommand. - if (SC == &*cl::TopLevelSubCommand) { - cl::PrintHelpMessage(false, true); - return 0; - } - if (auto C = dispatch(SC)) { - ExitOnError("llvm-xray: ")(C()); - return 0; - } - } - } - - // If all else fails, we still print the usage message. - cl::PrintHelpMessage(false, true); - return 0; -} Index: llvm/trunk/tools/llvm-xray/llvm-xray.cpp =================================================================== --- llvm/trunk/tools/llvm-xray/llvm-xray.cpp +++ llvm/trunk/tools/llvm-xray/llvm-xray.cpp @@ -0,0 +1,49 @@ +//===- llvm-xray.cpp: XRay Tool Main Program ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the main entry point for the suite of XRay tools. All +// additional functionality are implemented as subcommands. +// +//===----------------------------------------------------------------------===// +// +// Basic usage: +// +// llvm-xray [options] [subcommand-specific options] +// +#include "xray-registry.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace llvm::xray; + +int main(int argc, char *argv[]) { + cl::ParseCommandLineOptions(argc, argv, + "XRay Tools\n\n" + " This program consolidates multiple XRay trace " + "processing tools for convenient access.\n"); + for (auto *SC : cl::getRegisteredSubcommands()) { + if (*SC) { + // If no subcommand was provided, we need to explicitly check if this is + // the top-level subcommand. + if (SC == &*cl::TopLevelSubCommand) { + cl::PrintHelpMessage(false, true); + return 0; + } + if (auto C = dispatch(SC)) { + ExitOnError("llvm-xray: ")(C()); + return 0; + } + } + } + + // If all else fails, we still print the usage message. + cl::PrintHelpMessage(false, true); + return 0; +} Index: llvm/trunk/tools/llvm-xray/xray-account.cc =================================================================== --- llvm/trunk/tools/llvm-xray/xray-account.cc +++ llvm/trunk/tools/llvm-xray/xray-account.cc @@ -1,510 +0,0 @@ -//===- xray-account.h - XRay Function Call Accounting ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements basic function call accounting from an XRay trace. -// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include -#include - -#include "xray-account.h" -#include "xray-registry.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/XRay/InstrumentationMap.h" -#include "llvm/XRay/Trace.h" - -using namespace llvm; -using namespace llvm::xray; - -static cl::SubCommand Account("account", "Function call accounting"); -static cl::opt AccountInput(cl::Positional, - cl::desc(""), - cl::Required, cl::sub(Account)); -static cl::opt - AccountKeepGoing("keep-going", cl::desc("Keep going on errors encountered"), - cl::sub(Account), cl::init(false)); -static cl::alias AccountKeepGoing2("k", cl::aliasopt(AccountKeepGoing), - cl::desc("Alias for -keep_going"), - cl::sub(Account)); -static cl::opt AccountDeduceSiblingCalls( - "deduce-sibling-calls", - cl::desc("Deduce sibling calls when unrolling function call stacks"), - cl::sub(Account), cl::init(false)); -static cl::alias - AccountDeduceSiblingCalls2("d", cl::aliasopt(AccountDeduceSiblingCalls), - cl::desc("Alias for -deduce_sibling_calls"), - cl::sub(Account)); -static cl::opt - AccountOutput("output", cl::value_desc("output file"), cl::init("-"), - cl::desc("output file; use '-' for stdout"), - cl::sub(Account)); -static cl::alias AccountOutput2("o", cl::aliasopt(AccountOutput), - cl::desc("Alias for -output"), - cl::sub(Account)); -enum class AccountOutputFormats { TEXT, CSV }; -static cl::opt - AccountOutputFormat("format", cl::desc("output format"), - cl::values(clEnumValN(AccountOutputFormats::TEXT, - "text", "report stats in text"), - clEnumValN(AccountOutputFormats::CSV, "csv", - "report stats in csv")), - cl::sub(Account)); -static cl::alias AccountOutputFormat2("f", cl::desc("Alias of -format"), - cl::aliasopt(AccountOutputFormat), - cl::sub(Account)); - -enum class SortField { - FUNCID, - COUNT, - MIN, - MED, - PCT90, - PCT99, - MAX, - SUM, - FUNC, -}; - -static cl::opt AccountSortOutput( - "sort", cl::desc("sort output by this field"), cl::value_desc("field"), - cl::sub(Account), cl::init(SortField::FUNCID), - cl::values(clEnumValN(SortField::FUNCID, "funcid", "function id"), - clEnumValN(SortField::COUNT, "count", "funciton call counts"), - clEnumValN(SortField::MIN, "min", "minimum function durations"), - clEnumValN(SortField::MED, "med", "median function durations"), - clEnumValN(SortField::PCT90, "90p", "90th percentile durations"), - clEnumValN(SortField::PCT99, "99p", "99th percentile durations"), - clEnumValN(SortField::MAX, "max", "maximum function durations"), - clEnumValN(SortField::SUM, "sum", "sum of call durations"), - clEnumValN(SortField::FUNC, "func", "function names"))); -static cl::alias AccountSortOutput2("s", cl::aliasopt(AccountSortOutput), - cl::desc("Alias for -sort"), - cl::sub(Account)); - -enum class SortDirection { - ASCENDING, - DESCENDING, -}; -static cl::opt AccountSortOrder( - "sortorder", cl::desc("sort ordering"), cl::init(SortDirection::ASCENDING), - cl::values(clEnumValN(SortDirection::ASCENDING, "asc", "ascending"), - clEnumValN(SortDirection::DESCENDING, "dsc", "descending")), - cl::sub(Account)); -static cl::alias AccountSortOrder2("r", cl::aliasopt(AccountSortOrder), - cl::desc("Alias for -sortorder"), - cl::sub(Account)); - -static cl::opt AccountTop("top", cl::desc("only show the top N results"), - cl::value_desc("N"), cl::sub(Account), - cl::init(-1)); -static cl::alias AccountTop2("p", cl::desc("Alias for -top"), - cl::aliasopt(AccountTop), cl::sub(Account)); - -static cl::opt - AccountInstrMap("instr_map", - cl::desc("binary with the instrumentation map, or " - "a separate instrumentation map"), - cl::value_desc("binary with xray_instr_map"), - cl::sub(Account), cl::init("")); -static cl::alias AccountInstrMap2("m", cl::aliasopt(AccountInstrMap), - cl::desc("Alias for -instr_map"), - cl::sub(Account)); - -namespace { - -template void setMinMax(std::pair &MM, U &&V) { - if (MM.first == 0 || MM.second == 0) - MM = std::make_pair(std::forward(V), std::forward(V)); - else - MM = std::make_pair(std::min(MM.first, V), std::max(MM.second, V)); -} - -template T diff(T L, T R) { return std::max(L, R) - std::min(L, R); } - -} // namespace - -bool LatencyAccountant::accountRecord(const XRayRecord &Record) { - setMinMax(PerThreadMinMaxTSC[Record.TId], Record.TSC); - setMinMax(PerCPUMinMaxTSC[Record.CPU], Record.TSC); - - if (CurrentMaxTSC == 0) - CurrentMaxTSC = Record.TSC; - - if (Record.TSC < CurrentMaxTSC) - return false; - - auto &ThreadStack = PerThreadFunctionStack[Record.TId]; - switch (Record.Type) { - case RecordTypes::ENTER: - case RecordTypes::ENTER_ARG: { - ThreadStack.emplace_back(Record.FuncId, Record.TSC); - break; - } - case RecordTypes::EXIT: - case RecordTypes::TAIL_EXIT: { - if (ThreadStack.empty()) - return false; - - if (ThreadStack.back().first == Record.FuncId) { - const auto &Top = ThreadStack.back(); - recordLatency(Top.first, diff(Top.second, Record.TSC)); - ThreadStack.pop_back(); - break; - } - - if (!DeduceSiblingCalls) - return false; - - // Look for the parent up the stack. - auto Parent = - std::find_if(ThreadStack.rbegin(), ThreadStack.rend(), - [&](const std::pair &E) { - return E.first == Record.FuncId; - }); - if (Parent == ThreadStack.rend()) - return false; - - // Account time for this apparently sibling call exit up the stack. - // Considering the following case: - // - // f() - // g() - // h() - // - // We might only ever see the following entries: - // - // -> f() - // -> g() - // -> h() - // <- h() - // <- f() - // - // Now we don't see the exit to g() because some older version of the XRay - // runtime wasn't instrumenting tail exits. If we don't deduce tail calls, - // we may potentially never account time for g() -- and this code would have - // already bailed out, because `<- f()` doesn't match the current "top" of - // stack where we're waiting for the exit to `g()` instead. This is not - // ideal and brittle -- so instead we provide a potentially inaccurate - // accounting of g() instead, computing it from the exit of f(). - // - // While it might be better that we account the time between `-> g()` and - // `-> h()` as the proper accounting of time for g() here, this introduces - // complexity to do correctly (need to backtrack, etc.). - // - // FIXME: Potentially implement the more complex deduction algorithm? - auto I = std::next(Parent).base(); - for (auto &E : make_range(I, ThreadStack.end())) { - recordLatency(E.first, diff(E.second, Record.TSC)); - } - ThreadStack.erase(I, ThreadStack.end()); - break; - } - } - - return true; -} - -namespace { - -// We consolidate the data into a struct which we can output in various forms. -struct ResultRow { - uint64_t Count; - double Min; - double Median; - double Pct90; - double Pct99; - double Max; - double Sum; - std::string DebugInfo; - std::string Function; -}; - -ResultRow getStats(std::vector &Timings) { - assert(!Timings.empty()); - ResultRow R; - R.Sum = std::accumulate(Timings.begin(), Timings.end(), 0.0); - auto MinMax = std::minmax_element(Timings.begin(), Timings.end()); - R.Min = *MinMax.first; - R.Max = *MinMax.second; - R.Count = Timings.size(); - - auto MedianOff = Timings.size() / 2; - std::nth_element(Timings.begin(), Timings.begin() + MedianOff, Timings.end()); - R.Median = Timings[MedianOff]; - - auto Pct90Off = std::floor(Timings.size() * 0.9); - std::nth_element(Timings.begin(), Timings.begin() + Pct90Off, Timings.end()); - R.Pct90 = Timings[Pct90Off]; - - auto Pct99Off = std::floor(Timings.size() * 0.99); - std::nth_element(Timings.begin(), Timings.begin() + Pct99Off, Timings.end()); - R.Pct99 = Timings[Pct99Off]; - return R; -} - -} // namespace - -template -void LatencyAccountant::exportStats(const XRayFileHeader &Header, F Fn) const { - using TupleType = std::tuple; - std::vector Results; - Results.reserve(FunctionLatencies.size()); - for (auto FT : FunctionLatencies) { - const auto &FuncId = FT.first; - auto &Timings = FT.second; - Results.emplace_back(FuncId, Timings.size(), getStats(Timings)); - auto &Row = std::get<2>(Results.back()); - if (Header.CycleFrequency) { - double CycleFrequency = Header.CycleFrequency; - Row.Min /= CycleFrequency; - Row.Median /= CycleFrequency; - Row.Pct90 /= CycleFrequency; - Row.Pct99 /= CycleFrequency; - Row.Max /= CycleFrequency; - Row.Sum /= CycleFrequency; - } - - Row.Function = FuncIdHelper.SymbolOrNumber(FuncId); - Row.DebugInfo = FuncIdHelper.FileLineAndColumn(FuncId); - } - - // Sort the data according to user-provided flags. - switch (AccountSortOutput) { - case SortField::FUNCID: - llvm::sort(Results.begin(), Results.end(), - [](const TupleType &L, const TupleType &R) { - if (AccountSortOrder == SortDirection::ASCENDING) - return std::get<0>(L) < std::get<0>(R); - if (AccountSortOrder == SortDirection::DESCENDING) - return std::get<0>(L) > std::get<0>(R); - llvm_unreachable("Unknown sort direction"); - }); - break; - case SortField::COUNT: - llvm::sort(Results.begin(), Results.end(), - [](const TupleType &L, const TupleType &R) { - if (AccountSortOrder == SortDirection::ASCENDING) - return std::get<1>(L) < std::get<1>(R); - if (AccountSortOrder == SortDirection::DESCENDING) - return std::get<1>(L) > std::get<1>(R); - llvm_unreachable("Unknown sort direction"); - }); - break; - default: - // Here we need to look into the ResultRow for the rest of the data that - // we want to sort by. - llvm::sort(Results.begin(), Results.end(), - [&](const TupleType &L, const TupleType &R) { - auto &LR = std::get<2>(L); - auto &RR = std::get<2>(R); - switch (AccountSortOutput) { - case SortField::COUNT: - if (AccountSortOrder == SortDirection::ASCENDING) - return LR.Count < RR.Count; - if (AccountSortOrder == SortDirection::DESCENDING) - return LR.Count > RR.Count; - llvm_unreachable("Unknown sort direction"); - case SortField::MIN: - if (AccountSortOrder == SortDirection::ASCENDING) - return LR.Min < RR.Min; - if (AccountSortOrder == SortDirection::DESCENDING) - return LR.Min > RR.Min; - llvm_unreachable("Unknown sort direction"); - case SortField::MED: - if (AccountSortOrder == SortDirection::ASCENDING) - return LR.Median < RR.Median; - if (AccountSortOrder == SortDirection::DESCENDING) - return LR.Median > RR.Median; - llvm_unreachable("Unknown sort direction"); - case SortField::PCT90: - if (AccountSortOrder == SortDirection::ASCENDING) - return LR.Pct90 < RR.Pct90; - if (AccountSortOrder == SortDirection::DESCENDING) - return LR.Pct90 > RR.Pct90; - llvm_unreachable("Unknown sort direction"); - case SortField::PCT99: - if (AccountSortOrder == SortDirection::ASCENDING) - return LR.Pct99 < RR.Pct99; - if (AccountSortOrder == SortDirection::DESCENDING) - return LR.Pct99 > RR.Pct99; - llvm_unreachable("Unknown sort direction"); - case SortField::MAX: - if (AccountSortOrder == SortDirection::ASCENDING) - return LR.Max < RR.Max; - if (AccountSortOrder == SortDirection::DESCENDING) - return LR.Max > RR.Max; - llvm_unreachable("Unknown sort direction"); - case SortField::SUM: - if (AccountSortOrder == SortDirection::ASCENDING) - return LR.Sum < RR.Sum; - if (AccountSortOrder == SortDirection::DESCENDING) - return LR.Sum > RR.Sum; - llvm_unreachable("Unknown sort direction"); - default: - llvm_unreachable("Unsupported sort order"); - } - }); - break; - } - - if (AccountTop > 0) - Results.erase(Results.begin() + AccountTop.getValue(), Results.end()); - - for (const auto &R : Results) - Fn(std::get<0>(R), std::get<1>(R), std::get<2>(R)); -} - -void LatencyAccountant::exportStatsAsText(raw_ostream &OS, - const XRayFileHeader &Header) const { - OS << "Functions with latencies: " << FunctionLatencies.size() << "\n"; - - // We spend some effort to make the text output more readable, so we do the - // following formatting decisions for each of the fields: - // - // - funcid: 32-bit, but we can determine the largest number and be - // between - // a minimum of 5 characters, up to 9 characters, right aligned. - // - count: 64-bit, but we can determine the largest number and be - // between - // a minimum of 5 characters, up to 9 characters, right aligned. - // - min, median, 90pct, 99pct, max: double precision, but we want to keep - // the values in seconds, with microsecond precision (0.000'001), so we - // have at most 6 significant digits, with the whole number part to be - // at - // least 1 character. For readability we'll right-align, with full 9 - // characters each. - // - debug info, function name: we format this as a concatenation of the - // debug info and the function name. - // - static constexpr char StatsHeaderFormat[] = - "{0,+9} {1,+10} [{2,+9}, {3,+9}, {4,+9}, {5,+9}, {6,+9}] {7,+9}"; - static constexpr char StatsFormat[] = - R"({0,+9} {1,+10} [{2,+9:f6}, {3,+9:f6}, {4,+9:f6}, {5,+9:f6}, {6,+9:f6}] {7,+9:f6})"; - OS << llvm::formatv(StatsHeaderFormat, "funcid", "count", "min", "med", "90p", - "99p", "max", "sum") - << llvm::formatv(" {0,-12}\n", "function"); - exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) { - OS << llvm::formatv(StatsFormat, FuncId, Count, Row.Min, Row.Median, - Row.Pct90, Row.Pct99, Row.Max, Row.Sum) - << " " << Row.DebugInfo << ": " << Row.Function << "\n"; - }); -} - -void LatencyAccountant::exportStatsAsCSV(raw_ostream &OS, - const XRayFileHeader &Header) const { - OS << "funcid,count,min,median,90%ile,99%ile,max,sum,debug,function\n"; - exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) { - OS << FuncId << ',' << Count << ',' << Row.Min << ',' << Row.Median << ',' - << Row.Pct90 << ',' << Row.Pct99 << ',' << Row.Max << "," << Row.Sum - << ",\"" << Row.DebugInfo << "\",\"" << Row.Function << "\"\n"; - }); -} - -using namespace llvm::xray; - -namespace llvm { -template <> struct format_provider { - static void format(const llvm::xray::RecordTypes &T, raw_ostream &Stream, - StringRef Style) { - switch(T) { - case RecordTypes::ENTER: - Stream << "enter"; - break; - case RecordTypes::ENTER_ARG: - Stream << "enter-arg"; - break; - case RecordTypes::EXIT: - Stream << "exit"; - break; - case RecordTypes::TAIL_EXIT: - Stream << "tail-exit"; - break; - } - } -}; -} // namespace llvm - -static CommandRegistration Unused(&Account, []() -> Error { - InstrumentationMap Map; - if (!AccountInstrMap.empty()) { - auto InstrumentationMapOrError = loadInstrumentationMap(AccountInstrMap); - if (!InstrumentationMapOrError) - return joinErrors(make_error( - Twine("Cannot open instrumentation map '") + - AccountInstrMap + "'", - std::make_error_code(std::errc::invalid_argument)), - InstrumentationMapOrError.takeError()); - Map = std::move(*InstrumentationMapOrError); - } - - std::error_code EC; - raw_fd_ostream OS(AccountOutput, EC, sys::fs::OpenFlags::F_Text); - if (EC) - return make_error( - Twine("Cannot open file '") + AccountOutput + "' for writing.", EC); - - const auto &FunctionAddresses = Map.getFunctionAddresses(); - symbolize::LLVMSymbolizer::Options Opts( - symbolize::FunctionNameKind::LinkageName, true, true, false, ""); - symbolize::LLVMSymbolizer Symbolizer(Opts); - llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer, - FunctionAddresses); - xray::LatencyAccountant FCA(FuncIdHelper, AccountDeduceSiblingCalls); - auto TraceOrErr = loadTraceFile(AccountInput); - if (!TraceOrErr) - return joinErrors( - make_error( - Twine("Failed loading input file '") + AccountInput + "'", - std::make_error_code(std::errc::executable_format_error)), - TraceOrErr.takeError()); - - auto &T = *TraceOrErr; - for (const auto &Record : T) { - if (FCA.accountRecord(Record)) - continue; - errs() - << "Error processing record: " - << llvm::formatv( - R"({{type: {0}; cpu: {1}; record-type: {2}; function-id: {3}; tsc: {4}; thread-id: {5}}})", - Record.RecordType, Record.CPU, Record.Type, Record.FuncId, - Record.TId) - << '\n'; - for (const auto &ThreadStack : FCA.getPerThreadFunctionStack()) { - errs() << "Thread ID: " << ThreadStack.first << "\n"; - if (ThreadStack.second.empty()) { - errs() << " (empty stack)\n"; - continue; - } - auto Level = ThreadStack.second.size(); - for (const auto &Entry : llvm::reverse(ThreadStack.second)) - errs() << " #" << Level-- << "\t" - << FuncIdHelper.SymbolOrNumber(Entry.first) << '\n'; - } - if (!AccountKeepGoing) - return make_error( - Twine("Failed accounting function calls in file '") + AccountInput + - "'.", - std::make_error_code(std::errc::executable_format_error)); - } - switch (AccountOutputFormat) { - case AccountOutputFormats::TEXT: - FCA.exportStatsAsText(OS, T.getFileHeader()); - break; - case AccountOutputFormats::CSV: - FCA.exportStatsAsCSV(OS, T.getFileHeader()); - break; - } - - return Error::success(); -}); Index: llvm/trunk/tools/llvm-xray/xray-account.cpp =================================================================== --- llvm/trunk/tools/llvm-xray/xray-account.cpp +++ llvm/trunk/tools/llvm-xray/xray-account.cpp @@ -0,0 +1,510 @@ +//===- xray-account.h - XRay Function Call Accounting ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements basic function call accounting from an XRay trace. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include "xray-account.h" +#include "xray-registry.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/XRay/InstrumentationMap.h" +#include "llvm/XRay/Trace.h" + +using namespace llvm; +using namespace llvm::xray; + +static cl::SubCommand Account("account", "Function call accounting"); +static cl::opt AccountInput(cl::Positional, + cl::desc(""), + cl::Required, cl::sub(Account)); +static cl::opt + AccountKeepGoing("keep-going", cl::desc("Keep going on errors encountered"), + cl::sub(Account), cl::init(false)); +static cl::alias AccountKeepGoing2("k", cl::aliasopt(AccountKeepGoing), + cl::desc("Alias for -keep_going"), + cl::sub(Account)); +static cl::opt AccountDeduceSiblingCalls( + "deduce-sibling-calls", + cl::desc("Deduce sibling calls when unrolling function call stacks"), + cl::sub(Account), cl::init(false)); +static cl::alias + AccountDeduceSiblingCalls2("d", cl::aliasopt(AccountDeduceSiblingCalls), + cl::desc("Alias for -deduce_sibling_calls"), + cl::sub(Account)); +static cl::opt + AccountOutput("output", cl::value_desc("output file"), cl::init("-"), + cl::desc("output file; use '-' for stdout"), + cl::sub(Account)); +static cl::alias AccountOutput2("o", cl::aliasopt(AccountOutput), + cl::desc("Alias for -output"), + cl::sub(Account)); +enum class AccountOutputFormats { TEXT, CSV }; +static cl::opt + AccountOutputFormat("format", cl::desc("output format"), + cl::values(clEnumValN(AccountOutputFormats::TEXT, + "text", "report stats in text"), + clEnumValN(AccountOutputFormats::CSV, "csv", + "report stats in csv")), + cl::sub(Account)); +static cl::alias AccountOutputFormat2("f", cl::desc("Alias of -format"), + cl::aliasopt(AccountOutputFormat), + cl::sub(Account)); + +enum class SortField { + FUNCID, + COUNT, + MIN, + MED, + PCT90, + PCT99, + MAX, + SUM, + FUNC, +}; + +static cl::opt AccountSortOutput( + "sort", cl::desc("sort output by this field"), cl::value_desc("field"), + cl::sub(Account), cl::init(SortField::FUNCID), + cl::values(clEnumValN(SortField::FUNCID, "funcid", "function id"), + clEnumValN(SortField::COUNT, "count", "funciton call counts"), + clEnumValN(SortField::MIN, "min", "minimum function durations"), + clEnumValN(SortField::MED, "med", "median function durations"), + clEnumValN(SortField::PCT90, "90p", "90th percentile durations"), + clEnumValN(SortField::PCT99, "99p", "99th percentile durations"), + clEnumValN(SortField::MAX, "max", "maximum function durations"), + clEnumValN(SortField::SUM, "sum", "sum of call durations"), + clEnumValN(SortField::FUNC, "func", "function names"))); +static cl::alias AccountSortOutput2("s", cl::aliasopt(AccountSortOutput), + cl::desc("Alias for -sort"), + cl::sub(Account)); + +enum class SortDirection { + ASCENDING, + DESCENDING, +}; +static cl::opt AccountSortOrder( + "sortorder", cl::desc("sort ordering"), cl::init(SortDirection::ASCENDING), + cl::values(clEnumValN(SortDirection::ASCENDING, "asc", "ascending"), + clEnumValN(SortDirection::DESCENDING, "dsc", "descending")), + cl::sub(Account)); +static cl::alias AccountSortOrder2("r", cl::aliasopt(AccountSortOrder), + cl::desc("Alias for -sortorder"), + cl::sub(Account)); + +static cl::opt AccountTop("top", cl::desc("only show the top N results"), + cl::value_desc("N"), cl::sub(Account), + cl::init(-1)); +static cl::alias AccountTop2("p", cl::desc("Alias for -top"), + cl::aliasopt(AccountTop), cl::sub(Account)); + +static cl::opt + AccountInstrMap("instr_map", + cl::desc("binary with the instrumentation map, or " + "a separate instrumentation map"), + cl::value_desc("binary with xray_instr_map"), + cl::sub(Account), cl::init("")); +static cl::alias AccountInstrMap2("m", cl::aliasopt(AccountInstrMap), + cl::desc("Alias for -instr_map"), + cl::sub(Account)); + +namespace { + +template void setMinMax(std::pair &MM, U &&V) { + if (MM.first == 0 || MM.second == 0) + MM = std::make_pair(std::forward(V), std::forward(V)); + else + MM = std::make_pair(std::min(MM.first, V), std::max(MM.second, V)); +} + +template T diff(T L, T R) { return std::max(L, R) - std::min(L, R); } + +} // namespace + +bool LatencyAccountant::accountRecord(const XRayRecord &Record) { + setMinMax(PerThreadMinMaxTSC[Record.TId], Record.TSC); + setMinMax(PerCPUMinMaxTSC[Record.CPU], Record.TSC); + + if (CurrentMaxTSC == 0) + CurrentMaxTSC = Record.TSC; + + if (Record.TSC < CurrentMaxTSC) + return false; + + auto &ThreadStack = PerThreadFunctionStack[Record.TId]; + switch (Record.Type) { + case RecordTypes::ENTER: + case RecordTypes::ENTER_ARG: { + ThreadStack.emplace_back(Record.FuncId, Record.TSC); + break; + } + case RecordTypes::EXIT: + case RecordTypes::TAIL_EXIT: { + if (ThreadStack.empty()) + return false; + + if (ThreadStack.back().first == Record.FuncId) { + const auto &Top = ThreadStack.back(); + recordLatency(Top.first, diff(Top.second, Record.TSC)); + ThreadStack.pop_back(); + break; + } + + if (!DeduceSiblingCalls) + return false; + + // Look for the parent up the stack. + auto Parent = + std::find_if(ThreadStack.rbegin(), ThreadStack.rend(), + [&](const std::pair &E) { + return E.first == Record.FuncId; + }); + if (Parent == ThreadStack.rend()) + return false; + + // Account time for this apparently sibling call exit up the stack. + // Considering the following case: + // + // f() + // g() + // h() + // + // We might only ever see the following entries: + // + // -> f() + // -> g() + // -> h() + // <- h() + // <- f() + // + // Now we don't see the exit to g() because some older version of the XRay + // runtime wasn't instrumenting tail exits. If we don't deduce tail calls, + // we may potentially never account time for g() -- and this code would have + // already bailed out, because `<- f()` doesn't match the current "top" of + // stack where we're waiting for the exit to `g()` instead. This is not + // ideal and brittle -- so instead we provide a potentially inaccurate + // accounting of g() instead, computing it from the exit of f(). + // + // While it might be better that we account the time between `-> g()` and + // `-> h()` as the proper accounting of time for g() here, this introduces + // complexity to do correctly (need to backtrack, etc.). + // + // FIXME: Potentially implement the more complex deduction algorithm? + auto I = std::next(Parent).base(); + for (auto &E : make_range(I, ThreadStack.end())) { + recordLatency(E.first, diff(E.second, Record.TSC)); + } + ThreadStack.erase(I, ThreadStack.end()); + break; + } + } + + return true; +} + +namespace { + +// We consolidate the data into a struct which we can output in various forms. +struct ResultRow { + uint64_t Count; + double Min; + double Median; + double Pct90; + double Pct99; + double Max; + double Sum; + std::string DebugInfo; + std::string Function; +}; + +ResultRow getStats(std::vector &Timings) { + assert(!Timings.empty()); + ResultRow R; + R.Sum = std::accumulate(Timings.begin(), Timings.end(), 0.0); + auto MinMax = std::minmax_element(Timings.begin(), Timings.end()); + R.Min = *MinMax.first; + R.Max = *MinMax.second; + R.Count = Timings.size(); + + auto MedianOff = Timings.size() / 2; + std::nth_element(Timings.begin(), Timings.begin() + MedianOff, Timings.end()); + R.Median = Timings[MedianOff]; + + auto Pct90Off = std::floor(Timings.size() * 0.9); + std::nth_element(Timings.begin(), Timings.begin() + Pct90Off, Timings.end()); + R.Pct90 = Timings[Pct90Off]; + + auto Pct99Off = std::floor(Timings.size() * 0.99); + std::nth_element(Timings.begin(), Timings.begin() + Pct99Off, Timings.end()); + R.Pct99 = Timings[Pct99Off]; + return R; +} + +} // namespace + +template +void LatencyAccountant::exportStats(const XRayFileHeader &Header, F Fn) const { + using TupleType = std::tuple; + std::vector Results; + Results.reserve(FunctionLatencies.size()); + for (auto FT : FunctionLatencies) { + const auto &FuncId = FT.first; + auto &Timings = FT.second; + Results.emplace_back(FuncId, Timings.size(), getStats(Timings)); + auto &Row = std::get<2>(Results.back()); + if (Header.CycleFrequency) { + double CycleFrequency = Header.CycleFrequency; + Row.Min /= CycleFrequency; + Row.Median /= CycleFrequency; + Row.Pct90 /= CycleFrequency; + Row.Pct99 /= CycleFrequency; + Row.Max /= CycleFrequency; + Row.Sum /= CycleFrequency; + } + + Row.Function = FuncIdHelper.SymbolOrNumber(FuncId); + Row.DebugInfo = FuncIdHelper.FileLineAndColumn(FuncId); + } + + // Sort the data according to user-provided flags. + switch (AccountSortOutput) { + case SortField::FUNCID: + llvm::sort(Results.begin(), Results.end(), + [](const TupleType &L, const TupleType &R) { + if (AccountSortOrder == SortDirection::ASCENDING) + return std::get<0>(L) < std::get<0>(R); + if (AccountSortOrder == SortDirection::DESCENDING) + return std::get<0>(L) > std::get<0>(R); + llvm_unreachable("Unknown sort direction"); + }); + break; + case SortField::COUNT: + llvm::sort(Results.begin(), Results.end(), + [](const TupleType &L, const TupleType &R) { + if (AccountSortOrder == SortDirection::ASCENDING) + return std::get<1>(L) < std::get<1>(R); + if (AccountSortOrder == SortDirection::DESCENDING) + return std::get<1>(L) > std::get<1>(R); + llvm_unreachable("Unknown sort direction"); + }); + break; + default: + // Here we need to look into the ResultRow for the rest of the data that + // we want to sort by. + llvm::sort(Results.begin(), Results.end(), + [&](const TupleType &L, const TupleType &R) { + auto &LR = std::get<2>(L); + auto &RR = std::get<2>(R); + switch (AccountSortOutput) { + case SortField::COUNT: + if (AccountSortOrder == SortDirection::ASCENDING) + return LR.Count < RR.Count; + if (AccountSortOrder == SortDirection::DESCENDING) + return LR.Count > RR.Count; + llvm_unreachable("Unknown sort direction"); + case SortField::MIN: + if (AccountSortOrder == SortDirection::ASCENDING) + return LR.Min < RR.Min; + if (AccountSortOrder == SortDirection::DESCENDING) + return LR.Min > RR.Min; + llvm_unreachable("Unknown sort direction"); + case SortField::MED: + if (AccountSortOrder == SortDirection::ASCENDING) + return LR.Median < RR.Median; + if (AccountSortOrder == SortDirection::DESCENDING) + return LR.Median > RR.Median; + llvm_unreachable("Unknown sort direction"); + case SortField::PCT90: + if (AccountSortOrder == SortDirection::ASCENDING) + return LR.Pct90 < RR.Pct90; + if (AccountSortOrder == SortDirection::DESCENDING) + return LR.Pct90 > RR.Pct90; + llvm_unreachable("Unknown sort direction"); + case SortField::PCT99: + if (AccountSortOrder == SortDirection::ASCENDING) + return LR.Pct99 < RR.Pct99; + if (AccountSortOrder == SortDirection::DESCENDING) + return LR.Pct99 > RR.Pct99; + llvm_unreachable("Unknown sort direction"); + case SortField::MAX: + if (AccountSortOrder == SortDirection::ASCENDING) + return LR.Max < RR.Max; + if (AccountSortOrder == SortDirection::DESCENDING) + return LR.Max > RR.Max; + llvm_unreachable("Unknown sort direction"); + case SortField::SUM: + if (AccountSortOrder == SortDirection::ASCENDING) + return LR.Sum < RR.Sum; + if (AccountSortOrder == SortDirection::DESCENDING) + return LR.Sum > RR.Sum; + llvm_unreachable("Unknown sort direction"); + default: + llvm_unreachable("Unsupported sort order"); + } + }); + break; + } + + if (AccountTop > 0) + Results.erase(Results.begin() + AccountTop.getValue(), Results.end()); + + for (const auto &R : Results) + Fn(std::get<0>(R), std::get<1>(R), std::get<2>(R)); +} + +void LatencyAccountant::exportStatsAsText(raw_ostream &OS, + const XRayFileHeader &Header) const { + OS << "Functions with latencies: " << FunctionLatencies.size() << "\n"; + + // We spend some effort to make the text output more readable, so we do the + // following formatting decisions for each of the fields: + // + // - funcid: 32-bit, but we can determine the largest number and be + // between + // a minimum of 5 characters, up to 9 characters, right aligned. + // - count: 64-bit, but we can determine the largest number and be + // between + // a minimum of 5 characters, up to 9 characters, right aligned. + // - min, median, 90pct, 99pct, max: double precision, but we want to keep + // the values in seconds, with microsecond precision (0.000'001), so we + // have at most 6 significant digits, with the whole number part to be + // at + // least 1 character. For readability we'll right-align, with full 9 + // characters each. + // - debug info, function name: we format this as a concatenation of the + // debug info and the function name. + // + static constexpr char StatsHeaderFormat[] = + "{0,+9} {1,+10} [{2,+9}, {3,+9}, {4,+9}, {5,+9}, {6,+9}] {7,+9}"; + static constexpr char StatsFormat[] = + R"({0,+9} {1,+10} [{2,+9:f6}, {3,+9:f6}, {4,+9:f6}, {5,+9:f6}, {6,+9:f6}] {7,+9:f6})"; + OS << llvm::formatv(StatsHeaderFormat, "funcid", "count", "min", "med", "90p", + "99p", "max", "sum") + << llvm::formatv(" {0,-12}\n", "function"); + exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) { + OS << llvm::formatv(StatsFormat, FuncId, Count, Row.Min, Row.Median, + Row.Pct90, Row.Pct99, Row.Max, Row.Sum) + << " " << Row.DebugInfo << ": " << Row.Function << "\n"; + }); +} + +void LatencyAccountant::exportStatsAsCSV(raw_ostream &OS, + const XRayFileHeader &Header) const { + OS << "funcid,count,min,median,90%ile,99%ile,max,sum,debug,function\n"; + exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) { + OS << FuncId << ',' << Count << ',' << Row.Min << ',' << Row.Median << ',' + << Row.Pct90 << ',' << Row.Pct99 << ',' << Row.Max << "," << Row.Sum + << ",\"" << Row.DebugInfo << "\",\"" << Row.Function << "\"\n"; + }); +} + +using namespace llvm::xray; + +namespace llvm { +template <> struct format_provider { + static void format(const llvm::xray::RecordTypes &T, raw_ostream &Stream, + StringRef Style) { + switch(T) { + case RecordTypes::ENTER: + Stream << "enter"; + break; + case RecordTypes::ENTER_ARG: + Stream << "enter-arg"; + break; + case RecordTypes::EXIT: + Stream << "exit"; + break; + case RecordTypes::TAIL_EXIT: + Stream << "tail-exit"; + break; + } + } +}; +} // namespace llvm + +static CommandRegistration Unused(&Account, []() -> Error { + InstrumentationMap Map; + if (!AccountInstrMap.empty()) { + auto InstrumentationMapOrError = loadInstrumentationMap(AccountInstrMap); + if (!InstrumentationMapOrError) + return joinErrors(make_error( + Twine("Cannot open instrumentation map '") + + AccountInstrMap + "'", + std::make_error_code(std::errc::invalid_argument)), + InstrumentationMapOrError.takeError()); + Map = std::move(*InstrumentationMapOrError); + } + + std::error_code EC; + raw_fd_ostream OS(AccountOutput, EC, sys::fs::OpenFlags::F_Text); + if (EC) + return make_error( + Twine("Cannot open file '") + AccountOutput + "' for writing.", EC); + + const auto &FunctionAddresses = Map.getFunctionAddresses(); + symbolize::LLVMSymbolizer::Options Opts( + symbolize::FunctionNameKind::LinkageName, true, true, false, ""); + symbolize::LLVMSymbolizer Symbolizer(Opts); + llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer, + FunctionAddresses); + xray::LatencyAccountant FCA(FuncIdHelper, AccountDeduceSiblingCalls); + auto TraceOrErr = loadTraceFile(AccountInput); + if (!TraceOrErr) + return joinErrors( + make_error( + Twine("Failed loading input file '") + AccountInput + "'", + std::make_error_code(std::errc::executable_format_error)), + TraceOrErr.takeError()); + + auto &T = *TraceOrErr; + for (const auto &Record : T) { + if (FCA.accountRecord(Record)) + continue; + errs() + << "Error processing record: " + << llvm::formatv( + R"({{type: {0}; cpu: {1}; record-type: {2}; function-id: {3}; tsc: {4}; thread-id: {5}}})", + Record.RecordType, Record.CPU, Record.Type, Record.FuncId, + Record.TId) + << '\n'; + for (const auto &ThreadStack : FCA.getPerThreadFunctionStack()) { + errs() << "Thread ID: " << ThreadStack.first << "\n"; + if (ThreadStack.second.empty()) { + errs() << " (empty stack)\n"; + continue; + } + auto Level = ThreadStack.second.size(); + for (const auto &Entry : llvm::reverse(ThreadStack.second)) + errs() << " #" << Level-- << "\t" + << FuncIdHelper.SymbolOrNumber(Entry.first) << '\n'; + } + if (!AccountKeepGoing) + return make_error( + Twine("Failed accounting function calls in file '") + AccountInput + + "'.", + std::make_error_code(std::errc::executable_format_error)); + } + switch (AccountOutputFormat) { + case AccountOutputFormats::TEXT: + FCA.exportStatsAsText(OS, T.getFileHeader()); + break; + case AccountOutputFormats::CSV: + FCA.exportStatsAsCSV(OS, T.getFileHeader()); + break; + } + + return Error::success(); +}); Index: llvm/trunk/tools/llvm-xray/xray-color-helper.cc =================================================================== --- llvm/trunk/tools/llvm-xray/xray-color-helper.cc +++ llvm/trunk/tools/llvm-xray/xray-color-helper.cc @@ -1,222 +0,0 @@ -//===-- xray-graph.cc - XRay Function Call Graph Renderer -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// A class to get a color from a specified gradient. -// -//===----------------------------------------------------------------------===// - -#include "xray-color-helper.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; -using namespace xray; - -// Sequential ColorMaps, which are used to represent information -// from some minimum to some maximum. - -static const std::tuple SequentialMaps[][9] = { - {// The greys color scheme from http://colorbrewer2.org/ - std::make_tuple(255, 255, 255), std::make_tuple(240, 240, 240), - std::make_tuple(217, 217, 217), std::make_tuple(189, 189, 189), - std::make_tuple(150, 150, 150), std::make_tuple(115, 115, 115), - std::make_tuple(82, 82, 82), std::make_tuple(37, 37, 37), - std::make_tuple(0, 0, 0)}, - {// The OrRd color scheme from http://colorbrewer2.org/ - std::make_tuple(255, 247, 236), std::make_tuple(254, 232, 200), - std::make_tuple(253, 212, 158), std::make_tuple(253, 187, 132), - std::make_tuple(252, 141, 89), std::make_tuple(239, 101, 72), - std::make_tuple(215, 48, 31), std::make_tuple(179, 0, 0), - std::make_tuple(127, 0, 0)}, - {// The PuBu color scheme from http://colorbrewer2.org/ - std::make_tuple(255, 247, 251), std::make_tuple(236, 231, 242), - std::make_tuple(208, 209, 230), std::make_tuple(166, 189, 219), - std::make_tuple(116, 169, 207), std::make_tuple(54, 144, 192), - std::make_tuple(5, 112, 176), std::make_tuple(4, 90, 141), - std::make_tuple(2, 56, 88)}}; - -// Sequential Maps extend the last colors given out of range inputs. -static const std::tuple SequentialBounds[][2] = { - {// The Bounds for the greys color scheme - std::make_tuple(255, 255, 255), std::make_tuple(0, 0, 0)}, - {// The Bounds for the OrRd color Scheme - std::make_tuple(255, 247, 236), std::make_tuple(127, 0, 0)}, - {// The Bounds for the PuBu color Scheme - std::make_tuple(255, 247, 251), std::make_tuple(2, 56, 88)}}; - -ColorHelper::ColorHelper(ColorHelper::SequentialScheme S) - : MinIn(0.0), MaxIn(1.0), ColorMap(SequentialMaps[static_cast(S)]), - BoundMap(SequentialBounds[static_cast(S)]) {} - -// Diverging ColorMaps, which are used to represent information -// representing differenes, or a range that goes from negative to positive. -// These take an input in the range [-1,1]. - -static const std::tuple DivergingCoeffs[][11] = { - {// The PiYG color scheme from http://colorbrewer2.org/ - std::make_tuple(142, 1, 82), std::make_tuple(197, 27, 125), - std::make_tuple(222, 119, 174), std::make_tuple(241, 182, 218), - std::make_tuple(253, 224, 239), std::make_tuple(247, 247, 247), - std::make_tuple(230, 245, 208), std::make_tuple(184, 225, 134), - std::make_tuple(127, 188, 65), std::make_tuple(77, 146, 33), - std::make_tuple(39, 100, 25)}}; - -// Diverging maps use out of bounds ranges to show missing data. Missing Right -// Being below min, and missing left being above max. -static const std::tuple DivergingBounds[][2] = { - {// The PiYG color scheme has green and red for missing right and left - // respectively. - std::make_tuple(255, 0, 0), std::make_tuple(0, 255, 0)}}; - -ColorHelper::ColorHelper(ColorHelper::DivergingScheme S) - : MinIn(-1.0), MaxIn(1.0), ColorMap(DivergingCoeffs[static_cast(S)]), - BoundMap(DivergingBounds[static_cast(S)]) {} - -// Takes a tuple of uint8_ts representing a color in RGB and converts them to -// HSV represented by a tuple of doubles -static std::tuple -convertToHSV(const std::tuple &Color) { - double Scaled[3] = {std::get<0>(Color) / 255.0, std::get<1>(Color) / 255.0, - std::get<2>(Color) / 255.0}; - int Min = 0; - int Max = 0; - for (int i = 1; i < 3; ++i) { - if (Scaled[i] < Scaled[Min]) - Min = i; - if (Scaled[i] > Scaled[Max]) - Max = i; - } - - double C = Scaled[Max] - Scaled[Min]; - - double HPrime = - (C == 0) ? 0 : (Scaled[(Max + 1) % 3] - Scaled[(Max + 2) % 3]) / C; - HPrime = HPrime + 2.0 * Max; - - double H = (HPrime < 0) ? (HPrime + 6.0) * 60 - : HPrime * 60; // Scale to between 0 and 360 - double V = Scaled[Max]; - - double S = (V == 0.0) ? 0.0 : C / V; - - return std::make_tuple(H, S, V); -} - -// Takes a double precision number, clips it between 0 and 1 and then converts -// that to an integer between 0x00 and 0xFF with proxpper rounding. -static uint8_t unitIntervalTo8BitChar(double B) { - double n = std::max(std::min(B, 1.0), 0.0); - return static_cast(255 * n + 0.5); -} - -// Takes a typle of doubles representing a color in HSV and converts them to -// RGB represented as a tuple of uint8_ts -static std::tuple -convertToRGB(const std::tuple &Color) { - const double &H = std::get<0>(Color); - const double &S = std::get<1>(Color); - const double &V = std::get<2>(Color); - - double C = V * S; - - double HPrime = H / 60; - double X = C * (1 - std::abs(std::fmod(HPrime, 2.0) - 1)); - - double RGB1[3]; - int HPrimeInt = static_cast(HPrime); - if (HPrimeInt % 2 == 0) { - RGB1[(HPrimeInt / 2) % 3] = C; - RGB1[(HPrimeInt / 2 + 1) % 3] = X; - RGB1[(HPrimeInt / 2 + 2) % 3] = 0.0; - } else { - RGB1[(HPrimeInt / 2) % 3] = X; - RGB1[(HPrimeInt / 2 + 1) % 3] = C; - RGB1[(HPrimeInt / 2 + 2) % 3] = 0.0; - } - - double Min = V - C; - double RGB2[3] = {RGB1[0] + Min, RGB1[1] + Min, RGB1[2] + Min}; - - return std::make_tuple(unitIntervalTo8BitChar(RGB2[0]), - unitIntervalTo8BitChar(RGB2[1]), - unitIntervalTo8BitChar(RGB2[2])); -} - -// The Hue component of the HSV interpolation Routine -static double interpolateHue(double H0, double H1, double T) { - double D = H1 - H0; - if (H0 > H1) { - std::swap(H0, H1); - - D = -D; - T = 1 - T; - } - - if (D <= 180) { - return H0 + T * (H1 - H0); - } else { - H0 = H0 + 360; - return std::fmod(H0 + T * (H1 - H0) + 720, 360); - } -} - -// Interpolates between two HSV Colors both represented as a tuple of doubles -// Returns an HSV Color represented as a tuple of doubles -static std::tuple -interpolateHSV(const std::tuple &C0, - const std::tuple &C1, double T) { - double H = interpolateHue(std::get<0>(C0), std::get<0>(C1), T); - double S = std::get<1>(C0) + T * (std::get<1>(C1) - std::get<1>(C0)); - double V = std::get<2>(C0) + T * (std::get<2>(C1) - std::get<2>(C0)); - return std::make_tuple(H, S, V); -} - -// Get the Color as a tuple of uint8_ts -std::tuple -ColorHelper::getColorTuple(double Point) const { - assert(!ColorMap.empty() && "ColorMap must not be empty!"); - assert(!BoundMap.empty() && "BoundMap must not be empty!"); - - if (Point < MinIn) - return BoundMap[0]; - if (Point > MaxIn) - return BoundMap[1]; - - size_t MaxIndex = ColorMap.size() - 1; - double IntervalWidth = MaxIn - MinIn; - double OffsetP = Point - MinIn; - double SectionWidth = IntervalWidth / static_cast(MaxIndex); - size_t SectionNo = std::floor(OffsetP / SectionWidth); - double T = (OffsetP - SectionNo * SectionWidth) / SectionWidth; - - auto &RGBColor0 = ColorMap[SectionNo]; - auto &RGBColor1 = ColorMap[std::min(SectionNo + 1, MaxIndex)]; - - auto HSVColor0 = convertToHSV(RGBColor0); - auto HSVColor1 = convertToHSV(RGBColor1); - - auto InterpolatedHSVColor = interpolateHSV(HSVColor0, HSVColor1, T); - return convertToRGB(InterpolatedHSVColor); -} - -// A helper method to convert a color represented as tuple of uint8s to a hex -// string. -std::string -ColorHelper::getColorString(std::tuple t) { - return llvm::formatv("#{0:X-2}{1:X-2}{2:X-2}", std::get<0>(t), std::get<1>(t), - std::get<2>(t)); -} - -// Gets a color in a gradient given a number in the interval [0,1], it does this -// by evaluating a polynomial which maps [0, 1] -> [0, 1] for each of the R G -// and B values in the color. It then converts this [0,1] colors to a 24 bit -// color as a hex string. -std::string ColorHelper::getColorString(double Point) const { - return getColorString(getColorTuple(Point)); -} Index: llvm/trunk/tools/llvm-xray/xray-color-helper.cpp =================================================================== --- llvm/trunk/tools/llvm-xray/xray-color-helper.cpp +++ llvm/trunk/tools/llvm-xray/xray-color-helper.cpp @@ -0,0 +1,222 @@ +//===-- xray-graph.cpp: XRay Function Call Graph Renderer -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// A class to get a color from a specified gradient. +// +//===----------------------------------------------------------------------===// + +#include "xray-color-helper.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace xray; + +// Sequential ColorMaps, which are used to represent information +// from some minimum to some maximum. + +static const std::tuple SequentialMaps[][9] = { + {// The greys color scheme from http://colorbrewer2.org/ + std::make_tuple(255, 255, 255), std::make_tuple(240, 240, 240), + std::make_tuple(217, 217, 217), std::make_tuple(189, 189, 189), + std::make_tuple(150, 150, 150), std::make_tuple(115, 115, 115), + std::make_tuple(82, 82, 82), std::make_tuple(37, 37, 37), + std::make_tuple(0, 0, 0)}, + {// The OrRd color scheme from http://colorbrewer2.org/ + std::make_tuple(255, 247, 236), std::make_tuple(254, 232, 200), + std::make_tuple(253, 212, 158), std::make_tuple(253, 187, 132), + std::make_tuple(252, 141, 89), std::make_tuple(239, 101, 72), + std::make_tuple(215, 48, 31), std::make_tuple(179, 0, 0), + std::make_tuple(127, 0, 0)}, + {// The PuBu color scheme from http://colorbrewer2.org/ + std::make_tuple(255, 247, 251), std::make_tuple(236, 231, 242), + std::make_tuple(208, 209, 230), std::make_tuple(166, 189, 219), + std::make_tuple(116, 169, 207), std::make_tuple(54, 144, 192), + std::make_tuple(5, 112, 176), std::make_tuple(4, 90, 141), + std::make_tuple(2, 56, 88)}}; + +// Sequential Maps extend the last colors given out of range inputs. +static const std::tuple SequentialBounds[][2] = { + {// The Bounds for the greys color scheme + std::make_tuple(255, 255, 255), std::make_tuple(0, 0, 0)}, + {// The Bounds for the OrRd color Scheme + std::make_tuple(255, 247, 236), std::make_tuple(127, 0, 0)}, + {// The Bounds for the PuBu color Scheme + std::make_tuple(255, 247, 251), std::make_tuple(2, 56, 88)}}; + +ColorHelper::ColorHelper(ColorHelper::SequentialScheme S) + : MinIn(0.0), MaxIn(1.0), ColorMap(SequentialMaps[static_cast(S)]), + BoundMap(SequentialBounds[static_cast(S)]) {} + +// Diverging ColorMaps, which are used to represent information +// representing differenes, or a range that goes from negative to positive. +// These take an input in the range [-1,1]. + +static const std::tuple DivergingCoeffs[][11] = { + {// The PiYG color scheme from http://colorbrewer2.org/ + std::make_tuple(142, 1, 82), std::make_tuple(197, 27, 125), + std::make_tuple(222, 119, 174), std::make_tuple(241, 182, 218), + std::make_tuple(253, 224, 239), std::make_tuple(247, 247, 247), + std::make_tuple(230, 245, 208), std::make_tuple(184, 225, 134), + std::make_tuple(127, 188, 65), std::make_tuple(77, 146, 33), + std::make_tuple(39, 100, 25)}}; + +// Diverging maps use out of bounds ranges to show missing data. Missing Right +// Being below min, and missing left being above max. +static const std::tuple DivergingBounds[][2] = { + {// The PiYG color scheme has green and red for missing right and left + // respectively. + std::make_tuple(255, 0, 0), std::make_tuple(0, 255, 0)}}; + +ColorHelper::ColorHelper(ColorHelper::DivergingScheme S) + : MinIn(-1.0), MaxIn(1.0), ColorMap(DivergingCoeffs[static_cast(S)]), + BoundMap(DivergingBounds[static_cast(S)]) {} + +// Takes a tuple of uint8_ts representing a color in RGB and converts them to +// HSV represented by a tuple of doubles +static std::tuple +convertToHSV(const std::tuple &Color) { + double Scaled[3] = {std::get<0>(Color) / 255.0, std::get<1>(Color) / 255.0, + std::get<2>(Color) / 255.0}; + int Min = 0; + int Max = 0; + for (int i = 1; i < 3; ++i) { + if (Scaled[i] < Scaled[Min]) + Min = i; + if (Scaled[i] > Scaled[Max]) + Max = i; + } + + double C = Scaled[Max] - Scaled[Min]; + + double HPrime = + (C == 0) ? 0 : (Scaled[(Max + 1) % 3] - Scaled[(Max + 2) % 3]) / C; + HPrime = HPrime + 2.0 * Max; + + double H = (HPrime < 0) ? (HPrime + 6.0) * 60 + : HPrime * 60; // Scale to between 0 and 360 + double V = Scaled[Max]; + + double S = (V == 0.0) ? 0.0 : C / V; + + return std::make_tuple(H, S, V); +} + +// Takes a double precision number, clips it between 0 and 1 and then converts +// that to an integer between 0x00 and 0xFF with proxpper rounding. +static uint8_t unitIntervalTo8BitChar(double B) { + double n = std::max(std::min(B, 1.0), 0.0); + return static_cast(255 * n + 0.5); +} + +// Takes a typle of doubles representing a color in HSV and converts them to +// RGB represented as a tuple of uint8_ts +static std::tuple +convertToRGB(const std::tuple &Color) { + const double &H = std::get<0>(Color); + const double &S = std::get<1>(Color); + const double &V = std::get<2>(Color); + + double C = V * S; + + double HPrime = H / 60; + double X = C * (1 - std::abs(std::fmod(HPrime, 2.0) - 1)); + + double RGB1[3]; + int HPrimeInt = static_cast(HPrime); + if (HPrimeInt % 2 == 0) { + RGB1[(HPrimeInt / 2) % 3] = C; + RGB1[(HPrimeInt / 2 + 1) % 3] = X; + RGB1[(HPrimeInt / 2 + 2) % 3] = 0.0; + } else { + RGB1[(HPrimeInt / 2) % 3] = X; + RGB1[(HPrimeInt / 2 + 1) % 3] = C; + RGB1[(HPrimeInt / 2 + 2) % 3] = 0.0; + } + + double Min = V - C; + double RGB2[3] = {RGB1[0] + Min, RGB1[1] + Min, RGB1[2] + Min}; + + return std::make_tuple(unitIntervalTo8BitChar(RGB2[0]), + unitIntervalTo8BitChar(RGB2[1]), + unitIntervalTo8BitChar(RGB2[2])); +} + +// The Hue component of the HSV interpolation Routine +static double interpolateHue(double H0, double H1, double T) { + double D = H1 - H0; + if (H0 > H1) { + std::swap(H0, H1); + + D = -D; + T = 1 - T; + } + + if (D <= 180) { + return H0 + T * (H1 - H0); + } else { + H0 = H0 + 360; + return std::fmod(H0 + T * (H1 - H0) + 720, 360); + } +} + +// Interpolates between two HSV Colors both represented as a tuple of doubles +// Returns an HSV Color represented as a tuple of doubles +static std::tuple +interpolateHSV(const std::tuple &C0, + const std::tuple &C1, double T) { + double H = interpolateHue(std::get<0>(C0), std::get<0>(C1), T); + double S = std::get<1>(C0) + T * (std::get<1>(C1) - std::get<1>(C0)); + double V = std::get<2>(C0) + T * (std::get<2>(C1) - std::get<2>(C0)); + return std::make_tuple(H, S, V); +} + +// Get the Color as a tuple of uint8_ts +std::tuple +ColorHelper::getColorTuple(double Point) const { + assert(!ColorMap.empty() && "ColorMap must not be empty!"); + assert(!BoundMap.empty() && "BoundMap must not be empty!"); + + if (Point < MinIn) + return BoundMap[0]; + if (Point > MaxIn) + return BoundMap[1]; + + size_t MaxIndex = ColorMap.size() - 1; + double IntervalWidth = MaxIn - MinIn; + double OffsetP = Point - MinIn; + double SectionWidth = IntervalWidth / static_cast(MaxIndex); + size_t SectionNo = std::floor(OffsetP / SectionWidth); + double T = (OffsetP - SectionNo * SectionWidth) / SectionWidth; + + auto &RGBColor0 = ColorMap[SectionNo]; + auto &RGBColor1 = ColorMap[std::min(SectionNo + 1, MaxIndex)]; + + auto HSVColor0 = convertToHSV(RGBColor0); + auto HSVColor1 = convertToHSV(RGBColor1); + + auto InterpolatedHSVColor = interpolateHSV(HSVColor0, HSVColor1, T); + return convertToRGB(InterpolatedHSVColor); +} + +// A helper method to convert a color represented as tuple of uint8s to a hex +// string. +std::string +ColorHelper::getColorString(std::tuple t) { + return llvm::formatv("#{0:X-2}{1:X-2}{2:X-2}", std::get<0>(t), std::get<1>(t), + std::get<2>(t)); +} + +// Gets a color in a gradient given a number in the interval [0,1], it does this +// by evaluating a polynomial which maps [0, 1] -> [0, 1] for each of the R G +// and B values in the color. It then converts this [0,1] colors to a 24 bit +// color as a hex string. +std::string ColorHelper::getColorString(double Point) const { + return getColorString(getColorTuple(Point)); +} Index: llvm/trunk/tools/llvm-xray/xray-converter.cc =================================================================== --- llvm/trunk/tools/llvm-xray/xray-converter.cc +++ llvm/trunk/tools/llvm-xray/xray-converter.cc @@ -1,393 +0,0 @@ -//===- xray-converter.cc - XRay Trace Conversion --------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implements the trace conversion functions. -// -//===----------------------------------------------------------------------===// -#include "xray-converter.h" - -#include "trie-node.h" -#include "xray-registry.h" -#include "llvm/DebugInfo/Symbolize/Symbolize.h" -#include "llvm/Support/EndianStream.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/ScopedPrinter.h" -#include "llvm/Support/YAMLTraits.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/XRay/InstrumentationMap.h" -#include "llvm/XRay/Trace.h" -#include "llvm/XRay/YAMLXRayRecord.h" - -using namespace llvm; -using namespace xray; - -// llvm-xray convert -// ---------------------------------------------------------------------------- -static cl::SubCommand Convert("convert", "Trace Format Conversion"); -static cl::opt ConvertInput(cl::Positional, - cl::desc(""), - cl::Required, cl::sub(Convert)); -enum class ConvertFormats { BINARY, YAML, CHROME_TRACE_EVENT }; -static cl::opt ConvertOutputFormat( - "output-format", cl::desc("output format"), - cl::values(clEnumValN(ConvertFormats::BINARY, "raw", "output in binary"), - clEnumValN(ConvertFormats::YAML, "yaml", "output in yaml"), - clEnumValN(ConvertFormats::CHROME_TRACE_EVENT, "trace_event", - "Output in chrome's trace event format. " - "May be visualized with the Catapult trace viewer.")), - cl::sub(Convert)); -static cl::alias ConvertOutputFormat2("f", cl::aliasopt(ConvertOutputFormat), - cl::desc("Alias for -output-format"), - cl::sub(Convert)); -static cl::opt - ConvertOutput("output", cl::value_desc("output file"), cl::init("-"), - cl::desc("output file; use '-' for stdout"), - cl::sub(Convert)); -static cl::alias ConvertOutput2("o", cl::aliasopt(ConvertOutput), - cl::desc("Alias for -output"), - cl::sub(Convert)); - -static cl::opt - ConvertSymbolize("symbolize", - cl::desc("symbolize function ids from the input log"), - cl::init(false), cl::sub(Convert)); -static cl::alias ConvertSymbolize2("y", cl::aliasopt(ConvertSymbolize), - cl::desc("Alias for -symbolize"), - cl::sub(Convert)); - -static cl::opt - ConvertInstrMap("instr_map", - cl::desc("binary with the instrumentation map, or " - "a separate instrumentation map"), - cl::value_desc("binary with xray_instr_map"), - cl::sub(Convert), cl::init("")); -static cl::alias ConvertInstrMap2("m", cl::aliasopt(ConvertInstrMap), - cl::desc("Alias for -instr_map"), - cl::sub(Convert)); -static cl::opt ConvertSortInput( - "sort", - cl::desc("determines whether to sort input log records by timestamp"), - cl::sub(Convert), cl::init(true)); -static cl::alias ConvertSortInput2("s", cl::aliasopt(ConvertSortInput), - cl::desc("Alias for -sort"), - cl::sub(Convert)); - -using llvm::yaml::Output; - -void TraceConverter::exportAsYAML(const Trace &Records, raw_ostream &OS) { - YAMLXRayTrace Trace; - const auto &FH = Records.getFileHeader(); - Trace.Header = {FH.Version, FH.Type, FH.ConstantTSC, FH.NonstopTSC, - FH.CycleFrequency}; - Trace.Records.reserve(Records.size()); - for (const auto &R : Records) { - Trace.Records.push_back({R.RecordType, R.CPU, R.Type, R.FuncId, - Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId) - : llvm::to_string(R.FuncId), - R.TSC, R.TId, R.CallArgs}); - } - Output Out(OS, nullptr, 0); - Out << Trace; -} - -void TraceConverter::exportAsRAWv1(const Trace &Records, raw_ostream &OS) { - // First write out the file header, in the correct endian-appropriate format - // (XRay assumes currently little endian). - support::endian::Writer Writer(OS); - const auto &FH = Records.getFileHeader(); - Writer.write(FH.Version); - Writer.write(FH.Type); - uint32_t Bitfield{0}; - if (FH.ConstantTSC) - Bitfield |= 1uL; - if (FH.NonstopTSC) - Bitfield |= 1uL << 1; - Writer.write(Bitfield); - Writer.write(FH.CycleFrequency); - - // There's 16 bytes of padding at the end of the file header. - static constexpr uint32_t Padding4B = 0; - Writer.write(Padding4B); - Writer.write(Padding4B); - Writer.write(Padding4B); - Writer.write(Padding4B); - - // Then write out the rest of the records, still in an endian-appropriate - // format. - for (const auto &R : Records) { - Writer.write(R.RecordType); - // The on disk naive raw format uses 8 bit CPUs, but the record has 16. - // There's no choice but truncation. - Writer.write(static_cast(R.CPU)); - switch (R.Type) { - case RecordTypes::ENTER: - case RecordTypes::ENTER_ARG: - Writer.write(uint8_t{0}); - break; - case RecordTypes::EXIT: - Writer.write(uint8_t{1}); - break; - case RecordTypes::TAIL_EXIT: - Writer.write(uint8_t{2}); - break; - } - Writer.write(R.FuncId); - Writer.write(R.TSC); - Writer.write(R.TId); - Writer.write(Padding4B); - Writer.write(Padding4B); - Writer.write(Padding4B); - } -} - -namespace { - -// A structure that allows building a dictionary of stack ids for the Chrome -// trace event format. -struct StackIdData { - // Each Stack of function calls has a unique ID. - unsigned id; - - // Bookkeeping so that IDs can be maintained uniquely across threads. - // Traversal keeps sibling pointers to other threads stacks. This is helpful - // to determine when a thread encounters a new stack and should assign a new - // unique ID. - SmallVector *, 4> siblings; -}; - -using StackTrieNode = TrieNode; - -// A helper function to find the sibling nodes for an encountered function in a -// thread of execution. Relies on the invariant that each time a new node is -// traversed in a thread, sibling bidirectional pointers are maintained. -SmallVector -findSiblings(StackTrieNode *parent, int32_t FnId, uint32_t TId, - const DenseMap> - &StackRootsByThreadId) { - - SmallVector Siblings{}; - - if (parent == nullptr) { - for (auto map_iter : StackRootsByThreadId) { - // Only look for siblings in other threads. - if (map_iter.first != TId) - for (auto node_iter : map_iter.second) { - if (node_iter->FuncId == FnId) - Siblings.push_back(node_iter); - } - } - return Siblings; - } - - for (auto *ParentSibling : parent->ExtraData.siblings) - for (auto node_iter : ParentSibling->Callees) - if (node_iter->FuncId == FnId) - Siblings.push_back(node_iter); - - return Siblings; -} - -// Given a function being invoked in a thread with id TId, finds and returns the -// StackTrie representing the function call stack. If no node exists, creates -// the node. Assigns unique IDs to stacks newly encountered among all threads -// and keeps sibling links up to when creating new nodes. -StackTrieNode *findOrCreateStackNode( - StackTrieNode *Parent, int32_t FuncId, uint32_t TId, - DenseMap> &StackRootsByThreadId, - DenseMap &StacksByStackId, unsigned *id_counter, - std::forward_list &NodeStore) { - SmallVector &ParentCallees = - Parent == nullptr ? StackRootsByThreadId[TId] : Parent->Callees; - auto match = find_if(ParentCallees, [FuncId](StackTrieNode *ParentCallee) { - return FuncId == ParentCallee->FuncId; - }); - if (match != ParentCallees.end()) - return *match; - - SmallVector siblings = - findSiblings(Parent, FuncId, TId, StackRootsByThreadId); - if (siblings.empty()) { - NodeStore.push_front({FuncId, Parent, {}, {(*id_counter)++, {}}}); - StackTrieNode *CurrentStack = &NodeStore.front(); - StacksByStackId[*id_counter - 1] = CurrentStack; - ParentCallees.push_back(CurrentStack); - return CurrentStack; - } - unsigned stack_id = siblings[0]->ExtraData.id; - NodeStore.push_front({FuncId, Parent, {}, {stack_id, std::move(siblings)}}); - StackTrieNode *CurrentStack = &NodeStore.front(); - for (auto *sibling : CurrentStack->ExtraData.siblings) - sibling->ExtraData.siblings.push_back(CurrentStack); - ParentCallees.push_back(CurrentStack); - return CurrentStack; -} - -void writeTraceViewerRecord(raw_ostream &OS, int32_t FuncId, uint32_t TId, - bool Symbolize, - const FuncIdConversionHelper &FuncIdHelper, - double EventTimestampUs, - const StackTrieNode &StackCursor, - StringRef FunctionPhenotype) { - OS << " "; - OS << llvm::formatv( - R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )" - R"("ts" : "{3:f3}", "sf" : "{4}" })", - (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId) - : llvm::to_string(FuncId)), - FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id); -} - -} // namespace - -void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records, - raw_ostream &OS) { - const auto &FH = Records.getFileHeader(); - auto CycleFreq = FH.CycleFrequency; - - unsigned id_counter = 0; - - OS << "{\n \"traceEvents\": ["; - DenseMap StackCursorByThreadId{}; - DenseMap> StackRootsByThreadId{}; - DenseMap StacksByStackId{}; - std::forward_list NodeStore{}; - int loop_count = 0; - for (const auto &R : Records) { - if (loop_count++ == 0) - OS << "\n"; - else - OS << ",\n"; - - // Chrome trace event format always wants data in micros. - // CyclesPerMicro = CycleHertz / 10^6 - // TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp - // Could lose some precision here by converting the TSC to a double to - // multiply by the period in micros. 52 bit mantissa is a good start though. - // TODO: Make feature request to Chrome Trace viewer to accept ticks and a - // frequency or do some more involved calculation to avoid dangers of - // conversion. - double EventTimestampUs = double(1000000) / CycleFreq * double(R.TSC); - StackTrieNode *&StackCursor = StackCursorByThreadId[R.TId]; - switch (R.Type) { - case RecordTypes::ENTER: - case RecordTypes::ENTER_ARG: - StackCursor = findOrCreateStackNode(StackCursor, R.FuncId, R.TId, - StackRootsByThreadId, StacksByStackId, - &id_counter, NodeStore); - // Each record is represented as a json dictionary with function name, - // type of B for begin or E for end, thread id, process id (faked), - // timestamp in microseconds, and a stack frame id. The ids are logged - // in an id dictionary after the events. - writeTraceViewerRecord(OS, R.FuncId, R.TId, Symbolize, FuncIdHelper, - EventTimestampUs, *StackCursor, "B"); - break; - case RecordTypes::EXIT: - case RecordTypes::TAIL_EXIT: - // No entries to record end for. - if (StackCursor == nullptr) - break; - // Should we emit an END record anyway or account this condition? - // (And/Or in loop termination below) - StackTrieNode *PreviousCursor = nullptr; - do { - writeTraceViewerRecord(OS, StackCursor->FuncId, R.TId, Symbolize, - FuncIdHelper, EventTimestampUs, *StackCursor, - "E"); - PreviousCursor = StackCursor; - StackCursor = StackCursor->Parent; - } while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr); - break; - } - } - OS << "\n ],\n"; // Close the Trace Events array. - OS << " " - << "\"displayTimeUnit\": \"ns\",\n"; - - // The stackFrames dictionary substantially reduces size of the output file by - // avoiding repeating the entire call stack of function names for each entry. - OS << R"( "stackFrames": {)"; - int stack_frame_count = 0; - for (auto map_iter : StacksByStackId) { - if (stack_frame_count++ == 0) - OS << "\n"; - else - OS << ",\n"; - OS << " "; - OS << llvm::formatv( - R"("{0}" : { "name" : "{1}")", map_iter.first, - (Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId) - : llvm::to_string(map_iter.second->FuncId))); - if (map_iter.second->Parent != nullptr) - OS << llvm::formatv(R"(, "parent": "{0}")", - map_iter.second->Parent->ExtraData.id); - OS << " }"; - } - OS << "\n }\n"; // Close the stack frames map. - OS << "}\n"; // Close the JSON entry. -} - -namespace llvm { -namespace xray { - -static CommandRegistration Unused(&Convert, []() -> Error { - // FIXME: Support conversion to BINARY when upgrading XRay trace versions. - InstrumentationMap Map; - if (!ConvertInstrMap.empty()) { - auto InstrumentationMapOrError = loadInstrumentationMap(ConvertInstrMap); - if (!InstrumentationMapOrError) - return joinErrors(make_error( - Twine("Cannot open instrumentation map '") + - ConvertInstrMap + "'", - std::make_error_code(std::errc::invalid_argument)), - InstrumentationMapOrError.takeError()); - Map = std::move(*InstrumentationMapOrError); - } - - const auto &FunctionAddresses = Map.getFunctionAddresses(); - symbolize::LLVMSymbolizer::Options Opts( - symbolize::FunctionNameKind::LinkageName, true, true, false, ""); - symbolize::LLVMSymbolizer Symbolizer(Opts); - llvm::xray::FuncIdConversionHelper FuncIdHelper(ConvertInstrMap, Symbolizer, - FunctionAddresses); - llvm::xray::TraceConverter TC(FuncIdHelper, ConvertSymbolize); - std::error_code EC; - raw_fd_ostream OS(ConvertOutput, EC, - ConvertOutputFormat == ConvertFormats::BINARY - ? sys::fs::OpenFlags::F_None - : sys::fs::OpenFlags::F_Text); - if (EC) - return make_error( - Twine("Cannot open file '") + ConvertOutput + "' for writing.", EC); - - auto TraceOrErr = loadTraceFile(ConvertInput, ConvertSortInput); - if (!TraceOrErr) - return joinErrors( - make_error( - Twine("Failed loading input file '") + ConvertInput + "'.", - std::make_error_code(std::errc::executable_format_error)), - TraceOrErr.takeError()); - - auto &T = *TraceOrErr; - switch (ConvertOutputFormat) { - case ConvertFormats::YAML: - TC.exportAsYAML(T, OS); - break; - case ConvertFormats::BINARY: - TC.exportAsRAWv1(T, OS); - break; - case ConvertFormats::CHROME_TRACE_EVENT: - TC.exportAsChromeTraceEventFormat(T, OS); - break; - } - return Error::success(); -}); - -} // namespace xray -} // namespace llvm Index: llvm/trunk/tools/llvm-xray/xray-converter.cpp =================================================================== --- llvm/trunk/tools/llvm-xray/xray-converter.cpp +++ llvm/trunk/tools/llvm-xray/xray-converter.cpp @@ -0,0 +1,393 @@ +//===- xray-converter.cpp: XRay Trace Conversion --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the trace conversion functions. +// +//===----------------------------------------------------------------------===// +#include "xray-converter.h" + +#include "trie-node.h" +#include "xray-registry.h" +#include "llvm/DebugInfo/Symbolize/Symbolize.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/ScopedPrinter.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/XRay/InstrumentationMap.h" +#include "llvm/XRay/Trace.h" +#include "llvm/XRay/YAMLXRayRecord.h" + +using namespace llvm; +using namespace xray; + +// llvm-xray convert +// ---------------------------------------------------------------------------- +static cl::SubCommand Convert("convert", "Trace Format Conversion"); +static cl::opt ConvertInput(cl::Positional, + cl::desc(""), + cl::Required, cl::sub(Convert)); +enum class ConvertFormats { BINARY, YAML, CHROME_TRACE_EVENT }; +static cl::opt ConvertOutputFormat( + "output-format", cl::desc("output format"), + cl::values(clEnumValN(ConvertFormats::BINARY, "raw", "output in binary"), + clEnumValN(ConvertFormats::YAML, "yaml", "output in yaml"), + clEnumValN(ConvertFormats::CHROME_TRACE_EVENT, "trace_event", + "Output in chrome's trace event format. " + "May be visualized with the Catapult trace viewer.")), + cl::sub(Convert)); +static cl::alias ConvertOutputFormat2("f", cl::aliasopt(ConvertOutputFormat), + cl::desc("Alias for -output-format"), + cl::sub(Convert)); +static cl::opt + ConvertOutput("output", cl::value_desc("output file"), cl::init("-"), + cl::desc("output file; use '-' for stdout"), + cl::sub(Convert)); +static cl::alias ConvertOutput2("o", cl::aliasopt(ConvertOutput), + cl::desc("Alias for -output"), + cl::sub(Convert)); + +static cl::opt + ConvertSymbolize("symbolize", + cl::desc("symbolize function ids from the input log"), + cl::init(false), cl::sub(Convert)); +static cl::alias ConvertSymbolize2("y", cl::aliasopt(ConvertSymbolize), + cl::desc("Alias for -symbolize"), + cl::sub(Convert)); + +static cl::opt + ConvertInstrMap("instr_map", + cl::desc("binary with the instrumentation map, or " + "a separate instrumentation map"), + cl::value_desc("binary with xray_instr_map"), + cl::sub(Convert), cl::init("")); +static cl::alias ConvertInstrMap2("m", cl::aliasopt(ConvertInstrMap), + cl::desc("Alias for -instr_map"), + cl::sub(Convert)); +static cl::opt ConvertSortInput( + "sort", + cl::desc("determines whether to sort input log records by timestamp"), + cl::sub(Convert), cl::init(true)); +static cl::alias ConvertSortInput2("s", cl::aliasopt(ConvertSortInput), + cl::desc("Alias for -sort"), + cl::sub(Convert)); + +using llvm::yaml::Output; + +void TraceConverter::exportAsYAML(const Trace &Records, raw_ostream &OS) { + YAMLXRayTrace Trace; + const auto &FH = Records.getFileHeader(); + Trace.Header = {FH.Version, FH.Type, FH.ConstantTSC, FH.NonstopTSC, + FH.CycleFrequency}; + Trace.Records.reserve(Records.size()); + for (const auto &R : Records) { + Trace.Records.push_back({R.RecordType, R.CPU, R.Type, R.FuncId, + Symbolize ? FuncIdHelper.SymbolOrNumber(R.FuncId) + : llvm::to_string(R.FuncId), + R.TSC, R.TId, R.CallArgs}); + } + Output Out(OS, nullptr, 0); + Out << Trace; +} + +void TraceConverter::exportAsRAWv1(const Trace &Records, raw_ostream &OS) { + // First write out the file header, in the correct endian-appropriate format + // (XRay assumes currently little endian). + support::endian::Writer Writer(OS); + const auto &FH = Records.getFileHeader(); + Writer.write(FH.Version); + Writer.write(FH.Type); + uint32_t Bitfield{0}; + if (FH.ConstantTSC) + Bitfield |= 1uL; + if (FH.NonstopTSC) + Bitfield |= 1uL << 1; + Writer.write(Bitfield); + Writer.write(FH.CycleFrequency); + + // There's 16 bytes of padding at the end of the file header. + static constexpr uint32_t Padding4B = 0; + Writer.write(Padding4B); + Writer.write(Padding4B); + Writer.write(Padding4B); + Writer.write(Padding4B); + + // Then write out the rest of the records, still in an endian-appropriate + // format. + for (const auto &R : Records) { + Writer.write(R.RecordType); + // The on disk naive raw format uses 8 bit CPUs, but the record has 16. + // There's no choice but truncation. + Writer.write(static_cast(R.CPU)); + switch (R.Type) { + case RecordTypes::ENTER: + case RecordTypes::ENTER_ARG: + Writer.write(uint8_t{0}); + break; + case RecordTypes::EXIT: + Writer.write(uint8_t{1}); + break; + case RecordTypes::TAIL_EXIT: + Writer.write(uint8_t{2}); + break; + } + Writer.write(R.FuncId); + Writer.write(R.TSC); + Writer.write(R.TId); + Writer.write(Padding4B); + Writer.write(Padding4B); + Writer.write(Padding4B); + } +} + +namespace { + +// A structure that allows building a dictionary of stack ids for the Chrome +// trace event format. +struct StackIdData { + // Each Stack of function calls has a unique ID. + unsigned id; + + // Bookkeeping so that IDs can be maintained uniquely across threads. + // Traversal keeps sibling pointers to other threads stacks. This is helpful + // to determine when a thread encounters a new stack and should assign a new + // unique ID. + SmallVector *, 4> siblings; +}; + +using StackTrieNode = TrieNode; + +// A helper function to find the sibling nodes for an encountered function in a +// thread of execution. Relies on the invariant that each time a new node is +// traversed in a thread, sibling bidirectional pointers are maintained. +SmallVector +findSiblings(StackTrieNode *parent, int32_t FnId, uint32_t TId, + const DenseMap> + &StackRootsByThreadId) { + + SmallVector Siblings{}; + + if (parent == nullptr) { + for (auto map_iter : StackRootsByThreadId) { + // Only look for siblings in other threads. + if (map_iter.first != TId) + for (auto node_iter : map_iter.second) { + if (node_iter->FuncId == FnId) + Siblings.push_back(node_iter); + } + } + return Siblings; + } + + for (auto *ParentSibling : parent->ExtraData.siblings) + for (auto node_iter : ParentSibling->Callees) + if (node_iter->FuncId == FnId) + Siblings.push_back(node_iter); + + return Siblings; +} + +// Given a function being invoked in a thread with id TId, finds and returns the +// StackTrie representing the function call stack. If no node exists, creates +// the node. Assigns unique IDs to stacks newly encountered among all threads +// and keeps sibling links up to when creating new nodes. +StackTrieNode *findOrCreateStackNode( + StackTrieNode *Parent, int32_t FuncId, uint32_t TId, + DenseMap> &StackRootsByThreadId, + DenseMap &StacksByStackId, unsigned *id_counter, + std::forward_list &NodeStore) { + SmallVector &ParentCallees = + Parent == nullptr ? StackRootsByThreadId[TId] : Parent->Callees; + auto match = find_if(ParentCallees, [FuncId](StackTrieNode *ParentCallee) { + return FuncId == ParentCallee->FuncId; + }); + if (match != ParentCallees.end()) + return *match; + + SmallVector siblings = + findSiblings(Parent, FuncId, TId, StackRootsByThreadId); + if (siblings.empty()) { + NodeStore.push_front({FuncId, Parent, {}, {(*id_counter)++, {}}}); + StackTrieNode *CurrentStack = &NodeStore.front(); + StacksByStackId[*id_counter - 1] = CurrentStack; + ParentCallees.push_back(CurrentStack); + return CurrentStack; + } + unsigned stack_id = siblings[0]->ExtraData.id; + NodeStore.push_front({FuncId, Parent, {}, {stack_id, std::move(siblings)}}); + StackTrieNode *CurrentStack = &NodeStore.front(); + for (auto *sibling : CurrentStack->ExtraData.siblings) + sibling->ExtraData.siblings.push_back(CurrentStack); + ParentCallees.push_back(CurrentStack); + return CurrentStack; +} + +void writeTraceViewerRecord(raw_ostream &OS, int32_t FuncId, uint32_t TId, + bool Symbolize, + const FuncIdConversionHelper &FuncIdHelper, + double EventTimestampUs, + const StackTrieNode &StackCursor, + StringRef FunctionPhenotype) { + OS << " "; + OS << llvm::formatv( + R"({ "name" : "{0}", "ph" : "{1}", "tid" : "{2}", "pid" : "1", )" + R"("ts" : "{3:f3}", "sf" : "{4}" })", + (Symbolize ? FuncIdHelper.SymbolOrNumber(FuncId) + : llvm::to_string(FuncId)), + FunctionPhenotype, TId, EventTimestampUs, StackCursor.ExtraData.id); +} + +} // namespace + +void TraceConverter::exportAsChromeTraceEventFormat(const Trace &Records, + raw_ostream &OS) { + const auto &FH = Records.getFileHeader(); + auto CycleFreq = FH.CycleFrequency; + + unsigned id_counter = 0; + + OS << "{\n \"traceEvents\": ["; + DenseMap StackCursorByThreadId{}; + DenseMap> StackRootsByThreadId{}; + DenseMap StacksByStackId{}; + std::forward_list NodeStore{}; + int loop_count = 0; + for (const auto &R : Records) { + if (loop_count++ == 0) + OS << "\n"; + else + OS << ",\n"; + + // Chrome trace event format always wants data in micros. + // CyclesPerMicro = CycleHertz / 10^6 + // TSC / CyclesPerMicro == TSC * 10^6 / CycleHertz == MicroTimestamp + // Could lose some precision here by converting the TSC to a double to + // multiply by the period in micros. 52 bit mantissa is a good start though. + // TODO: Make feature request to Chrome Trace viewer to accept ticks and a + // frequency or do some more involved calculation to avoid dangers of + // conversion. + double EventTimestampUs = double(1000000) / CycleFreq * double(R.TSC); + StackTrieNode *&StackCursor = StackCursorByThreadId[R.TId]; + switch (R.Type) { + case RecordTypes::ENTER: + case RecordTypes::ENTER_ARG: + StackCursor = findOrCreateStackNode(StackCursor, R.FuncId, R.TId, + StackRootsByThreadId, StacksByStackId, + &id_counter, NodeStore); + // Each record is represented as a json dictionary with function name, + // type of B for begin or E for end, thread id, process id (faked), + // timestamp in microseconds, and a stack frame id. The ids are logged + // in an id dictionary after the events. + writeTraceViewerRecord(OS, R.FuncId, R.TId, Symbolize, FuncIdHelper, + EventTimestampUs, *StackCursor, "B"); + break; + case RecordTypes::EXIT: + case RecordTypes::TAIL_EXIT: + // No entries to record end for. + if (StackCursor == nullptr) + break; + // Should we emit an END record anyway or account this condition? + // (And/Or in loop termination below) + StackTrieNode *PreviousCursor = nullptr; + do { + writeTraceViewerRecord(OS, StackCursor->FuncId, R.TId, Symbolize, + FuncIdHelper, EventTimestampUs, *StackCursor, + "E"); + PreviousCursor = StackCursor; + StackCursor = StackCursor->Parent; + } while (PreviousCursor->FuncId != R.FuncId && StackCursor != nullptr); + break; + } + } + OS << "\n ],\n"; // Close the Trace Events array. + OS << " " + << "\"displayTimeUnit\": \"ns\",\n"; + + // The stackFrames dictionary substantially reduces size of the output file by + // avoiding repeating the entire call stack of function names for each entry. + OS << R"( "stackFrames": {)"; + int stack_frame_count = 0; + for (auto map_iter : StacksByStackId) { + if (stack_frame_count++ == 0) + OS << "\n"; + else + OS << ",\n"; + OS << " "; + OS << llvm::formatv( + R"("{0}" : { "name" : "{1}")", map_iter.first, + (Symbolize ? FuncIdHelper.SymbolOrNumber(map_iter.second->FuncId) + : llvm::to_string(map_iter.second->FuncId))); + if (map_iter.second->Parent != nullptr) + OS << llvm::formatv(R"(, "parent": "{0}")", + map_iter.second->Parent->ExtraData.id); + OS << " }"; + } + OS << "\n }\n"; // Close the stack frames map. + OS << "}\n"; // Close the JSON entry. +} + +namespace llvm { +namespace xray { + +static CommandRegistration Unused(&Convert, []() -> Error { + // FIXME: Support conversion to BINARY when upgrading XRay trace versions. + InstrumentationMap Map; + if (!ConvertInstrMap.empty()) { + auto InstrumentationMapOrError = loadInstrumentationMap(ConvertInstrMap); + if (!InstrumentationMapOrError) + return joinErrors(make_error( + Twine("Cannot open instrumentation map '") + + ConvertInstrMap + "'", + std::make_error_code(std::errc::invalid_argument)), + InstrumentationMapOrError.takeError()); + Map = std::move(*InstrumentationMapOrError); + } + + const auto &FunctionAddresses = Map.getFunctionAddresses(); + symbolize::LLVMSymbolizer::Options Opts( + symbolize::FunctionNameKind::LinkageName, true, true, false, ""); + symbolize::LLVMSymbolizer Symbolizer(Opts); + llvm::xray::FuncIdConversionHelper FuncIdHelper(ConvertInstrMap, Symbolizer, + FunctionAddresses); + llvm::xray::TraceConverter TC(FuncIdHelper, ConvertSymbolize); + std::error_code EC; + raw_fd_ostream OS(ConvertOutput, EC, + ConvertOutputFormat == ConvertFormats::BINARY + ? sys::fs::OpenFlags::F_None + : sys::fs::OpenFlags::F_Text); + if (EC) + return make_error( + Twine("Cannot open file '") + ConvertOutput + "' for writing.", EC); + + auto TraceOrErr = loadTraceFile(ConvertInput, ConvertSortInput); + if (!TraceOrErr) + return joinErrors( + make_error( + Twine("Failed loading input file '") + ConvertInput + "'.", + std::make_error_code(std::errc::executable_format_error)), + TraceOrErr.takeError()); + + auto &T = *TraceOrErr; + switch (ConvertOutputFormat) { + case ConvertFormats::YAML: + TC.exportAsYAML(T, OS); + break; + case ConvertFormats::BINARY: + TC.exportAsRAWv1(T, OS); + break; + case ConvertFormats::CHROME_TRACE_EVENT: + TC.exportAsChromeTraceEventFormat(T, OS); + break; + } + return Error::success(); +}); + +} // namespace xray +} // namespace llvm Index: llvm/trunk/tools/llvm-xray/xray-extract.cc =================================================================== --- llvm/trunk/tools/llvm-xray/xray-extract.cc +++ llvm/trunk/tools/llvm-xray/xray-extract.cc @@ -1,97 +0,0 @@ -//===- xray-extract.cc - XRay Instrumentation Map Extraction --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implementation of the xray-extract.h interface. -// -// FIXME: Support other XRay-instrumented binary formats other than ELF. -// -//===----------------------------------------------------------------------===// - - -#include "func-id-helper.h" -#include "xray-registry.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Error.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/XRay/InstrumentationMap.h" - -using namespace llvm; -using namespace llvm::xray; -using namespace llvm::yaml; - -// llvm-xray extract -// ---------------------------------------------------------------------------- -static cl::SubCommand Extract("extract", "Extract instrumentation maps"); -static cl::opt ExtractInput(cl::Positional, - cl::desc(""), cl::Required, - cl::sub(Extract)); -static cl::opt - ExtractOutput("output", cl::value_desc("output file"), cl::init("-"), - cl::desc("output file; use '-' for stdout"), - cl::sub(Extract)); -static cl::alias ExtractOutput2("o", cl::aliasopt(ExtractOutput), - cl::desc("Alias for -output"), - cl::sub(Extract)); -static cl::opt ExtractSymbolize("symbolize", cl::value_desc("symbolize"), - cl::init(false), - cl::desc("symbolize functions"), - cl::sub(Extract)); -static cl::alias ExtractSymbolize2("s", cl::aliasopt(ExtractSymbolize), - cl::desc("alias for -symbolize"), - cl::sub(Extract)); - -namespace { - -void exportAsYAML(const InstrumentationMap &Map, raw_ostream &OS, - FuncIdConversionHelper &FH) { - // First we translate the sleds into the YAMLXRaySledEntry objects in a deque. - std::vector YAMLSleds; - auto Sleds = Map.sleds(); - YAMLSleds.reserve(std::distance(Sleds.begin(), Sleds.end())); - for (const auto &Sled : Sleds) { - auto FuncId = Map.getFunctionId(Sled.Function); - if (!FuncId) - return; - YAMLSleds.push_back({*FuncId, Sled.Address, Sled.Function, Sled.Kind, - Sled.AlwaysInstrument, - ExtractSymbolize ? FH.SymbolOrNumber(*FuncId) : ""}); - } - Output Out(OS, nullptr, 0); - Out << YAMLSleds; -} - -} // namespace - -static CommandRegistration Unused(&Extract, []() -> Error { - auto InstrumentationMapOrError = loadInstrumentationMap(ExtractInput); - if (!InstrumentationMapOrError) - return joinErrors(make_error( - Twine("Cannot extract instrumentation map from '") + - ExtractInput + "'.", - std::make_error_code(std::errc::invalid_argument)), - InstrumentationMapOrError.takeError()); - - std::error_code EC; - raw_fd_ostream OS(ExtractOutput, EC, sys::fs::OpenFlags::F_Text); - if (EC) - return make_error( - Twine("Cannot open file '") + ExtractOutput + "' for writing.", EC); - const auto &FunctionAddresses = - InstrumentationMapOrError->getFunctionAddresses(); - symbolize::LLVMSymbolizer::Options Opts( - symbolize::FunctionNameKind::LinkageName, true, true, false, ""); - symbolize::LLVMSymbolizer Symbolizer(Opts); - llvm::xray::FuncIdConversionHelper FuncIdHelper(ExtractInput, Symbolizer, - FunctionAddresses); - exportAsYAML(*InstrumentationMapOrError, OS, FuncIdHelper); - return Error::success(); -}); Index: llvm/trunk/tools/llvm-xray/xray-extract.cpp =================================================================== --- llvm/trunk/tools/llvm-xray/xray-extract.cpp +++ llvm/trunk/tools/llvm-xray/xray-extract.cpp @@ -0,0 +1,97 @@ +//===- xray-extract.cpp: XRay Instrumentation Map Extraction --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the xray-extract.h interface. +// +// FIXME: Support other XRay-instrumented binary formats other than ELF. +// +//===----------------------------------------------------------------------===// + + +#include "func-id-helper.h" +#include "xray-registry.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/XRay/InstrumentationMap.h" + +using namespace llvm; +using namespace llvm::xray; +using namespace llvm::yaml; + +// llvm-xray extract +// ---------------------------------------------------------------------------- +static cl::SubCommand Extract("extract", "Extract instrumentation maps"); +static cl::opt ExtractInput(cl::Positional, + cl::desc(""), cl::Required, + cl::sub(Extract)); +static cl::opt + ExtractOutput("output", cl::value_desc("output file"), cl::init("-"), + cl::desc("output file; use '-' for stdout"), + cl::sub(Extract)); +static cl::alias ExtractOutput2("o", cl::aliasopt(ExtractOutput), + cl::desc("Alias for -output"), + cl::sub(Extract)); +static cl::opt ExtractSymbolize("symbolize", cl::value_desc("symbolize"), + cl::init(false), + cl::desc("symbolize functions"), + cl::sub(Extract)); +static cl::alias ExtractSymbolize2("s", cl::aliasopt(ExtractSymbolize), + cl::desc("alias for -symbolize"), + cl::sub(Extract)); + +namespace { + +void exportAsYAML(const InstrumentationMap &Map, raw_ostream &OS, + FuncIdConversionHelper &FH) { + // First we translate the sleds into the YAMLXRaySledEntry objects in a deque. + std::vector YAMLSleds; + auto Sleds = Map.sleds(); + YAMLSleds.reserve(std::distance(Sleds.begin(), Sleds.end())); + for (const auto &Sled : Sleds) { + auto FuncId = Map.getFunctionId(Sled.Function); + if (!FuncId) + return; + YAMLSleds.push_back({*FuncId, Sled.Address, Sled.Function, Sled.Kind, + Sled.AlwaysInstrument, + ExtractSymbolize ? FH.SymbolOrNumber(*FuncId) : ""}); + } + Output Out(OS, nullptr, 0); + Out << YAMLSleds; +} + +} // namespace + +static CommandRegistration Unused(&Extract, []() -> Error { + auto InstrumentationMapOrError = loadInstrumentationMap(ExtractInput); + if (!InstrumentationMapOrError) + return joinErrors(make_error( + Twine("Cannot extract instrumentation map from '") + + ExtractInput + "'.", + std::make_error_code(std::errc::invalid_argument)), + InstrumentationMapOrError.takeError()); + + std::error_code EC; + raw_fd_ostream OS(ExtractOutput, EC, sys::fs::OpenFlags::F_Text); + if (EC) + return make_error( + Twine("Cannot open file '") + ExtractOutput + "' for writing.", EC); + const auto &FunctionAddresses = + InstrumentationMapOrError->getFunctionAddresses(); + symbolize::LLVMSymbolizer::Options Opts( + symbolize::FunctionNameKind::LinkageName, true, true, false, ""); + symbolize::LLVMSymbolizer Symbolizer(Opts); + llvm::xray::FuncIdConversionHelper FuncIdHelper(ExtractInput, Symbolizer, + FunctionAddresses); + exportAsYAML(*InstrumentationMapOrError, OS, FuncIdHelper); + return Error::success(); +}); Index: llvm/trunk/tools/llvm-xray/xray-graph-diff.cc =================================================================== --- llvm/trunk/tools/llvm-xray/xray-graph-diff.cc +++ llvm/trunk/tools/llvm-xray/xray-graph-diff.cc @@ -1,484 +0,0 @@ -//===-- xray-graph-diff.cc - XRay Function Call Graph Renderer ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Generate a DOT file to represent the function call graph encountered in -// the trace. -// -//===----------------------------------------------------------------------===// -#include -#include -#include -#include - -#include "xray-graph-diff.h" -#include "xray-graph.h" -#include "xray-registry.h" - -#include "xray-color-helper.h" -#include "llvm/ADT/iterator_range.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/XRay/Trace.h" - -using namespace llvm; -using namespace xray; - -static cl::SubCommand GraphDiff("graph-diff", - "Generate diff of function-call graphs"); -static cl::opt GraphDiffInput1(cl::Positional, - cl::desc(""), - cl::Required, cl::sub(GraphDiff)); -static cl::opt GraphDiffInput2(cl::Positional, - cl::desc(""), - cl::Required, cl::sub(GraphDiff)); - -static cl::opt - GraphDiffKeepGoing("keep-going", - cl::desc("Keep going on errors encountered"), - cl::sub(GraphDiff), cl::init(false)); -static cl::alias GraphDiffKeepGoingA("k", cl::aliasopt(GraphDiffKeepGoing), - cl::desc("Alias for -keep-going"), - cl::sub(GraphDiff)); -static cl::opt - GraphDiffKeepGoing1("keep-going-1", - cl::desc("Keep going on errors encountered in trace 1"), - cl::sub(GraphDiff), cl::init(false)); -static cl::alias GraphDiffKeepGoing1A("k1", cl::aliasopt(GraphDiffKeepGoing1), - cl::desc("Alias for -keep-going-1"), - cl::sub(GraphDiff)); -static cl::opt - GraphDiffKeepGoing2("keep-going-2", - cl::desc("Keep going on errors encountered in trace 2"), - cl::sub(GraphDiff), cl::init(false)); -static cl::alias GraphDiffKeepGoing2A("k2", cl::aliasopt(GraphDiffKeepGoing2), - cl::desc("Alias for -keep-going-2"), - cl::sub(GraphDiff)); - -static cl::opt - GraphDiffInstrMap("instr-map", - cl::desc("binary with the instrumentation map, or " - "a separate instrumentation map for graph"), - cl::value_desc("binary with xray_instr_map or yaml"), - cl::sub(GraphDiff), cl::init("")); -static cl::alias GraphDiffInstrMapA("m", cl::aliasopt(GraphDiffInstrMap), - cl::desc("Alias for -instr-map"), - cl::sub(GraphDiff)); -static cl::opt - GraphDiffInstrMap1("instr-map-1", - cl::desc("binary with the instrumentation map, or " - "a separate instrumentation map for graph 1"), - cl::value_desc("binary with xray_instr_map or yaml"), - cl::sub(GraphDiff), cl::init("")); -static cl::alias GraphDiffInstrMap1A("m1", cl::aliasopt(GraphDiffInstrMap1), - cl::desc("Alias for -instr-map-1"), - cl::sub(GraphDiff)); -static cl::opt - GraphDiffInstrMap2("instr-map-2", - cl::desc("binary with the instrumentation map, or " - "a separate instrumentation map for graph 2"), - cl::value_desc("binary with xray_instr_map or yaml"), - cl::sub(GraphDiff), cl::init("")); -static cl::alias GraphDiffInstrMap2A("m2", cl::aliasopt(GraphDiffInstrMap2), - cl::desc("Alias for -instr-map-2"), - cl::sub(GraphDiff)); - -static cl::opt GraphDiffDeduceSiblingCalls( - "deduce-sibling-calls", - cl::desc("Deduce sibling calls when unrolling function call stacks"), - cl::sub(GraphDiff), cl::init(false)); -static cl::alias - GraphDiffDeduceSiblingCallsA("d", cl::aliasopt(GraphDiffDeduceSiblingCalls), - cl::desc("Alias for -deduce-sibling-calls"), - cl::sub(GraphDiff)); -static cl::opt GraphDiffDeduceSiblingCalls1( - "deduce-sibling-calls-1", - cl::desc("Deduce sibling calls when unrolling function call stacks"), - cl::sub(GraphDiff), cl::init(false)); -static cl::alias GraphDiffDeduceSiblingCalls1A( - "d1", cl::aliasopt(GraphDiffDeduceSiblingCalls1), - cl::desc("Alias for -deduce-sibling-calls-1"), cl::sub(GraphDiff)); -static cl::opt GraphDiffDeduceSiblingCalls2( - "deduce-sibling-calls-2", - cl::desc("Deduce sibling calls when unrolling function call stacks"), - cl::sub(GraphDiff), cl::init(false)); -static cl::alias GraphDiffDeduceSiblingCalls2A( - "d2", cl::aliasopt(GraphDiffDeduceSiblingCalls2), - cl::desc("Alias for -deduce-sibling-calls-2"), cl::sub(GraphDiff)); - -static cl::opt GraphDiffEdgeLabel( - "edge-label", cl::desc("Output graphs with edges labeled with this field"), - cl::value_desc("field"), cl::sub(GraphDiff), - cl::init(GraphRenderer::StatType::NONE), - cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", - "Do not label Edges"), - clEnumValN(GraphRenderer::StatType::COUNT, "count", - "function call counts"), - clEnumValN(GraphRenderer::StatType::MIN, "min", - "minimum function durations"), - clEnumValN(GraphRenderer::StatType::MED, "med", - "median function durations"), - clEnumValN(GraphRenderer::StatType::PCT90, "90p", - "90th percentile durations"), - clEnumValN(GraphRenderer::StatType::PCT99, "99p", - "99th percentile durations"), - clEnumValN(GraphRenderer::StatType::MAX, "max", - "maximum function durations"), - clEnumValN(GraphRenderer::StatType::SUM, "sum", - "sum of call durations"))); -static cl::alias GraphDiffEdgeLabelA("e", cl::aliasopt(GraphDiffEdgeLabel), - cl::desc("Alias for -edge-label"), - cl::sub(GraphDiff)); - -static cl::opt GraphDiffEdgeColor( - "edge-color", cl::desc("Output graphs with edges colored by this field"), - cl::value_desc("field"), cl::sub(GraphDiff), - cl::init(GraphRenderer::StatType::NONE), - cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", - "Do not color Edges"), - clEnumValN(GraphRenderer::StatType::COUNT, "count", - "function call counts"), - clEnumValN(GraphRenderer::StatType::MIN, "min", - "minimum function durations"), - clEnumValN(GraphRenderer::StatType::MED, "med", - "median function durations"), - clEnumValN(GraphRenderer::StatType::PCT90, "90p", - "90th percentile durations"), - clEnumValN(GraphRenderer::StatType::PCT99, "99p", - "99th percentile durations"), - clEnumValN(GraphRenderer::StatType::MAX, "max", - "maximum function durations"), - clEnumValN(GraphRenderer::StatType::SUM, "sum", - "sum of call durations"))); -static cl::alias GraphDiffEdgeColorA("c", cl::aliasopt(GraphDiffEdgeColor), - cl::desc("Alias for -edge-color"), - cl::sub(GraphDiff)); - -static cl::opt GraphDiffVertexLabel( - "vertex-label", - cl::desc("Output graphs with vertices labeled with this field"), - cl::value_desc("field"), cl::sub(GraphDiff), - cl::init(GraphRenderer::StatType::NONE), - cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", - "Do not label Vertices"), - clEnumValN(GraphRenderer::StatType::COUNT, "count", - "function call counts"), - clEnumValN(GraphRenderer::StatType::MIN, "min", - "minimum function durations"), - clEnumValN(GraphRenderer::StatType::MED, "med", - "median function durations"), - clEnumValN(GraphRenderer::StatType::PCT90, "90p", - "90th percentile durations"), - clEnumValN(GraphRenderer::StatType::PCT99, "99p", - "99th percentile durations"), - clEnumValN(GraphRenderer::StatType::MAX, "max", - "maximum function durations"), - clEnumValN(GraphRenderer::StatType::SUM, "sum", - "sum of call durations"))); -static cl::alias GraphDiffVertexLabelA("v", cl::aliasopt(GraphDiffVertexLabel), - cl::desc("Alias for -vertex-label"), - cl::sub(GraphDiff)); - -static cl::opt GraphDiffVertexColor( - "vertex-color", - cl::desc("Output graphs with vertices colored by this field"), - cl::value_desc("field"), cl::sub(GraphDiff), - cl::init(GraphRenderer::StatType::NONE), - cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", - "Do not color Vertices"), - clEnumValN(GraphRenderer::StatType::COUNT, "count", - "function call counts"), - clEnumValN(GraphRenderer::StatType::MIN, "min", - "minimum function durations"), - clEnumValN(GraphRenderer::StatType::MED, "med", - "median function durations"), - clEnumValN(GraphRenderer::StatType::PCT90, "90p", - "90th percentile durations"), - clEnumValN(GraphRenderer::StatType::PCT99, "99p", - "99th percentile durations"), - clEnumValN(GraphRenderer::StatType::MAX, "max", - "maximum function durations"), - clEnumValN(GraphRenderer::StatType::SUM, "sum", - "sum of call durations"))); -static cl::alias GraphDiffVertexColorA("b", cl::aliasopt(GraphDiffVertexColor), - cl::desc("Alias for -vertex-color"), - cl::sub(GraphDiff)); - -static cl::opt GraphDiffVertexLabelTrunc( - "vertex-label-trun", cl::desc("What length to truncate vertex labels to "), - cl::sub(GraphDiff), cl::init(40)); -static cl::alias - GraphDiffVertexLabelTrunc1("t", cl::aliasopt(GraphDiffVertexLabelTrunc), - cl::desc("Alias for -vertex-label-trun"), - cl::sub(GraphDiff)); - -static cl::opt - GraphDiffOutput("output", cl::value_desc("Output file"), cl::init("-"), - cl::desc("output file; use '-' for stdout"), - cl::sub(GraphDiff)); -static cl::alias GraphDiffOutputA("o", cl::aliasopt(GraphDiffOutput), - cl::desc("Alias for -output"), - cl::sub(GraphDiff)); - -Expected GraphDiffRenderer::Factory::getGraphDiffRenderer() { - GraphDiffRenderer R; - - for (int i = 0; i < N; ++i) { - const auto &G = this->G[i].get(); - for (const auto &V : G.vertices()) { - const auto &VAttr = V.second; - R.G[VAttr.SymbolName].CorrVertexPtr[i] = &V; - } - for (const auto &E : G.edges()) { - auto &EdgeTailID = E.first.first; - auto &EdgeHeadID = E.first.second; - auto EdgeTailAttrOrErr = G.at(EdgeTailID); - auto EdgeHeadAttrOrErr = G.at(EdgeHeadID); - if (!EdgeTailAttrOrErr) - return EdgeTailAttrOrErr.takeError(); - if (!EdgeHeadAttrOrErr) - return EdgeHeadAttrOrErr.takeError(); - GraphT::EdgeIdentifier ID{EdgeTailAttrOrErr->SymbolName, - EdgeHeadAttrOrErr->SymbolName}; - R.G[ID].CorrEdgePtr[i] = &E; - } - } - - return R; -} -// Returns the Relative change With respect to LeftStat between LeftStat -// and RightStat. -static double statRelDiff(const GraphDiffRenderer::TimeStat &LeftStat, - const GraphDiffRenderer::TimeStat &RightStat, - GraphDiffRenderer::StatType T) { - double LeftAttr = LeftStat.getDouble(T); - double RightAttr = RightStat.getDouble(T); - - return RightAttr / LeftAttr - 1.0; -} - -static std::string getColor(const GraphDiffRenderer::GraphT::EdgeValueType &E, - const GraphDiffRenderer::GraphT &G, ColorHelper H, - GraphDiffRenderer::StatType T) { - auto &EdgeAttr = E.second; - if (EdgeAttr.CorrEdgePtr[0] == nullptr) - return H.getColorString(2.0); // A number greater than 1.0 - if (EdgeAttr.CorrEdgePtr[1] == nullptr) - return H.getColorString(-2.0); // A number less than -1.0 - - if (T == GraphDiffRenderer::StatType::NONE) - return H.getDefaultColorString(); - - const auto &LeftStat = EdgeAttr.CorrEdgePtr[0]->second.S; - const auto &RightStat = EdgeAttr.CorrEdgePtr[1]->second.S; - - double RelDiff = statRelDiff(LeftStat, RightStat, T); - double CappedRelDiff = std::min(1.0, std::max(-1.0, RelDiff)); - - return H.getColorString(CappedRelDiff); -} - -static std::string getColor(const GraphDiffRenderer::GraphT::VertexValueType &V, - const GraphDiffRenderer::GraphT &G, ColorHelper H, - GraphDiffRenderer::StatType T) { - auto &VertexAttr = V.second; - if (VertexAttr.CorrVertexPtr[0] == nullptr) - return H.getColorString(2.0); // A number greater than 1.0 - if (VertexAttr.CorrVertexPtr[1] == nullptr) - return H.getColorString(-2.0); // A number less than -1.0 - - if (T == GraphDiffRenderer::StatType::NONE) - return H.getDefaultColorString(); - - const auto &LeftStat = VertexAttr.CorrVertexPtr[0]->second.S; - const auto &RightStat = VertexAttr.CorrVertexPtr[1]->second.S; - - double RelDiff = statRelDiff(LeftStat, RightStat, T); - double CappedRelDiff = std::min(1.0, std::max(-1.0, RelDiff)); - - return H.getColorString(CappedRelDiff); -} - -static Twine truncateString(const StringRef &S, size_t n) { - return (S.size() > n) ? Twine(S.substr(0, n)) + "..." : Twine(S); -} - -template static bool containsNullptr(const T &Collection) { - for (const auto &E : Collection) - if (E == nullptr) - return true; - return false; -} - -static std::string getLabel(const GraphDiffRenderer::GraphT::EdgeValueType &E, - GraphDiffRenderer::StatType EL) { - auto &EdgeAttr = E.second; - switch (EL) { - case GraphDiffRenderer::StatType::NONE: - return ""; - default: - if (containsNullptr(EdgeAttr.CorrEdgePtr)) - return ""; - - const auto &LeftStat = EdgeAttr.CorrEdgePtr[0]->second.S; - const auto &RightStat = EdgeAttr.CorrEdgePtr[1]->second.S; - - double RelDiff = statRelDiff(LeftStat, RightStat, EL); - return formatv(R"({0:P})", RelDiff); - } -} - -static std::string getLabel(const GraphDiffRenderer::GraphT::VertexValueType &V, - GraphDiffRenderer::StatType VL, int TrunLen) { - const auto &VertexId = V.first; - const auto &VertexAttr = V.second; - switch (VL) { - case GraphDiffRenderer::StatType::NONE: - return formatv(R"({0})", truncateString(VertexId, TrunLen).str()); - default: - if (containsNullptr(VertexAttr.CorrVertexPtr)) - return formatv(R"({0})", truncateString(VertexId, TrunLen).str()); - - const auto &LeftStat = VertexAttr.CorrVertexPtr[0]->second.S; - const auto &RightStat = VertexAttr.CorrVertexPtr[1]->second.S; - - double RelDiff = statRelDiff(LeftStat, RightStat, VL); - return formatv(R"({{{0}|{1:P}})", truncateString(VertexId, TrunLen).str(), - RelDiff); - } -} - -static double getLineWidth(const GraphDiffRenderer::GraphT::EdgeValueType &E, - GraphDiffRenderer::StatType EL) { - auto &EdgeAttr = E.second; - switch (EL) { - case GraphDiffRenderer::StatType::NONE: - return 1.0; - default: - if (containsNullptr(EdgeAttr.CorrEdgePtr)) - return 1.0; - - const auto &LeftStat = EdgeAttr.CorrEdgePtr[0]->second.S; - const auto &RightStat = EdgeAttr.CorrEdgePtr[1]->second.S; - - double RelDiff = statRelDiff(LeftStat, RightStat, EL); - return (RelDiff > 1.0) ? RelDiff : 1.0; - } -} - -void GraphDiffRenderer::exportGraphAsDOT(raw_ostream &OS, StatType EdgeLabel, - StatType EdgeColor, - StatType VertexLabel, - StatType VertexColor, int TruncLen) { - // Get numbering of vertices for dot output. - StringMap VertexNo; - - int i = 0; - for (const auto &V : G.vertices()) { - VertexNo[V.first] = i++; - } - - ColorHelper H(ColorHelper::DivergingScheme::PiYG); - - OS << "digraph xrayDiff {\n"; - - if (VertexLabel != StatType::NONE) - OS << "node [shape=record]\n"; - - for (const auto &E : G.edges()) { - const auto &HeadId = E.first.first; - const auto &TailId = E.first.second; - OS << formatv(R"(F{0} -> F{1} [tooltip="{2} -> {3}" label="{4}" )" - R"(color="{5}" labelfontcolor="{5}" penwidth={6}])" - "\n", - VertexNo[HeadId], VertexNo[TailId], - (HeadId.equals("")) ? static_cast("F0") : HeadId, - TailId, getLabel(E, EdgeLabel), getColor(E, G, H, EdgeColor), - getLineWidth(E, EdgeColor)); - } - - for (const auto &V : G.vertices()) { - const auto &VertexId = V.first; - if (VertexId.equals("")) { - OS << formatv(R"(F{0} [label="F0"])" - "\n", - VertexNo[VertexId]); - continue; - } - OS << formatv(R"(F{0} [label="{1}" color="{2}"])" - "\n", - VertexNo[VertexId], getLabel(V, VertexLabel, TruncLen), - getColor(V, G, H, VertexColor)); - } - - OS << "}\n"; -} - -template static T &ifSpecified(T &A, cl::alias &AA, T &B) { - if (A.getPosition() == 0 && AA.getPosition() == 0) - return B; - - return A; -} - -static CommandRegistration Unused(&GraphDiff, []() -> Error { - std::array Factories{ - {{ifSpecified(GraphDiffKeepGoing1, GraphDiffKeepGoing1A, - GraphDiffKeepGoing), - ifSpecified(GraphDiffDeduceSiblingCalls1, GraphDiffDeduceSiblingCalls1A, - GraphDiffDeduceSiblingCalls), - ifSpecified(GraphDiffInstrMap1, GraphDiffInstrMap1A, GraphDiffInstrMap), - Trace()}, - {ifSpecified(GraphDiffKeepGoing2, GraphDiffKeepGoing2A, - GraphDiffKeepGoing), - ifSpecified(GraphDiffDeduceSiblingCalls2, GraphDiffDeduceSiblingCalls2A, - GraphDiffDeduceSiblingCalls), - ifSpecified(GraphDiffInstrMap2, GraphDiffInstrMap2A, GraphDiffInstrMap), - Trace()}}}; - - std::array Inputs{{GraphDiffInput1, GraphDiffInput2}}; - - std::array Graphs; - - for (int i = 0; i < 2; i++) { - auto TraceOrErr = loadTraceFile(Inputs[i], true); - if (!TraceOrErr) - return make_error( - Twine("Failed Loading Input File '") + Inputs[i] + "'", - make_error_code(llvm::errc::invalid_argument)); - Factories[i].Trace = std::move(*TraceOrErr); - - auto GraphRendererOrErr = Factories[i].getGraphRenderer(); - - if (!GraphRendererOrErr) - return GraphRendererOrErr.takeError(); - - auto GraphRenderer = *GraphRendererOrErr; - - Graphs[i] = GraphRenderer.getGraph(); - } - - GraphDiffRenderer::Factory DGF(Graphs[0], Graphs[1]); - - auto GDROrErr = DGF.getGraphDiffRenderer(); - if (!GDROrErr) - return GDROrErr.takeError(); - - auto &GDR = *GDROrErr; - - std::error_code EC; - raw_fd_ostream OS(GraphDiffOutput, EC, sys::fs::OpenFlags::F_Text); - if (EC) - return make_error( - Twine("Cannot open file '") + GraphDiffOutput + "' for writing.", EC); - - GDR.exportGraphAsDOT(OS, GraphDiffEdgeLabel, GraphDiffEdgeColor, - GraphDiffVertexLabel, GraphDiffVertexColor, - GraphDiffVertexLabelTrunc); - - return Error::success(); -}); Index: llvm/trunk/tools/llvm-xray/xray-graph-diff.cpp =================================================================== --- llvm/trunk/tools/llvm-xray/xray-graph-diff.cpp +++ llvm/trunk/tools/llvm-xray/xray-graph-diff.cpp @@ -0,0 +1,484 @@ +//===-- xray-graph-diff.cpp: XRay Function Call Graph Renderer ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Generate a DOT file to represent the function call graph encountered in +// the trace. +// +//===----------------------------------------------------------------------===// +#include +#include +#include +#include + +#include "xray-graph-diff.h" +#include "xray-graph.h" +#include "xray-registry.h" + +#include "xray-color-helper.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/XRay/Trace.h" + +using namespace llvm; +using namespace xray; + +static cl::SubCommand GraphDiff("graph-diff", + "Generate diff of function-call graphs"); +static cl::opt GraphDiffInput1(cl::Positional, + cl::desc(""), + cl::Required, cl::sub(GraphDiff)); +static cl::opt GraphDiffInput2(cl::Positional, + cl::desc(""), + cl::Required, cl::sub(GraphDiff)); + +static cl::opt + GraphDiffKeepGoing("keep-going", + cl::desc("Keep going on errors encountered"), + cl::sub(GraphDiff), cl::init(false)); +static cl::alias GraphDiffKeepGoingA("k", cl::aliasopt(GraphDiffKeepGoing), + cl::desc("Alias for -keep-going"), + cl::sub(GraphDiff)); +static cl::opt + GraphDiffKeepGoing1("keep-going-1", + cl::desc("Keep going on errors encountered in trace 1"), + cl::sub(GraphDiff), cl::init(false)); +static cl::alias GraphDiffKeepGoing1A("k1", cl::aliasopt(GraphDiffKeepGoing1), + cl::desc("Alias for -keep-going-1"), + cl::sub(GraphDiff)); +static cl::opt + GraphDiffKeepGoing2("keep-going-2", + cl::desc("Keep going on errors encountered in trace 2"), + cl::sub(GraphDiff), cl::init(false)); +static cl::alias GraphDiffKeepGoing2A("k2", cl::aliasopt(GraphDiffKeepGoing2), + cl::desc("Alias for -keep-going-2"), + cl::sub(GraphDiff)); + +static cl::opt + GraphDiffInstrMap("instr-map", + cl::desc("binary with the instrumentation map, or " + "a separate instrumentation map for graph"), + cl::value_desc("binary with xray_instr_map or yaml"), + cl::sub(GraphDiff), cl::init("")); +static cl::alias GraphDiffInstrMapA("m", cl::aliasopt(GraphDiffInstrMap), + cl::desc("Alias for -instr-map"), + cl::sub(GraphDiff)); +static cl::opt + GraphDiffInstrMap1("instr-map-1", + cl::desc("binary with the instrumentation map, or " + "a separate instrumentation map for graph 1"), + cl::value_desc("binary with xray_instr_map or yaml"), + cl::sub(GraphDiff), cl::init("")); +static cl::alias GraphDiffInstrMap1A("m1", cl::aliasopt(GraphDiffInstrMap1), + cl::desc("Alias for -instr-map-1"), + cl::sub(GraphDiff)); +static cl::opt + GraphDiffInstrMap2("instr-map-2", + cl::desc("binary with the instrumentation map, or " + "a separate instrumentation map for graph 2"), + cl::value_desc("binary with xray_instr_map or yaml"), + cl::sub(GraphDiff), cl::init("")); +static cl::alias GraphDiffInstrMap2A("m2", cl::aliasopt(GraphDiffInstrMap2), + cl::desc("Alias for -instr-map-2"), + cl::sub(GraphDiff)); + +static cl::opt GraphDiffDeduceSiblingCalls( + "deduce-sibling-calls", + cl::desc("Deduce sibling calls when unrolling function call stacks"), + cl::sub(GraphDiff), cl::init(false)); +static cl::alias + GraphDiffDeduceSiblingCallsA("d", cl::aliasopt(GraphDiffDeduceSiblingCalls), + cl::desc("Alias for -deduce-sibling-calls"), + cl::sub(GraphDiff)); +static cl::opt GraphDiffDeduceSiblingCalls1( + "deduce-sibling-calls-1", + cl::desc("Deduce sibling calls when unrolling function call stacks"), + cl::sub(GraphDiff), cl::init(false)); +static cl::alias GraphDiffDeduceSiblingCalls1A( + "d1", cl::aliasopt(GraphDiffDeduceSiblingCalls1), + cl::desc("Alias for -deduce-sibling-calls-1"), cl::sub(GraphDiff)); +static cl::opt GraphDiffDeduceSiblingCalls2( + "deduce-sibling-calls-2", + cl::desc("Deduce sibling calls when unrolling function call stacks"), + cl::sub(GraphDiff), cl::init(false)); +static cl::alias GraphDiffDeduceSiblingCalls2A( + "d2", cl::aliasopt(GraphDiffDeduceSiblingCalls2), + cl::desc("Alias for -deduce-sibling-calls-2"), cl::sub(GraphDiff)); + +static cl::opt GraphDiffEdgeLabel( + "edge-label", cl::desc("Output graphs with edges labeled with this field"), + cl::value_desc("field"), cl::sub(GraphDiff), + cl::init(GraphRenderer::StatType::NONE), + cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", + "Do not label Edges"), + clEnumValN(GraphRenderer::StatType::COUNT, "count", + "function call counts"), + clEnumValN(GraphRenderer::StatType::MIN, "min", + "minimum function durations"), + clEnumValN(GraphRenderer::StatType::MED, "med", + "median function durations"), + clEnumValN(GraphRenderer::StatType::PCT90, "90p", + "90th percentile durations"), + clEnumValN(GraphRenderer::StatType::PCT99, "99p", + "99th percentile durations"), + clEnumValN(GraphRenderer::StatType::MAX, "max", + "maximum function durations"), + clEnumValN(GraphRenderer::StatType::SUM, "sum", + "sum of call durations"))); +static cl::alias GraphDiffEdgeLabelA("e", cl::aliasopt(GraphDiffEdgeLabel), + cl::desc("Alias for -edge-label"), + cl::sub(GraphDiff)); + +static cl::opt GraphDiffEdgeColor( + "edge-color", cl::desc("Output graphs with edges colored by this field"), + cl::value_desc("field"), cl::sub(GraphDiff), + cl::init(GraphRenderer::StatType::NONE), + cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", + "Do not color Edges"), + clEnumValN(GraphRenderer::StatType::COUNT, "count", + "function call counts"), + clEnumValN(GraphRenderer::StatType::MIN, "min", + "minimum function durations"), + clEnumValN(GraphRenderer::StatType::MED, "med", + "median function durations"), + clEnumValN(GraphRenderer::StatType::PCT90, "90p", + "90th percentile durations"), + clEnumValN(GraphRenderer::StatType::PCT99, "99p", + "99th percentile durations"), + clEnumValN(GraphRenderer::StatType::MAX, "max", + "maximum function durations"), + clEnumValN(GraphRenderer::StatType::SUM, "sum", + "sum of call durations"))); +static cl::alias GraphDiffEdgeColorA("c", cl::aliasopt(GraphDiffEdgeColor), + cl::desc("Alias for -edge-color"), + cl::sub(GraphDiff)); + +static cl::opt GraphDiffVertexLabel( + "vertex-label", + cl::desc("Output graphs with vertices labeled with this field"), + cl::value_desc("field"), cl::sub(GraphDiff), + cl::init(GraphRenderer::StatType::NONE), + cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", + "Do not label Vertices"), + clEnumValN(GraphRenderer::StatType::COUNT, "count", + "function call counts"), + clEnumValN(GraphRenderer::StatType::MIN, "min", + "minimum function durations"), + clEnumValN(GraphRenderer::StatType::MED, "med", + "median function durations"), + clEnumValN(GraphRenderer::StatType::PCT90, "90p", + "90th percentile durations"), + clEnumValN(GraphRenderer::StatType::PCT99, "99p", + "99th percentile durations"), + clEnumValN(GraphRenderer::StatType::MAX, "max", + "maximum function durations"), + clEnumValN(GraphRenderer::StatType::SUM, "sum", + "sum of call durations"))); +static cl::alias GraphDiffVertexLabelA("v", cl::aliasopt(GraphDiffVertexLabel), + cl::desc("Alias for -vertex-label"), + cl::sub(GraphDiff)); + +static cl::opt GraphDiffVertexColor( + "vertex-color", + cl::desc("Output graphs with vertices colored by this field"), + cl::value_desc("field"), cl::sub(GraphDiff), + cl::init(GraphRenderer::StatType::NONE), + cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", + "Do not color Vertices"), + clEnumValN(GraphRenderer::StatType::COUNT, "count", + "function call counts"), + clEnumValN(GraphRenderer::StatType::MIN, "min", + "minimum function durations"), + clEnumValN(GraphRenderer::StatType::MED, "med", + "median function durations"), + clEnumValN(GraphRenderer::StatType::PCT90, "90p", + "90th percentile durations"), + clEnumValN(GraphRenderer::StatType::PCT99, "99p", + "99th percentile durations"), + clEnumValN(GraphRenderer::StatType::MAX, "max", + "maximum function durations"), + clEnumValN(GraphRenderer::StatType::SUM, "sum", + "sum of call durations"))); +static cl::alias GraphDiffVertexColorA("b", cl::aliasopt(GraphDiffVertexColor), + cl::desc("Alias for -vertex-color"), + cl::sub(GraphDiff)); + +static cl::opt GraphDiffVertexLabelTrunc( + "vertex-label-trun", cl::desc("What length to truncate vertex labels to "), + cl::sub(GraphDiff), cl::init(40)); +static cl::alias + GraphDiffVertexLabelTrunc1("t", cl::aliasopt(GraphDiffVertexLabelTrunc), + cl::desc("Alias for -vertex-label-trun"), + cl::sub(GraphDiff)); + +static cl::opt + GraphDiffOutput("output", cl::value_desc("Output file"), cl::init("-"), + cl::desc("output file; use '-' for stdout"), + cl::sub(GraphDiff)); +static cl::alias GraphDiffOutputA("o", cl::aliasopt(GraphDiffOutput), + cl::desc("Alias for -output"), + cl::sub(GraphDiff)); + +Expected GraphDiffRenderer::Factory::getGraphDiffRenderer() { + GraphDiffRenderer R; + + for (int i = 0; i < N; ++i) { + const auto &G = this->G[i].get(); + for (const auto &V : G.vertices()) { + const auto &VAttr = V.second; + R.G[VAttr.SymbolName].CorrVertexPtr[i] = &V; + } + for (const auto &E : G.edges()) { + auto &EdgeTailID = E.first.first; + auto &EdgeHeadID = E.first.second; + auto EdgeTailAttrOrErr = G.at(EdgeTailID); + auto EdgeHeadAttrOrErr = G.at(EdgeHeadID); + if (!EdgeTailAttrOrErr) + return EdgeTailAttrOrErr.takeError(); + if (!EdgeHeadAttrOrErr) + return EdgeHeadAttrOrErr.takeError(); + GraphT::EdgeIdentifier ID{EdgeTailAttrOrErr->SymbolName, + EdgeHeadAttrOrErr->SymbolName}; + R.G[ID].CorrEdgePtr[i] = &E; + } + } + + return R; +} +// Returns the Relative change With respect to LeftStat between LeftStat +// and RightStat. +static double statRelDiff(const GraphDiffRenderer::TimeStat &LeftStat, + const GraphDiffRenderer::TimeStat &RightStat, + GraphDiffRenderer::StatType T) { + double LeftAttr = LeftStat.getDouble(T); + double RightAttr = RightStat.getDouble(T); + + return RightAttr / LeftAttr - 1.0; +} + +static std::string getColor(const GraphDiffRenderer::GraphT::EdgeValueType &E, + const GraphDiffRenderer::GraphT &G, ColorHelper H, + GraphDiffRenderer::StatType T) { + auto &EdgeAttr = E.second; + if (EdgeAttr.CorrEdgePtr[0] == nullptr) + return H.getColorString(2.0); // A number greater than 1.0 + if (EdgeAttr.CorrEdgePtr[1] == nullptr) + return H.getColorString(-2.0); // A number less than -1.0 + + if (T == GraphDiffRenderer::StatType::NONE) + return H.getDefaultColorString(); + + const auto &LeftStat = EdgeAttr.CorrEdgePtr[0]->second.S; + const auto &RightStat = EdgeAttr.CorrEdgePtr[1]->second.S; + + double RelDiff = statRelDiff(LeftStat, RightStat, T); + double CappedRelDiff = std::min(1.0, std::max(-1.0, RelDiff)); + + return H.getColorString(CappedRelDiff); +} + +static std::string getColor(const GraphDiffRenderer::GraphT::VertexValueType &V, + const GraphDiffRenderer::GraphT &G, ColorHelper H, + GraphDiffRenderer::StatType T) { + auto &VertexAttr = V.second; + if (VertexAttr.CorrVertexPtr[0] == nullptr) + return H.getColorString(2.0); // A number greater than 1.0 + if (VertexAttr.CorrVertexPtr[1] == nullptr) + return H.getColorString(-2.0); // A number less than -1.0 + + if (T == GraphDiffRenderer::StatType::NONE) + return H.getDefaultColorString(); + + const auto &LeftStat = VertexAttr.CorrVertexPtr[0]->second.S; + const auto &RightStat = VertexAttr.CorrVertexPtr[1]->second.S; + + double RelDiff = statRelDiff(LeftStat, RightStat, T); + double CappedRelDiff = std::min(1.0, std::max(-1.0, RelDiff)); + + return H.getColorString(CappedRelDiff); +} + +static Twine truncateString(const StringRef &S, size_t n) { + return (S.size() > n) ? Twine(S.substr(0, n)) + "..." : Twine(S); +} + +template static bool containsNullptr(const T &Collection) { + for (const auto &E : Collection) + if (E == nullptr) + return true; + return false; +} + +static std::string getLabel(const GraphDiffRenderer::GraphT::EdgeValueType &E, + GraphDiffRenderer::StatType EL) { + auto &EdgeAttr = E.second; + switch (EL) { + case GraphDiffRenderer::StatType::NONE: + return ""; + default: + if (containsNullptr(EdgeAttr.CorrEdgePtr)) + return ""; + + const auto &LeftStat = EdgeAttr.CorrEdgePtr[0]->second.S; + const auto &RightStat = EdgeAttr.CorrEdgePtr[1]->second.S; + + double RelDiff = statRelDiff(LeftStat, RightStat, EL); + return formatv(R"({0:P})", RelDiff); + } +} + +static std::string getLabel(const GraphDiffRenderer::GraphT::VertexValueType &V, + GraphDiffRenderer::StatType VL, int TrunLen) { + const auto &VertexId = V.first; + const auto &VertexAttr = V.second; + switch (VL) { + case GraphDiffRenderer::StatType::NONE: + return formatv(R"({0})", truncateString(VertexId, TrunLen).str()); + default: + if (containsNullptr(VertexAttr.CorrVertexPtr)) + return formatv(R"({0})", truncateString(VertexId, TrunLen).str()); + + const auto &LeftStat = VertexAttr.CorrVertexPtr[0]->second.S; + const auto &RightStat = VertexAttr.CorrVertexPtr[1]->second.S; + + double RelDiff = statRelDiff(LeftStat, RightStat, VL); + return formatv(R"({{{0}|{1:P}})", truncateString(VertexId, TrunLen).str(), + RelDiff); + } +} + +static double getLineWidth(const GraphDiffRenderer::GraphT::EdgeValueType &E, + GraphDiffRenderer::StatType EL) { + auto &EdgeAttr = E.second; + switch (EL) { + case GraphDiffRenderer::StatType::NONE: + return 1.0; + default: + if (containsNullptr(EdgeAttr.CorrEdgePtr)) + return 1.0; + + const auto &LeftStat = EdgeAttr.CorrEdgePtr[0]->second.S; + const auto &RightStat = EdgeAttr.CorrEdgePtr[1]->second.S; + + double RelDiff = statRelDiff(LeftStat, RightStat, EL); + return (RelDiff > 1.0) ? RelDiff : 1.0; + } +} + +void GraphDiffRenderer::exportGraphAsDOT(raw_ostream &OS, StatType EdgeLabel, + StatType EdgeColor, + StatType VertexLabel, + StatType VertexColor, int TruncLen) { + // Get numbering of vertices for dot output. + StringMap VertexNo; + + int i = 0; + for (const auto &V : G.vertices()) { + VertexNo[V.first] = i++; + } + + ColorHelper H(ColorHelper::DivergingScheme::PiYG); + + OS << "digraph xrayDiff {\n"; + + if (VertexLabel != StatType::NONE) + OS << "node [shape=record]\n"; + + for (const auto &E : G.edges()) { + const auto &HeadId = E.first.first; + const auto &TailId = E.first.second; + OS << formatv(R"(F{0} -> F{1} [tooltip="{2} -> {3}" label="{4}" )" + R"(color="{5}" labelfontcolor="{5}" penwidth={6}])" + "\n", + VertexNo[HeadId], VertexNo[TailId], + (HeadId.equals("")) ? static_cast("F0") : HeadId, + TailId, getLabel(E, EdgeLabel), getColor(E, G, H, EdgeColor), + getLineWidth(E, EdgeColor)); + } + + for (const auto &V : G.vertices()) { + const auto &VertexId = V.first; + if (VertexId.equals("")) { + OS << formatv(R"(F{0} [label="F0"])" + "\n", + VertexNo[VertexId]); + continue; + } + OS << formatv(R"(F{0} [label="{1}" color="{2}"])" + "\n", + VertexNo[VertexId], getLabel(V, VertexLabel, TruncLen), + getColor(V, G, H, VertexColor)); + } + + OS << "}\n"; +} + +template static T &ifSpecified(T &A, cl::alias &AA, T &B) { + if (A.getPosition() == 0 && AA.getPosition() == 0) + return B; + + return A; +} + +static CommandRegistration Unused(&GraphDiff, []() -> Error { + std::array Factories{ + {{ifSpecified(GraphDiffKeepGoing1, GraphDiffKeepGoing1A, + GraphDiffKeepGoing), + ifSpecified(GraphDiffDeduceSiblingCalls1, GraphDiffDeduceSiblingCalls1A, + GraphDiffDeduceSiblingCalls), + ifSpecified(GraphDiffInstrMap1, GraphDiffInstrMap1A, GraphDiffInstrMap), + Trace()}, + {ifSpecified(GraphDiffKeepGoing2, GraphDiffKeepGoing2A, + GraphDiffKeepGoing), + ifSpecified(GraphDiffDeduceSiblingCalls2, GraphDiffDeduceSiblingCalls2A, + GraphDiffDeduceSiblingCalls), + ifSpecified(GraphDiffInstrMap2, GraphDiffInstrMap2A, GraphDiffInstrMap), + Trace()}}}; + + std::array Inputs{{GraphDiffInput1, GraphDiffInput2}}; + + std::array Graphs; + + for (int i = 0; i < 2; i++) { + auto TraceOrErr = loadTraceFile(Inputs[i], true); + if (!TraceOrErr) + return make_error( + Twine("Failed Loading Input File '") + Inputs[i] + "'", + make_error_code(llvm::errc::invalid_argument)); + Factories[i].Trace = std::move(*TraceOrErr); + + auto GraphRendererOrErr = Factories[i].getGraphRenderer(); + + if (!GraphRendererOrErr) + return GraphRendererOrErr.takeError(); + + auto GraphRenderer = *GraphRendererOrErr; + + Graphs[i] = GraphRenderer.getGraph(); + } + + GraphDiffRenderer::Factory DGF(Graphs[0], Graphs[1]); + + auto GDROrErr = DGF.getGraphDiffRenderer(); + if (!GDROrErr) + return GDROrErr.takeError(); + + auto &GDR = *GDROrErr; + + std::error_code EC; + raw_fd_ostream OS(GraphDiffOutput, EC, sys::fs::OpenFlags::F_Text); + if (EC) + return make_error( + Twine("Cannot open file '") + GraphDiffOutput + "' for writing.", EC); + + GDR.exportGraphAsDOT(OS, GraphDiffEdgeLabel, GraphDiffEdgeColor, + GraphDiffVertexLabel, GraphDiffVertexColor, + GraphDiffVertexLabelTrunc); + + return Error::success(); +}); Index: llvm/trunk/tools/llvm-xray/xray-graph.cc =================================================================== --- llvm/trunk/tools/llvm-xray/xray-graph.cc +++ llvm/trunk/tools/llvm-xray/xray-graph.cc @@ -1,516 +0,0 @@ -//===-- xray-graph.cc - XRay Function Call Graph Renderer -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Generate a DOT file to represent the function call graph encountered in -// the trace. -// -//===----------------------------------------------------------------------===// - -#include "xray-graph.h" -#include "xray-registry.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/XRay/InstrumentationMap.h" -#include "llvm/XRay/Trace.h" - -using namespace llvm; -using namespace llvm::xray; - -// Setup llvm-xray graph subcommand and its options. -static cl::SubCommand GraphC("graph", "Generate function-call graph"); -static cl::opt GraphInput(cl::Positional, - cl::desc(""), - cl::Required, cl::sub(GraphC)); - -static cl::opt - GraphKeepGoing("keep-going", cl::desc("Keep going on errors encountered"), - cl::sub(GraphC), cl::init(false)); -static cl::alias GraphKeepGoing2("k", cl::aliasopt(GraphKeepGoing), - cl::desc("Alias for -keep-going"), - cl::sub(GraphC)); - -static cl::opt - GraphOutput("output", cl::value_desc("Output file"), cl::init("-"), - cl::desc("output file; use '-' for stdout"), cl::sub(GraphC)); -static cl::alias GraphOutput2("o", cl::aliasopt(GraphOutput), - cl::desc("Alias for -output"), cl::sub(GraphC)); - -static cl::opt - GraphInstrMap("instr_map", - cl::desc("binary with the instrumrntation map, or " - "a separate instrumentation map"), - cl::value_desc("binary with xray_instr_map"), cl::sub(GraphC), - cl::init("")); -static cl::alias GraphInstrMap2("m", cl::aliasopt(GraphInstrMap), - cl::desc("alias for -instr_map"), - cl::sub(GraphC)); - -static cl::opt GraphDeduceSiblingCalls( - "deduce-sibling-calls", - cl::desc("Deduce sibling calls when unrolling function call stacks"), - cl::sub(GraphC), cl::init(false)); -static cl::alias - GraphDeduceSiblingCalls2("d", cl::aliasopt(GraphDeduceSiblingCalls), - cl::desc("Alias for -deduce-sibling-calls"), - cl::sub(GraphC)); - -static cl::opt - GraphEdgeLabel("edge-label", - cl::desc("Output graphs with edges labeled with this field"), - cl::value_desc("field"), cl::sub(GraphC), - cl::init(GraphRenderer::StatType::NONE), - cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", - "Do not label Edges"), - clEnumValN(GraphRenderer::StatType::COUNT, - "count", "function call counts"), - clEnumValN(GraphRenderer::StatType::MIN, "min", - "minimum function durations"), - clEnumValN(GraphRenderer::StatType::MED, "med", - "median function durations"), - clEnumValN(GraphRenderer::StatType::PCT90, "90p", - "90th percentile durations"), - clEnumValN(GraphRenderer::StatType::PCT99, "99p", - "99th percentile durations"), - clEnumValN(GraphRenderer::StatType::MAX, "max", - "maximum function durations"), - clEnumValN(GraphRenderer::StatType::SUM, "sum", - "sum of call durations"))); -static cl::alias GraphEdgeLabel2("e", cl::aliasopt(GraphEdgeLabel), - cl::desc("Alias for -edge-label"), - cl::sub(GraphC)); - -static cl::opt GraphVertexLabel( - "vertex-label", - cl::desc("Output graphs with vertices labeled with this field"), - cl::value_desc("field"), cl::sub(GraphC), - cl::init(GraphRenderer::StatType::NONE), - cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", - "Do not label Vertices"), - clEnumValN(GraphRenderer::StatType::COUNT, "count", - "function call counts"), - clEnumValN(GraphRenderer::StatType::MIN, "min", - "minimum function durations"), - clEnumValN(GraphRenderer::StatType::MED, "med", - "median function durations"), - clEnumValN(GraphRenderer::StatType::PCT90, "90p", - "90th percentile durations"), - clEnumValN(GraphRenderer::StatType::PCT99, "99p", - "99th percentile durations"), - clEnumValN(GraphRenderer::StatType::MAX, "max", - "maximum function durations"), - clEnumValN(GraphRenderer::StatType::SUM, "sum", - "sum of call durations"))); -static cl::alias GraphVertexLabel2("v", cl::aliasopt(GraphVertexLabel), - cl::desc("Alias for -edge-label"), - cl::sub(GraphC)); - -static cl::opt GraphEdgeColorType( - "color-edges", - cl::desc("Output graphs with edge colors determined by this field"), - cl::value_desc("field"), cl::sub(GraphC), - cl::init(GraphRenderer::StatType::NONE), - cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", - "Do not color Edges"), - clEnumValN(GraphRenderer::StatType::COUNT, "count", - "function call counts"), - clEnumValN(GraphRenderer::StatType::MIN, "min", - "minimum function durations"), - clEnumValN(GraphRenderer::StatType::MED, "med", - "median function durations"), - clEnumValN(GraphRenderer::StatType::PCT90, "90p", - "90th percentile durations"), - clEnumValN(GraphRenderer::StatType::PCT99, "99p", - "99th percentile durations"), - clEnumValN(GraphRenderer::StatType::MAX, "max", - "maximum function durations"), - clEnumValN(GraphRenderer::StatType::SUM, "sum", - "sum of call durations"))); -static cl::alias GraphEdgeColorType2("c", cl::aliasopt(GraphEdgeColorType), - cl::desc("Alias for -color-edges"), - cl::sub(GraphC)); - -static cl::opt GraphVertexColorType( - "color-vertices", - cl::desc("Output graphs with vertex colors determined by this field"), - cl::value_desc("field"), cl::sub(GraphC), - cl::init(GraphRenderer::StatType::NONE), - cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", - "Do not color vertices"), - clEnumValN(GraphRenderer::StatType::COUNT, "count", - "function call counts"), - clEnumValN(GraphRenderer::StatType::MIN, "min", - "minimum function durations"), - clEnumValN(GraphRenderer::StatType::MED, "med", - "median function durations"), - clEnumValN(GraphRenderer::StatType::PCT90, "90p", - "90th percentile durations"), - clEnumValN(GraphRenderer::StatType::PCT99, "99p", - "99th percentile durations"), - clEnumValN(GraphRenderer::StatType::MAX, "max", - "maximum function durations"), - clEnumValN(GraphRenderer::StatType::SUM, "sum", - "sum of call durations"))); -static cl::alias GraphVertexColorType2("b", cl::aliasopt(GraphVertexColorType), - cl::desc("Alias for -edge-label"), - cl::sub(GraphC)); - -template T diff(T L, T R) { return std::max(L, R) - std::min(L, R); } - -// Updates the statistics for a GraphRenderer::TimeStat -static void updateStat(GraphRenderer::TimeStat &S, int64_t L) { - S.Count++; - if (S.Min > L || S.Min == 0) - S.Min = L; - if (S.Max < L) - S.Max = L; - S.Sum += L; -} - -// Evaluates an XRay record and performs accounting on it. -// -// If the record is an ENTER record it pushes the FuncID and TSC onto a -// structure representing the call stack for that function. -// If the record is an EXIT record it checks computes computes the ammount of -// time the function took to complete and then stores that information in an -// edge of the graph. If there is no matching ENTER record the function tries -// to recover by assuming that there were EXIT records which were missed, for -// example caused by tail call elimination and if the option is enabled then -// then tries to recover from this. -// -// This funciton will also error if the records are out of order, as the trace -// is expected to be sorted. -// -// The graph generated has an immaginary root for functions called by no-one at -// FuncId 0. -// -// FIXME: Refactor this and account subcommand to reduce code duplication. -Error GraphRenderer::accountRecord(const XRayRecord &Record) { - using std::make_error_code; - using std::errc; - if (CurrentMaxTSC == 0) - CurrentMaxTSC = Record.TSC; - - if (Record.TSC < CurrentMaxTSC) - return make_error("Records not in order", - make_error_code(errc::invalid_argument)); - - auto &ThreadStack = PerThreadFunctionStack[Record.TId]; - switch (Record.Type) { - case RecordTypes::ENTER: - case RecordTypes::ENTER_ARG: { - if (Record.FuncId != 0 && G.count(Record.FuncId) == 0) - G[Record.FuncId].SymbolName = FuncIdHelper.SymbolOrNumber(Record.FuncId); - ThreadStack.push_back({Record.FuncId, Record.TSC}); - break; - } - case RecordTypes::EXIT: - case RecordTypes::TAIL_EXIT: { - // FIXME: Refactor this and the account subcommand to reduce code - // duplication - if (ThreadStack.size() == 0 || ThreadStack.back().FuncId != Record.FuncId) { - if (!DeduceSiblingCalls) - return make_error("No matching ENTRY record", - make_error_code(errc::invalid_argument)); - auto Parent = std::find_if( - ThreadStack.rbegin(), ThreadStack.rend(), - [&](const FunctionAttr &A) { return A.FuncId == Record.FuncId; }); - if (Parent == ThreadStack.rend()) - return make_error( - "No matching Entry record in stack", - make_error_code(errc::invalid_argument)); // There is no matching - // Function for this exit. - while (ThreadStack.back().FuncId != Record.FuncId) { - TimestampT D = diff(ThreadStack.back().TSC, Record.TSC); - VertexIdentifier TopFuncId = ThreadStack.back().FuncId; - ThreadStack.pop_back(); - assert(ThreadStack.size() != 0); - EdgeIdentifier EI(ThreadStack.back().FuncId, TopFuncId); - auto &EA = G[EI]; - EA.Timings.push_back(D); - updateStat(EA.S, D); - updateStat(G[TopFuncId].S, D); - } - } - uint64_t D = diff(ThreadStack.back().TSC, Record.TSC); - ThreadStack.pop_back(); - VertexIdentifier VI = ThreadStack.empty() ? 0 : ThreadStack.back().FuncId; - EdgeIdentifier EI(VI, Record.FuncId); - auto &EA = G[EI]; - EA.Timings.push_back(D); - updateStat(EA.S, D); - updateStat(G[Record.FuncId].S, D); - break; - } - } - - return Error::success(); -} - -template -void GraphRenderer::getStats(U begin, U end, GraphRenderer::TimeStat &S) { - if (begin == end) return; - std::ptrdiff_t MedianOff = S.Count / 2; - std::nth_element(begin, begin + MedianOff, end); - S.Median = *(begin + MedianOff); - std::ptrdiff_t Pct90Off = (S.Count * 9) / 10; - std::nth_element(begin, begin + Pct90Off, end); - S.Pct90 = *(begin + Pct90Off); - std::ptrdiff_t Pct99Off = (S.Count * 99) / 100; - std::nth_element(begin, begin + Pct99Off, end); - S.Pct99 = *(begin + Pct99Off); -} - -void GraphRenderer::updateMaxStats(const GraphRenderer::TimeStat &S, - GraphRenderer::TimeStat &M) { - M.Count = std::max(M.Count, S.Count); - M.Min = std::max(M.Min, S.Min); - M.Median = std::max(M.Median, S.Median); - M.Pct90 = std::max(M.Pct90, S.Pct90); - M.Pct99 = std::max(M.Pct99, S.Pct99); - M.Max = std::max(M.Max, S.Max); - M.Sum = std::max(M.Sum, S.Sum); -} - -void GraphRenderer::calculateEdgeStatistics() { - assert(!G.edges().empty()); - for (auto &E : G.edges()) { - auto &A = E.second; - assert(!A.Timings.empty()); - getStats(A.Timings.begin(), A.Timings.end(), A.S); - updateMaxStats(A.S, G.GraphEdgeMax); - } -} - -void GraphRenderer::calculateVertexStatistics() { - std::vector TempTimings; - for (auto &V : G.vertices()) { - if (V.first != 0) { - for (auto &E : G.inEdges(V.first)) { - auto &A = E.second; - TempTimings.insert(TempTimings.end(), A.Timings.begin(), - A.Timings.end()); - } - getStats(TempTimings.begin(), TempTimings.end(), G[V.first].S); - updateMaxStats(G[V.first].S, G.GraphVertexMax); - TempTimings.clear(); - } - } -} - -// A Helper function for normalizeStatistics which normalises a single -// TimeStat element. -static void normalizeTimeStat(GraphRenderer::TimeStat &S, - double CycleFrequency) { - int64_t OldCount = S.Count; - S = S / CycleFrequency; - S.Count = OldCount; -} - -// Normalises the statistics in the graph for a given TSC frequency. -void GraphRenderer::normalizeStatistics(double CycleFrequency) { - for (auto &E : G.edges()) { - auto &S = E.second.S; - normalizeTimeStat(S, CycleFrequency); - } - for (auto &V : G.vertices()) { - auto &S = V.second.S; - normalizeTimeStat(S, CycleFrequency); - } - - normalizeTimeStat(G.GraphEdgeMax, CycleFrequency); - normalizeTimeStat(G.GraphVertexMax, CycleFrequency); -} - -// Returns a string containing the value of statistic field T -std::string -GraphRenderer::TimeStat::getString(GraphRenderer::StatType T) const { - std::string St; - raw_string_ostream S{St}; - double TimeStat::*DoubleStatPtrs[] = {&TimeStat::Min, &TimeStat::Median, - &TimeStat::Pct90, &TimeStat::Pct99, - &TimeStat::Max, &TimeStat::Sum}; - switch (T) { - case GraphRenderer::StatType::NONE: - break; - case GraphRenderer::StatType::COUNT: - S << Count; - break; - default: - S << (*this).* - DoubleStatPtrs[static_cast(T) - - static_cast(GraphRenderer::StatType::MIN)]; - break; - } - return S.str(); -} - -// Returns the quotient between the property T of this and another TimeStat as -// a double -double GraphRenderer::TimeStat::getDouble(StatType T) const { - double retval = 0; - double TimeStat::*DoubleStatPtrs[] = {&TimeStat::Min, &TimeStat::Median, - &TimeStat::Pct90, &TimeStat::Pct99, - &TimeStat::Max, &TimeStat::Sum}; - switch (T) { - case GraphRenderer::StatType::NONE: - retval = 0.0; - break; - case GraphRenderer::StatType::COUNT: - retval = static_cast(Count); - break; - default: - retval = - (*this).*DoubleStatPtrs[static_cast(T) - - static_cast(GraphRenderer::StatType::MIN)]; - break; - } - return retval; -} - -// Outputs a DOT format version of the Graph embedded in the GraphRenderer -// object on OS. It does this in the expected way by itterating -// through all edges then vertices and then outputting them and their -// annotations. -// -// FIXME: output more information, better presented. -void GraphRenderer::exportGraphAsDOT(raw_ostream &OS, StatType ET, StatType EC, - StatType VT, StatType VC) { - OS << "digraph xray {\n"; - - if (VT != StatType::NONE) - OS << "node [shape=record];\n"; - - for (const auto &E : G.edges()) { - const auto &S = E.second.S; - OS << "F" << E.first.first << " -> " - << "F" << E.first.second << " [label=\"" << S.getString(ET) << "\""; - if (EC != StatType::NONE) - OS << " color=\"" - << CHelper.getColorString( - std::sqrt(S.getDouble(EC) / G.GraphEdgeMax.getDouble(EC))) - << "\""; - OS << "];\n"; - } - - for (const auto &V : G.vertices()) { - const auto &VA = V.second; - if (V.first == 0) - continue; - OS << "F" << V.first << " [label=\"" << (VT != StatType::NONE ? "{" : "") - << (VA.SymbolName.size() > 40 ? VA.SymbolName.substr(0, 40) + "..." - : VA.SymbolName); - if (VT != StatType::NONE) - OS << "|" << VA.S.getString(VT) << "}\""; - else - OS << "\""; - if (VC != StatType::NONE) - OS << " color=\"" - << CHelper.getColorString( - std::sqrt(VA.S.getDouble(VC) / G.GraphVertexMax.getDouble(VC))) - << "\""; - OS << "];\n"; - } - OS << "}\n"; -} - -Expected GraphRenderer::Factory::getGraphRenderer() { - InstrumentationMap Map; - if (!GraphInstrMap.empty()) { - auto InstrumentationMapOrError = loadInstrumentationMap(GraphInstrMap); - if (!InstrumentationMapOrError) - return joinErrors( - make_error( - Twine("Cannot open instrumentation map '") + GraphInstrMap + "'", - std::make_error_code(std::errc::invalid_argument)), - InstrumentationMapOrError.takeError()); - Map = std::move(*InstrumentationMapOrError); - } - - const auto &FunctionAddresses = Map.getFunctionAddresses(); - - symbolize::LLVMSymbolizer::Options Opts( - symbolize::FunctionNameKind::LinkageName, true, true, false, ""); - symbolize::LLVMSymbolizer Symbolizer(Opts); - const auto &Header = Trace.getFileHeader(); - - llvm::xray::FuncIdConversionHelper FuncIdHelper(InstrMap, Symbolizer, - FunctionAddresses); - - xray::GraphRenderer GR(FuncIdHelper, DeduceSiblingCalls); - for (const auto &Record : Trace) { - auto E = GR.accountRecord(Record); - if (!E) - continue; - - for (const auto &ThreadStack : GR.getPerThreadFunctionStack()) { - errs() << "Thread ID: " << ThreadStack.first << "\n"; - auto Level = ThreadStack.second.size(); - for (const auto &Entry : llvm::reverse(ThreadStack.second)) - errs() << "#" << Level-- << "\t" - << FuncIdHelper.SymbolOrNumber(Entry.FuncId) << '\n'; - } - - if (!GraphKeepGoing) - return joinErrors(make_error( - "Error encountered generating the call graph.", - std::make_error_code(std::errc::invalid_argument)), - std::move(E)); - - handleAllErrors(std::move(E), - [&](const ErrorInfoBase &E) { E.log(errs()); }); - } - - GR.G.GraphEdgeMax = {}; - GR.G.GraphVertexMax = {}; - GR.calculateEdgeStatistics(); - GR.calculateVertexStatistics(); - - if (Header.CycleFrequency) - GR.normalizeStatistics(Header.CycleFrequency); - - return GR; -} - -// Here we register and implement the llvm-xray graph subcommand. -// The bulk of this code reads in the options, opens the required files, uses -// those files to create a context for analysing the xray trace, then there is a -// short loop which actually analyses the trace, generates the graph and then -// outputs it as a DOT. -// -// FIXME: include additional filtering and annalysis passes to provide more -// specific useful information. -static CommandRegistration Unused(&GraphC, []() -> Error { - GraphRenderer::Factory F; - - F.KeepGoing = GraphKeepGoing; - F.DeduceSiblingCalls = GraphDeduceSiblingCalls; - F.InstrMap = GraphInstrMap; - - auto TraceOrErr = loadTraceFile(GraphInput, true); - - if (!TraceOrErr) - return make_error( - Twine("Failed loading input file '") + GraphInput + "'", - make_error_code(llvm::errc::invalid_argument)); - - F.Trace = std::move(*TraceOrErr); - auto GROrError = F.getGraphRenderer(); - if (!GROrError) - return GROrError.takeError(); - auto &GR = *GROrError; - - std::error_code EC; - raw_fd_ostream OS(GraphOutput, EC, sys::fs::OpenFlags::F_Text); - if (EC) - return make_error( - Twine("Cannot open file '") + GraphOutput + "' for writing.", EC); - - GR.exportGraphAsDOT(OS, GraphEdgeLabel, GraphEdgeColorType, GraphVertexLabel, - GraphVertexColorType); - return Error::success(); -}); Index: llvm/trunk/tools/llvm-xray/xray-graph.cpp =================================================================== --- llvm/trunk/tools/llvm-xray/xray-graph.cpp +++ llvm/trunk/tools/llvm-xray/xray-graph.cpp @@ -0,0 +1,516 @@ +//===-- xray-graph.cpp: XRay Function Call Graph Renderer -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Generate a DOT file to represent the function call graph encountered in +// the trace. +// +//===----------------------------------------------------------------------===// + +#include "xray-graph.h" +#include "xray-registry.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/XRay/InstrumentationMap.h" +#include "llvm/XRay/Trace.h" + +using namespace llvm; +using namespace llvm::xray; + +// Setup llvm-xray graph subcommand and its options. +static cl::SubCommand GraphC("graph", "Generate function-call graph"); +static cl::opt GraphInput(cl::Positional, + cl::desc(""), + cl::Required, cl::sub(GraphC)); + +static cl::opt + GraphKeepGoing("keep-going", cl::desc("Keep going on errors encountered"), + cl::sub(GraphC), cl::init(false)); +static cl::alias GraphKeepGoing2("k", cl::aliasopt(GraphKeepGoing), + cl::desc("Alias for -keep-going"), + cl::sub(GraphC)); + +static cl::opt + GraphOutput("output", cl::value_desc("Output file"), cl::init("-"), + cl::desc("output file; use '-' for stdout"), cl::sub(GraphC)); +static cl::alias GraphOutput2("o", cl::aliasopt(GraphOutput), + cl::desc("Alias for -output"), cl::sub(GraphC)); + +static cl::opt + GraphInstrMap("instr_map", + cl::desc("binary with the instrumrntation map, or " + "a separate instrumentation map"), + cl::value_desc("binary with xray_instr_map"), cl::sub(GraphC), + cl::init("")); +static cl::alias GraphInstrMap2("m", cl::aliasopt(GraphInstrMap), + cl::desc("alias for -instr_map"), + cl::sub(GraphC)); + +static cl::opt GraphDeduceSiblingCalls( + "deduce-sibling-calls", + cl::desc("Deduce sibling calls when unrolling function call stacks"), + cl::sub(GraphC), cl::init(false)); +static cl::alias + GraphDeduceSiblingCalls2("d", cl::aliasopt(GraphDeduceSiblingCalls), + cl::desc("Alias for -deduce-sibling-calls"), + cl::sub(GraphC)); + +static cl::opt + GraphEdgeLabel("edge-label", + cl::desc("Output graphs with edges labeled with this field"), + cl::value_desc("field"), cl::sub(GraphC), + cl::init(GraphRenderer::StatType::NONE), + cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", + "Do not label Edges"), + clEnumValN(GraphRenderer::StatType::COUNT, + "count", "function call counts"), + clEnumValN(GraphRenderer::StatType::MIN, "min", + "minimum function durations"), + clEnumValN(GraphRenderer::StatType::MED, "med", + "median function durations"), + clEnumValN(GraphRenderer::StatType::PCT90, "90p", + "90th percentile durations"), + clEnumValN(GraphRenderer::StatType::PCT99, "99p", + "99th percentile durations"), + clEnumValN(GraphRenderer::StatType::MAX, "max", + "maximum function durations"), + clEnumValN(GraphRenderer::StatType::SUM, "sum", + "sum of call durations"))); +static cl::alias GraphEdgeLabel2("e", cl::aliasopt(GraphEdgeLabel), + cl::desc("Alias for -edge-label"), + cl::sub(GraphC)); + +static cl::opt GraphVertexLabel( + "vertex-label", + cl::desc("Output graphs with vertices labeled with this field"), + cl::value_desc("field"), cl::sub(GraphC), + cl::init(GraphRenderer::StatType::NONE), + cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", + "Do not label Vertices"), + clEnumValN(GraphRenderer::StatType::COUNT, "count", + "function call counts"), + clEnumValN(GraphRenderer::StatType::MIN, "min", + "minimum function durations"), + clEnumValN(GraphRenderer::StatType::MED, "med", + "median function durations"), + clEnumValN(GraphRenderer::StatType::PCT90, "90p", + "90th percentile durations"), + clEnumValN(GraphRenderer::StatType::PCT99, "99p", + "99th percentile durations"), + clEnumValN(GraphRenderer::StatType::MAX, "max", + "maximum function durations"), + clEnumValN(GraphRenderer::StatType::SUM, "sum", + "sum of call durations"))); +static cl::alias GraphVertexLabel2("v", cl::aliasopt(GraphVertexLabel), + cl::desc("Alias for -edge-label"), + cl::sub(GraphC)); + +static cl::opt GraphEdgeColorType( + "color-edges", + cl::desc("Output graphs with edge colors determined by this field"), + cl::value_desc("field"), cl::sub(GraphC), + cl::init(GraphRenderer::StatType::NONE), + cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", + "Do not color Edges"), + clEnumValN(GraphRenderer::StatType::COUNT, "count", + "function call counts"), + clEnumValN(GraphRenderer::StatType::MIN, "min", + "minimum function durations"), + clEnumValN(GraphRenderer::StatType::MED, "med", + "median function durations"), + clEnumValN(GraphRenderer::StatType::PCT90, "90p", + "90th percentile durations"), + clEnumValN(GraphRenderer::StatType::PCT99, "99p", + "99th percentile durations"), + clEnumValN(GraphRenderer::StatType::MAX, "max", + "maximum function durations"), + clEnumValN(GraphRenderer::StatType::SUM, "sum", + "sum of call durations"))); +static cl::alias GraphEdgeColorType2("c", cl::aliasopt(GraphEdgeColorType), + cl::desc("Alias for -color-edges"), + cl::sub(GraphC)); + +static cl::opt GraphVertexColorType( + "color-vertices", + cl::desc("Output graphs with vertex colors determined by this field"), + cl::value_desc("field"), cl::sub(GraphC), + cl::init(GraphRenderer::StatType::NONE), + cl::values(clEnumValN(GraphRenderer::StatType::NONE, "none", + "Do not color vertices"), + clEnumValN(GraphRenderer::StatType::COUNT, "count", + "function call counts"), + clEnumValN(GraphRenderer::StatType::MIN, "min", + "minimum function durations"), + clEnumValN(GraphRenderer::StatType::MED, "med", + "median function durations"), + clEnumValN(GraphRenderer::StatType::PCT90, "90p", + "90th percentile durations"), + clEnumValN(GraphRenderer::StatType::PCT99, "99p", + "99th percentile durations"), + clEnumValN(GraphRenderer::StatType::MAX, "max", + "maximum function durations"), + clEnumValN(GraphRenderer::StatType::SUM, "sum", + "sum of call durations"))); +static cl::alias GraphVertexColorType2("b", cl::aliasopt(GraphVertexColorType), + cl::desc("Alias for -edge-label"), + cl::sub(GraphC)); + +template T diff(T L, T R) { return std::max(L, R) - std::min(L, R); } + +// Updates the statistics for a GraphRenderer::TimeStat +static void updateStat(GraphRenderer::TimeStat &S, int64_t L) { + S.Count++; + if (S.Min > L || S.Min == 0) + S.Min = L; + if (S.Max < L) + S.Max = L; + S.Sum += L; +} + +// Evaluates an XRay record and performs accounting on it. +// +// If the record is an ENTER record it pushes the FuncID and TSC onto a +// structure representing the call stack for that function. +// If the record is an EXIT record it checks computes computes the ammount of +// time the function took to complete and then stores that information in an +// edge of the graph. If there is no matching ENTER record the function tries +// to recover by assuming that there were EXIT records which were missed, for +// example caused by tail call elimination and if the option is enabled then +// then tries to recover from this. +// +// This funciton will also error if the records are out of order, as the trace +// is expected to be sorted. +// +// The graph generated has an immaginary root for functions called by no-one at +// FuncId 0. +// +// FIXME: Refactor this and account subcommand to reduce code duplication. +Error GraphRenderer::accountRecord(const XRayRecord &Record) { + using std::make_error_code; + using std::errc; + if (CurrentMaxTSC == 0) + CurrentMaxTSC = Record.TSC; + + if (Record.TSC < CurrentMaxTSC) + return make_error("Records not in order", + make_error_code(errc::invalid_argument)); + + auto &ThreadStack = PerThreadFunctionStack[Record.TId]; + switch (Record.Type) { + case RecordTypes::ENTER: + case RecordTypes::ENTER_ARG: { + if (Record.FuncId != 0 && G.count(Record.FuncId) == 0) + G[Record.FuncId].SymbolName = FuncIdHelper.SymbolOrNumber(Record.FuncId); + ThreadStack.push_back({Record.FuncId, Record.TSC}); + break; + } + case RecordTypes::EXIT: + case RecordTypes::TAIL_EXIT: { + // FIXME: Refactor this and the account subcommand to reduce code + // duplication + if (ThreadStack.size() == 0 || ThreadStack.back().FuncId != Record.FuncId) { + if (!DeduceSiblingCalls) + return make_error("No matching ENTRY record", + make_error_code(errc::invalid_argument)); + auto Parent = std::find_if( + ThreadStack.rbegin(), ThreadStack.rend(), + [&](const FunctionAttr &A) { return A.FuncId == Record.FuncId; }); + if (Parent == ThreadStack.rend()) + return make_error( + "No matching Entry record in stack", + make_error_code(errc::invalid_argument)); // There is no matching + // Function for this exit. + while (ThreadStack.back().FuncId != Record.FuncId) { + TimestampT D = diff(ThreadStack.back().TSC, Record.TSC); + VertexIdentifier TopFuncId = ThreadStack.back().FuncId; + ThreadStack.pop_back(); + assert(ThreadStack.size() != 0); + EdgeIdentifier EI(ThreadStack.back().FuncId, TopFuncId); + auto &EA = G[EI]; + EA.Timings.push_back(D); + updateStat(EA.S, D); + updateStat(G[TopFuncId].S, D); + } + } + uint64_t D = diff(ThreadStack.back().TSC, Record.TSC); + ThreadStack.pop_back(); + VertexIdentifier VI = ThreadStack.empty() ? 0 : ThreadStack.back().FuncId; + EdgeIdentifier EI(VI, Record.FuncId); + auto &EA = G[EI]; + EA.Timings.push_back(D); + updateStat(EA.S, D); + updateStat(G[Record.FuncId].S, D); + break; + } + } + + return Error::success(); +} + +template +void GraphRenderer::getStats(U begin, U end, GraphRenderer::TimeStat &S) { + if (begin == end) return; + std::ptrdiff_t MedianOff = S.Count / 2; + std::nth_element(begin, begin + MedianOff, end); + S.Median = *(begin + MedianOff); + std::ptrdiff_t Pct90Off = (S.Count * 9) / 10; + std::nth_element(begin, begin + Pct90Off, end); + S.Pct90 = *(begin + Pct90Off); + std::ptrdiff_t Pct99Off = (S.Count * 99) / 100; + std::nth_element(begin, begin + Pct99Off, end); + S.Pct99 = *(begin + Pct99Off); +} + +void GraphRenderer::updateMaxStats(const GraphRenderer::TimeStat &S, + GraphRenderer::TimeStat &M) { + M.Count = std::max(M.Count, S.Count); + M.Min = std::max(M.Min, S.Min); + M.Median = std::max(M.Median, S.Median); + M.Pct90 = std::max(M.Pct90, S.Pct90); + M.Pct99 = std::max(M.Pct99, S.Pct99); + M.Max = std::max(M.Max, S.Max); + M.Sum = std::max(M.Sum, S.Sum); +} + +void GraphRenderer::calculateEdgeStatistics() { + assert(!G.edges().empty()); + for (auto &E : G.edges()) { + auto &A = E.second; + assert(!A.Timings.empty()); + getStats(A.Timings.begin(), A.Timings.end(), A.S); + updateMaxStats(A.S, G.GraphEdgeMax); + } +} + +void GraphRenderer::calculateVertexStatistics() { + std::vector TempTimings; + for (auto &V : G.vertices()) { + if (V.first != 0) { + for (auto &E : G.inEdges(V.first)) { + auto &A = E.second; + TempTimings.insert(TempTimings.end(), A.Timings.begin(), + A.Timings.end()); + } + getStats(TempTimings.begin(), TempTimings.end(), G[V.first].S); + updateMaxStats(G[V.first].S, G.GraphVertexMax); + TempTimings.clear(); + } + } +} + +// A Helper function for normalizeStatistics which normalises a single +// TimeStat element. +static void normalizeTimeStat(GraphRenderer::TimeStat &S, + double CycleFrequency) { + int64_t OldCount = S.Count; + S = S / CycleFrequency; + S.Count = OldCount; +} + +// Normalises the statistics in the graph for a given TSC frequency. +void GraphRenderer::normalizeStatistics(double CycleFrequency) { + for (auto &E : G.edges()) { + auto &S = E.second.S; + normalizeTimeStat(S, CycleFrequency); + } + for (auto &V : G.vertices()) { + auto &S = V.second.S; + normalizeTimeStat(S, CycleFrequency); + } + + normalizeTimeStat(G.GraphEdgeMax, CycleFrequency); + normalizeTimeStat(G.GraphVertexMax, CycleFrequency); +} + +// Returns a string containing the value of statistic field T +std::string +GraphRenderer::TimeStat::getString(GraphRenderer::StatType T) const { + std::string St; + raw_string_ostream S{St}; + double TimeStat::*DoubleStatPtrs[] = {&TimeStat::Min, &TimeStat::Median, + &TimeStat::Pct90, &TimeStat::Pct99, + &TimeStat::Max, &TimeStat::Sum}; + switch (T) { + case GraphRenderer::StatType::NONE: + break; + case GraphRenderer::StatType::COUNT: + S << Count; + break; + default: + S << (*this).* + DoubleStatPtrs[static_cast(T) - + static_cast(GraphRenderer::StatType::MIN)]; + break; + } + return S.str(); +} + +// Returns the quotient between the property T of this and another TimeStat as +// a double +double GraphRenderer::TimeStat::getDouble(StatType T) const { + double retval = 0; + double TimeStat::*DoubleStatPtrs[] = {&TimeStat::Min, &TimeStat::Median, + &TimeStat::Pct90, &TimeStat::Pct99, + &TimeStat::Max, &TimeStat::Sum}; + switch (T) { + case GraphRenderer::StatType::NONE: + retval = 0.0; + break; + case GraphRenderer::StatType::COUNT: + retval = static_cast(Count); + break; + default: + retval = + (*this).*DoubleStatPtrs[static_cast(T) - + static_cast(GraphRenderer::StatType::MIN)]; + break; + } + return retval; +} + +// Outputs a DOT format version of the Graph embedded in the GraphRenderer +// object on OS. It does this in the expected way by itterating +// through all edges then vertices and then outputting them and their +// annotations. +// +// FIXME: output more information, better presented. +void GraphRenderer::exportGraphAsDOT(raw_ostream &OS, StatType ET, StatType EC, + StatType VT, StatType VC) { + OS << "digraph xray {\n"; + + if (VT != StatType::NONE) + OS << "node [shape=record];\n"; + + for (const auto &E : G.edges()) { + const auto &S = E.second.S; + OS << "F" << E.first.first << " -> " + << "F" << E.first.second << " [label=\"" << S.getString(ET) << "\""; + if (EC != StatType::NONE) + OS << " color=\"" + << CHelper.getColorString( + std::sqrt(S.getDouble(EC) / G.GraphEdgeMax.getDouble(EC))) + << "\""; + OS << "];\n"; + } + + for (const auto &V : G.vertices()) { + const auto &VA = V.second; + if (V.first == 0) + continue; + OS << "F" << V.first << " [label=\"" << (VT != StatType::NONE ? "{" : "") + << (VA.SymbolName.size() > 40 ? VA.SymbolName.substr(0, 40) + "..." + : VA.SymbolName); + if (VT != StatType::NONE) + OS << "|" << VA.S.getString(VT) << "}\""; + else + OS << "\""; + if (VC != StatType::NONE) + OS << " color=\"" + << CHelper.getColorString( + std::sqrt(VA.S.getDouble(VC) / G.GraphVertexMax.getDouble(VC))) + << "\""; + OS << "];\n"; + } + OS << "}\n"; +} + +Expected GraphRenderer::Factory::getGraphRenderer() { + InstrumentationMap Map; + if (!GraphInstrMap.empty()) { + auto InstrumentationMapOrError = loadInstrumentationMap(GraphInstrMap); + if (!InstrumentationMapOrError) + return joinErrors( + make_error( + Twine("Cannot open instrumentation map '") + GraphInstrMap + "'", + std::make_error_code(std::errc::invalid_argument)), + InstrumentationMapOrError.takeError()); + Map = std::move(*InstrumentationMapOrError); + } + + const auto &FunctionAddresses = Map.getFunctionAddresses(); + + symbolize::LLVMSymbolizer::Options Opts( + symbolize::FunctionNameKind::LinkageName, true, true, false, ""); + symbolize::LLVMSymbolizer Symbolizer(Opts); + const auto &Header = Trace.getFileHeader(); + + llvm::xray::FuncIdConversionHelper FuncIdHelper(InstrMap, Symbolizer, + FunctionAddresses); + + xray::GraphRenderer GR(FuncIdHelper, DeduceSiblingCalls); + for (const auto &Record : Trace) { + auto E = GR.accountRecord(Record); + if (!E) + continue; + + for (const auto &ThreadStack : GR.getPerThreadFunctionStack()) { + errs() << "Thread ID: " << ThreadStack.first << "\n"; + auto Level = ThreadStack.second.size(); + for (const auto &Entry : llvm::reverse(ThreadStack.second)) + errs() << "#" << Level-- << "\t" + << FuncIdHelper.SymbolOrNumber(Entry.FuncId) << '\n'; + } + + if (!GraphKeepGoing) + return joinErrors(make_error( + "Error encountered generating the call graph.", + std::make_error_code(std::errc::invalid_argument)), + std::move(E)); + + handleAllErrors(std::move(E), + [&](const ErrorInfoBase &E) { E.log(errs()); }); + } + + GR.G.GraphEdgeMax = {}; + GR.G.GraphVertexMax = {}; + GR.calculateEdgeStatistics(); + GR.calculateVertexStatistics(); + + if (Header.CycleFrequency) + GR.normalizeStatistics(Header.CycleFrequency); + + return GR; +} + +// Here we register and implement the llvm-xray graph subcommand. +// The bulk of this code reads in the options, opens the required files, uses +// those files to create a context for analysing the xray trace, then there is a +// short loop which actually analyses the trace, generates the graph and then +// outputs it as a DOT. +// +// FIXME: include additional filtering and annalysis passes to provide more +// specific useful information. +static CommandRegistration Unused(&GraphC, []() -> Error { + GraphRenderer::Factory F; + + F.KeepGoing = GraphKeepGoing; + F.DeduceSiblingCalls = GraphDeduceSiblingCalls; + F.InstrMap = GraphInstrMap; + + auto TraceOrErr = loadTraceFile(GraphInput, true); + + if (!TraceOrErr) + return make_error( + Twine("Failed loading input file '") + GraphInput + "'", + make_error_code(llvm::errc::invalid_argument)); + + F.Trace = std::move(*TraceOrErr); + auto GROrError = F.getGraphRenderer(); + if (!GROrError) + return GROrError.takeError(); + auto &GR = *GROrError; + + std::error_code EC; + raw_fd_ostream OS(GraphOutput, EC, sys::fs::OpenFlags::F_Text); + if (EC) + return make_error( + Twine("Cannot open file '") + GraphOutput + "' for writing.", EC); + + GR.exportGraphAsDOT(OS, GraphEdgeLabel, GraphEdgeColorType, GraphVertexLabel, + GraphVertexColorType); + return Error::success(); +}); Index: llvm/trunk/tools/llvm-xray/xray-registry.cc =================================================================== --- llvm/trunk/tools/llvm-xray/xray-registry.cc +++ llvm/trunk/tools/llvm-xray/xray-registry.cc @@ -1,41 +0,0 @@ -//===- xray-registry.cc - Implement a command registry. -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Implement a simple subcommand registry. -// -//===----------------------------------------------------------------------===// -#include "xray-registry.h" - -#include "llvm/Support/ManagedStatic.h" -#include - -namespace llvm { -namespace xray { - -using HandlerType = std::function; - -ManagedStatic> Commands; - -CommandRegistration::CommandRegistration(cl::SubCommand *SC, - HandlerType Command) { - assert(Commands->count(SC) == 0 && - "Attempting to overwrite a command handler"); - assert(Command && "Attempting to register an empty std::function"); - (*Commands)[SC] = Command; -} - -HandlerType dispatch(cl::SubCommand *SC) { - auto It = Commands->find(SC); - assert(It != Commands->end() && - "Attempting to dispatch on un-registered SubCommand."); - return It->second; -} - -} // namespace xray -} // namespace llvm Index: llvm/trunk/tools/llvm-xray/xray-registry.cpp =================================================================== --- llvm/trunk/tools/llvm-xray/xray-registry.cpp +++ llvm/trunk/tools/llvm-xray/xray-registry.cpp @@ -0,0 +1,41 @@ +//===- xray-registry.cpp: Implement a command registry. -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implement a simple subcommand registry. +// +//===----------------------------------------------------------------------===// +#include "xray-registry.h" + +#include "llvm/Support/ManagedStatic.h" +#include + +namespace llvm { +namespace xray { + +using HandlerType = std::function; + +ManagedStatic> Commands; + +CommandRegistration::CommandRegistration(cl::SubCommand *SC, + HandlerType Command) { + assert(Commands->count(SC) == 0 && + "Attempting to overwrite a command handler"); + assert(Command && "Attempting to register an empty std::function"); + (*Commands)[SC] = Command; +} + +HandlerType dispatch(cl::SubCommand *SC) { + auto It = Commands->find(SC); + assert(It != Commands->end() && + "Attempting to dispatch on un-registered SubCommand."); + return It->second; +} + +} // namespace xray +} // namespace llvm Index: llvm/trunk/tools/llvm-xray/xray-stacks.cc =================================================================== --- llvm/trunk/tools/llvm-xray/xray-stacks.cc +++ llvm/trunk/tools/llvm-xray/xray-stacks.cc @@ -1,797 +0,0 @@ -//===- xray-stacks.cc - XRay Function Call Stack Accounting ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements stack-based accounting. It takes XRay traces, and -// collates statistics across these traces to show a breakdown of time spent -// at various points of the stack to provide insight into which functions -// spend the most time in terms of a call stack. We provide a few -// sorting/filtering options for zero'ing in on the useful stacks. -// -//===----------------------------------------------------------------------===// - -#include -#include - -#include "func-id-helper.h" -#include "trie-node.h" -#include "xray-registry.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Errc.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormatAdapters.h" -#include "llvm/Support/FormatVariadic.h" -#include "llvm/XRay/Graph.h" -#include "llvm/XRay/InstrumentationMap.h" -#include "llvm/XRay/Trace.h" - -using namespace llvm; -using namespace llvm::xray; - -static cl::SubCommand Stack("stack", "Call stack accounting"); -static cl::list StackInputs(cl::Positional, - cl::desc(""), cl::Required, - cl::sub(Stack), cl::OneOrMore); - -static cl::opt - StackKeepGoing("keep-going", cl::desc("Keep going on errors encountered"), - cl::sub(Stack), cl::init(false)); -static cl::alias StackKeepGoing2("k", cl::aliasopt(StackKeepGoing), - cl::desc("Alias for -keep-going"), - cl::sub(Stack)); - -// TODO: Does there need to be an option to deduce tail or sibling calls? - -static cl::opt StacksInstrMap( - "instr_map", - cl::desc("instrumentation map used to identify function ids. " - "Currently supports elf file instrumentation maps."), - cl::sub(Stack), cl::init("")); -static cl::alias StacksInstrMap2("m", cl::aliasopt(StacksInstrMap), - cl::desc("Alias for -instr_map"), - cl::sub(Stack)); - -static cl::opt - SeparateThreadStacks("per-thread-stacks", - cl::desc("Report top stacks within each thread id"), - cl::sub(Stack), cl::init(false)); - -static cl::opt - AggregateThreads("aggregate-threads", - cl::desc("Aggregate stack times across threads"), - cl::sub(Stack), cl::init(false)); - -static cl::opt - DumpAllStacks("all-stacks", - cl::desc("Dump sum of timings for all stacks. " - "By default separates stacks per-thread."), - cl::sub(Stack), cl::init(false)); -static cl::alias DumpAllStacksShort("all", cl::aliasopt(DumpAllStacks), - cl::desc("Alias for -all-stacks"), - cl::sub(Stack)); - -// TODO(kpw): Add other interesting formats. Perhaps chrome trace viewer format -// possibly with aggregations or just a linear trace of timings. -enum StackOutputFormat { HUMAN, FLAMETOOL }; - -static cl::opt StacksOutputFormat( - "stack-format", - cl::desc("The format that output stacks should be " - "output in. Only applies with all-stacks."), - cl::values( - clEnumValN(HUMAN, "human", - "Human readable output. Only valid without -all-stacks."), - clEnumValN(FLAMETOOL, "flame", - "Format consumable by Brendan Gregg's FlameGraph tool. " - "Only valid with -all-stacks.")), - cl::sub(Stack), cl::init(HUMAN)); - -// Types of values for each stack in a CallTrie. -enum class AggregationType { - TOTAL_TIME, // The total time spent in a stack and its callees. - INVOCATION_COUNT // The number of times the stack was invoked. -}; - -static cl::opt RequestedAggregation( - "aggregation-type", - cl::desc("The type of aggregation to do on call stacks."), - cl::values( - clEnumValN( - AggregationType::TOTAL_TIME, "time", - "Capture the total time spent in an all invocations of a stack."), - clEnumValN(AggregationType::INVOCATION_COUNT, "count", - "Capture the number of times a stack was invoked. " - "In flamegraph mode, this count also includes invocations " - "of all callees.")), - cl::sub(Stack), cl::init(AggregationType::TOTAL_TIME)); - -/// A helper struct to work with formatv and XRayRecords. Makes it easier to -/// use instrumentation map names or addresses in formatted output. -struct format_xray_record : public FormatAdapter { - explicit format_xray_record(XRayRecord record, - const FuncIdConversionHelper &conv) - : FormatAdapter(std::move(record)), Converter(&conv) {} - void format(raw_ostream &Stream, StringRef Style) override { - Stream << formatv( - "{FuncId: \"{0}\", ThreadId: \"{1}\", RecordType: \"{2}\"}", - Converter->SymbolOrNumber(Item.FuncId), Item.TId, - DecodeRecordType(Item.RecordType)); - } - -private: - Twine DecodeRecordType(uint16_t recordType) { - switch (recordType) { - case 0: - return Twine("Fn Entry"); - case 1: - return Twine("Fn Exit"); - default: - // TODO: Add Tail exit when it is added to llvm/XRay/XRayRecord.h - return Twine("Unknown"); - } - } - - const FuncIdConversionHelper *Converter; -}; - -/// The stack command will take a set of XRay traces as arguments, and collects -/// information about the stacks of instrumented functions that appear in the -/// traces. We track the following pieces of information: -/// -/// - Total time: amount of time/cycles accounted for in the traces. -/// - Stack count: number of times a specific stack appears in the -/// traces. Only instrumented functions show up in stacks. -/// - Cumulative stack time: amount of time spent in a stack accumulated -/// across the invocations in the traces. -/// - Cumulative local time: amount of time spent in each instrumented -/// function showing up in a specific stack, accumulated across the traces. -/// -/// Example output for the kind of data we'd like to provide looks like the -/// following: -/// -/// Total time: 3.33234 s -/// Stack ID: ... -/// Stack Count: 2093 -/// # Function Local Time (%) Stack Time (%) -/// 0 main 2.34 ms 0.07% 3.33234 s 100% -/// 1 foo() 3.30000 s 99.02% 3.33 s 99.92% -/// 2 bar() 30 ms 0.90% 30 ms 0.90% -/// -/// We can also show distributions of the function call durations with -/// statistics at each level of the stack. This works by doing the following -/// algorithm: -/// -/// 1. When unwinding, record the duration of each unwound function associated -/// with the path up to which the unwinding stops. For example: -/// -/// Step Duration (? means has start time) -/// -/// push a a = ? -/// push b a = ?, a->b = ? -/// push c a = ?, a->b = ?, a->b->c = ? -/// pop c a = ?, a->b = ?, emit duration(a->b->c) -/// pop b a = ?, emit duration(a->b) -/// push c a = ?, a->c = ? -/// pop c a = ?, emit duration(a->c) -/// pop a emit duration(a) -/// -/// 2. We then account for the various stacks we've collected, and for each of -/// them will have measurements that look like the following (continuing -/// with the above simple example): -/// -/// c : [b->c"), [durations]>, c"), [durations]>] -/// b : [b"), [durations]>] -/// a : [] -/// -/// This allows us to compute, for each stack id, and each function that -/// shows up in the stack, some important statistics like: -/// -/// - median -/// - 99th percentile -/// - mean + stddev -/// - count -/// -/// 3. For cases where we don't have durations for some of the higher levels -/// of the stack (perhaps instrumentation wasn't activated when the stack was -/// entered), we can mark them appropriately. -/// -/// Computing this data also allows us to implement lookup by call stack nodes, -/// so that we can find functions that show up in multiple stack traces and -/// show the statistical properties of that function in various contexts. We -/// can compute information similar to the following: -/// -/// Function: 'c' -/// Stacks: 2 / 2 -/// Stack ID: ... -/// Stack Count: ... -/// # Function ... -/// 0 a ... -/// 1 b ... -/// 2 c ... -/// -/// Stack ID: ... -/// Stack Count: ... -/// # Function ... -/// 0 a ... -/// 1 c ... -/// ----------------... -/// -/// Function: 'b' -/// Stacks: 1 / 2 -/// Stack ID: ... -/// Stack Count: ... -/// # Function ... -/// 0 a ... -/// 1 b ... -/// 2 c ... -/// -/// -/// To do this we require a Trie data structure that will allow us to represent -/// all the call stacks of instrumented functions in an easily traversible -/// manner when we do the aggregations and lookups. For instrumented call -/// sequences like the following: -/// -/// a() -/// b() -/// c() -/// d() -/// c() -/// -/// We will have a representation like so: -/// -/// a -> b -> c -/// | | -/// | +--> d -/// | -/// +--> c -/// -/// We maintain a sequence of durations on the leaves and in the internal nodes -/// as we go through and process every record from the XRay trace. We also -/// maintain an index of unique functions, and provide a means of iterating -/// through all the instrumented call stacks which we know about. - -struct StackDuration { - llvm::SmallVector TerminalDurations; - llvm::SmallVector IntermediateDurations; -}; - -StackDuration mergeStackDuration(const StackDuration &Left, - const StackDuration &Right) { - StackDuration Data{}; - Data.TerminalDurations.reserve(Left.TerminalDurations.size() + - Right.TerminalDurations.size()); - Data.IntermediateDurations.reserve(Left.IntermediateDurations.size() + - Right.IntermediateDurations.size()); - // Aggregate the durations. - for (auto duration : Left.TerminalDurations) - Data.TerminalDurations.push_back(duration); - for (auto duration : Right.TerminalDurations) - Data.TerminalDurations.push_back(duration); - - for (auto duration : Left.IntermediateDurations) - Data.IntermediateDurations.push_back(duration); - for (auto duration : Right.IntermediateDurations) - Data.IntermediateDurations.push_back(duration); - return Data; -} - -using StackTrieNode = TrieNode; - -template -std::size_t GetValueForStack(const StackTrieNode *Node); - -// When computing total time spent in a stack, we're adding the timings from -// its callees and the timings from when it was a leaf. -template <> -std::size_t -GetValueForStack(const StackTrieNode *Node) { - auto TopSum = std::accumulate(Node->ExtraData.TerminalDurations.begin(), - Node->ExtraData.TerminalDurations.end(), 0uLL); - return std::accumulate(Node->ExtraData.IntermediateDurations.begin(), - Node->ExtraData.IntermediateDurations.end(), TopSum); -} - -// Calculates how many times a function was invoked. -// TODO: Hook up option to produce stacks -template <> -std::size_t -GetValueForStack(const StackTrieNode *Node) { - return Node->ExtraData.TerminalDurations.size() + - Node->ExtraData.IntermediateDurations.size(); -} - -// Make sure there are implementations for each enum value. -template struct DependentFalseType : std::false_type {}; - -template -std::size_t GetValueForStack(const StackTrieNode *Node) { - static_assert(DependentFalseType::value, - "No implementation found for aggregation type provided."); - return 0; -} - -class StackTrie { - // Avoid the magic number of 4 propagated through the code with an alias. - // We use this SmallVector to track the root nodes in a call graph. - using RootVector = SmallVector; - - // We maintain pointers to the roots of the tries we see. - DenseMap Roots; - - // We make sure all the nodes are accounted for in this list. - std::forward_list NodeStore; - - // A map of thread ids to pairs call stack trie nodes and their start times. - DenseMap, 8>> - ThreadStackMap; - - StackTrieNode *createTrieNode(uint32_t ThreadId, int32_t FuncId, - StackTrieNode *Parent) { - NodeStore.push_front(StackTrieNode{FuncId, Parent, {}, {{}, {}}}); - auto I = NodeStore.begin(); - auto *Node = &*I; - if (!Parent) - Roots[ThreadId].push_back(Node); - return Node; - } - - StackTrieNode *findRootNode(uint32_t ThreadId, int32_t FuncId) { - const auto &RootsByThread = Roots[ThreadId]; - auto I = find_if(RootsByThread, - [&](StackTrieNode *N) { return N->FuncId == FuncId; }); - return (I == RootsByThread.end()) ? nullptr : *I; - } - -public: - enum class AccountRecordStatus { - OK, // Successfully processed - ENTRY_NOT_FOUND, // An exit record had no matching call stack entry - UNKNOWN_RECORD_TYPE - }; - - struct AccountRecordState { - // We keep track of whether the call stack is currently unwinding. - bool wasLastRecordExit; - - static AccountRecordState CreateInitialState() { return {false}; } - }; - - AccountRecordStatus accountRecord(const XRayRecord &R, - AccountRecordState *state) { - auto &TS = ThreadStackMap[R.TId]; - switch (R.Type) { - case RecordTypes::ENTER: - case RecordTypes::ENTER_ARG: { - state->wasLastRecordExit = false; - // When we encounter a new function entry, we want to record the TSC for - // that entry, and the function id. Before doing so we check the top of - // the stack to see if there are callees that already represent this - // function. - if (TS.empty()) { - auto *Root = findRootNode(R.TId, R.FuncId); - TS.emplace_back(Root ? Root : createTrieNode(R.TId, R.FuncId, nullptr), - R.TSC); - return AccountRecordStatus::OK; - } - - auto &Top = TS.back(); - auto I = find_if(Top.first->Callees, - [&](StackTrieNode *N) { return N->FuncId == R.FuncId; }); - if (I == Top.first->Callees.end()) { - // We didn't find the callee in the stack trie, so we're going to - // add to the stack then set up the pointers properly. - auto N = createTrieNode(R.TId, R.FuncId, Top.first); - Top.first->Callees.emplace_back(N); - - // Top may be invalidated after this statement. - TS.emplace_back(N, R.TSC); - } else { - // We found the callee in the stack trie, so we'll use that pointer - // instead, add it to the stack associated with the TSC. - TS.emplace_back(*I, R.TSC); - } - return AccountRecordStatus::OK; - } - case RecordTypes::EXIT: - case RecordTypes::TAIL_EXIT: { - bool wasLastRecordExit = state->wasLastRecordExit; - state->wasLastRecordExit = true; - // The exit case is more interesting, since we want to be able to deduce - // missing exit records. To do that properly, we need to look up the stack - // and see whether the exit record matches any of the entry records. If it - // does match, we attempt to record the durations as we pop the stack to - // where we see the parent. - if (TS.empty()) { - // Short circuit, and say we can't find it. - - return AccountRecordStatus::ENTRY_NOT_FOUND; - } - - auto FunctionEntryMatch = find_if( - reverse(TS), [&](const std::pair &E) { - return E.first->FuncId == R.FuncId; - }); - auto status = AccountRecordStatus::OK; - if (FunctionEntryMatch == TS.rend()) { - status = AccountRecordStatus::ENTRY_NOT_FOUND; - } else { - // Account for offset of 1 between reverse and forward iterators. We - // want the forward iterator to include the function that is exited. - ++FunctionEntryMatch; - } - auto I = FunctionEntryMatch.base(); - for (auto &E : make_range(I, TS.end() - 1)) - E.first->ExtraData.IntermediateDurations.push_back( - std::max(E.second, R.TSC) - std::min(E.second, R.TSC)); - auto &Deepest = TS.back(); - if (wasLastRecordExit) - Deepest.first->ExtraData.IntermediateDurations.push_back( - std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC)); - else - Deepest.first->ExtraData.TerminalDurations.push_back( - std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC)); - TS.erase(I, TS.end()); - return status; - } - } - return AccountRecordStatus::UNKNOWN_RECORD_TYPE; - } - - bool isEmpty() const { return Roots.empty(); } - - void printStack(raw_ostream &OS, const StackTrieNode *Top, - FuncIdConversionHelper &FN) { - // Traverse the pointers up to the parent, noting the sums, then print - // in reverse order (callers at top, callees down bottom). - SmallVector CurrentStack; - for (auto *F = Top; F != nullptr; F = F->Parent) - CurrentStack.push_back(F); - int Level = 0; - OS << formatv("{0,-5} {1,-60} {2,+12} {3,+16}\n", "lvl", "function", - "count", "sum"); - for (auto *F : - reverse(make_range(CurrentStack.begin() + 1, CurrentStack.end()))) { - auto Sum = std::accumulate(F->ExtraData.IntermediateDurations.begin(), - F->ExtraData.IntermediateDurations.end(), 0LL); - auto FuncId = FN.SymbolOrNumber(F->FuncId); - OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++, - FuncId.size() > 60 ? FuncId.substr(0, 57) + "..." : FuncId, - F->ExtraData.IntermediateDurations.size(), Sum); - } - auto *Leaf = *CurrentStack.begin(); - auto LeafSum = - std::accumulate(Leaf->ExtraData.TerminalDurations.begin(), - Leaf->ExtraData.TerminalDurations.end(), 0LL); - auto LeafFuncId = FN.SymbolOrNumber(Leaf->FuncId); - OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++, - LeafFuncId.size() > 60 ? LeafFuncId.substr(0, 57) + "..." - : LeafFuncId, - Leaf->ExtraData.TerminalDurations.size(), LeafSum); - OS << "\n"; - } - - /// Prints top stacks for each thread. - void printPerThread(raw_ostream &OS, FuncIdConversionHelper &FN) { - for (auto iter : Roots) { - OS << "Thread " << iter.first << ":\n"; - print(OS, FN, iter.second); - OS << "\n"; - } - } - - /// Prints timing sums for each stack in each threads. - template - void printAllPerThread(raw_ostream &OS, FuncIdConversionHelper &FN, - StackOutputFormat format) { - for (auto iter : Roots) { - uint32_t threadId = iter.first; - RootVector &perThreadRoots = iter.second; - bool reportThreadId = true; - printAll(OS, FN, perThreadRoots, threadId, reportThreadId); - } - } - - /// Prints top stacks from looking at all the leaves and ignoring thread IDs. - /// Stacks that consist of the same function IDs but were called in different - /// thread IDs are not considered unique in this printout. - void printIgnoringThreads(raw_ostream &OS, FuncIdConversionHelper &FN) { - RootVector RootValues; - - // Function to pull the values out of a map iterator. - using RootsType = decltype(Roots.begin())::value_type; - auto MapValueFn = [](const RootsType &Value) { return Value.second; }; - - for (const auto &RootNodeRange : - make_range(map_iterator(Roots.begin(), MapValueFn), - map_iterator(Roots.end(), MapValueFn))) { - for (auto *RootNode : RootNodeRange) - RootValues.push_back(RootNode); - } - - print(OS, FN, RootValues); - } - - /// Creates a merged list of Tries for unique stacks that disregards their - /// thread IDs. - RootVector mergeAcrossThreads(std::forward_list &NodeStore) { - RootVector MergedByThreadRoots; - for (auto MapIter : Roots) { - const auto &RootNodeVector = MapIter.second; - for (auto *Node : RootNodeVector) { - auto MaybeFoundIter = - find_if(MergedByThreadRoots, [Node](StackTrieNode *elem) { - return Node->FuncId == elem->FuncId; - }); - if (MaybeFoundIter == MergedByThreadRoots.end()) { - MergedByThreadRoots.push_back(Node); - } else { - MergedByThreadRoots.push_back(mergeTrieNodes( - **MaybeFoundIter, *Node, nullptr, NodeStore, mergeStackDuration)); - MergedByThreadRoots.erase(MaybeFoundIter); - } - } - } - return MergedByThreadRoots; - } - - /// Print timing sums for all stacks merged by Thread ID. - template - void printAllAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN, - StackOutputFormat format) { - std::forward_list AggregatedNodeStore; - RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore); - bool reportThreadId = false; - printAll(OS, FN, MergedByThreadRoots, - /*threadId*/ 0, reportThreadId); - } - - /// Merges the trie by thread id before printing top stacks. - void printAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN) { - std::forward_list AggregatedNodeStore; - RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore); - print(OS, FN, MergedByThreadRoots); - } - - // TODO: Add a format option when more than one are supported. - template - void printAll(raw_ostream &OS, FuncIdConversionHelper &FN, - RootVector RootValues, uint32_t ThreadId, bool ReportThread) { - SmallVector S; - for (const auto *N : RootValues) { - S.clear(); - S.push_back(N); - while (!S.empty()) { - auto *Top = S.pop_back_val(); - printSingleStack(OS, FN, ReportThread, ThreadId, Top); - for (const auto *C : Top->Callees) - S.push_back(C); - } - } - } - - /// Prints values for stacks in a format consumable for the flamegraph.pl - /// tool. This is a line based format that lists each level in the stack - /// hierarchy in a semicolon delimited form followed by a space and a numeric - /// value. If breaking down by thread, the thread ID will be added as the - /// root level of the stack. - template - void printSingleStack(raw_ostream &OS, FuncIdConversionHelper &Converter, - bool ReportThread, uint32_t ThreadId, - const StackTrieNode *Node) { - if (ReportThread) - OS << "thread_" << ThreadId << ";"; - SmallVector lineage{}; - lineage.push_back(Node); - while (lineage.back()->Parent != nullptr) - lineage.push_back(lineage.back()->Parent); - while (!lineage.empty()) { - OS << Converter.SymbolOrNumber(lineage.back()->FuncId) << ";"; - lineage.pop_back(); - } - OS << " " << GetValueForStack(Node) << "\n"; - } - - void print(raw_ostream &OS, FuncIdConversionHelper &FN, - RootVector RootValues) { - // Go through each of the roots, and traverse the call stack, producing the - // aggregates as you go along. Remember these aggregates and stacks, and - // show summary statistics about: - // - // - Total number of unique stacks - // - Top 10 stacks by count - // - Top 10 stacks by aggregate duration - SmallVector, 11> - TopStacksByCount; - SmallVector, 11> TopStacksBySum; - auto greater_second = - [](const std::pair &A, - const std::pair &B) { - return A.second > B.second; - }; - uint64_t UniqueStacks = 0; - for (const auto *N : RootValues) { - SmallVector S; - S.emplace_back(N); - - while (!S.empty()) { - auto *Top = S.pop_back_val(); - - // We only start printing the stack (by walking up the parent pointers) - // when we get to a leaf function. - if (!Top->ExtraData.TerminalDurations.empty()) { - ++UniqueStacks; - auto TopSum = - std::accumulate(Top->ExtraData.TerminalDurations.begin(), - Top->ExtraData.TerminalDurations.end(), 0uLL); - { - auto E = std::make_pair(Top, TopSum); - TopStacksBySum.insert(std::lower_bound(TopStacksBySum.begin(), - TopStacksBySum.end(), E, - greater_second), - E); - if (TopStacksBySum.size() == 11) - TopStacksBySum.pop_back(); - } - { - auto E = - std::make_pair(Top, Top->ExtraData.TerminalDurations.size()); - TopStacksByCount.insert(std::lower_bound(TopStacksByCount.begin(), - TopStacksByCount.end(), E, - greater_second), - E); - if (TopStacksByCount.size() == 11) - TopStacksByCount.pop_back(); - } - } - for (const auto *C : Top->Callees) - S.push_back(C); - } - } - - // Now print the statistics in the end. - OS << "\n"; - OS << "Unique Stacks: " << UniqueStacks << "\n"; - OS << "Top 10 Stacks by leaf sum:\n\n"; - for (const auto &P : TopStacksBySum) { - OS << "Sum: " << P.second << "\n"; - printStack(OS, P.first, FN); - } - OS << "\n"; - OS << "Top 10 Stacks by leaf count:\n\n"; - for (const auto &P : TopStacksByCount) { - OS << "Count: " << P.second << "\n"; - printStack(OS, P.first, FN); - } - OS << "\n"; - } -}; - -std::string CreateErrorMessage(StackTrie::AccountRecordStatus Error, - const XRayRecord &Record, - const FuncIdConversionHelper &Converter) { - switch (Error) { - case StackTrie::AccountRecordStatus::ENTRY_NOT_FOUND: - return formatv("Found record {0} with no matching function entry\n", - format_xray_record(Record, Converter)); - default: - return formatv("Unknown error type for record {0}\n", - format_xray_record(Record, Converter)); - } -} - -static CommandRegistration Unused(&Stack, []() -> Error { - // Load each file provided as a command-line argument. For each one of them - // account to a single StackTrie, and just print the whole trie for now. - StackTrie ST; - InstrumentationMap Map; - if (!StacksInstrMap.empty()) { - auto InstrumentationMapOrError = loadInstrumentationMap(StacksInstrMap); - if (!InstrumentationMapOrError) - return joinErrors( - make_error( - Twine("Cannot open instrumentation map: ") + StacksInstrMap, - std::make_error_code(std::errc::invalid_argument)), - InstrumentationMapOrError.takeError()); - Map = std::move(*InstrumentationMapOrError); - } - - if (SeparateThreadStacks && AggregateThreads) - return make_error( - Twine("Can't specify options for per thread reporting and reporting " - "that aggregates threads."), - std::make_error_code(std::errc::invalid_argument)); - - if (!DumpAllStacks && StacksOutputFormat != HUMAN) - return make_error( - Twine("Can't specify a non-human format without -all-stacks."), - std::make_error_code(std::errc::invalid_argument)); - - if (DumpAllStacks && StacksOutputFormat == HUMAN) - return make_error( - Twine("You must specify a non-human format when reporting with " - "-all-stacks."), - std::make_error_code(std::errc::invalid_argument)); - - symbolize::LLVMSymbolizer::Options Opts( - symbolize::FunctionNameKind::LinkageName, true, true, false, ""); - symbolize::LLVMSymbolizer Symbolizer(Opts); - FuncIdConversionHelper FuncIdHelper(StacksInstrMap, Symbolizer, - Map.getFunctionAddresses()); - // TODO: Someday, support output to files instead of just directly to - // standard output. - for (const auto &Filename : StackInputs) { - auto TraceOrErr = loadTraceFile(Filename); - if (!TraceOrErr) { - if (!StackKeepGoing) - return joinErrors( - make_error( - Twine("Failed loading input file '") + Filename + "'", - std::make_error_code(std::errc::invalid_argument)), - TraceOrErr.takeError()); - logAllUnhandledErrors(TraceOrErr.takeError(), errs(), ""); - continue; - } - auto &T = *TraceOrErr; - StackTrie::AccountRecordState AccountRecordState = - StackTrie::AccountRecordState::CreateInitialState(); - for (const auto &Record : T) { - auto error = ST.accountRecord(Record, &AccountRecordState); - if (error != StackTrie::AccountRecordStatus::OK) { - if (!StackKeepGoing) - return make_error( - CreateErrorMessage(error, Record, FuncIdHelper), - make_error_code(errc::illegal_byte_sequence)); - errs() << CreateErrorMessage(error, Record, FuncIdHelper); - } - } - } - if (ST.isEmpty()) { - return make_error( - "No instrumented calls were accounted in the input file.", - make_error_code(errc::result_out_of_range)); - } - - // Report the stacks in a long form mode for another tool to analyze. - if (DumpAllStacks) { - if (AggregateThreads) { - switch (RequestedAggregation) { - case AggregationType::TOTAL_TIME: - ST.printAllAggregatingThreads( - outs(), FuncIdHelper, StacksOutputFormat); - break; - case AggregationType::INVOCATION_COUNT: - ST.printAllAggregatingThreads( - outs(), FuncIdHelper, StacksOutputFormat); - break; - } - } else { - switch (RequestedAggregation) { - case AggregationType::TOTAL_TIME: - ST.printAllPerThread(outs(), FuncIdHelper, - StacksOutputFormat); - break; - case AggregationType::INVOCATION_COUNT: - ST.printAllPerThread( - outs(), FuncIdHelper, StacksOutputFormat); - break; - } - } - return Error::success(); - } - - // We're only outputting top stacks. - if (AggregateThreads) { - ST.printAggregatingThreads(outs(), FuncIdHelper); - } else if (SeparateThreadStacks) { - ST.printPerThread(outs(), FuncIdHelper); - } else { - ST.printIgnoringThreads(outs(), FuncIdHelper); - } - return Error::success(); -}); Index: llvm/trunk/tools/llvm-xray/xray-stacks.cpp =================================================================== --- llvm/trunk/tools/llvm-xray/xray-stacks.cpp +++ llvm/trunk/tools/llvm-xray/xray-stacks.cpp @@ -0,0 +1,797 @@ +//===- xray-stacks.cpp: XRay Function Call Stack Accounting ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements stack-based accounting. It takes XRay traces, and +// collates statistics across these traces to show a breakdown of time spent +// at various points of the stack to provide insight into which functions +// spend the most time in terms of a call stack. We provide a few +// sorting/filtering options for zero'ing in on the useful stacks. +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include "func-id-helper.h" +#include "trie-node.h" +#include "xray-registry.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormatAdapters.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/XRay/Graph.h" +#include "llvm/XRay/InstrumentationMap.h" +#include "llvm/XRay/Trace.h" + +using namespace llvm; +using namespace llvm::xray; + +static cl::SubCommand Stack("stack", "Call stack accounting"); +static cl::list StackInputs(cl::Positional, + cl::desc(""), cl::Required, + cl::sub(Stack), cl::OneOrMore); + +static cl::opt + StackKeepGoing("keep-going", cl::desc("Keep going on errors encountered"), + cl::sub(Stack), cl::init(false)); +static cl::alias StackKeepGoing2("k", cl::aliasopt(StackKeepGoing), + cl::desc("Alias for -keep-going"), + cl::sub(Stack)); + +// TODO: Does there need to be an option to deduce tail or sibling calls? + +static cl::opt StacksInstrMap( + "instr_map", + cl::desc("instrumentation map used to identify function ids. " + "Currently supports elf file instrumentation maps."), + cl::sub(Stack), cl::init("")); +static cl::alias StacksInstrMap2("m", cl::aliasopt(StacksInstrMap), + cl::desc("Alias for -instr_map"), + cl::sub(Stack)); + +static cl::opt + SeparateThreadStacks("per-thread-stacks", + cl::desc("Report top stacks within each thread id"), + cl::sub(Stack), cl::init(false)); + +static cl::opt + AggregateThreads("aggregate-threads", + cl::desc("Aggregate stack times across threads"), + cl::sub(Stack), cl::init(false)); + +static cl::opt + DumpAllStacks("all-stacks", + cl::desc("Dump sum of timings for all stacks. " + "By default separates stacks per-thread."), + cl::sub(Stack), cl::init(false)); +static cl::alias DumpAllStacksShort("all", cl::aliasopt(DumpAllStacks), + cl::desc("Alias for -all-stacks"), + cl::sub(Stack)); + +// TODO(kpw): Add other interesting formats. Perhaps chrome trace viewer format +// possibly with aggregations or just a linear trace of timings. +enum StackOutputFormat { HUMAN, FLAMETOOL }; + +static cl::opt StacksOutputFormat( + "stack-format", + cl::desc("The format that output stacks should be " + "output in. Only applies with all-stacks."), + cl::values( + clEnumValN(HUMAN, "human", + "Human readable output. Only valid without -all-stacks."), + clEnumValN(FLAMETOOL, "flame", + "Format consumable by Brendan Gregg's FlameGraph tool. " + "Only valid with -all-stacks.")), + cl::sub(Stack), cl::init(HUMAN)); + +// Types of values for each stack in a CallTrie. +enum class AggregationType { + TOTAL_TIME, // The total time spent in a stack and its callees. + INVOCATION_COUNT // The number of times the stack was invoked. +}; + +static cl::opt RequestedAggregation( + "aggregation-type", + cl::desc("The type of aggregation to do on call stacks."), + cl::values( + clEnumValN( + AggregationType::TOTAL_TIME, "time", + "Capture the total time spent in an all invocations of a stack."), + clEnumValN(AggregationType::INVOCATION_COUNT, "count", + "Capture the number of times a stack was invoked. " + "In flamegraph mode, this count also includes invocations " + "of all callees.")), + cl::sub(Stack), cl::init(AggregationType::TOTAL_TIME)); + +/// A helper struct to work with formatv and XRayRecords. Makes it easier to +/// use instrumentation map names or addresses in formatted output. +struct format_xray_record : public FormatAdapter { + explicit format_xray_record(XRayRecord record, + const FuncIdConversionHelper &conv) + : FormatAdapter(std::move(record)), Converter(&conv) {} + void format(raw_ostream &Stream, StringRef Style) override { + Stream << formatv( + "{FuncId: \"{0}\", ThreadId: \"{1}\", RecordType: \"{2}\"}", + Converter->SymbolOrNumber(Item.FuncId), Item.TId, + DecodeRecordType(Item.RecordType)); + } + +private: + Twine DecodeRecordType(uint16_t recordType) { + switch (recordType) { + case 0: + return Twine("Fn Entry"); + case 1: + return Twine("Fn Exit"); + default: + // TODO: Add Tail exit when it is added to llvm/XRay/XRayRecord.h + return Twine("Unknown"); + } + } + + const FuncIdConversionHelper *Converter; +}; + +/// The stack command will take a set of XRay traces as arguments, and collects +/// information about the stacks of instrumented functions that appear in the +/// traces. We track the following pieces of information: +/// +/// - Total time: amount of time/cycles accounted for in the traces. +/// - Stack count: number of times a specific stack appears in the +/// traces. Only instrumented functions show up in stacks. +/// - Cumulative stack time: amount of time spent in a stack accumulated +/// across the invocations in the traces. +/// - Cumulative local time: amount of time spent in each instrumented +/// function showing up in a specific stack, accumulated across the traces. +/// +/// Example output for the kind of data we'd like to provide looks like the +/// following: +/// +/// Total time: 3.33234 s +/// Stack ID: ... +/// Stack Count: 2093 +/// # Function Local Time (%) Stack Time (%) +/// 0 main 2.34 ms 0.07% 3.33234 s 100% +/// 1 foo() 3.30000 s 99.02% 3.33 s 99.92% +/// 2 bar() 30 ms 0.90% 30 ms 0.90% +/// +/// We can also show distributions of the function call durations with +/// statistics at each level of the stack. This works by doing the following +/// algorithm: +/// +/// 1. When unwinding, record the duration of each unwound function associated +/// with the path up to which the unwinding stops. For example: +/// +/// Step Duration (? means has start time) +/// +/// push a a = ? +/// push b a = ?, a->b = ? +/// push c a = ?, a->b = ?, a->b->c = ? +/// pop c a = ?, a->b = ?, emit duration(a->b->c) +/// pop b a = ?, emit duration(a->b) +/// push c a = ?, a->c = ? +/// pop c a = ?, emit duration(a->c) +/// pop a emit duration(a) +/// +/// 2. We then account for the various stacks we've collected, and for each of +/// them will have measurements that look like the following (continuing +/// with the above simple example): +/// +/// c : [b->c"), [durations]>, c"), [durations]>] +/// b : [b"), [durations]>] +/// a : [] +/// +/// This allows us to compute, for each stack id, and each function that +/// shows up in the stack, some important statistics like: +/// +/// - median +/// - 99th percentile +/// - mean + stddev +/// - count +/// +/// 3. For cases where we don't have durations for some of the higher levels +/// of the stack (perhaps instrumentation wasn't activated when the stack was +/// entered), we can mark them appropriately. +/// +/// Computing this data also allows us to implement lookup by call stack nodes, +/// so that we can find functions that show up in multiple stack traces and +/// show the statistical properties of that function in various contexts. We +/// can compute information similar to the following: +/// +/// Function: 'c' +/// Stacks: 2 / 2 +/// Stack ID: ... +/// Stack Count: ... +/// # Function ... +/// 0 a ... +/// 1 b ... +/// 2 c ... +/// +/// Stack ID: ... +/// Stack Count: ... +/// # Function ... +/// 0 a ... +/// 1 c ... +/// ----------------... +/// +/// Function: 'b' +/// Stacks: 1 / 2 +/// Stack ID: ... +/// Stack Count: ... +/// # Function ... +/// 0 a ... +/// 1 b ... +/// 2 c ... +/// +/// +/// To do this we require a Trie data structure that will allow us to represent +/// all the call stacks of instrumented functions in an easily traversible +/// manner when we do the aggregations and lookups. For instrumented call +/// sequences like the following: +/// +/// a() +/// b() +/// c() +/// d() +/// c() +/// +/// We will have a representation like so: +/// +/// a -> b -> c +/// | | +/// | +--> d +/// | +/// +--> c +/// +/// We maintain a sequence of durations on the leaves and in the internal nodes +/// as we go through and process every record from the XRay trace. We also +/// maintain an index of unique functions, and provide a means of iterating +/// through all the instrumented call stacks which we know about. + +struct StackDuration { + llvm::SmallVector TerminalDurations; + llvm::SmallVector IntermediateDurations; +}; + +StackDuration mergeStackDuration(const StackDuration &Left, + const StackDuration &Right) { + StackDuration Data{}; + Data.TerminalDurations.reserve(Left.TerminalDurations.size() + + Right.TerminalDurations.size()); + Data.IntermediateDurations.reserve(Left.IntermediateDurations.size() + + Right.IntermediateDurations.size()); + // Aggregate the durations. + for (auto duration : Left.TerminalDurations) + Data.TerminalDurations.push_back(duration); + for (auto duration : Right.TerminalDurations) + Data.TerminalDurations.push_back(duration); + + for (auto duration : Left.IntermediateDurations) + Data.IntermediateDurations.push_back(duration); + for (auto duration : Right.IntermediateDurations) + Data.IntermediateDurations.push_back(duration); + return Data; +} + +using StackTrieNode = TrieNode; + +template +std::size_t GetValueForStack(const StackTrieNode *Node); + +// When computing total time spent in a stack, we're adding the timings from +// its callees and the timings from when it was a leaf. +template <> +std::size_t +GetValueForStack(const StackTrieNode *Node) { + auto TopSum = std::accumulate(Node->ExtraData.TerminalDurations.begin(), + Node->ExtraData.TerminalDurations.end(), 0uLL); + return std::accumulate(Node->ExtraData.IntermediateDurations.begin(), + Node->ExtraData.IntermediateDurations.end(), TopSum); +} + +// Calculates how many times a function was invoked. +// TODO: Hook up option to produce stacks +template <> +std::size_t +GetValueForStack(const StackTrieNode *Node) { + return Node->ExtraData.TerminalDurations.size() + + Node->ExtraData.IntermediateDurations.size(); +} + +// Make sure there are implementations for each enum value. +template struct DependentFalseType : std::false_type {}; + +template +std::size_t GetValueForStack(const StackTrieNode *Node) { + static_assert(DependentFalseType::value, + "No implementation found for aggregation type provided."); + return 0; +} + +class StackTrie { + // Avoid the magic number of 4 propagated through the code with an alias. + // We use this SmallVector to track the root nodes in a call graph. + using RootVector = SmallVector; + + // We maintain pointers to the roots of the tries we see. + DenseMap Roots; + + // We make sure all the nodes are accounted for in this list. + std::forward_list NodeStore; + + // A map of thread ids to pairs call stack trie nodes and their start times. + DenseMap, 8>> + ThreadStackMap; + + StackTrieNode *createTrieNode(uint32_t ThreadId, int32_t FuncId, + StackTrieNode *Parent) { + NodeStore.push_front(StackTrieNode{FuncId, Parent, {}, {{}, {}}}); + auto I = NodeStore.begin(); + auto *Node = &*I; + if (!Parent) + Roots[ThreadId].push_back(Node); + return Node; + } + + StackTrieNode *findRootNode(uint32_t ThreadId, int32_t FuncId) { + const auto &RootsByThread = Roots[ThreadId]; + auto I = find_if(RootsByThread, + [&](StackTrieNode *N) { return N->FuncId == FuncId; }); + return (I == RootsByThread.end()) ? nullptr : *I; + } + +public: + enum class AccountRecordStatus { + OK, // Successfully processed + ENTRY_NOT_FOUND, // An exit record had no matching call stack entry + UNKNOWN_RECORD_TYPE + }; + + struct AccountRecordState { + // We keep track of whether the call stack is currently unwinding. + bool wasLastRecordExit; + + static AccountRecordState CreateInitialState() { return {false}; } + }; + + AccountRecordStatus accountRecord(const XRayRecord &R, + AccountRecordState *state) { + auto &TS = ThreadStackMap[R.TId]; + switch (R.Type) { + case RecordTypes::ENTER: + case RecordTypes::ENTER_ARG: { + state->wasLastRecordExit = false; + // When we encounter a new function entry, we want to record the TSC for + // that entry, and the function id. Before doing so we check the top of + // the stack to see if there are callees that already represent this + // function. + if (TS.empty()) { + auto *Root = findRootNode(R.TId, R.FuncId); + TS.emplace_back(Root ? Root : createTrieNode(R.TId, R.FuncId, nullptr), + R.TSC); + return AccountRecordStatus::OK; + } + + auto &Top = TS.back(); + auto I = find_if(Top.first->Callees, + [&](StackTrieNode *N) { return N->FuncId == R.FuncId; }); + if (I == Top.first->Callees.end()) { + // We didn't find the callee in the stack trie, so we're going to + // add to the stack then set up the pointers properly. + auto N = createTrieNode(R.TId, R.FuncId, Top.first); + Top.first->Callees.emplace_back(N); + + // Top may be invalidated after this statement. + TS.emplace_back(N, R.TSC); + } else { + // We found the callee in the stack trie, so we'll use that pointer + // instead, add it to the stack associated with the TSC. + TS.emplace_back(*I, R.TSC); + } + return AccountRecordStatus::OK; + } + case RecordTypes::EXIT: + case RecordTypes::TAIL_EXIT: { + bool wasLastRecordExit = state->wasLastRecordExit; + state->wasLastRecordExit = true; + // The exit case is more interesting, since we want to be able to deduce + // missing exit records. To do that properly, we need to look up the stack + // and see whether the exit record matches any of the entry records. If it + // does match, we attempt to record the durations as we pop the stack to + // where we see the parent. + if (TS.empty()) { + // Short circuit, and say we can't find it. + + return AccountRecordStatus::ENTRY_NOT_FOUND; + } + + auto FunctionEntryMatch = find_if( + reverse(TS), [&](const std::pair &E) { + return E.first->FuncId == R.FuncId; + }); + auto status = AccountRecordStatus::OK; + if (FunctionEntryMatch == TS.rend()) { + status = AccountRecordStatus::ENTRY_NOT_FOUND; + } else { + // Account for offset of 1 between reverse and forward iterators. We + // want the forward iterator to include the function that is exited. + ++FunctionEntryMatch; + } + auto I = FunctionEntryMatch.base(); + for (auto &E : make_range(I, TS.end() - 1)) + E.first->ExtraData.IntermediateDurations.push_back( + std::max(E.second, R.TSC) - std::min(E.second, R.TSC)); + auto &Deepest = TS.back(); + if (wasLastRecordExit) + Deepest.first->ExtraData.IntermediateDurations.push_back( + std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC)); + else + Deepest.first->ExtraData.TerminalDurations.push_back( + std::max(Deepest.second, R.TSC) - std::min(Deepest.second, R.TSC)); + TS.erase(I, TS.end()); + return status; + } + } + return AccountRecordStatus::UNKNOWN_RECORD_TYPE; + } + + bool isEmpty() const { return Roots.empty(); } + + void printStack(raw_ostream &OS, const StackTrieNode *Top, + FuncIdConversionHelper &FN) { + // Traverse the pointers up to the parent, noting the sums, then print + // in reverse order (callers at top, callees down bottom). + SmallVector CurrentStack; + for (auto *F = Top; F != nullptr; F = F->Parent) + CurrentStack.push_back(F); + int Level = 0; + OS << formatv("{0,-5} {1,-60} {2,+12} {3,+16}\n", "lvl", "function", + "count", "sum"); + for (auto *F : + reverse(make_range(CurrentStack.begin() + 1, CurrentStack.end()))) { + auto Sum = std::accumulate(F->ExtraData.IntermediateDurations.begin(), + F->ExtraData.IntermediateDurations.end(), 0LL); + auto FuncId = FN.SymbolOrNumber(F->FuncId); + OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++, + FuncId.size() > 60 ? FuncId.substr(0, 57) + "..." : FuncId, + F->ExtraData.IntermediateDurations.size(), Sum); + } + auto *Leaf = *CurrentStack.begin(); + auto LeafSum = + std::accumulate(Leaf->ExtraData.TerminalDurations.begin(), + Leaf->ExtraData.TerminalDurations.end(), 0LL); + auto LeafFuncId = FN.SymbolOrNumber(Leaf->FuncId); + OS << formatv("#{0,-4} {1,-60} {2,+12} {3,+16}\n", Level++, + LeafFuncId.size() > 60 ? LeafFuncId.substr(0, 57) + "..." + : LeafFuncId, + Leaf->ExtraData.TerminalDurations.size(), LeafSum); + OS << "\n"; + } + + /// Prints top stacks for each thread. + void printPerThread(raw_ostream &OS, FuncIdConversionHelper &FN) { + for (auto iter : Roots) { + OS << "Thread " << iter.first << ":\n"; + print(OS, FN, iter.second); + OS << "\n"; + } + } + + /// Prints timing sums for each stack in each threads. + template + void printAllPerThread(raw_ostream &OS, FuncIdConversionHelper &FN, + StackOutputFormat format) { + for (auto iter : Roots) { + uint32_t threadId = iter.first; + RootVector &perThreadRoots = iter.second; + bool reportThreadId = true; + printAll(OS, FN, perThreadRoots, threadId, reportThreadId); + } + } + + /// Prints top stacks from looking at all the leaves and ignoring thread IDs. + /// Stacks that consist of the same function IDs but were called in different + /// thread IDs are not considered unique in this printout. + void printIgnoringThreads(raw_ostream &OS, FuncIdConversionHelper &FN) { + RootVector RootValues; + + // Function to pull the values out of a map iterator. + using RootsType = decltype(Roots.begin())::value_type; + auto MapValueFn = [](const RootsType &Value) { return Value.second; }; + + for (const auto &RootNodeRange : + make_range(map_iterator(Roots.begin(), MapValueFn), + map_iterator(Roots.end(), MapValueFn))) { + for (auto *RootNode : RootNodeRange) + RootValues.push_back(RootNode); + } + + print(OS, FN, RootValues); + } + + /// Creates a merged list of Tries for unique stacks that disregards their + /// thread IDs. + RootVector mergeAcrossThreads(std::forward_list &NodeStore) { + RootVector MergedByThreadRoots; + for (auto MapIter : Roots) { + const auto &RootNodeVector = MapIter.second; + for (auto *Node : RootNodeVector) { + auto MaybeFoundIter = + find_if(MergedByThreadRoots, [Node](StackTrieNode *elem) { + return Node->FuncId == elem->FuncId; + }); + if (MaybeFoundIter == MergedByThreadRoots.end()) { + MergedByThreadRoots.push_back(Node); + } else { + MergedByThreadRoots.push_back(mergeTrieNodes( + **MaybeFoundIter, *Node, nullptr, NodeStore, mergeStackDuration)); + MergedByThreadRoots.erase(MaybeFoundIter); + } + } + } + return MergedByThreadRoots; + } + + /// Print timing sums for all stacks merged by Thread ID. + template + void printAllAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN, + StackOutputFormat format) { + std::forward_list AggregatedNodeStore; + RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore); + bool reportThreadId = false; + printAll(OS, FN, MergedByThreadRoots, + /*threadId*/ 0, reportThreadId); + } + + /// Merges the trie by thread id before printing top stacks. + void printAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN) { + std::forward_list AggregatedNodeStore; + RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore); + print(OS, FN, MergedByThreadRoots); + } + + // TODO: Add a format option when more than one are supported. + template + void printAll(raw_ostream &OS, FuncIdConversionHelper &FN, + RootVector RootValues, uint32_t ThreadId, bool ReportThread) { + SmallVector S; + for (const auto *N : RootValues) { + S.clear(); + S.push_back(N); + while (!S.empty()) { + auto *Top = S.pop_back_val(); + printSingleStack(OS, FN, ReportThread, ThreadId, Top); + for (const auto *C : Top->Callees) + S.push_back(C); + } + } + } + + /// Prints values for stacks in a format consumable for the flamegraph.pl + /// tool. This is a line based format that lists each level in the stack + /// hierarchy in a semicolon delimited form followed by a space and a numeric + /// value. If breaking down by thread, the thread ID will be added as the + /// root level of the stack. + template + void printSingleStack(raw_ostream &OS, FuncIdConversionHelper &Converter, + bool ReportThread, uint32_t ThreadId, + const StackTrieNode *Node) { + if (ReportThread) + OS << "thread_" << ThreadId << ";"; + SmallVector lineage{}; + lineage.push_back(Node); + while (lineage.back()->Parent != nullptr) + lineage.push_back(lineage.back()->Parent); + while (!lineage.empty()) { + OS << Converter.SymbolOrNumber(lineage.back()->FuncId) << ";"; + lineage.pop_back(); + } + OS << " " << GetValueForStack(Node) << "\n"; + } + + void print(raw_ostream &OS, FuncIdConversionHelper &FN, + RootVector RootValues) { + // Go through each of the roots, and traverse the call stack, producing the + // aggregates as you go along. Remember these aggregates and stacks, and + // show summary statistics about: + // + // - Total number of unique stacks + // - Top 10 stacks by count + // - Top 10 stacks by aggregate duration + SmallVector, 11> + TopStacksByCount; + SmallVector, 11> TopStacksBySum; + auto greater_second = + [](const std::pair &A, + const std::pair &B) { + return A.second > B.second; + }; + uint64_t UniqueStacks = 0; + for (const auto *N : RootValues) { + SmallVector S; + S.emplace_back(N); + + while (!S.empty()) { + auto *Top = S.pop_back_val(); + + // We only start printing the stack (by walking up the parent pointers) + // when we get to a leaf function. + if (!Top->ExtraData.TerminalDurations.empty()) { + ++UniqueStacks; + auto TopSum = + std::accumulate(Top->ExtraData.TerminalDurations.begin(), + Top->ExtraData.TerminalDurations.end(), 0uLL); + { + auto E = std::make_pair(Top, TopSum); + TopStacksBySum.insert(std::lower_bound(TopStacksBySum.begin(), + TopStacksBySum.end(), E, + greater_second), + E); + if (TopStacksBySum.size() == 11) + TopStacksBySum.pop_back(); + } + { + auto E = + std::make_pair(Top, Top->ExtraData.TerminalDurations.size()); + TopStacksByCount.insert(std::lower_bound(TopStacksByCount.begin(), + TopStacksByCount.end(), E, + greater_second), + E); + if (TopStacksByCount.size() == 11) + TopStacksByCount.pop_back(); + } + } + for (const auto *C : Top->Callees) + S.push_back(C); + } + } + + // Now print the statistics in the end. + OS << "\n"; + OS << "Unique Stacks: " << UniqueStacks << "\n"; + OS << "Top 10 Stacks by leaf sum:\n\n"; + for (const auto &P : TopStacksBySum) { + OS << "Sum: " << P.second << "\n"; + printStack(OS, P.first, FN); + } + OS << "\n"; + OS << "Top 10 Stacks by leaf count:\n\n"; + for (const auto &P : TopStacksByCount) { + OS << "Count: " << P.second << "\n"; + printStack(OS, P.first, FN); + } + OS << "\n"; + } +}; + +std::string CreateErrorMessage(StackTrie::AccountRecordStatus Error, + const XRayRecord &Record, + const FuncIdConversionHelper &Converter) { + switch (Error) { + case StackTrie::AccountRecordStatus::ENTRY_NOT_FOUND: + return formatv("Found record {0} with no matching function entry\n", + format_xray_record(Record, Converter)); + default: + return formatv("Unknown error type for record {0}\n", + format_xray_record(Record, Converter)); + } +} + +static CommandRegistration Unused(&Stack, []() -> Error { + // Load each file provided as a command-line argument. For each one of them + // account to a single StackTrie, and just print the whole trie for now. + StackTrie ST; + InstrumentationMap Map; + if (!StacksInstrMap.empty()) { + auto InstrumentationMapOrError = loadInstrumentationMap(StacksInstrMap); + if (!InstrumentationMapOrError) + return joinErrors( + make_error( + Twine("Cannot open instrumentation map: ") + StacksInstrMap, + std::make_error_code(std::errc::invalid_argument)), + InstrumentationMapOrError.takeError()); + Map = std::move(*InstrumentationMapOrError); + } + + if (SeparateThreadStacks && AggregateThreads) + return make_error( + Twine("Can't specify options for per thread reporting and reporting " + "that aggregates threads."), + std::make_error_code(std::errc::invalid_argument)); + + if (!DumpAllStacks && StacksOutputFormat != HUMAN) + return make_error( + Twine("Can't specify a non-human format without -all-stacks."), + std::make_error_code(std::errc::invalid_argument)); + + if (DumpAllStacks && StacksOutputFormat == HUMAN) + return make_error( + Twine("You must specify a non-human format when reporting with " + "-all-stacks."), + std::make_error_code(std::errc::invalid_argument)); + + symbolize::LLVMSymbolizer::Options Opts( + symbolize::FunctionNameKind::LinkageName, true, true, false, ""); + symbolize::LLVMSymbolizer Symbolizer(Opts); + FuncIdConversionHelper FuncIdHelper(StacksInstrMap, Symbolizer, + Map.getFunctionAddresses()); + // TODO: Someday, support output to files instead of just directly to + // standard output. + for (const auto &Filename : StackInputs) { + auto TraceOrErr = loadTraceFile(Filename); + if (!TraceOrErr) { + if (!StackKeepGoing) + return joinErrors( + make_error( + Twine("Failed loading input file '") + Filename + "'", + std::make_error_code(std::errc::invalid_argument)), + TraceOrErr.takeError()); + logAllUnhandledErrors(TraceOrErr.takeError(), errs(), ""); + continue; + } + auto &T = *TraceOrErr; + StackTrie::AccountRecordState AccountRecordState = + StackTrie::AccountRecordState::CreateInitialState(); + for (const auto &Record : T) { + auto error = ST.accountRecord(Record, &AccountRecordState); + if (error != StackTrie::AccountRecordStatus::OK) { + if (!StackKeepGoing) + return make_error( + CreateErrorMessage(error, Record, FuncIdHelper), + make_error_code(errc::illegal_byte_sequence)); + errs() << CreateErrorMessage(error, Record, FuncIdHelper); + } + } + } + if (ST.isEmpty()) { + return make_error( + "No instrumented calls were accounted in the input file.", + make_error_code(errc::result_out_of_range)); + } + + // Report the stacks in a long form mode for another tool to analyze. + if (DumpAllStacks) { + if (AggregateThreads) { + switch (RequestedAggregation) { + case AggregationType::TOTAL_TIME: + ST.printAllAggregatingThreads( + outs(), FuncIdHelper, StacksOutputFormat); + break; + case AggregationType::INVOCATION_COUNT: + ST.printAllAggregatingThreads( + outs(), FuncIdHelper, StacksOutputFormat); + break; + } + } else { + switch (RequestedAggregation) { + case AggregationType::TOTAL_TIME: + ST.printAllPerThread(outs(), FuncIdHelper, + StacksOutputFormat); + break; + case AggregationType::INVOCATION_COUNT: + ST.printAllPerThread( + outs(), FuncIdHelper, StacksOutputFormat); + break; + } + } + return Error::success(); + } + + // We're only outputting top stacks. + if (AggregateThreads) { + ST.printAggregatingThreads(outs(), FuncIdHelper); + } else if (SeparateThreadStacks) { + ST.printPerThread(outs(), FuncIdHelper); + } else { + ST.printIgnoringThreads(outs(), FuncIdHelper); + } + return Error::success(); +});