Index: tools/CMakeLists.txt =================================================================== --- tools/CMakeLists.txt +++ tools/CMakeLists.txt @@ -39,6 +39,7 @@ add_llvm_tool_subdirectory(llvm-config) add_llvm_tool_subdirectory(llvm-lto) add_llvm_tool_subdirectory(llvm-profdata) +add_llvm_tool_subdirectory(xray) # Projects supported via LLVM_EXTERNAL_*_SOURCE_DIR need to be explicitly # specified. Index: tools/xray/CMakeLists.txt =================================================================== --- /dev/null +++ tools/xray/CMakeLists.txt @@ -0,0 +1,9 @@ +set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + Support + DebugInfoDWARF + Symbolize + Object) + +add_llvm_tool(xray-fc-account + xray-fc-account.cc) Index: tools/xray/xray-fc-account.cc =================================================================== --- /dev/null +++ tools/xray/xray-fc-account.cc @@ -0,0 +1,544 @@ +//===- xray-fc-account.cc - XRay Function Call Accounting Tool ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Function Call Accounting tool that takes an XRay +// trace and generates function call accounting statistics for that given trace. +// +//===----------------------------------------------------------------------===// +// +// Usage: +// +// xray-fc-account [-instr_map=] [-verbose] [-keep_going] +// [-format=csv/text] +// +// +// This tool computes some useful statistics regarding function calls present +// from an XRay trace file. The kinds of statistics this generates fall into the +// following categories: +// +// - Basic latency statistics (count, minimum, median, 90th percentile, 99th +// percentile, maximum). +// - Per-thread timestamp accounting (minimum and maximum timestamps per +// thread) +// - Per-CPU timestamp accounting (minimum and maximum timestamps per CPU) +// +// In '-verbose' mode, we also reconstruct the stack traces encountered from the +// input trace file showing entries of the following form: +// +// [,cpu=] Entry/Exit +// + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "llvm/DebugInfo/Symbolize/Symbolize.h" +#include "llvm/Object/ELF.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +static StringRef ToolName; + +static cl::opt Input(cl::Positional, cl::desc(""), + cl::Required); + +static cl::opt BinaryInstrMap( + "instr_map", + cl::desc("file containing an xray instrumentation map section"), + cl::value_desc("filename"), cl::Optional); +static cl::alias BinaryInstrMap2("m", cl::desc("Alias for -instr_map"), + cl::aliasopt(BinaryInstrMap)); +static cl::opt Verbose( + "verbose", + cl::desc( + "play-by-play logging of encountered records, useful for debugging"), + cl::init(false)); +static cl::alias Verbose2("v", cl::desc("Alias for -verbose"), + cl::aliasopt(Verbose)); + +static cl::opt + KeepGoing("keep_going", + cl::desc("print errors and continue processing log"), + cl::init(false)); +static cl::alias KeepGoing2("k", cl::desc("Alias for -keep_going"), + cl::aliasopt(KeepGoing)); + +enum OutputFormats { TEXT, CSV }; +static cl::opt OutputFormat( + "format", cl::desc("output format"), + cl::values(clEnumValN(OutputFormats::TEXT, "text", "text summary output"), + clEnumValN(OutputFormats::CSV, "csv", "output data in csv"), + clEnumValEnd), + cl::init(OutputFormats::TEXT)); + +LLVM_ATTRIBUTE_NORETURN static void fail(Twine Error) { + outs() << ToolName << ": " << Error << ".\n"; + exit(1); +} + +class FileCloser { + int Fd; + +public: + explicit FileCloser(int Fd) : Fd(Fd) {} + + ~FileCloser() { + if (Fd != -1) + close(Fd); + } +}; + +namespace __xray { +// FIXME: Make these available in a common header between compiler-rt and this +// tool? + +struct alignas(32) XRayFileHeader { + uint16_t Version = 0; + uint16_t Type = 0; + uint64_t CycleFrequency = 0; + bool ConstantTSC = 0; + bool NonstopTSC = 0; + char Padding[14] = {}; +} __attribute__((packed)); + +struct alignas(32) XRayRecord { + uint16_t RecordType = 0; + // The CPU where the thread is running. We assume number of CPUs <= 256. + uint8_t CPU = 0; + + // The type of the event. Usually either ENTER = 0 or EXIT = 1. + uint8_t Type = 0; + + // The function ID for the record. + int32_t FuncId = 0; + + // Get the full 8 bytes of the TSC when we get the log record. + uint64_t TSC = 0; + + // The thread ID for the currently running thread. + uint32_t TId = 0; + + // Use some bytes in the end of the record for buffers. + char Buffer[4] = {}; +} __attribute__((packed)); + +struct XRaySledEntry { + uint64_t Address; + uint64_t Function; + unsigned char Kind; + unsigned char AlwaysInstrument; + unsigned char Padding[14]; +}; +} + +typedef std::unordered_map FunctionAddressMap; + +using namespace __xray; + +FunctionAddressMap LoadBinaryInstr() { + FunctionAddressMap InstrMap; + if (BinaryInstrMap.empty()) { + errs() << ToolName << ": instrumentation map not provided, using raw " + "function IDs in output.\n"; + return InstrMap; + } + + auto ObjectFile = object::ObjectFile::createObjectFile(BinaryInstrMap); + + // FIXME: Maybe support other ELF formats. For now, 64-bit Little Endian only. + if (!ObjectFile || !ObjectFile->getBinary()->isELF()) + fail("File format not supported (only does ELF)."); + + // Find the section named "xray_instr_map". + StringRef Contents = ""; + const auto &Sections = ObjectFile->getBinary()->sections(); + auto I = llvm::find_if(Sections, [&](const object::SectionRef &Section) { + StringRef Name = ""; + if (Section.getName(Name)) + return false; + return Name == "xray_instr_map"; + }); + if (I == Sections.end()) + fail("Failed to find XRay instrumentation map."); + if (I->getContents(Contents)) + fail("Failed to get contents of 'xray_instr_map' section."); + + // Treat Contents as though it were an array of XRaySledEntry elements. + ArrayRef Sleds( + reinterpret_cast(Contents.data()), + Contents.size() / sizeof(XRaySledEntry)); + + // We replicate the function id generation scheme implemented in the runtime + // here. Ideally we should be able to break it out, or output this map from + // the runtime, but that's a design point we can discuss later on. For now, we + // replicate the logic and move on. + int32_t FuncId = 1; + uint64_t CurFn = 0; + for (const auto &Sled : Sleds) { + auto F = Sled.Function; + if (CurFn == 0) { + CurFn = F; + InstrMap[FuncId] = F; + } + if (F != CurFn) { + ++FuncId; + CurFn = F; + InstrMap[FuncId] = F; + } + } + return InstrMap; +} + +// This class consolidates common operations related to Function IDs. +class FuncIdConversionHelper { + symbolize::LLVMSymbolizer &Symbolizer; + const FunctionAddressMap &FunctionAddresses; + +public: + FuncIdConversionHelper(symbolize::LLVMSymbolizer &Symbolizer, + const FunctionAddressMap &FunctionAddresses) + : Symbolizer(Symbolizer), FunctionAddresses(FunctionAddresses) {} + + FuncIdConversionHelper(const FuncIdConversionHelper &) = default; + + std::string SymbolOrNumber(int32_t FuncId) const { + std::ostringstream F; + auto It = FunctionAddresses.find(FuncId); + if (It == FunctionAddresses.end()) { + F << "#" << FuncId; + return F.str(); + } + + auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, It->second); + if (!ResOrErr || ResOrErr.get().FunctionName == "") { + F << "@(" << std::hex << It->second << ")"; + } else { + auto &DI = ResOrErr.get(); + F << DI.FunctionName; + } + return F.str(); + } + + std::string FileLineAndColumn(int32_t FuncId) const { + auto It = FunctionAddresses.find(FuncId); + if (It == FunctionAddresses.end()) { + return "(unknown)"; + } + std::ostringstream F; + auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, It->second); + if (!ResOrErr || ResOrErr.get().FileName == "") + return "(unknown)"; + auto &DI = ResOrErr.get(); + auto LastSlash = DI.FileName.find_last_of('/'); + if (LastSlash != std::string::npos) { + F << DI.FileName.substr(LastSlash + 1); + } else { + F << DI.FileName; + } + F << ":" << DI.Line << ":" << DI.Column; + return F.str(); + } +}; + +// We consolidate the data into a struct which we can output in various forms. +struct ResultRow { + uint64_t Count; + double Min; + double Median; + double Pct90; + double Pct99; + double Max; + std::string DebugInfo; + std::string Function; +}; + +void PrintStatsCSV(const ResultRow &Row) { + outs() << Row.Count << ',' << Row.Min << ',' << Row.Median << ',' << Row.Pct90 + << ',' << Row.Pct99 << ',' << Row.Max << ",\"" << Row.DebugInfo + << "\"," << '"' << Row.Function << "\"\n"; +} + +void PrintStatsTEXT(const ResultRow &Row) { + outs() << Row.Count << "\t" + << "[" << Row.Min << ", " << Row.Median << ", " << Row.Pct90 << ", " + << Row.Pct99 << ", " << Row.Max << "] " << Row.DebugInfo << "\t" + << Row.Function << "\n"; +} + +void PrintStatsHeader(OutputFormats OF) { + if (OF == OutputFormats::TEXT) + outs() << "count\t[min,median,90%ile,99%ile,max]\tdebug\tfunction\n"; + if (OF == OutputFormats::CSV) + outs() << "count,min,median,90%ile,99%ile,max,debuginfo,function\n"; +} + +int main(int argc, char *argv[]) { + ToolName = argv[0]; + cl::ParseCommandLineOptions( + argc, argv, "XRay Function Call Accounting (xray-fc-account)\n\n" + " This program takes an XRay trace file and does some basic " + "function call accounting statistics.\n"); + + if (Input.empty()) + fail("No input file provided."); + + FunctionAddressMap FunctionAddresses = LoadBinaryInstr(); + if (OutputFormat == OutputFormats::TEXT) + outs() << "Loaded " << FunctionAddresses.size() + << " entries for the instrumentation map.\n"; + + int Fd; + do { + Fd = open(Input.c_str(), O_RDONLY); + } while (Fd == -1 && errno == EINTR); + if (Fd == -1) + fail(Twine("Failed to open file '") + Input + "'; errno=" + Twine(errno)); + FileCloser FC(Fd); + symbolize::LLVMSymbolizer::Options Opts( + symbolize::FunctionNameKind::LinkageName, true, true, false, ""); + symbolize::LLVMSymbolizer Symbolizer(Opts); + XRayFileHeader Header; + int ReadBytes = 0; + while ((ReadBytes = read(Fd, &Header, sizeof(XRayFileHeader))) == -1 && + errno == EINTR) { + // Do nothing. + } + if (ReadBytes == 0 || ReadBytes == -1) + fail(Twine("Failed to read file '") + Input + "'; " + Twine(errno)); + + XRayRecord Record; + std::map>> FunctionStack; + std::map> FunctionTimings; + std::map> ThreadMinMaxTSC; + std::map> CPUMinMaxTSC; + uint64_t CurTSC = 0; + FuncIdConversionHelper H(Symbolizer, FunctionAddresses); + while (auto ReadBytes = read(Fd, &Record, sizeof(XRayRecord))) { + if (ReadBytes == -1) { + if (errno == EINTR) + continue; + fail(Twine("Failed to read file '") + Input + "'; errno=" + Twine(errno)); + } + + if (CurTSC == 0) + CurTSC = Record.TSC; + + auto &ThreadFuncStack = FunctionStack[Record.TId]; + if (Verbose) { + outs() << "[tid=" << Record.TId << ",cpu=" << int32_t{Record.CPU} << "] " + << Record.TSC << ": " << (Record.Type == 0 ? "E" : "X") + << Record.FuncId << "@("; + auto FuncAddr = FunctionAddresses.find(Record.FuncId); + if (FuncAddr == FunctionAddresses.end()) { + outs() << "unknown"; + } else { + outs().write_hex(FuncAddr->second); + } + outs() << ") "; + + // Let's indent according to stack depth for this thread. For Entry, we + // just add the correct number of indents. For Exit, we reduce it by 1. + for (size_t i = 0 + Record.Type; i < ThreadFuncStack.size(); i++) { + outs() << " "; + } + outs() << H.SymbolOrNumber(Record.FuncId) << "\n"; + } + + // Record Thread min/max TSC's. + { + auto &MinMaxTSC = ThreadMinMaxTSC[Record.TId]; + if (MinMaxTSC.first == 0 || MinMaxTSC.second == 0) + MinMaxTSC = std::make_pair(Record.TSC, Record.TSC); + MinMaxTSC = std::make_pair(std::min(MinMaxTSC.first, Record.TSC), + std::max(MinMaxTSC.second, Record.TSC)); + } + + // Record CPU min/max TSC's. + { + auto &MinMaxTSC = CPUMinMaxTSC[Record.CPU]; + if (MinMaxTSC.first == 0 || MinMaxTSC.second == 0) + MinMaxTSC = std::make_pair(Record.TSC, Record.TSC); + MinMaxTSC = std::make_pair(std::min(MinMaxTSC.first, Record.TSC), + std::max(MinMaxTSC.second, Record.TSC)); + } + + if (Record.TSC < CurTSC) { + errs() << "Record TSC (" << Record.TSC << ") < Latest TSC (" << CurTSC + << ")\n"; + if (!KeepGoing) + fail("Out-of-order TSC records found."); + } + + if (Record.Type == 0) + FunctionStack[Record.TId].push_back({Record.FuncId, Record.TSC}); + + if (Record.Type == 1) { + if (FunctionStack[Record.TId].back().first != Record.FuncId) { + // This could be a tail call, so look up the stack to see whether we've + // found an entry that shouldn't be there. + auto Parent = + std::find_if(ThreadFuncStack.rbegin(), ThreadFuncStack.rend(), + [&](const std::pair &Entry) { + return Entry.first == Record.FuncId; + }); + if (Parent != ThreadFuncStack.rend()) { + auto It = ThreadFuncStack.rbegin(); + while (It <= Parent) { + const auto &Top = *It; + int64_t Timing = std::max(Top.second, Record.TSC) - + std::min(Top.second, Record.TSC); + FunctionTimings[Top.first].push_back(Timing); + ThreadFuncStack.pop_back(); + It = ThreadFuncStack.rbegin(); + } + } else { + errs() << ToolName << ": Encountered X" << Record.FuncId << "(" + << H.SymbolOrNumber(Record.FuncId) << ") not matching E" + << ThreadFuncStack.back().first << "(" + << H.SymbolOrNumber(ThreadFuncStack.back().first) << ")\n"; + errs() << "Current stack dump:\n"; + for (auto &ThreadStack : FunctionStack) { + errs() << "Thread ID: " << ThreadStack.first << "\n"; + auto Level = ThreadStack.second.size(); + for (const auto &Entry : llvm::reverse(ThreadStack.second)) { + errs() << "#" << Level-- << "\t" << H.SymbolOrNumber(Entry.first) + << '\n'; + } + } + if (!KeepGoing) + fail("Corrupted input file found."); + } + } else { + const auto &Top = FunctionStack[Record.TId].back(); + int64_t Timing = + std::max(Top.second, Record.TSC) - std::min(Top.second, Record.TSC); + auto &Timings = FunctionTimings[Record.FuncId]; + Timings.push_back(Timing); + ThreadFuncStack.pop_back(); + } + } + CurTSC = Record.TSC; + } + + bool HasCyclesPerSec = Header.CycleFrequency != 0; + auto CyclesPerSec = static_cast(Header.CycleFrequency); + + if (OutputFormat == OutputFormats::TEXT && HasCyclesPerSec) + outs() << "Cycle counter frequency = " << Header.CycleFrequency << "hz\n" + << "Durations below are in seconds.\n"; + + if (!FunctionTimings.empty()) { + if (OutputFormat == OutputFormats::TEXT) + outs() << "\nTotal functions with timings: " << FunctionTimings.size() + << '\n'; + PrintStatsHeader(OutputFormat); + for (auto &FT : FunctionTimings) { + const auto &FuncId = FT.first; + auto &Timings = FT.second; + std::nth_element(Timings.begin(), Timings.begin(), Timings.end()); + auto Min = Timings.front(); + auto MedianOff = Timings.size() / 2; + std::nth_element(Timings.begin(), Timings.begin() + MedianOff, + Timings.end()); + auto Median = Timings[MedianOff]; + auto Pct90Off = std::floor(Timings.size() * 0.9); + std::nth_element(Timings.begin(), Timings.begin() + Pct90Off, + Timings.end()); + auto Pct90 = Timings[Pct90Off]; + auto Pct99Off = std::floor(Timings.size() * 0.99); + std::nth_element(Timings.begin(), Timings.begin() + Pct99Off, + Timings.end()); + auto Pct99 = Timings[Pct99Off]; + std::nth_element(Timings.begin(), Timings.begin() + Timings.size(), + Timings.end()); + auto Max = Timings.back(); + ResultRow Row; + Row.Count = Timings.size(); + Row.Min = Min; + Row.Median = Median; + Row.Pct90 = Pct90; + Row.Pct99 = Pct99; + Row.Max = Max; + if (HasCyclesPerSec) { + Row.Min /= CyclesPerSec; + Row.Median /= CyclesPerSec; + Row.Pct90 /= CyclesPerSec; + Row.Pct99 /= CyclesPerSec; + Row.Max /= CyclesPerSec; + } + Row.DebugInfo = H.FileLineAndColumn(FuncId); + Row.Function = H.SymbolOrNumber(FuncId); + switch (OutputFormat) { + case OutputFormats::TEXT: + PrintStatsTEXT(Row); + break; + case OutputFormats::CSV: + PrintStatsCSV(Row); + break; + }; + } + } + + // Print out thread total durations. + if (!ThreadMinMaxTSC.empty()) { + if (OutputFormat == OutputFormats::TEXT) + outs() << "\nTotal threads: " << ThreadMinMaxTSC.size() + << "\nTID\t(min,max)\tduration in secs\n"; + if (OutputFormat == OutputFormats::CSV) + outs() << "\nTID,min tsc,max tsc,duration\n"; + for (auto &Thread : ThreadMinMaxTSC) { + auto &MinMax = Thread.second; + if (OutputFormat == OutputFormats::TEXT) + outs() << Thread.first << "\t(" << MinMax.first << ", " << MinMax.second + << ")\t"; + if (OutputFormat == OutputFormats::CSV) + outs() << Thread.first << ',' << MinMax.first << ',' << MinMax.second + << ','; + if (HasCyclesPerSec) { + outs() << (MinMax.second - MinMax.first) / CyclesPerSec; + } else { + outs() << (MinMax.second - MinMax.first); + } + outs() << '\n'; + } + } + + // Print out CPU start/end ranges and durations. + if (!CPUMinMaxTSC.empty()) { + if (OutputFormat == OutputFormats::TEXT) + outs() << "\nTotal CPUs: " << CPUMinMaxTSC.size() + << "\nCPUID\t(min,max)\tduration in secs\n"; + if (OutputFormat == OutputFormats::CSV) + outs() << "\nCPUID,min tsc,max tsc,duration\n"; + for (auto &CPU : CPUMinMaxTSC) { + auto &MinMax = CPU.second; + if (OutputFormat == OutputFormats::TEXT) + outs() << static_cast(CPU.first) << "\t(" << MinMax.first + << ", " << MinMax.second << ")\t"; + if (OutputFormat == OutputFormats::CSV) + outs() << static_cast(CPU.first) << ',' << MinMax.first << ',' + << MinMax.second << ','; + if (HasCyclesPerSec) { + outs() << (MinMax.second - MinMax.first) / CyclesPerSec; + } else { + outs() << (MinMax.second - MinMax.first); + } + outs() << '\n'; + } + } +}