Index: tools/CMakeLists.txt
===================================================================
--- tools/CMakeLists.txt
+++ tools/CMakeLists.txt
@@ -39,6 +39,7 @@
 add_llvm_tool_subdirectory(llvm-config)
 add_llvm_tool_subdirectory(llvm-lto)
 add_llvm_tool_subdirectory(llvm-profdata)
+add_llvm_tool_subdirectory(xray)
 
 # Projects supported via LLVM_EXTERNAL_*_SOURCE_DIR need to be explicitly
 # specified.
Index: tools/xray/CMakeLists.txt
===================================================================
--- /dev/null
+++ tools/xray/CMakeLists.txt
@@ -0,0 +1,9 @@
+set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
+  Support
+  DebugInfoDWARF
+  Symbolize
+  Object)
+
+add_llvm_tool(xray-fc-account
+  xray-fc-account.cc)
Index: tools/xray/xray-fc-account.cc
===================================================================
--- /dev/null
+++ tools/xray/xray-fc-account.cc
@@ -0,0 +1,523 @@
+//===- xray-fc-account.cc - XRay Function Call Accounting Tool ------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Function Call Accounting tool that takes an XRay
+// trace and generates function call accounting statistics for that given trace.
+//
+//===----------------------------------------------------------------------===//
+
+#include <algorithm>
+#include <fcntl.h>
+#include <map>
+#include <sstream>
+#include <stack>
+#include <unistd.h>
+#include <unordered_map>
+#include <utility>
+
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+static StringRef ToolName;
+
+static cl::opt<std::string> Input(cl::Positional, cl::desc("<input file>"),
+                                  cl::Required);
+
+static cl::opt<std::string> BinaryInstrMap(
+    "instr_map",
+    cl::desc("file containing an xray instrumentation map section"),
+    cl::value_desc("filename"), cl::Optional);
+static cl::alias BinaryInstrMap2("m", cl::desc("Alias for -instr_map"),
+                                 cl::aliasopt(BinaryInstrMap));
+static cl::opt<bool> Verbose(
+    "verbose",
+    cl::desc(
+        "play-by-play logging of encountered records, useful for debugging"),
+    cl::init(false));
+static cl::alias Verbose2("v", cl::desc("Alias for -verbose"),
+                          cl::aliasopt(Verbose));
+
+static cl::opt<bool>
+    KeepGoing("keep_going",
+              cl::desc("print errors and continue processing log"),
+              cl::init(false));
+static cl::alias KeepGoing2("k", cl::desc("Alias for -keep_going"),
+                            cl::aliasopt(KeepGoing));
+
+enum OutputFormats { TEXT, CSV };
+static cl::opt<OutputFormats> OutputFormat(
+    "format", cl::desc("output format"),
+    cl::values(clEnumValN(OutputFormats::TEXT, "text", "text summary output"),
+               clEnumValN(OutputFormats::CSV, "csv", "output data in csv"),
+               clEnumValEnd),
+    cl::init(OutputFormats::TEXT));
+
+LLVM_ATTRIBUTE_NORETURN static void fail(Twine Error) {
+  outs() << ToolName << ": " << Error << ".\n";
+  exit(1);
+}
+
+class FileCloser {
+  int Fd;
+
+public:
+  explicit FileCloser(int Fd) : Fd(Fd) {}
+
+  ~FileCloser() noexcept {
+    if (Fd != -1)
+      close(Fd);
+  }
+};
+
+namespace __xray {
+
+// FIXME: Make this available in a common header between compiler-rt and this
+// tool.
+struct alignas(32) XRayFileHeader {
+  uint16_t Version = 0;
+  uint16_t Type = 0;
+  uint64_t CycleFrequency = 0;
+  bool ConstantTSC = 0;
+  bool NonstopTSC = 0;
+  char Padding[14] = {};
+} __attribute__((packed));
+
+struct alignas(32) XRayRecord {
+  uint16_t RecordType = 0;
+  // The CPU where the thread is running. We assume number of CPUs <= 256.
+  uint8_t CPU = 0;
+
+  // The type of the event. Usually either ENTER = 0 or EXIT = 1.
+  uint8_t Type = 0;
+
+  // The function ID for the record.
+  int32_t FuncId = 0;
+
+  // Get the full 8 bytes of the TSC when we get the log record.
+  uint64_t TSC = 0;
+
+  // The thread ID for the currently running thread.
+  uint32_t TId = 0;
+
+  // Use some bytes in the end of the record for buffers.
+  char Buffer[4] = {};
+} __attribute__((packed));
+
+// FIXME: Make this available in a common header between compiler-rt and this
+// tool.
+struct XRaySledEntry {
+  uint64_t Address;
+  uint64_t Function;
+  unsigned char Kind;
+  unsigned char AlwaysInstrument;
+  unsigned char Padding[14];
+};
+}
+
+using FunctionAddressMap = std::unordered_map<int32_t, uint64_t>;
+
+FunctionAddressMap LoadBinaryInstr() {
+  FunctionAddressMap InstrMap;
+  if (BinaryInstrMap.empty()) {
+    errs() << ToolName << ": instrumentation map not provided, using raw "
+                          "function IDs in output.\n";
+    return InstrMap;
+  }
+
+  auto ObjectFile = object::ObjectFile::createObjectFile(BinaryInstrMap);
+
+  // FIXME: Maybe support other ELF formats. For now, 64-bit Little Endian only.
+  if (!ObjectFile || !ObjectFile->getBinary()->isELF())
+    fail("File format not supported (only does ELF).");
+
+  // Find the section named "xray_instr_map".
+  StringRef Contents = "";
+  for (const auto &Section : ObjectFile->getBinary()->sections()) {
+    StringRef Name = "";
+    if (Section.getName(Name))
+      continue;
+
+    if (Name != "xray_instr_map")
+      continue;
+
+    // Iterate through the elements of the section, coercing it into
+    // __xray::XraySledEntry objects.
+    if (Section.getContents(Contents))
+      fail("Failed to get contents of 'xray_instr_map' section.");
+
+    break;
+  }
+
+  if (Contents.empty())
+    fail("Failed to find XRay instrumentation map.");
+
+  // Treat Contents as though it were an array of __xray::XRaySledEntry
+  // elements.
+  const __xray::XRaySledEntry *Sleds =
+      reinterpret_cast<const __xray::XRaySledEntry *>(Contents.data());
+  size_t Entries = Contents.size() / sizeof(__xray::XRaySledEntry);
+
+  // We replicate the function id generation scheme implemented in the runtime
+  // here. Ideally we should be able to break it out, or output this map from
+  // the runtime, but that's a design point we can discuss later on. For now, we
+  // replicate the logic and move on.
+  int32_t FuncId = 1;
+  uint64_t CurFn = 0;
+  for (size_t I = 0; I < Entries; ++I) {
+    const auto &Sled = Sleds[I];
+    auto F = Sled.Function;
+    if (CurFn == 0) {
+      CurFn = F;
+      InstrMap[FuncId] = F;
+    }
+    if (F != CurFn) {
+      ++FuncId;
+      CurFn = F;
+      InstrMap[FuncId] = F;
+    }
+  }
+  return InstrMap;
+}
+
+std::string SymbolOrNumber(symbolize::LLVMSymbolizer &Symbolizer,
+                           const FunctionAddressMap &FunctionAddresses,
+                           int32_t FuncId) {
+  std::ostringstream F;
+  auto It = FunctionAddresses.find(FuncId);
+  if (It == FunctionAddresses.end()) {
+    F << "#" << FuncId;
+    return F.str();
+  }
+
+  auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, It->second);
+  if (!ResOrErr || ResOrErr.get().FunctionName == "<invalid>") {
+    F << "@(" << std::hex << It->second << ")";
+  } else {
+    auto &DI = ResOrErr.get();
+    F << DI.FunctionName;
+  }
+  return F.str();
+}
+
+std::string FileLineAndColumn(symbolize::LLVMSymbolizer &Symbolizer,
+                              const FunctionAddressMap &FunctionAddresses,
+                              int32_t FuncId) {
+  auto It = FunctionAddresses.find(FuncId);
+  if (It == FunctionAddresses.end()) {
+    return "(unknown)";
+  }
+  std::ostringstream F;
+  auto ResOrErr = Symbolizer.symbolizeCode(BinaryInstrMap, It->second);
+  if (!ResOrErr || ResOrErr.get().FileName == "<invalid>")
+    return "(unknown)";
+  auto &DI = ResOrErr.get();
+  auto LastSlash = DI.FileName.find_last_of('/');
+  if (LastSlash != std::string::npos) {
+    F << DI.FileName.substr(LastSlash + 1);
+  } else {
+    F << DI.FileName;
+  }
+  F << ":" << DI.Line << ":" << DI.Column;
+  return F.str();
+}
+
+int main(int argc, char *argv[]) {
+  ToolName = argv[0];
+  cl::ParseCommandLineOptions(
+      argc, argv, "XRay Function Call Accounting (xray-fc-account)\n\n"
+                  "  This program takes an XRay trace file and does some basic "
+                  "function call accounting statistics.\n");
+
+  if (Input.empty())
+    fail("No input file provided.");
+
+  FunctionAddressMap FunctionAddresses = LoadBinaryInstr();
+  if (OutputFormat == OutputFormats::TEXT)
+    outs() << "Loaded " << FunctionAddresses.size()
+           << " entries for the instrumentation map.\n";
+
+  int Fd = open(Input.c_str(), O_RDONLY);
+  while (Fd == -1 && errno == EINTR) {
+    Fd = open(Input.c_str(), O_RDONLY);
+  }
+  if (Fd == -1)
+    fail(Twine("Failed to open file '") + Input + "'; errno=" + Twine(errno));
+  FileCloser FC(Fd);
+
+  symbolize::LLVMSymbolizer::Options Opts(
+      symbolize::FunctionNameKind::LinkageName, true, true, false, "");
+  symbolize::LLVMSymbolizer Symbolizer(Opts);
+
+  __xray::XRayFileHeader Header;
+  bool ReadHeader = false;
+  while (auto ReadBytes = read(Fd, &Header, sizeof(__xray::XRayFileHeader))) {
+    if (ReadBytes == -1) {
+      if (errno == EINTR)
+        continue;
+      fail(Twine("Failed to read file '") + Input + "'; " + Twine(errno));
+    }
+    ReadHeader = true;
+    break;
+  }
+  if (!ReadHeader) {
+    fail(Twine("Failed to read file '") + Input + "'; " + Twine(errno));
+  }
+  __xray::XRayRecord Record;
+  std::map<pid_t, std::vector<std::pair<int32_t, uint64_t>>> FunctionStack;
+  std::map<int32_t, std::vector<int64_t>> FunctionTimings;
+  std::map<pid_t, std::pair<uint64_t, uint64_t>> ThreadMinMaxTSC;
+  std::map<uint8_t, std::pair<uint64_t, uint64_t>> CPUMinMaxTSC;
+  uint64_t CurTSC = 0;
+  while (auto ReadBytes = read(Fd, &Record, sizeof(__xray::XRayRecord))) {
+    if (ReadBytes == -1) {
+      if (errno == EINTR)
+        continue;
+      fail(Twine("Failed to read file '") + Input + "'; errno=" + Twine(errno));
+    }
+
+    if (CurTSC == 0)
+      CurTSC = Record.TSC;
+
+    auto &ThreadFuncStack = FunctionStack[Record.TId];
+    if (Verbose) {
+      auto FuncAddr = FunctionAddresses.find(Record.FuncId);
+      outs() << "[tid=" << Record.TId << ",cpu=" << int32_t{Record.CPU} << "] "
+             << Record.TSC << ": " << (Record.Type == 0 ? "E" : "X")
+             << Record.FuncId << "@(";
+      if (FuncAddr == FunctionAddresses.end()) {
+        outs() << "unknown";
+      } else {
+        outs().write_hex(FuncAddr->second);
+      }
+      outs() << ") ";
+
+      // Let's indent according to stack depth for this thread. For Entry, we
+      // just add the correct number of indents. For Exit, we reduce it by 1.
+      for (size_t I = 0 + Record.Type; I < ThreadFuncStack.size(); I++) {
+        outs() << "  ";
+      }
+      outs() << SymbolOrNumber(Symbolizer, FunctionAddresses, Record.FuncId)
+             << "\n";
+    }
+
+    // Record Thread min/max TSC's.
+    {
+      auto &MinMaxTSC = ThreadMinMaxTSC[Record.TId];
+      if (MinMaxTSC.first == 0 || MinMaxTSC.second == 0)
+        MinMaxTSC = std::make_pair(Record.TSC, Record.TSC);
+      MinMaxTSC = std::make_pair(std::min(MinMaxTSC.first, Record.TSC),
+                                 std::max(MinMaxTSC.second, Record.TSC));
+    }
+
+    // Record CPU min/max TSC's.
+    {
+      auto &MinMaxTSC = CPUMinMaxTSC[Record.CPU];
+      if (MinMaxTSC.first == 0 || MinMaxTSC.second == 0)
+        MinMaxTSC = std::make_pair(Record.TSC, Record.TSC);
+      MinMaxTSC = std::make_pair(std::min(MinMaxTSC.first, Record.TSC),
+                                 std::max(MinMaxTSC.second, Record.TSC));
+    }
+
+    if (Record.TSC < CurTSC) {
+      errs() << "Record TSC (" << Record.TSC << ") < Latest TSC (" << CurTSC
+             << ")\n";
+      if (!KeepGoing)
+        fail("Out-of-order TSC records found.");
+    }
+
+    if (Record.Type == 0)
+      FunctionStack[Record.TId].push_back({Record.FuncId, Record.TSC});
+
+    if (Record.Type == 1) {
+      if (FunctionStack[Record.TId].back().first != Record.FuncId) {
+        // This could be a tail call, so look up the stack to see whether we've
+        // found an entry that shouldn't be there.
+        auto Parent = std::find_if(
+            ThreadFuncStack.rbegin(), ThreadFuncStack.rend(),
+            [&Record](const std::pair<const int32_t, uint64_t> &Entry) {
+              return Entry.first == Record.FuncId;
+            });
+        if (Parent != ThreadFuncStack.rend()) {
+          auto It = ThreadFuncStack.rbegin();
+          while (It <= Parent) {
+            const auto &Top = *It;
+            int64_t Timing = std::max(Top.second, Record.TSC) -
+                             std::min(Top.second, Record.TSC);
+            FunctionTimings[Top.first].push_back(Timing);
+            ThreadFuncStack.pop_back();
+            It = ThreadFuncStack.rbegin();
+          }
+        } else {
+          errs() << ToolName << ": Encountered X" << Record.FuncId << "("
+                 << SymbolOrNumber(Symbolizer, FunctionAddresses, Record.FuncId)
+                 << ") not matching E" << ThreadFuncStack.back().first << "("
+                 << SymbolOrNumber(Symbolizer, FunctionAddresses,
+                                   ThreadFuncStack.back().first)
+                 << ")\n";
+          errs() << "Current stack dump:\n";
+          for (auto &ThreadStack : FunctionStack) {
+            errs() << "Thread ID: " << ThreadStack.first << "\n";
+            auto Level = ThreadStack.second.size();
+            for (auto I = ThreadStack.second.rbegin();
+                 I != ThreadStack.second.rend(); I++) {
+              errs() << "#" << Level-- << "\t"
+                     << SymbolOrNumber(Symbolizer, FunctionAddresses, I->first)
+                     << '\n';
+            }
+          }
+          if (!KeepGoing)
+            fail("Corrupted input file found.");
+        }
+      } else {
+        const auto &Top = FunctionStack[Record.TId].back();
+        int64_t Timing =
+            std::max(Top.second, Record.TSC) - std::min(Top.second, Record.TSC);
+        auto &Timings = FunctionTimings[Record.FuncId];
+        Timings.push_back(Timing);
+        ThreadFuncStack.pop_back();
+      }
+    }
+    CurTSC = Record.TSC;
+  }
+
+  bool HasCyclesPerSec = Header.CycleFrequency != 0;
+  auto CyclesPerSec = static_cast<double>(Header.CycleFrequency);
+
+  if (OutputFormat == OutputFormats::TEXT && HasCyclesPerSec) {
+    outs() << "Cycle counter frequency = " << Header.CycleFrequency << "hz\n"
+           << "Durations below are in seconds.\n";
+  }
+
+  if (!FunctionTimings.empty()) {
+    if (OutputFormat == OutputFormats::TEXT)
+      outs() << "Total functions with records: " << FunctionTimings.size()
+             << "\ncount\t[min,median,90%ile,99%ile,max]\tdebug\tfunction\n";
+    if (OutputFormat == OutputFormats::CSV)
+      outs() << "count,min,median,90%ile,99%ile,max,debuginfo,function\n";
+    for (auto &FT : FunctionTimings) {
+      const auto &FuncId = FT.first;
+      auto &Timings = FT.second;
+      outs() << Timings.size();
+      if (OutputFormat == OutputFormats::TEXT)
+        outs() << '\t';
+      if (OutputFormat == OutputFormats::CSV)
+        outs() << ',';
+      std::nth_element(Timings.begin(), Timings.begin(), Timings.end());
+      auto Min = Timings.front();
+      auto MedianOff = std::distance(Timings.begin(), Timings.end()) / 2;
+      std::nth_element(Timings.begin(), Timings.begin() + (MedianOff),
+                       Timings.end());
+      auto Median = Timings[MedianOff];
+      auto Pct90Off =
+          std::floor(std::distance(Timings.begin(), Timings.end()) * 0.9);
+      std::nth_element(Timings.begin(), Timings.begin() + Pct90Off,
+                       Timings.end());
+      auto Pct90 = Timings[Pct90Off];
+      auto Pct99Off =
+          std::floor(std::distance(Timings.begin(), Timings.end()) * 0.99);
+      std::nth_element(Timings.begin(), Timings.begin() + Pct99Off,
+                       Timings.end());
+      auto Pct99 = Timings[Pct99Off];
+      std::nth_element(Timings.begin(),
+                       Timings.begin() +
+                           std::distance(Timings.begin(), Timings.end()),
+                       Timings.end());
+      auto Max = Timings.back();
+      if (HasCyclesPerSec) {
+        auto MinCPS = Min / CyclesPerSec;
+        auto MedianCPS = Median / CyclesPerSec;
+        auto Pct90CPS = Pct90 / CyclesPerSec;
+        auto Pct99CPS = Pct99 / CyclesPerSec;
+        auto MaxCPS = Max / CyclesPerSec;
+        if (OutputFormat == OutputFormats::TEXT)
+          outs() << "[" << MinCPS << ", " << MedianCPS << ", " << Pct90CPS
+                 << ", " << Pct99CPS << ", " << MaxCPS << "] ";
+        if (OutputFormat == OutputFormats::CSV)
+          outs() << MinCPS << ',' << MedianCPS << ',' << Pct90CPS << ','
+                 << Pct99CPS << ',' << MaxCPS << ',';
+      } else {
+        if (OutputFormat == OutputFormats::TEXT)
+          outs() << "[" << Min << ", " << Median << ", " << Pct90 << ", "
+                 << Pct99 << ", " << Max << "] ";
+        if (OutputFormat == OutputFormats::CSV)
+          outs() << Min << ',' << Median << ',' << Pct90 << ',' << Pct99 << ','
+                 << Max << ',';
+      }
+      outs() << FileLineAndColumn(Symbolizer, FunctionAddresses, FuncId);
+      if (OutputFormat == OutputFormats::TEXT)
+        outs() << "\t";
+      if (OutputFormat == OutputFormats::CSV)
+        outs() << ',';
+      if (FunctionAddresses.count(FuncId)) {
+        auto FuncName = SymbolOrNumber(Symbolizer, FunctionAddresses, FuncId);
+        if (OutputFormat == OutputFormats::TEXT)
+          outs() << FuncName;
+        if (OutputFormat == OutputFormats::CSV)
+          outs() << '"' << FuncName << '"';
+      } else {
+        outs() << "#" << FuncId;
+      }
+      outs() << "\n";
+    }
+  }
+
+  // Print out thread total durations.
+  if (!ThreadMinMaxTSC.empty()) {
+    if (OutputFormat == OutputFormats::TEXT)
+      outs() << "\nTotal threads: " << ThreadMinMaxTSC.size()
+             << "\nTID\t(min,max)\tduration in secs\n";
+    if (OutputFormat == OutputFormats::CSV)
+      outs() << "\nTID,min tsc,max tsc,duration\n";
+    for (auto &Thread : ThreadMinMaxTSC) {
+      auto &MinMax = Thread.second;
+      if (OutputFormat == OutputFormats::TEXT)
+        outs() << Thread.first << "\t(" << MinMax.first << ", " << MinMax.second
+               << ")\t";
+      if (OutputFormat == OutputFormats::CSV)
+        outs() << Thread.first << ',' << MinMax.first << ',' << MinMax.second
+               << ',';
+      if (HasCyclesPerSec) {
+        outs() << (MinMax.second - MinMax.first) / CyclesPerSec;
+      } else {
+        outs() << (MinMax.second - MinMax.first);
+      }
+      outs() << '\n';
+    }
+  }
+
+  // Print out CPU start/end ranges and durations.
+  if (!CPUMinMaxTSC.empty()) {
+    if (OutputFormat == OutputFormats::TEXT)
+      outs() << "\nTotal CPUs: " << CPUMinMaxTSC.size()
+             << "\nCPUID\t(min,max)\tduration in secs\n";
+    if (OutputFormat == OutputFormats::CSV)
+      outs() << "\nCPUID,min tsc,max tsc,duration\n";
+    for (auto &CPU : CPUMinMaxTSC) {
+      auto &MinMax = CPU.second;
+      if (OutputFormat == OutputFormats::TEXT)
+        outs() << static_cast<uint32_t>(CPU.first) << "\t(" << MinMax.first
+               << ", " << MinMax.second << ")\t";
+      if (OutputFormat == OutputFormats::CSV)
+        outs() << static_cast<uint32_t>(CPU.first) << ',' << MinMax.first << ','
+               << MinMax.second << ',';
+      if (HasCyclesPerSec) {
+        outs() << (MinMax.second - MinMax.first) / CyclesPerSec;
+      } else {
+        outs() << (MinMax.second - MinMax.first);
+      }
+      outs() << '\n';
+    }
+  }
+}