Index: llvm/include/llvm/XRay/Profile.h
===================================================================
--- /dev/null
+++ llvm/include/llvm/XRay/Profile.h
@@ -0,0 +1,109 @@
+//===- Profile.h - XRay Profile Abstraction -------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the XRay Profile class representing the latency profile generated by
+// XRay's profiling mode.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_XRAY_PROFILE_H
+#define LLVM_XRAY_PROFILE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include <list>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+namespace xray {
+
+class Profile;
+
+/// This function will attempt to load an XRay Profiling Mode profile from the
+/// provided |Filename|.
+Expected<Profile> loadProfile(StringRef Filename);
+
+/// This algorithm will merge two Profile instances into a single Profile
+/// instance, aggregating blocks by Thread ID.
+Profile mergeProfilesByThread(const Profile &L, const Profile &R);
+
+/// Profile instances are thread-compatible.
+class Profile {
+public:
+  using ThreadID = uint64_t;
+  using PathID = unsigned;
+  using FuncID = int32_t;
+
+  struct Data {
+    uint64_t CallCount;
+    uint64_t CumulativeLocalTime;
+  };
+
+  struct Block {
+    ThreadID Thread;
+    std::vector<std::pair<PathID, Data>> PathData;
+  };
+
+  /// Provides a sequence of function IDs from a previously interned PathID.
+  Expected<std::vector<FuncID>> path(PathID P) const;
+
+  /// The stack represented in |P| must be in stack order (leaf to root).
+  PathID internPath(ArrayRef<FuncID> P);
+
+  /// Appends a fully-formed Block instance into the Profile.
+  Error addBlock(Block &&B);
+
+  // No copies.
+  Profile(const Profile &) = delete;
+  Profile &operator=(const Profile &) = delete;
+
+  // Move only.
+  Profile(Profile &&) noexcept = default;
+  Profile &operator=(Profile &&) noexcept = default;
+
+  Profile() = default;
+  ~Profile() = default;
+
+private:
+  using BlockList = std::list<Block>;
+
+  struct TrieNode {
+    FuncID Func = 0;
+    std::vector<TrieNode *> Callees{};
+    TrieNode *Caller = nullptr;
+    PathID ID = 0;
+  };
+
+  // List of blocks associated with a Profile.
+  BlockList Blocks;
+
+  // List of TrieNode elements we've seen.
+  std::list<TrieNode> NodeStorage;
+
+  // List of call stack roots.
+  SmallVector<TrieNode *, 4> Roots;
+
+  // Reverse mapping between a PathID to a TrieNode*.
+  DenseMap<PathID, TrieNode *> PathIDMap;
+
+  // Used to increment
+  PathID NextID = 1;
+
+public:
+  using const_iterator = BlockList::const_iterator;
+  const_iterator begin() const { return Blocks.begin(); }
+  const_iterator end() const { return Blocks.end(); }
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif
Index: llvm/include/llvm/XRay/Trace.h
===================================================================
--- llvm/include/llvm/XRay/Trace.h
+++ llvm/include/llvm/XRay/Trace.h
@@ -18,7 +18,6 @@
 
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/FileSystem.h"
 #include "llvm/XRay/XRayRecord.h"
 
 namespace llvm {
@@ -62,7 +61,9 @@
 };
 
 /// This function will attempt to load XRay trace records from the provided
-/// |Filename|.
+/// |Filename|. If the file provided contains an XRay Profile, this will error
+/// out and suggest to the user that the llvm/XRay/Profile.h header and
+/// `loadProfile(...)` function be called instead.
 Expected<Trace> loadTraceFile(StringRef Filename, bool Sort = false);
 
 } // namespace xray
Index: llvm/lib/XRay/CMakeLists.txt
===================================================================
--- llvm/lib/XRay/CMakeLists.txt
+++ llvm/lib/XRay/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_llvm_library(LLVMXRay
   InstrumentationMap.cpp
+	Profile.cpp
   Trace.cpp
 
   ADDITIONAL_HEADER_DIRS
Index: llvm/lib/XRay/Profile.cpp
===================================================================
--- /dev/null
+++ llvm/lib/XRay/Profile.cpp
@@ -0,0 +1,280 @@
+//===- Profile.cpp - XRay Profile Abstraction -----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the XRay Profile class representing the latency profile generated by
+// XRay's profiling mode.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/Profile.h"
+
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include <deque>
+#include <memory>
+
+namespace llvm {
+namespace xray {
+
+namespace {
+
+struct BlockHeader {
+  uint32_t Size;
+  uint32_t Number;
+  uint64_t Thread;
+};
+
+static Expected<BlockHeader> readBlockHeader(DataExtractor &Extractor,
+                                             uint32_t &Offset) {
+  BlockHeader H;
+  uint32_t CurrentOffset = Offset;
+  H.Size = Extractor.getU32(&Offset);
+  if (Offset == CurrentOffset)
+    return make_error<StringError>(
+        Twine("Error parsing block header size at offset '") +
+            Twine(CurrentOffset) + "'",
+        std::make_error_code(std::errc::invalid_argument));
+  CurrentOffset = Offset;
+  H.Number = Extractor.getU32(&Offset);
+  if (Offset == CurrentOffset)
+    return make_error<StringError>(
+        Twine("Error parsing block header number at offset '") +
+            Twine(CurrentOffset) + "'",
+        std::make_error_code(std::errc::invalid_argument));
+  CurrentOffset = Offset;
+  H.Thread = Extractor.getU64(&Offset);
+  if (Offset == CurrentOffset)
+    return make_error<StringError>(
+        Twine("Error parsing block header thread id at offset '") +
+            Twine(CurrentOffset) + "'",
+        std::make_error_code(std::errc::invalid_argument));
+  return H;
+}
+
+static Expected<std::vector<Profile::FuncID>> readPath(DataExtractor &Extractor,
+                                                       uint32_t &Offset) {
+  // We're reading a sequence of int32_t's until we find a 0.
+  std::vector<Profile::FuncID> Path;
+  auto CurrentOffset = Offset;
+  int32_t FuncId;
+  do {
+    FuncId = Extractor.getSigned(&Offset, 4);
+    if (CurrentOffset == Offset)
+      return make_error<StringError>(
+          Twine("Error parsing path at offset '") + Twine(CurrentOffset) + "'",
+          std::make_error_code(std::errc::invalid_argument));
+    CurrentOffset = Offset;
+    Path.push_back(FuncId);
+  } while (FuncId != 0);
+  return std::move(Path);
+}
+
+static Expected<Profile::Data> readData(DataExtractor &Extractor,
+                                        uint32_t &Offset) {
+  // We expect a certain number of elements for Data:
+  //   - A 64-bit CallCount
+  //   - A 64-bit CumulativeLocalTime counter
+  Profile::Data D;
+  auto CurrentOffset = Offset;
+  D.CallCount = Extractor.getU64(&Offset);
+  if (CurrentOffset == Offset)
+    return make_error<StringError>(
+        Twine("Error parsing call counts at offset '") + Twine(CurrentOffset) +
+            "'",
+        std::make_error_code(std::errc::invalid_argument));
+  CurrentOffset = Offset;
+  D.CumulativeLocalTime = Extractor.getU64(&Offset);
+  if (CurrentOffset == Offset)
+    return make_error<StringError>(
+        Twine("Error parsing cumulative local time at offset '") +
+            Twine(CurrentOffset) + "'",
+        std::make_error_code(std::errc::invalid_argument));
+  return D;
+}
+
+} // namespace
+
+Profile::PathID Profile::internPath(ArrayRef<Profile::FuncID> Path) {
+  if (Path.empty())
+    return 0;
+
+  auto RevPath = reverse(Path);
+
+  // Find the root.
+  auto It = RevPath.begin();
+  auto PathRoot = *It++;
+  auto RootIt =
+      find_if(Roots, [PathRoot](TrieNode *N) { return N->Func == PathRoot; });
+
+  // If we've not seen this root before, remember it.
+  TrieNode *Node = nullptr;
+  if (RootIt == Roots.end()) {
+    NodeStorage.emplace_back();
+    Node = &NodeStorage.back();
+    Node->Func = PathRoot;
+    Roots.push_back(Node);
+  } else {
+    Node = *RootIt;
+  }
+
+  // Now traverse the path, re-creating if necessary.
+  while (It != RevPath.end()) {
+    auto NodeFuncID = *It++;
+    auto CalleeIt = find_if(Node->Callees, [NodeFuncID](TrieNode *N) {
+      return N->Func == NodeFuncID;
+    });
+    if (CalleeIt == Node->Callees.end()) {
+      NodeStorage.emplace_back();
+      auto NewNode = &NodeStorage.back();
+      NewNode->Func = NodeFuncID;
+      NewNode->Caller = Node;
+      Node->Callees.push_back(NewNode);
+      Node = NewNode;
+    } else {
+      Node = *CalleeIt;
+    }
+  }
+
+  // At this point, Node *must* be pointing at the leaf.
+  assert(Node->Func == Path.front());
+  if (Node->ID == 0) {
+    Node->ID = NextID++;
+    PathIDMap.insert({Node->ID, Node});
+  }
+  return Node->ID;
+}
+
+Error Profile::addBlock(Block &&B) {
+  if (B.Thread == 0)
+    return make_error<StringError>(
+        "Block may not have Thread ID 0.",
+        std::make_error_code(std::errc::invalid_argument));
+  if (B.PathData.empty())
+    return make_error<StringError>(
+        "Block may not have empty path data.",
+        std::make_error_code(std::errc::invalid_argument));
+
+  Blocks.emplace_back(std::move(B));
+  return Error::success();
+}
+
+Expected<std::vector<Profile::FuncID>> Profile::path(Profile::PathID P) const {
+  auto It = PathIDMap.find(P);
+  if (It == PathIDMap.end())
+    return make_error<StringError>(
+        Twine("PathID not found: ") + Twine(P),
+        std::make_error_code(std::errc::invalid_argument));
+  std::vector<Profile::FuncID> Path;
+  for (auto Node = It->second; Node; Node = Node->Caller)
+    Path.push_back(Node->Func);
+  return std::move(Path);
+}
+
+Profile mergeProfilesByThread(const Profile &L, const Profile &R) {
+  Profile Merged;
+  using PathDataMap = DenseMap<Profile::PathID, Profile::Data>;
+  using PathDataMapPtr = std::unique_ptr<PathDataMap>;
+  using PathDataVector = decltype(Profile::Block::PathData);
+  DenseMap<Profile::ThreadID, PathDataMapPtr> ThreadProfileIndex;
+  for (const auto &Block : L) {
+    auto Res = ThreadProfileIndex.insert(
+        {Block.Thread, PathDataMapPtr{new PathDataMap()}});
+    auto &It = Res.first;
+    for (const auto &PathAndData : Block.PathData) {
+      auto &PathID = PathAndData.first;
+      auto &Data = PathAndData.second;
+      auto NewPathID = Merged.internPath(cantFail(L.path(PathID)));
+      It->second->insert({NewPathID, Data});
+    }
+  }
+  for (const auto &Block : R) {
+    auto Res = ThreadProfileIndex.insert(
+        {Block.Thread, PathDataMapPtr{new PathDataMap()}});
+    auto &It = Res.first;
+    for (const auto &PathAndData : Block.PathData) {
+      auto &PathID = PathAndData.first;
+      auto &Data = PathAndData.second;
+      auto NewPathID = Merged.internPath(cantFail(R.path(PathID)));
+      PathDataMap::iterator PathDataIt;
+      bool Inserted;
+      std::tie(PathDataIt, Inserted) = It->second->insert({NewPathID, Data});
+      if (!Inserted) {
+        auto &ExistingData = PathDataIt->second;
+        ExistingData.CallCount += Data.CallCount;
+        ExistingData.CumulativeLocalTime += Data.CumulativeLocalTime;
+      }
+    }
+  }
+
+  for (const auto &IndexedThreadBlock : ThreadProfileIndex) {
+    PathDataVector PathAndData;
+    PathAndData.reserve(IndexedThreadBlock.second->size());
+    copy(*IndexedThreadBlock.second, std::back_inserter(PathAndData));
+    cantFail(
+        Merged.addBlock({IndexedThreadBlock.first, std::move(PathAndData)}));
+  }
+  return Merged;
+}
+
+Expected<Profile> loadProfile(StringRef Filename) {
+  int Fd;
+  if (auto EC = sys::fs::openFileForRead(Filename, Fd))
+    return make_error<StringError>(
+        Twine("Cannot read profile from '") + Filename + "'", EC);
+
+  uint64_t FileSize;
+  if (auto EC = sys::fs::file_size(Filename, FileSize))
+    return make_error<StringError>(
+        Twine("Cannot get filesize of '") + Filename + "'", EC);
+
+  std::error_code EC;
+  sys::fs::mapped_file_region MappedFile(
+      Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
+  if (EC)
+    return make_error<StringError>(
+        Twine("Cannot mmap profile '") + Filename + "'", EC);
+  StringRef Data(MappedFile.data(), MappedFile.size());
+
+  Profile P;
+  uint32_t Offset = 0;
+  DataExtractor Extractor(Data, true, 8);
+
+  // For each block we get from the file:
+  while (Offset != MappedFile.size()) {
+    auto HeaderOrError = readBlockHeader(Extractor, Offset);
+    if (!HeaderOrError)
+      return HeaderOrError.takeError();
+
+    // TODO: Maybe store this header information for each block, even just for
+    // debugging?
+    const auto &Header = HeaderOrError.get();
+
+    // Read in the path data.
+    auto PathOrError = readPath(Extractor, Offset);
+    if (!PathOrError)
+      return PathOrError.takeError();
+    const auto &Path = PathOrError.get();
+
+    // For each path we encounter, we should intern it to get a PathID.
+    auto DataOrError = readData(Extractor, Offset);
+    if (!DataOrError)
+      return DataOrError.takeError();
+    auto &Data = DataOrError.get();
+
+    if (auto E =
+            P.addBlock(Profile::Block{Profile::ThreadID{Header.Thread},
+                                      {{P.internPath(Path), std::move(Data)}}}))
+      return std::move(E);
+  }
+
+  return P;
+}
+
+} // namespace xray
+} // namespace llvm
Index: llvm/lib/XRay/Trace.cpp
===================================================================
--- llvm/lib/XRay/Trace.cpp
+++ llvm/lib/XRay/Trace.cpp
@@ -30,16 +30,34 @@
   // FIXME: Maybe deduce whether the data is little or big-endian using some
   // magic bytes in the beginning of the file?
 
-  // First 32 bytes of the file will always be the header. We assume a certain
-  // format here:
+  // First 32 bytes of the file will always be the header. For FDR and Basic
+  // mode, we assume a specific format:
   //
   //   (2)   uint16 : version
   //   (2)   uint16 : type
   //   (4)   uint32 : bitfield
   //   (8)   uint64 : cycle frequency
   //   (16)  -      : padding
+  //
+  // We also look for the Profiling Mode output, which uses an alternate data
+  // format using a set of magic bytes at the beginning of the file. If we see
+  // the first 64 bits (16 bytes) to be that set of magic bytes, then we bail
+  // out advising the caller to use a separate loader. We're doing this because
+  // the data format for the profiling mode doesn't provide access to specific
+  // events compared to FDR and Basic mode.
 
   DataExtractor HeaderExtractor(Data, true, 8);
+
+  {
+    uint32_t OffsetPtr = 0;
+    uint64_t MagicBytes = HeaderExtractor.getU64(&OffsetPtr);
+    if (MagicBytes == 0x7872617970726f66)
+      return make_error<StringError>(
+          Twine("Detected XRay profiling mode file; use the `Profile` loader "
+                "instead."),
+          std::make_error_code(std::errc::invalid_argument));
+  }
+
   uint32_t OffsetPtr = 0;
   FileHeader.Version = HeaderExtractor.getU16(&OffsetPtr);
   FileHeader.Type = HeaderExtractor.getU16(&OffsetPtr);
@@ -122,8 +140,8 @@
         return make_error<StringError>(
             Twine("Corrupted log, found arg payload following non-matching "
                   "function + thread record. Record for function ") +
-                Twine(Record.FuncId) + " != " + Twine(FuncId) + "; offset: " +
-                Twine(S.data() - Data.data()),
+                Twine(Record.FuncId) + " != " + Twine(FuncId) +
+                "; offset: " + Twine(S.data() - Data.data()),
             std::make_error_code(std::errc::executable_format_error));
       // Advance another four bytes to avoid padding.
       OffsetPtr += 4;
@@ -708,9 +726,9 @@
 
   if (Sort)
     std::stable_sort(T.Records.begin(), T.Records.end(),
-              [&](const XRayRecord &L, const XRayRecord &R) {
-                return L.TSC < R.TSC;
-              });
+                     [&](const XRayRecord &L, const XRayRecord &R) {
+                       return L.TSC < R.TSC;
+                     });
 
   return std::move(T);
 }
Index: llvm/tools/llvm-xray/xray-stacks.cpp
===================================================================
--- llvm/tools/llvm-xray/xray-stacks.cpp
+++ llvm/tools/llvm-xray/xray-stacks.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/XRay/Graph.h"
 #include "llvm/XRay/InstrumentationMap.h"
+#include "llvm/XRay/Profile.h"
 #include "llvm/XRay/Trace.h"
 
 using namespace llvm;
@@ -723,9 +724,18 @@
   symbolize::LLVMSymbolizer Symbolizer(Opts);
   FuncIdConversionHelper FuncIdHelper(StacksInstrMap, Symbolizer,
                                       Map.getFunctionAddresses());
+
   // TODO: Someday, support output to files instead of just directly to
   // standard output.
   for (const auto &Filename : StackInputs) {
+    // First, we try to load the file using the Profile loader. If we happen to
+    // get a Profile object, we can then translate those directly into the
+    // format that we need.
+    auto ProfileOrErr = loadProfile(Filename);
+    if (!ProfileOrErr)
+      logAllUnhandledErrors(ProfileOrErr.takeError(), errs(), "");
+
+    // TODO: Handle Profile objects.
     auto TraceOrErr = loadTraceFile(Filename);
     if (!TraceOrErr) {
       if (!StackKeepGoing)
Index: llvm/unittests/XRay/CMakeLists.txt
===================================================================
--- llvm/unittests/XRay/CMakeLists.txt
+++ llvm/unittests/XRay/CMakeLists.txt
@@ -1,9 +1,11 @@
 set(LLVM_LINK_COMPONENTS
   Support
+	XRay
   )
 
 add_llvm_unittest(XRayTests
   GraphTest.cpp
+	ProfileTest.cpp
   )
 
 add_dependencies(XRayTests intrinsics_gen)
Index: llvm/unittests/XRay/ProfileTest.cpp
===================================================================
--- /dev/null
+++ llvm/unittests/XRay/ProfileTest.cpp
@@ -0,0 +1,70 @@
+//===- ProfileTest.cpp - XRay Profile unit tests ----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/XRay/Profile.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace xray {
+namespace {
+
+using ::testing::AllOf;
+using ::testing::ElementsAre;
+using ::testing::Eq;
+using ::testing::Field;
+using ::testing::Not;
+using ::testing::Pair;
+
+TEST(ProfileTest, CreateProfile) { Profile P; }
+
+TEST(ProfileTest, InternPath) {
+  Profile P;
+  auto Path0 = P.internPath({3, 2, 1});
+  auto Path1 = P.internPath({3, 2, 1});
+  auto Path2 = P.internPath({2, 1});
+  EXPECT_THAT(Path0, Eq(Path1));
+  EXPECT_THAT(Path0, Not(Eq(Path2)));
+}
+
+TEST(ProfileTest, AddBlocks) {
+  Profile P;
+  EXPECT_TRUE(errorToBool(P.addBlock({})));
+  EXPECT_TRUE(errorToBool(P.addBlock({1, {}})));
+  EXPECT_FALSE(errorToBool(P.addBlock(
+      Profile::Block{Profile::ThreadID{1},
+                     {
+                         {P.internPath({2, 1}), Profile::Data{1, 1000}},
+                         {P.internPath({3, 2, 1}), Profile::Data{10, 100}},
+                     }})));
+}
+
+TEST(ProfileTest, MergeProfilesByThread) {
+  Profile P0, P1;
+  EXPECT_FALSE(errorToBool(P0.addBlock(
+      Profile::Block{Profile::ThreadID{1},
+                     {{P0.internPath({2, 1}), Profile::Data{1, 1000}}}})));
+  EXPECT_FALSE(errorToBool(P1.addBlock(
+      Profile::Block{Profile::ThreadID{1},
+                     {{P1.internPath({2, 1}), Profile::Data{1, 1000}}}})));
+
+  Profile Merged = mergeProfilesByThread(P0, P1);
+  EXPECT_THAT(Merged,
+              ElementsAre(AllOf(
+                  Field(&Profile::Block::Thread, Eq(Profile::ThreadID{1})),
+                  Field(&Profile::Block::PathData,
+                        ElementsAre(Pair(
+                            Merged.internPath({2, 1}),
+                            AllOf(Field(&Profile::Data::CallCount, Eq(2u)),
+                                  Field(&Profile::Data::CumulativeLocalTime,
+                                        Eq(2000u)))))))));
+}
+
+} // namespace
+} // namespace xray
+} // namespace llvm