Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -38,8 +38,6 @@
 mark_as_advanced(COMPILER_RT_BUILD_BUILTINS)
 option(COMPILER_RT_BUILD_SANITIZERS "Build sanitizers" ON)
 mark_as_advanced(COMPILER_RT_BUILD_SANITIZERS)
-option(COMPILER_RT_BUILD_XRAY "Build xray" ON)
-mark_as_advanced(COMPILER_RT_BUILD_XRAY)
 
 if (COMPILER_RT_STANDALONE_BUILD)
   load_llvm_config()
Index: include/xray/xray_records.h
===================================================================
--- /dev/null
+++ include/xray/xray_records.h
@@ -0,0 +1,82 @@
+//===-- xray_records.h ------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// This header exposes some record types useful for the XRay in-memory logging
+// implementation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XRAY_XRAY_RECORDS_H
+#define XRAY_XRAY_RECORDS_H
+
+namespace __xray {
+
+enum FileTypes {
+  NAIVE_LOG = 0,
+};
+
+// This data structure is used to describe the contents of the file. We use this
+// for versioning the supported XRay file formats.
+struct alignas(32) XRayFileHeader {
+  uint16_t Version = 0;
+
+  // The type of file we're writing out. See the FileTypes enum for more
+  // information. This allows different implementations of the XRay logging to
+  // have different files for different information being stored.
+  uint16_t Type = 0;
+
+  // The frequency by which TSC increases per-second.
+  uint64_t CycleFrequency = 0;
+
+  // What follows are a set of flags that indicate useful things for when
+  // reading the data in the file.
+  bool ConstantTSC : 1;
+  bool NonstopTSC : 1;
+
+  char Padding[14] = {};
+} __attribute__((packed));
+
+static_assert(sizeof(XRayFileHeader) == 32, "XRayFileHeader != 32 bytes");
+
+enum RecordTypes {
+  NORMAL = 0,
+};
+
+struct alignas(32) XRayRecord {
+  // This is the type of the record being written. We use 16 bits to allow us to
+  // treat this as a discriminant, and so that the first 4 bytes get packed
+  // properly. See RecordTypes for more supported types.
+  uint16_t RecordType = 0;
+
+  // The CPU where the thread is running. We assume number of CPUs <= 256.
+  uint8_t CPU = 0;
+
+  // The type of the event. Usually either ENTER = 0 or EXIT = 1.
+  uint8_t Type = 0;
+
+  // The function ID for the record.
+  int32_t FuncId = 0;
+
+  // Get the full 8 bytes of the TSC when we get the log record.
+  uint64_t TSC = 0;
+
+  // The thread ID for the currently running thread.
+  uint32_t TId = 0;
+
+  // Use some bytes in the end of the record for buffers.
+  char Buffer[4] = {};
+} __attribute__((packed));
+
+static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes");
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_RECORDS_H
Index: lib/xray/CMakeLists.txt
===================================================================
--- lib/xray/CMakeLists.txt
+++ lib/xray/CMakeLists.txt
@@ -4,10 +4,11 @@
   xray_init.cc
 	xray_interface.cc
 	xray_flags.cc
+  xray_inmemory_log.cc
 )
 
 set(x86_64_SOURCES
-		xray_trampoline_x86.S
+		xray_trampoline_x86_64.S
 		${XRAY_SOURCES})
 
 include_directories(..)
@@ -18,9 +19,9 @@
 set(XRAY_COMMON_DEFINITIONS XRAY_HAS_EXCEPTIONS=1)
 
 add_compiler_rt_object_libraries(RTXray
-		ARCHS ${XRAY_SUPPORTED_ARCH}
-		SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS}
-		DEFS ${XRAY_COMMON_DEFINITIONS})
+    ARCHS ${XRAY_SUPPORTED_ARCH}
+    SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS}
+    DEFS ${XRAY_COMMON_DEFINITIONS})
 
 add_custom_target(xray)
 set(XRAY_COMMON_RUNTIME_OBJECT_LIBS
Index: lib/xray/xray_flags.inc
===================================================================
--- lib/xray/xray_flags.inc
+++ lib/xray/xray_flags.inc
@@ -16,3 +16,7 @@
 
 XRAY_FLAG(bool, patch_premain, true,
           "Whether to patch instrumentation points before main.")
+XRAY_FLAG(bool, xray_naive_log, true,
+          "Whether to install the naive log implementation.")
+XRAY_FLAG(const char *, xray_logfile_base, "xray-log.",
+          "Filename base for the xray logfile.")
Index: lib/xray/xray_inmemory_log.cc
===================================================================
--- /dev/null
+++ lib/xray/xray_inmemory_log.cc
@@ -0,0 +1,215 @@
+//===-- xray_inmemory_log.cc ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of a simple in-memory log of XRay events. This defines a
+// logging function that's compatible with the XRay handler interface, and
+// routines for exporting data to files.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cassert>
+#include <fcntl.h>
+#include <mutex>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <thread>
+#include <unistd.h>
+#include <x86intrin.h>
+
+#include "sanitizer_common/sanitizer_libc.h"
+#include "xray/xray_records.h"
+#include "xray_flags.h"
+#include "xray_interface_internal.h"
+
+// __xray_InMemoryRawLog will use a thread-local aligned buffer capped to a
+// certain size (32kb by default) and use it as if it were a circular buffer for
+// events. We store simple fixed-sized entries in the log for external analysis.
+
+extern "C" {
+void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type);
+}
+
+namespace __xray {
+
+std::mutex LogMutex;
+
+static constexpr size_t BuffLen = 1024;
+
+static void retryingWrite(int Fd, char *Begin, char *End) {
+  if (Begin == End)
+    return;
+  auto TotalBytes = std::distance(Begin, End);
+  while (auto Written = write(Fd, Begin, TotalBytes)) {
+    if (Written < 0) {
+      if (errno == EINTR)
+        continue; // Try again.
+      Report("Failed to write; errno = %d", errno);
+      return;
+    }
+
+    // FIXME: Figure out whether/how to assert properly.
+    assert(static_cast<uint64_t>(Written) <= TotalBytes);
+    TotalBytes -= Written;
+    if (TotalBytes == 0)
+      break;
+    Begin += Written;
+  }
+}
+
+static bool retryingRead(int Fd, char *Begin, char *End) {
+  auto BytesToRead = std::distance(Begin, End);
+  int BytesRead;
+  while ((BytesRead = read(Fd, Begin, BytesToRead)) < 0) {
+    if (errno == EINTR)
+      continue;
+    Report("Read error; errno = %d", errno);
+    return false;
+
+    BytesToRead -= BytesRead;
+    if (BytesToRead == 0)
+      break;
+    Begin += BytesRead;
+  }
+  return true;
+}
+
+static bool readValueFromFile(const char *Filename, long long *Value) {
+  int Fd = open(Filename, O_RDONLY | O_CLOEXEC);
+  if (Fd == -1)
+    return false;
+  char Line[256] = {};
+  if (!retryingRead(Fd, Line, Line + 256))
+    return false;
+  close(Fd);
+  char *End = nullptr;
+  long long Tmp = internal_simple_strtoll(Line, &End, 10);
+  bool Result = false;
+  if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) {
+    *Value = Tmp;
+    Result = true;
+  }
+  return Result;
+}
+
+class ThreadExitFlusher {
+  int Fd;
+  XRayRecord *Start;
+  size_t &Offset;
+
+public:
+  explicit ThreadExitFlusher(int Fd, XRayRecord *Start, size_t &Offset)
+      : Fd(Fd), Start(Start), Offset(Offset) {}
+
+  ~ThreadExitFlusher() {
+    std::lock_guard<std::mutex> L(LogMutex);
+    if (Fd > 0 && Start != nullptr) {
+      retryingWrite(Fd, reinterpret_cast<char *>(Start),
+                    reinterpret_cast<char *>(Start + Offset));
+      // Because this thread's exit could be the last one trying to write to the
+      // file and that we're not able to close out the file properly, we sync
+      // instead and hope that the pending writes are flushed as the thread
+      // exits.
+      fsync(Fd);
+    }
+  }
+};
+
+} // namespace __xray
+
+using namespace __xray;
+
+void PrintToStdErr(const char *Buffer) { fprintf(stderr, Buffer); }
+
+void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type) {
+  using Buffer =
+      std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
+  thread_local static Buffer InMemoryBuffer[BuffLen] = {};
+  thread_local static size_t Offset = 0;
+  static int Fd = [] {
+    // FIXME: Figure out how to make this less stderr-dependent.
+    SetPrintfAndReportCallback(PrintToStdErr);
+    // Open a temporary file once for the log.
+    static char TmpFilename[256] = {};
+    auto E = internal_strncat(TmpFilename, flags()->xray_logfile_base, 246);
+    if ((E + 6) - TmpFilename > 255) {
+      Report("XRay log file base too long: %s", flags()->xray_logfile_base);
+      return -1;
+    }
+    internal_strncat(TmpFilename, "XXXXXX", 6);
+    int Fd = mkstemp(TmpFilename);
+    if (Fd == -1) {
+      Report("XRay: Failed opening temporary file '%s'; not logging events.",
+             TmpFilename);
+      return -1;
+    }
+    if (Verbosity())
+      fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename);
+
+    // Get the cycle frequency from SysFS on Linux.
+    long long CPUFrequency = -1;
+    if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
+                          &CPUFrequency)) {
+      CPUFrequency *= 1000;
+    } else if (readValueFromFile(
+                   "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
+                   &CPUFrequency)) {
+      CPUFrequency *= 1000;
+    }
+
+    // Since we're here, we get to write the header. We set it up so that the
+    // header will only be written once, at the start, and let the threads
+    // logging do writes which just append.
+    XRayFileHeader Header;
+    Header.Version = 1;
+    Header.Type = FileTypes::NAIVE_LOG;
+    Header.CycleFrequency =
+        CPUFrequency == -1 ? 0 : static_cast<uint64_t>(CPUFrequency);
+
+    // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc'
+    // before setting the values in the header.
+    Header.ConstantTSC = 1;
+    Header.NonstopTSC = 1;
+    retryingWrite(Fd, reinterpret_cast<char *>(&Header),
+                  reinterpret_cast<char *>(&Header) + sizeof(Header));
+    return Fd;
+  }();
+  if (Fd == -1)
+    return;
+  thread_local __xray::ThreadExitFlusher Flusher(
+      Fd, reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer), Offset);
+  thread_local pid_t TId = syscall(SYS_gettid);
+
+  // First we get the useful data, and stuff it into the already aligned buffer
+  // through a pointer offset.
+  auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
+  unsigned CPU;
+  R.RecordType = RecordTypes::NORMAL;
+  R.TSC = __rdtscp(&CPU);
+  R.CPU = CPU;
+  R.TId = TId;
+  R.Type = Type;
+  R.FuncId = FuncId;
+  ++Offset;
+  if (Offset == __xray::BuffLen) {
+    std::lock_guard<std::mutex> L(LogMutex);
+    auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer);
+    retryingWrite(Fd, reinterpret_cast<char *>(RecordBuffer),
+                  reinterpret_cast<char *>(RecordBuffer + Offset));
+    Offset = 0;
+  }
+}
+
+static auto Unused = [] {
+  if (flags()->xray_naive_log)
+    __xray_set_handler(__xray_InMemoryRawLog);
+  return true;
+}();
Index: lib/xray/xray_trampoline_x86.S
===================================================================
--- /dev/null
+++ lib/xray/xray_trampoline_x86.S
@@ -1,112 +0,0 @@
-//===-- xray_trampoline_x86.s -----------------------------------*- ASM -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file is a part of XRay, a dynamic runtime instrumentation system.
-//
-// This implements the X86-specific assembler for the trampolines.
-//
-//===----------------------------------------------------------------------===//
-
-	.text
-	.file "xray_trampoline_x86.S"
-	.globl __xray_FunctionEntry
-	.align 16, 0x90
-	.type __xray_FunctionEntry,@function
-
-__xray_FunctionEntry:
-  .cfi_startproc
-  // Save caller provided registers before doing any actual work.
-	pushq %rbp
-	.cfi_def_cfa_offset 16
-	subq $200, %rsp
-	movupd	%xmm0, 184(%rsp)
-	movupd	%xmm1, 168(%rsp)
-	movupd	%xmm2, 152(%rsp)
-	movupd	%xmm3, 136(%rsp)
-	movupd	%xmm4, 120(%rsp)
-	movupd	%xmm5, 104(%rsp)
-	movupd	%xmm6, 88(%rsp)
-	movupd	%xmm7, 72(%rsp)
-	movq	%rdi, 64(%rsp)
-	movq  %rax, 56(%rsp)
-	movq  %rdx, 48(%rsp)
-	movq	%rsi, 40(%rsp)
-	movq	%rcx, 32(%rsp)
-	movq	%r8, 24(%rsp)
-	movq	%r9, 16(%rsp)
-
-	// de-mangled, that's __xray::XRayPatchedFunction, and we're doing an acquire
-	// load (on x86 is a normal mov instruction).
-	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
-	testq	%rax, %rax
-	je	.Ltmp0
-
-	// assume that %r10d has the function id.
-	movl	%r10d, %edi
-	xor	%esi,%esi
-	callq	*%rax
-.Ltmp0:
-  // restore the registers
-	movupd	184(%rsp), %xmm0
-	movupd	168(%rsp), %xmm1
-	movupd	152(%rsp), %xmm2
-	movupd	136(%rsp), %xmm3
-	movupd	120(%rsp), %xmm4
-	movupd	104(%rsp), %xmm5
-	movupd	88(%rsp) , %xmm6
-	movupd	72(%rsp) , %xmm7
-	movq	64(%rsp), %rdi
-	movq  56(%rsp), %rax
-	movq  48(%rsp), %rdx
-	movq	40(%rsp), %rsi
-	movq	32(%rsp), %rcx
-	movq	24(%rsp), %r8
-	movq	16(%rsp), %r9
-	addq	$200, %rsp
-	popq	%rbp
-	retq
-.Ltmp1:
-	.size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry
-	.cfi_endproc
-
-	.globl __xray_FunctionExit
-	.align 16, 0x90
-	.type __xray_FunctionExit,@function
-__xray_FunctionExit:
-	.cfi_startproc
-	// Save the important registers first. Since we're assuming that this
-	// function is only jumped into, we only preserve the registers for
-	// returning.
-	pushq	%rbp
-	.cfi_def_cfa_offset 16
-	subq	$56, %rsp
-	.cfi_def_cfa_offset 32
-	movupd	%xmm0, 40(%rsp)
-	movupd	%xmm1, 24(%rsp)
-	movq	%rax, 16(%rsp)
-	movq	%rdx, 8(%rsp)
-	movq	_ZN6__xray19XRayPatchedFunctionE(%rip), %rax
-	testq %rax,%rax
-	je	.Ltmp2
-
-	movl	%r10d, %edi
-	movl	$1, %esi
-	callq	*%rax
-.Ltmp2:
-  // Restore the important registers.
-	movupd	40(%rsp), %xmm0
-	movupd	24(%rsp), %xmm1
-	movq	16(%rsp), %rax
-	movq	8(%rsp), %rdx
-	addq	$56, %rsp
-	popq	%rbp
-	retq
-.Ltmp3:
-	.size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit
-	.cfi_endproc
Index: test/lit.common.cfg
===================================================================
--- test/lit.common.cfg
+++ test/lit.common.cfg
@@ -52,7 +52,8 @@
                                'LIBCLANG_LOGGING', 'LIBCLANG_BGPRIO_INDEX',
                                'LIBCLANG_BGPRIO_EDIT', 'LIBCLANG_NOTHREADS',
                                'LIBCLANG_RESOURCE_USAGE',
-                               'LIBCLANG_CODE_COMPLETION_LOGGING']
+                               'LIBCLANG_CODE_COMPLETION_LOGGING',
+                               'XRAY_OPTIONS']
 # Clang/Win32 may refer to %INCLUDE%. vsvarsall.bat sets it.
 if platform.system() != 'Windows':
     possibly_dangerous_env_vars.append('INCLUDE')
Index: test/xray/TestCases/Linux/fixedsize-logging.cc
===================================================================
--- /dev/null
+++ test/xray/TestCases/Linux/fixedsize-logging.cc
@@ -0,0 +1,20 @@
+// Check to make sure that we have a log file with a fixed-size.
+
+// RUN: %clangxx_xray -std=c++11 %s -o %t
+// RUN: XRAY_OPTIONS="verbosity=1 xray_logfile_base=fixedsize-logging-" %run %t 2>&1 | FileCheck %s
+//
+// After all that, clean up the output xray log.
+//
+// RUN: rm fixedsize-logging-*
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void foo() {
+  printf("foo() is always instrumented!");
+}
+
+int main() {
+  // CHECK: XRay: Log file in 'fixedsize-logging-{{.*}}'
+  foo();
+  // CHECK: foo() is always instrumented!
+}
Index: test/xray/TestCases/Linux/optional-inmemory-log.cc
===================================================================
--- /dev/null
+++ test/xray/TestCases/Linux/optional-inmemory-log.cc
@@ -0,0 +1,21 @@
+// Make sure that we don't get the inmemory logging implementation enabled when
+// we turn it off via options.
+
+// RUN: %clangxx_xray -std=c++11 %s -o %t
+// RUN: XRAY_OPTIONS="verbosity=1 xray_naive_log=false xray_logfile_base=optional-inmemory-log.xray-" %run %t 2>&1 | FileCheck %s
+//
+// Make sure we clean out the logs in case there was a bug.
+//
+// RUN: rm -f optional-inmemory-log.xray-*
+
+#include <cstdio>
+
+[[clang::xray_always_instrument]] void foo() {
+  printf("foo() is always instrumented!");
+}
+
+int main() {
+  // CHECK-NOT: XRay: Log file in 'optional-inmemory-log.xray-{{.*}}'
+  foo();
+  // CHECK: foo() is always instrumented!
+}