Index: compiler-rt/trunk/include/xray/xray_interface.h
===================================================================
--- compiler-rt/trunk/include/xray/xray_interface.h
+++ compiler-rt/trunk/include/xray/xray_interface.h
@@ -18,7 +18,7 @@
 
 extern "C" {
 
-enum XRayEntryType { ENTRY = 0, EXIT = 1 };
+enum XRayEntryType { ENTRY = 0, EXIT = 1, TAIL = 2 }; // TAIL: tail call exit.
 
 // Provide a function to invoke for when instrumentation points are hit. This is
 // a user-visible control surface that overrides the default implementation. The
Index: compiler-rt/trunk/lib/xray/xray_arm.cc
===================================================================
--- compiler-rt/trunk/lib/xray/xray_arm.cc
+++ compiler-rt/trunk/lib/xray/xray_arm.cc
@@ -127,4 +127,11 @@
   return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
 }
 
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+                           const XRaySledEntry &Sled) {
+  // Tail exits are patched like normal exits: patchSled + __xray_FunctionExit.
+  // FIXME: Distinguish tail exits from non-tail exits to preserve information.
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
 } // namespace __xray
Index: compiler-rt/trunk/lib/xray/xray_interface.cc
===================================================================
--- compiler-rt/trunk/lib/xray/xray_interface.cc
+++ compiler-rt/trunk/lib/xray/xray_interface.cc
@@ -174,6 +174,9 @@
     case XRayEntryType::EXIT:
       Success = patchFunctionExit(Enable, FuncId, Sled);
       break;
+    case XRayEntryType::TAIL:
+      Success = patchFunctionTailExit(Enable, FuncId, Sled);
+      break;
     default:
       Report("Unsupported sled kind: %d", int(Sled.Kind));
       continue;
Index: compiler-rt/trunk/lib/xray/xray_interface_internal.h
===================================================================
--- compiler-rt/trunk/lib/xray/xray_interface_internal.h
+++ compiler-rt/trunk/lib/xray/xray_interface_internal.h
@@ -48,10 +48,11 @@
   size_t Entries;
 };
 
-bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+bool patchFunctionEntry(bool Enable, uint32_t FuncId,
                         const XRaySledEntry &Sled);
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
-                       const XRaySledEntry &Sled);
+bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
+                           const XRaySledEntry &Sled);
 
 } // namespace __xray
 
Index: compiler-rt/trunk/lib/xray/xray_x86_64.cc
===================================================================
--- compiler-rt/trunk/lib/xray/xray_x86_64.cc
+++ compiler-rt/trunk/lib/xray/xray_x86_64.cc
@@ -111,4 +111,37 @@
   return true;
 }
 
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+                           const XRaySledEntry &Sled) {
+  // Patch a tail call sled with a sequence similar to the entry sled, except
+  // that the call targets __xray_FunctionExit; tail call exits are therefore
+  // treated as if they were normal exits. The call offset is measured from
+  // Sled.Address + 11, the first byte after the 4-byte offset written at +7.
+  // FIXME: In the future we'd need to distinguish between non-tail exits and
+  // tail exits for better information preservation.
+  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
+                             (static_cast<int64_t>(Sled.Address) + 11);
+  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+    Report("XRay Exit trampoline (%p) too far from sled (%p); distance = "
+           "%ld\n",
+           __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address),
+           TrampolineOffset);
+    return false; // Offset is outside [MinOffset, MaxOffset].
+  }
+  if (Enable) {
+    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
+    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+        std::memory_order_release); // Sled's first 2 bytes are written last.
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
+        std::memory_order_release); // Only the first 2 bytes are rewritten.
+    // FIXME: Write out the nops still?
+  }
+  return true;
+}
+
 } // namespace __xray