Index: include/xray/xray_interface.h
===================================================================
--- include/xray/xray_interface.h
+++ include/xray/xray_interface.h
@@ -18,7 +18,7 @@
 
 extern "C" {
 
-enum XRayEntryType { ENTRY = 0, EXIT = 1 };
+enum XRayEntryType { ENTRY = 0, EXIT = 1, TAIL = 2 };
 
 // Provide a function to invoke for when instrumentation points are hit. This is
 // a user-visible control surface that overrides the default implementation. The
Index: lib/xray/xray_interface.cc
===================================================================
--- lib/xray/xray_interface.cc
+++ lib/xray/xray_interface.cc
@@ -259,6 +259,42 @@
         // FIXME: Write out the nops still?
       }
     }
+
+    if (Sled.Kind == XRayEntryType::TAIL) {
+      // FIXME: Implement this in a more extensible manner, per-platform.
+      // Here we do the dance of replacing the tail call sled with a similar
+      // sequence as the entry sled, but jump to the exit sled instead, so we
+      // can treat tail call exits as if they were normal exits. In the future
+      // we'd need to distinguish between non-tail exits and tail exits for
+      // better information preservation.
+      //
+      // The stores below fill bytes [2, 11) of the sled: the function id at
+      // +2, the call opcode at +6 and the displacement at +7. The displacement
+      // is measured from Sled.Address + 11, the first byte past those stores.
+      int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) - (static_cast<int64_t>(Sled.Address) + 11);
+      if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+        Report("XRay Exit trampoline (%p) too far from sled (%p); distance = "
+               "%ld\n",
+               __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address),
+               TrampolineOffset);
+        continue;
+      }
+      if (Enable) {
+        *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+        *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
+        *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+        // Rewrite the head of the sled last, after the operands are in place.
+        std::atomic_store_explicit(
+            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+            std::memory_order_release);
+      } else {
+        std::atomic_store_explicit(
+            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
+            std::memory_order_release);
+        // FIXME: Write out the nops still?
+      }
+    }
+
   }
   XRayPatching.store(false, std::memory_order_release);
   PatchingSuccess = true;