Index: include/xray/xray_interface.h
===================================================================
--- include/xray/xray_interface.h
+++ include/xray/xray_interface.h
@@ -18,7 +18,7 @@
 
 extern "C" {
 
-enum XRayEntryType { ENTRY = 0, EXIT = 1 };
+enum XRayEntryType { ENTRY = 0, EXIT = 1, TAIL = 2 };
 
 // Provide a function to invoke for when instrumentation points are hit. This is
 // a user-visible control surface that overrides the default implementation. The
Index: lib/xray/xray_arm.cc
===================================================================
--- lib/xray/xray_arm.cc
+++ lib/xray/xray_arm.cc
@@ -128,4 +128,11 @@
   return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
 }
 
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+                           const XRaySledEntry &Sled) {
+  // FIXME: In the future we'd need to distinguish between non-tail exits and
+  // tail exits for better information preservation.
+  return patchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
 } // namespace __xray
Index: lib/xray/xray_interface.cc
===================================================================
--- lib/xray/xray_interface.cc
+++ lib/xray/xray_interface.cc
@@ -27,12 +27,13 @@
 namespace __xray {
 
 #if defined(__x86_64__)
-  // FIXME: The actual length is 11 bytes. Why was length 12 passed to mprotect() ?
-  static const int16_t cSledLength = 12;
+// FIXME: The actual length is 11 bytes. Why was length 12 passed to mprotect()
+// ?
+static const int16_t cSledLength = 12;
 #elif defined(__arm__)
-  static const int16_t cSledLength = 28;
+static const int16_t cSledLength = 28;
 #else
-  #error "Unsupported CPU Architecture"
+#error "Unsupported CPU Architecture"
 #endif /* CPU architecture */
 
 // This is the function to call when we encounter the entry or exit sleds.
@@ -136,7 +137,7 @@
     return XRayPatchingStatus::NOT_INITIALIZED;
 
   const uint64_t PageSize = GetPageSizeCached();
-  if((PageSize == 0) || ( (PageSize & (PageSize-1)) != 0) ) {
+  if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) {
     Report("System page size is not a power of two: %lld", PageSize);
     return XRayPatchingStatus::FAILED;
   }
@@ -156,9 +157,9 @@
     // While we're here, we should patch the nop sled. To do that we mprotect
     // the page containing the function to be writeable.
     void *PageAlignedAddr =
-        reinterpret_cast(Sled.Address & ~(PageSize-1));
-    std::size_t MProtectLen =
-        (Sled.Address + cSledLength) - reinterpret_cast(PageAlignedAddr);
+        reinterpret_cast(Sled.Address & ~(PageSize - 1));
+    std::size_t MProtectLen = (Sled.Address + cSledLength) -
+                              reinterpret_cast(PageAlignedAddr);
     MProtectHelper Protector(PageAlignedAddr, MProtectLen);
     if (Protector.MakeWriteable() == -1) {
       printf("Failed mprotect: %d\n", errno);
@@ -166,13 +167,16 @@
     }
 
     bool Success = false;
-    switch(Sled.Kind) {
+    switch (Sled.Kind) {
     case XRayEntryType::ENTRY:
       Success = patchFunctionEntry(Enable, FuncId, Sled);
       break;
     case XRayEntryType::EXIT:
       Success = patchFunctionExit(Enable, FuncId, Sled);
       break;
+    case XRayEntryType::TAIL:
+      Success = patchFunctionTailExit(Enable, FuncId, Sled);
+      break;
     default:
       Report("Unsupported sled kind: %d", int(Sled.Kind));
       continue;
Index: lib/xray/xray_interface_internal.h
===================================================================
--- lib/xray/xray_interface_internal.h
+++ lib/xray/xray_interface_internal.h
@@ -49,8 +49,13 @@
   size_t Entries;
 };
 
-bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled);
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled);
+// Per-architecture hooks that patch (Enable) or unpatch (!Enable) one sled.
+// They return false when the sled could not be patched.
+bool patchFunctionEntry(bool Enable, uint32_t FuncId,
+                        const XRaySledEntry &Sled);
+bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled);
+bool patchFunctionTailExit(bool Enable, uint32_t FuncId,
+                           const XRaySledEntry &Sled);
 
 } // namespace __xray
 
Index: lib/xray/xray_x86_64.cc
===================================================================
--- lib/xray/xray_x86_64.cc
+++ lib/xray/xray_x86_64.cc
@@ -1,5 +1,5 @@
-#include "xray_interface_internal.h"
 #include "sanitizer_common/sanitizer_common.h"
+#include "xray_interface_internal.h"
 #include
 #include
 #include
@@ -15,8 +15,8 @@
 static constexpr int64_t MinOffset{std::numeric_limits::min()};
 static constexpr int64_t MaxOffset{std::numeric_limits::max()};
 
-bool patchFunctionEntry(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled)
-{
+bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
+                        const XRaySledEntry &Sled) {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
@@ -39,9 +39,8 @@
   //
   // Prerequisite is to compute the relative offset to the
   // __xray_FunctionEntry function's address.
-  int64_t TrampolineOffset =
-      reinterpret_cast(__xray_FunctionEntry) -
-      (static_cast(Sled.Address) + 11);
+  int64_t TrampolineOffset = reinterpret_cast(__xray_FunctionEntry) -
+                             (static_cast(Sled.Address) + 11);
   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
     Report("XRay Entry trampoline (%p) too far from sled (%p); distance = "
            "%ld\n",
@@ -65,8 +64,8 @@
   return true;
 }
 
-bool patchFunctionExit(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled)
-{
+bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
+                       const XRaySledEntry &Sled) {
   // Here we do the dance of replacing the following sled:
   //
   // xray_sled_n:
@@ -87,9 +86,8 @@
   //
   // Prerequisite is to compute the relative offset fo the
   // __xray_FunctionExit function's address.
-  int64_t TrampolineOffset =
-      reinterpret_cast(__xray_FunctionExit) -
-      (static_cast(Sled.Address) + 11);
+  int64_t TrampolineOffset = reinterpret_cast(__xray_FunctionExit) -
+                             (static_cast(Sled.Address) + 11);
   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
     Report("XRay Exit trampoline (%p) too far from sled (%p); distance = "
            "%ld\n",
@@ -113,4 +111,37 @@
   return true;
 }
 
+bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
+                           const XRaySledEntry &Sled) {
+  // Here we do the dance of replacing the tail call sled with a similar
+  // sequence as the entry sled, but calls the exit sled instead, so we can
+  // treat tail call exits as if they were normal exits.
+  //
+  // FIXME: In the future we'd need to distinguish between non-tail exits and
+  // tail exits for better information preservation.
+  int64_t TrampolineOffset = reinterpret_cast(__xray_FunctionExit) -
+                             (static_cast(Sled.Address) + 11);
+  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+    Report("XRay Exit trampoline (%p) too far from sled (%p); distance = "
+           "%ld\n",
+           __xray_FunctionExit, reinterpret_cast(Sled.Address),
+           TrampolineOffset);
+    return false;
+  }
+  if (Enable) {
+    *reinterpret_cast(Sled.Address + 2) = FuncId;
+    *reinterpret_cast(Sled.Address + 6) = CallOpCode;
+    *reinterpret_cast(Sled.Address + 7) = TrampolineOffset;
+    std::atomic_store_explicit(
+        reinterpret_cast *>(Sled.Address), MovR10Seq,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast *>(Sled.Address), Jmp9Seq,
+        std::memory_order_release);
+    // FIXME: Write out the nops still?
+  }
+  return true;
+}
+
 } // namespace __xray