Index: include/xray/xray_interface.h =================================================================== --- include/xray/xray_interface.h +++ include/xray/xray_interface.h @@ -18,7 +18,7 @@ extern "C" { -enum XRayEntryType { ENTRY = 0, EXIT = 1, TAIL = 2 }; +enum XRayEntryType { ENTRY = 0, EXIT = 1, TAIL = 2, CUSTOM_EVENT = 3 }; // Provide a function to invoke for when instrumentation points are hit. This is // a user-visible control surface that overrides the default implementation. The @@ -42,6 +42,11 @@ // Returns 1 on success, 0 on error. extern int __xray_set_handler(void (*entry)(int32_t, XRayEntryType)); +// Provide a function to invoke when a custom event sled is hit. The handler +// receives a pointer to the event data and the size of that data. +// Returns 1 on success, 0 on error. +extern int __xray_set_customevent_handler(void (*entry)(void*, std::size_t)); + // This removes whatever the currently provided handler is. Returns 1 on // success, 0 on error. extern int __xray_remove_handler(); Index: lib/xray/xray_AArch64.cc =================================================================== --- lib/xray/xray_AArch64.cc +++ lib/xray/xray_AArch64.cc @@ -18,8 +18,7 @@ #include #include - -extern "C" void __clear_cache(void* start, void* end); +extern "C" void __clear_cache(void *start, void *end); namespace __xray { @@ -86,8 +85,8 @@ reinterpret_cast *>(FirstAddress), uint32_t(PatchOpcodes::PO_B32), std::memory_order_release); } - __clear_cache(reinterpret_cast(FirstAddress), - reinterpret_cast(CurAddress)); + __clear_cache(reinterpret_cast(FirstAddress), + reinterpret_cast(CurAddress)); return true; } @@ -106,6 +105,12 @@ return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit); } +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) + XRAY_NEVER_INSTRUMENT { // FIXME: Implement in aarch64? + return false; +} + // FIXME: Maybe implement this better? 
bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } Index: lib/xray/xray_arm.cc =================================================================== --- lib/xray/xray_arm.cc +++ lib/xray/xray_arm.cc @@ -18,7 +18,7 @@ #include #include -extern "C" void __clear_cache(void* start, void* end); +extern "C" void __clear_cache(void *start, void *end); namespace __xray { @@ -122,8 +122,8 @@ reinterpret_cast *>(FirstAddress), uint32_t(PatchOpcodes::PO_B20), std::memory_order_release); } - __clear_cache(reinterpret_cast(FirstAddress), - reinterpret_cast(CurAddress)); + __clear_cache(reinterpret_cast(FirstAddress), + reinterpret_cast(CurAddress)); return true; } @@ -142,6 +142,12 @@ return patchSled(Enable, FuncId, Sled, __xray_FunctionTailExit); } +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) + XRAY_NEVER_INSTRUMENT { // FIXME: Implement in arm? + return false; +} + // FIXME: Maybe implement this better? bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } Index: lib/xray/xray_fdr_logging.cc =================================================================== --- lib/xray/xray_fdr_logging.cc +++ lib/xray/xray_fdr_logging.cc @@ -503,6 +503,11 @@ FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionTailExit); break; + case XRayEntryType::CUSTOM_EVENT: + // FIXME: This should never occur. It would imply a patching bug, so at + // this point we proceed as if we're still doing the right thing, but we + // really no longer are. + break; } std::memcpy(RecordPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord)); Index: lib/xray/xray_interface.cc =================================================================== --- lib/xray/xray_interface.cc +++ lib/xray/xray_interface.cc @@ -48,6 +48,9 @@ // This is the function to call when we encounter the entry or exit sleds. std::atomic XRayPatchedFunction{nullptr}; +// This is the function to call when we encounter a custom event log call. 
+std::atomic<void (*)(void *, size_t)> XRayPatchedCustomEvent{nullptr}; + // MProtectHelper is an RAII wrapper for calls to mprotect(...) that will undo // any successful mprotect(...) changes. This is used to make a page writeable // and executable, and upon destruction if it was successful in doing so returns @@ -97,6 +100,17 @@ return 0; } +// Sets the handler the custom event trampoline invokes; nullptr removes it. +// Returns 1 on success, or 0 if XRayInitialized is not set. +int __xray_set_customevent_handler(void (*entry)(void *, size_t)) + XRAY_NEVER_INSTRUMENT { + if (XRayInitialized.load(std::memory_order_acquire)) { + __xray::XRayPatchedCustomEvent.store(entry, std::memory_order_release); + return 1; + } + return 0; +} + int __xray_remove_handler() XRAY_NEVER_INSTRUMENT { return __xray_set_handler(nullptr); } @@ -193,6 +207,9 @@ case XRayEntryType::TAIL: Success = patchFunctionTailExit(Enable, FuncId, Sled); break; + case XRayEntryType::CUSTOM_EVENT: + Success = patchCustomEvent(Enable, FuncId, Sled); + break; default: Report("Unsupported sled kind: %d\n", int(Sled.Kind)); continue; Index: lib/xray/xray_interface_internal.h =================================================================== --- lib/xray/xray_interface_internal.h +++ lib/xray/xray_interface_internal.h @@ -53,6 +53,7 @@ bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); bool patchFunctionTailExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); +bool patchCustomEvent(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); } // namespace __xray @@ -62,6 +63,7 @@ extern void __xray_FunctionEntry(); extern void __xray_FunctionExit(); extern void __xray_FunctionTailExit(); +extern void __xray_CustomEvent(); } #endif Index: lib/xray/xray_mips.cc =================================================================== --- lib/xray/xray_mips.cc +++ lib/xray/xray_mips.cc @@ -95,7 +95,8 @@ // B #44 if (Enable) { - uint32_t LoTracingHookAddr = reinterpret_cast(TracingHook) & 0xffff; + uint32_t LoTracingHookAddr = + reinterpret_cast(TracingHook) & 0xffff; uint32_t HiTracingHookAddr = (reinterpret_cast(TracingHook) >> 16) & 0xffff; 
uint32_t LoFunctionID = FuncId & 0xffff; @@ -150,4 +151,10 @@ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); } +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in mips? + return false; +} + } // namespace __xray Index: lib/xray/xray_mips64.cc =================================================================== --- lib/xray/xray_mips64.cc +++ lib/xray/xray_mips64.cc @@ -93,7 +93,8 @@ if (Enable) { uint32_t LoTracingHookAddr = reinterpret_cast(TracingHook) & 0xffff; - uint32_t HiTracingHookAddr = (reinterpret_cast(TracingHook) >> 16) & 0xffff; + uint32_t HiTracingHookAddr = + (reinterpret_cast(TracingHook) >> 16) & 0xffff; uint32_t HigherTracingHookAddr = (reinterpret_cast(TracingHook) >> 32) & 0xffff; uint32_t HighestTracingHookAddr = @@ -159,4 +160,9 @@ return patchSled(Enable, FuncId, Sled, __xray_FunctionExit); } +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in mips64? + return false; +} } // namespace __xray Index: lib/xray/xray_powerpc64.cc =================================================================== --- lib/xray/xray_powerpc64.cc +++ lib/xray/xray_powerpc64.cc @@ -92,4 +92,10 @@ // FIXME: Maybe implement this better? bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; } +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // FIXME: Implement in powerpc64? 
+ return false; +} + } // namespace __xray Index: lib/xray/xray_trampoline_x86_64.S =================================================================== --- lib/xray/xray_trampoline_x86_64.S +++ lib/xray/xray_trampoline_x86_64.S @@ -145,3 +145,126 @@ .Ltmp5: .size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit .cfi_endproc + + .global __xray_CustomEvent + .align 16, 0x90 + .type __xray_CustomEvent,@function +__xray_CustomEvent: + .cfi_startproc + pushq %rbp + .cfi_def_cfa_offset 16 + // We take two arguments to this trampoline, which the sled pushes onto the + // stack before calling us. On entry the stack therefore holds: + // + // 0(%rsp), size=8 : return address (pushed by the call in the sled) + // 8(%rsp), size=8 : arg0 (the pointer) + // 16(%rsp), size=8 : arg1 (the length) + // + // First we stash some important registers. To make this efficient and minimal + // we first stash the %rdi and %rsi registers (these will be used as the first + // two arguments to the call to the custom log handler) onto the stack. We + // also then stash %rax to use it for the function pointer. We check right + // away whether the function pointer is null, and if it is we restore just + // the %rax, %rdi, and %rsi registers and return. + subq $24, %rsp + movq %rax, 16(%rsp) + movq %rsi, 8(%rsp) + movq %rdi, (%rsp) + + movq _ZN6__xray22XRayPatchedCustomEventE(%rip), %rax + testq %rax,%rax + je .Ltmp6 + + // This time we need to adjust the stack a bit more, but before doing so load + // the arguments that were pushed into the stack into %rdi and %rsi (first + // and second arguments). To do that, we do some math:
+ // + // stack contains (from %rsp upwards): + // rdi (8 bytes) + // rsi (8 bytes) + // rax (8 bytes) + // rbp (8 bytes) + // return address (8 bytes) + // arg0 (8 bytes) + // arg1 (8 bytes) + // + // So we want to get the values higher up in the stack, which are: + // + // arg0 = 40(%rsp) + movq 40(%rsp), %rdi + + // arg1 = 48(%rsp) + movq 48(%rsp), %rsi + + // At this point, we need to also stash the caller-saved general purpose + // registers (r8-r11), all the SSE registers (xmm0-15), and all registers + // outside of those we already stashed (%rdi, %rsi, and %rax). + // NOTE(review): %rsp may not be 16-byte aligned at the call below; confirm + // against the stack state the sled guarantees. + subq $320, %rsp + movupd %xmm15, 304(%rsp) + movupd %xmm14, 288(%rsp) + movupd %xmm13, 272(%rsp) + movupd %xmm12, 256(%rsp) + movupd %xmm11, 240(%rsp) + movupd %xmm10, 224(%rsp) + movupd %xmm9, 208(%rsp) + movupd %xmm8, 192(%rsp) + movupd %xmm7, 176(%rsp) + movupd %xmm6, 160(%rsp) + movupd %xmm5, 144(%rsp) + movupd %xmm4, 128(%rsp) + movupd %xmm3, 112(%rsp) + movupd %xmm2, 96(%rsp) + movupd %xmm1, 80(%rsp) + movupd %xmm0, 64(%rsp) + movq %r11, 48(%rsp) + movq %r10, 40(%rsp) + movq %r9, 32(%rsp) + movq %r8, 24(%rsp) + movq %rbx, 16(%rsp) + movq %rdx, 8(%rsp) + movq %rcx, (%rsp) + + // Then we do the call. + callq *%rax + + // Restore everything we stashed in the 320-byte area and release it.
+ movq (%rsp), %rcx + movq 8(%rsp), %rdx + movq 16(%rsp), %rbx + movq 24(%rsp), %r8 + movq 32(%rsp), %r9 + movq 40(%rsp), %r10 + movq 48(%rsp), %r11 + movupd 64(%rsp), %xmm0 + movupd 80(%rsp), %xmm1 + movupd 96(%rsp), %xmm2 + movupd 112(%rsp), %xmm3 + movupd 128(%rsp), %xmm4 + movupd 144(%rsp), %xmm5 + movupd 160(%rsp), %xmm6 + movupd 176(%rsp), %xmm7 + movupd 192(%rsp), %xmm8 + movupd 208(%rsp), %xmm9 + movupd 224(%rsp), %xmm10 + movupd 240(%rsp), %xmm11 + movupd 256(%rsp), %xmm12 + movupd 272(%rsp), %xmm13 + movupd 288(%rsp), %xmm14 + movupd 304(%rsp), %xmm15 + addq $320, %rsp + +.Ltmp6: + // Common exit, reached directly when no handler is installed: restore + // %rdi, %rsi and %rax, release their 24-byte area, and return. + movq (%rsp), %rdi + movq 8(%rsp), %rsi + movq 16(%rsp), %rax + addq $24, %rsp + popq %rbp + retq + +.Ltmp8: + .size __xray_CustomEvent, .Ltmp8-__xray_CustomEvent + .cfi_endproc Index: lib/xray/xray_x86_64.cc =================================================================== --- lib/xray/xray_x86_64.cc +++ lib/xray/xray_x86_64.cc @@ -75,8 +75,10 @@ static constexpr uint8_t CallOpCode = 0xe8; static constexpr uint16_t MovR10Seq = 0xba41; static constexpr uint16_t Jmp9Seq = 0x09eb; +static constexpr uint16_t Jmp24Seq = 0x18eb; static constexpr uint8_t JmpOpCode = 0xe9; static constexpr uint8_t RetOpCode = 0xc3; +static constexpr uint16_t NopwSeq = 0x9066; static constexpr int64_t MinOffset{std::numeric_limits::min()}; static constexpr int64_t MaxOffset{std::numeric_limits::max()}; @@ -201,6 +203,58 @@ return true; } +bool patchCustomEvent(const bool Enable, const uint32_t FuncId, + const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { + // Here we do the dance of replacing the following sled: + // + // xray_sled_n: + // jmp +24 // 2 bytes + // push arg1 // 5 bytes + // push arg0 // 5 bytes + // 5-byte nop // 5 bytes + // addq $8, esp // 9 bytes + // + // With the following: + // + // nopw // 2 bytes* + // push arg1 // 5 bytes + // push arg0 // 5 bytes + // call (relative) // 5 bytes* + // addq $8, esp // 9 bytes + // + // We need to do this in the following order: + // + // 1.
Overwrite the 5-byte nop with the call (relative), where (relative) is + // the relative offset to the __xray_CustomEvent trampoline. + // 2. Do a two-byte atomic write over the 'jmp +24' to turn it into a 'nopw'. + // This allows us to "enable" this code once the changes have committed. + // + // The "unpatch" should just turn the 'nopw' back to a 'jmp +24'. + // + // The call's 4-byte displacement is relative to the end of the call + // instruction, which occupies sled offsets 12 through 16. + const int64_t TrampolineOffset = + reinterpret_cast<int64_t>(__xray_CustomEvent) - + (static_cast<int64_t>(Sled.Address) + 17); + if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { + Report("XRay Custom Event trampoline (%p) too far from sled (%p)\n", + __xray_CustomEvent, reinterpret_cast<void *>(Sled.Address)); + return false; + } + if (Enable) { + *reinterpret_cast<uint8_t *>(Sled.Address + 12) = CallOpCode; + *reinterpret_cast<uint32_t *>(Sled.Address + 13) = TrampolineOffset; + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), NopwSeq, + std::memory_order_release); + } else { + std::atomic_store_explicit( + reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp24Seq, + std::memory_order_release); + } + return true; +} + // We determine whether the CPU we're running on has the correct features we // need. In x86_64 this will be rdtscp support. bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { Index: test/xray/TestCases/Linux/custom-event-logging.cc =================================================================== --- /dev/null +++ test/xray/TestCases/Linux/custom-event-logging.cc @@ -0,0 +1,38 @@ +// Use the clang feature for custom xray event logging.
+// +// RUN: %clangxx_xray -std=c++11 %s -o %t +// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s +// +#include <cstdio> +#include "xray/xray_interface.h" + +[[clang::xray_always_instrument]] void foo() { + static constexpr char CustomLogged[] = "hello custom logging!"; + printf("before calling the custom logging...\n"); + __xray_customevent(CustomLogged, sizeof(CustomLogged)); + printf("after calling the custom logging...\n"); +} + +void myprinter(void* ptr, size_t size) { + printf("%.*s\n", static_cast<int>(size), static_cast<const char*>(ptr)); +} + +int main() { + foo(); + // CHECK: before calling the custom logging... + // CHECK-NEXT: after calling the custom logging... + printf("setting up custom event handler...\n"); + // CHECK-NEXT: setting up custom event handler... + __xray_set_customevent_handler(myprinter); + __xray_patch(); + foo(); + // CHECK-NEXT: before calling the custom logging... + // CHECK-NEXT: hello custom logging! + // CHECK-NEXT: after calling the custom logging... + printf("removing custom event handler...\n"); + // CHECK-NEXT: removing custom event handler... + __xray_set_customevent_handler(nullptr); + foo(); + // CHECK-NEXT: before calling the custom logging... + // CHECK-NEXT: after calling the custom logging... +}