Index: lib/xray/xray_interface_internal.h
===================================================================
--- lib/xray/xray_interface_internal.h
+++ lib/xray/xray_interface_internal.h
@@ -61,6 +61,7 @@
 // basis. See xray_trampoline_*.S files for implementations.
 extern void __xray_FunctionEntry();
 extern void __xray_FunctionExit();
+extern void __xray_FunctionTailExit();
 }
 
 #endif
Index: lib/xray/xray_trampoline_x86_64.S
===================================================================
--- lib/xray/xray_trampoline_x86_64.S
+++ lib/xray/xray_trampoline_x86_64.S
@@ -110,3 +110,63 @@
 .Ltmp3:
   .size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit
   .cfi_endproc
+
+  .global __xray_FunctionTailExit
+  .align 16, 0x90
+  .type __xray_FunctionTailExit,@function
+__xray_FunctionTailExit:
+  .cfi_startproc
+  // Save the important registers as in the entry trampoline, but indicate that
+  // this is an exit. In the future, we will introduce a new entry type that
+  // differentiates between a normal exit and a tail exit; doing so requires
+  // incrementing the version number for the header.
+  pushq %rbp
+  .cfi_def_cfa_offset 16
+  subq $200, %rsp  // Reserve the save area addressed by the stores below.
+  movupd %xmm0, 184(%rsp)
+  movupd %xmm1, 168(%rsp)
+  movupd %xmm2, 152(%rsp)
+  movupd %xmm3, 136(%rsp)
+  movupd %xmm4, 120(%rsp)
+  movupd %xmm5, 104(%rsp)
+  movupd %xmm6, 88(%rsp)
+  movupd %xmm7, 72(%rsp)
+  movq %rdi, 64(%rsp)
+  movq %rax, 56(%rsp)
+  movq %rdx, 48(%rsp)
+  movq %rsi, 40(%rsp)
+  movq %rcx, 32(%rsp)
+  movq %r8, 24(%rsp)
+  movq %r9, 16(%rsp)
+
+  movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax  // Load the handler pointer.
+  testq %rax,%rax
+  je .Ltmp4  // Null pointer: skip the call and just restore.
+
+  movl %r10d, %edi  // 1st arg: value in %r10d (presumably the function id - confirm).
+  movl $1, %esi  // 2nd arg: 1, the value this trampoline uses to mark an exit.
+  callq *%rax
+
+.Ltmp4:
+  // Restore the registers.
+  movupd 184(%rsp), %xmm0
+  movupd 168(%rsp), %xmm1
+  movupd 152(%rsp), %xmm2
+  movupd 136(%rsp), %xmm3
+  movupd 120(%rsp), %xmm4
+  movupd 104(%rsp), %xmm5
+  movupd 88(%rsp) , %xmm6
+  movupd 72(%rsp) , %xmm7
+  movq 64(%rsp), %rdi
+  movq 56(%rsp), %rax
+  movq 48(%rsp), %rdx
+  movq 40(%rsp), %rsi
+  movq 32(%rsp), %rcx
+  movq 24(%rsp), %r8
+  movq 16(%rsp), %r9
+  addq $200, %rsp
+  popq %rbp
+  retq
+.Ltmp5:
+  .size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit
+  .cfi_endproc
Index: lib/xray/xray_x86_64.cc
===================================================================
--- lib/xray/xray_x86_64.cc
+++ lib/xray/xray_x86_64.cc
@@ -114,13 +114,10 @@
 bool patchFunctionTailExit(const bool Enable, const uint32_t FuncId,
                            const XRaySledEntry &Sled) {
   // Here we do the dance of replacing the tail call sled with a similar
-  // sequence as the entry sled, but calls the exit sled instead, so we can
-  // treat tail call exits as if they were normal exits.
-  //
-  // FIXME: In the future we'd need to distinguish between non-tail exits and
-  // tail exits for better information preservation.
-  int64_t TrampolineOffset = reinterpret_cast<int64_t>(__xray_FunctionExit) -
-                             (static_cast<int64_t>(Sled.Address) + 11);
+  // sequence as the entry sled, but calls the tail exit sled instead.
+  int64_t TrampolineOffset =
+      reinterpret_cast<int64_t>(__xray_FunctionTailExit) -
+      (static_cast<int64_t>(Sled.Address) + 11);
   if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
     Report("XRay Exit trampoline (%p) too far from sled (%p); distance = "
            "%ld\n",