Index: compiler-rt/trunk/lib/xray/xray_interface_internal.h =================================================================== --- compiler-rt/trunk/lib/xray/xray_interface_internal.h +++ compiler-rt/trunk/lib/xray/xray_interface_internal.h @@ -28,13 +28,15 @@ uint64_t Function; unsigned char Kind; unsigned char AlwaysInstrument; - unsigned char Padding[14]; // Need 32 bytes + unsigned char Version; + unsigned char Padding[13]; // Need 32 bytes #elif SANITIZER_WORDSIZE == 32 uint32_t Address; uint32_t Function; unsigned char Kind; unsigned char AlwaysInstrument; - unsigned char Padding[6]; // Need 16 bytes + unsigned char Version; + unsigned char Padding[5]; // Need 16 bytes #else #error "Unsupported word size." #endif Index: compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S =================================================================== --- compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S +++ compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S @@ -202,10 +202,7 @@ .type __xray_CustomEvent,@function __xray_CustomEvent: .cfi_startproc - subq $16, %rsp - .cfi_def_cfa_offset 24 - movq %rbp, 8(%rsp) - movq %rax, 0(%rsp) + SAVE_REGISTERS // We take two arguments to this trampoline, which should be in rdi and rsi // already. We also make sure that we stash %rax because we use that register @@ -215,14 +212,20 @@ je .LcustomEventCleanup // At this point we know that rcx and rdx already has the data, so we just - // call the logging handler. + // call the logging handler, after aligning the stack to a 16-byte boundary. + // The approach we're taking here uses additional stack space to stash the + // stack pointer twice before aligning the pointer to 16-bytes. If the stack + // was 8-byte aligned, it will become 16-byte aligned -- when restoring the + // pointer, we can always look -8 bytes from the current position to get + // either of the values we've stashed in the first place. + pushq %rsp + pushq (%rsp) + andq $-0x10, %rsp callq *%rax + movq 8(%rsp), %rsp .LcustomEventCleanup: - movq 0(%rsp), %rax - movq 8(%rsp), %rbp - addq $16, %rsp - .cfi_def_cfa_offset 8 + RESTORE_REGISTERS retq .Ltmp8: Index: compiler-rt/trunk/lib/xray/xray_x86_64.cc =================================================================== --- compiler-rt/trunk/lib/xray/xray_x86_64.cc +++ compiler-rt/trunk/lib/xray/xray_x86_64.cc @@ -76,6 +76,7 @@ static constexpr uint16_t MovR10Seq = 0xba41; static constexpr uint16_t Jmp9Seq = 0x09eb; static constexpr uint16_t Jmp20Seq = 0x14eb; +static constexpr uint16_t Jmp15Seq = 0x0feb; static constexpr uint8_t JmpOpCode = 0xe9; static constexpr uint8_t RetOpCode = 0xc3; static constexpr uint16_t NopwSeq = 0x9066; @@ -207,8 +208,10 @@ const XRaySledEntry &Sled) XRAY_NEVER_INSTRUMENT { // Here we do the dance of replacing the following sled: // + // In Version 0: + // // xray_sled_n: - // jmp +19 // 2 bytes + // jmp +20 // 2 bytes // ... // // With the following: @@ -216,24 +219,35 @@ // nopw // 2 bytes* // ... // - // We need to do this in the following order: // - // 1. Overwrite the 5-byte nop with the call (relative), where (relative) is - // the relative offset to the __xray_CustomEvent trampoline. - // 2. Do a two-byte atomic write over the 'jmp +24' to turn it into a 'nopw'. - // This allows us to "enable" this code once the changes have committed. + // The "unpatch" should just turn the 'nopw' back to a 'jmp +20'. + // + // --- // - // The "unpatch" should just turn the 'nopw' back to a 'jmp +24'. + // In Version 1: + // + // The jump offset is now 15 bytes (0x0f), so when restoring the nopw back + // to a jmp, use 15 bytes instead. // if (Enable) { std::atomic_store_explicit( reinterpret_cast *>(Sled.Address), NopwSeq, std::memory_order_release); } else { - std::atomic_store_explicit( - reinterpret_cast *>(Sled.Address), Jmp20Seq, - std::memory_order_release); - } + switch (Sled.Version) { + case 1: + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), Jmp15Seq, + std::memory_order_release); + break; + case 0: + default: + std::atomic_store_explicit( + reinterpret_cast *>(Sled.Address), Jmp20Seq, + std::memory_order_release); + break; + } + } return false; } Index: compiler-rt/trunk/test/xray/TestCases/Linux/custom-event-handler-alignment.cc =================================================================== --- compiler-rt/trunk/test/xray/TestCases/Linux/custom-event-handler-alignment.cc +++ compiler-rt/trunk/test/xray/TestCases/Linux/custom-event-handler-alignment.cc @@ -0,0 +1,42 @@ +// Make sure we're aligning the stack properly when lowering the custom event +// calls. +// +// RUN: %clangxx_xray -std=c++11 %s -o %t +// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false" \ +// RUN: %run %t 2>&1 +// REQUIRES: x86_64-linux +// REQUIRES: built-in-llvm-tree +#include +#include +#include "xray/xray_interface.h" + +[[clang::xray_never_instrument]] __attribute__((weak)) __m128 f(__m128 *i) { + return *i; +} + +[[clang::xray_always_instrument]] void foo() { + __xray_customevent(0, 0); + __m128 v = {}; + f(&v); +} + +[[clang::xray_always_instrument]] void bar() { + __xray_customevent(0, 0); +} + +void printer(void* ptr, size_t size) { + printf("handler called\n"); + __m128 v = {}; + f(&v); +} + +int main(int argc, char* argv[]) { + __xray_set_customevent_handler(printer); + __xray_patch(); + foo(); // CHECK: handler called + bar(); // CHECK: handler called + __xray_unpatch(); + __xray_remove_customevent_handler(); + foo(); + bar(); +} Index: compiler-rt/trunk/test/xray/TestCases/Linux/custom-event-logging.cc =================================================================== --- compiler-rt/trunk/test/xray/TestCases/Linux/custom-event-logging.cc +++ compiler-rt/trunk/test/xray/TestCases/Linux/custom-event-logging.cc @@ -2,6 +2,8 @@ // // RUN: %clangxx_xray -std=c++11 %s -o %t // RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_xray -std=c++11 -fpic -fpie %s -o %t +// RUN: XRAY_OPTIONS="patch_premain=false verbosity=1 xray_naive_log=false xray_logfile_base=custom-event-logging.xray-" %run %t 2>&1 | FileCheck %s // FIXME: Support this in non-x86_64 as well // REQUIRES: x86_64-linux // REQUIRES: built-in-llvm-tree