Index: lib/xray/CMakeLists.txt
===================================================================
--- lib/xray/CMakeLists.txt
+++ lib/xray/CMakeLists.txt
@@ -14,46 +14,38 @@
 set(x86_64_SOURCES
   xray_x86_64.cc
   xray_trampoline_x86_64.S
-  ${XRAY_SOURCES})
+  xray_trampoline_x86_64_customevent.cc)
 
 set(arm_SOURCES
   xray_arm.cc
-  xray_trampoline_arm.S
-  ${XRAY_SOURCES})
+  xray_trampoline_arm.S)
 
-set(armhf_SOURCES
-  ${arm_SOURCES})
+set(armhf_SOURCES)
 
 set(aarch64_SOURCES
   xray_AArch64.cc
-  xray_trampoline_AArch64.S
-  ${XRAY_SOURCES})
+  xray_trampoline_AArch64.S)
 
 set(mips_SOURCES
   xray_mips.cc
-  xray_trampoline_mips.S
-  ${XRAY_SOURCES})
+  xray_trampoline_mips.S)
 
 set(mipsel_SOURCES
   xray_mips.cc
-  xray_trampoline_mips.S
-  ${XRAY_SOURCES})
+  xray_trampoline_mips.S)
 
 set(mips64_SOURCES
   xray_mips64.cc
-  xray_trampoline_mips64.S
-  ${XRAY_SOURCES})
+  xray_trampoline_mips64.S)
 
 set(mips64el_SOURCES
   xray_mips64.cc
-  xray_trampoline_mips64.S
-  ${XRAY_SOURCES})
+  xray_trampoline_mips64.S)
 
 set(powerpc64le_SOURCES
   xray_powerpc64.cc
   xray_trampoline_powerpc64.cc
-  xray_trampoline_powerpc64_asm.S
-  ${XRAY_SOURCES})
+  xray_trampoline_powerpc64_asm.S)
 
 include_directories(..)
 include_directories(../../include)
@@ -65,26 +57,57 @@
 append_list_if(
   COMPILER_RT_BUILD_XRAY_NO_PREINIT XRAY_NO_PREINIT XRAY_COMMON_DEFINITIONS)
 
-add_compiler_rt_object_libraries(RTXray
+add_compiler_rt_object_libraries(RTXRay
   ARCHS ${XRAY_SUPPORTED_ARCH}
   SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS}
   DEFS ${XRAY_COMMON_DEFINITIONS})
 
+set(XRAY_DYNAMIC_DEFINITIONS ${XRAY_COMMON_DEFINITIONS}
+    XRAY_DYNAMIC=1
+    XRAY_NO_PREINIT  # Force building the dynamic library with no preinit support.
+    )
+set(XRAY_DYNAMIC_CFLAGS ${XRAY_CFLAGS})
+set(XRAY_DYNAMIC_LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS})
+
+add_compiler_rt_object_libraries(RTXRay_dynamic
+  ARCHS ${XRAY_SUPPORTED_ARCH}
+  SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_DYNAMIC_CFLAGS}
+  DEFS ${XRAY_DYNAMIC_DEFINITIONS})
+
 add_compiler_rt_component(xray)
 
 set(XRAY_COMMON_RUNTIME_OBJECT_LIBS
   RTSanitizerCommon
   RTSanitizerCommonLibc)
 
+set(XRAY_DYNAMIC_LIBS ${SANITIZER_COMMON_LINK_LIBS})
+append_list_if(COMPILER_RT_HAS_LIBDL dl XRAY_DYNAMIC_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread XRAY_DYNAMIC_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBRT rt XRAY_DYNAMIC_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBM m XRAY_DYNAMIC_LIBS)
+list(APPEND XRAY_DYNAMIC_LIBS ${SANITIZER_CXX_ABI_LIBRARY})
+
 foreach(arch ${XRAY_SUPPORTED_ARCH})
   if(CAN_TARGET_${arch})
     add_compiler_rt_runtime(clang_rt.xray
       STATIC
       ARCHS ${arch}
       SOURCES ${${arch}_SOURCES}
+      OBJECT_LIBS RTXRay
+                  ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
       CFLAGS ${XRAY_CFLAGS}
       DEFS ${XRAY_COMMON_DEFINITIONS}
-      OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
+      PARENT_TARGET xray)
+    add_compiler_rt_runtime(clang_rt.xray
+      SHARED
+      ARCHS ${arch}
+      SOURCES ${${arch}_SOURCES}
+      OBJECT_LIBS RTXRay_dynamic
+                  ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
+      CFLAGS ${XRAY_DYNAMIC_CFLAGS}
+      LINK_FLAGS ${XRAY_DYNAMIC_LINK_FLAGS}
+      LINK_LIBS ${XRAY_DYNAMIC_LIBS}
+      DEFS ${XRAY_DYNAMIC_DEFINITIONS}
       PARENT_TARGET xray)
   endif()
 endforeach()
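The SHARED flavor above force-defines XRAY_NO_PREINIT because pre-initialization only works for the main executable: the ELF gABI runs .preinit_array functions for the executable alone, and GNU ld refuses to emit that section into a shared object. A minimal sketch of the distinction, using a hypothetical XRayEarlyInit rather than the runtime's real entry point:

// Sketch only: why the dynamic build forces XRAY_NO_PREINIT. Names here are
// illustrative, not the runtime's own.
#include <cstdio>

static void XRayEarlyInit() { std::puts("xray: early init"); }

#ifndef XRAY_DYNAMIC
// Statically linked into the executable: a .preinit_array entry runs before
// all shared-object constructors, which is what lets the static runtime set
// itself up so early.
__attribute__((section(".preinit_array"), used))
static void (*XRayPreinitEntry)() = XRayEarlyInit;
#else
// Built as a DSO: .preinit_array is not allowed, so initialize when the
// dynamic loader runs this object's constructors instead.
__attribute__((constructor))
static void XRayCtor() { XRayEarlyInit(); }
#endif

int main() { return 0; }

Under this scheme the static archive keeps its very-early initialization, while the shared library initializes at load time like any other DSO.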
Index: lib/xray/xray_trampoline_x86_64.S
===================================================================
--- lib/xray/xray_trampoline_x86_64.S
+++ lib/xray/xray_trampoline_x86_64.S
@@ -195,41 +195,5 @@
   .size __xray_ArgLoggerEntry, .Larg1entryEnd-__xray_ArgLoggerEntry
   .cfi_endproc
 
-//===----------------------------------------------------------------------===//
-
-  .global __xray_CustomEvent
-  .align 16, 0x90
-  .type __xray_CustomEvent,@function
-__xray_CustomEvent:
-  .cfi_startproc
-  SAVE_REGISTERS
-
-  // We take two arguments to this trampoline, which should be in rdi and rsi
-  // already. We also make sure that we stash %rax because we use that register
-  // to call the logging handler.
-  movq _ZN6__xray22XRayPatchedCustomEventE(%rip), %rax
-  testq %rax,%rax
-  je .LcustomEventCleanup
-
-  // At this point we know that rcx and rdx already has the data, so we just
-  // call the logging handler, after aligning the stack to a 16-byte boundary.
-  // The approach we're taking here uses additional stack space to stash the
-  // stack pointer twice before aligning the pointer to 16-bytes. If the stack
-  // was 8-byte aligned, it will become 16-byte aligned -- when restoring the
-  // pointer, we can always look -8 bytes from the current position to get
-  // either of the values we've stashed in the first place.
-  pushq %rsp
-  pushq (%rsp)
-  andq $-0x10, %rsp
-  callq *%rax
-  movq 8(%rsp), %rsp
-
-.LcustomEventCleanup:
-  RESTORE_REGISTERS
-  retq
-
-.Ltmp8:
-  .size __xray_CustomEvent, .Ltmp8-__xray_CustomEvent
-  .cfi_endproc
 
 NO_EXEC_STACK_DIRECTIVE
Index: lib/xray/xray_trampoline_x86_64_customevent.cc
===================================================================
--- /dev/null
+++ lib/xray/xray_trampoline_x86_64_customevent.cc
@@ -0,0 +1,79 @@
+//===-- xray_trampoline_x86_64_customevent.cc -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Here we implement the __xray_CustomEvent trampoline in C++, so that we can
+// let the compiler define the appropriate entries in the global offset table
+// (GOT) and the procedure linkage table (PLT) for us, instead of having to
+// write them out by hand in assembler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include <cstddef>
+
+namespace __xray {
+
+// Forward-declaration of the XRayPatchedCustomEvent global.
+__attribute__((weak)) __sanitizer::atomic_uintptr_t XRayPatchedCustomEvent;
+
+} // namespace __xray
+
+extern "C" {
+void __xray_CustomEvent(void *ptr, std::size_t size) {
+  asm volatile(
+      R"(subq $192, %%rsp
+      .cfi_def_cfa_offset 200
+      // At this point, the stack pointer should be aligned to an 8-byte
+      // boundary, because any call instructions that come after this will add
+      // another 8 bytes and therefore align it to 16 bytes.
+      movq %%rbp, 184(%%rsp)
+      movupd %%xmm0, 168(%%rsp)
+      movupd %%xmm1, 152(%%rsp)
+      movupd %%xmm2, 136(%%rsp)
+      movupd %%xmm3, 120(%%rsp)
+      movupd %%xmm4, 104(%%rsp)
+      movupd %%xmm5, 88(%%rsp)
+      movupd %%xmm6, 72(%%rsp)
+      movupd %%xmm7, 56(%%rsp)
+      movq %%rdi, 48(%%rsp)
+      movq %%rax, 40(%%rsp)
+      movq %%rdx, 32(%%rsp)
+      movq %%rsi, 24(%%rsp)
+      movq %%rcx, 16(%%rsp)
+      movq %%r8, 8(%%rsp)
+      movq %%r9, 0(%%rsp))" ::
+      : "memory");
+  if (uint64_t f = __sanitizer::atomic_load(
+          &__xray::XRayPatchedCustomEvent, __sanitizer::memory_order_acquire)) {
+    reinterpret_cast<void (*)(void *, std::size_t)>(f)(ptr, size);
+  }
+  asm volatile(
+      R"(movq 184(%%rsp), %%rbp
+      movupd 168(%%rsp), %%xmm0
+      movupd 152(%%rsp), %%xmm1
+      movupd 136(%%rsp), %%xmm2
+      movupd 120(%%rsp), %%xmm3
+      movupd 104(%%rsp), %%xmm4
+      movupd 88(%%rsp), %%xmm5
+      movupd 72(%%rsp), %%xmm6
+      movupd 56(%%rsp), %%xmm7
+      movq 48(%%rsp), %%rdi
+      movq 40(%%rsp), %%rax
+      movq 32(%%rsp), %%rdx
+      movq 24(%%rsp), %%rsi
+      movq 16(%%rsp), %%rcx
+      movq 8(%%rsp), %%r8
+      movq 0(%%rsp), %%r9
+      addq $192, %%rsp
+      .cfi_def_cfa_offset 8)" ::
+      : "memory");
+}
+}
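Stripped of the register spills, the control flow the new trampoline implements is an acquire-load of a handler pointer followed by an indirect call. A self-contained model of that dispatch, substituting std::atomic for __sanitizer::atomic_uintptr_t and adding a hypothetical setter (the real runtime stores the pointer from its patching code, not through an API like this):

// Minimal model of the trampoline's dispatch logic; illustrative names only.
#include <atomic>
#include <cstdint>
#include <cstdio>

using Handler = void (*)(void *, std::size_t);
static std::atomic<std::uintptr_t> PatchedCustomEvent{0};

void SetHandler(Handler H) {
  // Release pairs with the acquire below, so the handler's code and data are
  // published before any caller can observe a non-zero pointer.
  PatchedCustomEvent.store(reinterpret_cast<std::uintptr_t>(H),
                           std::memory_order_release);
}

void CustomEventTrampoline(void *Ptr, std::size_t Size) {
  // Zero means "not patched yet": the trampoline degrades to a no-op.
  if (auto F = PatchedCustomEvent.load(std::memory_order_acquire))
    reinterpret_cast<Handler>(F)(Ptr, Size);
}

int main() {
  SetHandler([](void *P, std::size_t N) {
    std::printf("custom event: %p, %zu bytes\n", P, N);
  });
  char Buf[16];
  CustomEventTrampoline(Buf, sizeof(Buf));
}

Because the only synchronization is this single atomic pointer, patched-in call sites never need a lock on the fast path.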
Index: test/xray/lit.cfg
===================================================================
--- test/xray/lit.cfg
+++ test/xray/lit.cfg
@@ -34,7 +34,8 @@
 config.substitutions.append(
     ('%xraylib',
      ('-lm -lpthread -ldl -lrt -L%s '
-      '-Wl,-whole-archive -lclang_rt.xray-%s -Wl,-no-whole-archive')
+      '-Wl,-Bstatic -Wl,-whole-archive -Wl,-no-as-needed -lclang_rt.xray-%s '
+      '-Wl,-no-whole-archive -Wl,-as-needed -Wl,-Bdynamic')
      % (config.compiler_rt_libdir, config.host_arch)))
 
 # Default test suffixes.
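A test linked through the %xraylib substitution would exercise the new trampoline roughly as follows; __xray_customevent is the Clang builtin that lowers to a call to __xray_CustomEvent, and the handler setter mirrors the declaration in xray_interface.h (verify the exact signature in your tree before depending on it):

// Hypothetical test in the style the substitution above links, e.g.:
//   clang++ -fxray-instrument test.cc %xraylib
#include <cstddef>
#include <cstdio>

// Mirrors xray_interface.h; treat this declaration as an assumption.
extern "C" int __xray_set_customevent_handler(void (*)(void *, std::size_t));

[[clang::xray_always_instrument]] int main() {
  __xray_set_customevent_handler([](void *Event, std::size_t Size) {
    std::fprintf(stderr, "custom event at %p, %zu bytes\n", Event, Size);
  });
  char Payload[] = "hello, xray";
  __xray_customevent(Payload, sizeof(Payload)); // lowers to __xray_CustomEvent
  return 0;
}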