Index: CMakeLists.txt
===================================================================
--- CMakeLists.txt
+++ CMakeLists.txt
@@ -37,6 +37,8 @@
 mark_as_advanced(COMPILER_RT_BUILD_BUILTINS)
 option(COMPILER_RT_BUILD_SANITIZERS "Build sanitizers" ON)
 mark_as_advanced(COMPILER_RT_BUILD_SANITIZERS)
+option(COMPILER_RT_BUILD_XRAY "Build xray" ON)  # Gates add_subdirectory(xray) in lib/CMakeLists.txt.
+mark_as_advanced(COMPILER_RT_BUILD_XRAY)
 
 if (COMPILER_RT_STANDALONE_BUILD)
   if (NOT LLVM_CONFIG_PATH)
Index: cmake/config-ix.cmake
===================================================================
--- cmake/config-ix.cmake
+++ cmake/config-ix.cmake
@@ -161,6 +161,7 @@
 set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
 set(ALL_ESAN_SUPPORTED_ARCH ${X86_64})
 set(ALL_SCUDO_SUPPORTED_ARCH ${X86_64})
+set(ALL_XRAY_SUPPORTED_ARCH ${X86_64})  # The XRay runtime is x86_64-only for now.
 
 if(APPLE)
   include(CompilerRTDarwinUtils)
@@ -350,6 +351,9 @@
   list_intersect(SCUDO_SUPPORTED_ARCH
     ALL_SCUDO_SUPPORTED_ARCH
     SANITIZER_COMMON_SUPPORTED_ARCH)
+  list_intersect(XRAY_SUPPORTED_ARCH
+    ALL_XRAY_SUPPORTED_ARCH
+    SANITIZER_COMMON_SUPPORTED_ARCH)
 else()
   # Architectures supported by compiler-rt libraries.
filter_available_targets(SANITIZER_COMMON_SUPPORTED_ARCH @@ -373,6 +377,7 @@ filter_available_targets(ESAN_SUPPORTED_ARCH ${ALL_ESAN_SUPPORTED_ARCH}) filter_available_targets(SCUDO_SUPPORTED_ARCH ${ALL_SCUDO_SUPPORTED_ARCH}) + filter_available_targets(XRAY_SUPPORTED_ARCH ${ALL_XRAY_SUPPORTED_ARCH}) endif() if (MSVC) Index: include/CMakeLists.txt =================================================================== --- include/CMakeLists.txt +++ include/CMakeLists.txt @@ -7,7 +7,8 @@ sanitizer/linux_syscall_hooks.h sanitizer/lsan_interface.h sanitizer/msan_interface.h - sanitizer/tsan_interface_atomic.h) + sanitizer/tsan_interface_atomic.h + xray/xray_interface.h) set(output_dir ${COMPILER_RT_OUTPUT_DIR}/include) Index: include/xray/xray_interface.h =================================================================== --- /dev/null +++ include/xray/xray_interface.h @@ -0,0 +1,60 @@ +//===-- xray_interface.h ----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// APIs for controlling XRay functionality explicitly. +//===----------------------------------------------------------------------===// +#ifndef XRAY_XRAY_INTERFACE_H +#define XRAY_XRAY_INTERFACE_H + +namespace __xray { +enum class EntryType : unsigned short { ENTRY = 0, EXIT = 1 }; +} + +extern "C" { + +// Provide a function to invoke for when instrumentation points are hit. This is +// a user-visible control surface that overrides the default implementation. 
The +// function provided should take the following arguments: +// +// - function id: an identifier that indicates the id of a function; this id +// is generated by xray; the mapping between the function id +// and the actual function pointer is available through +// __xray_table. +// - entry type: identifies what kind of instrumentation point was encountered +// (function entry, function exit, etc.). See the enum +// __xray::EntryType for more details. +// +// Returns 1 on success, 0 on error. +extern int __xray_set_handler(void (*entry)(int32_t, unsigned short)); + +// This removes whatever the currently provided handler is. Returns 1 on +// success, 0 on error. +extern int __xray_remove_handler(); + +// This tells XRay to patch the instrumentation points. This is an asynchronous +// process, and returns the following status in specific cases: +// +// - 0 : XRay is not initialized. +// - 1 : We've done the notification. +// - 2 : Patching / un-patching is on-going. +extern int __xray_patch(); + +// Reverses the effect of __xray_patch(). This is an asynchronous process, and +// returns the following status in specific cases. +// +// - 0 : XRay is not initialized. +// - 1 : We've done the notification. +// - 2 : Patching / un-patching is on-going. +extern int __xray_unpatch(); + +} + +#endif Index: lib/CMakeLists.txt =================================================================== --- lib/CMakeLists.txt +++ lib/CMakeLists.txt @@ -57,3 +57,7 @@ add_subdirectory(scudo) endif() endif() + +if(COMPILER_RT_BUILD_XRAY) + add_subdirectory(xray) +endif() Index: lib/xray/CMakeLists.txt =================================================================== --- /dev/null +++ lib/xray/CMakeLists.txt @@ -0,0 +1,32 @@ +# Build for the XRay runtime support library. + +set(XRAY_SOURCES + xray_init.cc + xray_interface.cc +) + +include_directories(..) 
+
+set(XRAY_CFLAGS ${SANITIZER_COMMON_CFLAGS})
+
+set(XRAY_COMMON_DEFINITIONS XRAY_HAS_EXCEPTIONS=1)
+
+add_compiler_rt_object_libraries(RTXray
+    ARCHS ${XRAY_SUPPORTED_ARCH}
+    SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS}
+    DEFS ${XRAY_COMMON_DEFINITIONS})
+
+add_custom_target(xray)
+set(XRAY_COMMON_RUNTIME_OBJECT_LIBS RTXray)
+add_compiler_rt_runtime(clang_rt.xray
+   STATIC
+   ARCHS ${XRAY_SUPPORTED_ARCH}
+   OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS}
+   CFLAGS ${XRAY_CFLAGS}
+   DEFS ${XRAY_COMMON_DEFINITIONS}
+   PARENT_TARGET xray)
+add_dependencies(compiler-rt xray)
+
+# if(COMPILER_RT_INCLUDE_TESTS)
+#   add_subdirectory(tests)
+# endif()
Index: lib/xray/xray_init.cc
===================================================================
--- /dev/null
+++ lib/xray/xray_init.cc
@@ -0,0 +1,104 @@
+//===-- xray_init.cc --------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// XRay initialisation logic.
+//===----------------------------------------------------------------------===//
+
+#include <atomic>
+#include <cstdint>
+#include <cstdio>
+
+#include "xray_interface_internal.h"
+#include "llvm/Support/ELF.h"
+
+extern "C" {
+extern void __xray_init();
+extern const XRaySledEntry __start_xray_instr_map[] __attribute__((weak));
+extern const XRaySledEntry __stop_xray_instr_map[] __attribute__((weak));
+}
+
+// The XRay instrumentation map is delimited by the __start_xray_instr_map and
+// __stop_xray_instr_map symbols declared above. __xray_init() publishes it in
+// XRayInstrMap and then signals, via this global atomic boolean, that the
+// runtime has been initialized properly.
+std::atomic<bool> XRayInitialized{false};
+
+// This should always be updated before XRayInitialized is updated.
+std::atomic<__xray::XRaySledMap> XRayInstrMap{};
+
+void __xray_dump() {
+  // FIXME: This is just a proof-of-concept implementation, should make this
+  // more accessible for the real thing.
+  auto LocalEntries = XRayInstrMap.load(std::memory_order_acquire);
+  if (LocalEntries.Entries == 0) {
+    printf("Instrumentation map is empty.\n");
+    return;
+  }
+
+  printf("__xray_instr_map@%p..%p\n", __start_xray_instr_map,
+         __stop_xray_instr_map);
+  auto Countdown = LocalEntries.Entries;
+  auto Sled = LocalEntries.Sleds;
+  static constexpr char EntrySled[] = "E";
+  static constexpr char ExitSled[] = "X";
+  static constexpr char Always[] = "*";
+  static constexpr char Maybe[] = "?";
+  while (Countdown != 0) {
+    printf("%lx\t%s\t%s\t@function(%lx)\n", Sled->Address,
+           Sled->Kind == static_cast<unsigned char>(__xray::EntryType::ENTRY)
+               ? EntrySled
+               : ExitSled,
+           Sled->AlwaysInstrument == 1 ? Always : Maybe, Sled->Function);
+    --Countdown;
+    ++Sled;
+  }
+}
+
+extern "C" {
+void __xray_DemoLog(int32_t FuncId, unsigned short Type) {
+  uint64_t Hi;
+  uint32_t Lo, CPUId;
+  __asm__ __volatile__("rdtscp" : "=a"(Lo), "=d"(Hi), "=c"(CPUId));
+  // CPUId is 32 bits wide, so it is printed with %u rather than %lu.
+  int ignored = printf(
+      "%u: [%lu] %s%d\n", CPUId, (Hi << 32) | Lo,
+      Type == static_cast<unsigned short>(__xray::EntryType::ENTRY) ? "E" : "X",
+      FuncId);
+  (void)(ignored);
+}
+}
+
+// __xray_init() publishes the bounds of the instrumentation map so that the
+// rest of the runtime can find the sleds to patch.
+void __xray_init() {
+  if (__start_xray_instr_map == nullptr) {
+    printf("XRay instrumentation map missing. Not initializing XRay.\n");
+    return;
+  }
+
+  // Now initialize the XRayInstrMap global struct with the address of the
+  // entries, reinterpreted as an array of XRaySledEntry objects. We use the
+  // virtual pointer we have from the section to provide us the correct
+  // information.
+  __xray::XRaySledMap SledMap{};
+  SledMap.Sleds = __start_xray_instr_map;
+  SledMap.Entries = __stop_xray_instr_map - __start_xray_instr_map;
+  XRayInstrMap.store(SledMap, std::memory_order_release);
+  XRayInitialized.store(true, std::memory_order_release);
+
+  // FIXME: Only for demo, patch the functions before we run main.
+  __xray_dump();
+  __xray_set_handler(__xray_DemoLog);
+  __xray_patch();
+}
+
+__attribute__((section(".preinit_array"),
+               used)) void (*__local_xray_preinit)(void) = __xray_init;
Index: lib/xray/xray_interface.cc
===================================================================
--- /dev/null
+++ lib/xray/xray_interface.cc
@@ -0,0 +1,234 @@
+//===-- xray_interface.cc ---------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of the API functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "xray_interface_internal.h"
+#include <atomic>
+#include <cerrno>
+#include <cstdint>
+#include <cstdio>
+#include <limits>
+#include <sys/mman.h>
+#include <unistd.h>
+
+namespace __xray {
+
+// This is the function to call when we encounter the entry or exit sleds.
+std::atomic<void (*)(int32_t, unsigned short)> XRayPatchedFunction{nullptr};
+}
+
+extern std::atomic<bool> XRayInitialized;
+extern std::atomic<__xray::XRaySledMap> XRayInstrMap;
+
+void __xray_FunctionEntry() {
+  // First thing we do is save the caller provided registers before doing any
+  // actual work.
+  uint64_t Rrdi, Rrax, Rrdx, Rrsi, Rrcx, Rr8, Rr9, Rrbp;
+  __asm__ __volatile__("mov %%rbp, %0" : "=m"(Rrbp));
+  __asm__ __volatile__("mov %%rdi, %0" : "=m"(Rrdi));
+  __asm__ __volatile__("mov %%rax, %0" : "=m"(Rrax));
+  __asm__ __volatile__("mov %%rdx, %0" : "=m"(Rrdx));
+  __asm__ __volatile__("mov %%rsi, %0" : "=m"(Rrsi));
+  __asm__ __volatile__("mov %%rcx, %0" : "=m"(Rrcx));
+  __asm__ __volatile__("mov %%r8, %0" : "=m"(Rr8));
+  __asm__ __volatile__("mov %%r9, %0" : "=m"(Rr9));
+
+  // FIXME: Handle async signal safety, and prevent recursive calls.
+  auto Fn = __xray::XRayPatchedFunction.load(std::memory_order_acquire);
+  if (Fn != nullptr) {
+    int32_t FunctionID;
+    static constexpr unsigned short Type =
+        static_cast<unsigned short>(__xray::EntryType::ENTRY);
+    __asm__("mov %%r10d, %0" : "=g"(FunctionID) : : "%r10");
+    (*Fn)(FunctionID, Type);
+  }
+
+  // Then restore the registers before returning.
+  __asm__ __volatile__("mov %0,%%r9" : : "m"(Rr9) : "%r9");
+  __asm__ __volatile__("mov %0,%%r8" : : "m"(Rr8) : "%r8");
+  __asm__ __volatile__("mov %0,%%rcx" : : "m"(Rrcx) : "%rcx");
+  __asm__ __volatile__("mov %0,%%rsi" : : "m"(Rrsi) : "%rsi");
+  __asm__ __volatile__("mov %0,%%rdx" : : "m"(Rrdx) : "%rdx");
+  __asm__ __volatile__("mov %0,%%rax" : : "m"(Rrax) : "%rax");
+  __asm__ __volatile__("mov %0,%%rdi" : : "m"(Rrdi) : "%rdi");
+  __asm__ __volatile__("mov %0,%%rbp" : : "m"(Rrbp) : "%rbp");
+}
+
+void __xray_FunctionExit() {
+  // First thing we do is save the caller provided registers before doing any
+  // actual work.
+  uint64_t Rrax, Rrdx, Rrbp;
+  __asm__ __volatile__("mov %%rax, %0" : "=m"(Rrax));
+  __asm__ __volatile__("mov %%rdx, %0" : "=m"(Rrdx));
+  __asm__ __volatile__("mov %%rbp, %0" : "=m"(Rrbp));
+
+  // Then it's safe to call the provided function.
+  // FIXME: Handle async signal safety, and prevent recursive calls.
+  auto Fn = __xray::XRayPatchedFunction.load(std::memory_order_acquire);
+  if (Fn != nullptr) {
+    int32_t FunctionID;
+    static constexpr unsigned short Type =
+        static_cast<unsigned short>(__xray::EntryType::EXIT);
+    __asm__("mov %%r10d, %0" : "=g"(FunctionID) : : "r10");
+    (*Fn)(FunctionID, Type);
+  }
+
+  // Then restore the registers before returning.
+  __asm__ __volatile__("mov %0,%%rbp" : : "m"(Rrbp) : "%rbp");
+  __asm__ __volatile__("mov %0,%%rdx" : : "m"(Rrdx) : "%rdx");
+  __asm__ __volatile__("mov %0,%%rax" : : "m"(Rrax) : "%rax");
+}
+
+int __xray_set_handler(void (*entry)(int32_t, unsigned short)) {
+  if (XRayInitialized.load(std::memory_order_acquire)) {
+    __xray::XRayPatchedFunction.store(entry, std::memory_order_release);
+    return 1;
+  }
+  return 0;
+}
+
+// Removes the current handler by installing a null one; like
+// __xray_set_handler(), this returns 0 while XRay is not initialized.
+int __xray_remove_handler() { return __xray_set_handler(nullptr); }
+
+// FIXME: __xray_unpatch() is declared in the interface headers but is not
+// implemented yet.
+
+std::atomic<bool> XRayPatching{false};
+
+// Patches every sled in the instrumentation map so that it loads the function
+// id into r10d and transfers control to the matching trampoline.
+//
+// Returns, in line with include/xray/xray_interface.h:
+//   -  0 : XRay is not initialized.
+//   -  1 : All sleds were processed.
+//   -  2 : Patching is already on-going.
+//   - -1 : The map is empty, the page size is unavailable, or mprotect failed.
+int __xray_patch() {
+  // FIXME: Make this happen asynchronously. For now just do this sequentially.
+  if (!XRayInitialized.load(std::memory_order_acquire))
+    return 0; // Not initialized.
+
+  // The expected value has to be a fresh local on every call:
+  // compare_exchange_strong overwrites it with the observed value on failure.
+  bool NotPatching = false;
+  if (!XRayPatching.compare_exchange_strong(NotPatching, true,
+                                            std::memory_order_acq_rel,
+                                            std::memory_order_acquire)) {
+    return 2; // Already patching.
+  }
+
+  // From here on we own the flag; release it on every way out of the function.
+  struct PatchingFlagReset {
+    ~PatchingFlagReset() {
+      XRayPatching.store(false, std::memory_order_release);
+    }
+  } ResetOnExit;
+
+  __xray::XRaySledMap InstrMap = XRayInstrMap.load(std::memory_order_acquire);
+  if (InstrMap.Entries == 0)
+    return -1;
+
+  // mprotect works on whole pages, so round sled addresses down to the page
+  // size the system reports.
+  const long PageSize = sysconf(_SC_PAGESIZE);
+  if (PageSize <= 0)
+    return -1;
+  const uint64_t PageMask = ~(static_cast<uint64_t>(PageSize) - 1);
+
+  static constexpr uint8_t CallOpCode = 0xe8;
+  static constexpr uint16_t MovR10Seq = 0xba41;
+  static constexpr uint8_t JmpOpCode = 0xe9;
+  static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
+  static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
+
+  // Step 1: Compute the function id, as a unique identifier per function in the
+  // instrumentation map.
+  int32_t FuncId = 1;
+  uint64_t CurFun = 0;
+  for (std::size_t I = 0; I < InstrMap.Entries; I++) {
+    auto Sled = InstrMap.Sleds[I];
+    auto F = Sled.Function;
+    if (CurFun == 0)
+      CurFun = F;
+    if (F != CurFun) {
+      ++FuncId;
+      CurFun = F;
+    }
+
+    // Each sled is 11 bytes long and is rewritten in place:
+    //
+    //   entry sled:  jmp +9 ; <9 byte nop>
+    //     becomes:   mov r10d, <function id> ; call <__xray_FunctionEntry>
+    //
+    //   exit sled:   ret ; <10 byte nop>
+    //     becomes:   mov r10d, <function id> ; jmp <__xray_FunctionExit>
+    //
+    // Pick the branch opcode and the trampoline for this kind of sled.
+    uint8_t BranchOpCode;
+    int64_t Trampoline;
+    if (Sled.Kind == static_cast<unsigned char>(__xray::EntryType::ENTRY)) {
+      BranchOpCode = CallOpCode;
+      Trampoline = reinterpret_cast<int64_t>(__xray_FunctionEntry);
+    } else if (Sled.Kind ==
+               static_cast<unsigned char>(__xray::EntryType::EXIT)) {
+      BranchOpCode = JmpOpCode;
+      Trampoline = reinterpret_cast<int64_t>(__xray_FunctionExit);
+    } else {
+      continue; // Not a sled kind we know how to patch.
+    }
+
+    // The branch takes a 32-bit offset relative to the end of the sled, 11
+    // bytes from its start. Check the range before touching any protections.
+    int64_t TrampolineOffset =
+        Trampoline - (static_cast<int64_t>(Sled.Address) + 11);
+    if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+      // FIXME: Print out an error here.
+      continue;
+    }
+
+    // To patch the nop sled we mprotect the page(s) containing it to be
+    // writeable.
+    void *PageAlignedAddr = reinterpret_cast<void *>(Sled.Address & PageMask);
+    std::size_t MProtectLen =
+        (Sled.Address + 12) - reinterpret_cast<uint64_t>(PageAlignedAddr);
+    if (mprotect(PageAlignedAddr, MProtectLen,
+                 PROT_READ | PROT_WRITE | PROT_EXEC) == -1) {
+      printf("Failed mprotect: %d\n", errno);
+      return -1;
+    }
+
+    // We need to do this in the following order:
+    //
+    // 1. Put the function id first, 2 bytes from the start of the sled (just
+    //    after the 2 bytes the "mov r10d" opcode will occupy).
+    // 2. Put the call/jmp opcode 6 bytes from the start of the sled.
+    // 3. Put the relative offset 7 bytes from the start of the sled.
+    // 4. Do an atomic write over the first 2 bytes of the sled (the jmp, or
+    //    the ret and the nop byte after it) for the "mov r10d" opcode.
+    *reinterpret_cast<int32_t *>(Sled.Address + 2) = FuncId;
+    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = BranchOpCode;
+    *reinterpret_cast<int32_t *>(Sled.Address + 7) =
+        static_cast<int32_t>(TrampolineOffset);
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+        std::memory_order_release);
+
+    if (mprotect(PageAlignedAddr, MProtectLen, PROT_READ | PROT_EXEC) == -1) {
+      printf("Failed mprotect: %d\n", errno);
+      return -1;
+    }
+  }
+  return 1;
+}
Index: lib/xray/xray_interface_internal.h
===================================================================
--- /dev/null
+++ lib/xray/xray_interface_internal.h
@@ -0,0 +1,54 @@
+//===-- xray_interface_internal.h -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of the API functions. See also include/xray/xray_interface.h.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_INTERFACE_INTERNAL_H
+#define XRAY_INTERFACE_INTERNAL_H
+#include <cstddef>
+#include <cstdint>
+
+extern "C" {
+// This is the trampoline function called when the function entry sled is
+// instrumented appropriately, and patched by the XRay runtime.
+extern void __xray_FunctionEntry();
+
+// This is the trampoline function called when the function exit sled is
+// instrumented appropriately, and patched by the XRay runtime.
+extern void __xray_FunctionExit();
+
+extern int __xray_set_handler(void (*entry)(int32_t, unsigned short));
+extern int __xray_remove_handler();
+extern int __xray_patch();
+extern int __xray_unpatch();
+
+struct XRaySledEntry {
+  uint64_t Address;               // Address of the first byte of the sled.
+  uint64_t Function;              // Consecutive equal values share a function id.
+  unsigned char Kind;             // Compared against __xray::EntryType values.
+  unsigned char AlwaysInstrument; // 1 is reported as "always" by __xray_dump().
+  unsigned char Padding[14]; // Need 32 bytes
+};
+}
+
+namespace __xray {
+// Keep in sync with the same enum in include/xray/xray_interface.h.
+enum class EntryType : unsigned short { ENTRY = 0, EXIT = 1 };
+
+struct XRaySledMap {
+  const XRaySledEntry *Sleds; // First entry of the instrumentation map.
+  std::size_t Entries;        // Number of entries reachable from Sleds.
+};
+
+}
+
+#endif