Index: cmake/config-ix.cmake
===================================================================
--- cmake/config-ix.cmake
+++ cmake/config-ix.cmake
@@ -161,7 +161,7 @@
 set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
 set(ALL_ESAN_SUPPORTED_ARCH ${X86_64})
 set(ALL_SCUDO_SUPPORTED_ARCH ${X86_64})
-set(ALL_XRAY_SUPPORTED_ARCH ${X86_64})
+set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32})
 
 if(APPLE)
   include(CompilerRTDarwinUtils)
Index: lib/sanitizer_common/scripts/gen_dynamic_list.py
===================================================================
--- lib/sanitizer_common/scripts/gen_dynamic_list.py
+++ lib/sanitizer_common/scripts/gen_dynamic_list.py
@@ -19,6 +19,7 @@
 import re
 import subprocess
 import sys
+import platform
 
 new_delete = set([
   '_Znam', '_ZnamRKSt9nothrow_t',  # operator new[](unsigned long)
@@ -50,7 +51,7 @@
     raise subprocess.CalledProcessError(nm_proc.returncode, nm)
   func_symbols = ['T', 'W']
   # On PowerPC, nm prints function descriptors from .data section.
-  if os.uname()[4] in ["powerpc", "ppc64"]:
+  if platform.uname()[4] in ["powerpc", "ppc64"]:
     func_symbols += ['D']
   for line in nm_out:
     cols = line.split(' ')
Index: lib/xray/CMakeLists.txt
===================================================================
--- lib/xray/CMakeLists.txt
+++ lib/xray/CMakeLists.txt
@@ -8,8 +8,16 @@
   )
 
 set(x86_64_SOURCES
+    xray_x86_64.cc
     xray_trampoline_x86_64.S
     ${XRAY_SOURCES})
+
+set(arm_SOURCES
+    xray_arm.cc
+    xray_trampoline_arm.S
+    ${XRAY_SOURCES})
+
+set(armhf_SOURCES ${arm_SOURCES})
 
 include_directories(..)
 include_directories(../../include)
Index: lib/xray/xray_arm.cc
===================================================================
--- lib/xray/xray_arm.cc
+++ lib/xray/xray_arm.cc
@@ -0,0 +1,128 @@
+//===-- xray_arm.cc ---------------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+// Implementation of ARM-specific routines (32-bit).
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_common.h"
+#include "xray_interface_internal.h"
+#include <atomic>
+
+extern "C" {
+// The following functions have to be defined in assembler, on a per-platform
+// basis. See xray_trampoline_*.s files for implementations.
+extern void __xray_FunctionEntry();
+extern void __xray_FunctionExit();
+}
+
+namespace __xray {
+
+static const uint32_t cOpcPush_r0_lr = 0xE92D4001;
+static const uint32_t cOpcBlx_ip = 0xE12FFF3C;
+static const uint32_t cOpcPop_r0_lr = 0xE8BD4001;
+static const uint32_t cOpcB20 = 0xEA000005;
+
+// 0xUUUUWXYZ -> 0x000W0XYZ
+inline static uint32_t GetMovwMask(const uint32_t Value) {
+  return (Value & 0xfff) | ((Value & 0xf000) << 4);
+}
+
+// 0xWXYZUUUU -> 0x000W0XYZ
+inline static uint32_t GetMovtMask(const uint32_t Value) {
+  return GetMovwMask(Value >> 16);
+}
+
+// Writes the following instructions:
+//   MOVW r0, #<lower 16 bits of Value>
+//   MOVT r0, #<higher 16 bits of Value>
+inline static uint32_t *Write32bitLoadR0(uint32_t *Address,
+                                         const uint32_t Value) {
+  // MOVW r0, #0xWXYZ in machine code is 0xE30W0XYZ
+  *Address = (0xE3000000 | GetMovwMask(Value));
+  Address++;
+  // MOVT r0, #0xWXYZ in machine code is 0xE34W0XYZ
+  *Address = (0xE3400000 | GetMovtMask(Value));
+  return Address + 1;
+}
+
+// Writes the following instructions:
+//   MOVW ip, #<lower 16 bits of Value>
+//   MOVT ip, #<higher 16 bits of Value>
+inline static uint32_t *Write32bitLoadIP(uint32_t *Address,
+                                         const uint32_t Value) {
+  // MOVW ip, #0xWXYZ in machine code is 0xE30WCXYZ
+  *Address = (0xE300C000 | GetMovwMask(Value));
+  Address++;
+  // MOVT ip, #0xWXYZ in machine code is 0xE34WCXYZ
+  *Address = (0xE340C000 | GetMovtMask(Value));
+  return Address + 1;
+}
+
+inline static bool PatchSled(const bool Enable, const uint32_t FuncId,
+                             const XRaySledEntry &Sled, void (*TracingHook)()) {
+  // When |Enable| == true,
+  //   We replace the following compile-time stub (sled):
+  //
+  //   xray_sled_n:
+  //     B #20
+  //     6 NOPs (24 bytes)
+  //
+  //   With the following runtime patch:
+  //
+  //   xray_sled_n:
+  //     PUSH {r0, lr}
+  //     MOVW r0, #<lower 16 bits of function ID>
+  //     MOVT r0, #<higher 16 bits of function ID>
+  //     MOVW ip, #<lower 16 bits of address of TracingHook>
+  //     MOVT ip, #<higher 16 bits of address of TracingHook>
+  //     BLX ip
+  //     POP {r0, lr}
+  //
+  // Replacement of the first 4-byte instruction should be the last and atomic
+  // operation, so that the user code which reaches the sled concurrently
+  // either jumps over the whole sled, or executes the whole sled when the
+  // latter is ready.
+  //
+  // When |Enable| == false, we set back the first instruction in the sled to be
+  //   B #20
+
+  uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
+  if (Enable) {
+    uint32_t *CurAddress = FirstAddress + 1;
+    CurAddress =
+        Write32bitLoadR0(CurAddress, reinterpret_cast<uint32_t>(FuncId));
+    CurAddress =
+        Write32bitLoadIP(CurAddress, reinterpret_cast<uint32_t>(TracingHook));
+    *CurAddress = cOpcBlx_ip;
+    CurAddress++;
+    *CurAddress = cOpcPop_r0_lr;
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), cOpcPush_r0_lr,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress), cOpcB20,
+        std::memory_order_release);
+  }
+  return true;
+}
+
+bool PatchFunctionEntry(const bool Enable, const uint32_t FuncId,
+                        const XRaySledEntry &Sled) {
+  return PatchSled(Enable, FuncId, Sled, __xray_FunctionEntry);
+}
+
+bool PatchFunctionExit(const bool Enable, const uint32_t FuncId,
+                       const XRaySledEntry &Sled) {
+  return PatchSled(Enable, FuncId, Sled, __xray_FunctionExit);
+}
+
+} // namespace __xray
Index: lib/xray/xray_interface.cc
===================================================================
--- lib/xray/xray_interface.cc
+++ lib/xray/xray_interface.cc
@@ -64,13 +64,6 @@
 
 } // namespace __xray
 
-extern "C" {
-// The following functions have to be defined in assembler, on a per-platform
-// basis. See xray_trampoline_*.s files for implementations.
-extern void __xray_FunctionEntry();
-extern void __xray_FunctionExit();
-}
-
 extern std::atomic<bool> XRayInitialized;
 extern std::atomic<__xray::XRaySledMap> XRayInstrMap;
 
@@ -133,12 +126,13 @@
   if (InstrMap.Entries == 0)
     return XRayPatchingStatus::NOT_INITIALIZED;
 
-  int32_t FuncId = 1;
-  static constexpr uint8_t CallOpCode = 0xe8;
-  static constexpr uint16_t MovR10Seq = 0xba41;
-  static constexpr uint16_t Jmp9Seq = 0x09eb;
-  static constexpr uint8_t JmpOpCode = 0xe9;
-  static constexpr uint8_t RetOpCode = 0xc3;
+  const uint64_t PageSize = GetPageSizeCached();
+  if ((PageSize == 0) || ((PageSize & (PageSize - 1)) != 0)) {
+    Report("System page size is not a power of two: %lld\n", PageSize);
+    return XRayPatchingStatus::FAILED;
+  }
+
+  uint32_t FuncId = 1;
   uint64_t CurFun = 0;
   for (std::size_t I = 0; I < InstrMap.Entries; I++) {
     auto Sled = InstrMap.Sleds[I];
@@ -153,7 +147,7 @@
     // While we're here, we should patch the nop sled. To do that we mprotect
     // the page containing the function to be writeable.
     void *PageAlignedAddr =
-        reinterpret_cast<void *>(Sled.Address & ~((2 << 16) - 1));
+        reinterpret_cast<void *>(Sled.Address & ~(PageSize - 1));
     std::size_t MProtectLen =
         (Sled.Address + 12) - reinterpret_cast<uint64_t>(PageAlignedAddr);
     MProtectHelper Protector(PageAlignedAddr, MProtectLen);
@@ -162,103 +156,19 @@
       return XRayPatchingStatus::FAILED;
     }
 
-    static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
-    static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
-    if (Sled.Kind == XRayEntryType::ENTRY) {
-      // FIXME: Implement this in a more extensible manner, per-platform.
-      // Here we do the dance of replacing the following sled:
-      //
-      // xray_sled_n:
-      //   jmp +9
-      //   <9 byte nop>
-      //
-      // With the following:
-      //
-      //   mov r10d, <function id>
-      //   call <relative 32-bit offset to entry trampoline>
-      //
-      // We need to do this in the following order:
-      //
-      // 1. Put the function id first, 2 bytes from the start of the sled (just
-      //    after the 2-byte jmp instruction).
-      // 2. Put the call opcode 6 bytes from the start of the sled.
-      // 3. Put the relative offset 7 bytes from the start of the sled.
-      // 4. Do an atomic write over the jmp instruction for the "mov r10d"
-      //    opcode and first operand.
-      //
-      // Prerequisite is to compute the relative offset to the
-      // __xray_FunctionEntry function's address.
-      int64_t TrampolineOffset =
-          reinterpret_cast<int64_t>(__xray_FunctionEntry) -
-          (static_cast<int64_t>(Sled.Address) + 11);
-      if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
-        Report("XRay Entry trampoline (%p) too far from sled (%p); distance = "
-               "%ld\n",
-               __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address),
-               TrampolineOffset);
-        continue;
-      }
-      if (Enable) {
-        *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
-        *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
-        *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
-        std::atomic_store_explicit(
-            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
-            std::memory_order_release);
-      } else {
-        std::atomic_store_explicit(
-            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
-            std::memory_order_release);
-        // FIXME: Write out the nops still?
-      }
-    }
-
-    if (Sled.Kind == XRayEntryType::EXIT) {
-      // FIXME: Implement this in a more extensible manner, per-platform.
-      // Here we do the dance of replacing the following sled:
-      //
-      // xray_sled_n:
-      //   ret
-      //   <10 byte nop>
-      //
-      // With the following:
-      //
-      //   mov r10d, <function id>
-      //   jmp <relative 32-bit offset to exit trampoline>
-      //
-      // 1. Put the function id first, 2 bytes from the start of the sled (just
-      //    after the 1-byte ret instruction).
-      // 2. Put the jmp opcode 6 bytes from the start of the sled.
-      // 3. Put the relative offset 7 bytes from the start of the sled.
-      // 4. Do an atomic write over the jmp instruction for the "mov r10d"
-      //    opcode and first operand.
-      //
-      // Prerequisite is to compute the relative offset fo the
-      // __xray_FunctionExit function's address.
-      int64_t TrampolineOffset =
-          reinterpret_cast<int64_t>(__xray_FunctionExit) -
-          (static_cast<int64_t>(Sled.Address) + 11);
-      if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
-        Report("XRay Exit trampoline (%p) too far from sled (%p); distance = "
-               "%ld\n",
-               __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address),
-               TrampolineOffset);
-        continue;
-      }
-      if (Enable) {
-        *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
-        *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
-        *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
-        std::atomic_store_explicit(
-            reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
-            std::memory_order_release);
-      } else {
-        std::atomic_store_explicit(
-            reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
-            std::memory_order_release);
-        // FIXME: Write out the nops still?
-      }
+    bool Success = false;
+    switch (Sled.Kind) {
+    case XRayEntryType::ENTRY:
+      Success = PatchFunctionEntry(Enable, FuncId, Sled);
+      break;
+    case XRayEntryType::EXIT:
+      Success = PatchFunctionExit(Enable, FuncId, Sled);
+      break;
+    default:
+      Report("Unsupported sled kind: %d\n", int(Sled.Kind));
+      continue;
     }
+    (void)Success;
   }
   XRayPatching.store(false, std::memory_order_release);
   PatchingSuccess = true;
Index: lib/xray/xray_interface_internal.h
===================================================================
--- lib/xray/xray_interface_internal.h
+++ lib/xray/xray_interface_internal.h
@@ -16,18 +16,30 @@
 #define XRAY_INTERFACE_INTERNAL_H
 
 #include "xray/xray_interface.h"
+#include "sanitizer_common/sanitizer_platform.h"
 #include <cstddef>
 #include <cstdint>
 
 extern "C" {
 
 struct XRaySledEntry {
+#if SANITIZER_WORDSIZE == 64
   uint64_t Address;
   uint64_t Function;
   unsigned char Kind;
   unsigned char AlwaysInstrument;
   unsigned char Padding[14]; // Need 32 bytes
+#elif SANITIZER_WORDSIZE == 32
+  uint32_t Address;
+  uint32_t Function;
+  unsigned char Kind;
+  unsigned char AlwaysInstrument;
+  unsigned char Padding[6]; // Need 16 bytes
+#else
+  #error "Unsupported word size."
+#endif
 };
+
 }
 
 namespace __xray {
@@ -37,6 +49,9 @@
   size_t Entries;
 };
 
+bool PatchFunctionEntry(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled);
+bool PatchFunctionExit(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled);
+
 } // namespace __xray
 
 #endif
Index: lib/xray/xray_trampoline_arm.S
===================================================================
--- lib/xray/xray_trampoline_arm.S
+++ lib/xray/xray_trampoline_arm.S
@@ -0,0 +1,42 @@
+    .syntax unified
+    .code 32
+    .global _ZN6__xray19XRayPatchedFunctionE
+    .p2align 2 // word-aligned function entry point
+    // It preserves all registers except r0, r12(ip), r14(lr) and r15(pc)
+    // Assume that "q" part of the floating-point registers is not used in C/C++
+    .global __xray_FunctionEntry // let C/C++ see the symbol
+    .type __xray_FunctionEntry, %function
+__xray_FunctionEntry:
+    PUSH {r1-r3,lr}
+    VPUSH {d0-d7} // save floating-point parameters of the instrumented function
+    MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE
+    MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE
+    LDR r2, [r1]
+    CMP r2, #0
+    BEQ FunctionEntry_restore
+    // Function ID is already in r0 (the first parameter).
+    MOV r1, #0 // 0 means that we are tracing an entry event
+    BLX r2 // call the handler with 2 parameters in r0 and r1
+FunctionEntry_restore:
+    VPOP {d0-d7} // restore floating-point parameters of the instrumented function
+    POP {r1-r3,pc}
+
+    .p2align 2 // word-aligned function entry point
+    // Assume that d1-d7 are not used for the return value
+    // Assume that "q" part of the floating-point registers is not used in C/C++
+    .global __xray_FunctionExit // let C/C++ see the symbol
+    .type __xray_FunctionExit, %function
+__xray_FunctionExit:
+    PUSH {r1-r3,lr}
+    VPUSH {d0} // save the floating-point return value of the instrumented function
+    MOVW r1,#:lower16:_ZN6__xray19XRayPatchedFunctionE
+    MOVT r1,#:upper16:_ZN6__xray19XRayPatchedFunctionE
+    LDR r2, [r1]
+    CMP r2, #0
+    BEQ FunctionExit_restore
+    // Function ID is already in r0 (the first parameter).
+    MOV r1, #1 // 1 means that we are tracing an exit event
+    BLX r2 // call the handler with 2 parameters in r0 and r1
+FunctionExit_restore:
+    VPOP {d0} // restore the floating-point return value of the instrumented function
+    POP {r1-r3,pc}
Index: lib/xray/xray_x86_64.cc
===================================================================
--- lib/xray/xray_x86_64.cc
+++ lib/xray/xray_x86_64.cc
@@ -0,0 +1,120 @@
+#include "xray_interface_internal.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include <atomic>
+#include <cstdint>
+#include <limits>
+
+extern "C" {
+// The following functions have to be defined in assembler, on a per-platform
+// basis. See xray_trampoline_*.s files for implementations.
+extern void __xray_FunctionEntry();
+extern void __xray_FunctionExit();
+}
+
+namespace __xray {
+
+static constexpr uint8_t CallOpCode = 0xe8;
+static constexpr uint16_t MovR10Seq = 0xba41;
+static constexpr uint16_t Jmp9Seq = 0x09eb;
+static constexpr uint8_t JmpOpCode = 0xe9;
+static constexpr uint8_t RetOpCode = 0xc3;
+
+static constexpr int64_t MinOffset{std::numeric_limits<int32_t>::min()};
+static constexpr int64_t MaxOffset{std::numeric_limits<int32_t>::max()};
+
+bool PatchFunctionEntry(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled)
+{
+  // Here we do the dance of replacing the following sled:
+  //
+  // xray_sled_n:
+  //   jmp +9
+  //   <9 byte nop>
+  //
+  // With the following:
+  //
+  //   mov r10d, <function id>
+  //   call <relative 32-bit offset to entry trampoline>
+  //
+  // We need to do this in the following order:
+  //
+  // 1. Put the function id first, 2 bytes from the start of the sled (just
+  //    after the 2-byte jmp instruction).
+  // 2. Put the call opcode 6 bytes from the start of the sled.
+  // 3. Put the relative offset 7 bytes from the start of the sled.
+  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+  //    opcode and first operand.
+  //
+  // Prerequisite is to compute the relative offset to the
+  // __xray_FunctionEntry function's address.
+  int64_t TrampolineOffset =
+      reinterpret_cast<int64_t>(__xray_FunctionEntry) -
+      (static_cast<int64_t>(Sled.Address) + 11);
+  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+    Report("XRay Entry trampoline (%p) too far from sled (%p); distance = "
+           "%ld\n",
+           __xray_FunctionEntry, reinterpret_cast<void *>(Sled.Address),
+           TrampolineOffset);
+    return false;
+  }
+  if (Enable) {
+    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = CallOpCode;
+    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), Jmp9Seq,
+        std::memory_order_release);
+    // FIXME: Write out the nops still?
+  }
+  return true;
+}
+
+bool PatchFunctionExit(const bool Enable, const uint32_t FuncId, const XRaySledEntry& Sled)
+{
+  // Here we do the dance of replacing the following sled:
+  //
+  // xray_sled_n:
+  //   ret
+  //   <10 byte nop>
+  //
+  // With the following:
+  //
+  //   mov r10d, <function id>
+  //   jmp <relative 32-bit offset to exit trampoline>
+  //
+  // 1. Put the function id first, 2 bytes from the start of the sled (just
+  //    after the 1-byte ret instruction).
+  // 2. Put the jmp opcode 6 bytes from the start of the sled.
+  // 3. Put the relative offset 7 bytes from the start of the sled.
+  // 4. Do an atomic write over the jmp instruction for the "mov r10d"
+  //    opcode and first operand.
+  //
+  // Prerequisite is to compute the relative offset of the
+  // __xray_FunctionExit function's address.
+  int64_t TrampolineOffset =
+      reinterpret_cast<int64_t>(__xray_FunctionExit) -
+      (static_cast<int64_t>(Sled.Address) + 11);
+  if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) {
+    Report("XRay Exit trampoline (%p) too far from sled (%p); distance = "
+           "%ld\n",
+           __xray_FunctionExit, reinterpret_cast<void *>(Sled.Address),
+           TrampolineOffset);
+    return false;
+  }
+  if (Enable) {
+    *reinterpret_cast<uint32_t *>(Sled.Address + 2) = FuncId;
+    *reinterpret_cast<uint8_t *>(Sled.Address + 6) = JmpOpCode;
+    *reinterpret_cast<uint32_t *>(Sled.Address + 7) = TrampolineOffset;
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint16_t> *>(Sled.Address), MovR10Seq,
+        std::memory_order_release);
+  } else {
+    std::atomic_store_explicit(
+        reinterpret_cast<std::atomic<uint8_t> *>(Sled.Address), RetOpCode,
+        std::memory_order_release);
+    // FIXME: Write out the nops still?
+  }
+  return true;
+}
+
+} // namespace __xray
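
Note on the mprotect base address (xray_interface.cc): rounding a sled address down to a page boundary with Addr & ~(PageSize - 1) only works when the page size is a power of two, which is exactly what the new GetPageSizeCached() check guards before the patching loop runs. A minimal standalone sketch of the computation, not part of the patch and using arbitrary example addresses:

    #include <cstdint>
    #include <cstdio>

    // Rounds Addr down to the start of its page; assumes PageSize is a power
    // of two, so (PageSize - 1) masks exactly the offset-within-page bits.
    static uint64_t PageAlignDown(uint64_t Addr, uint64_t PageSize) {
      return Addr & ~(PageSize - 1);
    }

    int main() {
      const uint64_t SledAddress = 0x4000123456ULL; // arbitrary example address
      std::printf("4 KiB pages:  0x%llx\n",
                  (unsigned long long)PageAlignDown(SledAddress, 0x1000));
      std::printf("64 KiB pages: 0x%llx\n",
                  (unsigned long long)PageAlignDown(SledAddress, 0x10000));
      // The old hard-coded mask, (2 << 16) - 1 == 0x1ffff, assumed 128 KiB
      // granularity regardless of the actual page size.
      return 0;
    }

On a typical 4 KiB-page system the old constant over-aligned the mprotect range; deriving the mask from the cached page size keeps the range tight on any configuration.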
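Note on the XRaySledEntry split (xray_interface_internal.h): the two layouts are intended to stay at exactly 32 and 16 bytes, matching the "Need 32 bytes" / "Need 16 bytes" comments, since the stride of the sled section depends on it. A standalone sketch, not part of the patch, that checks the arithmetic with static_asserts (SANITIZER_WORDSIZE selection replaced by two plain structs purely for illustration):

    #include <cstdint>

    struct SledEntry64 {
      uint64_t Address;
      uint64_t Function;
      unsigned char Kind;
      unsigned char AlwaysInstrument;
      unsigned char Padding[14]; // 8 + 8 + 1 + 1 + 14 = 32
    };

    struct SledEntry32 {
      uint32_t Address;
      uint32_t Function;
      unsigned char Kind;
      unsigned char AlwaysInstrument;
      unsigned char Padding[6]; // 4 + 4 + 1 + 1 + 6 = 16
    };

    // No implicit padding is introduced: the char members follow the naturally
    // aligned integer members and the totals are multiples of the alignment.
    static_assert(sizeof(SledEntry64) == 32, "64-bit sled entry must be 32 bytes");
    static_assert(sizeof(SledEntry32) == 16, "32-bit sled entry must be 16 bytes");

    int main() { return 0; }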
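Note on the ARM immediate encoding (xray_arm.cc): MOVW/MOVT carry a 16-bit immediate split as imm4:imm12, i.e. bits [19:16] hold the top nibble and bits [11:0] the low 12 bits of the instruction word, which is what GetMovwMask/GetMovtMask compute before Write32bitLoadR0 ORs in the opcode. A standalone sketch, not part of the patch; MovMask and the example value are only for illustration:

    #include <cstdint>
    #include <cstdio>

    // Places a 16-bit immediate into the imm4:imm12 fields of a MOVW/MOVT
    // encoding, mirroring GetMovwMask in the patch.
    static uint32_t MovMask(uint32_t Value16) {
      return (Value16 & 0xfff) | ((Value16 & 0xf000) << 4);
    }

    int main() {
      const uint32_t Value = 0xDEADBEEF; // e.g. a function ID or hook address
      const uint32_t Movw = 0xE3000000 | MovMask(Value & 0xffff); // MOVW r0, #0xBEEF
      const uint32_t Movt = 0xE3400000 | MovMask(Value >> 16);    // MOVT r0, #0xDEAD
      std::printf("MOVW r0: %08X\nMOVT r0: %08X\n", Movw, Movt);
      // Expected output: MOVW r0: E30B0EEF / MOVT r0: E34D0EAD
      return 0;
    }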
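Note on the x86_64 range check (xray_x86_64.cc): the patched sled uses CALL/JMP rel32, and the displacement is taken relative to Sled.Address + 11, the first byte after the 4-byte offset (2-byte mov r10d opcode + 4-byte function id + 1-byte call/jmp opcode + 4-byte offset), so the trampoline must lie within the int32_t range of that point. A standalone sketch of the MinOffset/MaxOffset test, not part of the patch; FitsInRel32 and the addresses are only for illustration:

    #include <cstdint>
    #include <cstdio>
    #include <limits>

    // True when a rel32 displacement ending 11 bytes into the sled can reach
    // Target, mirroring the MinOffset/MaxOffset comparison in the patch.
    static bool FitsInRel32(uint64_t SledAddress, uint64_t Target) {
      const int64_t Offset = static_cast<int64_t>(Target) -
                             (static_cast<int64_t>(SledAddress) + 11);
      return Offset >= std::numeric_limits<int32_t>::min() &&
             Offset <= std::numeric_limits<int32_t>::max();
    }

    int main() {
      const uint64_t Sled = 0x400000;
      std::printf("nearby trampoline:     %d\n", FitsInRel32(Sled, 0x410000));
      std::printf("trampoline 4 GiB away: %d\n",
                  FitsInRel32(Sled, Sled + (1ULL << 32)));
      return 0;
    }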