Index: CMakeLists.txt =================================================================== --- CMakeLists.txt +++ CMakeLists.txt @@ -38,8 +38,6 @@ mark_as_advanced(COMPILER_RT_BUILD_BUILTINS) option(COMPILER_RT_BUILD_SANITIZERS "Build sanitizers" ON) mark_as_advanced(COMPILER_RT_BUILD_SANITIZERS) -option(COMPILER_RT_BUILD_XRAY "Build xray" ON) -mark_as_advanced(COMPILER_RT_BUILD_XRAY) if (COMPILER_RT_STANDALONE_BUILD) load_llvm_config() Index: include/xray/xray_records.h =================================================================== --- /dev/null +++ include/xray/xray_records.h @@ -0,0 +1,84 @@ +//===-- xray_records.h ------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// This header exposes some record types useful for the XRay in-memory logging +// implementation. +// +//===----------------------------------------------------------------------===// + +#ifndef XRAY_XRAY_RECORDS_H +#define XRAY_XRAY_RECORDS_H + +namespace __xray { + +enum FileTypes { + NAIVE_LOG = 0, +}; + +// This data structure is used to describe the contents of the file. We use this +// for versioning the supported XRay file formats. +struct alignas(32) XRayFileHeader { + uint16_t Version = 0; + + // The type of file we're writing out. See the FileTypes enum for more + // information. This allows different implementations of the XRay logging to + // have different files for different information being stored. + uint16_t Type = 0; + + // What follows are a set of flags that indicate useful things for when + // reading the data in the file. + bool ConstantTSC : 1; + bool NonstopTSC : 1; + + // NOTE: We can have 6 more bits to use here, before CycleFrequency. + + // The frequency by which TSC increases per-second. + uint64_t CycleFrequency = 0; + + char Padding[15] = {}; +} __attribute__((packed)); + +static_assert(sizeof(XRayFileHeader) == 32, "XRayFileHeader != 32 bytes"); + +enum RecordTypes { + NORMAL = 0, +}; + +struct alignas(32) XRayRecord { + // This is the type of the record being written. We use 16 bits to allow us to + // treat this as a discriminant, and so that the first 4 bytes get packed + // properly. See RecordTypes for more supported types. + uint16_t RecordType = 0; + + // The CPU where the thread is running. We assume number of CPUs <= 256. + uint8_t CPU = 0; + + // The type of the event. Usually either ENTER = 0 or EXIT = 1. + uint8_t Type = 0; + + // The function ID for the record. + int32_t FuncId = 0; + + // Get the full 8 bytes of the TSC when we get the log record. + uint64_t TSC = 0; + + // The thread ID for the currently running thread. + uint32_t TId = 0; + + // Use some bytes in the end of the record for buffers. + char Buffer[4] = {}; +} __attribute__((packed)); + +static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes"); + +} // namespace __xray + +#endif // XRAY_XRAY_RECORDS_H Index: lib/xray/CMakeLists.txt =================================================================== --- lib/xray/CMakeLists.txt +++ lib/xray/CMakeLists.txt @@ -4,10 +4,11 @@ xray_init.cc xray_interface.cc xray_flags.cc + xray_inmemory_log.cc ) set(x86_64_SOURCES - xray_trampoline_x86.S + xray_trampoline_x86_64.S ${XRAY_SOURCES}) include_directories(..) @@ -18,9 +19,9 @@ set(XRAY_COMMON_DEFINITIONS XRAY_HAS_EXCEPTIONS=1) add_compiler_rt_object_libraries(RTXray - ARCHS ${XRAY_SUPPORTED_ARCH} - SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS} - DEFS ${XRAY_COMMON_DEFINITIONS}) + ARCHS ${XRAY_SUPPORTED_ARCH} + SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS} + DEFS ${XRAY_COMMON_DEFINITIONS}) add_custom_target(xray) set(XRAY_COMMON_RUNTIME_OBJECT_LIBS Index: lib/xray/xray_flags.inc =================================================================== --- lib/xray/xray_flags.inc +++ lib/xray/xray_flags.inc @@ -16,3 +16,7 @@ XRAY_FLAG(bool, patch_premain, true, "Whether to patch instrumentation points before main.") +XRAY_FLAG(bool, xray_naive_log, true, + "Whether to install the naive log implementation.") +XRAY_FLAG(const char *, xray_logfile_base, "xray-log.", + "Filename base for the xray logfile.") Index: lib/xray/xray_inmemory_log.cc =================================================================== --- /dev/null +++ lib/xray/xray_inmemory_log.cc @@ -0,0 +1,215 @@ +//===-- xray_inmemory_log.cc ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Implementation of a simple in-memory log of XRay events. This defines a +// logging function that's compatible with the XRay handler interface, and +// routines for exporting data to files. +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sanitizer_common/sanitizer_libc.h" +#include "xray/xray_records.h" +#include "xray_flags.h" +#include "xray_interface_internal.h" + +// __xray_InMemoryRawLog will use a thread-local aligned buffer capped to a +// certain size (32kb by default) and use it as if it were a circular buffer for +// events. We store simple fixed-sized entries in the log for external analysis. + +extern "C" { +void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type); +} + +namespace __xray { + +std::mutex LogMutex; + +static constexpr size_t BuffLen = 1024; + +static void retryingWrite(int Fd, char *Begin, char *End) { + if (Begin == End) + return; + auto TotalBytes = std::distance(Begin, End); + while (auto Written = write(Fd, Begin, TotalBytes)) { + if (Written < 0) { + if (errno == EINTR) + continue; // Try again. + Report("Failed to write; errno = %d", errno); + return; + } + + // FIXME: Figure out whether/how to assert properly. + assert(static_cast(Written) <= TotalBytes); + TotalBytes -= Written; + if (TotalBytes == 0) + break; + Begin += Written; + } +} + +static bool retryingRead(int Fd, char *Begin, char *End) { + auto BytesToRead = std::distance(Begin, End); + int BytesRead; + while ((BytesRead = read(Fd, Begin, BytesToRead)) < 0) { + if (errno == EINTR) + continue; + Report("Read error; errno = %d", errno); + return false; + + BytesToRead -= BytesRead; + if (BytesToRead == 0) + break; + Begin += BytesRead; + } + return true; +} + +static bool readValueFromFile(const char *Filename, long long *Value) { + int Fd = open(Filename, O_RDONLY | O_CLOEXEC); + if (Fd == -1) + return false; + char Line[256] = {}; + if (!retryingRead(Fd, Line, Line + 256)) + return false; + close(Fd); + char *End = nullptr; + long long Tmp = internal_simple_strtoll(Line, &End, 10); + bool Result = false; + if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { + *Value = Tmp; + Result = true; + } + return Result; +} + +class ThreadExitFlusher { + int Fd; + XRayRecord *Start; + size_t &Offset; + +public: + explicit ThreadExitFlusher(int Fd, XRayRecord *Start, size_t &Offset) + : Fd(Fd), Start(Start), Offset(Offset) {} + + ~ThreadExitFlusher() { + std::lock_guard L(LogMutex); + if (Fd > 0 && Start != nullptr) { + retryingWrite(Fd, reinterpret_cast(Start), + reinterpret_cast(Start + Offset)); + // Because this thread's exit could be the last one trying to write to the + // file and that we're not able to close out the file properly, we sync + // instead and hope that the pending writes are flushed as the thread + // exits. + fsync(Fd); + } + } +}; + +} // namespace __xray + +using namespace __xray; + +void PrintToStdErr(const char *Buffer) { fprintf(stderr, Buffer); } + +void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type) { + using Buffer = + std::aligned_storage::type; + thread_local static Buffer InMemoryBuffer[BuffLen] = {}; + thread_local static size_t Offset = 0; + static int Fd = [] { + // FIXME: Figure out how to make this less stderr-dependent. + SetPrintfAndReportCallback(PrintToStdErr); + // Open a temporary file once for the log. + static char TmpFilename[256] = {}; + auto E = internal_strncat(TmpFilename, flags()->xray_logfile_base, 246); + if ((E + 6) - TmpFilename > 255) { + Report("XRay log file base too long: %s", flags()->xray_logfile_base); + return -1; + } + internal_strncat(TmpFilename, "XXXXXX", 6); + int Fd = mkstemp(TmpFilename); + if (Fd == -1) { + Report("XRay: Failed opening temporary file '%s'; not logging events.", + TmpFilename); + return -1; + } + if (Verbosity()) + fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename); + + // Get the cycle frequency from SysFS on Linux. + long long CPUFrequency = -1; + if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", + &CPUFrequency)) { + CPUFrequency *= 1000; + } else if (readValueFromFile( + "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &CPUFrequency)) { + CPUFrequency *= 1000; + } + + // Since we're here, we get to write the header. We set it up so that the + // header will only be written once, at the start, and let the threads + // logging do writes which just append. + XRayFileHeader Header; + Header.Version = 1; + Header.Type = FileTypes::NAIVE_LOG; + Header.CycleFrequency = + CPUFrequency == -1 ? 0 : static_cast(CPUFrequency); + + // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' + // before setting the values in the header. + Header.ConstantTSC = 1; + Header.NonstopTSC = 1; + retryingWrite(Fd, reinterpret_cast(&Header), + reinterpret_cast(&Header) + sizeof(Header)); + return Fd; + }(); + if (Fd == -1) + return; + thread_local __xray::ThreadExitFlusher Flusher( + Fd, reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer), Offset); + thread_local pid_t TId = syscall(SYS_gettid); + + // First we get the useful data, and stuff it into the already aligned buffer + // through a pointer offset. + auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset]; + unsigned CPU; + R.RecordType = RecordTypes::NORMAL; + R.TSC = __rdtscp(&CPU); + R.CPU = CPU; + R.TId = TId; + R.Type = Type; + R.FuncId = FuncId; + ++Offset; + if (Offset == __xray::BuffLen) { + std::lock_guard L(LogMutex); + auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer); + retryingWrite(Fd, reinterpret_cast(RecordBuffer), + reinterpret_cast(RecordBuffer + Offset)); + Offset = 0; + } +} + +static auto Unused = [] { + if (flags()->xray_naive_log) + __xray_set_handler(__xray_InMemoryRawLog); + return true; +}(); Index: lib/xray/xray_trampoline_x86.S =================================================================== --- /dev/null +++ lib/xray/xray_trampoline_x86.S @@ -1,112 +0,0 @@ -//===-- xray_trampoline_x86.s -----------------------------------*- ASM -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file is a part of XRay, a dynamic runtime instrumentation system. -// -// This implements the X86-specific assembler for the trampolines. -// -//===----------------------------------------------------------------------===// - - .text - .file "xray_trampoline_x86.S" - .globl __xray_FunctionEntry - .align 16, 0x90 - .type __xray_FunctionEntry,@function - -__xray_FunctionEntry: - .cfi_startproc - // Save caller provided registers before doing any actual work. - pushq %rbp - .cfi_def_cfa_offset 16 - subq $200, %rsp - movupd %xmm0, 184(%rsp) - movupd %xmm1, 168(%rsp) - movupd %xmm2, 152(%rsp) - movupd %xmm3, 136(%rsp) - movupd %xmm4, 120(%rsp) - movupd %xmm5, 104(%rsp) - movupd %xmm6, 88(%rsp) - movupd %xmm7, 72(%rsp) - movq %rdi, 64(%rsp) - movq %rax, 56(%rsp) - movq %rdx, 48(%rsp) - movq %rsi, 40(%rsp) - movq %rcx, 32(%rsp) - movq %r8, 24(%rsp) - movq %r9, 16(%rsp) - - // de-mangled, that's __xray::XRayPatchedFunction, and we're doing an acquire - // load (on x86 is a normal mov instruction). - movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax - testq %rax, %rax - je .Ltmp0 - - // assume that %r10d has the function id. - movl %r10d, %edi - xor %esi,%esi - callq *%rax -.Ltmp0: - // restore the registers - movupd 184(%rsp), %xmm0 - movupd 168(%rsp), %xmm1 - movupd 152(%rsp), %xmm2 - movupd 136(%rsp), %xmm3 - movupd 120(%rsp), %xmm4 - movupd 104(%rsp), %xmm5 - movupd 88(%rsp) , %xmm6 - movupd 72(%rsp) , %xmm7 - movq 64(%rsp), %rdi - movq 56(%rsp), %rax - movq 48(%rsp), %rdx - movq 40(%rsp), %rsi - movq 32(%rsp), %rcx - movq 24(%rsp), %r8 - movq 16(%rsp), %r9 - addq $200, %rsp - popq %rbp - retq -.Ltmp1: - .size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry - .cfi_endproc - - .globl __xray_FunctionExit - .align 16, 0x90 - .type __xray_FunctionExit,@function -__xray_FunctionExit: - .cfi_startproc - // Save the important registers first. Since we're assuming that this - // function is only jumped into, we only preserve the registers for - // returning. - pushq %rbp - .cfi_def_cfa_offset 16 - subq $56, %rsp - .cfi_def_cfa_offset 32 - movupd %xmm0, 40(%rsp) - movupd %xmm1, 24(%rsp) - movq %rax, 16(%rsp) - movq %rdx, 8(%rsp) - movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax - testq %rax,%rax - je .Ltmp2 - - movl %r10d, %edi - movl $1, %esi - callq *%rax -.Ltmp2: - // Restore the important registers. - movupd 40(%rsp), %xmm0 - movupd 24(%rsp), %xmm1 - movq 16(%rsp), %rax - movq 8(%rsp), %rdx - addq $56, %rsp - popq %rbp - retq -.Ltmp3: - .size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit - .cfi_endproc Index: test/lit.common.cfg =================================================================== --- test/lit.common.cfg +++ test/lit.common.cfg @@ -52,7 +52,8 @@ 'LIBCLANG_LOGGING', 'LIBCLANG_BGPRIO_INDEX', 'LIBCLANG_BGPRIO_EDIT', 'LIBCLANG_NOTHREADS', 'LIBCLANG_RESOURCE_USAGE', - 'LIBCLANG_CODE_COMPLETION_LOGGING'] + 'LIBCLANG_CODE_COMPLETION_LOGGING', + 'XRAY_OPTIONS'] # Clang/Win32 may refer to %INCLUDE%. vsvarsall.bat sets it. if platform.system() != 'Windows': possibly_dangerous_env_vars.append('INCLUDE') Index: test/xray/TestCases/Linux/fixedsize-logging.cc =================================================================== --- /dev/null +++ test/xray/TestCases/Linux/fixedsize-logging.cc @@ -0,0 +1,20 @@ +// Check to make sure that we have a log file with a fixed-size. + +// RUN: %clangxx_xray -std=c++11 %s -o %t +// RUN: XRAY_OPTIONS="verbosity=1 xray_logfile_base=fixedsize-logging-" %run %t 2>&1 | FileCheck %s +// +// After all that, clean up the output xray log. +// +// RUN: rm fixedsize-logging-* + +#include + +[[clang::xray_always_instrument]] void foo() { + printf("foo() is always instrumented!"); +} + +int main() { + // CHECK: XRay: Log file in 'fixedsize-logging-{{.*}}' + foo(); + // CHECK: foo() is always instrumented! +} Index: test/xray/TestCases/Linux/optional-inmemory-log.cc =================================================================== --- /dev/null +++ test/xray/TestCases/Linux/optional-inmemory-log.cc @@ -0,0 +1,21 @@ +// Make sure that we don't get the inmemory logging implementation enabled when +// we turn it off via options. + +// RUN: %clangxx_xray -std=c++11 %s -o %t +// RUN: XRAY_OPTIONS="verbosity=1 xray_naive_log=false xray_logfile_base=optional-inmemory-log.xray-" %run %t 2>&1 | FileCheck %s +// +// Make sure we clean out the logs in case there was a bug. +// +// RUN: rm -f optional-inmemory-log.xray-* + +#include + +[[clang::xray_always_instrument]] void foo() { + printf("foo() is always instrumented!"); +} + +int main() { + // CHECK-NOT: XRay: Log file in 'optional-inmemory-log.xray-{{.*}}' + foo(); + // CHECK: foo() is always instrumented! +}