Index: clang/include/clang/Driver/XRayArgs.h =================================================================== --- clang/include/clang/Driver/XRayArgs.h +++ clang/include/clang/Driver/XRayArgs.h @@ -30,6 +30,7 @@ XRayArgs(const ToolChain &TC, const llvm::opt::ArgList &Args); void addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, types::ID InputType) const; + bool needsXRayRt() const { return XRayInstrument; } }; } // namespace driver Index: clang/lib/Driver/ToolChains/Darwin.cpp =================================================================== --- clang/lib/Driver/ToolChains/Darwin.cpp +++ clang/lib/Driver/ToolChains/Darwin.cpp @@ -18,6 +18,7 @@ #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/Options.h" #include "clang/Driver/SanitizerArgs.h" +#include "clang/Driver/XRayArgs.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/Path.h" @@ -1098,6 +1099,11 @@ if (Sanitize.needsEsanRt()) AddLinkSanitizerLibArgs(Args, CmdArgs, "esan"); + const XRayArgs& XRay = getXRayArgs(); + if (XRay.needsXRayRt() && isTargetMacOS()) { + AddLinkRuntimeLib(Args, CmdArgs, "libclang_rt.xray_osx.a", RLO_AlwaysLink); + } + // Otherwise link libSystem, then the dynamic runtime library, and finally any // target specific static runtime library. CmdArgs.push_back("-lSystem"); Index: clang/lib/Driver/XRayArgs.cpp =================================================================== --- clang/lib/Driver/XRayArgs.cpp +++ clang/lib/Driver/XRayArgs.cpp @@ -51,6 +51,15 @@ D.Diag(diag::err_drv_clang_unsupported) << (std::string(XRayInstrumentOption) + " on " + Triple.str()); } + else if (Triple.isMacOSX()) + // Experimental support for macos. 
+ switch (Triple.getArch()) { + case llvm::Triple::x86_64: + break; + default: + D.Diag(diag::err_drv_clang_unsupported) + << (std::string(XRayInstrumentOption) + " on " + Triple.str()); + } else D.Diag(diag::err_drv_clang_unsupported) << (std::string(XRayInstrumentOption) + " on non-Linux target OS"); Index: compiler-rt/cmake/config-ix.cmake =================================================================== --- compiler-rt/cmake/config-ix.cmake +++ compiler-rt/cmake/config-ix.cmake @@ -211,7 +211,11 @@ set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS64}) set(ALL_ESAN_SUPPORTED_ARCH ${X86_64} ${MIPS64}) set(ALL_SCUDO_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64}) +if(APPLE) +set(ALL_XRAY_SUPPORTED_ARCH ${X86_64}) +else() set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} powerpc64le) +endif() if(APPLE) include(CompilerRTDarwinUtils) @@ -256,6 +260,7 @@ set(SANITIZER_COMMON_SUPPORTED_OS osx) set(PROFILE_SUPPORTED_OS osx) set(TSAN_SUPPORTED_OS osx) + set(XRAY_SUPPORTED_OS osx) if(NOT SANITIZER_MIN_OSX_VERSION) string(REGEX MATCH "-mmacosx-version-min=([.0-9]+)" MACOSX_VERSION_MIN_FLAG "${CMAKE_CXX_FLAGS}") @@ -412,12 +417,12 @@ list_intersect(SCUDO_SUPPORTED_ARCH ALL_SCUDO_SUPPORTED_ARCH SANITIZER_COMMON_SUPPORTED_ARCH) - list_intersect(XRAY_SUPPORTED_ARCH - ALL_XRAY_SUPPORTED_ARCH - SANITIZER_COMMON_SUPPORTED_ARCH) list_intersect(FUZZER_SUPPORTED_ARCH ALL_FUZZER_SUPPORTED_ARCH ALL_SANITIZER_COMMON_SUPPORTED_ARCH) + list_intersect(XRAY_SUPPORTED_ARCH + ALL_XRAY_SUPPORTED_ARCH + SANITIZER_COMMON_SUPPORTED_ARCH) else() # Architectures supported by compiler-rt libraries. 
@@ -580,7 +585,7 @@ endif() if (COMPILER_RT_HAS_SANITIZER_COMMON AND XRAY_SUPPORTED_ARCH AND - OS_NAME MATCHES "Linux") + OS_NAME MATCHES "Darwin|Linux") set(COMPILER_RT_HAS_XRAY TRUE) else() set(COMPILER_RT_HAS_XRAY FALSE) Index: compiler-rt/lib/xray/CMakeLists.txt =================================================================== --- compiler-rt/lib/xray/CMakeLists.txt +++ compiler-rt/lib/xray/CMakeLists.txt @@ -65,19 +65,53 @@ append_list_if( COMPILER_RT_BUILD_XRAY_NO_PREINIT XRAY_NO_PREINIT XRAY_COMMON_DEFINITIONS) -add_compiler_rt_object_libraries(RTXray - ARCHS ${XRAY_SUPPORTED_ARCH} - SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS} - DEFS ${XRAY_COMMON_DEFINITIONS}) - add_compiler_rt_component(xray) set(XRAY_COMMON_RUNTIME_OBJECT_LIBS + RTXray RTSanitizerCommon RTSanitizerCommonLibc) +if (APPLE) + set(XRAY_LINK_LIBS ${SANITIZER_COMMON_LINK_LIBS}) + set(XRAY_ASM_SOURCES xray_trampoline_x86_64.S) + + if (${CMAKE_GENERATOR} STREQUAL "Xcode") + enable_language(ASM) + else() + set_source_files_properties(${XRAY_ASM_SOURCES} PROPERTIES LANGUAGE C) + endif() + + add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS) + add_weak_symbols("xray" WEAK_SYMBOL_LINK_FLAGS) + + add_compiler_rt_object_libraries(RTXray + OS ${XRAY_SUPPORTED_OS} + ARCHS ${XRAY_SUPPORTED_ARCH} + SOURCES ${x86_64_SOURCES} + CFLAGS ${XRAY_CFLAGS} + DEFS ${XRAY_COMMON_DEFINITIONS}) + + # We only support running on osx for now. 
+ add_compiler_rt_runtime(clang_rt.xray + STATIC + OS ${XRAY_SUPPORTED_OS} + ARCHS ${XRAY_SUPPORTED_ARCH} + OBJECT_LIBS RTXray + RTSanitizerCommon + RTSanitizerCommonLibc + CFLAGS ${XRAY_CFLAGS} + DEFS ${XRAY_COMMON_DEFINITIONS} + LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS} + LINK_LIBS ${XRAY_LINK_LIBS} + PARENT_TARGET xray) +else() foreach(arch ${XRAY_SUPPORTED_ARCH}) if(CAN_TARGET_${arch}) + add_compiler_rt_object_libraries(RTXray + ARCHS ${XRAY_SUPPORTED_ARCH} + SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS} + DEFS ${XRAY_COMMON_DEFINITIONS}) add_compiler_rt_runtime(clang_rt.xray STATIC ARCHS ${arch} @@ -88,6 +122,7 @@ PARENT_TARGET xray) endif() endforeach() +endif() if(COMPILER_RT_INCLUDE_TESTS) add_subdirectory(tests) Index: compiler-rt/lib/xray/tests/CMakeLists.txt =================================================================== --- compiler-rt/lib/xray/tests/CMakeLists.txt +++ compiler-rt/lib/xray/tests/CMakeLists.txt @@ -12,21 +12,50 @@ -I${COMPILER_RT_SOURCE_DIR}/lib) set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH}) +set(XRAY_LINK_FLAGS) +append_list_if(COMPILER_RT_HAS_LIBRT -lrt XRAY_LINK_FLAGS) +append_list_if(COMPILER_RT_HAS_LIBM -lm XRAY_LINK_FLAGS) +append_list_if(COMPILER_RT_HAS_LIBPTHREAD -lpthread XRAY_LINK_FLAGS) + +if (APPLE) + list(APPEND XRAY_LINK_FLAGS -lc++) + list(APPEND XRAY_LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS}) + set(XRAY_TEST_RUNTIME_OBJECTS + $ + $ + $) + set(XRAY_TEST_RUNTIME RTXRayTest) + add_library(${XRAY_TEST_RUNTIME} STATIC ${XRAY_TEST_RUNTIME_OBJECTS}) + set_target_properties(${XRAY_TEST_RUNTIME} PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + FOLDER "Compiler-RT Runtime tests") + + darwin_filter_host_archs(XRAY_SUPPORTED_ARCH XRAY_TEST_ARCH) + list(APPEND XRAY_UNITTEST_CFLAGS ${DARWIN_osx_CFLAGS}) + list(APPEND XRAY_LINK_FLAGS "-lc++") + list(APPEND XRAY_LINK_FLAGS "-fxray-instrument") + add_weak_symbols("sanitizer_common" XRAY_LINK_FLAGS) + add_weak_symbols("xray" XRAY_LINK_FLAGS) 
+else() + append_list_if(COMPILER_RT_HAS_LIBSTDCXX -lstdc++ XRAY_LINK_FLAGS) +endif() + macro(add_xray_unittest testname) cmake_parse_arguments(TEST "" "" "SOURCES;HEADERS" ${ARGN}) - if(UNIX AND NOT APPLE) + if(UNIX) foreach(arch ${XRAY_TEST_ARCH}) set(TEST_OBJECTS) generate_compiler_rt_tests(TEST_OBJECTS XRayUnitTests "${testname}-${arch}-Test" "${arch}" SOURCES ${TEST_SOURCES} ${COMPILER_RT_GTEST_SOURCE} + RUNTIME ${XRAY_TEST_RUNTIME} + COMPILE_DEPS ${TEST_HEADERS} DEPS gtest xray llvm-xray CFLAGS ${XRAY_UNITTEST_CFLAGS} LINK_FLAGS -fxray-instrument ${TARGET_LINK_FLAGS} - -lstdc++ -lm ${CMAKE_THREAD_LIBS_INIT} - -lpthread - -ldl -lrt) + ${CMAKE_THREAD_LIBS_INIT} + ${XRAY_LINK_FLAGS}) set_target_properties(XRayUnitTests PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) endforeach() endif() Index: compiler-rt/lib/xray/weak_symbols.txt =================================================================== --- /dev/null +++ compiler-rt/lib/xray/weak_symbols.txt @@ -0,0 +1,4 @@ +___start_xray_fn_idx +___start_xray_instr_map +___stop_xray_fn_idx +___stop_xray_instr_map Index: compiler-rt/lib/xray/xray_fdr_logging.h =================================================================== --- compiler-rt/lib/xray/xray_fdr_logging.h +++ compiler-rt/lib/xray/xray_fdr_logging.h @@ -32,7 +32,6 @@ void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry); void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, uint64_t Arg1); XRayLogFlushStatus fdrLoggingFlush(); -XRayLogInitStatus fdrLoggingReset(); } // namespace __xray Index: compiler-rt/lib/xray/xray_fdr_logging.cc =================================================================== --- compiler-rt/lib/xray/xray_fdr_logging.cc +++ compiler-rt/lib/xray/xray_fdr_logging.cc @@ -15,9 +15,9 @@ // //===----------------------------------------------------------------------===// #include "xray_fdr_logging.h" +#include #include #include -#include #include #include @@ -146,32 +146,6 @@ return 
XRayLogInitStatus::XRAY_LOG_FINALIZED; } -XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT { - s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_FINALIZED; - if (__sanitizer::atomic_compare_exchange_strong( - &LoggingStatus, &CurrentStatus, - XRayLogInitStatus::XRAY_LOG_INITIALIZED, - __sanitizer::memory_order_release)) - return static_cast(CurrentStatus); - - // Release the in-memory buffer queue. - BQ->reset(); - - // Spin until the flushing status is flushed. - s32 CurrentFlushingStatus = XRayLogFlushStatus::XRAY_LOG_FLUSHED; - while (__sanitizer::atomic_compare_exchange_weak( - &LogFlushStatus, &CurrentFlushingStatus, - XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING, - __sanitizer::memory_order_release)) { - if (CurrentFlushingStatus == XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING) - break; - CurrentFlushingStatus = XRayLogFlushStatus::XRAY_LOG_FLUSHED; - } - - // At this point, we know that the status is flushed, and that we can assume - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; -} - struct TSCAndCPU { uint64_t TSC; unsigned char CPU; @@ -205,15 +179,15 @@ void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { auto TC = getTimestamp(); - __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC, - TC.CPU, 0, clock_gettime, *BQ); + __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, 0, + clock_gettime, *BQ); } void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, uint64_t Arg) XRAY_NEVER_INSTRUMENT { auto TC = getTimestamp(); - __xray_fdr_internal::processFunctionHook( - FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime, *BQ); + __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, Arg, + clock_gettime, *BQ); } void fdrLoggingHandleCustomEvent(void *Event, @@ -270,22 +244,68 @@ XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax, void *Options, size_t OptionsSize) XRAY_NEVER_INSTRUMENT { - if (OptionsSize != sizeof(FDRLoggingOptions)) + if 
(OptionsSize != sizeof(FDRLoggingOptions)) { + if (__sanitizer::Verbosity()) + Report("Invalid options provided; size: %d\n", OptionsSize); return static_cast(__sanitizer::atomic_load( &LoggingStatus, __sanitizer::memory_order_acquire)); + } s32 CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; if (!__sanitizer::atomic_compare_exchange_strong( &LoggingStatus, &CurrentStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZING, - __sanitizer::memory_order_release)) - return static_cast(CurrentStatus); + __sanitizer::memory_order_release)) { + if (CurrentStatus != XRayLogInitStatus::XRAY_LOG_FINALIZED) { + if (__sanitizer::Verbosity()) + Report("Invalid state pre-initialization: %d\n", CurrentStatus); + return static_cast(CurrentStatus); + } + if (!__sanitizer::atomic_compare_exchange_strong( + &LoggingStatus, &CurrentStatus, XRAY_LOG_INITIALIZING, + __sanitizer::memory_order_release)) { + if (__sanitizer::Verbosity()) + Report("Invalid state pre-initialization: %d\n", CurrentStatus); + return static_cast(CurrentStatus); + } + } { __sanitizer::SpinMutexLock Guard(&FDROptionsMutex); memcpy(&FDROptions, Options, OptionsSize); } + thread_local bool UNUSED Once = [] { + using namespace __xray_fdr_internal; + pthread_key_create(&PThreadKey, +[](void *) { + auto &TLD = getThreadLocalData(); + auto &RecordPtr = TLD.RecordPtr; + auto &Buffers = TLD.LocalBQ; + auto &Buffer = TLD.Buffer; + if (RecordPtr == nullptr) + return; + + // We make sure that upon exit, a thread will write out the EOB + // MetadataRecord in the thread-local log, and also release the buffer + // to the queue. 
+ assert((RecordPtr + MetadataRecSize) - + static_cast(Buffer.Buffer) >= + static_cast(MetadataRecSize)); + if (Buffers) { + writeEOBMetadata(); + auto EC = Buffers->releaseBuffer(Buffer); + if (EC != BufferQueue::ErrorCode::Ok) + Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer, + BufferQueue::getErrorString(EC)); + Buffers = nullptr; + return; + } + }); + return false; + }(); + bool Success = false; + if (BQ != nullptr) + BQ->reset(); if (BQ == nullptr) BQ = new std::shared_ptr(); @@ -315,9 +335,7 @@ using namespace __xray; if (flags()->xray_fdr_log) { XRayLogImpl Impl{ - fdrLoggingInit, - fdrLoggingFinalize, - fdrLoggingHandleArg0, + fdrLoggingInit, fdrLoggingFinalize, fdrLoggingHandleArg0, fdrLoggingFlush, }; __xray_set_log_impl(Impl); Index: compiler-rt/lib/xray/xray_fdr_logging_impl.h =================================================================== --- compiler-rt/lib/xray/xray_fdr_logging_impl.h +++ compiler-rt/lib/xray/xray_fdr_logging_impl.h @@ -64,10 +64,6 @@ /// memory without checks. static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC, char *&MemPtr); -/// Writes an EOB metadata record to MemPtr and increments MemPtr. Bypasses the -/// thread local state machine and writes directly to memory without checks. -static void writeEOBMetadata(char *&MemPtr); - /// Writes a TSC Wrap metadata record to MemPtr and increments MemPtr. Bypasses /// the thread local state machine and directly writes to memory without checks. static void writeTSCWrapMetadata(uint64_t TSC, char *&MemPtr); @@ -97,8 +93,8 @@ // call so that it can be initialized on first use instead of as a global. We // force the alignment to 64-bytes for x86 cache line alignment, as this // structure is used in the hot path of implementation. 
-struct alignas(64) ThreadLocalData { - BufferQueue::Buffer Buffer; +struct ALIGNED(64) ThreadLocalData { + BufferQueue::Buffer Buffer{}; char *RecordPtr = nullptr; // The number of FunctionEntry records immediately preceding RecordPtr. uint8_t NumConsecutiveFnEnters = 0; @@ -122,12 +118,19 @@ std::shared_ptr LocalBQ = nullptr; }; +static_assert(sizeof(ThreadLocalData) == 64, + "Not cache aligned ThreadLocalData!"); +static_assert(alignof(ThreadLocalData) == 64, + "Not cache aligned ThreadLocalData!"); + // Forward-declare, defined later. static ThreadLocalData &getThreadLocalData(); static constexpr auto MetadataRecSize = sizeof(MetadataRecord); static constexpr auto FunctionRecSize = sizeof(FunctionRecord); +static pthread_key_t PThreadKey; + // This function will initialize the thread-local data structure used by the FDR // logging implementation and return a reference to it. The implementation // details require a bit of care to maintain. @@ -172,51 +175,15 @@ // With the approach taken where, we attempt to avoid the potential for // deadlocks by relying instead on pthread's memory management routines. static ThreadLocalData &getThreadLocalData() { - thread_local pthread_key_t key; - - // We need aligned, uninitialized storage for the TLS object which is - // trivially destructible. We're going to use this as raw storage and - // placement-new the ThreadLocalData object into it later. - alignas(alignof(ThreadLocalData)) thread_local unsigned char - TLSBuffer[sizeof(ThreadLocalData)]; + thread_local ALIGNED(64) ThreadLocalData TLD{}; // Ensure that we only actually ever do the pthread initialization once. 
thread_local bool UNUSED Unused = [] { - new (&TLSBuffer) ThreadLocalData(); - auto result = pthread_key_create(&key, +[](void *) { - auto &TLD = *reinterpret_cast(&TLSBuffer); - auto &RecordPtr = TLD.RecordPtr; - auto &Buffers = TLD.LocalBQ; - auto &Buffer = TLD.Buffer; - if (RecordPtr == nullptr) - return; - - // We make sure that upon exit, a thread will write out the EOB - // MetadataRecord in the thread-local log, and also release the buffer - // to the queue. - assert((RecordPtr + MetadataRecSize) - - static_cast(Buffer.Buffer) >= - static_cast(MetadataRecSize)); - if (Buffers) { - writeEOBMetadata(); - auto EC = Buffers->releaseBuffer(Buffer); - if (EC != BufferQueue::ErrorCode::Ok) - Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer, - BufferQueue::getErrorString(EC)); - Buffers = nullptr; - return; - } - }); - if (result != 0) { - Report("Failed to allocate thread-local data through pthread; error=%d", - result); - return false; - } - pthread_setspecific(key, &TLSBuffer); + pthread_setspecific(PThreadKey, &TLD); return true; }(); - return *reinterpret_cast(TLSBuffer); + return TLD; } //-----------------------------------------------------------------------------| @@ -332,22 +299,18 @@ writeNewCPUIdMetadata(CPU, TSC, getThreadLocalData().RecordPtr); } -inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT { +inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT { auto &TLD = getThreadLocalData(); MetadataRecord EOBMeta; EOBMeta.Type = uint8_t(RecordType::Metadata); EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer); // For now we don't write any bytes into the Data field. 
- std::memcpy(MemPtr, &EOBMeta, sizeof(MetadataRecord)); - MemPtr += sizeof(MetadataRecord); + std::memcpy(TLD.RecordPtr, &EOBMeta, sizeof(MetadataRecord)); + TLD.RecordPtr += sizeof(MetadataRecord); TLD.NumConsecutiveFnEnters = 0; TLD.NumTailCalls = 0; } -inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT { - writeEOBMetadata(getThreadLocalData().RecordPtr); -} - inline void writeTSCWrapMetadata(uint64_t TSC, char *&MemPtr) XRAY_NEVER_INSTRUMENT { auto &TLD = getThreadLocalData(); @@ -531,14 +494,15 @@ size_t MaxSize) XRAY_NEVER_INSTRUMENT { auto &TLD = getThreadLocalData(); char *BufferStart = static_cast(TLD.Buffer.Buffer); - if ((TLD.RecordPtr + MaxSize) > - (BufferStart + TLD.Buffer.Size - MetadataRecSize)) { + if (TLD.RecordPtr != nullptr && + (TLD.RecordPtr + MaxSize) > + (BufferStart + TLD.Buffer.Size - MetadataRecSize)) { writeEOBMetadata(); if (!releaseThreadLocalBuffer(*TLD.LocalBQ)) return false; auto EC = TLD.LocalBQ->getBuffer(TLD.Buffer); if (EC != BufferQueue::ErrorCode::Ok) { - Report("Failed to acquire a buffer; error=%s\n", + Report("While preparing buffer, failed to acquire a buffer; error=%s\n", BufferQueue::getErrorString(EC)); return false; } @@ -559,6 +523,8 @@ auto Status = __sanitizer::atomic_load(&LoggingStatus, __sanitizer::memory_order_acquire); auto &TLD = getThreadLocalData(); + if (LBQ == nullptr) + return false; if (Status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) { if (TLD.RecordPtr != nullptr && (Status == XRayLogInitStatus::XRAY_LOG_FINALIZING || @@ -568,6 +534,7 @@ return false; TLD.RecordPtr = nullptr; LBQ = nullptr; + TLD.LocalBQ = nullptr; return false; } return false; @@ -577,6 +544,8 @@ __sanitizer::memory_order_acquire) != XRayLogInitStatus::XRAY_LOG_INITIALIZED || LBQ->finalizing()) { + if (TLD.RecordPtr == nullptr) + return false; writeEOBMetadata(); if (!releaseThreadLocalBuffer(*LBQ)) return false; Index: compiler-rt/lib/xray/xray_init.cc =================================================================== --- 
compiler-rt/lib/xray/xray_init.cc +++ compiler-rt/lib/xray/xray_init.cc @@ -88,7 +88,8 @@ #endif } -#ifndef XRAY_NO_PREINIT +// Only add the preinit array initialization if the sanitizers can. +#if !defined(XRAY_NO_PREINIT) && SANITIZER_CAN_USE_PREINIT_ARRAY __attribute__((section(".preinit_array"), used)) void (*__local_xray_preinit)(void) = __xray_init; #endif Index: compiler-rt/lib/xray/xray_inmemory_log.cc =================================================================== --- compiler-rt/lib/xray/xray_inmemory_log.cc +++ compiler-rt/lib/xray/xray_inmemory_log.cc @@ -33,6 +33,12 @@ #include "xray_tsc.h" #include "xray_utils.h" +#ifdef __APPLE__ +// We use a different set of APIs for getting time on Apple platforms. +#include +#include +#endif + // __xray_InMemoryRawLog will use a thread-local aligned buffer capped to a // certain size (32kb by default) and use it as if it were a circular buffer for // events. We store simple fixed-sized entries in the log for external analysis. @@ -216,6 +222,29 @@ __xray_InMemoryRawLog(FuncId, Type, __xray::readTSC); } +void __xray_InMemoryRawLogWithArgRealTSC(int32_t FuncId, XRayEntryType Type, + uint64_t Arg1) XRAY_NEVER_INSTRUMENT { + __xray_InMemoryRawLogWithArg(FuncId, Type, Arg1, __xray::readTSC); +} + +#if __APPLE__ +void __xray_InMemoryEmulateTSC(int32_t FuncId, + XRayEntryType Type) XRAY_NEVER_INSTRUMENT { + __xray_InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { + CPU = 0; + return mach_absolute_time(); + }); +} + +void __xray_InMemoryRawLogWithArgEmulateTSC( + int32_t FuncId, XRayEntryType Type, uint64_t Arg1) XRAY_NEVER_INSTRUMENT { + __xray_InMemoryRawLogWithArg(FuncId, Type, Arg1, + [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { + CPU = 0; + return mach_absolute_time(); + }); +} +#else void __xray_InMemoryEmulateTSC(int32_t FuncId, XRayEntryType Type) XRAY_NEVER_INSTRUMENT { __xray_InMemoryRawLog(FuncId, Type, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { @@ -230,11 +259,6 @@ }); } -void 
__xray_InMemoryRawLogWithArgRealTSC(int32_t FuncId, XRayEntryType Type, - uint64_t Arg1) XRAY_NEVER_INSTRUMENT { - __xray_InMemoryRawLogWithArg(FuncId, Type, Arg1, __xray::readTSC); -} - void __xray_InMemoryRawLogWithArgEmulateTSC( int32_t FuncId, XRayEntryType Type, uint64_t Arg1) XRAY_NEVER_INSTRUMENT { __xray_InMemoryRawLogWithArg( @@ -249,6 +273,7 @@ return TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; }); } +#endif static auto UNUSED Unused = [] { auto UseRealTSC = probeRequiredCPUFeatures(); Index: compiler-rt/lib/xray/xray_trampoline_x86_64.S =================================================================== --- compiler-rt/lib/xray/xray_trampoline_x86_64.S +++ compiler-rt/lib/xray/xray_trampoline_x86_64.S @@ -14,10 +14,13 @@ //===----------------------------------------------------------------------===// #include "../builtins/assembly.h" +#include "../sanitizer_common/sanitizer_asm.h" + + .macro SAVE_REGISTERS subq $192, %rsp - .cfi_def_cfa_offset 200 + CFI_DEF_CFA_OFFSET(200) // At this point, the stack pointer should be aligned to an 8-byte boundary, // because any call instructions that come after this will add another 8 // bytes and therefore align it to 16-bytes. @@ -57,7 +60,7 @@ movq 8(%rsp), %r8 movq 0(%rsp), %r9 addq $192, %rsp - .cfi_def_cfa_offset 8 + CFI_DEF_CFA_OFFSET(8) .endm .macro ALIGNED_CALL_RAX @@ -75,21 +78,25 @@ .endm .text +#if !defined(__APPLE__) + .section .text +#else + .section __TEXT,__text +#endif .file "xray_trampoline_x86.S" //===----------------------------------------------------------------------===// - .globl __xray_FunctionEntry + .globl ASM_TSAN_SYMBOL(__xray_FunctionEntry) .align 16, 0x90 - .type __xray_FunctionEntry,@function - -__xray_FunctionEntry: - .cfi_startproc + ASM_TYPE_FUNCTION(__xray_FunctionEntry) +ASM_TSAN_SYMBOL(__xray_FunctionEntry): + CFI_STARTPROC SAVE_REGISTERS // This load has to be atomic, it's concurrent with __xray_patch(). 
// On x86/amd64, a simple (type-aligned) MOV instruction is enough. - movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax + movq ASM_TSAN_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax testq %rax, %rax je .Ltmp0 @@ -101,28 +108,27 @@ .Ltmp0: RESTORE_REGISTERS retq -.Ltmp1: - .size __xray_FunctionEntry, .Ltmp1-__xray_FunctionEntry - .cfi_endproc + ASM_SIZE(__xray_FunctionEntry) + CFI_ENDPROC //===----------------------------------------------------------------------===// - .globl __xray_FunctionExit + .globl ASM_TSAN_SYMBOL(__xray_FunctionExit) .align 16, 0x90 - .type __xray_FunctionExit,@function -__xray_FunctionExit: - .cfi_startproc + ASM_TYPE_FUNCTION(__xray_FunctionExit) +ASM_TSAN_SYMBOL(__xray_FunctionExit): + CFI_STARTPROC // Save the important registers first. Since we're assuming that this // function is only jumped into, we only preserve the registers for // returning. subq $56, %rsp - .cfi_def_cfa_offset 64 + CFI_DEF_CFA_OFFSET(64) movq %rbp, 48(%rsp) movupd %xmm0, 32(%rsp) movupd %xmm1, 16(%rsp) movq %rax, 8(%rsp) movq %rdx, 0(%rsp) - movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax + movq ASM_TSAN_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax testq %rax,%rax je .Ltmp2 @@ -138,22 +144,21 @@ movq 8(%rsp), %rax movq 0(%rsp), %rdx addq $56, %rsp - .cfi_def_cfa_offset 8 + CFI_DEF_CFA_OFFSET(8) retq -.Ltmp3: - .size __xray_FunctionExit, .Ltmp3-__xray_FunctionExit - .cfi_endproc + ASM_SIZE(__xray_FunctionExit) + CFI_ENDPROC //===----------------------------------------------------------------------===// - .global __xray_FunctionTailExit + .globl ASM_TSAN_SYMBOL(__xray_FunctionTailExit) .align 16, 0x90 - .type __xray_FunctionTailExit,@function -__xray_FunctionTailExit: - .cfi_startproc + ASM_TYPE_FUNCTION(__xray_FunctionTailExit) +ASM_TSAN_SYMBOL(__xray_FunctionTailExit): + CFI_STARTPROC SAVE_REGISTERS - movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax + movq ASM_TSAN_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax testq %rax,%rax je .Ltmp4 @@ 
-165,26 +170,25 @@ .Ltmp4: RESTORE_REGISTERS retq -.Ltmp5: - .size __xray_FunctionTailExit, .Ltmp5-__xray_FunctionTailExit - .cfi_endproc + ASM_SIZE(__xray_FunctionTailExit) + CFI_ENDPROC //===----------------------------------------------------------------------===// - .globl __xray_ArgLoggerEntry + .globl ASM_TSAN_SYMBOL(__xray_ArgLoggerEntry) .align 16, 0x90 - .type __xray_ArgLoggerEntry,@function -__xray_ArgLoggerEntry: - .cfi_startproc + ASM_TYPE_FUNCTION(__xray_ArgLoggerEntry) +ASM_TSAN_SYMBOL(__xray_ArgLoggerEntry): + CFI_STARTPROC SAVE_REGISTERS // Again, these function pointer loads must be atomic; MOV is fine. - movq _ZN6__xray13XRayArgLoggerE(%rip), %rax + movq ASM_TSAN_SYMBOL(_ZN6__xray13XRayArgLoggerE)(%rip), %rax testq %rax, %rax jne .Larg1entryLog // If [arg1 logging handler] not set, defer to no-arg logging. - movq _ZN6__xray19XRayPatchedFunctionE(%rip), %rax + movq ASM_TSAN_SYMBOL(_ZN6__xray19XRayPatchedFunctionE)(%rip), %rax testq %rax, %rax je .Larg1entryFail @@ -203,24 +207,22 @@ .Larg1entryFail: RESTORE_REGISTERS retq - -.Larg1entryEnd: - .size __xray_ArgLoggerEntry, .Larg1entryEnd-__xray_ArgLoggerEntry - .cfi_endproc + ASM_SIZE(__xray_ArgLoggerEntry) + CFI_ENDPROC //===----------------------------------------------------------------------===// - .global __xray_CustomEvent + .global ASM_TSAN_SYMBOL(__xray_CustomEvent) .align 16, 0x90 - .type __xray_CustomEvent,@function -__xray_CustomEvent: - .cfi_startproc + ASM_TYPE_FUNCTION(__xray_CustomEvent) +ASM_TSAN_SYMBOL(__xray_CustomEvent): + CFI_STARTPROC SAVE_REGISTERS // We take two arguments to this trampoline, which should be in rdi and rsi // already. We also make sure that we stash %rax because we use that register // to call the logging handler. 
- movq _ZN6__xray22XRayPatchedCustomEventE(%rip), %rax + movq ASM_TSAN_SYMBOL(_ZN6__xray22XRayPatchedCustomEventE)(%rip), %rax testq %rax,%rax je .LcustomEventCleanup @@ -229,9 +231,7 @@ .LcustomEventCleanup: RESTORE_REGISTERS retq - -.Ltmp8: - .size __xray_CustomEvent, .Ltmp8-__xray_CustomEvent - .cfi_endproc + ASM_SIZE(__xray_CustomEvent) + CFI_ENDPROC NO_EXEC_STACK_DIRECTIVE Index: compiler-rt/lib/xray/xray_x86_64.cc =================================================================== --- compiler-rt/lib/xray/xray_x86_64.cc +++ compiler-rt/lib/xray/xray_x86_64.cc @@ -12,8 +12,24 @@ #include #include +#ifdef __APPLE__ +#include <sys/sysctl.h> +#endif + namespace __xray { +#ifdef __APPLE__ +uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { + int64_t CPUFreq = 0; + size_t Len = sizeof(CPUFreq); + if (sysctlbyname("hw.cpufrequency_max", &CPUFreq, &Len, nullptr, 0) == -1) { + Report("Unable to determine CPU frequency for TSC accounting; errno = %d\n", + errno); + return 0; + } + return CPUFreq; +} +#else static std::pair retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { auto BytesToRead = std::distance(Begin, End); @@ -57,6 +73,8 @@ return Result; } +// TODO: Use a more robust means of getting the CPU frequency for the current +// machine on Linux. uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT { long long TSCFrequency = -1; if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", @@ -71,6 +89,7 @@ } return TSCFrequency == -1 ? 
0 : static_cast(TSCFrequency); } +#endif static constexpr uint8_t CallOpCode = 0xe8; static constexpr uint16_t MovR10Seq = 0xba41; @@ -111,8 +130,8 @@ int64_t TrampolineOffset = reinterpret_cast(Trampoline) - (static_cast(Sled.Address) + 11); if (TrampolineOffset < MinOffset || TrampolineOffset > MaxOffset) { - Report("XRay Entry trampoline (%p) too far from sled (%p)\n", - Trampoline, reinterpret_cast(Sled.Address)); + Report("XRay Entry trampoline (%p) too far from sled (%p)\n", Trampoline, + reinterpret_cast(Sled.Address)); return false; } if (Enable) { @@ -247,7 +266,7 @@ std::memory_order_release); break; } - } + } return false; } Index: compiler-rt/test/xray/TestCases/Darwin/always-never-instrument.cc =================================================================== --- /dev/null +++ compiler-rt/test/xray/TestCases/Darwin/always-never-instrument.cc @@ -0,0 +1,23 @@ +// Test that the always/never instrument lists apply. +// RUN: echo "fun:main" > %tmp-always.txt +// RUN: echo "fun:__xray*" > %tmp-never.txt +// RUN: %clangxx_xray \ +// RUN: -fxray-never-instrument=%tmp-never.txt \ +// RUN: -fxray-always-instrument=%tmp-always.txt \ +// RUN: %s -o %t +// RUN: %llvm_xray extract -symbolize %t | \ +// RUN: FileCheck %s --check-prefix NOINSTR +// RUN: %llvm_xray extract -symbolize %t | \ +// RUN: FileCheck %s --check-prefix ALWAYSINSTR +// REQUIRES: x86_64-target-arch +// REQUIRES: built-in-llvm-tree + +// NOINSTR-NOT: {{.*__xray_NeverInstrumented.*}} +int __xray_NeverInstrumented() { + return 0; +} + +// ALWAYSINSTR: {{.*function-name:.*main.*}} +int main(int argc, char *argv[]) { + return __xray_NeverInstrumented(); +} Index: compiler-rt/test/xray/TestCases/Darwin/lit.local.cfg =================================================================== --- /dev/null +++ compiler-rt/test/xray/TestCases/Darwin/lit.local.cfg @@ -0,0 +1,9 @@ +def getRoot(config): + if not config.parent: + return config + return getRoot(config.parent) + +root = getRoot(config) + +if root.host_os 
not in ['Darwin']: + config.unsupported = True Index: compiler-rt/test/xray/TestCases/Linux/lit.local.cfg =================================================================== --- /dev/null +++ compiler-rt/test/xray/TestCases/Linux/lit.local.cfg @@ -0,0 +1,9 @@ +def getRoot(config): + if not config.parent: + return config + return getRoot(config.parent) + +root = getRoot(config) + +if root.host_os not in ['Linux']: + config.unsupported = True Index: compiler-rt/test/xray/lit.cfg =================================================================== --- compiler-rt/test/xray/lit.cfg +++ compiler-rt/test/xray/lit.cfg @@ -40,7 +40,7 @@ # Default test suffixes. config.suffixes = ['.c', '.cc', '.cpp'] -if config.host_os not in ['Linux']: +if config.host_os not in ['Linux', 'Darwin']: config.unsupported = True elif '64' not in config.host_arch: if 'arm' in config.host_arch: