Index: compiler-rt/trunk/lib/xray/xray_fdr_logging.cc =================================================================== --- compiler-rt/trunk/lib/xray/xray_fdr_logging.cc +++ compiler-rt/trunk/lib/xray/xray_fdr_logging.cc @@ -15,11 +15,11 @@ // //===----------------------------------------------------------------------===// #include "xray_fdr_logging.h" -#include #include #include #include #include +#include #include #include #include @@ -80,6 +80,16 @@ // Use a global pthread key to identify thread-local data for logging. static pthread_key_t Key; +// Global BufferQueue. +static BufferQueue *BQ = nullptr; + +static atomic_sint32_t LogFlushStatus = { + XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; + +static FDRLoggingOptions FDROptions; + +static SpinMutex FDROptionsMutex; + // This function will initialize the thread-local data structure used by the FDR // logging implementation and return a reference to it. The implementation // details require a bit of care to maintain. @@ -114,15 +124,18 @@ // critical section, calling a function that might be XRay instrumented (and // thus in turn calling into malloc by virtue of registration of the // thread_local's destructor). +static_assert(alignof(ThreadLocalData) >= 64, + "ThreadLocalData must be cache line aligned."); static ThreadLocalData &getThreadLocalData() { - static_assert(alignof(ThreadLocalData) >= 64, - "ThreadLocalData must be cache line aligned."); - thread_local ThreadLocalData TLD; - thread_local bool UNUSED ThreadOnce = [] { - pthread_setspecific(Key, &TLD); - return false; - }(); - return TLD; + thread_local typename std::aligned_storage< + sizeof(ThreadLocalData), alignof(ThreadLocalData)>::type TLDStorage{}; + + if (pthread_getspecific(Key) == NULL) { + new (reinterpret_cast(&TLDStorage)) ThreadLocalData{}; + pthread_setspecific(Key, &TLDStorage); + } + + return *reinterpret_cast(&TLDStorage); } static void writeNewBufferPreamble(tid_t Tid, timespec TS, @@ -416,7 +429,7 @@ return false; auto EC = TLD.BQ->getBuffer(TLD.Buffer); if (EC != BufferQueue::ErrorCode::Ok) { - Report("Failed to acquire a buffer; error=%s\n", + Report("Failed to prepare a buffer; error = '%s'\n", BufferQueue::getErrorString(EC)); return false; } @@ -462,7 +475,7 @@ auto LS = atomic_load(&LoggingStatus, memory_order_acquire); if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING && LS != XRayLogInitStatus::XRAY_LOG_FINALIZED) - Report("Failed to acquire a buffer; error=%s\n", + Report("Failed to acquire a buffer; error = '%s'\n", BufferQueue::getErrorString(EC)); return false; } @@ -541,8 +554,8 @@ static void processFunctionHook(int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU, uint64_t Arg1, int (*wall_clock_reader)(clockid_t, - struct timespec *), - BufferQueue *BQ) XRAY_NEVER_INSTRUMENT { + struct timespec *)) + XRAY_NEVER_INSTRUMENT { __asm volatile("# LLVM-MCA-BEGIN processFunctionHook"); // Prevent signal handler recursion, so in case we're already in a log writing // mode and the signal handler comes in (and is also instrumented) then we @@ -557,8 +570,6 @@ auto &TLD = getThreadLocalData(); - // In case the reference has been cleaned up before, we make sure we - // initialize it to the provided BufferQueue. if (TLD.BQ == nullptr) TLD.BQ = BQ; @@ -656,15 +667,6 @@ __asm volatile("# LLVM-MCA-END"); } -// Global BufferQueue. -BufferQueue *BQ = nullptr; - -atomic_sint32_t LogFlushStatus = {XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; - -FDRLoggingOptions FDROptions; - -SpinMutex FDROptionsMutex; - static XRayFileHeader &fdrCommonHeaderInfo() { static std::aligned_storage::type HStorage; static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; @@ -789,15 +791,28 @@ // we decide to do anything further with the global buffer queue. __xray_log_remove_buffer_iterator(); + // Once flushed, we should set the global status of the logging implementation + // to "uninitialized" to allow for FDR-logging multiple runs. + auto ResetToUnitialized = at_scope_exit([] { + atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, + memory_order_release); + }); + + auto CleanupBuffers = at_scope_exit([] { + if (BQ != nullptr) { + auto &TLD = getThreadLocalData(); + if (TLD.RecordPtr != nullptr && TLD.BQ != nullptr) + releaseThreadLocalBuffer(*TLD.BQ); + BQ->~BufferQueue(); + InternalFree(BQ); + BQ = nullptr; + } + }); + if (fdrFlags()->no_file_flush) { if (Verbosity()) Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n"); - // Clean up the buffer queue, and do not bother writing out the files! - BQ->~BufferQueue(); - InternalFree(BQ); - BQ = nullptr; - atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, memory_order_release); return XRayLogFlushStatus::XRAY_LOG_FLUSHED; @@ -913,13 +928,13 @@ void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { auto TC = getTimestamp(); - processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, 0, clock_gettime, BQ); + processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, 0, clock_gettime); } void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, uint64_t Arg) XRAY_NEVER_INSTRUMENT { auto TC = getTimestamp(); - processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime, BQ); + processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime); } void fdrLoggingHandleCustomEvent(void *Event, @@ -1121,8 +1136,10 @@ probeRequiredCPUFeatures() ? getTSCFrequency() : __xray::NanosecondsPerSecond, memory_order_release); - pthread_key_create(&Key, +[](void *) { - auto &TLD = getThreadLocalData(); + pthread_key_create(&Key, +[](void *TLDPtr) { + if (TLDPtr == nullptr) + return; + auto &TLD = *reinterpret_cast(TLDPtr); if (TLD.BQ == nullptr) return; auto EC = TLD.BQ->releaseBuffer(TLD.Buffer); Index: compiler-rt/trunk/test/xray/TestCases/Posix/fdr-mode-multiple.cc =================================================================== --- compiler-rt/trunk/test/xray/TestCases/Posix/fdr-mode-multiple.cc +++ compiler-rt/trunk/test/xray/TestCases/Posix/fdr-mode-multiple.cc @@ -0,0 +1,76 @@ +// RUN: %clangxx_xray -g -std=c++11 %s -o %t -fxray-modes=xray-fdr +// RUN: rm -f fdr-inmemory-test-* +// RUN: XRAY_OPTIONS="patch_premain=false xray_logfile_base=fdr-inmemory-test- \ +// RUN: verbosity=1" \ +// RUN: XRAY_FDR_OPTIONS="no_file_flush=true func_duration_threshold_us=0" \ +// RUN: %run %t 2>&1 | FileCheck %s +// RUN: FILES=`find %T -name 'fdr-inmemory-test-*' | wc -l` +// RUN: [ $FILES -eq 0 ] +// RUN: rm -f fdr-inmemory-test-* +// +// REQUIRES: x86_64-target-arch +// REQUIRES: built-in-llvm-tree + +#include "xray/xray_log_interface.h" +#include +#include + +uint64_t var = 0; +uint64_t buffers = 0; +[[clang::xray_always_instrument]] void __attribute__((noinline)) f() { ++var; } + +int main(int argc, char *argv[]) { + assert(__xray_log_select_mode("xray-fdr") == + XRayLogRegisterStatus::XRAY_REGISTRATION_OK); + auto status = __xray_log_init_mode( + "xray-fdr", + "buffer_size=4096:buffer_max=10:func_duration_threshold_us=0"); + assert(status == XRayLogInitStatus::XRAY_LOG_INITIALIZED); + __xray_patch(); + + // Create enough entries. + for (int i = 0; i != 1 << 20; ++i) { + f(); + } + + // Then we want to verify that we're getting 10 buffers outside of the initial + // header. + auto finalize_status = __xray_log_finalize(); + assert(finalize_status == XRayLogInitStatus::XRAY_LOG_FINALIZED); + auto process_status = + __xray_log_process_buffers([](const char *, XRayBuffer) { ++buffers; }); + std::cout << "buffers = " << buffers << std::endl; + assert(process_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED); + auto flush_status = __xray_log_flushLog(); + assert(flush_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED); + // We expect 11 buffers because 1 header buffer + 10 actual FDR buffers. + // CHECK: Buffers = 11 + std::cout << "Buffers = " << buffers << std::endl; + + // In this test we ensure that we can restart the cycle after the flush. + status = __xray_log_init_mode( + "xray-fdr", + "buffer_size=4096:buffer_max=10:func_duration_threshold_us=0"); + assert(status == XRayLogInitStatus::XRAY_LOG_INITIALIZED); + __xray_patch(); + + // Create enough entries. + for (int i = 0; i != 1 << 20; ++i) { + f(); + } + + // Then we want to verify that we're getting 10 buffers outside of the initial + // header. + finalize_status = __xray_log_finalize(); + assert(finalize_status == XRayLogInitStatus::XRAY_LOG_FINALIZED); + process_status = + __xray_log_process_buffers([](const char *, XRayBuffer) { ++buffers; }); + std::cout << "buffers = " << buffers << std::endl; + assert(process_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED); + flush_status = __xray_log_flushLog(); + assert(flush_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED); + // We expect 22 buffers because 1 header buffer + 10 actual FDR buffers, plus + // the number of buffers we got from the previous run (also 11). + // CHECK: Buffers = 22 + std::cout << "Buffers = " << buffers << std::endl; +}