Index: include/xray/xray_records.h =================================================================== --- include/xray/xray_records.h +++ include/xray/xray_records.h @@ -17,6 +17,8 @@ #ifndef XRAY_XRAY_RECORDS_H #define XRAY_XRAY_RECORDS_H +#include + namespace __xray { enum FileTypes { Index: lib/xray/xray_fdr_logging.cc =================================================================== --- lib/xray/xray_fdr_logging.cc +++ lib/xray/xray_fdr_logging.cc @@ -39,7 +39,10 @@ namespace __xray { // Global BufferQueue. -std::shared_ptr BQ; +// NOTE: This is a pointer to avoid having to do atomic operations at +// initialization time. This is OK to leak as there will only be one bufferqueue +// for the runtime, initialized once through the fdrInit(...) sequence. +std::shared_ptr* BQ = nullptr; __sanitizer::atomic_sint32_t LogFlushStatus = { XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; @@ -64,7 +67,7 @@ // Make a copy of the BufferQueue pointer to prevent other threads that may be // resetting it from blowing away the queue prematurely while we're dealing // with it. - auto LocalBQ = BQ; + auto LocalBQ = *BQ; // We write out the file in the following format: // @@ -129,7 +132,7 @@ // Do special things to make the log finalize itself, and not allow any more // operations to be performed until re-initialized. - BQ->finalize(); + (*BQ)->finalize(); __sanitizer::atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_FINALIZED, @@ -146,7 +149,7 @@ return static_cast(CurrentStatus); // Release the in-memory buffer queue. - BQ.reset(); + (*BQ).reset(); // Spin until the flushing status is flushed. s32 CurrentFlushingStatus = XRayLogFlushStatus::XRAY_LOG_FLUSHED; @@ -195,7 +198,7 @@ auto TSC_CPU = getTimestamp(); __xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU), std::get<1>(TSC_CPU), clock_gettime, - LoggingStatus, BQ); + LoggingStatus, *BQ); } void fdrLoggingHandleCustomEvent(void *Event, @@ -220,7 +223,7 @@ (void)Once; } int32_t ReducedEventSize = static_cast(EventSize); - if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, clock_gettime)) + if (!isLogInitializedAndReady(*LocalBQ, TSC, CPU, clock_gettime)) return; // Here we need to prepare the log to handle: @@ -268,7 +271,10 @@ } bool Success = false; - BQ = std::make_shared(BufferSize, BufferMax, Success); + if (BQ == nullptr) + BQ = new std::shared_ptr(nullptr); + + *BQ = std::make_shared(BufferSize, BufferMax, Success); if (!Success) { Report("BufferQueue init failed.\n"); return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; Index: lib/xray/xray_fdr_logging_impl.h =================================================================== --- lib/xray/xray_fdr_logging_impl.h +++ lib/xray/xray_fdr_logging_impl.h @@ -169,8 +169,9 @@ // Make sure a thread that's ever called handleArg0 has a thread-local // live reference to the buffer queue for this particular instance of // FDRLogging, and that we're going to clean it up when the thread exits. -thread_local std::shared_ptr LocalBQ = nullptr; -thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer); +thread_local std::shared_ptr* LocalBQ = + new std::shared_ptr(nullptr); +thread_local ThreadExitBufferCleanup Cleanup(*LocalBQ, Buffer); class RecursionGuard { bool &Running; @@ -467,9 +468,9 @@ char *BufferStart = static_cast(Buffer.Buffer); if ((RecordPtr + MaxSize) > (BufferStart + Buffer.Size - MetadataRecSize)) { writeEOBMetadata(); - if (!releaseThreadLocalBuffer(LocalBQ.get())) + if (!releaseThreadLocalBuffer(LocalBQ->get())) return false; - auto EC = LocalBQ->getBuffer(Buffer); + auto EC = (*LocalBQ)->getBuffer(Buffer); if (EC != BufferQueue::ErrorCode::Ok) { Report("Failed to acquire a buffer; error=%s\n", BufferQueue::getErrorString(EC)); @@ -538,7 +539,7 @@ auto BufferStart = static_cast(Buffer.Buffer); if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) { writeEOBMetadata(); - if (!releaseThreadLocalBuffer(LocalBQ.get())) + if (!releaseThreadLocalBuffer(LocalBQ->get())) return; RecordPtr = nullptr; } @@ -563,10 +564,10 @@ // In case the reference has been cleaned up before, we make sure we // initialize it to the provided BufferQueue. - if (LocalBQ == nullptr) - LocalBQ = BQ; + if ((*LocalBQ) == nullptr) + *LocalBQ = BQ; - if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, wall_clock_reader)) + if (!isLogInitializedAndReady(*LocalBQ, TSC, CPU, wall_clock_reader)) return; // Before we go setting up writing new function entries, we need to be really @@ -606,7 +607,7 @@ // Buffer, set it up properly before doing any further writing. // if (!prepareBuffer(wall_clock_reader, FunctionRecSize + MetadataRecSize)) { - LocalBQ = nullptr; + *LocalBQ = nullptr; return; } Index: lib/xray/xray_inmemory_log.cc =================================================================== --- lib/xray/xray_inmemory_log.cc +++ lib/xray/xray_inmemory_log.cc @@ -16,12 +16,12 @@ //===----------------------------------------------------------------------===// #include +#include #include -#include #include #include #include -#include +#include #include #include "sanitizer_common/sanitizer_libc.h" @@ -43,7 +43,7 @@ namespace __xray { -std::mutex LogMutex; +__sanitizer::SpinMutex LogMutex; class ThreadExitFlusher { int Fd; @@ -58,7 +58,7 @@ Offset(Offset) {} ~ThreadExitFlusher() XRAY_NEVER_INSTRUMENT { - std::lock_guard L(LogMutex); + __sanitizer::SpinMutexLock L(&LogMutex); if (Fd > 0 && Start != nullptr) { retryingWriteAll(Fd, reinterpret_cast(Start), reinterpret_cast(Start + Offset)); @@ -82,8 +82,8 @@ // Test for required CPU features and cache the cycle frequency static bool TSCSupported = probeRequiredCPUFeatures(); - static uint64_t CycleFrequency = TSCSupported ? getTSCFrequency() - : __xray::NanosecondsPerSecond; + static uint64_t CycleFrequency = + TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond; // Since we're here, we get to write the header. We set it up so that the // header will only be written once, at the start, and let the threads @@ -127,7 +127,7 @@ R.FuncId = FuncId; ++Offset; if (Offset == BuffLen) { - std::lock_guard L(LogMutex); + __sanitizer::SpinMutexLock L(&LogMutex); auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer); retryingWriteAll(Fd, reinterpret_cast(RecordBuffer), reinterpret_cast(RecordBuffer + Offset));