Index: compiler-rt/lib/xray/xray_basic_logging.cc
===================================================================
--- compiler-rt/lib/xray/xray_basic_logging.cc
+++ compiler-rt/lib/xray/xray_basic_logging.cc
@@ -29,6 +29,7 @@
 #include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "xray/xray_records.h"
+#include "xray/xray_recursion_guard.h"
 #include "xray_basic_flags.h"
 #include "xray_basic_logging.h"
 #include "xray_defs.h"
@@ -70,12 +71,12 @@
 
 BasicLoggingOptions GlobalOptions;
 
-thread_local volatile bool RecursionGuard = false;
+thread_local atomic_uint8_t Guard{0};
 
 static uint64_t thresholdTicks() XRAY_NEVER_INSTRUMENT {
   static uint64_t TicksPerSec = probeRequiredCPUFeatures()
                                     ? getTSCFrequency()
-                                    : __xray::NanosecondsPerSecond;
+                                    : NanosecondsPerSecond;
   static const uint64_t ThresholdTicks =
       TicksPerSec * GlobalOptions.DurationFilterMicros / 1000000;
   return ThresholdTicks;
@@ -89,7 +90,7 @@
   // Test for required CPU features and cache the cycle frequency
   static bool TSCSupported = probeRequiredCPUFeatures();
   static uint64_t CycleFrequency =
-      TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
+      TSCSupported ? getTSCFrequency() : NanosecondsPerSecond;
 
   // Since we're here, we get to write the header. We set it up so that the
   // header will only be written once, at the start, and let the threads
@@ -165,10 +166,9 @@
   // Use a simple recursion guard, to handle cases where we're already logging
   // and for one reason or another, this function gets called again in the same
   // thread.
-  if (RecursionGuard)
+  RecursionGuard G(Guard);
+  if (!G)
     return;
-  RecursionGuard = true;
-  auto ExitGuard = at_scope_exit([] { RecursionGuard = false; });
 
   uint8_t CPU = 0;
   uint64_t TSC = ReadTSC(CPU);
@@ -233,14 +233,14 @@
 
   // First determine whether the delta between the function's enter record and
   // the exit record is higher than the threshold.
-  __xray::XRayRecord R;
+  XRayRecord R;
   R.RecordType = RecordTypes::NORMAL;
   R.CPU = CPU;
   R.TSC = TSC;
   R.TId = TLD.TID;
   R.Type = Type;
   R.FuncId = FuncId;
-  auto FirstEntry = reinterpret_cast<__xray::XRayRecord *>(TLD.InMemoryBuffer);
+  auto FirstEntry = reinterpret_cast<XRayRecord *>(TLD.InMemoryBuffer);
   internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R));
   if (++TLD.BufferOffset == TLD.BufferSize) {
     SpinMutexLock L(&LogMutex);
@@ -256,7 +256,7 @@
                            RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT {
   auto &TLD = getThreadLocalData();
   auto FirstEntry =
-      reinterpret_cast<__xray::XRayArgPayload *>(TLD.InMemoryBuffer);
+      reinterpret_cast<XRayArgPayload *>(TLD.InMemoryBuffer);
   const auto &BuffLen = TLD.BufferSize;
   int Fd = getGlobalFd();
   if (Fd == -1)
@@ -276,13 +276,12 @@
   // Then we write the "we have an argument" record.
   InMemoryRawLog(FuncId, Type, ReadTSC);
 
-  if (RecursionGuard)
+  RecursionGuard G(Guard);
+  if (!G)
     return;
-  RecursionGuard = true;
-  auto ExitGuard = at_scope_exit([] { RecursionGuard = false; });
 
   // And from here on write the arg payload.
-  __xray::XRayArgPayload R;
+  XRayArgPayload R;
   R.RecordType = RecordTypes::ARG_PAYLOAD;
   R.FuncId = FuncId;
   R.TId = TLD.TID;
@@ -299,7 +298,7 @@
 
 void basicLoggingHandleArg0RealTSC(int32_t FuncId,
                                    XRayEntryType Type) XRAY_NEVER_INSTRUMENT {
-  InMemoryRawLog(FuncId, Type, __xray::readTSC);
+  InMemoryRawLog(FuncId, Type, readTSC);
 }
 
 void basicLoggingHandleArg0EmulateTSC(int32_t FuncId, XRayEntryType Type)
@@ -312,13 +311,13 @@
       TS = {0, 0};
     }
     CPU = 0;
-    return TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
+    return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
   });
 }
 
 void basicLoggingHandleArg1RealTSC(int32_t FuncId, XRayEntryType Type,
                                    uint64_t Arg1) XRAY_NEVER_INSTRUMENT {
-  InMemoryRawLogWithArg(FuncId, Type, Arg1, __xray::readTSC);
+  InMemoryRawLogWithArg(FuncId, Type, Arg1, readTSC);
 }
 
 void basicLoggingHandleArg1EmulateTSC(int32_t FuncId, XRayEntryType Type,
@@ -332,7 +331,7 @@
       TS = {0, 0};
     }
     CPU = 0;
-    return TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
+    return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
   });
 }
 
@@ -359,7 +358,7 @@
     SpinMutexLock L(&LogMutex);
     retryingWriteAll(TLD.Fd, reinterpret_cast<char *>(TLD.InMemoryBuffer),
                      reinterpret_cast<char *>(TLD.InMemoryBuffer) +
-                         (sizeof(__xray::XRayRecord) * TLD.BufferOffset));
+                         (sizeof(XRayRecord) * TLD.BufferOffset));
   }
 
   // Because this thread's exit could be the last one trying to write to
@@ -373,8 +372,8 @@
                                     void *Options,
                                     size_t OptionsSize) XRAY_NEVER_INSTRUMENT {
   uint8_t Expected = 0;
-  if (!atomic_compare_exchange_strong(
-          &BasicInitialized, &Expected, 1, memory_order_acq_rel)) {
+  if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1,
+                                      memory_order_acq_rel)) {
     if (Verbosity())
       Report("Basic logging already initialized.\n");
     return XRayLogInitStatus::XRAY_LOG_INITIALIZED;
@@ -438,8 +437,8 @@
 
 XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT {
   uint8_t Expected = 0;
-  if (!atomic_compare_exchange_strong(
-          &BasicInitialized, &Expected, 0, memory_order_acq_rel) &&
+  if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0,
+                                      memory_order_acq_rel) &&
       Verbosity())
     Report("Basic logging already finalized.\n");
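The guard swap above is the heart of this file's change: the old `if (RecursionGuard) return; RecursionGuard = true;` is a separate load and store, so a signal arriving between the two lets the logger re-enter itself, while the new `RecursionGuard` claims the flag with a single atomic exchange. A minimal self-contained sketch of just that acquire step, using `std::atomic` in place of the `__sanitizer` atomics (the names here are illustrative, not part of this patch):

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>

thread_local std::atomic<uint8_t> Guard{0};

// exchange() reads the old value and writes 1 in one indivisible step, so two
// nested calls on one thread (e.g. a signal interrupting the logger) can never
// both observe 0 and both proceed -- unlike a volatile-bool test-then-set.
bool tryAcquireGuard() {
  return Guard.exchange(1, std::memory_order_acq_rel) == 0;
}

void releaseGuard() { Guard.store(0, std::memory_order_release); }

int main() {
  std::printf("first acquire: %d\n", tryAcquireGuard());  // 1: acquired
  std::printf("nested acquire: %d\n", tryAcquireGuard()); // 0: refused
  releaseGuard();
}
```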
Index: compiler-rt/lib/xray/xray_fdr_logging.cc
===================================================================
--- compiler-rt/lib/xray/xray_fdr_logging.cc
+++ compiler-rt/lib/xray/xray_fdr_logging.cc
@@ -15,20 +15,21 @@
 //
 //===----------------------------------------------------------------------===//
 #include "xray_fdr_logging.h"
-
 #include <cassert>
 #include <errno.h>
 #include <limits>
-#include <memory>
+#include <pthread.h>
 #include <sys/syscall.h>
 #include <sys/time.h>
 #include <time.h>
 #include <unistd.h>
 
+#include "sanitizer_common/sanitizer_allocator_internal.h"
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "xray/xray_interface.h"
 #include "xray/xray_records.h"
+#include "xray/xray_recursion_guard.h"
 #include "xray_buffer_queue.h"
 #include "xray_defs.h"
 #include "xray_fdr_flags.h"
@@ -38,19 +39,8 @@
 
 namespace __xray {
 
-atomic_sint32_t LoggingStatus = {XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
-
-/// We expose some of the state transitions when FDR logging mode is operating
-/// such that we can simulate a series of log events that may occur without
-/// and test with determinism without worrying about the real CPU time.
-///
-/// Because the code uses thread_local allocation extensively as part of its
-/// design, callers that wish to test events occuring on different threads
-/// will actually have to run them on different threads.
-///
-/// This also means that it is possible to break invariants maintained by
-/// cooperation with xray_fdr_logging class, so be careful and think twice.
-namespace __xray_fdr_internal {
+__sanitizer::atomic_sint32_t LoggingStatus = {
+    XRayLogInitStatus::XRAY_LOG_UNINITIALIZED};
 
 /// Writes the new buffer record and wallclock time that begin a buffer for the
 /// current thread.
@@ -158,33 +148,6 @@
 // to grow the stack when we've hijacked the binary for logging.              |
 //-----------------------------------------------------------------------------|
 
-namespace {
-
-class RecursionGuard {
-  volatile bool &Running;
-  const bool Valid;
-
-public:
-  explicit RecursionGuard(volatile bool &R) : Running(R), Valid(!R) {
-    if (Valid)
-      Running = true;
-  }
-
-  RecursionGuard(const RecursionGuard &) = delete;
-  RecursionGuard(RecursionGuard &&) = delete;
-  RecursionGuard &operator=(const RecursionGuard &) = delete;
-  RecursionGuard &operator=(RecursionGuard &&) = delete;
-
-  explicit operator bool() const { return Valid; }
-
-  ~RecursionGuard() noexcept {
-    if (Valid)
-      Running = false;
-  }
-};
-
-} // namespace
-
 static void writeNewBufferPreamble(tid_t Tid,
                                    timespec TS) XRAY_NEVER_INSTRUMENT {
   static constexpr int InitRecordsCount = 2;
@@ -229,16 +192,16 @@
   TLD.RecordPtr += sizeof(Metadata);
   // Since we write out the extents as the first metadata record of the
   // buffer, we need to write out the extents including the extents record.
-  atomic_store(&TLD.Buffer.Extents->Size, sizeof(Metadata),
-               memory_order_release);
+  __sanitizer::atomic_store(&TLD.Buffer.Extents->Size, sizeof(Metadata),
+                            __sanitizer::memory_order_release);
 }
 
-inline void setupNewBuffer(int (*wall_clock_reader)(
+static void setupNewBuffer(int (*wall_clock_reader)(
     clockid_t, struct timespec *)) XRAY_NEVER_INSTRUMENT {
   auto &TLD = getThreadLocalData();
   auto &B = TLD.Buffer;
   TLD.RecordPtr = static_cast<char *>(B.Data);
-  tid_t Tid = GetTid();
+  tid_t Tid = __sanitizer::GetTid();
   timespec TS{0, 0};
   // This is typically clock_gettime, but callers have injection ability.
   wall_clock_reader(CLOCK_MONOTONIC, &TS);
@@ -249,15 +212,17 @@
 
 static void incrementExtents(size_t Add) {
   auto &TLD = getThreadLocalData();
-  atomic_fetch_add(&TLD.Buffer.Extents->Size, Add, memory_order_acq_rel);
+  __sanitizer::atomic_fetch_add(&TLD.Buffer.Extents->Size, Add,
+                                __sanitizer::memory_order_acq_rel);
 }
 
 static void decrementExtents(size_t Subtract) {
   auto &TLD = getThreadLocalData();
-  atomic_fetch_sub(&TLD.Buffer.Extents->Size, Subtract, memory_order_acq_rel);
+  __sanitizer::atomic_fetch_sub(&TLD.Buffer.Extents->Size, Subtract,
+                                __sanitizer::memory_order_acq_rel);
 }
 
-inline void writeNewCPUIdMetadata(uint16_t CPU,
+static void writeNewCPUIdMetadata(uint16_t CPU,
                                   uint64_t TSC) XRAY_NEVER_INSTRUMENT {
   auto &TLD = getThreadLocalData();
   MetadataRecord NewCPUId;
@@ -277,7 +242,7 @@
   incrementExtents(sizeof(MetadataRecord));
 }
 
-inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+static void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
   auto &TLD = getThreadLocalData();
   MetadataRecord TSCWrap;
   TSCWrap.Type = uint8_t(RecordType::Metadata);
@@ -296,7 +261,7 @@
 
 // Call Argument metadata records store the arguments to a function in the
 // order of their appearance; holes are not supported by the buffer format.
-static inline void writeCallArgumentMetadata(uint64_t A) XRAY_NEVER_INSTRUMENT {
+static void writeCallArgumentMetadata(uint64_t A) XRAY_NEVER_INSTRUMENT {
   auto &TLD = getThreadLocalData();
   MetadataRecord CallArg;
   CallArg.Type = uint8_t(RecordType::Metadata);
@@ -308,9 +273,8 @@
   incrementExtents(sizeof(MetadataRecord));
 }
 
-static inline void
-writeFunctionRecord(int FuncId, uint32_t TSCDelta,
-                    XRayEntryType EntryType) XRAY_NEVER_INSTRUMENT {
+static void writeFunctionRecord(int FuncId, uint32_t TSCDelta,
+                                XRayEntryType EntryType) XRAY_NEVER_INSTRUMENT {
   FunctionRecord FuncRecord;
   FuncRecord.Type = uint8_t(RecordType::Function);
   // Only take 28 bits of the function id.
@@ -354,23 +318,25 @@
     break;
   case XRayEntryType::CUSTOM_EVENT: {
     // This is a bug in patching, so we'll report it once and move on.
-    static bool Once = [&] {
+    static atomic_uint8_t ErrorLatch{0};
+    if (!atomic_load_relaxed(&ErrorLatch))
       Report("Internal error: patched an XRay custom event call as a function; "
              "func id = %d\n",
              FuncId);
-      return true;
-    }();
-    (void)Once;
+    static pthread_once_t ErrorOnce = PTHREAD_ONCE_INIT;
+    pthread_once(&ErrorOnce,
+                 +[] { atomic_store(&ErrorLatch, 1, memory_order_release); });
     return;
   }
   case XRayEntryType::TYPED_EVENT: {
-    static bool Once = [&] {
+    static atomic_uint8_t ErrorLatch{0};
+    if (!atomic_load_relaxed(&ErrorLatch))
       Report("Internal error: patched an XRay typed event call as a function; "
             "func id = %d\n",
             FuncId);
-      return true;
-    }();
-    (void)Once;
+    static pthread_once_t ErrorOnce = PTHREAD_ONCE_INIT;
+    pthread_once(&ErrorOnce,
+                 +[] { atomic_store(&ErrorLatch, 1, memory_order_release); });
    return;
  }
  }
@@ -380,14 +346,8 @@
   incrementExtents(sizeof(FunctionRecord));
 }
 
-static uint64_t thresholdTicks() {
-  static uint64_t TicksPerSec = probeRequiredCPUFeatures()
-                                    ? getTSCFrequency()
-                                    : __xray::NanosecondsPerSecond;
-  static const uint64_t ThresholdTicks =
-      TicksPerSec * fdrFlags()->func_duration_threshold_us / 1000000;
-  return ThresholdTicks;
-}
+static atomic_uint64_t TicksPerSec{0};
+static atomic_uint64_t ThresholdTicks{0};
 
 // Re-point the thread local pointer into this thread's Buffer before the recent
 // "Function Entry" record and any "Tail Call Exit" records after that.
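The `Once` lambdas this hunk replaces rely on C++ function-local static initialization, which the compiler implements with `__cxa_guard_acquire`/`__cxa_guard_release`; those guards can block, which is undesirable inside instrumentation hooks. Below is a sketch of the same report-once pattern in isolation, assuming only POSIX threads and `<atomic>` (`reportPatchingBugOnce` is an illustrative name, not from the patch):

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <pthread.h>

static std::atomic<uint8_t> ErrorLatch{0};
static pthread_once_t ErrorOnce = PTHREAD_ONCE_INIT;

void reportPatchingBugOnce(int FuncId) {
  // Report only while the latch is still unset; pthread_once then raises the
  // latch exactly once, so every later call skips the report entirely.
  if (!ErrorLatch.load(std::memory_order_relaxed))
    std::fprintf(stderr,
                 "Internal error: patched an event sled as a function; "
                 "func id = %d\n",
                 FuncId);
  pthread_once(&ErrorOnce,
               +[] { ErrorLatch.store(1, std::memory_order_release); });
}
```

Note the deliberate looseness: two threads racing in before the latch is raised can both report. That matches the relaxed guarantee the patch accepts for this diagnostics-only path.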
@@ -398,10 +358,10 @@
     decrementExtents(FunctionRecSize);
     FunctionRecord FuncRecord;
     internal_memcpy(&FuncRecord, TLD.RecordPtr, FunctionRecSize);
-    assert(FuncRecord.RecordKind ==
+    DCHECK(FuncRecord.RecordKind ==
               uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
           "Expected to find function entry recording when rewinding.");
-    assert(FuncRecord.FuncId == (FuncId & ~(0x0F << 28)) &&
+    DCHECK(FuncRecord.FuncId == (FuncId & ~(0x0F << 28)) &&
           "Expected matching function id when rewinding Exit");
     --TLD.NumConsecutiveFnEnters;
     LastTSC -= FuncRecord.TSCDelta;
@@ -423,7 +383,7 @@
       FunctionRecord ExpectedTailExit;
       internal_memcpy(&ExpectedTailExit, RewindingRecordPtr, FunctionRecSize);
 
-      assert(ExpectedTailExit.RecordKind ==
+      DCHECK(ExpectedTailExit.RecordKind ==
                 uint8_t(FunctionRecord::RecordKinds::FunctionTailExit) &&
             "Expected to find tail exit when rewinding.");
       RewindingRecordPtr -= FunctionRecSize;
@@ -431,14 +391,14 @@
       FunctionRecord ExpectedFunctionEntry;
       internal_memcpy(&ExpectedFunctionEntry, RewindingRecordPtr,
                       FunctionRecSize);
-      assert(ExpectedFunctionEntry.RecordKind ==
+      DCHECK(ExpectedFunctionEntry.RecordKind ==
                 uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
             "Expected to find function entry when rewinding tail call.");
-      assert(ExpectedFunctionEntry.FuncId == ExpectedTailExit.FuncId &&
+      DCHECK(ExpectedFunctionEntry.FuncId == ExpectedTailExit.FuncId &&
             "Expected funcids to match when rewinding tail call.");
 
       // This tail call exceeded the threshold duration. It will not be erased.
-      if ((TSC - RewindingTSC) >= thresholdTicks()) {
+      if ((TSC - RewindingTSC) >= atomic_load_relaxed(&ThresholdTicks)) {
        TLD.NumTailCalls = 0;
        return;
      }
@@ -454,7 +414,7 @@
   }
 }
 
-inline bool releaseThreadLocalBuffer(BufferQueue &BQArg) {
+static bool releaseThreadLocalBuffer(BufferQueue &BQArg) {
   auto &TLD = getThreadLocalData();
   auto EC = BQArg.releaseBuffer(TLD.Buffer);
   if (EC != BufferQueue::ErrorCode::Ok) {
@@ -465,7 +425,7 @@
   return true;
 }
 
-inline bool prepareBuffer(uint64_t TSC, unsigned char CPU,
+static bool prepareBuffer(uint64_t TSC, unsigned char CPU,
                           int (*wall_clock_reader)(clockid_t,
                                                    struct timespec *),
                           size_t MaxSize) XRAY_NEVER_INSTRUMENT {
@@ -488,13 +448,14 @@
   return true;
 }
 
-inline bool
+static bool
 isLogInitializedAndReady(BufferQueue *LBQ, uint64_t TSC, unsigned char CPU,
                          int (*wall_clock_reader)(clockid_t, struct timespec *))
     XRAY_NEVER_INSTRUMENT {
   // Bail out right away if logging is not initialized yet.
   // We should take the opportunity to release the buffer though.
-  auto Status = atomic_load(&LoggingStatus, memory_order_acquire);
+  auto Status = __sanitizer::atomic_load(&LoggingStatus,
+                                         __sanitizer::memory_order_acquire);
   auto &TLD = getThreadLocalData();
   if (Status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
     if (TLD.RecordPtr != nullptr &&
@@ -508,7 +469,8 @@
     return false;
   }
 
-  if (atomic_load(&LoggingStatus, memory_order_acquire) !=
+  if (__sanitizer::atomic_load(&LoggingStatus,
+                               __sanitizer::memory_order_acquire) !=
           XRayLogInitStatus::XRAY_LOG_INITIALIZED ||
       LBQ->finalizing()) {
     if (!releaseThreadLocalBuffer(*LBQ))
@@ -519,7 +481,8 @@
   if (TLD.Buffer.Data == nullptr) {
     auto EC = LBQ->getBuffer(TLD.Buffer);
     if (EC != BufferQueue::ErrorCode::Ok) {
-      auto LS = atomic_load(&LoggingStatus, memory_order_acquire);
+      auto LS = __sanitizer::atomic_load(&LoggingStatus,
+                                         __sanitizer::memory_order_acquire);
       if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING &&
           LS != XRayLogInitStatus::XRAY_LOG_FINALIZED)
         Report("Failed to acquire a buffer; error=%s\n",
@@ -541,7 +504,7 @@
   }
 
   return true;
-} // namespace __xray_fdr_internal
+}
 
 // Compute the TSC difference between the time of measurement and the previous
 // event. There are a few interesting situations we need to account for:
@@ -561,7 +524,7 @@
 // - The TSC delta is representable within the 32 bits we can store in a
 //   FunctionRecord. In this case we write down just a FunctionRecord with
 //   the correct TSC delta.
-inline uint32_t writeCurrentCPUTSC(ThreadLocalData &TLD, uint64_t TSC,
+static uint32_t writeCurrentCPUTSC(ThreadLocalData &TLD, uint64_t TSC,
                                    uint8_t CPU) {
   if (CPU != TLD.CurrentCPU) {
     // We've moved to a new CPU.
@@ -579,7 +542,7 @@
   return 0;
 }
 
-inline void endBufferIfFull() XRAY_NEVER_INSTRUMENT {
+static void endBufferIfFull() XRAY_NEVER_INSTRUMENT {
   auto &TLD = getThreadLocalData();
   auto BufferStart = static_cast<char *>(TLD.Buffer.Data);
   if ((TLD.RecordPtr + MetadataRecSize) - BufferStart <=
@@ -590,7 +553,7 @@
   }
 }
 
-thread_local volatile bool Running = false;
+thread_local atomic_uint8_t Running{0};
 
 /// Here's where the meat of the processing happens. The writer captures
 /// function entry, exit and tail exit points with a time and will create
@@ -598,7 +561,7 @@
 /// walk backward through its buffer and erase trivial functions to avoid
 /// polluting the log and may use the buffer queue to obtain or release a
 /// buffer.
-inline void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
+static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
                                 uint64_t TSC, unsigned char CPU, uint64_t Arg1,
                                 int (*wall_clock_reader)(clockid_t,
                                                          struct timespec *),
@@ -611,7 +574,7 @@
   // handleArg0 to happen at any given time.
   RecursionGuard Guard{Running};
   if (!Guard) {
-    assert(Running == true && "RecursionGuard is buggy!");
+    DCHECK(atomic_load_relaxed(&Running) && "RecursionGuard is buggy!");
     return;
   }
 
@@ -665,7 +628,7 @@
   }
 
   // By this point, we are now ready to write up to 40 bytes (explained above).
-  assert((TLD.RecordPtr + MaxSize) - static_cast<char *>(TLD.Buffer.Data) >=
+  DCHECK((TLD.RecordPtr + MaxSize) - static_cast<char *>(TLD.Buffer.Data) >=
             static_cast<ptrdiff_t>(MetadataRecSize) &&
         "Misconfigured BufferQueue provided; Buffer size not large enough.");
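The `writeCurrentCPUTSC` hunk above sits next to the writer's most subtle invariant: FunctionRecords carry only a 32-bit TSC delta, so CPU migrations and large or backwards deltas must first emit a metadata record with a full 64-bit TSC. A standalone sketch of that decision, with `emitNewCPUId`/`emitTSCWrap` as illustrative stand-ins for the metadata writers in this file:

```cpp
#include <cstdint>
#include <limits>

// Stand-ins for writeNewCPUIdMetadata / writeTSCWrapMetadata.
static void emitNewCPUId(uint8_t /*CPU*/, uint64_t /*TSC*/) {}
static void emitTSCWrap(uint64_t /*TSC*/) {}

// Returns the 32-bit delta to store in the next FunctionRecord, or 0 after
// emitting a metadata record that re-establishes a full 64-bit baseline.
uint32_t currentDelta(uint64_t TSC, uint64_t &LastTSC, uint8_t CPU,
                      uint8_t &CurrentCPU) {
  if (CPU != CurrentCPU) {
    // Thread migrated: TSCs from different CPUs are not comparable, so record
    // the new CPU with a full TSC and restart the delta chain at 0.
    emitNewCPUId(CPU, TSC);
    CurrentCPU = CPU;
    LastTSC = TSC;
    return 0;
  }
  // Unsigned subtraction: a TSC that went backwards shows up as a huge value
  // and falls into the wrap case below, just like a genuine >32-bit gap.
  uint64_t Delta = TSC - LastTSC;
  LastTSC = TSC;
  if (Delta > std::numeric_limits<uint32_t>::max()) {
    emitTSCWrap(TSC);
    return 0;
  }
  return static_cast<uint32_t>(Delta);
}
```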
@@ -682,29 +645,32 @@
   case XRayEntryType::EXIT:
     // Break out and write the exit record if we can't erase any functions.
     if (TLD.NumConsecutiveFnEnters == 0 ||
-        (TSC - TLD.LastFunctionEntryTSC) >= thresholdTicks())
+        (TSC - TLD.LastFunctionEntryTSC) >=
+            atomic_load_relaxed(&ThresholdTicks))
       break;
     rewindRecentCall(TSC, TLD.LastTSC, TLD.LastFunctionEntryTSC, FuncId);
     return; // without writing log.
   case XRayEntryType::CUSTOM_EVENT: {
     // This is a bug in patching, so we'll report it once and move on.
-    static bool Once = [&] {
+    static atomic_uint8_t ErrorLatch{0};
+    if (!atomic_load_relaxed(&ErrorLatch))
       Report("Internal error: patched an XRay custom event call as a function; "
-             "func id = %d",
+             "func id = %d\n",
             FuncId);
-      return true;
-    }();
-    (void)Once;
+    static pthread_once_t ErrorOnce = PTHREAD_ONCE_INIT;
+    pthread_once(&ErrorOnce,
+                 +[] { atomic_store(&ErrorLatch, 1, memory_order_release); });
     return;
   }
   case XRayEntryType::TYPED_EVENT: {
-    static bool Once = [&] {
+    static atomic_uint8_t ErrorLatch{0};
+    if (!atomic_load_relaxed(&ErrorLatch))
      Report("Internal error: patched an XRay typed event call as a function; "
             "func id = %d\n",
             FuncId);
-      return true;
-    }();
-    (void)Once;
+    static pthread_once_t ErrorOnce = PTHREAD_ONCE_INIT;
+    pthread_once(&ErrorOnce,
+                 +[] { atomic_store(&ErrorLatch, 1, memory_order_release); });
     return;
   }
   }
@@ -719,8 +685,6 @@
   __asm volatile("# LLVM-MCA-END");
 }
 
-} // namespace __xray_fdr_internal
-
 // Global BufferQueue.
 BufferQueue *BQ = nullptr;
@@ -730,32 +694,32 @@
 
 SpinMutex FDROptionsMutex;
 
-namespace {
-XRayFileHeader &fdrCommonHeaderInfo() {
-  static XRayFileHeader Header = [] {
-    XRayFileHeader H;
+static XRayFileHeader &fdrCommonHeaderInfo() {
+  static std::aligned_storage<sizeof(XRayFileHeader)>::type HStorage;
+  static pthread_once_t OnceInit = PTHREAD_ONCE_INIT;
+  static bool TSCSupported = true;
+  static uint64_t CycleFrequency = __xray::NanosecondsPerSecond;
+  pthread_once(&OnceInit, +[] {
+    XRayFileHeader &H = reinterpret_cast<XRayFileHeader &>(HStorage);
     // Version 2 of the log writes the extents of the buffer, instead of
     // relying on an end-of-buffer record.
     H.Version = 2;
     H.Type = FileTypes::FDR_LOG;
 
     // Test for required CPU features and cache the cycle frequency
-    static bool TSCSupported = probeRequiredCPUFeatures();
-    static uint64_t CycleFrequency =
-        TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond;
+    TSCSupported = probeRequiredCPUFeatures();
+    if (TSCSupported)
+      CycleFrequency = getTSCFrequency();
     H.CycleFrequency = CycleFrequency;
 
     // FIXME: Actually check whether we have 'constant_tsc' and
     // 'nonstop_tsc' before setting the values in the header.
     H.ConstantTSC = 1;
     H.NonstopTSC = 1;
-    return H;
-  }();
-  return Header;
+  });
+  return reinterpret_cast<XRayFileHeader &>(HStorage);
 }
 
-} // namespace
-
 // This is the iterator implementation, which knows how to handle FDR-mode
 // specific buffers. This is used as an implementation of the iterator function
 // needed by __xray_set_buffer_iterator(...). It maintains a global state of the
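`fdrCommonHeaderInfo` above moves from a function-local `static XRayFileHeader` to raw aligned storage filled in under `pthread_once`, avoiding both the compiler's thread-safe-static guard and an `atexit`-registered destructor for an object the runtime wants alive until process exit. A minimal sketch of the pattern, with `FileHeader`/`commonHeader` as illustrative stand-ins; this version uses placement-new, the strictly conforming way to start the object's lifetime (the patch itself writes through a `reinterpret_cast` directly):

```cpp
#include <cstdint>
#include <new>
#include <pthread.h>

struct FileHeader { // Stand-in for XRayFileHeader.
  uint16_t Version = 0;
  uint64_t CycleFrequency = 0;
};

static FileHeader &commonHeader() {
  // Raw storage: no constructor runs at static-init time, and no destructor
  // is ever registered, so the header outlives normal static destruction.
  alignas(FileHeader) static unsigned char Storage[sizeof(FileHeader)];
  static pthread_once_t Once = PTHREAD_ONCE_INIT;
  pthread_once(&Once, +[] {
    FileHeader &H = *new (&Storage) FileHeader(); // never destroyed, on purpose
    H.Version = 2;
    H.CycleFrequency = 2500000000ull; // illustrative: a 2.5 GHz TSC
  });
  return *reinterpret_cast<FileHeader *>(&Storage);
}
```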
@@ -788,7 +752,14 @@
   // initialized the first time this function is called. We'll update one part
   // of this information with some relevant data (in particular the number of
   // buffers to expect).
-  static XRayFileHeader Header = fdrCommonHeaderInfo();
+  static std::aligned_storage<sizeof(XRayFileHeader)>::type HeaderStorage;
+  static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT;
+  pthread_once(&HeaderOnce, +[] {
+    reinterpret_cast<XRayFileHeader &>(HeaderStorage) = fdrCommonHeaderInfo();
+  });
+
+  // We use a convenience alias for code referring to Header from here on out.
+  auto &Header = reinterpret_cast<XRayFileHeader &>(HeaderStorage);
   if (B.Data == nullptr && B.Size == 0) {
     Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()};
     return XRayBuffer{static_cast<void *>(&Header), sizeof(Header)};
@@ -797,7 +768,6 @@
   static BufferQueue::const_iterator It{};
   static BufferQueue::const_iterator End{};
   if (B.Data == static_cast<void *>(&Header) && B.Size == sizeof(Header)) {
-
     // From this point on, we provide raw access to the raw buffer we're getting
     // from the BufferQueue. We're relying on the iterators from the current
     // Buffer queue.
@@ -851,11 +821,12 @@
     if (Verbosity())
       Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n");
 
-    // Clean up the buffer queue, and do not bother writing out the files!
-    delete BQ;
-    BQ = nullptr;
-    atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED,
-                 memory_order_release);
+    __sanitizer::atomic_store(&LogFlushStatus,
+                              XRayLogFlushStatus::XRAY_LOG_FLUSHED,
+                              __sanitizer::memory_order_release);
+    __sanitizer::atomic_store(&LoggingStatus,
+                              XRayLogInitStatus::XRAY_LOG_UNINITIALIZED,
+                              __sanitizer::memory_order_release);
     return XRayLogFlushStatus::XRAY_LOG_FLUSHED;
   }
@@ -967,20 +938,17 @@
 void fdrLoggingHandleArg0(int32_t FuncId,
                           XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
   auto TC = getTimestamp();
-  __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, 0,
-                                           clock_gettime, BQ);
+  processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, 0, clock_gettime, BQ);
 }
 
 void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry,
                           uint64_t Arg) XRAY_NEVER_INSTRUMENT {
   auto TC = getTimestamp();
-  __xray_fdr_internal::processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, Arg,
-                                           clock_gettime, BQ);
+  processFunctionHook(FuncId, Entry, TC.TSC, TC.CPU, Arg, clock_gettime, BQ);
 }
 
 void fdrLoggingHandleCustomEvent(void *Event,
                                  std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
-  using namespace __xray_fdr_internal;
   auto TC = getTimestamp();
   auto &TSC = TC.TSC;
   auto &CPU = TC.CPU;
@@ -1031,7 +999,6 @@
 void fdrLoggingHandleTypedEvent(
     uint16_t EventType, const void *Event,
     std::size_t EventSize) noexcept XRAY_NEVER_INSTRUMENT {
-  using namespace __xray_fdr_internal;
   auto TC = getTimestamp();
   auto &TSC = TC.TSC;
   auto &CPU = TC.CPU;
@@ -1179,8 +1146,8 @@
   }
 
   static bool UNUSED Once = [] {
-    pthread_key_create(&__xray_fdr_internal::Key, +[](void *) {
-      auto &TLD = __xray_fdr_internal::getThreadLocalData();
+    pthread_key_create(&Key, +[](void *) {
+      auto &TLD = getThreadLocalData();
       if (TLD.BQ == nullptr)
         return;
       auto EC = TLD.BQ->releaseBuffer(TLD.Buffer);
@@ -1188,9 +1155,18 @@
         Report("At thread exit, failed to release buffer at %p; error=%s\n",
                TLD.Buffer.Data, BufferQueue::getErrorString(EC));
     });
+    atomic_store(&TicksPerSec,
+                 probeRequiredCPUFeatures() ? getTSCFrequency()
+                                            : NanosecondsPerSecond,
+                 memory_order_release);
     return false;
   }();
 
+  atomic_store(&ThresholdTicks,
+               atomic_load(&TicksPerSec, memory_order_acquire) *
+                   fdrFlags()->func_duration_threshold_us / 1000000,
+               memory_order_release);
+
   // Arg1 handler should go in first to avoid concurrent code accidentally
   // falling back to arg0 when it should have ran arg1.
   __xray_set_handler_arg1(fdrLoggingHandleArg1);
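One behavioral consequence of the hunk above is worth calling out: `ThresholdTicks` used to be latched in a function-local static the first time `thresholdTicks()` ran, so a finalize/re-init cycle could never pick up a changed `func_duration_threshold_us`; with the two atomics, every `fdrLoggingInit` recomputes it. The arithmetic itself is simple; an illustrative check of the formula with assumed numbers (a 2.5 GHz TSC and a 5 microsecond filter, neither from the patch):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t TicksPerSec = 2500000000ull; // assumed 2.5 GHz TSC frequency
  uint64_t ThresholdUs = 5;             // assumed func_duration_threshold_us
  // Same formula as the patch: ticks/second * microseconds / 10^6.
  uint64_t ThresholdTicks = TicksPerSec * ThresholdUs / 1000000;
  std::printf("%llu ticks\n", (unsigned long long)ThresholdTicks); // 12500
}
```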
@@ -1211,7 +1187,6 @@
 }
 
 bool fdrLogDynamicInitializer() XRAY_NEVER_INSTRUMENT {
-  using namespace __xray;
   XRayLogImpl Impl{
       fdrLoggingInit,
       fdrLoggingFinalize,
Index: compiler-rt/lib/xray/xray_recursion_guard.h
===================================================================
--- /dev/null
+++ compiler-rt/lib/xray/xray_recursion_guard.h
@@ -0,0 +1,57 @@
+//===-- xray_recursion_guard.h ---------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of XRay, a dynamic runtime instrumentation system.
+//
+//===----------------------------------------------------------------------===//
+#ifndef XRAY_XRAY_RECURSION_GUARD_H
+#define XRAY_XRAY_RECURSION_GUARD_H
+
+#include "sanitizer_common/sanitizer_atomic.h"
+
+namespace __xray {
+
+/// The RecursionGuard is useful for guarding against signal handlers which are
+/// also potentially calling XRay-instrumented functions. To use the
+/// RecursionGuard, you'll typically need a thread_local atomic_uint8_t:
+///
+///   thread_local atomic_uint8_t Guard{0};
+///
+///   // In a handler function:
+///   void handleArg0(int32_t F, XRayEntryType T) {
+///     RecursionGuard G(Guard);
+///     if (!G)
+///       return; // Failed to acquire the guard.
+///     ...
+///   }
+///
+class RecursionGuard {
+  atomic_uint8_t &Running;
+  const bool Valid;
+
+public:
+  explicit inline RecursionGuard(atomic_uint8_t &R)
+      : Running(R), Valid(!atomic_exchange(&R, 1, memory_order_acq_rel)) {}
+
+  inline RecursionGuard(const RecursionGuard &) = delete;
+  inline RecursionGuard(RecursionGuard &&) = delete;
+  inline RecursionGuard &operator=(const RecursionGuard &) = delete;
+  inline RecursionGuard &operator=(RecursionGuard &&) = delete;
+
+  explicit inline operator bool() const { return Valid; }
+
+  inline ~RecursionGuard() noexcept {
+    if (Valid)
+      atomic_store(&Running, 0, memory_order_release);
+  }
+};
+
+} // namespace __xray
+
+#endif // XRAY_XRAY_RECURSION_GUARD_H
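Since the new header pitches RecursionGuard specifically at signal handlers, here is a deterministic sketch of that scenario, standalone and outside the sanitizer tree (std::atomic stands in for the sanitizer atomics, POSIX signals are assumed, and `handleEntry` plus the SIGALRM choice are illustrative). The handler that interrupts `handleEntry` while the guard is held finds the flag already raised and returns instead of touching half-written log state:

```cpp
#include <atomic>
#include <csignal>
#include <cstdint>
#include <cstdio>

thread_local std::atomic<uint8_t> Guard{0};

class RecursionGuard {
  std::atomic<uint8_t> &Running;
  const bool Valid;

public:
  explicit RecursionGuard(std::atomic<uint8_t> &R)
      : Running(R), Valid(!R.exchange(1, std::memory_order_acq_rel)) {}
  explicit operator bool() const { return Valid; }
  ~RecursionGuard() {
    if (Valid)
      Running.store(0, std::memory_order_release);
  }
};

void handleEntry(const char *Who) {
  RecursionGuard G(Guard);
  if (!G)
    return; // A signal interrupted an in-flight handleEntry on this thread.
  std::printf("record from %s\n", Who);
  if (Who[0] == 'm')         // Only the outer call provokes the interrupt.
    std::raise(SIGALRM);     // Handler runs here, while the guard is held.
}

int main() {
  std::signal(SIGALRM, +[](int) { handleEntry("signal handler"); });
  handleEntry("main"); // Prints once; the nested handler call is rejected.
}
```

Because the guard is acquired with a single atomic exchange, even a signal delivered between any two instructions of `handleEntry` cannot observe the flag as both unset, which is exactly the window the old `volatile bool` left open.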