Index: compiler-rt/lib/xray/xray_basic_logging.cc =================================================================== --- compiler-rt/lib/xray/xray_basic_logging.cc +++ compiler-rt/lib/xray/xray_basic_logging.cc @@ -15,8 +15,6 @@ // //===----------------------------------------------------------------------===// -#include -#include #include #include #include @@ -73,24 +71,21 @@ thread_local atomic_uint8_t Guard{0}; -static uint64_t thresholdTicks() XRAY_NEVER_INSTRUMENT { - static uint64_t TicksPerSec = probeRequiredCPUFeatures() - ? getTSCFrequency() - : NanosecondsPerSecond; - static const uint64_t ThresholdTicks = - TicksPerSec * GlobalOptions.DurationFilterMicros / 1000000; - return ThresholdTicks; -} +static atomic_uint8_t UseRealTSC{0}; +static atomic_uint64_t ThresholdTicks{0}; +static atomic_uint64_t TicksPerSec{0}; +static atomic_uint64_t CycleFrequency{NanosecondsPerSecond}; static int openLogFile() XRAY_NEVER_INSTRUMENT { int F = getLogFD(); if (F == -1) return -1; - // Test for required CPU features and cache the cycle frequency - static bool TSCSupported = probeRequiredCPUFeatures(); - static uint64_t CycleFrequency = - TSCSupported ? getTSCFrequency() : NanosecondsPerSecond; + static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT; + pthread_once(&DetectOnce, +[] { + if (atomic_load(&UseRealTSC, memory_order_acquire)) + atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release); + }); // Since we're here, we get to write the header. We set it up so that the // header will only be written once, at the start, and let the threads @@ -98,7 +93,7 @@ XRayFileHeader Header; Header.Version = 2; // Version 2 includes tail exit records. Header.Type = FileTypes::NAIVE_LOG; - Header.CycleFrequency = CycleFrequency; + Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_acquire); // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' // before setting the values in the header. @@ -109,12 +104,14 @@ return F; } -int getGlobalFd() XRAY_NEVER_INSTRUMENT { - static int Fd = openLogFile(); +static int getGlobalFd() XRAY_NEVER_INSTRUMENT { + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + static int Fd = 0; + pthread_once(&OnceInit, +[] { Fd = openLogFile(); }); return Fd; } -ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { +static ThreadLocalData &getThreadLocalData() XRAY_NEVER_INSTRUMENT { thread_local ThreadLocalData TLD; thread_local bool UNUSED TOnce = [] { if (GlobalOptions.ThreadBufferSize == 0) { @@ -143,13 +140,6 @@ alignof(StackEntry))); TLD.StackSize = GlobalOptions.MaxStackDepth; TLD.StackEntries = 0; - if (Verbosity() >= 2) { - static auto UNUSED Once = [] { - auto ticks = thresholdTicks(); - Report("Ticks threshold: %d\n", ticks); - return false; - }(); - } return false; }(); return TLD; @@ -217,8 +207,8 @@ if (StackTop.FuncId == FuncId && StackTop.CPU == CPU && StackTop.TSC < TSC) { auto Delta = TSC - StackTop.TSC; - if (Delta < thresholdTicks()) { - assert(TLD.BufferOffset > 0); + if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) { + DCHECK(TLD.BufferOffset > 0); TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2; return; } @@ -227,7 +217,7 @@ } default: // Should be unreachable. - assert(false && "Unsupported XRayEntryType encountered."); + DCHECK(false && "Unsupported XRayEntryType encountered."); break; } @@ -372,17 +362,26 @@ void *Options, size_t OptionsSize) XRAY_NEVER_INSTRUMENT { uint8_t Expected = 0; - if (!atomic_compare_exchange_strong( - &BasicInitialized, &Expected, 1, memory_order_acq_rel)) { + if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1, + memory_order_acq_rel)) { if (Verbosity()) Report("Basic logging already initialized.\n"); return XRayLogInitStatus::XRAY_LOG_INITIALIZED; } - static bool UNUSED Once = [] { + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + pthread_once(&OnceInit, +[] { pthread_key_create(&PThreadKey, TLDDestructor); - return false; - }(); + atomic_store(&UseRealTSC, probeRequiredCPUFeatures(), memory_order_release); + // Initialize the global TicksPerSec value. + atomic_store(&TicksPerSec, + probeRequiredCPUFeatures() ? getTSCFrequency() + : NanosecondsPerSecond, + memory_order_release); + if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity()) + Report("WARNING: Required CPU features missing for XRay instrumentation, " + "using emulation instead.\n"); + }); if (BufferSize == 0 && BufferMax == 0 && Options != nullptr) { FlagParser P; @@ -409,6 +408,7 @@ GlobalOptions.ThreadBufferSize = F.thread_buffer_size; GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us; GlobalOptions.MaxStackDepth = F.max_stack_depth; + *basicFlags() = F; } else if (OptionsSize != sizeof(BasicLoggingOptions)) { Report("Invalid options size, potential ABI mismatch; expected %d got %d", sizeof(BasicLoggingOptions), OptionsSize); @@ -420,15 +420,19 @@ GlobalOptions = *reinterpret_cast(Options); } - static auto UseRealTSC = probeRequiredCPUFeatures(); - if (!UseRealTSC && Verbosity()) - Report("WARNING: Required CPU features missing for XRay instrumentation, " - "using emulation instead.\n"); - - __xray_set_handler_arg1(UseRealTSC ? basicLoggingHandleArg1RealTSC - : basicLoggingHandleArg1EmulateTSC); - __xray_set_handler(UseRealTSC ? basicLoggingHandleArg0RealTSC - : basicLoggingHandleArg0EmulateTSC); + atomic_store(&ThresholdTicks, + atomic_load(&TicksPerSec, memory_order_acquire) * + GlobalOptions.DurationFilterMicros / 1000000, + memory_order_release); + __xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_acquire) + ? basicLoggingHandleArg1RealTSC + : basicLoggingHandleArg1EmulateTSC); + __xray_set_handler(atomic_load(&UseRealTSC, memory_order_acquire) + ? basicLoggingHandleArg0RealTSC + : basicLoggingHandleArg0EmulateTSC); + + // TODO: Implement custom event and typed event handling support in Basic + // Mode. __xray_remove_customevent_handler(); __xray_remove_typedevent_handler(); @@ -437,8 +441,8 @@ XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT { uint8_t Expected = 0; - if (!atomic_compare_exchange_strong( - &BasicInitialized, &Expected, 0, memory_order_acq_rel) && + if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0, + memory_order_acq_rel) && Verbosity()) Report("Basic logging already finalized.\n"); @@ -490,11 +494,18 @@ Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult); return false; } - static auto UNUSED Once = [] { - static auto UNUSED &TLD = getThreadLocalData(); - Atexit(+[] { TLDDestructor(&TLD); }); - return false; - }(); + + // At this point we know that we've successfully initialized Basic mode + // tracing, and the only chance we're going to get for the current thread to + // clean-up may be at thread/program exit. To ensure that we're going to get + // the cleanup even without calling the finalization routines, we're + // registering a program exit function that will do the cleanup. + static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT; + pthread_once(&DynamicOnce, +[] { + static void *FakeTLD = nullptr; + FakeTLD = &getThreadLocalData(); + Atexit(+[] { TLDDestructor(FakeTLD); }); + }); } return true; } Index: compiler-rt/lib/xray/xray_fdr_logging.cc =================================================================== --- compiler-rt/lib/xray/xray_fdr_logging.cc +++ compiler-rt/lib/xray/xray_fdr_logging.cc @@ -15,9 +15,10 @@ // //===----------------------------------------------------------------------===// #include "xray_fdr_logging.h" +#include #include #include -#include +#include #include #include #include @@ -28,11 +29,11 @@ #include "sanitizer_common/sanitizer_common.h" #include "xray/xray_interface.h" #include "xray/xray_records.h" -#include "xray_recursion_guard.h" #include "xray_buffer_queue.h" #include "xray_defs.h" #include "xray_fdr_flags.h" #include "xray_flags.h" +#include "xray_recursion_guard.h" #include "xray_tsc.h" #include "xray_utils.h" @@ -291,23 +292,19 @@ break; case XRayEntryType::CUSTOM_EVENT: { // This is a bug in patching, so we'll report it once and move on. - static bool Once = [&] { + static atomic_uint8_t ErrorLatch{0}; + if (!atomic_exchange(&ErrorLatch, 1, memory_order_acq_rel)) Report("Internal error: patched an XRay custom event call as a function; " "func id = %d\n", FuncId); - return true; - }(); - (void)Once; return; } case XRayEntryType::TYPED_EVENT: { - static bool Once = [&] { + static atomic_uint8_t ErrorLatch{0}; + if (!atomic_exchange(&ErrorLatch, 1, memory_order_acq_rel)) Report("Internal error: patched an XRay typed event call as a function; " "func id = %d\n", FuncId); - return true; - }(); - (void)Once; return; } } @@ -317,14 +314,8 @@ incrementExtents(sizeof(FunctionRecord)); } -static uint64_t thresholdTicks() { - static uint64_t TicksPerSec = probeRequiredCPUFeatures() - ? getTSCFrequency() - : __xray::NanosecondsPerSecond; - static const uint64_t ThresholdTicks = - TicksPerSec * fdrFlags()->func_duration_threshold_us / 1000000; - return ThresholdTicks; -} +static atomic_uint64_t TicksPerSec{0}; +static atomic_uint64_t ThresholdTicks{0}; // Re-point the thread local pointer into this thread's Buffer before the recent // "Function Entry" record and any "Tail Call Exit" records after that. @@ -375,7 +366,7 @@ "Expected funcids to match when rewinding tail call."); // This tail call exceeded the threshold duration. It will not be erased. - if ((TSC - RewindingTSC) >= thresholdTicks()) { + if ((TSC - RewindingTSC) >= atomic_load_relaxed(&ThresholdTicks)) { TLD.NumTailCalls = 0; return; } @@ -619,29 +610,26 @@ case XRayEntryType::EXIT: // Break out and write the exit record if we can't erase any functions. if (TLD.NumConsecutiveFnEnters == 0 || - (TSC - TLD.LastFunctionEntryTSC) >= thresholdTicks()) + (TSC - TLD.LastFunctionEntryTSC) >= + atomic_load_relaxed(&ThresholdTicks)) break; rewindRecentCall(TSC, TLD.LastTSC, TLD.LastFunctionEntryTSC, FuncId); return; // without writing log. case XRayEntryType::CUSTOM_EVENT: { // This is a bug in patching, so we'll report it once and move on. - static bool Once = [&] { + static atomic_uint8_t ErrorLatch{0}; + if (!atomic_exchange(&ErrorLatch, 1, memory_order_acq_rel)) Report("Internal error: patched an XRay custom event call as a function; " - "func id = %d", + "func id = %d\n", FuncId); - return true; - }(); - (void)Once; return; } case XRayEntryType::TYPED_EVENT: { - static bool Once = [&] { + static atomic_uint8_t ErrorLatch{0}; + if (!atomic_exchange(&ErrorLatch, 1, memory_order_acq_rel)) Report("Internal error: patched an XRay typed event call as a function; " "func id = %d\n", FuncId); - return true; - }(); - (void)Once; return; } } @@ -665,32 +653,32 @@ SpinMutex FDROptionsMutex; -namespace { -XRayFileHeader &fdrCommonHeaderInfo() { - static XRayFileHeader Header = [] { - XRayFileHeader H; +static XRayFileHeader &fdrCommonHeaderInfo() { + static std::aligned_storage::type HStorage; + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + static bool TSCSupported = true; + static uint64_t CycleFrequency = NanosecondsPerSecond; + pthread_once(&OnceInit, +[] { + XRayFileHeader &H = reinterpret_cast(HStorage); // Version 2 of the log writes the extents of the buffer, instead of // relying on an end-of-buffer record. H.Version = 2; H.Type = FileTypes::FDR_LOG; // Test for required CPU features and cache the cycle frequency - static bool TSCSupported = probeRequiredCPUFeatures(); - static uint64_t CycleFrequency = - TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond; + TSCSupported = probeRequiredCPUFeatures(); + if (TSCSupported) + CycleFrequency = getTSCFrequency(); H.CycleFrequency = CycleFrequency; // FIXME: Actually check whether we have 'constant_tsc' and // 'nonstop_tsc' before setting the values in the header. H.ConstantTSC = 1; H.NonstopTSC = 1; - return H; - }(); - return Header; + }); + return reinterpret_cast(HStorage); } -} // namespace - // This is the iterator implementation, which knows how to handle FDR-mode // specific buffers. This is used as an implementation of the iterator function // needed by __xray_set_buffer_iterator(...). It maintains a global state of the @@ -723,7 +711,14 @@ // initialized the first time this function is called. We'll update one part // of this information with some relevant data (in particular the number of // buffers to expect). - static XRayFileHeader Header = fdrCommonHeaderInfo(); + static std::aligned_storage::type HeaderStorage; + static pthread_once_t HeaderOnce = PTHREAD_ONCE_INIT; + pthread_once(&HeaderOnce, +[] { + reinterpret_cast(HeaderStorage) = fdrCommonHeaderInfo(); + }); + + // We use a convenience alias for code referring to Header from here on out. + auto &Header = reinterpret_cast(HeaderStorage); if (B.Data == nullptr && B.Size == 0) { Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; return XRayBuffer{static_cast(&Header), sizeof(Header)}; @@ -732,7 +727,6 @@ static BufferQueue::const_iterator It{}; static BufferQueue::const_iterator End{}; if (B.Data == static_cast(&Header) && B.Size == sizeof(Header)) { - // From this point on, we provide raw access to the raw buffer we're getting // from the BufferQueue. We're relying on the iterators from the current // Buffer queue. @@ -787,8 +781,10 @@ Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n"); // Clean up the buffer queue, and do not bother writing out the files! - delete BQ; + BQ->~BufferQueue(); + InternalFree(BQ); BQ = nullptr; + atomic_store(&LogFlushStatus, XRayLogFlushStatus::XRAY_LOG_FLUSHED, memory_order_release); return XRayLogFlushStatus::XRAY_LOG_FLUSHED; @@ -881,7 +877,9 @@ TSCAndCPU Result; // Test once for required CPU features - static bool TSCSupported = probeRequiredCPUFeatures(); + static pthread_once_t OnceProbe = PTHREAD_ONCE_INIT; + static bool TSCSupported = true; + pthread_once(&OnceProbe, +[] { TSCSupported = probeRequiredCPUFeatures(); }); if (TSCSupported) { Result.TSC = __xray::readTSC(Result.CPU); @@ -920,13 +918,8 @@ if (!Guard) return; if (EventSize > std::numeric_limits::max()) { - using Empty = struct {}; - static Empty Once = [&] { - Report("Event size too large = %zu ; > max = %d\n", EventSize, - std::numeric_limits::max()); - return Empty(); - }(); - (void)Once; + static pthread_once_t Once = PTHREAD_ONCE_INIT; + pthread_once(&Once, +[] { Report("Event size too large.\n"); }); } int32_t ReducedEventSize = static_cast(EventSize); auto &TLD = getThreadLocalData(); @@ -970,13 +963,8 @@ if (!Guard) return; if (EventSize > std::numeric_limits::max()) { - using Empty = struct {}; - static Empty Once = [&] { - Report("Event size too large = %zu ; > max = %d\n", EventSize, - std::numeric_limits::max()); - return Empty(); - }(); - (void)Once; + static pthread_once_t Once = PTHREAD_ONCE_INIT; + pthread_once(&Once, +[] { Report("Event size too large.\n"); }); } int32_t ReducedEventSize = static_cast(EventSize); auto &TLD = getThreadLocalData(); @@ -1093,23 +1081,33 @@ bool Success = false; if (BQ != nullptr) { - delete BQ; + BQ->~BufferQueue(); + InternalFree(BQ); BQ = nullptr; } - if (BQ == nullptr) - BQ = new BufferQueue(BufferSize, BufferMax, Success); + if (BQ == nullptr) { + BQ = reinterpret_cast( + InternalAlloc(sizeof(BufferQueue), nullptr, 64)); + new (BQ) BufferQueue(BufferSize, BufferMax, Success); + } if (!Success) { Report("BufferQueue init failed.\n"); if (BQ != nullptr) { - delete BQ; + BQ->~BufferQueue(); + InternalFree(BQ); BQ = nullptr; } return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; } - static bool UNUSED Once = [] { + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + pthread_once(&OnceInit, +[] { + atomic_store(&TicksPerSec, + probeRequiredCPUFeatures() ? getTSCFrequency() + : __xray::NanosecondsPerSecond, + memory_order_release); pthread_key_create(&Key, +[](void *) { auto &TLD = getThreadLocalData(); if (TLD.BQ == nullptr) @@ -1119,9 +1117,12 @@ Report("At thread exit, failed to release buffer at %p; error=%s\n", TLD.Buffer.Data, BufferQueue::getErrorString(EC)); }); - return false; - }(); + }); + atomic_store(&ThresholdTicks, + atomic_load_relaxed(&TicksPerSec) * + fdrFlags()->func_duration_threshold_us / 1000000, + memory_order_release); // Arg1 handler should go in first to avoid concurrent code accidentally // falling back to arg0 when it should have ran arg1. __xray_set_handler_arg1(fdrLoggingHandleArg1); Index: compiler-rt/test/xray/TestCases/Posix/c-test.cc =================================================================== --- /dev/null +++ compiler-rt/test/xray/TestCases/Posix/c-test.cc @@ -0,0 +1,15 @@ +// RUN: %clang_xray -g -o %t %s +// RUN: rm xray-log.c-test.* || true +// RUN: XRAY_OPTIONS=patch_premain=true:verbosity=1:xray_mode=xray-basic %t \ +// RUN: 2>&1 | FileCheck %s +// RUN: rm xray-log.c-test.* || true +// +// REQUIRES: x86_64-target-arch +// REQUIRES: built-in-llvm-tree +__attribute__((xray_always_instrument)) void always() {} + +int main() { + always(); +} + +// CHECK: =={{[0-9].*}}==XRay: Log file in '{{.*}}'