Index: compiler-rt/lib/xray/xray_basic_logging.cc =================================================================== --- compiler-rt/lib/xray/xray_basic_logging.cc +++ compiler-rt/lib/xray/xray_basic_logging.cc @@ -16,7 +16,6 @@ //===----------------------------------------------------------------------===// #include -#include #include #include #include @@ -39,7 +38,7 @@ namespace __xray { -__sanitizer::SpinMutex LogMutex; +SpinMutex LogMutex; // We use elements of this type to record the entry TSC of every function ID we // see as we're tracing a particular thread's execution. @@ -48,7 +47,7 @@ uint16_t Type; uint8_t CPU; uint8_t Padding; - uint64_t TSC; + u64 TSC; }; static_assert(sizeof(StackEntry) == 16, "Wrong size for StackEntry"); @@ -66,30 +65,27 @@ static pthread_key_t PThreadKey; -static __sanitizer::atomic_uint8_t BasicInitialized{0}; +static atomic_uint8_t BasicInitialized{0}; BasicLoggingOptions GlobalOptions; thread_local volatile bool RecursionGuard = false; -static uint64_t thresholdTicks() XRAY_NEVER_INSTRUMENT { - static uint64_t TicksPerSec = probeRequiredCPUFeatures() - ? getTSCFrequency() - : __xray::NanosecondsPerSecond; - static const uint64_t ThresholdTicks = - TicksPerSec * GlobalOptions.DurationFilterMicros / 1000000; - return ThresholdTicks; -} +static atomic_uint8_t UseRealTSC{0}; +static atomic_uint64_t ThresholdTicks{0}; +static atomic_uint64_t TicksPerSec{0}; +static atomic_uint64_t CycleFrequency{__xray::NanosecondsPerSecond}; static int openLogFile() XRAY_NEVER_INSTRUMENT { int F = getLogFD(); if (F == -1) return -1; - // Test for required CPU features and cache the cycle frequency - static bool TSCSupported = probeRequiredCPUFeatures(); - static uint64_t CycleFrequency = - TSCSupported ? 
getTSCFrequency() : __xray::NanosecondsPerSecond; + static pthread_once_t DetectOnce = PTHREAD_ONCE_INIT; + pthread_once(&DetectOnce, +[] { + if (atomic_load(&UseRealTSC, memory_order_relaxed)) + atomic_store(&CycleFrequency, getTSCFrequency(), memory_order_release); + }); // Since we're here, we get to write the header. We set it up so that the // header will only be written once, at the start, and let the threads @@ -97,7 +93,7 @@ XRayFileHeader Header; Header.Version = 2; // Version 2 includes tail exit records. Header.Type = FileTypes::NAIVE_LOG; - Header.CycleFrequency = CycleFrequency; + Header.CycleFrequency = atomic_load(&CycleFrequency, memory_order_relaxed); // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' // before setting the values in the header. @@ -108,8 +104,10 @@ return F; } -int getGlobalFd() XRAY_NEVER_INSTRUMENT { - static int Fd = openLogFile(); +static int getGlobalFd() XRAY_NEVER_INSTRUMENT { + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + static int Fd = 0; + pthread_once(&OnceInit, +[] { Fd = openLogFile(); }); return Fd; } @@ -117,11 +115,11 @@ thread_local ThreadLocalData TLD; thread_local bool UNUSED TOnce = [] { if (GlobalOptions.ThreadBufferSize == 0) { - if (__sanitizer::Verbosity()) + if (Verbosity()) Report("Not initializing TLD since ThreadBufferSize == 0.\n"); return false; } - TLD.TID = __sanitizer::GetTid(); + TLD.TID = GetTid(); pthread_setspecific(PThreadKey, &TLD); TLD.Fd = getGlobalFd(); TLD.InMemoryBuffer = reinterpret_cast( @@ -130,7 +128,7 @@ TLD.BufferSize = GlobalOptions.ThreadBufferSize; TLD.BufferOffset = 0; if (GlobalOptions.MaxStackDepth == 0) { - if (__sanitizer::Verbosity()) + if (Verbosity()) Report("Not initializing the ShadowStack since MaxStackDepth == 0.\n"); TLD.StackSize = 0; TLD.StackEntries = 0; @@ -142,13 +140,6 @@ alignof(StackEntry))); TLD.StackSize = GlobalOptions.MaxStackDepth; TLD.StackEntries = 0; - if (__sanitizer::Verbosity() >= 2) { - static auto UNUSED Once = [] { - auto ticks = thresholdTicks(); - 
Report("Ticks threshold: %d\n", ticks); - return false; - }(); - } return false; }(); return TLD; @@ -168,10 +159,10 @@ if (RecursionGuard) return; RecursionGuard = true; - auto ExitGuard = __sanitizer::at_scope_exit([] { RecursionGuard = false; }); + auto ExitGuard = at_scope_exit([] { RecursionGuard = false; }); uint8_t CPU = 0; - uint64_t TSC = ReadTSC(CPU); + u64 TSC = ReadTSC(CPU); switch (Type) { case XRayEntryType::ENTRY: @@ -189,7 +180,7 @@ E.TSC = TSC; auto StackEntryPtr = static_cast(TLD.ShadowStack) + (sizeof(StackEntry) * (TLD.StackEntries - 1)); - __sanitizer::internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry)); + internal_memcpy(StackEntryPtr, &E, sizeof(StackEntry)); break; } case XRayEntryType::EXIT: @@ -213,12 +204,12 @@ StackEntry StackTop; auto StackEntryPtr = static_cast(TLD.ShadowStack) + (sizeof(StackEntry) * TLD.StackEntries); - __sanitizer::internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry)); + internal_memcpy(&StackTop, StackEntryPtr, sizeof(StackEntry)); if (StackTop.FuncId == FuncId && StackTop.CPU == CPU && StackTop.TSC < TSC) { auto Delta = TSC - StackTop.TSC; - if (Delta < thresholdTicks()) { - assert(TLD.BufferOffset > 0); + if (Delta < atomic_load(&ThresholdTicks, memory_order_relaxed)) { + DCHECK(TLD.BufferOffset > 0); TLD.BufferOffset -= StackTop.Type == XRayEntryType::ENTRY ? 1 : 2; return; } @@ -227,7 +218,7 @@ } default: // Should be unreachable. 
- assert(false && "Unsupported XRayEntryType encountered."); + Die(); break; } @@ -241,9 +232,9 @@ R.Type = Type; R.FuncId = FuncId; auto FirstEntry = reinterpret_cast<__xray::XRayRecord *>(TLD.InMemoryBuffer); - __sanitizer::internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); + internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); if (++TLD.BufferOffset == TLD.BufferSize) { - __sanitizer::SpinMutexLock L(&LogMutex); + SpinMutexLock L(&LogMutex); retryingWriteAll(Fd, reinterpret_cast(FirstEntry), reinterpret_cast(FirstEntry + TLD.BufferOffset)); TLD.BufferOffset = 0; @@ -252,7 +243,7 @@ } template -void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, uint64_t Arg1, +void InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, u64 Arg1, RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { auto &TLD = getThreadLocalData(); auto FirstEntry = @@ -266,7 +257,7 @@ // in the thread-local buffer. If not, we first flush the buffer before // attempting to write the two records that must be consecutive. if (TLD.BufferOffset + 2 > BuffLen) { - __sanitizer::SpinMutexLock L(&LogMutex); + SpinMutexLock L(&LogMutex); retryingWriteAll(Fd, reinterpret_cast(FirstEntry), reinterpret_cast(FirstEntry + TLD.BufferOffset)); TLD.BufferOffset = 0; @@ -279,7 +270,7 @@ if (RecursionGuard) return; RecursionGuard = true; - auto ExitGuard = __sanitizer::at_scope_exit([] { RecursionGuard = false; }); + auto ExitGuard = at_scope_exit([] { RecursionGuard = false; }); // And from here on write the arg payload. 
__xray::XRayArgPayload R; @@ -287,9 +278,9 @@ R.FuncId = FuncId; R.TId = TLD.TID; R.Arg = Arg1; - __sanitizer::internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); + internal_memcpy(FirstEntry + TLD.BufferOffset, &R, sizeof(R)); if (++TLD.BufferOffset == BuffLen) { - __sanitizer::SpinMutexLock L(&LogMutex); + SpinMutexLock L(&LogMutex); retryingWriteAll(Fd, reinterpret_cast(FirstEntry), reinterpret_cast(FirstEntry + TLD.BufferOffset)); TLD.BufferOffset = 0; @@ -338,25 +329,25 @@ static void TLDDestructor(void *P) XRAY_NEVER_INSTRUMENT { ThreadLocalData &TLD = *reinterpret_cast(P); - auto ExitGuard = __sanitizer::at_scope_exit([&TLD] { + auto ExitGuard = at_scope_exit([&TLD] { // Clean up dynamic resources. if (TLD.InMemoryBuffer) InternalFree(TLD.InMemoryBuffer); if (TLD.ShadowStack) InternalFree(TLD.ShadowStack); - if (__sanitizer::Verbosity()) + if (Verbosity()) Report("Cleaned up log for TID: %d\n", TLD.TID); }); if (TLD.Fd == -1 || TLD.BufferOffset == 0) { - if (__sanitizer::Verbosity()) + if (Verbosity()) Report("Skipping buffer for TID: %d; Fd = %d; Offset = %llu\n", TLD.TID, TLD.Fd, TLD.BufferOffset); return; } { - __sanitizer::SpinMutexLock L(&LogMutex); + SpinMutexLock L(&LogMutex); retryingWriteAll(TLD.Fd, reinterpret_cast(TLD.InMemoryBuffer), reinterpret_cast(TLD.InMemoryBuffer) + (sizeof(__xray::XRayRecord) * TLD.BufferOffset)); @@ -373,17 +364,30 @@ void *Options, size_t OptionsSize) XRAY_NEVER_INSTRUMENT { uint8_t Expected = 0; - if (!__sanitizer::atomic_compare_exchange_strong( - &BasicInitialized, &Expected, 1, __sanitizer::memory_order_acq_rel)) { - if (__sanitizer::Verbosity()) + if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 1, + memory_order_acq_rel)) { + if (Verbosity()) Report("Basic logging already initialized.\n"); return XRayLogInitStatus::XRAY_LOG_INITIALIZED; } - static bool UNUSED Once = [] { + // Initialize the global TicksPerSec value. + atomic_store(&TicksPerSec, + probeRequiredCPUFeatures() ? 
getTSCFrequency() + : __xray::NanosecondsPerSecond, + memory_order_release); + atomic_store(&ThresholdTicks, + atomic_load(&TicksPerSec, memory_order_acquire) * + GlobalOptions.DurationFilterMicros / 1000000, + memory_order_release); + static pthread_once_t OnceInit = PTHREAD_ONCE_INIT; + pthread_once(&OnceInit, +[] { pthread_key_create(&PThreadKey, TLDDestructor); - return false; - }(); + atomic_store(&UseRealTSC, probeRequiredCPUFeatures(), memory_order_release); + if (!atomic_load(&UseRealTSC, memory_order_relaxed) && Verbosity()) + Report("WARNING: Required CPU features missing for XRay instrumentation, " + "using emulation instead.\n"); + }); if (BufferSize == 0 && BufferMax == 0 && Options != nullptr) { FlagParser P; @@ -410,6 +414,7 @@ GlobalOptions.ThreadBufferSize = F.thread_buffer_size; GlobalOptions.DurationFilterMicros = F.func_duration_threshold_us; GlobalOptions.MaxStackDepth = F.max_stack_depth; + *basicFlags() = F; } else if (OptionsSize != sizeof(BasicLoggingOptions)) { Report("Invalid options size, potential ABI mismatch; expected %d got %d", sizeof(BasicLoggingOptions), OptionsSize); @@ -421,15 +426,15 @@ GlobalOptions = *reinterpret_cast(Options); } - static auto UseRealTSC = probeRequiredCPUFeatures(); - if (!UseRealTSC && __sanitizer::Verbosity()) - Report("WARNING: Required CPU features missing for XRay instrumentation, " - "using emulation instead.\n"); + __xray_set_handler_arg1(atomic_load(&UseRealTSC, memory_order_relaxed) + ? basicLoggingHandleArg1RealTSC + : basicLoggingHandleArg1EmulateTSC); + __xray_set_handler(atomic_load(&UseRealTSC, memory_order_relaxed) + ? basicLoggingHandleArg0RealTSC + : basicLoggingHandleArg0EmulateTSC); - __xray_set_handler_arg1(UseRealTSC ? basicLoggingHandleArg1RealTSC - : basicLoggingHandleArg1EmulateTSC); - __xray_set_handler(UseRealTSC ? basicLoggingHandleArg0RealTSC - : basicLoggingHandleArg0EmulateTSC); + // TODO: Implement custom event and typed event handling support in Basic + // Mode. 
__xray_remove_customevent_handler(); __xray_remove_typedevent_handler(); @@ -438,9 +443,9 @@ XRayLogInitStatus basicLoggingFinalize() XRAY_NEVER_INSTRUMENT { uint8_t Expected = 0; - if (!__sanitizer::atomic_compare_exchange_strong( - &BasicInitialized, &Expected, 0, __sanitizer::memory_order_acq_rel) && - __sanitizer::Verbosity() + if (!atomic_compare_exchange_strong(&BasicInitialized, &Expected, 0, + memory_order_acq_rel) && + Verbosity()) Report("Basic logging already finalized.\n"); // Nothing really to do aside from marking state of the global to be @@ -491,11 +496,12 @@ Report("Failed initializing XRay Basic Mode; error = %d\n", InitResult); return false; } - static auto UNUSED Once = [] { - static auto UNUSED &TLD = getThreadLocalData(); - __sanitizer::Atexit(+[] { TLDDestructor(&TLD); }); - return false; - }(); + static pthread_once_t DynamicOnce = PTHREAD_ONCE_INIT; + static void *FakeTLD = nullptr; + pthread_once(&DynamicOnce, +[] { + FakeTLD = &getThreadLocalData(); + Atexit(+[] { TLDDestructor(FakeTLD); }); + }); } return true; } Index: compiler-rt/test/xray/TestCases/Posix/c-test.cc =================================================================== --- /dev/null +++ compiler-rt/test/xray/TestCases/Posix/c-test.cc @@ -0,0 +1,4 @@ +// RUN: %clang_xray -g -o %t %s +// REQUIRES: x86_64-target-arch +// REQUIRES: built-in-llvm-tree +int main() {}