Index: lib/xray/xray_fdr_logging.cc
===================================================================
--- lib/xray/xray_fdr_logging.cc
+++ lib/xray/xray_fdr_logging.cc
@@ -163,47 +163,17 @@
   return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED;
 }
 
-static std::tuple<uint64_t, unsigned char>
-getTimestamp() XRAY_NEVER_INSTRUMENT {
-  // We want to get the TSC as early as possible, so that we can check whether
-  // we've seen this CPU before. We also do it before we load anything else, to
-  // allow for forward progress with the scheduling.
-  unsigned char CPU;
-  uint64_t TSC;
-
-  // Test once for required CPU features
-  static bool TSCSupported = probeRequiredCPUFeatures();
-
-  if (TSCSupported) {
-    TSC = __xray::readTSC(CPU);
-  } else {
-    // FIXME: This code needs refactoring as it appears in multiple locations
-    timespec TS;
-    int result = clock_gettime(CLOCK_REALTIME, &TS);
-    if (result != 0) {
-      Report("clock_gettime(2) return %d, errno=%d", result, int(errno));
-      TS = {0, 0};
-    }
-    CPU = 0;
-    TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec;
-  }
-  return std::make_tuple(TSC, CPU);
-}
-
 void fdrLoggingHandleArg0(int32_t FuncId,
                           XRayEntryType Entry) XRAY_NEVER_INSTRUMENT {
-  auto TSC_CPU = getTimestamp();
-  __xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU),
-                                           std::get<1>(TSC_CPU), clock_gettime,
+  __xray_fdr_internal::processFunctionHook(FuncId, Entry, clock_gettime,
                                            LoggingStatus, BQ);
 }
 
 void fdrLoggingHandleCustomEvent(void *Event,
                                  std::size_t EventSize) XRAY_NEVER_INSTRUMENT {
   using namespace __xray_fdr_internal;
-  auto TSC_CPU = getTimestamp();
-  auto &TSC = std::get<0>(TSC_CPU);
-  auto &CPU = std::get<1>(TSC_CPU);
+  uint8_t CPU;
+  uint64_t TSC = __xray::readTSC(CPU);
   thread_local bool Running = false;
   RecursionGuard Guard{Running};
   if (!Guard) {
@@ -240,9 +210,8 @@
   CustomEvent.Type = uint8_t(RecordType::Metadata);
   CustomEvent.RecordKind =
       uint8_t(MetadataRecord::RecordKinds::CustomEventMarker);
-  constexpr auto TSCSize = sizeof(std::get<0>(TSC_CPU));
   std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t));
-  std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize);
+  std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, sizeof(TSC));
   std::memcpy(RecordPtr, &CustomEvent, sizeof(CustomEvent));
   RecordPtr += sizeof(CustomEvent);
   std::memcpy(RecordPtr, Event, ReducedEventSize);
Index: lib/xray/xray_fdr_logging_impl.h
===================================================================
--- lib/xray/xray_fdr_logging_impl.h
+++ lib/xray/xray_fdr_logging_impl.h
@@ -98,7 +98,6 @@
 /// polluting the log and may use the buffer queue to obtain or release a
 /// buffer.
 static void processFunctionHook(int32_t FuncId, XRayEntryType Entry,
-                                uint64_t TSC, unsigned char CPU,
                                 int (*wall_clock_reader)(clockid_t,
                                                          struct timespec *),
                                 __sanitizer::atomic_sint32_t &LoggingStatus,
@@ -545,7 +544,7 @@
 }
 
 inline void processFunctionHook(
-    int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU,
+    int32_t FuncId, XRayEntryType Entry,
     int (*wall_clock_reader)(clockid_t, struct timespec *),
     __sanitizer::atomic_sint32_t &LoggingStatus,
     const std::shared_ptr<BufferQueue> &BQ) XRAY_NEVER_INSTRUMENT {
@@ -566,6 +565,8 @@
   if (LocalBQ == nullptr)
     LocalBQ = BQ;
 
+  uint8_t CPU;
+  uint64_t TSC = readTSC(CPU);
   if (!isLogInitializedAndReady(LocalBQ, TSC, CPU, wall_clock_reader))
     return;
 
Index: lib/xray/xray_tsc.h
===================================================================
--- lib/xray/xray_tsc.h
+++ lib/xray/xray_tsc.h
@@ -15,6 +15,23 @@
 namespace __xray {
 
 static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000;
+
+// Even if an architecture supports a TSC-like mechanism, a particular box can
+// still prevent us from using it; be it for security (SECCOMP), virtualization
+// (VMware's deterministic lockstep emulating RDTSC using APIC to eliminate VM
+// randomness) or age (older CPUs with broken/missing TSC support). Therefore,
+// we always keep this function around.
+ALWAYS_INLINE uint64_t emulateReadTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+  timespec TS;
+  int result = clock_gettime(CLOCK_REALTIME, &TS);
+  if (result != 0) {
+    Report("clock_gettime(2) returned %d, errno=%d.\n", result, int(errno));
+    TS = {0, 0};
+  }
+  CPU = 0;
+  return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
+}
+
 }
 
 #if defined(__x86_64__)
@@ -44,15 +61,7 @@
 inline bool probeRequiredCPUFeatures() XRAY_NEVER_INSTRUMENT { return true; }
 
 ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
-  timespec TS;
-  int result = clock_gettime(CLOCK_REALTIME, &TS);
-  if (result != 0) {
-    Report("clock_gettime(2) returned %d, errno=%d.", result, int(errno));
-    TS.tv_sec = 0;
-    TS.tv_nsec = 0;
-  }
-  CPU = 0;
-  return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
+  return emulateReadTSC(CPU);
 }
 
 inline uint64_t getTSCFrequency() XRAY_NEVER_INSTRUMENT {
Index: lib/xray/xray_x86_64.inc
===================================================================
--- lib/xray/xray_x86_64.inc
+++ lib/xray/xray_x86_64.inc
@@ -19,7 +19,13 @@
 
 namespace __xray {
 
+bool probeRequiredCPUFeatures();
+
 ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT {
+  static bool TSCSupported = probeRequiredCPUFeatures();
+  if (UNLIKELY(!TSCSupported)) {
+    return emulateReadTSC(CPU);
+  }
   unsigned LongCPU;
   uint64_t TSC = __rdtscp(&LongCPU);
   CPU = LongCPU;
@@ -28,6 +34,4 @@
 
 uint64_t getTSCFrequency();
 
-bool probeRequiredCPUFeatures();
-
 } // namespace __xray