Index: compiler-rt/lib/xray/xray_fdr_logging.cc =================================================================== --- compiler-rt/lib/xray/xray_fdr_logging.cc +++ compiler-rt/lib/xray/xray_fdr_logging.cc @@ -15,15 +15,10 @@ // //===----------------------------------------------------------------------===// #include "xray_fdr_logging.h" -#include -#include -#include -#include #include #include #include #include -#include #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" @@ -176,19 +171,22 @@ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; } -static std::tuple -getTimestamp() XRAY_NEVER_INSTRUMENT { +struct TSCAndCPU { + uint64_t TSC; + unsigned char CPU; +}; + +static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT { // We want to get the TSC as early as possible, so that we can check whether // we've seen this CPU before. We also do it before we load anything else, to // allow for forward progress with the scheduling. - unsigned char CPU; - uint64_t TSC; + TSCAndCPU Result; // Test once for required CPU features static bool TSCSupported = probeRequiredCPUFeatures(); if (TSCSupported) { - TSC = __xray::readTSC(CPU); + Result.TSC = __xray::readTSC(Result.CPU); } else { // FIXME: This code needs refactoring as it appears in multiple locations timespec TS; @@ -197,34 +195,32 @@ Report("clock_gettime(2) return %d, errno=%d", result, int(errno)); TS = {0, 0}; } - CPU = 0; - TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; + Result.CPU = 0; + Result.TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; } - return std::make_tuple(TSC, CPU); + return Result; } void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { auto TSC_CPU = getTimestamp(); - __xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU), - std::get<1>(TSC_CPU), 0, - clock_gettime, *BQ); + __xray_fdr_internal::processFunctionHook(FuncId, Entry, TSC_CPU.TSC, + TSC_CPU.CPU, 0, clock_gettime, *BQ); } void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, uint64_t Arg) XRAY_NEVER_INSTRUMENT { auto TSC_CPU = getTimestamp(); - __xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU), - std::get<1>(TSC_CPU), Arg, - clock_gettime, *BQ); + __xray_fdr_internal::processFunctionHook( + FuncId, Entry, TSC_CPU.TSC, TSC_CPU.CPU, Arg, clock_gettime, *BQ); } void fdrLoggingHandleCustomEvent(void *Event, std::size_t EventSize) XRAY_NEVER_INSTRUMENT { using namespace __xray_fdr_internal; auto TSC_CPU = getTimestamp(); - auto &TSC = std::get<0>(TSC_CPU); - auto &CPU = std::get<1>(TSC_CPU); + auto &TSC = TSC_CPU.TSC; + auto &CPU = TSC_CPU.CPU; RecursionGuard Guard{Running}; if (!Guard) { assert(Running && "RecursionGuard is buggy!"); @@ -261,7 +257,7 @@ CustomEvent.Type = uint8_t(RecordType::Metadata); CustomEvent.RecordKind = uint8_t(MetadataRecord::RecordKinds::CustomEventMarker); - constexpr auto TSCSize = sizeof(std::get<0>(TSC_CPU)); + constexpr auto TSCSize = sizeof(TSC_CPU.TSC); std::memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t)); std::memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize); std::memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent)); Index: compiler-rt/lib/xray/xray_fdr_logging_impl.h =================================================================== --- compiler-rt/lib/xray/xray_fdr_logging_impl.h +++ compiler-rt/lib/xray/xray_fdr_logging_impl.h @@ -18,16 +18,17 @@ #define XRAY_XRAY_FDR_LOGGING_IMPL_H #include -#include +#include #include #include -#include #include -#include #include #include #include +// FIXME: Implement analogues to std::shared_ptr and std::weak_ptr +#include + #include "sanitizer_common/sanitizer_common.h" #include "xray/xray_log_interface.h" #include "xray_buffer_queue.h" @@ -96,7 +97,7 @@ // call so that it can be initialized on first use instead of as a global. We // force the alignment to 64-bytes for x86 cache line alignment, as this // structure is used in the hot path of implementation. -struct ALIGNED(64) ThreadLocalData { +struct alignas(64) ThreadLocalData { BufferQueue::Buffer Buffer; char *RecordPtr = nullptr; // The number of FunctionEntry records immediately preceding RecordPtr. @@ -176,8 +177,8 @@ // We need aligned, uninitialized storage for the TLS object which is // trivially destructible. We're going to use this as raw storage and // placement-new the ThreadLocalData object into it later. - thread_local std::aligned_storage::type TLSBuffer; + alignas(alignof(ThreadLocalData)) thread_local unsigned char + TLSBuffer[sizeof(ThreadLocalData)]; // Ensure that we only actually ever do the pthread initialization once. thread_local bool UNUSED Unused = [] { @@ -215,7 +216,7 @@ return true; }(); - return *reinterpret_cast(&TLSBuffer); + return *reinterpret_cast(TLSBuffer); } //-----------------------------------------------------------------------------| @@ -255,14 +256,14 @@ inline void writeNewBufferPreamble(pid_t Tid, timespec TS, char *&MemPtr) XRAY_NEVER_INSTRUMENT { static constexpr int InitRecordsCount = 2; - std::aligned_storage::type Records[InitRecordsCount]; + alignas(16) unsigned char Records[InitRecordsCount * MetadataRecSize]; { // Write out a MetadataRecord to signify that this is the start of a new // buffer, associated with a particular thread, with a new CPU. For the // data, we have 15 bytes to squeeze as much information as we can. At this // point we only write down the following bytes: // - Thread ID (pid_t, 4 bytes) - auto &NewBuffer = *reinterpret_cast(&Records[0]); + auto &NewBuffer = *reinterpret_cast(Records); NewBuffer.Type = uint8_t(RecordType::Metadata); NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer); std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t)); @@ -270,7 +271,8 @@ // Also write the WalltimeMarker record. { static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes"); - auto &WalltimeMarker = *reinterpret_cast(&Records[1]); + auto &WalltimeMarker = + *reinterpret_cast(Records + MetadataRecSize); WalltimeMarker.Type = uint8_t(RecordType::Metadata); WalltimeMarker.RecordKind = uint8_t(MetadataRecord::RecordKinds::WalltimeMarker); @@ -382,10 +384,7 @@ static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta, XRayEntryType EntryType, char *&MemPtr) XRAY_NEVER_INSTRUMENT { - std::aligned_storage::type - AlignedFuncRecordBuffer; - auto &FuncRecord = - *reinterpret_cast(&AlignedFuncRecordBuffer); + FunctionRecord FuncRecord; FuncRecord.Type = uint8_t(RecordType::Function); // Only take 28 bits of the function id. FuncRecord.FuncId = FuncId & ~(0x0F << 28); @@ -439,7 +438,7 @@ } } - std::memcpy(MemPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord)); + std::memcpy(MemPtr, &FuncRecord, sizeof(FunctionRecord)); MemPtr += sizeof(FunctionRecord); } @@ -456,14 +455,10 @@ // "Function Entry" record and any "Tail Call Exit" records after that. static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC, uint64_t &LastFunctionEntryTSC, int32_t FuncId) { - using AlignedFuncStorage = - std::aligned_storage::type; auto &TLD = getThreadLocalData(); TLD.RecordPtr -= FunctionRecSize; - AlignedFuncStorage AlignedFuncRecordBuffer; - const auto &FuncRecord = *reinterpret_cast( - std::memcpy(&AlignedFuncRecordBuffer, TLD.RecordPtr, FunctionRecSize)); + FunctionRecord FuncRecord; + std::memcpy(&FuncRecord, TLD.RecordPtr, FunctionRecSize); assert(FuncRecord.RecordKind == uint8_t(FunctionRecord::RecordKinds::FunctionEnter) && "Expected to find function entry recording when rewinding."); @@ -485,27 +480,25 @@ auto RewindingTSC = LastTSC; auto RewindingRecordPtr = TLD.RecordPtr - FunctionRecSize; while (TLD.NumTailCalls > 0) { - AlignedFuncStorage TailExitRecordBuffer; // Rewind the TSC back over the TAIL EXIT record. - const auto &ExpectedTailExit = - *reinterpret_cast(std::memcpy( - &TailExitRecordBuffer, RewindingRecordPtr, FunctionRecSize)); + FunctionRecord ExpectedTailExit; + std::memcpy(&ExpectedTailExit, RewindingRecordPtr, FunctionRecSize); assert(ExpectedTailExit.RecordKind == uint8_t(FunctionRecord::RecordKinds::FunctionTailExit) && "Expected to find tail exit when rewinding."); RewindingRecordPtr -= FunctionRecSize; RewindingTSC -= ExpectedTailExit.TSCDelta; - AlignedFuncStorage FunctionEntryBuffer; - const auto &ExpectedFunctionEntry = *reinterpret_cast( - std::memcpy(&FunctionEntryBuffer, RewindingRecordPtr, FunctionRecSize)); + FunctionRecord ExpectedFunctionEntry; + std::memcpy(&ExpectedFunctionEntry, RewindingRecordPtr, FunctionRecSize); assert(ExpectedFunctionEntry.RecordKind == uint8_t(FunctionRecord::RecordKinds::FunctionEnter) && "Expected to find function entry when rewinding tail call."); assert(ExpectedFunctionEntry.FuncId == ExpectedTailExit.FuncId && "Expected funcids to match when rewinding tail call."); - // This tail call exceeded the threshold duration. It will not be erased. + // This tail call exceeded the threshold duration. It will not be + // erased. if ((TSC - RewindingTSC) >= thresholdTicks()) { TLD.NumTailCalls = 0; return;