Index: compiler-rt/trunk/include/xray/xray_records.h =================================================================== --- compiler-rt/trunk/include/xray/xray_records.h +++ compiler-rt/trunk/include/xray/xray_records.h @@ -67,13 +67,14 @@ enum RecordTypes { NORMAL = 0, + ARG_PAYLOAD = 1, }; struct alignas(32) XRayRecord { // This is the type of the record being written. We use 16 bits to allow us to // treat this as a discriminant, and so that the first 4 bytes get packed // properly. See RecordTypes for more supported types. - uint16_t RecordType = 0; + uint16_t RecordType = RecordTypes::NORMAL; // The CPU where the thread is running. We assume number of CPUs <= 256. uint8_t CPU = 0; @@ -82,6 +83,7 @@ // ENTER = 0 // EXIT = 1 // TAIL_EXIT = 2 + // ENTER_ARG = 3 uint8_t Type = 0; // The function ID for the record. @@ -99,6 +101,32 @@ static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes"); +struct alignas(32) XRayArgPayload { + // We use the same 16 bits as a discriminant for the records in the log here + // too, and so that the first 4 bytes are packed properly. + uint16_t RecordType = RecordTypes::ARG_PAYLOAD; + + // Add a few bytes to pad. + uint8_t Padding[2] = {}; + + // The function ID for the record. + int32_t FuncId = 0; + + // The thread ID for the currently running thread. + uint32_t TId = 0; + + // Add more padding. + uint8_t Padding2[4] = {}; + + // The argument payload. + uint64_t Arg = 0; + + // The rest of this record ought to be left as padding. + uint8_t TailPadding[8] = {}; +} __attribute__((packed)); + +static_assert(sizeof(XRayArgPayload) == 32, "XRayArgPayload != 32 bytes"); + } // namespace __xray #endif // XRAY_XRAY_RECORDS_H Index: compiler-rt/trunk/lib/xray/xray_inmemory_log.cc =================================================================== --- compiler-rt/trunk/lib/xray/xray_inmemory_log.cc +++ compiler-rt/trunk/lib/xray/xray_inmemory_log.cc @@ -16,6 +16,7 @@ //===----------------------------------------------------------------------===// #include +#include #include #include #include @@ -82,14 +83,14 @@ // Test for required CPU features and cache the cycle frequency static bool TSCSupported = probeRequiredCPUFeatures(); - static uint64_t CycleFrequency = TSCSupported ? getTSCFrequency() - : __xray::NanosecondsPerSecond; + static uint64_t CycleFrequency = + TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond; // Since we're here, we get to write the header. We set it up so that the // header will only be written once, at the start, and let the threads // logging do writes which just append. XRayFileHeader Header; - Header.Version = 2; // Version 2 includes tail exit records. + Header.Version = 2; // Version 2 includes tail exit records. Header.Type = FileTypes::NAIVE_LOG; Header.CycleFrequency = CycleFrequency; @@ -102,26 +103,43 @@ return F; } +using Buffer = + std::aligned_storage::type; + +static constexpr size_t BuffLen = 1024; +thread_local size_t Offset = 0; + +Buffer (&getThreadLocalBuffer())[BuffLen] XRAY_NEVER_INSTRUMENT { + thread_local static Buffer InMemoryBuffer[BuffLen] = {}; + return InMemoryBuffer; +} + +pid_t getTId() XRAY_NEVER_INSTRUMENT { + thread_local pid_t TId = syscall(SYS_gettid); + return TId; +} + +int getGlobalFd() XRAY_NEVER_INSTRUMENT { + static int Fd = __xray_OpenLogFile(); + return Fd; +} + +thread_local volatile bool RecusionGuard = false; template void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type, RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { - using Buffer = - std::aligned_storage::type; - static constexpr size_t BuffLen = 1024; - thread_local static Buffer InMemoryBuffer[BuffLen] = {}; - thread_local static size_t Offset = 0; - static int Fd = __xray_OpenLogFile(); + auto &InMemoryBuffer = getThreadLocalBuffer(); + int Fd = getGlobalFd(); if (Fd == -1) return; thread_local __xray::ThreadExitFlusher Flusher( Fd, reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer), Offset); - thread_local pid_t TId = syscall(SYS_gettid); // Use a simple recursion guard, to handle cases where we're already logging // and for one reason or another, this function gets called again in the same // thread. - thread_local volatile bool RecusionGuard = false; - if (RecusionGuard) return; + if (RecusionGuard) + return; RecusionGuard = true; // First we get the useful data, and stuff it into the already aligned buffer @@ -129,7 +147,7 @@ auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset]; R.RecordType = RecordTypes::NORMAL; R.TSC = ReadTSC(R.CPU); - R.TId = TId; + R.TId = getTId(); R.Type = Type; R.FuncId = FuncId; ++Offset; @@ -144,6 +162,55 @@ RecusionGuard = false; } +template +void __xray_InMemoryRawLogWithArg(int32_t FuncId, XRayEntryType Type, + uint64_t Arg1, + RDTSC ReadTSC) XRAY_NEVER_INSTRUMENT { + auto &InMemoryBuffer = getThreadLocalBuffer(); + int Fd = getGlobalFd(); + if (Fd == -1) + return; + + // First we check whether there's enough space to write the data consecutively + // in the thread-local buffer. If not, we first flush the buffer before + // attempting to write the two records that must be consecutive. + if (Offset + 2 > BuffLen) { + __sanitizer::SpinMutexLock L(&LogMutex); + auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer); + retryingWriteAll(Fd, reinterpret_cast(RecordBuffer), + reinterpret_cast(RecordBuffer + Offset)); + Offset = 0; + } + + // Then we write the "we have an argument" record. + __xray_InMemoryRawLog(FuncId, Type, ReadTSC); + + if (RecusionGuard) + return; + + RecusionGuard = true; + + // And from here on write the arg payload. + __xray::XRayArgPayload R; + R.RecordType = RecordTypes::ARG_PAYLOAD; + R.FuncId = FuncId; + R.TId = getTId(); + R.Arg = Arg1; + auto EntryPtr = + &reinterpret_cast<__xray::XRayArgPayload *>(&InMemoryBuffer)[Offset]; + std::memcpy(EntryPtr, &R, sizeof(R)); + ++Offset; + if (Offset == BuffLen) { + __sanitizer::SpinMutexLock L(&LogMutex); + auto RecordBuffer = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer); + retryingWriteAll(Fd, reinterpret_cast(RecordBuffer), + reinterpret_cast(RecordBuffer + Offset)); + Offset = 0; + } + + RecusionGuard = false; +} + void __xray_InMemoryRawLogRealTSC(int32_t FuncId, XRayEntryType Type) XRAY_NEVER_INSTRUMENT { __xray_InMemoryRawLog(FuncId, Type, __xray::readTSC); @@ -163,13 +230,38 @@ }); } +void __xray_InMemoryRawLogWithArgRealTSC(int32_t FuncId, XRayEntryType Type, + uint64_t Arg1) XRAY_NEVER_INSTRUMENT { + __xray_InMemoryRawLogWithArg(FuncId, Type, Arg1, __xray::readTSC); +} + +void __xray_InMemoryRawLogWithArgEmulateTSC( + int32_t FuncId, XRayEntryType Type, uint64_t Arg1) XRAY_NEVER_INSTRUMENT { + __xray_InMemoryRawLogWithArg( + FuncId, Type, Arg1, [](uint8_t &CPU) XRAY_NEVER_INSTRUMENT { + timespec TS; + int result = clock_gettime(CLOCK_REALTIME, &TS); + if (result != 0) { + Report("clock_gettimg(2) return %d, errno=%d.", result, int(errno)); + TS = {0, 0}; + } + CPU = 0; + return TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; + }); +} + static auto UNUSED Unused = [] { auto UseRealTSC = probeRequiredCPUFeatures(); if (!UseRealTSC) Report("WARNING: Required CPU features missing for XRay instrumentation, " "using emulation instead.\n"); - if (flags()->xray_naive_log) + if (flags()->xray_naive_log) { + __xray_set_handler_arg1(UseRealTSC + ? __xray_InMemoryRawLogWithArgRealTSC + : __xray_InMemoryRawLogWithArgEmulateTSC); __xray_set_handler(UseRealTSC ? __xray_InMemoryRawLogRealTSC : __xray_InMemoryEmulateTSC); + } + return true; }();