Index: lib/xray/xray_fdr_logging_impl.h
===================================================================
--- lib/xray/xray_fdr_logging_impl.h
+++ lib/xray/xray_fdr_logging_impl.h
@@ -106,9 +106,18 @@
 //-----------------------------------------------------------------------------|
 namespace {
+
+// Hardcoded setting to skip logging simple functions that run for less than
+// 5 us. This will probably be made customizable at a later time.
+const uint64_t FuncDurationThresholdMicros = 5;
+
 thread_local BufferQueue::Buffer Buffer;
 thread_local char *RecordPtr = nullptr;
 
+// NumConsecutiveFnEnters lets us safely rewind the buffer past function
+// entries that completed too quickly to be worth the log space.
+thread_local size_t NumConsecutiveFnEnters = 0;
+
 constexpr auto MetadataRecSize = sizeof(MetadataRecord);
 constexpr auto FunctionRecSize = sizeof(FunctionRecord);
 
@@ -218,6 +227,7 @@
   // This is typically clock_gettime, but callers have injection ability.
   wall_clock_reader(CLOCK_MONOTONIC, &TS);
   writeNewBufferPreamble(Tid, TS, RecordPtr);
+  NumConsecutiveFnEnters = 0;
 }
 
 static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC,
@@ -238,6 +248,7 @@
 
 static inline void writeNewCPUIdMetadata(uint16_t CPU,
                                          uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+  NumConsecutiveFnEnters = 0;
   writeNewCPUIdMetadata(CPU, TSC, RecordPtr);
 }
 
@@ -251,6 +262,7 @@
 }
 
 static inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT {
+  NumConsecutiveFnEnters = 0;
   writeEOBMetadata(RecordPtr);
 }
 
@@ -269,6 +281,7 @@
 }
 
 static inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT {
+  NumConsecutiveFnEnters = 0;
   writeTSCWrapMetadata(TSC, RecordPtr);
 }
 
@@ -286,13 +299,20 @@
 
   switch (EntryType) {
   case XRayEntryType::ENTRY:
+    ++NumConsecutiveFnEnters;
+    FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
+    break;
   case XRayEntryType::LOG_ARGS_ENTRY:
+    // We should not rewind functions with logged args.
+    NumConsecutiveFnEnters = 0;
     FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter);
     break;
   case XRayEntryType::EXIT:
+    NumConsecutiveFnEnters = 0;
     FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit);
     break;
   case XRayEntryType::TAIL:
+    NumConsecutiveFnEnters = 0;
     FuncRecord.RecordKind =
         uint8_t(FunctionRecord::RecordKinds::FunctionTailExit);
     break;
@@ -319,6 +339,7 @@
   // We assume that we'll support only 65536 CPUs for x86_64.
   thread_local uint16_t CurrentCPU = std::numeric_limits<uint16_t>::max();
   thread_local uint64_t LastTSC = 0;
+  thread_local uint64_t LastFunctionEntryTSC = 0;
 
   // Make sure a thread that's ever called handleArg0 has a thread-local
   // live reference to the buffer queue for this particular instance of
@@ -467,11 +488,80 @@
     RecordTSCDelta = Delta;
   }
 
-  // We then update our "LastTSC" and "CurrentCPU" thread-local variables to aid
-  // us in future computations of this TSC delta value.
   LastTSC = TSC;
   CurrentCPU = CPU;
+  using AlignedFuncStorage =
+      std::aligned_storage<sizeof(FunctionRecord), alignof(FunctionRecord)>::type;
+  switch (Entry) {
+  case XRayEntryType::ENTRY:
+  case XRayEntryType::LOG_ARGS_ENTRY:
+    // Update the thread-local state for the next invocation.
+    LastFunctionEntryTSC = TSC;
+    break;
+  case XRayEntryType::EXIT:
+    // We unwind functions that are too short to log by rewinding our write
+    // pointer into the buffer, which erases the matching FunctionEnter record
+    // and skips logging this EXIT. We also have to roll back the associated
+    // state: LastTSC and the count of consecutive FunctionEnter records.
+    if (NumConsecutiveFnEnters > 0 &&
+        (TSC - LastFunctionEntryTSC) < FuncDurationThresholdMicros) {
+      RecordPtr -= FunctionRecSize;
+      AlignedFuncStorage AlignedFuncRecordBuffer;
+      const auto &FuncRecord = *reinterpret_cast<FunctionRecord *>(
+          std::memcpy(&AlignedFuncRecordBuffer, RecordPtr, FunctionRecSize));
+      assert(FuncRecord.RecordKind ==
+                 uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
+             "Expected to find a function entry record when rewinding.");
+      assert(FuncRecord.FuncId == (FuncId & ~(0x0F << 28)) &&
+             "Expected a matching function id when rewinding an EXIT.");
+      --NumConsecutiveFnEnters;
+      LastTSC -= FuncRecord.TSCDelta;
+      LastFunctionEntryTSC = NumConsecutiveFnEnters == 0
+                                 ? 0
+                                 : LastFunctionEntryTSC - FuncRecord.TSCDelta;
+      return; // Return without writing a log record.
+    }
+    break;
+  case XRayEntryType::TAIL:
+    // If we fail the loop condition, one of two things has occurred:
+    // a. We've rewound over all of the function entries that are part of the
+    //    tail exit call stack and were invoked recently enough for our
+    //    heuristic to say not to log them.
+    // b. We've encountered an interruption in the stream of function entry
+    //    records and we won't try to rewind the stream any further.
+    //
+    // Either way, we have not found the function matching our tail exit, but
+    // we'll still write a tail exit record so the analysis layer knows that
+    // control travels up the stack and returns from the given function id.
+    //
+    // If we do find the matching function entry by rewinding, we short
+    // circuit and avoid writing a tail exit record.
+    for (auto PrevFuncPtr = RecordPtr - FunctionRecSize;
+         NumConsecutiveFnEnters > 0 &&
+         (TSC - LastFunctionEntryTSC) < FuncDurationThresholdMicros;
+         PrevFuncPtr -= FunctionRecSize) {
+      AlignedFuncStorage AlignedFuncRecordBuffer;
+      const auto &FuncRecord = *reinterpret_cast<FunctionRecord *>(
+          std::memcpy(&AlignedFuncRecordBuffer, PrevFuncPtr, FunctionRecSize));
+      assert(FuncRecord.RecordKind ==
+                 uint8_t(FunctionRecord::RecordKinds::FunctionEnter) &&
+             "Expected to find a function entry record when rewinding from TAIL.");
+      --NumConsecutiveFnEnters;
+      RecordPtr = PrevFuncPtr;
+      LastTSC -= FuncRecord.TSCDelta;
+      LastFunctionEntryTSC = NumConsecutiveFnEnters == 0
+                                 ? 0
+                                 : LastFunctionEntryTSC - FuncRecord.TSCDelta;
+      if (FuncRecord.FuncId == (FuncId & ~(0x0F << 28))) {
+        // We rewound the whole call stack up to the matching entry.
+        return; // Return without writing a log record.
+      }
+    }
+    break;
+  } // switch
+
   writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr);
 
   // If we've exhausted the buffer by this time, we then release the buffer to
@@ -491,5 +581,4 @@
 
 } // namespace __xray_fdr_internal
 } // namespace __xray
-
 #endif // XRAY_XRAY_FDR_LOGGING_IMPL_H
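For illustration only (not part of the patch): the short sketch below models the rewind heuristic with simplified, hypothetical types (Record, ThreadLog, DurationThreshold) in place of the real FunctionRecord and raw byte buffer. It shows just the ENTRY/EXIT bookkeeping performed by the EXIT case above; the actual handler also deals with tail exits, CPU migration, TSC wraparound, and buffer exhaustion.

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

namespace sketch {

// Stand-in for FunctionRecord: kind, function id, and delta from the previous
// record's timestamp.
struct Record {
  uint8_t Kind; // 0 = function enter, 1 = function exit
  int32_t FuncId;
  uint64_t TSCDelta;
};

// Stand-in for FuncDurationThresholdMicros; same units as the timestamps
// passed to enter()/exit() below.
constexpr uint64_t DurationThreshold = 5;

// Per-thread log state: a growable record vector instead of RecordPtr walking
// a raw byte buffer, plus the same bookkeeping the patch maintains.
struct ThreadLog {
  std::vector<Record> Buffer;
  std::size_t NumConsecutiveFnEnters = 0;
  uint64_t LastTSC = 0;
  uint64_t LastFunctionEntryTSC = 0;

  void enter(int32_t FuncId, uint64_t TSC) {
    Buffer.push_back({0, FuncId, TSC - LastTSC});
    ++NumConsecutiveFnEnters;
    LastTSC = TSC;
    LastFunctionEntryTSC = TSC;
  }

  void exit(int32_t FuncId, uint64_t TSC) {
    if (NumConsecutiveFnEnters > 0 &&
        (TSC - LastFunctionEntryTSC) < DurationThreshold) {
      // Rewind: drop the matching enter record, skip writing this exit, and
      // roll the bookkeeping back as if neither event had happened.
      const Record Entered = Buffer.back();
      assert(Entered.Kind == 0 && Entered.FuncId == FuncId &&
             "expected the matching enter record at the end of the buffer");
      Buffer.pop_back();
      --NumConsecutiveFnEnters;
      LastTSC -= Entered.TSCDelta;
      LastFunctionEntryTSC = NumConsecutiveFnEnters == 0
                                 ? 0
                                 : LastFunctionEntryTSC - Entered.TSCDelta;
      return;
    }
    Buffer.push_back({1, FuncId, TSC - LastTSC});
    NumConsecutiveFnEnters = 0;
    LastTSC = TSC;
  }
};

} // namespace sketch

int main() {
  sketch::ThreadLog Log;
  Log.enter(/*FuncId=*/1, /*TSC=*/100); // long-running outer function
  Log.enter(/*FuncId=*/2, /*TSC=*/110); // short inner function
  Log.exit(/*FuncId=*/2, /*TSC=*/112);  // duration 2 < 5: both FuncId 2 records vanish
  Log.exit(/*FuncId=*/1, /*TSC=*/200);  // duration 100 >= 5: enter/exit pair is kept
  std::cout << "records kept: " << Log.Buffer.size() << "\n"; // prints 2
  return 0;
}

The key invariant, in the sketch as in the patch, is that rewinding is attempted only while NumConsecutiveFnEnters is non-zero, i.e. while the record immediately behind the write pointer is guaranteed to be a FunctionEnter record.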