Index: include/xray/xray_log_interface.h =================================================================== --- include/xray/xray_log_interface.h +++ include/xray/xray_log_interface.h @@ -155,11 +155,12 @@ /// when called. XRayLogInitStatus (*log_finalize)(); - /// The 0-argument function call handler. XRay logging implementations MUST + /// The function call logging handlers. XRay logging implementations MUST /// always have a handler for function entry and exit events. In case the /// implementation wants to support arg1 (or other future extensions to XRay /// logging) those MUST be installed by the installed 'log_init' handler. void (*handle_arg0)(int32_t, XRayEntryType); + void (*handle_arg1)(int32_t, XRayEntryType, uint64_t); /// The log implementation provided routine for when __xray_log_flushLog() is /// called. Index: lib/xray/xray_fdr_log_records.h =================================================================== --- lib/xray/xray_fdr_log_records.h +++ lib/xray/xray_fdr_log_records.h @@ -29,6 +29,7 @@ NewCPUId, TSCWrap, WalltimeMarker, + CallArgument, }; // Use 7 bits to identify this record type. /* RecordKinds */ uint8_t RecordKind : 7; Index: lib/xray/xray_fdr_logging.h =================================================================== --- lib/xray/xray_fdr_logging.h +++ lib/xray/xray_fdr_logging.h @@ -30,6 +30,7 @@ void *Options, size_t OptionsSize); XRayLogInitStatus fdrLoggingFinalize(); void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry); +void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, uint64_t Arg1); XRayLogFlushStatus fdrLoggingFlush(); XRayLogInitStatus fdrLoggingReset(); Index: lib/xray/xray_fdr_logging.cc =================================================================== --- lib/xray/xray_fdr_logging.cc +++ lib/xray/xray_fdr_logging.cc @@ -71,8 +71,9 @@ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; } - // Install the actual handleArg0 handler after initialising the buffers. + // Install the actual logging handlers after initialising the buffers. __xray_set_handler(fdrLoggingHandleArg0); + __xray_set_handler_arg1(fdrLoggingHandleArg1); __sanitizer::atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED, @@ -192,6 +193,25 @@ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; } +static void fdrReadTime(uint64_t &TSC, uint8_t &CPU) { + // Test once for required CPU features + static bool TSCSupported = probeRequiredCPUFeatures(); + + if (TSCSupported) { + TSC = __xray::readTSC(CPU); + return; + } + // FIXME: This code needs refactoring as it appears in multiple locations + timespec TS; + int result = clock_gettime(CLOCK_REALTIME, &TS); + if (result != 0) { + Report("clock_gettime(2) return %d, errno=%d", result, int(errno)); + TS = {0, 0}; + } + CPU = 0; + TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; +} + void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { // We want to get the TSC as early as possible, so that we can check whether @@ -200,25 +220,20 @@ unsigned char CPU; uint64_t TSC; - // Test once for required CPU features - static bool TSCSupported = probeRequiredCPUFeatures(); + fdrReadTime(TSC, CPU); + __xray_fdr_internal::processFunctionHook(FuncId, Entry, 0, TSC, CPU, + clock_gettime, LoggingStatus, BQ); +} - if(TSCSupported) { - TSC = __xray::readTSC(CPU); - } else { - // FIXME: This code needs refactoring as it appears in multiple locations - timespec TS; - int result = clock_gettime(CLOCK_REALTIME, &TS); - if (result != 0) { - Report("clock_gettime(2) return %d, errno=%d", result, int(errno)); - TS = {0, 0}; - } - CPU = 0; - TSC = TS.tv_sec * __xray::NanosecondsPerSecond + TS.tv_nsec; - } +void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType, + uint64_t Arg) XRAY_NEVER_INSTRUMENT { + unsigned char CPU; + uint64_t TSC; - __xray_fdr_internal::processFunctionHook(FuncId, Entry, TSC, CPU, - clock_gettime, LoggingStatus, BQ); + fdrReadTime(TSC, CPU); + __xray_fdr_internal::processFunctionHook( + FuncId, XRayEntryType::LOG_ARGS_ENTRY, Arg, TSC, CPU, clock_gettime, + LoggingStatus, BQ); } } // namespace __xray @@ -227,7 +242,8 @@ using namespace __xray; if (flags()->xray_fdr_log) { XRayLogImpl Impl{ - fdrLoggingInit, fdrLoggingFinalize, fdrLoggingHandleArg0, + fdrLoggingInit, fdrLoggingFinalize, + fdrLoggingHandleArg0, fdrLoggingHandleArg1, fdrLoggingFlush, }; __xray_set_log_impl(Impl); Index: lib/xray/xray_fdr_logging_impl.h =================================================================== --- lib/xray/xray_fdr_logging_impl.h +++ lib/xray/xray_fdr_logging_impl.h @@ -95,7 +95,7 @@ /// polluting the log and may use the buffer queue to obtain or release a /// buffer. static void processFunctionHook(int32_t FuncId, XRayEntryType Entry, - uint64_t TSC, unsigned char CPU, + uint64_t Arg1, uint64_t TSC, unsigned char CPU, int (*wall_clock_reader)(clockid_t, struct timespec *), __sanitizer::atomic_sint32_t &LoggingStatus, @@ -293,6 +293,23 @@ writeTSCWrapMetadata(TSC, RecordPtr); } +static inline void writeCallArgumentMetadata(uint64_t A, char *&MemPtr) + XRAY_NEVER_INSTRUMENT { + MetadataRecord CallArg; + CallArg.Type = uint8_t(RecordType::Metadata); + CallArg.RecordKind = uint8_t(MetadataRecord::RecordKinds::CallArgument); + + const uint16_t WhichArg = 1; /* We only support the first argument for now. */ + std::memcpy(CallArg.Data, &WhichArg, sizeof(WhichArg)); + std::memcpy(CallArg.Data + sizeof(WhichArg), &A, sizeof(A)); + std::memcpy(MemPtr, &CallArg, sizeof(MetadataRecord)); + MemPtr += sizeof(MetadataRecord); +} + +static inline void writeCallArgumentMetadata(uint64_t A) XRAY_NEVER_INSTRUMENT { + writeCallArgumentMetadata(A, RecordPtr); +} + static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta, XRayEntryType EntryType, char *&MemPtr) XRAY_NEVER_INSTRUMENT { @@ -433,8 +450,8 @@ } static inline void processFunctionHook( - int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU, - int (*wall_clock_reader)(clockid_t, struct timespec *), + int32_t FuncId, XRayEntryType Entry, uint64_t Arg1, uint64_t TSC, + unsigned char CPU, int (*wall_clock_reader)(clockid_t, struct timespec *), __sanitizer::atomic_sint32_t &LoggingStatus, const std::shared_ptr &BQ) XRAY_NEVER_INSTRUMENT { // Bail out right away if logging is not initialized yet. @@ -519,10 +536,10 @@ // - The least number of bytes we will ever write is 8 // (sizeof(FunctionRecord)) only if the delta between the previous entry // and this entry is within 32 bits. - // - The most number of bytes we will ever write is 8 + 16 = 24. This is - // computed by: + // - The most number of bytes we will ever write is 8 + 16 + 16 = 40. + // This is computed by: // - // sizeof(FunctionRecord) + sizeof(MetadataRecord) + // sizeof(FunctionRecord) + 2 * sizeof(MetadataRecord) // // These arise in the following cases: // @@ -536,6 +553,7 @@ // FunctionRecord. // 3. When we learn about a new CPU ID, we need to write down a "new cpu // id" MetadataRecord before writing out the actual FunctionRecord. + // 4. The second MetadataRecord is the optional function call argument. // // - An End-of-Buffer (EOB) MetadataRecord is 16 bytes. // @@ -546,7 +564,7 @@ // Buffer, set it up properly before doing any further writing. // char *BufferStart = static_cast(Buffer.Buffer); - if ((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart < + if ((RecordPtr + (2 * MetadataRecSize + FunctionRecSize)) - BufferStart < static_cast(MetadataRecSize)) { writeEOBMetadata(); if (!releaseThreadLocalBuffer(LocalBQ.get())) @@ -560,10 +578,10 @@ setupNewBuffer(wall_clock_reader); } - // By this point, we are now ready to write at most 24 bytes (one metadata - // record and one function record). + // By this point, we are now ready to write at most 40 bytes (two metadata + // records and one function record). BufferStart = static_cast(Buffer.Buffer); - assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart >= + assert((RecordPtr + (2 * MetadataRecSize + FunctionRecSize)) - BufferStart >= static_cast(MetadataRecSize) && "Misconfigured BufferQueue provided; Buffer size not large enough."); @@ -622,6 +640,8 @@ } writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr); + if (Entry == XRayEntryType::LOG_ARGS_ENTRY) + writeCallArgumentMetadata(Arg1); // If we've exhausted the buffer by this time, we then release the buffer to // make sure that other threads may start using this buffer. Index: lib/xray/xray_log_interface.cc =================================================================== --- lib/xray/xray_log_interface.cc +++ lib/xray/xray_log_interface.cc @@ -24,7 +24,8 @@ void __xray_set_log_impl(XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT { if (Impl.log_init == nullptr || Impl.log_finalize == nullptr || - Impl.handle_arg0 == nullptr || Impl.flush_log == nullptr) { + Impl.handle_arg0 == nullptr || Impl.handle_arg1 == nullptr || + Impl.flush_log == nullptr) { __sanitizer::SpinMutexLock Guard(&XRayImplMutex); GlobalXRayImpl.reset(); __xray_remove_handler(); Index: test/xray/TestCases/Linux/fdr-mode.cc =================================================================== --- test/xray/TestCases/Linux/fdr-mode.cc +++ test/xray/TestCases/Linux/fdr-mode.cc @@ -29,6 +29,8 @@ [[clang::xray_always_instrument]] void __attribute__((noinline)) fB() { fC(); } [[clang::xray_always_instrument]] void __attribute__((noinline)) fA() { fB(); } +[[clang::xray_always_instrument, clang::xray_log_args(1)]] +void __attribute__((noinline)) fArg(int) { } int main(int argc, char *argv[]) { using namespace __xray; @@ -52,6 +54,7 @@ fC(); fB(); fA(); + fArg(1); }); other_thread.join(); std::cout << "Joined" << std::endl; @@ -85,8 +88,15 @@ // TRACE-DAG: - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD2:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} } // TRACE: - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD2]], kind: function-exit, tsc: {{[0-9]+}} } +// TRACE-DAG: - { type: 0, func-id: [[FIDARG:[0-9]+]], function: 'fArg(int)', arg1: 1, cpu: {{.*}}, thread: [[THREAD2]], kind: function-enter-arg, tsc: {{[0-9]+}} } +// TRACE-NEXT: - { type: 0, func-id: [[FIDARG]], function: 'fArg(int)', arg1: 0, cpu: {{.*}}, thread: [[THREAD2]], kind: function-exit, tsc: {{[0-9]+}} } + // Assert that when unwriting is enabled with a high threshold time, all the function records are erased. A CPU switch could erroneously fail this test, but // is unlikely given the test program. -// UNWRITE: header +// Even with a high threshold, arg1 logging is never unwritten. +// UNWRITE: header: +// UNWRITE: records: +// UNWRITE-NEXT: - { type: 0, func-id: [[FIDARG:[0-9]+]], function: 'fArg(int)', arg1: 1, cpu: {{.*}}, thread: [[THREAD2:[0-9]+]], kind: function-enter-arg, tsc: {{[0-9]+}} } +// UNWRITE-NEXT: - { type: 0, func-id: [[FIDARG]], function: 'fArg(int)', arg1: 0, cpu: {{.*}}, thread: [[THREAD2]], kind: function-exit, tsc: {{[0-9]+}} } // UNWRITE-NOT: function-enter // UNWRITE-NOT: function-exit