Index: compiler-rt/trunk/include/xray/xray_log_interface.h =================================================================== --- compiler-rt/trunk/include/xray/xray_log_interface.h +++ compiler-rt/trunk/include/xray/xray_log_interface.h @@ -159,6 +159,9 @@ /// always have a handler for function entry and exit events. In case the /// implementation wants to support arg1 (or other future extensions to XRay /// logging) those MUST be installed by the installed 'log_init' handler. + /// + /// Because we didn't want to change the ABI of this struct, the arg1 handler + /// may be silently overwritten during initialization as well. void (*handle_arg0)(int32_t, XRayEntryType); /// The log implementation provided routine for when __xray_log_flushLog() is Index: compiler-rt/trunk/lib/xray/xray_fdr_log_records.h =================================================================== --- compiler-rt/trunk/lib/xray/xray_fdr_log_records.h +++ compiler-rt/trunk/lib/xray/xray_fdr_log_records.h @@ -30,6 +30,7 @@ TSCWrap, WalltimeMarker, CustomEventMarker, + CallArgument, }; // Use 7 bits to identify this record type. /* RecordKinds */ uint8_t RecordKind : 7; Index: compiler-rt/trunk/lib/xray/xray_fdr_logging.h =================================================================== --- compiler-rt/trunk/lib/xray/xray_fdr_logging.h +++ compiler-rt/trunk/lib/xray/xray_fdr_logging.h @@ -30,6 +30,7 @@ void *Options, size_t OptionsSize); XRayLogInitStatus fdrLoggingFinalize(); void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry); +void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, uint64_t Arg1); XRayLogFlushStatus fdrLoggingFlush(); XRayLogInitStatus fdrLoggingReset(); Index: compiler-rt/trunk/lib/xray/xray_fdr_logging.cc =================================================================== --- compiler-rt/trunk/lib/xray/xray_fdr_logging.cc +++ compiler-rt/trunk/lib/xray/xray_fdr_logging.cc @@ -196,9 +196,17 @@ void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { auto TSC_CPU = getTimestamp(); - __xray_fdr_internal::processFunctionHook(FuncId, Entry, std::get<0>(TSC_CPU), - std::get<1>(TSC_CPU), clock_gettime, - LoggingStatus, *BQ); + __xray_fdr_internal::processFunctionHook( + FuncId, Entry, std::get<0>(TSC_CPU), std::get<1>(TSC_CPU), 0, + clock_gettime, *BQ); +} + +void fdrLoggingHandleArg1(int32_t FuncId, XRayEntryType Entry, + uint64_t Arg) XRAY_NEVER_INSTRUMENT { + auto TSC_CPU = getTimestamp(); + __xray_fdr_internal::processFunctionHook( + FuncId, Entry, std::get<0>(TSC_CPU), std::get<1>(TSC_CPU), Arg, + clock_gettime, *BQ); } void fdrLoggingHandleCustomEvent(void *Event, @@ -280,6 +288,9 @@ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; } + // Arg1 handler should go in first to avoid concurrent code accidentally + // falling back to arg0 when it should have ran arg1. + __xray_set_handler_arg1(fdrLoggingHandleArg1); // Install the actual handleArg0 handler after initialising the buffers. __xray_set_handler(fdrLoggingHandleArg0); __xray_set_customevent_handler(fdrLoggingHandleCustomEvent); Index: compiler-rt/trunk/lib/xray/xray_fdr_logging_impl.h =================================================================== --- compiler-rt/trunk/lib/xray/xray_fdr_logging_impl.h +++ compiler-rt/trunk/lib/xray/xray_fdr_logging_impl.h @@ -91,19 +91,6 @@ /// TSC Wrap records are written when a TSC delta encoding scheme overflows. static void writeTSCWrapMetadata(uint64_t TSC); -/// Here's where the meat of the processing happens. The writer captures -/// function entry, exit and tail exit points with a time and will create -/// TSCWrap, NewCPUId and Function records as necessary. The writer might -/// walk backward through its buffer and erase trivial functions to avoid -/// polluting the log and may use the buffer queue to obtain or release a -/// buffer. -static void processFunctionHook(int32_t FuncId, XRayEntryType Entry, - uint64_t TSC, unsigned char CPU, - int (*wall_clock_reader)(clockid_t, - struct timespec *), - __sanitizer::atomic_sint32_t &LoggingStatus, - const std::shared_ptr &BQ); - // Group together thread-local-data in a struct, then hide it behind a function // call so that it can be initialized on first use instead of as a global. struct ThreadLocalData { @@ -331,9 +318,22 @@ writeTSCWrapMetadata(TSC, getThreadLocalData().RecordPtr); } -inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta, - XRayEntryType EntryType, - char *&MemPtr) XRAY_NEVER_INSTRUMENT { +// Call Argument metadata records store the arguments to a function in the +// order of their appearance; holes are not supported by the buffer format. +static inline void writeCallArgumentMetadata(uint64_t A) XRAY_NEVER_INSTRUMENT { + auto &TLD = getThreadLocalData(); + MetadataRecord CallArg; + CallArg.Type = uint8_t(RecordType::Metadata); + CallArg.RecordKind = uint8_t(MetadataRecord::RecordKinds::CallArgument); + + std::memcpy(CallArg.Data, &A, sizeof(A)); + std::memcpy(TLD.RecordPtr, &CallArg, sizeof(MetadataRecord)); + TLD.RecordPtr += sizeof(MetadataRecord); +} + +static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta, + XRayEntryType EntryType, + char *&MemPtr) XRAY_NEVER_INSTRUMENT { std::aligned_storage::type AlignedFuncRecordBuffer; auto &FuncRecord = @@ -560,6 +560,41 @@ return true; } // namespace __xray_fdr_internal +// Compute the TSC difference between the time of measurement and the previous +// event. There are a few interesting situations we need to account for: +// +// - The thread has migrated to a different CPU. If this is the case, then +// we write down the following records: +// +// 1. A 'NewCPUId' Metadata record. +// 2. A FunctionRecord with a 0 for the TSCDelta field. +// +// - The TSC delta is greater than the 32 bits we can store in a +// FunctionRecord. In this case we write down the following records: +// +// 1. A 'TSCWrap' Metadata record. +// 2. A FunctionRecord with a 0 for the TSCDelta field. +// +// - The TSC delta is representable within the 32 bits we can store in a +// FunctionRecord. In this case we write down just a FunctionRecord with +// the correct TSC delta. +inline uint32_t writeCurrentCPUTSC(ThreadLocalData &TLD, uint64_t TSC, uint8_t CPU) { + if (CPU != TLD.CurrentCPU) { + // We've moved to a new CPU. + writeNewCPUIdMetadata(CPU, TSC); + return 0; + } + // If the delta is greater than the range for a uint32_t, then we write out + // the TSC wrap metadata entry with the full TSC, and the TSC for the + // function record be 0. + uint64_t Delta = TSC - TLD.LastTSC; + if (Delta <= std::numeric_limits::max()) + return Delta; + + writeTSCWrapMetadata(TSC); + return 0; +} + inline void endBufferIfFull() XRAY_NEVER_INSTRUMENT { auto &TLD = getThreadLocalData(); auto BufferStart = static_cast(TLD.Buffer.Buffer); @@ -573,10 +608,15 @@ thread_local volatile bool Running = false; +/// Here's where the meat of the processing happens. The writer captures +/// function entry, exit and tail exit points with a time and will create +/// TSCWrap, NewCPUId and Function records as necessary. The writer might +/// walk backward through its buffer and erase trivial functions to avoid +/// polluting the log and may use the buffer queue to obtain or release a +/// buffer. inline void processFunctionHook( int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU, - int (*wall_clock_reader)(clockid_t, struct timespec *), - __sanitizer::atomic_sint32_t &LoggingStatus, + uint64_t Arg1, int (*wall_clock_reader)(clockid_t, struct timespec *), const std::shared_ptr &BQ) XRAY_NEVER_INSTRUMENT { // Prevent signal handler recursion, so in case we're already in a log writing // mode and the signal handler comes in (and is also instrumented) then we @@ -609,10 +649,10 @@ // - The least number of bytes we will ever write is 8 // (sizeof(FunctionRecord)) only if the delta between the previous entry // and this entry is within 32 bits. - // - The most number of bytes we will ever write is 8 + 16 = 24. This is - // computed by: + // - The most number of bytes we will ever write is 8 + 16 + 16 = 40. + // This is computed by: // - // sizeof(FunctionRecord) + sizeof(MetadataRecord) + // MaxSize = sizeof(FunctionRecord) + 2 * sizeof(MetadataRecord) // // These arise in the following cases: // @@ -626,6 +666,7 @@ // FunctionRecord. // 3. When we learn about a new CPU ID, we need to write down a "new cpu // id" MetadataRecord before writing out the actual FunctionRecord. + // 4. The second MetadataRecord is the optional function call argument. // // - An End-of-Buffer (EOB) MetadataRecord is 16 bytes. // @@ -634,53 +675,18 @@ // MetadataRecord. If we don't have enough space after writing as much as 24 // bytes in the end of the buffer, we need to write out the EOB, get a new // Buffer, set it up properly before doing any further writing. - // - if (!prepareBuffer(wall_clock_reader, FunctionRecSize + MetadataRecSize)) { + size_t MaxSize = FunctionRecSize + 2 * MetadataRecSize; + if (!prepareBuffer(wall_clock_reader, MaxSize)) { TLD.LocalBQ = nullptr; return; } - // By this point, we are now ready to write at most 24 bytes (one metadata - // record and one function record). - assert((TLD.RecordPtr + (MetadataRecSize + FunctionRecSize)) - - static_cast(TLD.Buffer.Buffer) >= + // By this point, we are now ready to write up to 40 bytes (explained above). + assert((TLD.RecordPtr + MaxSize) - static_cast(TLD.Buffer.Buffer) >= static_cast(MetadataRecSize) && "Misconfigured BufferQueue provided; Buffer size not large enough."); - // Here we compute the TSC Delta. There are a few interesting situations we - // need to account for: - // - // - The thread has migrated to a different CPU. If this is the case, then - // we write down the following records: - // - // 1. A 'NewCPUId' Metadata record. - // 2. A FunctionRecord with a 0 for the TSCDelta field. - // - // - The TSC delta is greater than the 32 bits we can store in a - // FunctionRecord. In this case we write down the following records: - // - // 1. A 'TSCWrap' Metadata record. - // 2. A FunctionRecord with a 0 for the TSCDelta field. - // - // - The TSC delta is representable within the 32 bits we can store in a - // FunctionRecord. In this case we write down just a FunctionRecord with - // the correct TSC delta. - // - uint32_t RecordTSCDelta = 0; - if (CPU != TLD.CurrentCPU) { - // We've moved to a new CPU. - writeNewCPUIdMetadata(CPU, TSC); - } else { - // If the delta is greater than the range for a uint32_t, then we write out - // the TSC wrap metadata entry with the full TSC, and the TSC for the - // function record be 0. - auto Delta = TSC - TLD.LastTSC; - if (Delta > (1ULL << 32) - 1) - writeTSCWrapMetadata(TSC); - else - RecordTSCDelta = Delta; - } - + auto RecordTSCDelta = writeCurrentCPUTSC(TLD, TSC, CPU); TLD.LastTSC = TSC; TLD.CurrentCPU = CPU; switch (Entry) { @@ -711,6 +717,8 @@ } writeFunctionRecord(FuncId, RecordTSCDelta, Entry, TLD.RecordPtr); + if (Entry == XRayEntryType::LOG_ARGS_ENTRY) + writeCallArgumentMetadata(Arg1); // If we've exhausted the buffer by this time, we then release the buffer to // make sure that other threads may start using this buffer. Index: compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S =================================================================== --- compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S +++ compiler-rt/trunk/lib/xray/xray_trampoline_x86_64.S @@ -173,11 +173,11 @@ .Larg1entryLog: - // First argument will become the third + // First argument will become the third movq %rdi, %rdx - // XRayEntryType::ENTRY into the second - xorq %rsi, %rsi + // XRayEntryType::LOG_ARGS_ENTRY into the second + mov $0x3, %esi // 32-bit function ID becomes the first movl %r10d, %edi Index: compiler-rt/trunk/test/xray/TestCases/Linux/arg1-logger.cc =================================================================== --- compiler-rt/trunk/test/xray/TestCases/Linux/arg1-logger.cc +++ compiler-rt/trunk/test/xray/TestCases/Linux/arg1-logger.cc @@ -29,7 +29,7 @@ __xray_set_handler_arg1(arg1logger); foo(nullptr); - // CHECK: Arg1: 0, XRayEntryType 0 + // CHECK: Arg1: 0, XRayEntryType 3 __xray_remove_handler_arg1(); foo((void *) 0xBADC0DE); @@ -37,7 +37,7 @@ __xray_set_handler_arg1(arg1logger); foo((void *) 0xDEADBEEFCAFE); - // CHECK-NEXT: Arg1: deadbeefcafe, XRayEntryType 0 + // CHECK-NEXT: Arg1: deadbeefcafe, XRayEntryType 3 foo((void *) -1); - // CHECK-NEXT: Arg1: ffffffffffffffff, XRayEntryType 0 + // CHECK-NEXT: Arg1: ffffffffffffffff, XRayEntryType 3 } Index: compiler-rt/trunk/test/xray/TestCases/Linux/fdr-mode.cc =================================================================== --- compiler-rt/trunk/test/xray/TestCases/Linux/fdr-mode.cc +++ compiler-rt/trunk/test/xray/TestCases/Linux/fdr-mode.cc @@ -30,6 +30,9 @@ [[clang::xray_always_instrument]] void __attribute__((noinline)) fA() { fB(); } +[[clang::xray_always_instrument, clang::xray_log_args(1)]] +void __attribute__((noinline)) fArg(int) { } + int main(int argc, char *argv[]) { using namespace __xray; FDRLoggingOptions Options; @@ -52,6 +55,7 @@ fC(); fB(); fA(); + fArg(1); }); other_thread.join(); std::cout << "Joined" << std::endl; @@ -85,8 +89,15 @@ // TRACE-DAG: - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD2:[0-9]+]], kind: function-enter, tsc: {{[0-9]+}} } // TRACE: - { type: 0, func-id: [[FIDB]], function: {{.*fB.*}}, cpu: {{.*}}, thread: [[THREAD2]], kind: function-{{exit|tail-exit}}, tsc: {{[0-9]+}} } +// TRACE-DAG: - { type: 0, func-id: [[FIDARG:[0-9]+]], function: 'fArg(int)', args: [ 1 ], cpu: {{.*}}, thread: [[THREAD2]], kind: function-enter-arg, tsc: {{[0-9]+}} } +// TRACE-DAG: - { type: 0, func-id: [[FIDARG]], function: 'fArg(int)', cpu: {{.*}}, thread: [[THREAD2]], kind: function-exit, tsc: {{[0-9]+}} } + // Assert that when unwriting is enabled with a high threshold time, all the function records are erased. A CPU switch could erroneously fail this test, but // is unlikely given the test program. -// UNWRITE: header +// Even with a high threshold, arg1 logging is never unwritten. +// UNWRITE: header: +// UNWRITE: records: +// UNWRITE-NEXT: - { type: 0, func-id: [[FIDARG:[0-9]+]], function: 'fArg(int)', args: [ 1 ], cpu: {{.*}}, thread: [[THREAD2:[0-9]+]], kind: function-enter-arg, tsc: {{[0-9]+}} } +// UNWRITE-NEXT: - { type: 0, func-id: [[FIDARG]], function: 'fArg(int)', cpu: {{.*}}, thread: [[THREAD2]], kind: function-exit, tsc: {{[0-9]+}} } // UNWRITE-NOT: function-enter // UNWRITE-NOT: function-{{exit|tail-exit}}