Index: lib/xray/xray_buffer_queue.h =================================================================== --- lib/xray/xray_buffer_queue.h +++ lib/xray/xray_buffer_queue.h @@ -15,9 +15,10 @@ #ifndef XRAY_BUFFER_QUEUE_H #define XRAY_BUFFER_QUEUE_H -#include #include "sanitizer_common/sanitizer_atomic.h" +#include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_mutex.h" +#include namespace __xray { @@ -27,7 +28,7 @@ /// the "flight data recorder" (FDR) mode to support ongoing XRay function call /// trace collection. class BufferQueue { - public: +public: struct alignas(64) BufferExtents { __sanitizer::atomic_uint64_t Size; }; @@ -35,10 +36,10 @@ struct Buffer { void *Data = nullptr; size_t Size = 0; - BufferExtents* Extents; + BufferExtents *Extents; }; - private: +private: struct BufferRep { // The managed buffer. Buffer Buff; @@ -48,10 +49,67 @@ bool Used = false; }; + // This models a ForwardIterator. |T| Must be either a `Buffer` or `const + // Buffer`. Note that we only advance to the "used" buffers, when + // incrementing, so that at dereference we're always at a valid point. + template class Iterator { + public: + BufferRep *Buffers = nullptr; + size_t Offset = 0; + size_t Max = 0; + + Iterator &operator++() { + DCHECK_NE(Offset, Max); + do { + ++Offset; + } while (!Buffers[Offset].Used && Offset != Max); + return *this; + } + + Iterator operator++(int) { + Iterator C = *this; + ++(*this); + return C; + } + + T &operator*() const { return Buffers[Offset].Buff; } + + T *operator->() const { return &(Buffers[Offset].Buff); } + + Iterator(BufferRep *Root, size_t O, size_t M) + : Buffers(Root), Offset(O), Max(M) { + // We want to advance to the first Offset where the 'Used' property is + // true, or to the end of the list/queue. + while (!Buffers[Offset].Used && Offset != Max) { + ++Offset; + } + } + + Iterator() = default; + Iterator(const Iterator &) = default; + Iterator(Iterator &&) = default; + Iterator &operator=(const Iterator &) = default; + Iterator &operator=(Iterator &&) = default; + ~Iterator() = default; + + template + friend bool operator==(const Iterator &L, const Iterator &R) { + DCHECK_EQ(L.Max, R.Max); + return L.Buffers == R.Buffers && L.Offset == R.Offset; + } + + template + friend bool operator!=(const Iterator &L, const Iterator &R) { + return !(L == R); + } + }; + // Size of each individual Buffer. size_t BufferSize; BufferRep *Buffers; + + // Amount of pre-allocated buffers. size_t BufferCount; __sanitizer::SpinMutex Mutex; @@ -70,7 +128,7 @@ // Count of buffers that have been handed out through 'getBuffer'. size_t LiveBuffers; - public: +public: enum class ErrorCode : unsigned { Ok, NotEnoughMemory, @@ -81,16 +139,16 @@ static const char *getErrorString(ErrorCode E) { switch (E) { - case ErrorCode::Ok: - return "(none)"; - case ErrorCode::NotEnoughMemory: - return "no available buffers in the queue"; - case ErrorCode::QueueFinalizing: - return "queue already finalizing"; - case ErrorCode::UnrecognizedBuffer: - return "buffer being returned not owned by buffer queue"; - case ErrorCode::AlreadyFinalized: - return "queue already finalized"; + case ErrorCode::Ok: + return "(none)"; + case ErrorCode::NotEnoughMemory: + return "no available buffers in the queue"; + case ErrorCode::QueueFinalizing: + return "queue already finalizing"; + case ErrorCode::UnrecognizedBuffer: + return "buffer being returned not owned by buffer queue"; + case ErrorCode::AlreadyFinalized: + return "queue already finalized"; } return "unknown error"; } @@ -141,19 +199,29 @@ /// Applies the provided function F to each Buffer in the queue, only if the /// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a /// releaseBuffer(...) operation). - template - void apply(F Fn) { + template void apply(F Fn) { __sanitizer::SpinMutexLock G(&Mutex); - for (auto I = Buffers, E = Buffers + BufferCount; I != E; ++I) { - const auto &T = *I; - if (T.Used) Fn(T.Buff); - } + for (auto I = begin(), E = end(); I != E; ++I) + Fn(*I); + } + + using const_iterator = Iterator; + using iterator = Iterator; + + /// Provides iterator access to the raw Buffer instances. + iterator begin() const { return iterator(Buffers, 0, BufferCount); } + const_iterator cbegin() const { + return const_iterator(Buffers, 0, BufferCount); + } + iterator end() const { return iterator(Buffers, BufferCount, BufferCount); } + const_iterator cend() const { + return const_iterator(Buffers, BufferCount, BufferCount); } // Cleans up allocated buffers. ~BufferQueue(); }; -} // namespace __xray +} // namespace __xray -#endif // XRAY_BUFFER_QUEUE_H +#endif // XRAY_BUFFER_QUEUE_H Index: lib/xray/xray_fdr_flags.inc =================================================================== --- lib/xray/xray_fdr_flags.inc +++ lib/xray/xray_fdr_flags.inc @@ -25,3 +25,5 @@ XRAY_FLAG(int, buffer_size, 16384, "Size of buffers in the circular buffer queue.") XRAY_FLAG(int, buffer_max, 100, "Maximum number of buffers in the queue.") +XRAY_FLAG(bool, no_file_flush, false, + "Set to true to not write log files by default.") Index: lib/xray/xray_fdr_logging.cc =================================================================== --- lib/xray/xray_fdr_logging.cc +++ lib/xray/xray_fdr_logging.cc @@ -45,6 +45,91 @@ __sanitizer::SpinMutex FDROptionsMutex; +namespace { +XRayFileHeader &fdrCommonHeaderInfo() { + static XRayFileHeader Header = [] { + XRayFileHeader H; + // Version 2 of the log writes the extents of the buffer, instead of + // relying on an end-of-buffer record. + H.Version = 2; + H.Type = FileTypes::FDR_LOG; + + // Test for required CPU features and cache the cycle frequency + static bool TSCSupported = probeRequiredCPUFeatures(); + static uint64_t CycleFrequency = + TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond; + H.CycleFrequency = CycleFrequency; + + // FIXME: Actually check whether we have 'constant_tsc' and + // 'nonstop_tsc' before setting the values in the header. + H.ConstantTSC = 1; + H.NonstopTSC = 1; + return H; + }(); + return Header; +} + +} // namespace + +// This is the iterator implementation, which knows how to handle FDR-mode +// specific buffers. This is used as an implementation of the iterator function +// needed by __xray_set_buffer_iterator(...). It maintains a global state of the +// buffer iteration for the currently installed FDR mode buffers. In particular: +// +// - If the argument represents the initial state of XRayBuffer ({nullptr, 0}) +// then the iterator returns the header information. +// - If the argument represents the header information ({address of header +// info, size of the header info}) then it returns the first FDR buffer's +// address and extents. +// - It will keep returning the next buffer and extents as there are more +// buffers to process. When the input represents the last buffer, it will +// return the initial state to signal completion ({nullptr, 0}). +// +// See xray/xray_log_interface.h for more details on the requirements for the +// implementations of __xray_set_buffer_iterator(...) and +// __xray_log_process_buffers(...). +XRayBuffer fdrIterator(const XRayBuffer B) { + DCHECK_EQ(__xray_current_mode(), "xray-fdr"); + DCHECK(BQ->finalizing()); + + if (BQ == nullptr || !BQ->finalizing()) { + if (Verbosity()) + Report( + "XRay FDR: Failed global buffer queue is null or not finalizing!\n"); + return {nullptr, 0}; + } + + // We use a global scratch-pad for the header information, which only gets + // initialized the first time this function is called. We'll update one part + // of this information with some relevant data (in particular the number of + // buffers to expect). + static XRayFileHeader Header = fdrCommonHeaderInfo(); + if (B.Data == nullptr && B.Size == 0) { + Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; + return XRayBuffer{static_cast(&Header), sizeof(Header)}; + } + + static BufferQueue::const_iterator It{}; + static BufferQueue::const_iterator End{}; + if (B.Data == static_cast(&Header) && B.Size == sizeof(Header)) { + + // From this point on, we provide raw access to the raw buffer we're getting + // from the BufferQueue. We're relying on the iterators from the current + // Buffer queue. + It = BQ->cbegin(); + End = BQ->cend(); + } + + if (It == End) + return {nullptr, 0}; + + XRayBuffer Result{ + It->Data, __sanitizer::atomic_load(&It->Extents->Size, + __sanitizer::memory_order_acquire)}; + ++It; + return Result; +} + // Must finalize before flushing. XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { if (__sanitizer::atomic_load(&LoggingStatus, @@ -59,7 +144,6 @@ if (!__sanitizer::atomic_compare_exchange_strong( &LogFlushStatus, &Result, XRayLogFlushStatus::XRAY_LOG_FLUSHING, __sanitizer::memory_order_release)) { - if (__sanitizer::Verbosity()) Report("Not flushing log, implementation is still finalizing.\n"); return static_cast(Result); @@ -75,6 +159,23 @@ // finalised before attempting to flush the log. __sanitizer::SleepForMillis(fdrFlags()->grace_period_ms); + // At this point, we're going to uninstall the iterator implementation, before + // we decide to do anything further with the global buffer queue. + __xray_log_remove_buffer_iterator(); + + if (fdrFlags()->no_file_flush) { + if (Verbosity()) + Report("XRay FDR: Not flushing to file, 'no_file_flush=true'.\n"); + + // Clean up the buffer queue, and do not bother writing out the files! + delete BQ; + BQ = nullptr; + __sanitizer::atomic_store(&LogFlushStatus, + XRayLogFlushStatus::XRAY_LOG_FLUSHED, + __sanitizer::memory_order_release); + return XRayLogFlushStatus::XRAY_LOG_FLUSHED; + } + // We write out the file in the following format: // // 1) We write down the XRay file header with version 1, type FDR_LOG. @@ -84,8 +185,6 @@ // (fixed-sized) and let the tools reading the buffers deal with the data // afterwards. // - // FIXME: Support the case for letting users handle the data through - // __xray_process_buffers(...) and provide an option to skip writing files. int Fd = -1; { // FIXME: Remove this section of the code, when we remove the struct-based @@ -102,23 +201,7 @@ return Result; } - // Test for required CPU features and cache the cycle frequency - static bool TSCSupported = probeRequiredCPUFeatures(); - static uint64_t CycleFrequency = - TSCSupported ? getTSCFrequency() : __xray::NanosecondsPerSecond; - - XRayFileHeader Header; - - // Version 2 of the log writes the extents of the buffer, instead of relying - // on an end-of-buffer record. - Header.Version = 2; - Header.Type = FileTypes::FDR_LOG; - Header.CycleFrequency = CycleFrequency; - - // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' - // before setting the values in the header. - Header.ConstantTSC = 1; - Header.NonstopTSC = 1; + XRayFileHeader Header = fdrCommonHeaderInfo(); Header.FdrData = FdrAdditionalHeaderData{BQ->ConfiguredBufferSize()}; retryingWriteAll(Fd, reinterpret_cast(&Header), reinterpret_cast(&Header) + sizeof(Header)); @@ -126,8 +209,8 @@ BQ->apply([&](const BufferQueue::Buffer &B) { // Starting at version 2 of the FDR logging implementation, we only write // the records identified by the extents of the buffer. We use the Extents - // from the Buffer and write that out as the first record in the buffer. - // We still use a Metadata record, but fill in the extents instead for the + // from the Buffer and write that out as the first record in the buffer. We + // still use a Metadata record, but fill in the extents instead for the // data. MetadataRecord ExtentsRecord; auto BufferExtents = __sanitizer::atomic_load( @@ -180,8 +263,8 @@ static TSCAndCPU getTimestamp() XRAY_NEVER_INSTRUMENT { // We want to get the TSC as early as possible, so that we can check whether - // we've seen this CPU before. We also do it before we load anything else, to - // allow for forward progress with the scheduling. + // we've seen this CPU before. We also do it before we load anything else, + // to allow for forward progress with the scheduling. TSCAndCPU Result; // Test once for required CPU features @@ -242,8 +325,8 @@ // Here we need to prepare the log to handle: // - The metadata record we're going to write. (16 bytes) - // - The additional data we're going to write. Currently, that's the size of - // the event we're going to dump into the log as free-form bytes. + // - The additional data we're going to write. Currently, that's the size + // of the event we're going to dump into the log as free-form bytes. if (!prepareBuffer(TSC, CPU, clock_gettime, MetadataRecSize + EventSize)) { TLD.BQ = nullptr; return; @@ -293,8 +376,8 @@ // Here we need to prepare the log to handle: // - The metadata record we're going to write. (16 bytes) - // - The additional data we're going to write. Currently, that's the size of - // the event we're going to dump into the log as free-form bytes. + // - The additional data we're going to write. Currently, that's the size + // of the event we're going to dump into the log as free-form bytes. if (!prepareBuffer(TSC, CPU, clock_gettime, MetadataRecSize + EventSize)) { TLD.BQ = nullptr; return; @@ -365,18 +448,23 @@ if (EnvOpts == nullptr) EnvOpts = ""; FDRParser.ParseString(EnvOpts); + + // FIXME: Remove this when we fully remove the deprecated flags. + if (internal_strlen(EnvOpts) == 0) { + FDRFlags.func_duration_threshold_us = + flags()->xray_fdr_log_func_duration_threshold_us; + FDRFlags.grace_period_ms = flags()->xray_fdr_log_grace_period_ms; + } + + // The provided options should always override the compiler-provided and + // environment-variable defined options. FDRParser.ParseString(static_cast(Options)); *fdrFlags() = FDRFlags; - BufferSize = FDRFlags.buffer_size; BufferMax = FDRFlags.buffer_max; - - // FIXME: Remove this when we fully remove the deprecated flags. - if (internal_strlen(EnvOpts) != 0) { - flags()->xray_fdr_log_func_duration_threshold_us = - FDRFlags.func_duration_threshold_us; - flags()->xray_fdr_log_grace_period_ms = FDRFlags.grace_period_ms; - } + __sanitizer::SpinMutexLock Guard(&FDROptionsMutex); + FDROptions.Fd = -1; + FDROptions.ReportErrors = true; } else if (OptionsSize != sizeof(FDRLoggingOptions)) { // FIXME: This is deprecated, and should really be removed. // At this point we use the flag parser specific to the FDR mode @@ -434,6 +522,9 @@ __xray_set_customevent_handler(fdrLoggingHandleCustomEvent); __xray_set_typedevent_handler(fdrLoggingHandleTypedEvent); + // Install the buffer iterator implementation. + __xray_log_set_buffer_iterator(fdrIterator); + __sanitizer::atomic_store(&LoggingStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZED, __sanitizer::memory_order_release); Index: lib/xray/xray_fdr_logging_impl.h =================================================================== --- lib/xray/xray_fdr_logging_impl.h +++ lib/xray/xray_fdr_logging_impl.h @@ -31,8 +31,8 @@ #include "xray/xray_log_interface.h" #include "xray_buffer_queue.h" #include "xray_defs.h" +#include "xray_fdr_flags.h" #include "xray_fdr_log_records.h" -#include "xray_flags.h" #include "xray_tsc.h" namespace __xray { @@ -386,7 +386,7 @@ ? getTSCFrequency() : __xray::NanosecondsPerSecond; static const uint64_t ThresholdTicks = - TicksPerSec * flags()->xray_fdr_log_func_duration_threshold_us / 1000000; + TicksPerSec * fdrFlags()->func_duration_threshold_us / 1000000; return ThresholdTicks; } Index: test/xray/TestCases/Posix/fdr-mode-inmemory.cc =================================================================== --- test/xray/TestCases/Posix/fdr-mode-inmemory.cc +++ test/xray/TestCases/Posix/fdr-mode-inmemory.cc @@ -0,0 +1,50 @@ +// RUN: %clangxx_xray -g -std=c++11 %s -o %t -fxray-modes=xray-fdr +// RUN: rm fdr-inmemory-test-* || true +// RUN: XRAY_OPTIONS="patch_premain=false xray_logfile_base=fdr-inmemory-test- \ +// RUN: verbosity=1" \ +// RUN: XRAY_FDR_OPTIONS="no_file_flush=true func_duration_threshold_us=0" \ +// RUN: %run %t 2>&1 | FileCheck %s +// RUN: FILES=`find %T -name 'fdr-inmemory-test-*' | wc -l` +// RUN: [ $FILES -eq 0 ] +// RUN: rm fdr-inmemory-test-* || true +// +// REQUIRES: x86_64-target-arch +// REQUIRES: built-in-llvm-tree + +#include "xray/xray_log_interface.h" +#include +#include + +uint64_t var = 0; +uint64_t buffers = 0; +[[clang::xray_always_instrument]] void __attribute__((noinline)) f() { ++var; } + +int main(int argc, char *argv[]) { + assert(__xray_log_select_mode("xray-fdr") == + XRayLogRegisterStatus::XRAY_REGISTRATION_OK); + auto status = __xray_log_init_mode( + "xray-fdr", + "buffer_size=4096:buffer_max=10:func_duration_threshold_us=0"); + assert(status == XRayLogInitStatus::XRAY_LOG_INITIALIZED); + __xray_patch(); + + // Create enough entries. + for (int i = 0; i != 1 << 20; ++i) { + f(); + } + + // Then we want to verify that we're getting 10 buffers outside of the initial + // header. + auto finalize_status = __xray_log_finalize(); + assert(finalize_status == XRayLogInitStatus::XRAY_LOG_FINALIZED); + auto process_status = + __xray_log_process_buffers([](const char *, XRayBuffer) { ++buffers; }); + std::cout << "buffers = " << buffers << std::endl; + assert(process_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED); + auto flush_status = __xray_log_flushLog(); + assert(flush_status == XRayLogFlushStatus::XRAY_LOG_FLUSHED); + // We expect 11 buffers because 1 header buffer + 10 actual FDR buffers. + // CHECK: Buffers = 11 + std::cout << "Buffers = " << buffers << std::endl; + return 0; +}