Index: lib/xray/xray_buffer_queue.h
===================================================================
--- lib/xray/xray_buffer_queue.h
+++ lib/xray/xray_buffer_queue.h
@@ -17,8 +17,8 @@
 
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_mutex.h"
-#include <deque>
-#include <unordered_set>
+#include <memory>
+#include <tuple>
 #include <utility>
 
 namespace __xray {
@@ -36,15 +36,23 @@
   };
 
 private:
+  // Size of each individual Buffer.
   size_t BufferSize;
 
   // We use a bool to indicate whether the Buffer has been used in this
   // freelist implementation.
-  std::deque<std::tuple<Buffer, bool>> Buffers;
-  __sanitizer::BlockingMutex Mutex;
-  std::unordered_set<void *> OwnedBuffers;
+  std::unique_ptr<std::tuple<Buffer, bool>[]> Buffers;
+  size_t BufferCount;
+
+  __sanitizer::SpinMutex Mutex;
   __sanitizer::atomic_uint8_t Finalizing;
 
+  // Pointers to the buffers we own, making it quick to check ownership.
+  std::unique_ptr<void *[]> OwnedBuffers;
+
+  std::tuple<Buffer, bool> *Next;
+  std::tuple<Buffer, bool> *First;
+
 public:
   enum class ErrorCode : unsigned {
     Ok,
@@ -117,8 +125,9 @@
   /// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a
   /// releaseBuffer(...) operation).
   template <class F> void apply(F Fn) {
-    __sanitizer::BlockingMutexLock G(&Mutex);
-    for (const auto &T : Buffers) {
+    __sanitizer::SpinMutexLock G(&Mutex);
+    for (auto I = Buffers.get(), E = Buffers.get() + BufferCount; I != E; ++I) {
+      const auto &T = *I;
       if (std::get<1>(T))
         Fn(std::get<0>(T));
     }
Index: lib/xray/xray_buffer_queue.cc
===================================================================
--- lib/xray/xray_buffer_queue.cc
+++ lib/xray/xray_buffer_queue.cc
@@ -16,6 +16,7 @@
 
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
+#include <algorithm>
 #include <cstdlib>
 #include <tuple>
@@ -23,18 +24,21 @@
 using namespace __sanitizer;
 
 BufferQueue::BufferQueue(std::size_t B, std::size_t N, bool &Success)
-    : BufferSize(B), Buffers(N), Mutex(), OwnedBuffers(), Finalizing{0} {
-  for (auto &T : Buffers) {
+    : BufferSize(B), Buffers(new std::tuple<Buffer, bool>[N]()),
+      BufferCount(N), Finalizing{0}, OwnedBuffers(new void *[N]()),
+      Next(Buffers.get()), First(nullptr) {
+  for (size_t i = 0; i < N; ++i) {
+    auto &T = Buffers[i];
     void *Tmp = malloc(BufferSize);
     if (Tmp == nullptr) {
       Success = false;
       return;
     }
     auto &Buf = std::get<0>(T);
+    std::get<1>(T) = false;
     Buf.Buffer = Tmp;
     Buf.Size = B;
-    OwnedBuffers.emplace(Tmp);
+    OwnedBuffers[i] = Tmp;
   }
   Success = true;
 }
@@ -42,27 +46,43 @@
 BufferQueue::ErrorCode BufferQueue::getBuffer(Buffer &Buf) {
   if (__sanitizer::atomic_load(&Finalizing, __sanitizer::memory_order_acquire))
     return ErrorCode::QueueFinalizing;
-  __sanitizer::BlockingMutexLock Guard(&Mutex);
-  if (Buffers.empty())
+  __sanitizer::SpinMutexLock Guard(&Mutex);
+
+  if (Next == First)
     return ErrorCode::NotEnoughMemory;
-  auto &T = Buffers.front();
+
+  auto &T = *Next;
   auto &B = std::get<0>(T);
   Buf = B;
-  B.Buffer = nullptr;
-  B.Size = 0;
-  Buffers.pop_front();
+
+  if (First == nullptr)
+    First = Next;
+  ++Next;
+  if (Next == (Buffers.get() + BufferCount))
+    Next = Buffers.get();
+
   return ErrorCode::Ok;
 }
 
 BufferQueue::ErrorCode BufferQueue::releaseBuffer(Buffer &Buf) {
-  if (OwnedBuffers.count(Buf.Buffer) == 0)
+  // Blitz through the buffers array to find the buffer.
+  if (std::none_of(OwnedBuffers.get(), OwnedBuffers.get() + BufferCount,
+                   [&Buf](void *P) { return P == Buf.Buffer; }))
    return ErrorCode::UnrecognizedBuffer;
-  __sanitizer::BlockingMutexLock Guard(&Mutex);
+  __sanitizer::SpinMutexLock Guard(&Mutex);
+
+  // This points to a semantic bug: we really ought not to be releasing more
+  // buffers than we actually get.
+  if (First == nullptr || First == Next)
+    return ErrorCode::NotEnoughMemory;
 
   // Now that the buffer has been released, we mark it as "used".
-  Buffers.emplace(Buffers.end(), Buf, true /* used */);
+  *First = std::make_tuple(Buf, true);
   Buf.Buffer = nullptr;
   Buf.Size = 0;
+  ++First;
+  if (First == (Buffers.get() + BufferCount))
+    First = Buffers.get();
   return ErrorCode::Ok;
 }
 
@@ -74,7 +94,8 @@
 }
 
 BufferQueue::~BufferQueue() {
-  for (auto &T : Buffers) {
+  for (auto I = Buffers.get(), E = Buffers.get() + BufferCount; I != E; ++I) {
+    auto &T = *I;
     auto &Buf = std::get<0>(T);
     free(Buf.Buffer);
   }
Index: lib/xray/xray_fdr_logging_impl.h
===================================================================
--- lib/xray/xray_fdr_logging_impl.h
+++ lib/xray/xray_fdr_logging_impl.h
@@ -243,8 +243,7 @@
   }
 };
 
-inline bool loggingInitialized(
-    const __sanitizer::atomic_sint32_t &LoggingStatus) XRAY_NEVER_INSTRUMENT {
+inline bool loggingInitialized() XRAY_NEVER_INSTRUMENT {
   return __sanitizer::atomic_load(&LoggingStatus,
                                   __sanitizer::memory_order_acquire) ==
          XRayLogInitStatus::XRAY_LOG_INITIALIZED;
@@ -576,7 +575,7 @@
     return false;
   }
 
-  if (!loggingInitialized(LoggingStatus) || LBQ->finalizing()) {
+  if (!loggingInitialized() || LBQ->finalizing()) {
     writeEOBMetadata();
     if (!releaseThreadLocalBuffer(*LBQ))
       return false;
Index: test/xray/TestCases/Linux/fdr-thread-order.cc
===================================================================
--- test/xray/TestCases/Linux/fdr-thread-order.cc
+++ test/xray/TestCases/Linux/fdr-thread-order.cc
@@ -1,37 +1,63 @@
 // RUN: %clangxx_xray -g -std=c++11 %s -o %t
 // RUN: rm fdr-thread-order.* || true
-// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false xray_logfile_base=fdr-thread-order. xray_fdr_log=true verbosity=1 xray_fdr_log_func_duration_threshold_us=0" %run %t 2>&1 | FileCheck %s
-// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t "`ls fdr-thread-order.* | head -1`" | FileCheck %s --check-prefix TRACE
+// RUN: XRAY_OPTIONS="patch_premain=false xray_naive_log=false \
+// RUN:     xray_logfile_base=fdr-thread-order. xray_fdr_log=true verbosity=1 \
+// RUN:     xray_fdr_log_func_duration_threshold_us=0" %run %t 2>&1 | \
+// RUN:     FileCheck %s
+// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \
+// RUN:     "`ls fdr-thread-order.* | head -1`"
+// RUN: %llvm_xray convert --symbolize --output-format=yaml -instr_map=%t \
+// RUN:     "`ls fdr-thread-order.* | head -1`" | \
+// RUN:     FileCheck %s --check-prefix TRACE
 // RUN: rm fdr-thread-order.*
 
 // FIXME: Make llvm-xray work on non-x86_64 as well.
 // REQUIRES: x86_64-linux
 // REQUIRES: built-in-llvm-tree
 
 #include "xray/xray_log_interface.h"
-#include <thread>
+#include <atomic>
 #include <cassert>
+#include <thread>
 
 constexpr auto kBufferSize = 16384;
 constexpr auto kBufferMax = 10;
-thread_local uint64_t var = 0;
-[[clang::xray_always_instrument]] void __attribute__((noinline)) f1() { ++var; }
-[[clang::xray_always_instrument]] void __attribute__((noinline)) f2() { ++var; }
+std::atomic<uint64_t> var{0};
+
+[[clang::xray_always_instrument]] void __attribute__((noinline)) f1() {
+  for (auto i = 0; i < 1 << 20; ++i)
+    ++var;
+}
+
+[[clang::xray_always_instrument]] void __attribute__((noinline)) f2() {
+  for (auto i = 0; i < 1 << 20; ++i)
+    ++var;
+}
 
 int main(int argc, char *argv[]) {
   using namespace __xray;
   FDRLoggingOptions Options;
+  __xray_patch();
   assert(__xray_log_init(kBufferSize, kBufferMax, &Options,
                          sizeof(FDRLoggingOptions)) ==
          XRayLogInitStatus::XRAY_LOG_INITIALIZED);
-  __xray_patch();
-  std::thread t1([] { f1(); });
-  std::thread t2([] { f2(); });
-  t1.join();
-  t2.join();
+
+  std::atomic_thread_fence(std::memory_order_acq_rel);
+
+  {
+    std::thread t1([] { f1(); });
+    std::thread t2([] { f2(); });
+    t1.join();
+    t2.join();
+  }
+
+  std::atomic_thread_fence(std::memory_order_acq_rel);
   __xray_log_finalize();
   __xray_log_flushLog();
+  __xray_unpatch();
+  return var > 0 ? 0 : 1;
   // CHECK: {{.*}}XRay: Log file in '{{.*}}'
+  // CHECK-NOT: Failed
 }
 
 // We want to make sure that the order of the function log doesn't matter.
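
For reference, a minimal standalone sketch of the First/Next ring bookkeeping that getBuffer()/releaseBuffer() introduce above. This is not part of the patch: SimpleRing and its members are hypothetical names, and locking plus the OwnedBuffers ownership check are deliberately omitted so only the pointer walk is shown.

// Sketch only: illustrates the wrap-around pointer arithmetic, not the
// actual BufferQueue implementation.
#include <cstddef>
#include <cstdio>
#include <memory>
#include <tuple>

struct Buffer {
  void *Data = nullptr;
  std::size_t Size = 0;
};

class SimpleRing {
  std::unique_ptr<std::tuple<Buffer, bool>[]> Slots;
  std::size_t Count;
  std::tuple<Buffer, bool> *Next;  // next slot to hand out
  std::tuple<Buffer, bool> *First; // oldest slot handed out, not yet recycled

public:
  explicit SimpleRing(std::size_t N)
      : Slots(new std::tuple<Buffer, bool>[N]()), Count(N),
        Next(Slots.get()), First(nullptr) {}

  // Mirrors getBuffer(): refuse once Next catches up to First, otherwise hand
  // out the slot at Next and advance it with wrap-around.
  bool get(Buffer &Out) {
    if (Next == First)
      return false;
    Out = std::get<0>(*Next);
    if (First == nullptr)
      First = Next;
    if (++Next == Slots.get() + Count)
      Next = Slots.get(); // wrap around
    return true;
  }

  // Mirrors releaseBuffer(): write the returned buffer into the slot at First,
  // mark it used, and advance First with the same wrap-around rule.
  bool release(const Buffer &In) {
    if (First == nullptr || First == Next)
      return false;
    *First = std::make_tuple(In, true /* used */);
    if (++First == Slots.get() + Count)
      First = Slots.get(); // wrap around
    return true;
  }
};

int main() {
  SimpleRing Ring(3);
  Buffer A, B, C;
  std::printf("get A: %d\n", Ring.get(A));         // 1
  std::printf("get B: %d\n", Ring.get(B));         // 1
  std::printf("release A: %d\n", Ring.release(A)); // 1: slot recycled
  std::printf("get C: %d\n", Ring.get(C));         // 1
}

Note that the sketch intentionally reproduces the First == Next ambiguity (no buffers outstanding vs. all buffers outstanding) that the in-line comment in releaseBuffer() flags as a semantic bug.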