Index: compiler-rt/lib/xray/xray_buffer_queue.h
===================================================================
--- compiler-rt/lib/xray/xray_buffer_queue.h
+++ compiler-rt/lib/xray/xray_buffer_queue.h
@@ -25,9 +25,9 @@
 namespace __xray {
 
 /// BufferQueue implements a circular queue of fixed sized buffers (much like a
-/// freelist) but is concerned mostly with making it really quick to initialise,
-/// finalise, and get/return buffers to the queue. This is one key component of
-/// the "flight data recorder" (FDR) mode to support ongoing XRay function call
+/// freelist) but is concerned with making it quick to initialise, finalise, and
+/// get from or return buffers to the queue. This is one key component of the
+/// "flight data recorder" (FDR) mode to support ongoing XRay function call
 /// trace collection.
 class BufferQueue {
 public:
@@ -36,6 +36,11 @@
     uint64_t Generation{0};
     void *Data = nullptr;
     size_t Size = 0;
+
+  private:
+    friend class BufferQueue;
+    unsigned char *BackingStore = nullptr;
+    size_t Count = 0;
   };
 
   struct BufferRep {
@@ -135,6 +140,9 @@
   // associated with.
   atomic_uint64_t Generation;
 
+  /// Releases references to the buffers backed by the current buffer queue.
+  void cleanupBuffers();
+
 public:
   enum class ErrorCode : unsigned {
     Ok,

Index: compiler-rt/lib/xray/xray_buffer_queue.cc
===================================================================
--- compiler-rt/lib/xray/xray_buffer_queue.cc
+++ compiler-rt/lib/xray/xray_buffer_queue.cc
@@ -13,6 +13,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "xray_buffer_queue.h"
+#include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_posix.h"
@@ -24,23 +25,45 @@
 using namespace __xray;
 using namespace __sanitizer;
 
+namespace {
+
+void decRefCount(unsigned char *ControlBlock, size_t Size, size_t Count) {
+  if (ControlBlock == nullptr)
+    return;
+  auto *RefCount = reinterpret_cast<atomic_uint64_t *>(ControlBlock);
+  if (atomic_fetch_sub(RefCount, 1, memory_order_acq_rel) == 1)
+    deallocateBuffer(ControlBlock, (Size * Count) + kCacheLineSize);
+}
+
+void incRefCount(unsigned char *ControlBlock) {
+  if (ControlBlock == nullptr)
+    return;
+  auto *RefCount = reinterpret_cast<atomic_uint64_t *>(ControlBlock);
+  atomic_fetch_add(RefCount, 1, memory_order_acq_rel);
+}
+
+} // namespace
+
 BufferQueue::ErrorCode BufferQueue::init(size_t BS, size_t BC) {
   SpinMutexLock Guard(&Mutex);
 
   if (!finalizing())
     return BufferQueue::ErrorCode::AlreadyInitialized;
 
+  cleanupBuffers();
+
   bool Success = false;
   BufferSize = BS;
   BufferCount = BC;
-  BackingStore = allocateBuffer(BufferSize * BufferCount);
+  BackingStore = allocateBuffer((BufferSize * BufferCount) + kCacheLineSize);
   if (BackingStore == nullptr)
     return BufferQueue::ErrorCode::NotEnoughMemory;
 
   auto CleanupBackingStore = __sanitizer::at_scope_exit([&, this] {
     if (Success)
       return;
-    deallocateBuffer(BackingStore, BufferSize * BufferCount);
+    deallocateBuffer(BackingStore, (BufferSize * BufferCount) + kCacheLineSize);
+    BackingStore = nullptr;
   });
 
   Buffers = initArray<BufferRep>(BufferCount);
@@ -52,13 +75,21 @@
   atomic_fetch_add(&Generation, 1, memory_order_acq_rel);
 
   Success = true;
+
+  // First, we initialize the refcount in the RefCountedBackingStore, which we
+  // treat as being at the start of the BackingStore pointer.
+  auto ControlBlock = reinterpret_cast<atomic_uint64_t *>(BackingStore);
+  atomic_store(ControlBlock, 1, memory_order_release);
+
   for (size_t i = 0; i < BufferCount; ++i) {
     auto &T = Buffers[i];
     auto &Buf = T.Buff;
     atomic_store(&Buf.Extents, 0, memory_order_release);
     Buf.Generation = generation();
-    Buf.Data = reinterpret_cast<char *>(BackingStore) + (BufferSize * i);
+    Buf.Data = BackingStore + kCacheLineSize + (BufferSize * i);
     Buf.Size = BufferSize;
+    Buf.BackingStore = BackingStore;
+    Buf.Count = BufferCount;
     T.Used = false;
   }
 
@@ -99,9 +130,12 @@
     ++LiveBuffers;
   }
 
+  incRefCount(BackingStore);
   Buf.Data = B->Buff.Data;
   Buf.Generation = generation();
   Buf.Size = B->Buff.Size;
+  Buf.BackingStore = BackingStore;
+  Buf.Count = BufferCount;
   B->Used = true;
   return ErrorCode::Ok;
 }
@@ -116,18 +150,24 @@
       Buf.Data > reinterpret_cast<char *>(BackingStore) +
                      (BufferCount * BufferSize)) {
     if (Buf.Generation != generation()) {
+      decRefCount(Buf.BackingStore, Buf.Size, Buf.Count);
       Buf.Data = nullptr;
       Buf.Size = 0;
       Buf.Generation = 0;
+      Buf.Count = 0;
+      Buf.BackingStore = nullptr;
       return BufferQueue::ErrorCode::Ok;
     }
     return BufferQueue::ErrorCode::UnrecognizedBuffer;
   }
 
   if (LiveBuffers == 0) {
+    decRefCount(Buf.BackingStore, Buf.Size, Buf.Count);
     Buf.Data = nullptr;
     Buf.Size = Buf.Size;
     Buf.Generation = 0;
+    Buf.BackingStore = nullptr;
+    Buf.Count = 0;
     return ErrorCode::Ok;
   }
 
@@ -141,13 +181,18 @@
   B->Buff.Data = Buf.Data;
   B->Buff.Size = Buf.Size;
   B->Buff.Generation = Buf.Generation;
+  B->Buff.BackingStore = Buf.BackingStore;
+  B->Buff.Count = Buf.Count;
   B->Used = true;
+  decRefCount(Buf.BackingStore, Buf.Size, Buf.Count);
   atomic_store(&B->Buff.Extents,
                atomic_load(&Buf.Extents, memory_order_acquire),
                memory_order_release);
   Buf.Data = nullptr;
   Buf.Size = 0;
   Buf.Generation = 0;
+  Buf.BackingStore = nullptr;
+  Buf.Count = 0;
   return ErrorCode::Ok;
 }
 
@@ -157,9 +202,15 @@
   return ErrorCode::Ok;
 }
 
-BufferQueue::~BufferQueue() {
+void BufferQueue::cleanupBuffers() {
   for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B)
     B->~BufferRep();
   deallocateBuffer(Buffers, BufferCount);
-  deallocateBuffer(BackingStore, BufferSize * BufferCount);
+  decRefCount(BackingStore, BufferSize, BufferCount);
+  BackingStore = nullptr;
+  Buffers = nullptr;
+  BufferCount = 0;
+  BufferSize = 0;
 }
+
+BufferQueue::~BufferQueue() { cleanupBuffers(); }
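Note on the scheme the patch implements: the first kCacheLineSize bytes of the backing store act as a control block holding an atomic reference count. The queue owns one reference from init(), every Buffer handed out through getBuffer() takes another, and whichever side drops the count to zero (releaseBuffer, cleanupBuffers, or the destructor) frees the whole allocation, so buffers from an older generation stay valid until their holders return them. Below is a minimal standalone sketch of that pattern, not part of the patch: it uses standard C++ std::atomic and malloc/free in place of the sanitizer primitives, the helper names (allocateStore, incRef, decRef, bufferData) are illustrative, and the Size/Count bookkeeping the patch needs for deallocateBuffer is omitted because free() does not take a size.

// Illustrative sketch only; assumes a 64-byte control block at the start of
// the allocation, mirroring the patch's kCacheLineSize-sized header.
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <new>

namespace {

constexpr std::size_t kCacheLineSize = 64;

// The first kCacheLineSize bytes hold the reference count; buffer data
// starts immediately after the control block.
unsigned char *allocateStore(std::size_t BufferSize, std::size_t BufferCount) {
  auto *Store = static_cast<unsigned char *>(
      std::malloc(kCacheLineSize + BufferSize * BufferCount));
  if (Store == nullptr)
    return nullptr;
  // The queue itself owns the initial reference.
  new (Store) std::atomic<std::uint64_t>(1);
  return Store;
}

std::atomic<std::uint64_t> *refCount(unsigned char *Store) {
  return reinterpret_cast<std::atomic<std::uint64_t> *>(Store);
}

void incRef(unsigned char *Store) {
  if (Store == nullptr)
    return;
  refCount(Store)->fetch_add(1, std::memory_order_acq_rel);
}

void decRef(unsigned char *Store) {
  if (Store == nullptr)
    return;
  // Dropping the last reference releases the whole backing store.
  if (refCount(Store)->fetch_sub(1, std::memory_order_acq_rel) == 1)
    std::free(Store);
}

// Data for buffer `i` lives past the control block.
unsigned char *bufferData(unsigned char *Store, std::size_t BufferSize,
                          std::size_t i) {
  return Store + kCacheLineSize + BufferSize * i;
}

} // namespace

int main() {
  // One backing store shared by four 4 KiB buffers.
  unsigned char *Store = allocateStore(4096, 4);
  if (Store == nullptr)
    return 1;

  incRef(Store);                                    // a caller gets a buffer
  unsigned char *Data = bufferData(Store, 4096, 0); // ... writes records ...
  (void)Data;
  decRef(Store);                                    // caller returns the buffer

  decRef(Store); // queue finalizes: count hits zero, the store is freed
  return 0;
}

The design point this sketch captures is why the patch adds BackingStore and Count to Buffer: a released buffer must be able to decrement the refcount of the exact allocation (and generation) it came from, even after the queue has re-initialised with a new backing store.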