Index: compiler-rt/lib/xray/tests/unit/test_helpers.cc
===================================================================
--- compiler-rt/lib/xray/tests/unit/test_helpers.cc
+++ compiler-rt/lib/xray/tests/unit/test_helpers.cc
@@ -82,7 +82,7 @@
   Serialized.append(reinterpret_cast<const char *>(&HeaderStorage),
                     sizeof(XRayFileHeader));
   Buffers.apply([&](const BufferQueue::Buffer &B) {
-    auto Size = atomic_load_relaxed(&B.Extents);
+    auto Size = atomic_load_relaxed(B.Extents);
     auto Extents = createMetadataRecord<
         MetadataRecord::RecordKinds::BufferExtents>(Size);
     Serialized.append(reinterpret_cast<const char *>(&Extents),
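The helper above serializes a trace by walking every used buffer, reading its extents through the Buffer's now pointer-typed Extents field, and emitting a buffer-extents metadata record followed by that many payload bytes. A minimal standalone sketch of that shape, assuming std::atomic and a hypothetical fixed-size record type in place of the sanitizer/XRay ones:

#include <atomic>
#include <cstdint>
#include <cstring>
#include <string>
#include <vector>

namespace sketch {

struct Buffer {
  std::atomic<uint64_t> *Extents = nullptr; // points into a shared extents array
  const char *Data = nullptr;
};

// Hypothetical stand-in for XRay's MetadataRecord: one kind byte plus payload.
struct ExtentsRecord {
  uint8_t Kind = 1; // "buffer extents" kind, chosen arbitrarily for the sketch
  char Payload[15] = {};
};

std::string serialize(const std::vector<Buffer> &Buffers) {
  std::string Out;
  for (const auto &B : Buffers) {
    // No concurrent writers are assumed during serialization, so a relaxed
    // load of the extents is enough here.
    const uint64_t Size = B.Extents->load(std::memory_order_relaxed);
    ExtentsRecord R;
    std::memcpy(R.Payload, &Size, sizeof(Size));
    Out.append(reinterpret_cast<const char *>(&R), sizeof(R));
    Out.append(B.Data, Size);
  }
  return Out;
}

} // namespace sketch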
Index: compiler-rt/lib/xray/xray_buffer_queue.h
===================================================================
--- compiler-rt/lib/xray/xray_buffer_queue.h
+++ compiler-rt/lib/xray/xray_buffer_queue.h
@@ -15,12 +15,12 @@
 #ifndef XRAY_BUFFER_QUEUE_H
 #define XRAY_BUFFER_QUEUE_H
 
+#include <cstddef>
+#include <cstdint>
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_mutex.h"
 #include "xray_defs.h"
-#include <cstddef>
-#include <cstdint>
 
 namespace __xray {
 
@@ -30,12 +30,13 @@
 /// "flight data recorder" (FDR) mode to support ongoing XRay function call
 /// trace collection.
 class BufferQueue {
-public:
+ public:
   /// ControlBlock represents the memory layout of how we interpret the backing
-  /// store for all buffers managed by a BufferQueue instance. The ControlBlock
-  /// has the reference count as the first member, sized according to
-  /// platform-specific cache-line size. We never use the Buffer member of the
-  /// union, which is only there for compiler-supported alignment and sizing.
+  /// store for all buffers and extents managed by a BufferQueue instance. The
+  /// ControlBlock has the reference count as the first member, sized according
+  /// to platform-specific cache-line size. We never use the Buffer member of
+  /// the union, which is only there for compiler-supported alignment and
+  /// sizing.
   ///
   /// This ensures that the `Data` member will be placed at least kCacheLineSize
   /// bytes from the beginning of the structure.
@@ -52,14 +53,15 @@
   };
 
   struct Buffer {
-    atomic_uint64_t Extents{0};
+    atomic_uint64_t *Extents{0};
     uint64_t Generation{0};
     void *Data = nullptr;
     size_t Size = 0;
 
-  private:
+   private:
     friend class BufferQueue;
     ControlBlock *BackingStore = nullptr;
+    ControlBlock *ExtentsBackingStore = nullptr;
     size_t Count = 0;
   };
 
@@ -72,12 +74,13 @@
     bool Used = false;
   };
 
-private:
+ private:
   // This models a ForwardIterator. |T| Must be either a `Buffer` or `const
   // Buffer`. Note that we only advance to the "used" buffers, when
   // incrementing, so that at dereference we're always at a valid point.
-  template <class T> class Iterator {
-  public:
+  template <class T>
+  class Iterator {
+   public:
     BufferRep *Buffers = nullptr;
     size_t Offset = 0;
     size_t Max = 0;
@@ -142,6 +145,9 @@
   // The collocated ControlBlock and buffer storage.
   ControlBlock *BackingStore;
 
+  // The collocated ControlBlock and extents storage.
+  ControlBlock *ExtentsBackingStore;
+
   // A dynamically allocated array of BufferRep instances.
   BufferRep *Buffers;
 
@@ -162,7 +168,7 @@
   /// Releases references to the buffers backed by the current buffer queue.
   void cleanupBuffers();
 
-public:
+ public:
   enum class ErrorCode : unsigned {
     Ok,
     NotEnoughMemory,
@@ -174,18 +180,18 @@
 
   static const char *getErrorString(ErrorCode E) {
     switch (E) {
-    case ErrorCode::Ok:
-      return "(none)";
-    case ErrorCode::NotEnoughMemory:
-      return "no available buffers in the queue";
-    case ErrorCode::QueueFinalizing:
-      return "queue already finalizing";
-    case ErrorCode::UnrecognizedBuffer:
-      return "buffer being returned not owned by buffer queue";
-    case ErrorCode::AlreadyFinalized:
-      return "queue already finalized";
-    case ErrorCode::AlreadyInitialized:
-      return "queue already initialized";
+      case ErrorCode::Ok:
+        return "(none)";
+      case ErrorCode::NotEnoughMemory:
+        return "no available buffers in the queue";
+      case ErrorCode::QueueFinalizing:
+        return "queue already finalizing";
+      case ErrorCode::UnrecognizedBuffer:
+        return "buffer being returned not owned by buffer queue";
+      case ErrorCode::AlreadyFinalized:
+        return "queue already finalized";
+      case ErrorCode::AlreadyInitialized:
+        return "queue already initialized";
     }
     return "unknown error";
   }
@@ -248,10 +254,10 @@
   /// Applies the provided function F to each Buffer in the queue, only if the
   /// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a
   /// releaseBuffer(...) operation).
-  template <class F> void apply(F Fn) XRAY_NEVER_INSTRUMENT {
+  template <class F>
+  void apply(F Fn) XRAY_NEVER_INSTRUMENT {
     SpinMutexLock G(&Mutex);
-    for (auto I = begin(), E = end(); I != E; ++I)
-      Fn(*I);
+    for (auto I = begin(), E = end(); I != E; ++I) Fn(*I);
   }
 
   using const_iterator = Iterator<const BufferRep>;
@@ -271,6 +277,6 @@
   ~BufferQueue();
 };
 
-} // namespace __xray
+}  // namespace __xray
 
-#endif // XRAY_BUFFER_QUEUE_H
+#endif  // XRAY_BUFFER_QUEUE_H
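The ControlBlock comment above describes the layout trick this header relies on: the reference count shares a union with a cache-line-sized byte array, so the Data member (and every buffer carved out of it) starts at least one cache line past the refcount, and the whole backing store for N buffers is a single allocation. A rough standalone sketch of that idea, assuming std::atomic in place of the sanitizer atomics, malloc with placement-new instead of the XRay allocator, and a 64-byte cache line:

#include <atomic>
#include <cstdint>
#include <cstdlib>
#include <new>

namespace sketch {

constexpr size_t kCacheLineSize = 64; // assumed; the real value is per-platform

struct ControlBlock {
  union {
    std::atomic<uint64_t> RefCount;       // the only member ever used
    unsigned char Sizing[kCacheLineSize]; // only here to pad to a cache line
  };
  char Data[1]; // first byte of the storage that follows the block

  ControlBlock() : RefCount(1) {} // activates the atomic member of the union
};

// One allocation holds the control block plus Count buffers of Size bytes;
// Data[0] doubles as the first byte of buffer 0, hence the "- 1".
inline ControlBlock *allocControlBlock(size_t Size, size_t Count) {
  void *P = std::malloc((sizeof(ControlBlock) - 1) + (Size * Count));
  return P == nullptr ? nullptr : new (P) ControlBlock();
}

// Buffer I lives at a fixed offset inside the shared backing store.
inline char *bufferAt(ControlBlock &CB, size_t Size, size_t I) {
  return &CB.Data[0] + (Size * I);
}

} // namespace sketch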
Index: compiler-rt/lib/xray/xray_buffer_queue.cc
===================================================================
--- compiler-rt/lib/xray/xray_buffer_queue.cc
+++ compiler-rt/lib/xray/xray_buffer_queue.cc
@@ -13,17 +13,16 @@
 //
 //===----------------------------------------------------------------------===//
 #include "xray_buffer_queue.h"
+#include <sys/mman.h>
+#include <memory>
 #include "sanitizer_common/sanitizer_atomic.h"
 #include "sanitizer_common/sanitizer_common.h"
 #include "sanitizer_common/sanitizer_libc.h"
 #include "sanitizer_common/sanitizer_posix.h"
 #include "xray_allocator.h"
 #include "xray_defs.h"
-#include <memory>
-#include <sys/mman.h>
 
 using namespace __xray;
-using namespace __sanitizer;
 
 namespace {
 
@@ -41,25 +40,34 @@
 }
 
 void decRefCount(BufferQueue::ControlBlock *C, size_t Size, size_t Count) {
-  if (C == nullptr)
-    return;
+  if (C == nullptr) return;
   if (atomic_fetch_sub(&C->RefCount, 1, memory_order_acq_rel) == 1)
     deallocControlBlock(C, Size, Count);
 }
 
 void incRefCount(BufferQueue::ControlBlock *C) {
-  if (C == nullptr)
-    return;
+  if (C == nullptr) return;
   atomic_fetch_add(&C->RefCount, 1, memory_order_acq_rel);
 }
 
-} // namespace
+// We use a struct to ensure that we are allocating one atomic_uint64_t per
+// cache line. This allows us to not worry about false-sharing among atomic
+// objects being updated (constantly) by different threads.
+struct ExtentsPadded {
+  union {
+    atomic_uint64_t Extents;
+    unsigned char Storage[kCacheLineSize];
+  };
+};
+
+constexpr size_t kExtentsSize = sizeof(ExtentsPadded);
+
+}  // namespace
 
 BufferQueue::ErrorCode BufferQueue::init(size_t BS, size_t BC) {
   SpinMutexLock Guard(&Mutex);
 
-  if (!finalizing())
-    return BufferQueue::ErrorCode::AlreadyInitialized;
+  if (!finalizing()) return BufferQueue::ErrorCode::AlreadyInitialized;
 
   cleanupBuffers();
 
@@ -68,20 +76,28 @@
   BufferCount = BC;
 
   BackingStore = allocControlBlock(BufferSize, BufferCount);
-  if (BackingStore == nullptr)
-    return BufferQueue::ErrorCode::NotEnoughMemory;
+  if (BackingStore == nullptr) return BufferQueue::ErrorCode::NotEnoughMemory;
 
-  auto CleanupBackingStore = __sanitizer::at_scope_exit([&, this] {
-    if (Success)
-      return;
+  auto CleanupBackingStore = at_scope_exit([&, this] {
+    if (Success) return;
     deallocControlBlock(BackingStore, BufferSize, BufferCount);
     BackingStore = nullptr;
   });
 
-  Buffers = initArray<BufferRep>(BufferCount);
-  if (Buffers == nullptr)
+  // Initialize enough atomic_uint64_t instances, each
+  ExtentsBackingStore = allocControlBlock(kExtentsSize, BufferCount);
+  if (ExtentsBackingStore == nullptr)
     return BufferQueue::ErrorCode::NotEnoughMemory;
 
+  auto CleanupExtentsBackingStore = at_scope_exit([&, this] {
+    if (Success) return;
+    deallocControlBlock(ExtentsBackingStore, kExtentsSize, BufferCount);
+    ExtentsBackingStore = nullptr;
+  });
+
+  Buffers = initArray<BufferRep>(BufferCount);
+  if (Buffers == nullptr) return BufferQueue::ErrorCode::NotEnoughMemory;
+
   // At this point we increment the generation number to associate the buffers
   // to the new generation.
   atomic_fetch_add(&Generation, 1, memory_order_acq_rel);
@@ -89,6 +105,7 @@
   // First, we initialize the refcount in the ControlBlock, which we treat as
   // being at the start of the BackingStore pointer.
   atomic_store(&BackingStore->RefCount, 1, memory_order_release);
+  atomic_store(&ExtentsBackingStore->RefCount, 1, memory_order_release);
 
   // Then we initialise the individual buffers that sub-divide the whole backing
   // store. Each buffer will start at the `Data` member of the ControlBlock, and
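The ExtentsPadded struct above is the heart of the change: each buffer's extents counter now lives in its own cache-line-sized slot of a separate, refcounted backing store, so threads bumping the extents of different buffers never contend on the same cache line. A rough standalone sketch of that wiring, assuming std::atomic, alignas instead of the union-with-byte-array used here, and a 64-byte cache line:

#include <atomic>
#include <cstddef>
#include <cstdint>

namespace sketch {

constexpr size_t kCacheLineSize = 64; // assumed

// One extents counter per cache line; alignas pads the struct to a full line.
struct ExtentsPadded {
  alignas(kCacheLineSize) std::atomic<uint64_t> Extents{0};
};
static_assert(sizeof(ExtentsPadded) == kCacheLineSize,
              "exactly one counter per cache line");

// The buffer no longer owns its extents; it points at its reserved slot.
struct Buffer {
  std::atomic<uint64_t> *Extents = nullptr;
  char *Data = nullptr;
  size_t Size = 0;
};

// Wire buffer I to slot I of the shared extents array and reset it, mirroring
// the loop in BufferQueue::init above.
template <size_t N>
void wireUp(ExtentsPadded (&Slots)[N], Buffer (&Buffers)[N]) {
  for (size_t I = 0; I < N; ++I) {
    Buffers[I].Extents = &Slots[I].Extents;
    Buffers[I].Extents->store(0, std::memory_order_release);
  }
}

} // namespace sketch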
@@ -96,11 +113,15 @@
   for (size_t i = 0; i < BufferCount; ++i) {
     auto &T = Buffers[i];
     auto &Buf = T.Buff;
-    atomic_store(&Buf.Extents, 0, memory_order_release);
+    auto *E = reinterpret_cast<ExtentsPadded *>(&ExtentsBackingStore->Data +
+                                                (kExtentsSize * i));
+    Buf.Extents = &E->Extents;
+    atomic_store(Buf.Extents, 0, memory_order_release);
     Buf.Generation = generation();
     Buf.Data = &BackingStore->Data + (BufferSize * i);
     Buf.Size = BufferSize;
     Buf.BackingStore = BackingStore;
+    Buf.ExtentsBackingStore = ExtentsBackingStore;
     Buf.Count = BufferCount;
     T.Used = false;
   }
@@ -120,6 +141,7 @@
       Mutex(),
       Finalizing{1},
       BackingStore(nullptr),
+      ExtentsBackingStore(nullptr),
       Buffers(nullptr),
       Next(Buffers),
       First(Buffers),
@@ -135,15 +157,14 @@
   BufferRep *B = nullptr;
   {
     SpinMutexLock Guard(&Mutex);
-    if (LiveBuffers == BufferCount)
-      return ErrorCode::NotEnoughMemory;
+    if (LiveBuffers == BufferCount) return ErrorCode::NotEnoughMemory;
     B = Next++;
-    if (Next == (Buffers + BufferCount))
-      Next = Buffers;
+    if (Next == (Buffers + BufferCount)) Next = Buffers;
     ++LiveBuffers;
   }
 
   incRefCount(BackingStore);
+  incRefCount(ExtentsBackingStore);
   Buf = B->Buff;
   Buf.Generation = generation();
   B->Used = true;
@@ -159,6 +180,7 @@
   if (Buf.Generation != generation() || LiveBuffers == 0) {
     Buf = {};
     decRefCount(Buf.BackingStore, Buf.Size, Buf.Count);
+    decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count);
     return BufferQueue::ErrorCode::Ok;
   }
 
@@ -168,16 +190,15 @@
     --LiveBuffers;
     B = First++;
-    if (First == (Buffers + BufferCount))
-      First = Buffers;
+    if (First == (Buffers + BufferCount)) First = Buffers;
   }
 
   // Now that the buffer has been released, we mark it as "used".
   B->Buff = Buf;
   B->Used = true;
   decRefCount(Buf.BackingStore, Buf.Size, Buf.Count);
-  atomic_store(&B->Buff.Extents,
-               atomic_load(&Buf.Extents, memory_order_acquire),
+  decRefCount(Buf.ExtentsBackingStore, kExtentsSize, Buf.Count);
+  atomic_store(B->Buff.Extents, atomic_load(Buf.Extents, memory_order_acquire),
                memory_order_release);
   Buf = {};
   return ErrorCode::Ok;
@@ -194,7 +215,9 @@
     B->~BufferRep();
   deallocateBuffer(Buffers, BufferCount);
   decRefCount(BackingStore, BufferSize, BufferCount);
+  decRefCount(ExtentsBackingStore, kExtentsSize, BufferCount);
   BackingStore = nullptr;
+  ExtentsBackingStore = nullptr;
   Buffers = nullptr;
   BufferCount = 0;
   BufferSize = 0;
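Every writer path in FDRLogWriter follows the same protocol the comments above describe: copy the record bytes into the buffer, issue a release fence, then bump the extents; the flush path in xray_fdr_logging.cc below pairs this with an acquire fence and an acquire load before trusting the byte count. A condensed standalone sketch of that protocol, assuming std::atomic in place of the sanitizer atomics, a single writer per buffer (as in FDR mode), and hypothetical append/snapshot helpers:

#include <atomic>
#include <cstdint>
#include <cstring>

namespace sketch {

struct Buffer {
  std::atomic<uint64_t> *Extents = nullptr; // published byte count
  char *Data = nullptr;
};

// Writer side: copy the record into the buffer, then publish it by growing
// the extents. The release fence orders the memcpy before the counter update.
inline void append(Buffer &B, const void *Record, size_t Size) {
  const uint64_t Offset = B.Extents->load(std::memory_order_relaxed);
  std::memcpy(B.Data + Offset, Record, Size);
  std::atomic_thread_fence(std::memory_order_release);
  B.Extents->fetch_add(Size, std::memory_order_acq_rel);
}

// Reader side: the acquire fence and load pair with the writer's release, so
// every byte in [0, Extents) is visible once the load returns.
inline uint64_t snapshot(const Buffer &B, char *Out) {
  std::atomic_thread_fence(std::memory_order_acquire);
  const uint64_t Extents = B.Extents->load(std::memory_order_acquire);
  std::memcpy(Out, B.Data, Extents);
  return Extents;
}

} // namespace sketch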
Index: compiler-rt/lib/xray/xray_fdr_controller.h
===================================================================
--- compiler-rt/lib/xray/xray_fdr_controller.h
+++ compiler-rt/lib/xray/xray_fdr_controller.h
@@ -64,7 +64,7 @@
     First = true;
     UndoableFunctionEnters = 0;
     UndoableTailExits = 0;
-    atomic_store(&B.Extents, 0, memory_order_release);
+    atomic_store(B.Extents, 0, memory_order_release);
     return true;
   }
 
@@ -123,7 +123,7 @@
     if (First) {
       First = false;
       W.resetRecord();
-      atomic_store(&B.Extents, 0, memory_order_release);
+      atomic_store(B.Extents, 0, memory_order_release);
       return setupNewBuffer();
     }
 
Index: compiler-rt/lib/xray/xray_fdr_log_writer.h
===================================================================
--- compiler-rt/lib/xray/xray_fdr_log_writer.h
+++ compiler-rt/lib/xray/xray_fdr_log_writer.h
@@ -86,7 +86,7 @@
     // read the bytes in the buffer will see the writes committed before the
    // extents are updated.
     atomic_thread_fence(memory_order_release);
-    atomic_fetch_add(&Buffer.Extents, sizeof(T), memory_order_acq_rel);
+    atomic_fetch_add(Buffer.Extents, sizeof(T), memory_order_acq_rel);
   }
 
 public:
@@ -116,7 +116,7 @@
     // read the bytes in the buffer will see the writes committed before the
     // extents are updated.
     atomic_thread_fence(memory_order_release);
-    atomic_fetch_add(&Buffer.Extents, Size, memory_order_acq_rel);
+    atomic_fetch_add(Buffer.Extents, Size, memory_order_acq_rel);
     return Size;
   }
 
@@ -160,7 +160,7 @@
     // read the bytes in the buffer will see the writes committed before the
     // extents are updated.
     atomic_thread_fence(memory_order_release);
-    atomic_fetch_add(&Buffer.Extents, sizeof(R) + sizeof(A),
+    atomic_fetch_add(Buffer.Extents, sizeof(R) + sizeof(A),
                      memory_order_acq_rel);
     return true;
   }
@@ -185,7 +185,7 @@
     // read the bytes in the buffer will see the writes committed before the
     // extents are updated.
     atomic_thread_fence(memory_order_release);
-    atomic_fetch_add(&Buffer.Extents, sizeof(R) + EventSize,
+    atomic_fetch_add(Buffer.Extents, sizeof(R) + EventSize,
                      memory_order_acq_rel);
     return true;
   }
@@ -208,7 +208,7 @@
     // read the bytes in the buffer will see the writes committed before the
     // extents are updated.
     atomic_thread_fence(memory_order_release);
-    atomic_fetch_add(&Buffer.Extents, EventSize, memory_order_acq_rel);
+    atomic_fetch_add(Buffer.Extents, EventSize, memory_order_acq_rel);
     return true;
   }
 
@@ -216,13 +216,13 @@
   void resetRecord() {
     NextRecord = reinterpret_cast<char *>(Buffer.Data);
-    atomic_store(&Buffer.Extents, 0, memory_order_release);
+    atomic_store(Buffer.Extents, 0, memory_order_release);
   }
 
   void undoWrites(size_t B) {
     DCHECK_GE(NextRecord - B, reinterpret_cast<char *>(Buffer.Data));
     NextRecord -= B;
-    atomic_fetch_sub(&Buffer.Extents, B, memory_order_acq_rel);
+    atomic_fetch_sub(Buffer.Extents, B, memory_order_acq_rel);
   }
 }; // namespace __xray
Index: compiler-rt/lib/xray/xray_fdr_logging.cc
===================================================================
--- compiler-rt/lib/xray/xray_fdr_logging.cc
+++ compiler-rt/lib/xray/xray_fdr_logging.cc
@@ -250,7 +250,7 @@
   // fence ordering to ensure that writes we expect to have been completed
   // before the fence are fully committed before we read the extents.
   atomic_thread_fence(memory_order_acquire);
-  auto BufferSize = atomic_load(&It->Extents, memory_order_acquire);
+  auto BufferSize = atomic_load(It->Extents, memory_order_acquire);
   SerializedBufferSize = BufferSize + sizeof(MetadataRecord);
   CurrentBuffer = allocateBuffer(SerializedBufferSize);
   if (CurrentBuffer == nullptr)
@@ -364,7 +364,7 @@
   // still use a Metadata record, but fill in the extents instead for the
   // data.
   MetadataRecord ExtentsRecord;
-  auto BufferExtents = atomic_load(&B.Extents, memory_order_acquire);
+  auto BufferExtents = atomic_load(B.Extents, memory_order_acquire);
   DCHECK(BufferExtents <= B.Size);
   ExtentsRecord.Type = uint8_t(RecordType::Metadata);
   ExtentsRecord.RecordKind =