Index: compiler-rt/lib/xray/tests/unit/fdr_controller_test.cc =================================================================== --- compiler-rt/lib/xray/tests/unit/fdr_controller_test.cc +++ compiler-rt/lib/xray/tests/unit/fdr_controller_test.cc @@ -183,7 +183,7 @@ public: void SetUp() override { bool Success; - BQ = llvm::make_unique<BufferQueue>(sizeof(MetadataRecord) * 4 + + BQ = llvm::make_unique<BufferQueue>(sizeof(MetadataRecord) * 5 + sizeof(FunctionRecord) * 2, kBuffers, Success); ASSERT_TRUE(Success); @@ -198,12 +198,12 @@ TEST_F(BufferManagementTest, HandlesOverflow) { uint64_t TSC = 1; uint16_t CPU = 1; - for (size_t I = 0; I < kBuffers; ++I) { + for (size_t I = 0; I < kBuffers + 1; ++I) { ASSERT_TRUE(C->functionEnter(1, TSC++, CPU)); ASSERT_TRUE(C->functionExit(1, TSC++, CPU)); } - C->flush(); - ASSERT_EQ(BQ->finalize(), BufferQueue::ErrorCode::Ok); + ASSERT_TRUE(C->flush()); + ASSERT_THAT(BQ->finalize(), Eq(BufferQueue::ErrorCode::Ok)); std::string Serialized = serialize(*BQ, 3); llvm::DataExtractor DE(Serialized, true, 8); @@ -238,5 +238,28 @@ FuncId(1), RecordType(llvm::xray::RecordTypes::ENTER))))); } +TEST_F(BufferManagementTest, HandlesGenerationalBufferQueue) { + uint64_t TSC = 1; + uint16_t CPU = 1; + + ASSERT_TRUE(C->functionEnter(1, TSC++, CPU)); + ASSERT_THAT(BQ->finalize(), Eq(BufferQueue::ErrorCode::Ok)); + ASSERT_THAT(BQ->init(sizeof(MetadataRecord) * 4 + sizeof(FunctionRecord) * 2, + kBuffers), + Eq(BufferQueue::ErrorCode::Ok)); + EXPECT_TRUE(C->functionExit(1, TSC++, CPU)); + ASSERT_TRUE(C->flush()); + + // We expect that we will only be able to find the function exit event, but + // not the function enter event, since we only have information about the new + // generation of the buffers. 
+ std::string Serialized = serialize(*BQ, 3); + llvm::DataExtractor DE(Serialized, true, 8); + auto TraceOrErr = llvm::xray::loadTrace(DE); + EXPECT_THAT_EXPECTED( + TraceOrErr, HasValue(ElementsAre(AllOf( + FuncId(1), RecordType(llvm::xray::RecordTypes::EXIT))))); +} + } // namespace } // namespace __xray Index: compiler-rt/lib/xray/xray_buffer_queue.h =================================================================== --- compiler-rt/lib/xray/xray_buffer_queue.h +++ compiler-rt/lib/xray/xray_buffer_queue.h @@ -25,9 +25,9 @@ namespace __xray { /// BufferQueue implements a circular queue of fixed sized buffers (much like a -/// freelist) but is concerned mostly with making it really quick to initialise, -/// finalise, and get/return buffers to the queue. This is one key component of -/// the "flight data recorder" (FDR) mode to support ongoing XRay function call +/// freelist) but is concerned with making it quick to initialise, finalise, and +/// get from or return buffers to the queue. This is one key component of the +/// "flight data recorder" (FDR) mode to support ongoing XRay function call /// trace collection. class BufferQueue { public: @@ -36,6 +36,11 @@ uint64_t Generation{0}; void *Data = nullptr; size_t Size = 0; + + private: + friend class BufferQueue; + unsigned char *BackingStore = nullptr; + size_t Count = 0; }; struct BufferRep { @@ -135,6 +140,9 @@ // associated with. atomic_uint64_t Generation; + /// Releases references to the buffers backed by the current buffer queue. 
+ void cleanupBuffers(); + public: enum class ErrorCode : unsigned { Ok, Index: compiler-rt/lib/xray/xray_buffer_queue.cc =================================================================== --- compiler-rt/lib/xray/xray_buffer_queue.cc +++ compiler-rt/lib/xray/xray_buffer_queue.cc @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// #include "xray_buffer_queue.h" +#include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_libc.h" #include "sanitizer_common/sanitizer_posix.h" @@ -24,23 +25,45 @@ using namespace __xray; using namespace __sanitizer; +namespace { + +void decRefCount(unsigned char *ControlBlock, size_t Size, size_t Count) { + if (ControlBlock == nullptr) + return; + auto *RefCount = reinterpret_cast<atomic_uint64_t *>(ControlBlock); + if (atomic_fetch_sub(RefCount, 1, memory_order_acq_rel) == 1) + deallocateBuffer(ControlBlock, (Size * Count) + kCacheLineSize); +} + +void incRefCount(unsigned char *ControlBlock) { + if (ControlBlock == nullptr) + return; + auto *RefCount = reinterpret_cast<atomic_uint64_t *>(ControlBlock); + atomic_fetch_add(RefCount, 1, memory_order_acq_rel); +} + +} // namespace + BufferQueue::ErrorCode BufferQueue::init(size_t BS, size_t BC) { SpinMutexLock Guard(&Mutex); if (!finalizing()) return BufferQueue::ErrorCode::AlreadyInitialized; + cleanupBuffers(); + bool Success = false; BufferSize = BS; BufferCount = BC; - BackingStore = allocateBuffer(BufferSize * BufferCount); + BackingStore = allocateBuffer((BufferSize * BufferCount) + kCacheLineSize); if (BackingStore == nullptr) return BufferQueue::ErrorCode::NotEnoughMemory; auto CleanupBackingStore = __sanitizer::at_scope_exit([&, this] { if (Success) return; - deallocateBuffer(BackingStore, BufferSize * BufferCount); + deallocateBuffer(BackingStore, (BufferSize * BufferCount) + kCacheLineSize); + BackingStore = nullptr; }); Buffers = initArray<BufferRep>(BufferCount); @@ -52,13 +75,21 @@ 
atomic_fetch_add(&Generation, 1, memory_order_acq_rel); Success = true; + + // First, we initialize the refcount in the RefCountedBackingStore, which we + // treat as being at the start of the BackingStore pointer. + auto ControlBlock = reinterpret_cast<atomic_uint64_t *>(BackingStore); + atomic_store(ControlBlock, 1, memory_order_release); + for (size_t i = 0; i < BufferCount; ++i) { auto &T = Buffers[i]; auto &Buf = T.Buff; atomic_store(&Buf.Extents, 0, memory_order_release); Buf.Generation = generation(); - Buf.Data = reinterpret_cast<char *>(BackingStore) + (BufferSize * i); + Buf.Data = BackingStore + kCacheLineSize + (BufferSize * i); Buf.Size = BufferSize; + Buf.BackingStore = BackingStore; + Buf.Count = BufferCount; T.Used = false; } @@ -99,9 +130,12 @@ ++LiveBuffers; } + incRefCount(BackingStore); Buf.Data = B->Buff.Data; Buf.Generation = generation(); Buf.Size = B->Buff.Size; + Buf.BackingStore = BackingStore; + Buf.Count = BufferCount; B->Used = true; return ErrorCode::Ok; } @@ -116,18 +150,24 @@ Buf.Data > reinterpret_cast<char *>(BackingStore) + (BufferCount * BufferSize)) { if (Buf.Generation != generation()) { + decRefCount(Buf.BackingStore, Buf.Size, Buf.Count); Buf.Data = nullptr; Buf.Size = 0; Buf.Generation = 0; + Buf.Count = 0; + Buf.BackingStore = nullptr; return BufferQueue::ErrorCode::Ok; } return BufferQueue::ErrorCode::UnrecognizedBuffer; } if (LiveBuffers == 0) { + decRefCount(Buf.BackingStore, Buf.Size, Buf.Count); Buf.Data = nullptr; Buf.Size = Buf.Size; Buf.Generation = 0; + Buf.BackingStore = nullptr; + Buf.Count = 0; return ErrorCode::Ok; } @@ -141,13 +181,18 @@ B->Buff.Data = Buf.Data; B->Buff.Size = Buf.Size; B->Buff.Generation = Buf.Generation; + B->Buff.BackingStore = Buf.BackingStore; + B->Buff.Count = Buf.Count; B->Used = true; + decRefCount(Buf.BackingStore, Buf.Size, Buf.Count); atomic_store(&B->Buff.Extents, atomic_load(&Buf.Extents, memory_order_acquire), memory_order_release); Buf.Data = nullptr; Buf.Size = 0; Buf.Generation = 0; + Buf.BackingStore = 
nullptr; + Buf.Count = 0; return ErrorCode::Ok; } @@ -157,9 +202,15 @@ return ErrorCode::Ok; } -BufferQueue::~BufferQueue() { +void BufferQueue::cleanupBuffers() { for (auto B = Buffers, E = Buffers + BufferCount; B != E; ++B) B->~BufferRep(); deallocateBuffer(Buffers, BufferCount); - deallocateBuffer(BackingStore, BufferSize * BufferCount); + decRefCount(BackingStore, BufferSize, BufferCount); + BackingStore = nullptr; + Buffers = nullptr; + BufferCount = 0; + BufferSize = 0; } + +BufferQueue::~BufferQueue() { cleanupBuffers(); } Index: compiler-rt/lib/xray/xray_fdr_controller.h =================================================================== --- compiler-rt/lib/xray/xray_fdr_controller.h +++ compiler-rt/lib/xray/xray_fdr_controller.h @@ -42,7 +42,7 @@ bool finalized() const { return BQ == nullptr || BQ->finalizing(); } bool hasSpace(size_t S) { - return B.Data != nullptr && + return B.Data != nullptr && B.Generation == BQ->generation() && W.getNextRecord() + S <= reinterpret_cast<char *>(B.Data) + B.Size; } @@ -51,8 +51,6 @@ } bool getNewBuffer() { - if (!returnBuffer()) - return false; if (BQ->getBuffer(B) != BufferQueue::ErrorCode::Ok) return false; @@ -60,6 +58,9 @@ DCHECK_EQ(W.getNextRecord(), B.Data); LatestTSC = 0; LatestCPU = 0; + First = true; + UndoableFunctionEnters = 0; + UndoableTailExits = 0; atomic_store(&B.Extents, 0, memory_order_release); return true; } @@ -108,6 +109,8 @@ return returnBuffer(); if (UNLIKELY(!hasSpace(S))) { + if (!returnBuffer()) + return false; if (!getNewBuffer()) return false; if (!setupNewBuffer()) @@ -128,24 +131,26 @@ if (BQ == nullptr) return false; + First = true; if (finalized()) { BQ->releaseBuffer(B); // ignore result. 
return false; } - First = true; - if (BQ->releaseBuffer(B) != BufferQueue::ErrorCode::Ok) - return false; - return true; + return BQ->releaseBuffer(B) == BufferQueue::ErrorCode::Ok; } - enum class PreambleResult { NoChange, WroteMetadata }; + enum class PreambleResult { NoChange, WroteMetadata, InvalidBuffer }; PreambleResult functionPreamble(uint64_t TSC, uint16_t CPU) { if (UNLIKELY(LatestCPU != CPU || LatestTSC == 0)) { // We update our internal tracking state for the Latest TSC and CPU we've // seen, then write out the appropriate metadata and function records. LatestTSC = TSC; LatestCPU = CPU; + + if (B.Generation != BQ->generation()) + return PreambleResult::InvalidBuffer; + W.writeMetadata(CPU, TSC); return PreambleResult::WroteMetadata; } @@ -153,6 +158,10 @@ if (UNLIKELY(LatestCPU == LatestCPU && LatestTSC > TSC)) { // The TSC has wrapped around, from the last TSC we've seen. LatestTSC = TSC; + + if (B.Generation != BQ->generation()) + return PreambleResult::InvalidBuffer; + W.writeMetadata(TSC); return PreambleResult::WroteMetadata; } @@ -160,7 +169,7 @@ return PreambleResult::NoChange; } - void rewindRecords(int32_t FuncId, uint64_t TSC, uint16_t CPU) { + bool rewindRecords(int32_t FuncId, uint64_t TSC, uint16_t CPU) { // Undo one enter record, because at this point we are either at the state // of: // - We are exiting a function that we recently entered. 
@@ -169,6 +178,8 @@ // FunctionRecord F; W.undoWrites(sizeof(FunctionRecord)); + if (B.Generation != BQ->generation()) + return false; internal_memcpy(&F, W.getNextRecord(), sizeof(FunctionRecord)); DCHECK(F.RecordKind == @@ -179,30 +190,35 @@ LatestTSC -= F.TSCDelta; if (--UndoableFunctionEnters != 0) { LastFunctionEntryTSC -= F.TSCDelta; - return; + return true; } LastFunctionEntryTSC = 0; auto RewindingTSC = LatestTSC; auto RewindingRecordPtr = W.getNextRecord() - sizeof(FunctionRecord); while (UndoableTailExits) { + if (B.Generation != BQ->generation()) + return false; internal_memcpy(&F, RewindingRecordPtr, sizeof(FunctionRecord)); DCHECK_EQ(F.RecordKind, uint8_t(FunctionRecord::RecordKinds::FunctionTailExit)); RewindingTSC -= F.TSCDelta; RewindingRecordPtr -= sizeof(FunctionRecord); + if (B.Generation != BQ->generation()) + return false; internal_memcpy(&F, RewindingRecordPtr, sizeof(FunctionRecord)); // This tail call exceeded the threshold duration. It will not be erased. if ((TSC - RewindingTSC) >= CycleThreshold) { UndoableTailExits = 0; - return; + return true; } --UndoableTailExits; W.undoWrites(sizeof(FunctionRecord) * 2); LatestTSC = RewindingTSC; } + return true; } public: @@ -212,18 +228,17 @@ : BQ(BQ), B(B), W(W), WallClockReader(R), CycleThreshold(C) {} bool functionEnter(int32_t FuncId, uint64_t TSC, uint16_t CPU) { - if (finalized()) + if (finalized() || + !prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord))) return returnBuffer(); - if (!prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord))) + auto PreambleStatus = functionPreamble(TSC, CPU); + if (PreambleStatus == PreambleResult::InvalidBuffer) return returnBuffer(); - if (functionPreamble(TSC, CPU) == PreambleResult::WroteMetadata) { - UndoableFunctionEnters = 1; - } else { - ++UndoableFunctionEnters; - } - + UndoableFunctionEnters = (PreambleStatus == PreambleResult::WroteMetadata) + ? 
1 + : UndoableFunctionEnters + 1; LastFunctionEntryTSC = TSC; LatestTSC = TSC; return W.writeFunction(FDRLogWriter::FunctionRecordKind::Enter, @@ -237,12 +252,14 @@ if (!prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord))) return returnBuffer(); - if (functionPreamble(TSC, CPU) == PreambleResult::NoChange && + auto PreambleStatus = functionPreamble(TSC, CPU); + if (PreambleStatus == PreambleResult::InvalidBuffer) + return returnBuffer(); + + if (PreambleStatus == PreambleResult::NoChange && UndoableFunctionEnters != 0 && - TSC - LastFunctionEntryTSC < CycleThreshold) { - rewindRecords(FuncId, TSC, CPU); - return true; - } + TSC - LastFunctionEntryTSC < CycleThreshold) + return rewindRecords(FuncId, TSC, CPU); UndoableTailExits = UndoableFunctionEnters ? UndoableTailExits + 1 : 0; UndoableFunctionEnters = 0; @@ -253,15 +270,11 @@ bool functionEnterArg(int32_t FuncId, uint64_t TSC, uint16_t CPU, uint64_t Arg) { - if (finalized()) + if (finalized() || + !prepareBuffer((2 * sizeof(MetadataRecord)) + sizeof(FunctionRecord)) || + functionPreamble(TSC, CPU) == PreambleResult::InvalidBuffer) return returnBuffer(); - if (!prepareBuffer((2 * sizeof(MetadataRecord)) + sizeof(FunctionRecord))) - return returnBuffer(); - - // Ignore the result of writing out the preamble. 
- functionPreamble(TSC, CPU); - LatestTSC = TSC; LastFunctionEntryTSC = 0; UndoableFunctionEnters = 0; @@ -273,15 +286,18 @@ } bool functionExit(int32_t FuncId, uint64_t TSC, uint16_t CPU) { - if (finalized()) + if (finalized() || + !prepareBuffer(sizeof(MetadataRecord) + sizeof(FunctionRecord))) return returnBuffer(); - if (functionPreamble(TSC, CPU) == PreambleResult::NoChange && + auto PreambleStatus = functionPreamble(TSC, CPU); + if (PreambleStatus == PreambleResult::InvalidBuffer) + return returnBuffer(); + + if (PreambleStatus == PreambleResult::NoChange && UndoableFunctionEnters != 0 && - TSC - LastFunctionEntryTSC < CycleThreshold) { - rewindRecords(FuncId, TSC, CPU); - return true; - } + TSC - LastFunctionEntryTSC < CycleThreshold) + return rewindRecords(FuncId, TSC, CPU); LatestTSC = TSC; UndoableFunctionEnters = 0;