Index: compiler-rt/lib/xray/xray_fdr_logging.cc =================================================================== --- compiler-rt/lib/xray/xray_fdr_logging.cc +++ compiler-rt/lib/xray/xray_fdr_logging.cc @@ -272,12 +272,27 @@ incrementExtents(sizeof(MetadataRecord)); } -static void writeFunctionRecord(int FuncId, uint32_t TSCDelta, +static void writeFunctionRecord(int32_t FuncId, uint32_t TSCDelta, XRayEntryType EntryType) XRAY_NEVER_INSTRUMENT { + constexpr int32_t MaxFuncId = (1 << 29) - 1; + if (UNLIKELY(FuncId > MaxFuncId)) { + if (Verbosity()) + Report("Warning: Function ID '%d' > max function id: '%d'", FuncId, + MaxFuncId); + return; + } + FunctionRecord FuncRecord; FuncRecord.Type = uint8_t(RecordType::Function); + // Only take 28 bits of the function id. - FuncRecord.FuncId = FuncId & ~(0x0F << 28); + // + // We need to be careful about the sign bit and the bitwise operations being + // performed here. In effect, we want to truncate the value of the function id + // to the first 28 bits -- this means we need to mask the function id with (2 + // ^ 28) - 1, or 0xfffffff. + // + FuncRecord.FuncId = FuncId & MaxFuncId; FuncRecord.TSCDelta = TSCDelta; auto &TLD = getThreadLocalData(); @@ -345,7 +360,8 @@ // Re-point the thread local pointer into this thread's Buffer before the recent // "Function Entry" record and any "Tail Call Exit" records after that. static void rewindRecentCall(uint64_t TSC, uint64_t &LastTSC, - uint64_t &LastFunctionEntryTSC, int32_t FuncId) { + uint64_t &LastFunctionEntryTSC, + int32_t FuncId) XRAY_NEVER_INSTRUMENT { auto &TLD = getThreadLocalData(); TLD.RecordPtr -= FunctionRecSize; decrementExtents(FunctionRecSize); @@ -521,6 +537,7 @@ writeNewCPUIdMetadata(CPU, TSC); return 0; } + // If the delta is greater than the range for a uint32_t, then we write out // the TSC wrap metadata entry with the full TSC, and the TSC for the // function record be 0. @@ -586,10 +603,10 @@ // - The least number of bytes we will ever write is 8 // (sizeof(FunctionRecord)) only if the delta between the previous entry // and this entry is within 32 bits. - // - The most number of bytes we will ever write is 8 + 16 + 16 = 40. + // - The most number of bytes we will ever write is 8 + 16 + 16 + 16 = 56. // This is computed by: // - // MaxSize = sizeof(FunctionRecord) + 2 * sizeof(MetadataRecord) + // MaxSize = sizeof(FunctionRecord) + 3 * sizeof(MetadataRecord) // // These arise in the following cases: // @@ -603,32 +620,33 @@ // FunctionRecord. // 3. When we learn about a new CPU ID, we need to write down a "new cpu // id" MetadataRecord before writing out the actual FunctionRecord. - // 4. The second MetadataRecord is the optional function call argument. + // 4. The third MetadataRecord is the optional function call argument. // // So the math we need to do is to determine whether writing 40 bytes past the // current pointer exceeds the buffer's maximum size. If we don't have enough // space to write 40 bytes in the buffer, we need get a new Buffer, set it up // properly before doing any further writing. - size_t MaxSize = FunctionRecSize + 2 * MetadataRecSize; + size_t MaxSize = FunctionRecSize + 3 * MetadataRecSize; if (!prepareBuffer(TSC, CPU, wall_clock_reader, MaxSize)) { TLD.BQ = nullptr; return; } - // By this point, we are now ready to write up to 40 bytes (explained above). - DCHECK((TLD.RecordPtr + MaxSize) - static_cast(TLD.Buffer.Data) >= - static_cast(MetadataRecSize) && - "Misconfigured BufferQueue provided; Buffer size not large enough."); - auto RecordTSCDelta = writeCurrentCPUTSC(TLD, TSC, CPU); TLD.LastTSC = TSC; TLD.CurrentCPU = CPU; switch (Entry) { case XRayEntryType::ENTRY: - case XRayEntryType::LOG_ARGS_ENTRY: // Update the thread local state for the next invocation. TLD.LastFunctionEntryTSC = TSC; break; + case XRayEntryType::LOG_ARGS_ENTRY: + // Update the thread local state for the next invocation, but also prevent + // rewinding when we have arguments logged. + TLD.LastFunctionEntryTSC = TSC; + TLD.NumConsecutiveFnEnters = 0; + TLD.NumTailCalls = 0; + break; case XRayEntryType::TAIL: case XRayEntryType::EXIT: // Break out and write the exit record if we can't erase any functions. @@ -986,11 +1004,17 @@ // - The metadata record we're going to write. (16 bytes) // - The additional data we're going to write. Currently, that's the size // of the event we're going to dump into the log as free-form bytes. - if (!prepareBuffer(TSC, CPU, clock_gettime, MetadataRecSize + EventSize)) { + if (!prepareBuffer(TSC, CPU, clock_gettime, + MetadataRecSize + ReducedEventSize)) { TLD.BQ = nullptr; return; } + // We need to reset the counts for the number of functions we're able to + // rewind. + TLD.NumConsecutiveFnEnters = 0; + TLD.NumTailCalls = 0; + // Write the custom event metadata record, which consists of the following // information: // - 8 bytes (64-bits) for the full TSC when the event started. @@ -1001,11 +1025,12 @@ uint8_t(MetadataRecord::RecordKinds::CustomEventMarker); constexpr auto TSCSize = sizeof(TC.TSC); internal_memcpy(&CustomEvent.Data, &ReducedEventSize, sizeof(int32_t)); - internal_memcpy(&CustomEvent.Data[sizeof(int32_t)], &TSC, TSCSize); + internal_memcpy(&CustomEvent.Data + sizeof(int32_t), &TSC, TSCSize); internal_memcpy(TLD.RecordPtr, &CustomEvent, sizeof(CustomEvent)); TLD.RecordPtr += sizeof(CustomEvent); internal_memcpy(TLD.RecordPtr, Event, ReducedEventSize); - incrementExtents(MetadataRecSize + EventSize); + TLD.RecordPtr += ReducedEventSize; + incrementExtents(MetadataRecSize + ReducedEventSize); endBufferIfFull(); } @@ -1031,7 +1056,8 @@ // - The metadata record we're going to write. (16 bytes) // - The additional data we're going to write. Currently, that's the size // of the event we're going to dump into the log as free-form bytes. - if (!prepareBuffer(TSC, CPU, clock_gettime, MetadataRecSize + EventSize)) { + if (!prepareBuffer(TSC, CPU, clock_gettime, + MetadataRecSize + ReducedEventSize)) { TLD.BQ = nullptr; return; } @@ -1056,6 +1082,7 @@ TLD.RecordPtr += sizeof(TypedEvent); internal_memcpy(TLD.RecordPtr, Event, ReducedEventSize); + TLD.RecordPtr += ReducedEventSize; incrementExtents(MetadataRecSize + EventSize); endBufferIfFull(); } Index: llvm/include/llvm/XRay/BlockIndexer.h =================================================================== --- llvm/include/llvm/XRay/BlockIndexer.h +++ llvm/include/llvm/XRay/BlockIndexer.h @@ -39,9 +39,6 @@ private: Index &Indices; - enum class State : unsigned { SeekExtents, ExtentsFound, ThreadIDFound }; - - State CurrentState = State::SeekExtents; Block CurrentBlock{0, 0, nullptr, {}}; public: Index: llvm/lib/XRay/BlockIndexer.cpp =================================================================== --- llvm/lib/XRay/BlockIndexer.cpp +++ llvm/lib/XRay/BlockIndexer.cpp @@ -16,22 +16,7 @@ namespace llvm { namespace xray { -Error BlockIndexer::visit(BufferExtents &) { - if (CurrentState == State::ThreadIDFound) { - Index::iterator It; - std::tie(It, std::ignore) = - Indices.insert({{CurrentBlock.ProcessID, CurrentBlock.ThreadID}, {}}); - It->second.push_back({CurrentBlock.ProcessID, CurrentBlock.ThreadID, - CurrentBlock.WallclockTime, - std::move(CurrentBlock.Records)}); - CurrentBlock.ProcessID = 0; - CurrentBlock.ThreadID = 0; - CurrentBlock.WallclockTime = nullptr; - CurrentBlock.Records = {}; - } - CurrentState = State::ExtentsFound; - return Error::success(); -} +Error BlockIndexer::visit(BufferExtents &) { return Error::success(); } Error BlockIndexer::visit(WallclockRecord &R) { CurrentBlock.Records.push_back(&R); @@ -66,14 +51,16 @@ } Error BlockIndexer::visit(NewBufferRecord &R) { - CurrentState = State::ThreadIDFound; + if (!CurrentBlock.Records.empty()) + if (auto E = flush()) + return E; + CurrentBlock.ThreadID = R.tid(); CurrentBlock.Records.push_back(&R); return Error::success(); } Error BlockIndexer::visit(EndBufferRecord &R) { - CurrentState = State::SeekExtents; CurrentBlock.Records.push_back(&R); return Error::success(); } @@ -84,7 +71,6 @@ } Error BlockIndexer::flush() { - CurrentState = State::SeekExtents; Index::iterator It; std::tie(It, std::ignore) = Indices.insert({{CurrentBlock.ProcessID, CurrentBlock.ThreadID}, {}}); @@ -94,6 +80,7 @@ CurrentBlock.ProcessID = 0; CurrentBlock.ThreadID = 0; CurrentBlock.Records = {}; + CurrentBlock.WallclockTime = nullptr; return Error::success(); } Index: llvm/lib/XRay/Trace.cpp =================================================================== --- llvm/lib/XRay/Trace.cpp +++ llvm/lib/XRay/Trace.cpp @@ -289,16 +289,15 @@ // Then we verify the consistency of the blocks. { - BlockVerifier Verifier; for (auto &PTB : Index) { auto &Blocks = PTB.second; for (auto &B : Blocks) { + BlockVerifier Verifier; for (auto *R : B.Records) if (auto E = R->apply(Verifier)) return E; if (auto E = Verifier.verify()) return E; - Verifier.reset(); } } }