Index: lib/xray/tests/unit/CMakeLists.txt =================================================================== --- lib/xray/tests/unit/CMakeLists.txt +++ lib/xray/tests/unit/CMakeLists.txt @@ -2,3 +2,5 @@ buffer_queue_test.cc xray_unit_test_main.cc) add_xray_unittest(XRayFDRLoggingTest SOURCES fdr_logging_test.cc xray_unit_test_main.cc) + +add_executable(xray_fdr_log_printer xray_fdr_log_printer_tool.cc) Index: lib/xray/tests/unit/xray_fdr_log_printer_tool.cc =================================================================== --- /dev/null +++ lib/xray/tests/unit/xray_fdr_log_printer_tool.cc @@ -0,0 +1,329 @@ +//===-- xray_fdr_log_printer_tool.cc --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// + +#include "xray_fdr_logging.h" +#include "xray_fdr_logging_impl.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "xray/xray_records.h" + +// Writes out xray fdr mode log records to stdout based on a sequence of +// formatted data read from stdin. +// +// Interprets an adhoc format of Top Level Types and parameter maps of the form: +// +// RecordType : { Parameter1 = Value, Parameter2 = value , Parameter3 = value} +// OtherRecordType : { ParameterOne = Value } +// +// Each line corresponds to a type of record written by the Xray Flight Data +// Recorder mode to a buffer. This program synthesizes records in the FDR binary +// format and writes them to std::cout. + +namespace { + +/// A crude lexer to read tokens and skip over whitespace. +class TokenReader { +public: + TokenReader() : LastDelimPresent(false), FoundEof(false), LastDelim(0) {} + std::string readToken(std::istream &Stream); + bool hasLastDelim() const { return LastDelimPresent; } + char getLastDelim() const { return LastDelim; } + void setLastDelim(char Delim) { + LastDelimPresent = true; + LastDelim = Delim; + } + void clearLastDelim() { + LastDelimPresent = false; + LastDelim = 0; + } + bool isEof() { return FoundEof; } + void setFoundEof(bool eof) { FoundEof = eof; } + +private: + bool LastDelimPresent; + bool FoundEof; + char LastDelim; +}; + +// Globally tracks whether we reached EOF and caches delimiters that qualify as +// tokens. +static TokenReader TokenReader{}; + +bool isWhitespace(char c) { + // Hardcode the whitespace characters we will not return as tokens even though + // they are token delimiters. + static const std::vector Whitespace{' ', '\n', '\t'}; + return std::find(Whitespace.begin(), Whitespace.end(), c) != Whitespace.end(); +} + +bool isDelimiter(char c) { + // Hardcode a set of token delimiters. + static const std::vector Delimiters{' ', ':', ',', '\n', + '\t', '{', '}', '='}; + return std::find(Delimiters.begin(), Delimiters.end(), c) != Delimiters.end(); +} + +std::string TokenReader::readToken(std::istream &Stream) { + // If on the last call we read a trailing delimiter that also qualifies as a + // token, return it now. + if (hasLastDelim()) { + char Token = getLastDelim(); + clearLastDelim(); + return std::string{Token}; + } + + std::stringstream Builder{}; + char c; + c = Stream.get(); + while (!isDelimiter(c) && !Stream.eof()) { + Builder << c; + c = Stream.get(); + } + + setFoundEof(Stream.eof()); + + std::string Token = Builder.str(); + + if (Token.empty()) { + // We read a whitespace delimiter only. Skip over it. + if (!isEof() && isWhitespace(c)) { + return readToken(Stream); + } else if (isWhitespace(c)) { + // We only read a whitespace delimiter. + return ""; + } else { + // We read a delimiter that matters as a token. + return std::string{c}; + } + } + + // If we found a delimiter that's a valid token. Store it to return as the + // next token. + if (!isWhitespace(c)) + setLastDelim(c); + + return Token; +} + +// Reads an expected token or dies a gruesome death. +void eatExpectedToken(std::istream &Stream, const std::string &Expected) { + std::string Token = TokenReader.readToken(Stream); + if (Token.compare(Expected) != 0) { + std::cerr << "Expecting token '" << Expected << "'. Found '" << Token + << "'.\n"; + std::exit(1); + } +} + +// Constructs a map of key value pairs from a token stream. +// Expects to read an expression of the form: +// +// { a = b, c = d, e = f} +// +// If not, the driver will crash. +std::map readMap(std::istream &Stream) { + using StrMap = std::map; + using StrVector = std::vector; + + eatExpectedToken(Stream, "{"); + StrVector TokenList{}; + + while (!TokenReader.isEof()) { + std::string CurrentToken = TokenReader.readToken(Stream); + if (CurrentToken.compare("}") == 0) { + break; + } + TokenList.push_back(CurrentToken); + if (TokenReader.isEof()) { + std::cerr << "Got EOF while building a param map.\n"; + std::exit(1); + } + } + + if (TokenList.size() == 0) { + StrMap EmptyMap{}; + return EmptyMap; + } + if (TokenList.size() % 4 != 3) { + std::cerr << "Error while building token map. Expected triples of tokens " + "in the form 'a = b' separated by commas.\n"; + std::exit(1); + } + + StrMap TokenMap{}; + std::size_t ElementIndex = 0; + for (; ElementIndex < TokenList.size(); ElementIndex += 4) { + if (TokenList[ElementIndex + 1].compare("=") != 0) { + std::cerr << "Expected an assignment when building a param map.\n"; + std::exit(1); + } + TokenMap[TokenList[ElementIndex]] = TokenList[ElementIndex + 2]; + if (ElementIndex + 3 < TokenList.size()) { + if (TokenList[ElementIndex + 3].compare(",") != 0) { + std::cerr << "Expected assignment statements to be separated by commas." + << "\n"; + std::exit(1); + } + } + } + return TokenMap; +} + +std::string getOrDie(const std::map &Lookup, + const std::string &Key) { + auto MapIter = Lookup.find(Key); + if (MapIter == Lookup.end()) { + std::cerr << "Expected key '" << Key << "'. Was not found.\n"; + std::exit(1); + } + return MapIter->second; +} + +// Reads a numeric type from a string token through the magic of +// std::stringstream. +template struct NumberParser { + static NT parse(const std::string &Input) { + NT Number = 0; + std::stringstream Stream(Input); + Stream >> Number; + return Number; + } +}; + +void writeNewBufferOrDie(std::istream &Input) { + auto TokenMap = readMap(Input); + pid_t Tid = NumberParser::parse(getOrDie(TokenMap, "Tid")); + time_t Time = NumberParser::parse(getOrDie(TokenMap, "time")); + timespec TimeSpec = {Time, 0}; + constexpr const size_t OutputSize = 32; + std::array Buffer{}; + char *MemPtr = Buffer.data(); + __xray::__xray_fdr_internal::writeNewBufferPreamble(Tid, TimeSpec, MemPtr); + std::cout.write(Buffer.data(), OutputSize); +} + +void writeNewCPUIdOrDie(std::istream &Input) { + auto TokenMap = readMap(Input); + uint16_t CPU = NumberParser::parse(getOrDie(TokenMap, "CPU")); + uint64_t TSC = NumberParser::parse(getOrDie(TokenMap, "TSC")); + constexpr const size_t OutputSize = 16; + std::array Buffer{}; + char *MemPtr = Buffer.data(); + __xray::__xray_fdr_internal::writeNewCPUIdMetadata(CPU, TSC, MemPtr); + std::cout.write(Buffer.data(), OutputSize); +} + +void writeEOBOrDie(std::istream &Input) { + auto TokenMap = readMap(Input); + constexpr const size_t OutputSize = 16; + std::array Buffer{}; + char *MemPtr = Buffer.data(); + __xray::__xray_fdr_internal::writeEOBMetadata(MemPtr); + std::cout.write(Buffer.data(), OutputSize); +} + +void writeTSCWrapOrDie(std::istream &Input) { + auto TokenMap = readMap(Input); + uint64_t TSC = NumberParser::parse(getOrDie(TokenMap, "TSC")); + constexpr const size_t OutputSize = 16; + std::array Buffer{}; + char *MemPtr = Buffer.data(); + __xray::__xray_fdr_internal::writeTSCWrapMetadata(TSC, MemPtr); + std::cout.write(Buffer.data(), OutputSize); +} + +XRayEntryType decodeEntryType(const std::string &EntryTypeStr) { + if (EntryTypeStr.compare("Entry") == 0) { + return XRayEntryType::ENTRY; + } else if (EntryTypeStr.compare("LogArgsEntry") == 0) { + return XRayEntryType::LOG_ARGS_ENTRY; + } else if (EntryTypeStr.compare("Exit") == 0) { + return XRayEntryType::EXIT; + } else if (EntryTypeStr.compare("Tail") == 0) { + return XRayEntryType::TAIL; + } + std::cerr << "Illegal entry type " << EntryTypeStr << ".\n"; + std::exit(1); +} + +void writeFunctionOrDie(std::istream &Input) { + auto TokenMap = readMap(std::cin); + int FuncId = NumberParser::parse(getOrDie(TokenMap, "FuncId")); + uint32_t TSCDelta = + NumberParser::parse(getOrDie(TokenMap, "TSCDelta")); + std::string EntryType = getOrDie(TokenMap, "EntryType"); + XRayEntryType XrayEntryType = decodeEntryType(EntryType); + constexpr const size_t OutputSize = 8; + std::array Buffer{}; + char *MemPtr = Buffer.data(); + __xray::__xray_fdr_internal::writeFunctionRecord(FuncId, TSCDelta, + XrayEntryType, MemPtr); + std::cout.write(Buffer.data(), OutputSize); +} + +} // namespace + +int main(int argc, char **argv) { + std::map> TopLevelRecordMap; + TopLevelRecordMap["NewBuffer"] = writeNewBufferOrDie; + TopLevelRecordMap["NewCPU"] = writeNewCPUIdOrDie; + TopLevelRecordMap["EOB"] = writeEOBOrDie; + TopLevelRecordMap["TSCWrap"] = writeTSCWrapOrDie; + TopLevelRecordMap["Function"] = writeFunctionOrDie; + + // Write file header + // + // (2) uint16 : version + // (2) uint16 : type + // (4) uint32 : bitfield + // (8) uint64 : cycle frequency + // (16) - : padding + uint16_t HeaderVersion = 1; + uint16_t HeaderType = 1; + uint32_t Bitfield = 3; + uint64_t CycleFreq = 42; + constexpr const size_t HeaderSize = 32; + std::array Header{}; + std::memcpy(Header.data(), &HeaderVersion, sizeof(HeaderVersion)); + std::memcpy(Header.data() + 2, &HeaderType, sizeof(HeaderType)); + std::memcpy(Header.data() + 4, &Bitfield, sizeof(Bitfield)); + std::memcpy(Header.data() + 8, &CycleFreq, sizeof(CycleFreq)); + std::cout.write(Header.data(), HeaderSize); + + std::string CurrentToken; + while (true) { + CurrentToken = TokenReader.readToken(std::cin); + if (TokenReader.isEof()) + break; + auto MapIter = TopLevelRecordMap.find(CurrentToken); + if (MapIter != TopLevelRecordMap.end()) { + eatExpectedToken(std::cin, ":"); + if (TokenReader.isEof()) { + std::cerr << "Got eof when expecting to read a map.\n"; + std::exit(1); + } + MapIter->second(std::cin); + } else { + std::cerr << "Got bad top level instruction '" << CurrentToken << "'.\n"; + std::exit(1); + } + } + return 0; +} Index: lib/xray/xray_fdr_log_records.h =================================================================== --- lib/xray/xray_fdr_log_records.h +++ lib/xray/xray_fdr_log_records.h @@ -1,4 +1,4 @@ -//===-- xray_fdr_logging.h ------------------------------------------------===// +//===-- xray_fdr_log_records.h -------------------------------------------===// // // The LLVM Compiler Infrastructure // @@ -10,25 +10,10 @@ // This file is a part of XRay, a function call tracing system. // //===----------------------------------------------------------------------===// -#ifndef XRAY_XRAY_FDR_LOGGING_H -#define XRAY_XRAY_FDR_LOGGING_H +#ifndef XRAY_XRAY_FDR_LOG_RECORDS_H +#define XRAY_XRAY_FDR_LOG_RECORDS_H -#include "xray/xray_log_interface.h" - -// FDR (Flight Data Recorder) Mode -// =============================== -// -// The XRay whitepaper describes a mode of operation for function call trace -// logging that involves writing small records into an in-memory circular -// buffer, that then gets logged to disk on demand. To do this efficiently and -// capture as much data as we can, we use smaller records compared to the -// default mode of always writing fixed-size records. - -namespace __xray { - -enum class RecordType : uint8_t { - Function, Metadata -}; +enum class RecordType : uint8_t { Function, Metadata }; // A MetadataRecord encodes the kind of record in its first byte, and have 15 // additional bytes in the end to hold free-form data. @@ -77,20 +62,4 @@ static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord."); -// Options used by the FDR implementation. -struct FDRLoggingOptions { - bool ReportErrors = false; - int Fd = -1; -}; - -// Flight Data Recorder mode implementation interfaces. -XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax, - void *Options, size_t OptionsSize); -XRayLogInitStatus fdrLoggingFinalize(); -void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry); -XRayLogFlushStatus fdrLoggingFlush(); -XRayLogInitStatus fdrLoggingReset(); - -} // namespace __xray - -#endif // XRAY_XRAY_FDR_LOGGING_H +#endif // XRAY_XRAY_FDR_LOG_RECORDS_H Index: lib/xray/xray_fdr_logging.h =================================================================== --- lib/xray/xray_fdr_logging.h +++ lib/xray/xray_fdr_logging.h @@ -14,6 +14,7 @@ #define XRAY_XRAY_FDR_LOGGING_H #include "xray/xray_log_interface.h" +#include "xray_fdr_log_records.h" // FDR (Flight Data Recorder) Mode // =============================== @@ -26,57 +27,6 @@ namespace __xray { -enum class RecordType : uint8_t { - Function, Metadata -}; - -// A MetadataRecord encodes the kind of record in its first byte, and have 15 -// additional bytes in the end to hold free-form data. -struct alignas(16) MetadataRecord { - // A MetadataRecord must always have a type of 1. - /* RecordType */ uint8_t Type : 1; - - // Each kind of record is represented as a 7-bit value (even though we use an - // unsigned 8-bit enum class to do so). - enum class RecordKinds : uint8_t { - NewBuffer, - EndOfBuffer, - NewCPUId, - TSCWrap, - WalltimeMarker, - }; - // Use 7 bits to identify this record type. - /* RecordKinds */ uint8_t RecordKind : 7; - char Data[15]; -} __attribute__((packed)); - -static_assert(sizeof(MetadataRecord) == 16, "Wrong size for MetadataRecord."); - -struct alignas(8) FunctionRecord { - // A FunctionRecord must always have a type of 0. - /* RecordType */ uint8_t Type : 1; - enum class RecordKinds { - FunctionEnter = 0x00, - FunctionExit = 0x01, - FunctionTailExit = 0x02, - }; - /* RecordKinds */ uint8_t RecordKind : 3; - - // We only use 28 bits of the function ID, so that we can use as few bytes as - // possible. This means we only support 2^28 (268,435,456) unique function ids - // in a single binary. - int FuncId : 28; - - // We use another 4 bytes to hold the delta between the previous entry's TSC. - // In case we've found that the distance is greater than the allowable 32 bits - // (either because we are running in a different CPU and the TSC might be - // different then), we should use a MetadataRecord before this FunctionRecord - // that will contain the full TSC for that CPU, and keep this to 0. - uint32_t TSCDelta; -} __attribute__((packed)); - -static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord."); - // Options used by the FDR implementation. struct FDRLoggingOptions { bool ReportErrors = false; @@ -85,7 +35,7 @@ // Flight Data Recorder mode implementation interfaces. XRayLogInitStatus fdrLoggingInit(size_t BufferSize, size_t BufferMax, - void *Options, size_t OptionsSize); + void *Options, size_t OptionsSize); XRayLogInitStatus fdrLoggingFinalize(); void fdrLoggingHandleArg0(int32_t FuncId, XRayEntryType Entry); XRayLogFlushStatus fdrLoggingFlush(); Index: lib/xray/xray_fdr_logging.cc =================================================================== --- lib/xray/xray_fdr_logging.cc +++ lib/xray/xray_fdr_logging.cc @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file is a part of XRay, a dynamic runtime instruementation system. +// This file is a part of XRay, a dynamic runtime instrumentation system. // // Here we implement the Flight Data Recorder mode for XRay, where we use // compact structures to store records in memory as well as when writing out the @@ -31,6 +31,7 @@ #include "xray/xray_records.h" #include "xray_buffer_queue.h" #include "xray_defs.h" +#include "xray_fdr_logging_impl.h" #include "xray_flags.h" #include "xray_tsc.h" #include "xray_utils.h" @@ -49,8 +50,8 @@ std::unique_ptr FDROptions; XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax, - void *Options, - size_t OptionsSize) XRAY_NEVER_INSTRUMENT { + void *Options, + size_t OptionsSize) XRAY_NEVER_INSTRUMENT { assert(OptionsSize == sizeof(FDRLoggingOptions)); XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; if (!LoggingStatus.compare_exchange_strong( @@ -174,352 +175,16 @@ return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; } -namespace { -thread_local BufferQueue::Buffer Buffer; -thread_local char *RecordPtr = nullptr; - -void setupNewBuffer(const BufferQueue::Buffer &Buffer) XRAY_NEVER_INSTRUMENT { - RecordPtr = static_cast(Buffer.Buffer); - - static constexpr int InitRecordsCount = 2; - std::aligned_storage::type Records[InitRecordsCount]; - { - // Write out a MetadataRecord to signify that this is the start of a new - // buffer, associated with a particular thread, with a new CPU. For the - // data, we have 15 bytes to squeeze as much information as we can. At this - // point we only write down the following bytes: - // - Thread ID (pid_t, 4 bytes) - auto &NewBuffer = *reinterpret_cast(&Records[0]); - NewBuffer.Type = uint8_t(RecordType::Metadata); - NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer); - pid_t Tid = syscall(SYS_gettid); - std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t)); - } - - // Also write the WalltimeMarker record. - { - static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes"); - auto &WalltimeMarker = *reinterpret_cast(&Records[1]); - WalltimeMarker.Type = uint8_t(RecordType::Metadata); - WalltimeMarker.RecordKind = - uint8_t(MetadataRecord::RecordKinds::WalltimeMarker); - timespec TS{0, 0}; - clock_gettime(CLOCK_MONOTONIC, &TS); - - // We only really need microsecond precision here, and enforce across - // platforms that we need 64-bit seconds and 32-bit microseconds encoded in - // the Metadata record. - int32_t Micros = TS.tv_nsec / 1000; - int64_t Seconds = TS.tv_sec; - std::memcpy(WalltimeMarker.Data, &Seconds, sizeof(Seconds)); - std::memcpy(WalltimeMarker.Data + sizeof(Seconds), &Micros, sizeof(Micros)); - } - std::memcpy(RecordPtr, Records, sizeof(MetadataRecord) * InitRecordsCount); - RecordPtr += sizeof(MetadataRecord) * InitRecordsCount; -} - -void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC) XRAY_NEVER_INSTRUMENT { - MetadataRecord NewCPUId; - NewCPUId.Type = uint8_t(RecordType::Metadata); - NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId); - - // The data for the New CPU will contain the following bytes: - // - CPU ID (uint16_t, 2 bytes) - // - Full TSC (uint64_t, 8 bytes) - // Total = 12 bytes. - std::memcpy(&NewCPUId.Data, &CPU, sizeof(CPU)); - std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC)); - std::memcpy(RecordPtr, &NewCPUId, sizeof(MetadataRecord)); - RecordPtr += sizeof(MetadataRecord); -} - -void writeEOBMetadata() XRAY_NEVER_INSTRUMENT { - MetadataRecord EOBMeta; - EOBMeta.Type = uint8_t(RecordType::Metadata); - EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer); - // For now we don't write any bytes into the Data field. - std::memcpy(RecordPtr, &EOBMeta, sizeof(MetadataRecord)); - RecordPtr += sizeof(MetadataRecord); -} - -void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT { - MetadataRecord TSCWrap; - TSCWrap.Type = uint8_t(RecordType::Metadata); - TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap); - - // The data for the TSCWrap record contains the following bytes: - // - Full TSC (uint64_t, 8 bytes) - // Total = 8 bytes. - std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC)); - std::memcpy(RecordPtr, &TSCWrap, sizeof(MetadataRecord)); - RecordPtr += sizeof(MetadataRecord); -} - -constexpr auto MetadataRecSize = sizeof(MetadataRecord); -constexpr auto FunctionRecSize = sizeof(FunctionRecord); - -class ThreadExitBufferCleanup { - std::weak_ptr Buffers; - BufferQueue::Buffer &Buffer; - -public: - explicit ThreadExitBufferCleanup(std::weak_ptr BQ, - BufferQueue::Buffer &Buffer) - XRAY_NEVER_INSTRUMENT : Buffers(BQ), - Buffer(Buffer) {} - - ~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT { - if (RecordPtr == nullptr) - return; - - // We make sure that upon exit, a thread will write out the EOB - // MetadataRecord in the thread-local log, and also release the buffer to - // the queue. - assert((RecordPtr + MetadataRecSize) - static_cast(Buffer.Buffer) >= - static_cast(MetadataRecSize)); - if (auto BQ = Buffers.lock()) { - writeEOBMetadata(); - if (auto EC = BQ->releaseBuffer(Buffer)) - Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer, - EC.message().c_str()); - return; - } - } -}; - -class RecursionGuard { - bool &Running; - const bool Valid; - -public: - explicit RecursionGuard(bool &R) : Running(R), Valid(!R) { - if (Valid) - Running = true; - } - - RecursionGuard(const RecursionGuard &) = delete; - RecursionGuard(RecursionGuard &&) = delete; - RecursionGuard &operator=(const RecursionGuard &) = delete; - RecursionGuard &operator=(RecursionGuard &&) = delete; - - explicit operator bool() const { return Valid; } - - ~RecursionGuard() noexcept { - if (Valid) - Running = false; - } -}; - -inline bool loggingInitialized() { - return LoggingStatus.load(std::memory_order_acquire) == - XRayLogInitStatus::XRAY_LOG_INITIALIZED; -} - -} // namespace - void fdrLoggingHandleArg0(int32_t FuncId, - XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { + XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { // We want to get the TSC as early as possible, so that we can check whether // we've seen this CPU before. We also do it before we load anything else, to // allow for forward progress with the scheduling. unsigned char CPU; uint64_t TSC = __xray::readTSC(CPU); - // Bail out right away if logging is not initialized yet. - if (LoggingStatus.load(std::memory_order_acquire) != - XRayLogInitStatus::XRAY_LOG_INITIALIZED) - return; - - // We use a thread_local variable to keep track of which CPUs we've already - // run, and the TSC times for these CPUs. This allows us to stop repeating the - // CPU field in the function records. - // - // We assume that we'll support only 65536 CPUs for x86_64. - thread_local uint16_t CurrentCPU = std::numeric_limits::max(); - thread_local uint64_t LastTSC = 0; - - // Make sure a thread that's ever called handleArg0 has a thread-local - // live reference to the buffer queue for this particular instance of - // FDRLogging, and that we're going to clean it up when the thread exits. - thread_local auto LocalBQ = BQ; - thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer); - - // Prevent signal handler recursion, so in case we're already in a log writing - // mode and the signal handler comes in (and is also instrumented) then we - // don't want to be clobbering potentially partial writes already happening in - // the thread. We use a simple thread_local latch to only allow one on-going - // handleArg0 to happen at any given time. - thread_local bool Running = false; - RecursionGuard Guard{Running}; - if (!Guard) { - assert(Running == true && "RecursionGuard is buggy!"); - return; - } - - if (!loggingInitialized() || LocalBQ->finalizing()) { - writeEOBMetadata(); - if (auto EC = BQ->releaseBuffer(Buffer)) { - Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer, - EC.message().c_str()); - return; - } - RecordPtr = nullptr; - } - - if (Buffer.Buffer == nullptr) { - if (auto EC = LocalBQ->getBuffer(Buffer)) { - auto LS = LoggingStatus.load(std::memory_order_acquire); - if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING && - LS != XRayLogInitStatus::XRAY_LOG_FINALIZED) - Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str()); - return; - } - - setupNewBuffer(Buffer); - } - - if (CurrentCPU == std::numeric_limits::max()) { - // This means this is the first CPU this thread has ever run on. We set the - // current CPU and record this as the first TSC we've seen. - CurrentCPU = CPU; - writeNewCPUIdMetadata(CPU, TSC); - } - - // Before we go setting up writing new function entries, we need to be really - // careful about the pointer math we're doing. This means we need to ensure - // that the record we are about to write is going to fit into the buffer, - // without overflowing the buffer. - // - // To do this properly, we use the following assumptions: - // - // - The least number of bytes we will ever write is 8 - // (sizeof(FunctionRecord)) only if the delta between the previous entry - // and this entry is within 32 bits. - // - The most number of bytes we will ever write is 8 + 16 = 24. This is - // computed by: - // - // sizeof(FunctionRecord) + sizeof(MetadataRecord) - // - // These arise in the following cases: - // - // 1. When the delta between the TSC we get and the previous TSC for the - // same CPU is outside of the uint32_t range, we end up having to - // write a MetadataRecord to indicate a "tsc wrap" before the actual - // FunctionRecord. - // 2. When we learn that we've moved CPUs, we need to write a - // MetadataRecord to indicate a "cpu change", and thus write out the - // current TSC for that CPU before writing out the actual - // FunctionRecord. - // 3. When we learn about a new CPU ID, we need to write down a "new cpu - // id" MetadataRecord before writing out the actual FunctionRecord. - // - // - An End-of-Buffer (EOB) MetadataRecord is 16 bytes. - // - // So the math we need to do is to determine whether writing 24 bytes past the - // current pointer leaves us with enough bytes to write the EOB - // MetadataRecord. If we don't have enough space after writing as much as 24 - // bytes in the end of the buffer, we need to write out the EOB, get a new - // Buffer, set it up properly before doing any further writing. - // - char *BufferStart = static_cast(Buffer.Buffer); - if ((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart < - static_cast(MetadataRecSize)) { - writeEOBMetadata(); - if (auto EC = LocalBQ->releaseBuffer(Buffer)) { - Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer, - EC.message().c_str()); - return; - } - if (auto EC = LocalBQ->getBuffer(Buffer)) { - Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str()); - return; - } - setupNewBuffer(Buffer); - } - - // By this point, we are now ready to write at most 24 bytes (one metadata - // record and one function record). - BufferStart = static_cast(Buffer.Buffer); - assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart >= - static_cast(MetadataRecSize) && - "Misconfigured BufferQueue provided; Buffer size not large enough."); - - std::aligned_storage::type - AlignedFuncRecordBuffer; - auto &FuncRecord = - *reinterpret_cast(&AlignedFuncRecordBuffer); - FuncRecord.Type = uint8_t(RecordType::Function); - - // Only get the lower 28 bits of the function id. - FuncRecord.FuncId = FuncId & ~(0x0F << 28); - - // Here we compute the TSC Delta. There are a few interesting situations we - // need to account for: - // - // - The thread has migrated to a different CPU. If this is the case, then - // we write down the following records: - // - // 1. A 'NewCPUId' Metadata record. - // 2. A FunctionRecord with a 0 for the TSCDelta field. - // - // - The TSC delta is greater than the 32 bits we can store in a - // FunctionRecord. In this case we write down the following records: - // - // 1. A 'TSCWrap' Metadata record. - // 2. A FunctionRecord with a 0 for the TSCDelta field. - // - // - The TSC delta is representable within the 32 bits we can store in a - // FunctionRecord. In this case we write down just a FunctionRecord with - // the correct TSC delta. - // - FuncRecord.TSCDelta = 0; - if (CPU != CurrentCPU) { - // We've moved to a new CPU. - writeNewCPUIdMetadata(CPU, TSC); - } else { - // If the delta is greater than the range for a uint32_t, then we write out - // the TSC wrap metadata entry with the full TSC, and the TSC for the - // function record be 0. - auto Delta = LastTSC - TSC; - if (Delta > (1ULL << 32) - 1) - writeTSCWrapMetadata(TSC); - else - FuncRecord.TSCDelta = Delta; - } - - // We then update our "LastTSC" and "CurrentCPU" thread-local variables to aid - // us in future computations of this TSC delta value. - LastTSC = TSC; - CurrentCPU = CPU; - - switch (Entry) { - case XRayEntryType::ENTRY: - case XRayEntryType::LOG_ARGS_ENTRY: - FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter); - break; - case XRayEntryType::EXIT: - FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit); - break; - case XRayEntryType::TAIL: - FuncRecord.RecordKind = - uint8_t(FunctionRecord::RecordKinds::FunctionTailExit); - break; - } - - std::memcpy(RecordPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord)); - RecordPtr += sizeof(FunctionRecord); - - // If we've exhausted the buffer by this time, we then release the buffer to - // make sure that other threads may start using this buffer. - if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) { - writeEOBMetadata(); - if (auto EC = LocalBQ->releaseBuffer(Buffer)) { - Report("Failed releasing buffer at %p; error=%s\n", Buffer.Buffer, - EC.message().c_str()); - return; - } - RecordPtr = nullptr; - } + __xray_fdr_internal::processFunctionHook(FuncId, Entry, TSC, CPU, + clock_gettime, LoggingStatus, BQ); } } // namespace __xray Index: lib/xray/xray_fdr_logging_impl.h =================================================================== --- lib/xray/xray_fdr_logging_impl.h +++ lib/xray/xray_fdr_logging_impl.h @@ -1,4 +1,4 @@ -//===-- xray_fdr_logging.cc ------------------------------------*- C++ -*-===// +//===-- xray_fdr_logging_impl.h ---------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,180 +7,172 @@ // //===----------------------------------------------------------------------===// // -// This file is a part of XRay, a dynamic runtime instruementation system. +// This file is a part of XRay, a dynamic runtime instrumentation system. // -// Here we implement the Flight Data Recorder mode for XRay, where we use -// compact structures to store records in memory as well as when writing out the -// data to files. +// Here we implement the thread local state management and record i/o for Flight +// Data Recorder mode for XRay, where we use compact structures to store records +// in memory as well as when writing out the data to files. // //===----------------------------------------------------------------------===// -#include "xray_fdr_logging.h" -#include -#include +#ifndef XRAY_XRAY_FDR_LOGGING_IMPL_H +#define XRAY_XRAY_FDR_LOGGING_IMPL_H + #include +#include #include #include #include -#include -#include #include -#include #include "sanitizer_common/sanitizer_common.h" -#include "xray/xray_interface.h" -#include "xray/xray_records.h" +#include "xray/xray_log_interface.h" #include "xray_buffer_queue.h" #include "xray_defs.h" -#include "xray_flags.h" -#include "xray_tsc.h" -#include "xray_utils.h" +#include "xray_fdr_log_records.h" namespace __xray { -// Global BufferQueue. -std::shared_ptr BQ; - -std::atomic LoggingStatus{ - XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; - -std::atomic LogFlushStatus{ - XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; - -std::unique_ptr FDROptions; - -XRayLogInitStatus fdrLoggingInit(std::size_t BufferSize, std::size_t BufferMax, - void *Options, - size_t OptionsSize) XRAY_NEVER_INSTRUMENT { - assert(OptionsSize == sizeof(FDRLoggingOptions)); - XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - if (!LoggingStatus.compare_exchange_strong( - CurrentStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZING, - std::memory_order_release, std::memory_order_relaxed)) - return CurrentStatus; - - FDROptions.reset(new FDRLoggingOptions()); - *FDROptions = *reinterpret_cast(Options); - if (FDROptions->ReportErrors) - SetPrintfAndReportCallback(printToStdErr); - - bool Success = false; - BQ = std::make_shared(BufferSize, BufferMax, Success); - if (!Success) { - Report("BufferQueue init failed.\n"); - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; - } +/// We expose some of the state transitions when FDR logging mode is operating +/// such that we can simulate a series of log events that may occur without +/// and test with determinism without worrying about the real CPU time. +/// +/// Because the code uses thread_local allocation extensively as part of its +/// design, callers that wish to test events occuring on different threads +/// will actually have to run them on different threads. +/// +/// This also means that it is possible to break invariants maintained by +/// cooperation with xray_fdr_logging class, so be careful and think twice. +namespace __xray_fdr_internal { + +/// Writes the new buffer record and wallclock time that begin a buffer for a +/// thread to MemPtr and increments MemPtr. Bypasses the thread local state +// machine and writes directly to memory without checks. +static void writeNewBufferPreamble(pid_t Tid, timespec TS, char *&MemPtr); + +/// Write a metadata record to switch to a new CPU to MemPtr and increments +/// MemPtr. Bypasses the thread local state machine and writes directly to +/// memory without checks. +static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC, char *&MemPtr); + +/// Writes an EOB metadata record to MemPtr and increments MemPtr. Bypasses the +/// thread local state machine and writes directly to memory without checks. +static void writeEOBMetadata(char *&MemPtr); + +/// Writes a TSC Wrap metadata record to MemPtr and increments MemPtr. Bypasses +/// the thread local state machine and directly writes to memory without checks. +static void writeTSCWrapMetadata(uint64_t TSC, char *&MemPtr); + +/// Writes a Function Record to MemPtr and increments MemPtr. Bypasses the +/// thread local state machine and writes the function record directly to +/// memory. +static void writeFunctionRecord(int FuncId, uint32_t TSCDelta, + XRayEntryType EntryType, char *&MemPtr); + +/// Sets up a new buffer in thread_local storage and writes a preamble. The +/// wall_clock_reader function is used to populate the WallTimeRecord entry. +static void setupNewBuffer(const BufferQueue::Buffer &Buffer, + int (*wall_clock_reader)(clockid_t, + struct timespec *)); + +/// Called to record CPU time for a new CPU within the current thread. +static void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC); + +/// Called to close the buffer when the thread exhausts the buffer or when the +/// thread exits (via a thread local variable destructor). +static void writeEOBMetadata(); + +/// TSC Wrap records are written when a TSC delta encoding scheme overflows. +static void writeTSCWrapMetadata(uint64_t TSC); + +/// Here's where the meat of the processing happens. The writer captures +/// function entry, exit and tail exit points with a time and will create +/// TSCWrap, NewCPUId and Function records as necessary. The writer might +/// walk backward through its buffer and erase trivial functions to avoid +/// polluting the log and may use the buffer queue to obtain or release a +/// buffer. +static void +processFunctionHook(int32_t FuncId, XRayEntryType Entry, uint64_t TSC, + unsigned char CPU, + int (*wall_clock_reader)(clockid_t, struct timespec *), + const std::atomic &LoggingStatus, + const std::shared_ptr &BQ); + +//-----------------------------------------------------------------------------| +// The rest of the file is implementation. | +//-----------------------------------------------------------------------------| +// Functions are implemented in the header for inlining since we don't want | +// to grow the stack when we've hijacked the binary for logging. | +//-----------------------------------------------------------------------------| - // Install the actual handleArg0 handler after initialising the buffers. - __xray_set_handler(fdrLoggingHandleArg0); +namespace { +thread_local BufferQueue::Buffer Buffer; +thread_local char *RecordPtr = nullptr; - LoggingStatus.store(XRayLogInitStatus::XRAY_LOG_INITIALIZED, - std::memory_order_release); - return XRayLogInitStatus::XRAY_LOG_INITIALIZED; -} +constexpr auto MetadataRecSize = sizeof(MetadataRecord); +constexpr auto FunctionRecSize = sizeof(FunctionRecord); -// Must finalize before flushing. -XRayLogFlushStatus fdrLoggingFlush() XRAY_NEVER_INSTRUMENT { - if (LoggingStatus.load(std::memory_order_acquire) != - XRayLogInitStatus::XRAY_LOG_FINALIZED) - return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; +class ThreadExitBufferCleanup { + std::weak_ptr Buffers; + BufferQueue::Buffer &Buffer; - XRayLogFlushStatus Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; - if (!LogFlushStatus.compare_exchange_strong( - Result, XRayLogFlushStatus::XRAY_LOG_FLUSHING, - std::memory_order_release, std::memory_order_relaxed)) - return Result; +public: + explicit ThreadExitBufferCleanup(std::weak_ptr BQ, + BufferQueue::Buffer &Buffer) + XRAY_NEVER_INSTRUMENT : Buffers(BQ), + Buffer(Buffer) {} - // Make a copy of the BufferQueue pointer to prevent other threads that may be - // resetting it from blowing away the queue prematurely while we're dealing - // with it. - auto LocalBQ = BQ; + ~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT { + if (RecordPtr == nullptr) + return; - // We write out the file in the following format: - // - // 1) We write down the XRay file header with version 1, type FDR_LOG. - // 2) Then we use the 'apply' member of the BufferQueue that's live, to - // ensure that at this point in time we write down the buffers that have - // been released (and marked "used") -- we dump the full buffer for now - // (fixed-sized) and let the tools reading the buffers deal with the data - // afterwards. - // - int Fd = FDROptions->Fd; - if (Fd == -1) - Fd = getLogFD(); - if (Fd == -1) { - auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; - LogFlushStatus.store(Result, std::memory_order_release); - return Result; + // We make sure that upon exit, a thread will write out the EOB + // MetadataRecord in the thread-local log, and also release the buffer to + // the queue. + assert((RecordPtr + MetadataRecSize) - static_cast(Buffer.Buffer) >= + static_cast(MetadataRecSize)); + if (auto BQ = Buffers.lock()) { + writeEOBMetadata(); + if (auto EC = BQ->releaseBuffer(Buffer)) + Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer, + EC.message().c_str()); + return; + } } +}; - XRayFileHeader Header; - Header.Version = 1; - Header.Type = FileTypes::FDR_LOG; - Header.CycleFrequency = getTSCFrequency(); - // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' - // before setting the values in the header. - Header.ConstantTSC = 1; - Header.NonstopTSC = 1; - clock_gettime(CLOCK_REALTIME, &Header.TS); - retryingWriteAll(Fd, reinterpret_cast(&Header), - reinterpret_cast(&Header) + sizeof(Header)); - LocalBQ->apply([&](const BufferQueue::Buffer &B) { - retryingWriteAll(Fd, reinterpret_cast(B.Buffer), - reinterpret_cast(B.Buffer) + B.Size); - }); - LogFlushStatus.store(XRayLogFlushStatus::XRAY_LOG_FLUSHED, - std::memory_order_release); - return XRayLogFlushStatus::XRAY_LOG_FLUSHED; -} +class RecursionGuard { + bool &Running; + const bool Valid; -XRayLogInitStatus fdrLoggingFinalize() XRAY_NEVER_INSTRUMENT { - XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; - if (!LoggingStatus.compare_exchange_strong( - CurrentStatus, XRayLogInitStatus::XRAY_LOG_FINALIZING, - std::memory_order_release, std::memory_order_relaxed)) - return CurrentStatus; +public: + explicit RecursionGuard(bool &R) : Running(R), Valid(!R) { + if (Valid) + Running = true; + } - // Do special things to make the log finalize itself, and not allow any more - // operations to be performed until re-initialized. - BQ->finalize(); + RecursionGuard(const RecursionGuard &) = delete; + RecursionGuard(RecursionGuard &&) = delete; + RecursionGuard &operator=(const RecursionGuard &) = delete; + RecursionGuard &operator=(RecursionGuard &&) = delete; - LoggingStatus.store(XRayLogInitStatus::XRAY_LOG_FINALIZED, - std::memory_order_release); - return XRayLogInitStatus::XRAY_LOG_FINALIZED; -} + explicit operator bool() const { return Valid; } -XRayLogInitStatus fdrLoggingReset() XRAY_NEVER_INSTRUMENT { - XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_FINALIZED; - if (!LoggingStatus.compare_exchange_strong( - CurrentStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, - std::memory_order_release, std::memory_order_relaxed)) - return CurrentStatus; - - // Release the in-memory buffer queue. - BQ.reset(); - - // Spin until the flushing status is flushed. - XRayLogFlushStatus CurrentFlushingStatus = - XRayLogFlushStatus::XRAY_LOG_FLUSHED; - while (!LogFlushStatus.compare_exchange_weak( - CurrentFlushingStatus, XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING, - std::memory_order_release, std::memory_order_relaxed)) { - if (CurrentFlushingStatus == XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING) - break; - CurrentFlushingStatus = XRayLogFlushStatus::XRAY_LOG_FLUSHED; + ~RecursionGuard() noexcept { + if (Valid) + Running = false; } +}; - // At this point, we know that the status is flushed, and that we can assume - return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; +static inline bool loggingInitialized( + const std::atomic &LoggingStatus) XRAY_NEVER_INSTRUMENT { + return LoggingStatus.load(std::memory_order_acquire) == + XRayLogInitStatus::XRAY_LOG_INITIALIZED; } -namespace { -thread_local BufferQueue::Buffer Buffer; -thread_local char *RecordPtr = nullptr; - -void setupNewBuffer(const BufferQueue::Buffer &Buffer) XRAY_NEVER_INSTRUMENT { - RecordPtr = static_cast(Buffer.Buffer); +} // namespace anonymous +static inline void writeNewBufferPreamble(pid_t Tid, timespec TS, + char *&MemPtr) XRAY_NEVER_INSTRUMENT { static constexpr int InitRecordsCount = 2; std::aligned_storage::type Records[InitRecordsCount]; { @@ -192,10 +184,8 @@ auto &NewBuffer = *reinterpret_cast(&Records[0]); NewBuffer.Type = uint8_t(RecordType::Metadata); NewBuffer.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewBuffer); - pid_t Tid = syscall(SYS_gettid); std::memcpy(&NewBuffer.Data, &Tid, sizeof(pid_t)); } - // Also write the WalltimeMarker record. { static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes"); @@ -203,8 +193,6 @@ WalltimeMarker.Type = uint8_t(RecordType::Metadata); WalltimeMarker.RecordKind = uint8_t(MetadataRecord::RecordKinds::WalltimeMarker); - timespec TS{0, 0}; - clock_gettime(CLOCK_MONOTONIC, &TS); // We only really need microsecond precision here, and enforce across // platforms that we need 64-bit seconds and 32-bit microseconds encoded in @@ -214,11 +202,24 @@ std::memcpy(WalltimeMarker.Data, &Seconds, sizeof(Seconds)); std::memcpy(WalltimeMarker.Data + sizeof(Seconds), &Micros, sizeof(Micros)); } - std::memcpy(RecordPtr, Records, sizeof(MetadataRecord) * InitRecordsCount); - RecordPtr += sizeof(MetadataRecord) * InitRecordsCount; + std::memcpy(MemPtr, Records, sizeof(MetadataRecord) * InitRecordsCount); + MemPtr += sizeof(MetadataRecord) * InitRecordsCount; } -void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC) XRAY_NEVER_INSTRUMENT { +static inline void setupNewBuffer(const BufferQueue::Buffer &Buffer, + int (*wall_clock_reader)(clockid_t, + struct timespec *)) + XRAY_NEVER_INSTRUMENT { + RecordPtr = static_cast(Buffer.Buffer); + pid_t Tid = syscall(SYS_gettid); + timespec TS{0, 0}; + // This is typically clock_gettime, but callers have injection ability. + wall_clock_reader(CLOCK_MONOTONIC, &TS); + writeNewBufferPreamble(Tid, TS, RecordPtr); +} + +static inline void writeNewCPUIdMetadata(uint16_t CPU, uint64_t TSC, + char *&MemPtr) XRAY_NEVER_INSTRUMENT { MetadataRecord NewCPUId; NewCPUId.Type = uint8_t(RecordType::Metadata); NewCPUId.RecordKind = uint8_t(MetadataRecord::RecordKinds::NewCPUId); @@ -229,20 +230,30 @@ // Total = 12 bytes. std::memcpy(&NewCPUId.Data, &CPU, sizeof(CPU)); std::memcpy(&NewCPUId.Data[sizeof(CPU)], &TSC, sizeof(TSC)); - std::memcpy(RecordPtr, &NewCPUId, sizeof(MetadataRecord)); - RecordPtr += sizeof(MetadataRecord); + std::memcpy(MemPtr, &NewCPUId, sizeof(MetadataRecord)); + MemPtr += sizeof(MetadataRecord); } -void writeEOBMetadata() XRAY_NEVER_INSTRUMENT { +static inline void writeNewCPUIdMetadata(uint16_t CPU, + uint64_t TSC) XRAY_NEVER_INSTRUMENT { + writeNewCPUIdMetadata(CPU, TSC, RecordPtr); +} + +static inline void writeEOBMetadata(char *&MemPtr) XRAY_NEVER_INSTRUMENT { MetadataRecord EOBMeta; EOBMeta.Type = uint8_t(RecordType::Metadata); EOBMeta.RecordKind = uint8_t(MetadataRecord::RecordKinds::EndOfBuffer); // For now we don't write any bytes into the Data field. - std::memcpy(RecordPtr, &EOBMeta, sizeof(MetadataRecord)); - RecordPtr += sizeof(MetadataRecord); + std::memcpy(MemPtr, &EOBMeta, sizeof(MetadataRecord)); + MemPtr += sizeof(MetadataRecord); +} + +static inline void writeEOBMetadata() XRAY_NEVER_INSTRUMENT { + writeEOBMetadata(RecordPtr); } -void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT { +static inline void writeTSCWrapMetadata(uint64_t TSC, + char *&MemPtr) XRAY_NEVER_INSTRUMENT { MetadataRecord TSCWrap; TSCWrap.Type = uint8_t(RecordType::Metadata); TSCWrap.RecordKind = uint8_t(MetadataRecord::RecordKinds::TSCWrap); @@ -251,80 +262,49 @@ // - Full TSC (uint64_t, 8 bytes) // Total = 8 bytes. std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC)); - std::memcpy(RecordPtr, &TSCWrap, sizeof(MetadataRecord)); - RecordPtr += sizeof(MetadataRecord); + std::memcpy(MemPtr, &TSCWrap, sizeof(MetadataRecord)); + MemPtr += sizeof(MetadataRecord); } -constexpr auto MetadataRecSize = sizeof(MetadataRecord); -constexpr auto FunctionRecSize = sizeof(FunctionRecord); - -class ThreadExitBufferCleanup { - std::weak_ptr Buffers; - BufferQueue::Buffer &Buffer; - -public: - explicit ThreadExitBufferCleanup(std::weak_ptr BQ, - BufferQueue::Buffer &Buffer) - XRAY_NEVER_INSTRUMENT : Buffers(BQ), - Buffer(Buffer) {} - - ~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT { - if (RecordPtr == nullptr) - return; - - // We make sure that upon exit, a thread will write out the EOB - // MetadataRecord in the thread-local log, and also release the buffer to - // the queue. - assert((RecordPtr + MetadataRecSize) - static_cast(Buffer.Buffer) >= - static_cast(MetadataRecSize)); - if (auto BQ = Buffers.lock()) { - writeEOBMetadata(); - if (auto EC = BQ->releaseBuffer(Buffer)) - Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer, - EC.message().c_str()); - return; - } - } -}; - -class RecursionGuard { - bool &Running; - const bool Valid; - -public: - explicit RecursionGuard(bool &R) : Running(R), Valid(!R) { - if (Valid) - Running = true; - } - - RecursionGuard(const RecursionGuard &) = delete; - RecursionGuard(RecursionGuard &&) = delete; - RecursionGuard &operator=(const RecursionGuard &) = delete; - RecursionGuard &operator=(RecursionGuard &&) = delete; +static inline void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT { + writeTSCWrapMetadata(TSC, RecordPtr); +} - explicit operator bool() const { return Valid; } +static inline void writeFunctionRecord(int FuncId, uint32_t TSCDelta, + XRayEntryType EntryType, + char *&MemPtr) XRAY_NEVER_INSTRUMENT { + std::aligned_storage::type + AlignedFuncRecordBuffer; + auto &FuncRecord = + *reinterpret_cast(&AlignedFuncRecordBuffer); + FuncRecord.Type = uint8_t(RecordType::Function); + // Only take 28 bits of the function id. + FuncRecord.FuncId = FuncId & ~(0x0F << 28); + FuncRecord.TSCDelta = TSCDelta; - ~RecursionGuard() noexcept { - if (Valid) - Running = false; + switch (EntryType) { + case XRayEntryType::ENTRY: + case XRayEntryType::LOG_ARGS_ENTRY: + FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter); + break; + case XRayEntryType::EXIT: + FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit); + break; + case XRayEntryType::TAIL: + FuncRecord.RecordKind = + uint8_t(FunctionRecord::RecordKinds::FunctionTailExit); + break; } -}; -inline bool loggingInitialized() { - return LoggingStatus.load(std::memory_order_acquire) == - XRayLogInitStatus::XRAY_LOG_INITIALIZED; + std::memcpy(MemPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord)); + MemPtr += sizeof(FunctionRecord); } -} // namespace - -void fdrLoggingHandleArg0(int32_t FuncId, - XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { - // We want to get the TSC as early as possible, so that we can check whether - // we've seen this CPU before. We also do it before we load anything else, to - // allow for forward progress with the scheduling. - unsigned char CPU; - uint64_t TSC = __xray::readTSC(CPU); - +static inline void processFunctionHook( + int32_t FuncId, XRayEntryType Entry, uint64_t TSC, unsigned char CPU, + int (*wall_clock_reader)(clockid_t, struct timespec *), + const std::atomic &LoggingStatus, + const std::shared_ptr &BQ) XRAY_NEVER_INSTRUMENT { // Bail out right away if logging is not initialized yet. if (LoggingStatus.load(std::memory_order_acquire) != XRayLogInitStatus::XRAY_LOG_INITIALIZED) @@ -356,7 +336,7 @@ return; } - if (!loggingInitialized() || LocalBQ->finalizing()) { + if (!loggingInitialized(LoggingStatus) || LocalBQ->finalizing()) { writeEOBMetadata(); if (auto EC = BQ->releaseBuffer(Buffer)) { Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer, @@ -375,7 +355,7 @@ return; } - setupNewBuffer(Buffer); + setupNewBuffer(Buffer, wall_clock_reader); } if (CurrentCPU == std::numeric_limits::max()) { @@ -434,7 +414,7 @@ Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str()); return; } - setupNewBuffer(Buffer); + setupNewBuffer(Buffer, wall_clock_reader); } // By this point, we are now ready to write at most 24 bytes (one metadata @@ -444,15 +424,6 @@ static_cast(MetadataRecSize) && "Misconfigured BufferQueue provided; Buffer size not large enough."); - std::aligned_storage::type - AlignedFuncRecordBuffer; - auto &FuncRecord = - *reinterpret_cast(&AlignedFuncRecordBuffer); - FuncRecord.Type = uint8_t(RecordType::Function); - - // Only get the lower 28 bits of the function id. - FuncRecord.FuncId = FuncId & ~(0x0F << 28); - // Here we compute the TSC Delta. There are a few interesting situations we // need to account for: // @@ -472,7 +443,8 @@ // FunctionRecord. In this case we write down just a FunctionRecord with // the correct TSC delta. // - FuncRecord.TSCDelta = 0; + + uint32_t RecordTSCDelta = 0; if (CPU != CurrentCPU) { // We've moved to a new CPU. writeNewCPUIdMetadata(CPU, TSC); @@ -480,11 +452,11 @@ // If the delta is greater than the range for a uint32_t, then we write out // the TSC wrap metadata entry with the full TSC, and the TSC for the // function record be 0. - auto Delta = LastTSC - TSC; + auto Delta = TSC - LastTSC; if (Delta > (1ULL << 32) - 1) writeTSCWrapMetadata(TSC); else - FuncRecord.TSCDelta = Delta; + RecordTSCDelta = Delta; } // We then update our "LastTSC" and "CurrentCPU" thread-local variables to aid @@ -492,22 +464,7 @@ LastTSC = TSC; CurrentCPU = CPU; - switch (Entry) { - case XRayEntryType::ENTRY: - case XRayEntryType::LOG_ARGS_ENTRY: - FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionEnter); - break; - case XRayEntryType::EXIT: - FuncRecord.RecordKind = uint8_t(FunctionRecord::RecordKinds::FunctionExit); - break; - case XRayEntryType::TAIL: - FuncRecord.RecordKind = - uint8_t(FunctionRecord::RecordKinds::FunctionTailExit); - break; - } - - std::memcpy(RecordPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord)); - RecordPtr += sizeof(FunctionRecord); + writeFunctionRecord(FuncId, RecordTSCDelta, Entry, RecordPtr); // If we've exhausted the buffer by this time, we then release the buffer to // make sure that other threads may start using this buffer. @@ -522,16 +479,8 @@ } } +} // namespace __xray_fdr_internal + } // namespace __xray -static auto UNUSED Unused = [] { - using namespace __xray; - if (flags()->xray_fdr_log) { - XRayLogImpl Impl{ - fdrLoggingInit, fdrLoggingFinalize, fdrLoggingHandleArg0, - fdrLoggingFlush, - }; - __xray_set_log_impl(Impl); - } - return true; -}(); +#endif // XRAY_XRAY_FDR_LOGGING_IMPL_H