Index: include/xray/xray_log_interface.h =================================================================== --- /dev/null +++ include/xray/xray_log_interface.h @@ -0,0 +1,51 @@ +//===-- xray_log_interface.h ----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +// APIs for installing a new logging implementation. +//===----------------------------------------------------------------------===// +#ifndef XRAY_XRAY_LOG_INTERFACE_H +#define XRAY_XRAY_LOG_INTERFACE_H + +#include "xray/xray_interface.h" +#include + +extern "C" { + +enum XRayLogInitStatus { + XRAY_LOG_UNINITIALIZED, + XRAY_LOG_INITIALIZING, + XRAY_LOG_INITIALIZED, + XRAY_LOG_FINALIZING, + XRAY_LOG_FINALIZED, +}; + +enum XRayLogFlushStatus { + XRAY_LOG_NOT_FLUSHING, + XRAY_LOG_FLUSHING, + XRAY_LOG_FLUSHED +}; + +struct XRayLogImpl { + XRayLogInitStatus (*log_init)(size_t, size_t, void *, size_t); + XRayLogInitStatus (*log_finalize)(); + void (*handle_arg0)(int32_t, XRayEntryType); + XRayLogFlushStatus (*flush_log)(); +}; + +void __xray_set_log_impl(XRayLogImpl Impl); +XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers, + void *Args, size_t ArgsSize); +XRayLogInitStatus __xray_log_finalize(); +XRayLogFlushStatus __xray_log_flushLog(); + +} // extern "C" + +#endif // XRAY_XRAY_LOG_INTERFACE_H Index: include/xray/xray_records.h =================================================================== --- include/xray/xray_records.h +++ include/xray/xray_records.h @@ -21,6 +21,7 @@ enum FileTypes { NAIVE_LOG = 0, + FDR_LOG = 1, }; // This data structure is used to describe the contents of the file. We use this @@ -40,6 +41,11 @@ // The frequency by which TSC increases per-second. alignas(8) uint64_t CycleFrequency = 0; + + // The current civiltime timestamp, as retrived from 'clock_gettime'. This + // allows readers of the file to determine when the file was created or + // written down. + struct timespec TS; } __attribute__((packed)); static_assert(sizeof(XRayFileHeader) == 32, "XRayFileHeader != 32 bytes"); Index: lib/xray/CMakeLists.txt =================================================================== --- lib/xray/CMakeLists.txt +++ lib/xray/CMakeLists.txt @@ -1,15 +1,15 @@ # Build for the XRay runtime support library. -# Core XRay runtime library implementation files. +# XRay runtime library implementation files. set(XRAY_SOURCES + xray_inmemory_log.cc xray_init.cc - xray_interface.cc xray_flags.cc - xray_inmemory_log.cc) - -# XRay flight data recorder (FDR) implementation files. -set(XRAY_FDR_SOURCES - xray_buffer_queue.cc) + xray_interface.cc + xray_buffer_queue.cc + xray_log_interface.cc + xray_fdr_logging.cc + xray_utils.cc) set(x86_64_SOURCES xray_x86_64.cc @@ -21,12 +21,13 @@ xray_trampoline_arm.S ${XRAY_SOURCES}) -set(armhf_SOURCES ${arm_SOURCES}) +set(armhf_SOURCES + ${arm_SOURCES}) set(aarch64_SOURCES - xray_AArch64.cc - xray_trampoline_AArch64.S - ${XRAY_SOURCES}) + xray_AArch64.cc + xray_trampoline_AArch64.S + ${XRAY_SOURCES}) include_directories(..) include_directories(../../include) @@ -41,13 +42,7 @@ SOURCES ${XRAY_SOURCES} CFLAGS ${XRAY_CFLAGS} DEFS ${XRAY_COMMON_DEFINITIONS}) -add_compiler_rt_object_libraries(RTXrayFDR - ARCHS ${XRAY_SUPPORTED_ARCH} - SOURCES ${XRAY_FDR_SOURCES} CFLAGS ${XRAY_CFLAGS} - DEFS ${XRAY_COMMON_DEFINITIONS}) - add_compiler_rt_component(xray) -add_compiler_rt_component(xray-fdr) set(XRAY_COMMON_RUNTIME_OBJECT_LIBS RTSanitizerCommon @@ -63,14 +58,6 @@ DEFS ${XRAY_COMMON_DEFINITIONS} OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS} PARENT_TARGET xray) - add_compiler_rt_runtime(clang_rt.xray-fdr - STATIC - ARCHS ${arch} - SOURCES ${XRAY_FDR_SOURCES} - CFLAGS ${XRAY_CFLAGS} - DEFS ${XRAY_COMMON_DEFINITIONS} - OBJECT_LIBS ${XRAY_COMMON_RUNTIME_OBJECT_LIBS} - PARENT_TARGET xray-fdr) endif() endforeach() Index: lib/xray/tests/CMakeLists.txt =================================================================== --- lib/xray/tests/CMakeLists.txt +++ lib/xray/tests/CMakeLists.txt @@ -8,14 +8,15 @@ ${COMPILER_RT_UNITTEST_CFLAGS} ${COMPILER_RT_GTEST_CFLAGS} -I${COMPILER_RT_SOURCE_DIR}/include - -I${COMPILER_RT_SOURCE_DIR}/lib/xray) + -I${COMPILER_RT_SOURCE_DIR}/lib/xray + -I${COMPILER_RT_SOURCE_DIR}/lib) macro(xray_compile obj_list source arch) get_filename_component(basename ${source} NAME) set(output_obj "${basename}.${arch}.o") get_target_flags_for_arch(${arch} TARGET_CFLAGS) if(NOT COMPILER_RT_STANDALONE_BUILD) - list(APPEND COMPILE_DEPS gtest_main xray-fdr) + list(APPEND COMPILE_DEPS gtest_main xray) endif() clang_compile(${output_obj} ${source} CFLAGS ${XRAY_UNITTEST_CFLAGS} ${TARGET_CFLAGS} @@ -38,7 +39,7 @@ get_target_flags_for_arch(${arch} TARGET_LINK_FLAGS) set(TEST_DEPS ${TEST_OBJECTS}) if(NOT COMPILER_RT_STANDALONE_BUILD) - list(APPEND TEST_DEPS gtest_main xray-fdr) + list(APPEND TEST_DEPS gtest_main xray) endif() if(NOT APPLE) add_compiler_rt_test(XRayUnitTests ${testname} @@ -47,7 +48,8 @@ LINK_FLAGS ${TARGET_LINK_FLAGS} -lstdc++ -lm ${CMAKE_THREAD_LIBS_INIT} -lpthread - -L${COMPILER_RT_LIBRARY_OUTPUT_DIR} -lclang_rt.xray-fdr-${arch}) + -L${COMPILER_RT_LIBRARY_OUTPUT_DIR} -lclang_rt.xray-${arch} + -latomic -ldl -lrt) endif() # FIXME: Figure out how to run even just the unit tests on APPLE. endforeach() Index: lib/xray/tests/unit/CMakeLists.txt =================================================================== --- lib/xray/tests/unit/CMakeLists.txt +++ lib/xray/tests/unit/CMakeLists.txt @@ -1,2 +1,4 @@ add_xray_unittest(XRayBufferQueueTest SOURCES buffer_queue_test.cc xray_unit_test_main.cc) +add_xray_unittest(XRayFDRLoggingTest SOURCES + fdr_logging_test.cc xray_unit_test_main.cc) Index: lib/xray/tests/unit/buffer_queue_test.cc =================================================================== --- lib/xray/tests/unit/buffer_queue_test.cc +++ lib/xray/tests/unit/buffer_queue_test.cc @@ -21,10 +21,16 @@ static constexpr size_t kSize = 4096; -TEST(BufferQueueTest, API) { BufferQueue Buffers(kSize, 1); } +TEST(BufferQueueTest, API) { + bool Success = false; + BufferQueue Buffers(kSize, 1, Success); + ASSERT_TRUE(Success); +} TEST(BufferQueueTest, GetAndRelease) { - BufferQueue Buffers(kSize, 1); + bool Success = false; + BufferQueue Buffers(kSize, 1, Success); + ASSERT_TRUE(Success); BufferQueue::Buffer Buf; ASSERT_EQ(Buffers.getBuffer(Buf), std::error_code()); ASSERT_NE(nullptr, Buf.Buffer); @@ -33,7 +39,9 @@ } TEST(BufferQueueTest, GetUntilFailed) { - BufferQueue Buffers(kSize, 1); + bool Success = false; + BufferQueue Buffers(kSize, 1, Success); + ASSERT_TRUE(Success); BufferQueue::Buffer Buf0; EXPECT_EQ(Buffers.getBuffer(Buf0), std::error_code()); BufferQueue::Buffer Buf1; @@ -42,7 +50,9 @@ } TEST(BufferQueueTest, ReleaseUnknown) { - BufferQueue Buffers(kSize, 1); + bool Success = false; + BufferQueue Buffers(kSize, 1, Success); + ASSERT_TRUE(Success); BufferQueue::Buffer Buf; Buf.Buffer = reinterpret_cast(0xdeadbeef); Buf.Size = kSize; @@ -50,7 +60,9 @@ } TEST(BufferQueueTest, ErrorsWhenFinalising) { - BufferQueue Buffers(kSize, 2); + bool Success = false; + BufferQueue Buffers(kSize, 2, Success); + ASSERT_TRUE(Success); BufferQueue::Buffer Buf; ASSERT_EQ(Buffers.getBuffer(Buf), std::error_code()); ASSERT_NE(nullptr, Buf.Buffer); @@ -62,7 +74,9 @@ } TEST(BufferQueueTest, MultiThreaded) { - BufferQueue Buffers(kSize, 100); + bool Success = false; + BufferQueue Buffers(kSize, 100, Success); + ASSERT_TRUE(Success); auto F = [&] { BufferQueue::Buffer B; while (!Buffers.getBuffer(B)) { @@ -78,4 +92,18 @@ F(); } +TEST(BufferQueueTest, Apply) { + bool Success = false; + BufferQueue Buffers(kSize, 10, Success); + ASSERT_TRUE(Success); + auto Count = 0; + BufferQueue::Buffer B; + for (int I = 0; I < 10; ++I) { + ASSERT_FALSE(Buffers.getBuffer(B)); + ASSERT_FALSE(Buffers.releaseBuffer(B)); + } + Buffers.apply([&](const BufferQueue::Buffer &B) { ++Count; }); + ASSERT_EQ(Count, 10); +} + } // namespace __xray Index: lib/xray/tests/unit/fdr_logging_test.cc =================================================================== --- /dev/null +++ lib/xray/tests/unit/fdr_logging_test.cc @@ -0,0 +1,127 @@ +//===-- fdr_logging_test.cc -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// +#include "xray_fdr_logging.h" +#include "gtest/gtest.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "xray/xray_records.h" + +namespace __xray { +namespace { + +constexpr auto kBufferSize = 16384; +constexpr auto kBufferMax = 10; + +struct ScopedFileCloserAndDeleter { + explicit ScopedFileCloserAndDeleter(int Fd, const char *Filename) + : Fd(Fd), Filename(Filename) {} + + ~ScopedFileCloserAndDeleter() { + if (Fd) { + close(Fd); + unlink(Filename); + } + } + + int Fd; + const char *Filename; +}; + +TEST(FDRLoggingTest, Simple) { + FDRLoggingOptions Options; + Options.ReportErrors = true; + char TmpFilename[] = "fdr-logging-test.XXXXXX"; + Options.Fd = mkstemp(TmpFilename); + ASSERT_NE(Options.Fd, -1); + ASSERT_EQ(FDRLogging_init(kBufferSize, kBufferMax, &Options, + sizeof(FDRLoggingOptions)), + XRayLogInitStatus::XRAY_LOG_INITIALIZED); + FDRLogging_handleArg0(1, XRayEntryType::ENTRY); + FDRLogging_handleArg0(1, XRayEntryType::EXIT); + ASSERT_EQ(FDRLogging_finalize(), XRayLogInitStatus::XRAY_LOG_FINALIZED); + ASSERT_EQ(FDRLogging_flush(), XRayLogFlushStatus::XRAY_LOG_FLUSHED); + ASSERT_EQ(FDRLogging_reset(), XRayLogInitStatus::XRAY_LOG_UNINITIALIZED); + + // To do this properly, we have to close the file descriptor then re-open the + // file for reading this time. + ASSERT_EQ(close(Options.Fd), 0); + int Fd = open(TmpFilename, O_RDONLY); + ASSERT_NE(-1, Fd); + ScopedFileCloserAndDeleter Guard(Fd, TmpFilename); + auto Size = lseek(Fd, 0, SEEK_END); + ASSERT_NE(Size, 0); + // Map the file contents. + const char *Contents = static_cast( + mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0)); + ASSERT_NE(Contents, nullptr); + + XRayFileHeader H; + memcpy(&H, Contents, sizeof(XRayFileHeader)); + ASSERT_EQ(H.Version, 1); + ASSERT_EQ(H.Type, FileTypes::FDR_LOG); + + // We require one buffer at least to have the "start of buffer" metadata + // record. + MetadataRecord MDR; + memcpy(&MDR, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord)); + ASSERT_EQ(MDR.RecordKind, MetadataRecord::RecordKinds::NewBuffer); +} + +TEST(FDRLoggingTest, Multiple) { + FDRLoggingOptions Options; + char TmpFilename[] = "fdr-logging-test.XXXXXX"; + Options.Fd = mkstemp(TmpFilename); + ASSERT_NE(Options.Fd, -1); + ASSERT_EQ(FDRLogging_init(kBufferSize, kBufferMax, &Options, + sizeof(FDRLoggingOptions)), + XRayLogInitStatus::XRAY_LOG_INITIALIZED); + for (uint64_t I = 0; I < 100; ++I) { + FDRLogging_handleArg0(1, XRayEntryType::ENTRY); + FDRLogging_handleArg0(1, XRayEntryType::EXIT); + } + ASSERT_EQ(FDRLogging_finalize(), XRayLogInitStatus::XRAY_LOG_FINALIZED); + ASSERT_EQ(FDRLogging_flush(), XRayLogFlushStatus::XRAY_LOG_FLUSHED); + ASSERT_EQ(FDRLogging_reset(), XRayLogInitStatus::XRAY_LOG_UNINITIALIZED); + + // To do this properly, we have to close the file descriptor then re-open the + // file for reading this time. + ASSERT_EQ(close(Options.Fd), 0); + int Fd = open(TmpFilename, O_RDONLY); + ASSERT_NE(-1, Fd); + ScopedFileCloserAndDeleter Guard(Fd, TmpFilename); + auto Size = lseek(Fd, 0, SEEK_END); + ASSERT_NE(Size, 0); + // Map the file contents. + const char *Contents = static_cast( + mmap(NULL, Size, PROT_READ, MAP_PRIVATE, Fd, 0)); + ASSERT_NE(Contents, nullptr); + + XRayFileHeader H; + memcpy(&H, Contents, sizeof(XRayFileHeader)); + ASSERT_EQ(H.Version, 1); + ASSERT_EQ(H.Type, FileTypes::FDR_LOG); + + MetadataRecord MDR0; + memcpy(&MDR0, Contents + sizeof(XRayFileHeader), sizeof(MetadataRecord)); + ASSERT_EQ(MDR0.RecordKind, MetadataRecord::RecordKinds::NewBuffer); +} + +} // namespace +} // namespace __xray Index: lib/xray/xray_buffer_queue.h =================================================================== --- lib/xray/xray_buffer_queue.h +++ lib/xray/xray_buffer_queue.h @@ -21,6 +21,7 @@ #include #include #include +#include namespace __xray { @@ -38,14 +39,18 @@ private: std::size_t BufferSize; - std::deque Buffers; + + // We use a bool to indicate whether the Buffer has been used in this + // freelist implementation. + std::deque> Buffers; std::mutex Mutex; std::unordered_set OwnedBuffers; std::atomic Finalizing; public: - /// Initialise a queue of size |N| with buffers of size |B|. - BufferQueue(std::size_t B, std::size_t N); + /// Initialise a queue of size |N| with buffers of size |B|. We report success + /// through |Success|. + BufferQueue(std::size_t B, std::size_t N, bool &Success); /// Updates |Buf| to contain the pointer to an appropriate buffer. Returns an /// error in case there are no available buffers to return when we will run @@ -68,15 +73,26 @@ bool finalizing() const { return Finalizing.load(std::memory_order_acquire); } - // Sets the state of the BufferQueue to finalizing, which ensures that: - // - // - All subsequent attempts to retrieve a Buffer will fail. - // - All releaseBuffer operations will not fail. - // - // After a call to finalize succeeds, all subsequent calls to finalize will - // fail with std::errc::state_not_recoverable. + /// Sets the state of the BufferQueue to finalizing, which ensures that: + /// + /// - All subsequent attempts to retrieve a Buffer will fail. + /// - All releaseBuffer operations will not fail. + /// + /// After a call to finalize succeeds, all subsequent calls to finalize will + /// fail with std::errc::state_not_recoverable. std::error_code finalize(); + /// Applies the provided function F to each Buffer in the queue, only if the + /// Buffer is marked 'used' (i.e. has been the result of getBuffer(...) and a + /// releaseBuffer(...) operation. + template void apply(F Fn) { + std::lock_guard G(Mutex); + for (const auto &T : Buffers) { + if (std::get<1>(T)) + Fn(std::get<0>(T)); + } + } + // Cleans up allocated buffers. ~BufferQueue(); }; Index: lib/xray/xray_buffer_queue.cc =================================================================== --- lib/xray/xray_buffer_queue.cc +++ lib/xray/xray_buffer_queue.cc @@ -18,15 +18,21 @@ using namespace __xray; -BufferQueue::BufferQueue(std::size_t B, std::size_t N) +BufferQueue::BufferQueue(std::size_t B, std::size_t N, bool &Success) : BufferSize(B), Buffers(N), Mutex(), OwnedBuffers(), Finalizing(false) { - for (auto &Buf : Buffers) { + for (auto &T : Buffers) { void *Tmp = malloc(BufferSize); + if (Tmp == nullptr) { + Success = false; + return; + } + + auto &Buf = std::get<0>(T); Buf.Buffer = Tmp; Buf.Size = B; - if (Tmp != 0) - OwnedBuffers.insert(Tmp); + OwnedBuffers.emplace(Tmp); } + Success = true; } std::error_code BufferQueue::getBuffer(Buffer &Buf) { @@ -35,7 +41,11 @@ std::lock_guard Guard(Mutex); if (Buffers.empty()) return std::make_error_code(std::errc::not_enough_memory); - Buf = Buffers.front(); + auto &T = Buffers.front(); + auto &B = std::get<0>(T); + Buf = B; + B.Buffer = nullptr; + B.Size = 0; Buffers.pop_front(); return {}; } @@ -44,9 +54,11 @@ if (OwnedBuffers.count(Buf.Buffer) == 0) return std::make_error_code(std::errc::argument_out_of_domain); std::lock_guard Guard(Mutex); - Buffers.push_back(Buf); + + // Now that the buffer has been released, we mark it as "used". + Buffers.emplace(Buffers.end(), Buf, true /* used */); Buf.Buffer = nullptr; - Buf.Size = BufferSize; + Buf.Size = 0; return {}; } @@ -57,9 +69,8 @@ } BufferQueue::~BufferQueue() { - for (auto &Buf : Buffers) { + for (auto &T : Buffers) { + auto &Buf = std::get<0>(T); free(Buf.Buffer); - Buf.Buffer = nullptr; - Buf.Size = 0; } } Index: lib/xray/xray_fdr_logging.h =================================================================== --- /dev/null +++ lib/xray/xray_fdr_logging.h @@ -0,0 +1,95 @@ +//===-- xray_fdr_logging.h ------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_XRAY_FDR_LOGGING_H +#define XRAY_XRAY_FDR_LOGGING_H + +#include "xray/xray_log_interface.h" + +// FDR (Flight Data Recorder) Mode +// =============================== +// +// The XRay whitepaper describes a mode of operation for function call trace +// logging that involves writing small records into an in-memory circular +// buffer, that then gets logged to disk on demand. To do this efficiently and +// capture as much data as we can, we use smaller records compared to the +// default mode of always writing fixed-size records. + +namespace __xray { + +enum class RecordType : uint8_t { + Function, Metadata +}; + +// A MetadataRecord encodes the kind of record in its first byte, and have 15 +// additional bytes in the end to hold free-form data. +struct alignas(16) MetadataRecord { + // A MetadataRecord must always have a type of 1. + RecordType Type : 1; + + // Each kind of record is represented as a 7-bit value (even though we use an + // unsigned 8-bit enum class to do so). + enum class RecordKinds : uint8_t { + NewBuffer, + EndOfBuffer, + NewCPUId, + TSCWrap, + WalltimeMarker, + }; + RecordKinds RecordKind : 7; // Use 7 bits to identify this record type. + char Data[15]; +} __attribute__((packed)); + +static_assert(sizeof(MetadataRecord) == 16, "Wrong size for MetadataRecord."); + +struct alignas(8) FunctionRecord { + // A FunctionRecord must always have a type of 0. + RecordType Type : 1; + enum class RecordKinds { + FunctionEnter = 0x00, + FunctionExit = 0x01, + FunctionTailExit = 0x02, + }; + RecordKinds RecordKind : 3; + + // We only use 28 bits of the function ID, so that we can use as few bytes as + // possible. This means we only support 2^28 (268,435,456) unique function ids + // in a single binary. + int FuncId : 28; + + // We use another 4 bytes to hold the delta between the previous entry's TSC. + // In case we've found that the distance is greater than the allowable 32 bits + // (either because we are running in a different CPU and the TSC might be + // different then), we should use a MetadataRecord before this FunctionRecord + // that will contain the full TSC for that CPU, and keep this to 0. + uint32_t TSCDelta; +} __attribute__((packed)); + +static_assert(sizeof(FunctionRecord) == 8, "Wrong size for FunctionRecord."); + +// Options used by the FDR implementation. +struct FDRLoggingOptions { + bool ReportErrors = false; + int Fd = -1; +}; + +// Flight Data Recorder mode implementation interfaces. +XRayLogInitStatus FDRLogging_init(std::size_t BufferSize, std::size_t BufferMax, + void *Options, size_t OptionsSize); +XRayLogInitStatus FDRLogging_finalize(); +void FDRLogging_handleArg0(int32_t FuncId, XRayEntryType Entry); +XRayLogFlushStatus FDRLogging_flush(); +XRayLogInitStatus FDRLogging_reset(); + +} // namespace __xray + +#endif // XRAY_XRAY_FDR_LOGGING_H Index: lib/xray/xray_fdr_logging.cc =================================================================== --- /dev/null +++ lib/xray/xray_fdr_logging.cc @@ -0,0 +1,546 @@ +//===-- xray_fdr_logging.cc ------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instruementation system. +// +// Here we implement the Flight Data Recorder mode for XRay, where we use +// compact structures to store records in memory as well as when writing out the +// data to files. +// +//===----------------------------------------------------------------------===// +#include "xray_fdr_logging.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "sanitizer_common/sanitizer_common.h" +#include "xray/xray_interface.h" +#include "xray/xray_records.h" +#include "xray_buffer_queue.h" +#include "xray_defs.h" +#include "xray_flags.h" +#include "xray_utils.h" + +#if defined(__x86_64__) +#include "xray_x86_64.h" +#elif defined(__arm__) || defined(__aarch64__) +#include "xray_emulate_tsc.h" +#else +#error "Unsupported CPU Architecture" +#endif /* CPU architecture */ + +namespace __xray { + +// Global BufferQueue. +std::shared_ptr BQ; + +std::atomic LoggingStatus{ + XRayLogInitStatus::XRAY_LOG_UNINITIALIZED}; + +std::atomic LogFlushStatus{ + XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING}; + +std::unique_ptr FDROptions; + +XRayLogInitStatus FDRLogging_init(std::size_t BufferSize, std::size_t BufferMax, + void *Options, + size_t OptionsSize) XRAY_NEVER_INSTRUMENT { + assert(OptionsSize == sizeof(FDRLoggingOptions)); + XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + if (!LoggingStatus.compare_exchange_weak( + CurrentStatus, XRayLogInitStatus::XRAY_LOG_INITIALIZING, + std::memory_order_release, std::memory_order_relaxed)) + return CurrentStatus; + + FDROptions.reset(new FDRLoggingOptions()); + *FDROptions = *reinterpret_cast(Options); + if (FDROptions->ReportErrors) + SetPrintfAndReportCallback(PrintToStdErr); + + bool Success = false; + BQ = std::make_shared(BufferSize, BufferMax, Success); + if (!Success) { + Report("BufferQueue init failed.\n"); + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + } + + // Install the actual handleArg0 handler after initialising the buffers. + __xray_set_handler(FDRLogging_handleArg0); + + LoggingStatus.store(XRayLogInitStatus::XRAY_LOG_INITIALIZED, + std::memory_order_release); + return XRayLogInitStatus::XRAY_LOG_INITIALIZED; +} + +// Must finalize before flushing. +XRayLogFlushStatus FDRLogging_flush() XRAY_NEVER_INSTRUMENT { + if (LoggingStatus.load(std::memory_order_acquire) != + XRayLogInitStatus::XRAY_LOG_FINALIZED) + return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + + XRayLogFlushStatus Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + if (!LogFlushStatus.compare_exchange_weak( + Result, XRayLogFlushStatus::XRAY_LOG_FLUSHING, + std::memory_order_release, std::memory_order_relaxed)) + return Result; + + // Make a copy of the BufferQueue pointer to prevent other threads that may be + // resetting it from blowing away the queue prematurely while we're dealing + // with it. + auto LocalBQ = BQ; + + // We write out the file in the following format: + // + // 1) We write down the XRay file header with version 1, type FDR_LOG. + // 2) Then we use the 'apply' member of the BufferQueue that's live, to + // ensure that at this point in time we write down the buffers that have + // been released (and marked "used") -- we dump the full buffer for now + // (fixed-sized) and let the tools reading the buffers deal with the data + // afterwards. + // + int Fd = FDROptions->Fd; + if (Fd == -1) + Fd = getLogFD(); + if (Fd == -1) { + auto Result = XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + LogFlushStatus.store(Result, std::memory_order_release); + return Result; + } + + XRayFileHeader Header; + Header.Version = 1; + Header.Type = FileTypes::FDR_LOG; + auto CPUFrequency = getCPUFrequency(); + Header.CycleFrequency = + CPUFrequency == -1 ? 0 : static_cast(CPUFrequency); + // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' + // before setting the values in the header. + Header.ConstantTSC = 1; + Header.NonstopTSC = 1; + clock_gettime(CLOCK_REALTIME, &Header.TS); + retryingWriteAll(Fd, reinterpret_cast(&Header), + reinterpret_cast(&Header) + sizeof(Header)); + LocalBQ->apply([&](const BufferQueue::Buffer &B) { + retryingWriteAll(Fd, reinterpret_cast(B.Buffer), + reinterpret_cast(B.Buffer) + B.Size); + }); + LogFlushStatus.store(XRayLogFlushStatus::XRAY_LOG_FLUSHED, + std::memory_order_release); + return XRayLogFlushStatus::XRAY_LOG_FLUSHED; +} + +XRayLogInitStatus FDRLogging_finalize() XRAY_NEVER_INSTRUMENT { + XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_INITIALIZED; + if (!LoggingStatus.compare_exchange_weak( + CurrentStatus, XRayLogInitStatus::XRAY_LOG_FINALIZING, + std::memory_order_release, std::memory_order_relaxed)) + return CurrentStatus; + + // Do special things to make the log finalize itself, and not allow any more + // operations to be performed until re-initialized. + BQ->finalize(); + + LoggingStatus.store(XRayLogInitStatus::XRAY_LOG_FINALIZED, + std::memory_order_release); + return XRayLogInitStatus::XRAY_LOG_FINALIZED; +} + +XRayLogInitStatus FDRLogging_reset() XRAY_NEVER_INSTRUMENT { + XRayLogInitStatus CurrentStatus = XRayLogInitStatus::XRAY_LOG_FINALIZED; + if (!LoggingStatus.compare_exchange_weak( + CurrentStatus, XRayLogInitStatus::XRAY_LOG_UNINITIALIZED, + std::memory_order_release, std::memory_order_relaxed)) + return CurrentStatus; + + // Release the in-memory buffer queue. + BQ.reset(); + + // Spin until the flushing status is flushed. + XRayLogFlushStatus CurrentFlushingStatus = + XRayLogFlushStatus::XRAY_LOG_FLUSHED; + while (!LogFlushStatus.compare_exchange_weak( + CurrentFlushingStatus, XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING, + std::memory_order_release, std::memory_order_relaxed)) { + if (CurrentFlushingStatus == XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING) + break; + CurrentFlushingStatus = XRayLogFlushStatus::XRAY_LOG_FLUSHED; + } + + // At this point, we know that the status is flushed, and that we can assume + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; +} + +namespace { +thread_local BufferQueue::Buffer Buffer; +thread_local char *RecordPtr = nullptr; + +void setupNewBuffer(const BufferQueue::Buffer &Buffer) XRAY_NEVER_INSTRUMENT { + RecordPtr = static_cast(Buffer.Buffer); + + static constexpr int InitRecordsCount = 2; + std::aligned_storage::type Records[InitRecordsCount]; + { + // Write out a MetadataRecord to signify that this is the start of a new + // buffer, associated with a particular thread, with a new CPU. For the + // data, we have 15 bytes to squeeze as much information as we can. At this + // point we only write down the following bytes: + // - Thread ID (pid_t, 4 bytes) + auto &NewBuffer = *reinterpret_cast(&Records[0]); + NewBuffer.Type = RecordType::Metadata; + NewBuffer.RecordKind = MetadataRecord::RecordKinds::NewBuffer; + pid_t Pid = getpid(); + std::memcpy(&NewBuffer.Data, &Pid, sizeof(pid_t)); + } + + // Also write the WalltimeMarker record. + { + static_assert(sizeof(time_t) <= 8, "time_t needs to be at most 8 bytes"); + auto &WalltimeMarker = *reinterpret_cast(&Records[1]); + WalltimeMarker.Type = RecordType::Metadata; + WalltimeMarker.RecordKind = MetadataRecord::RecordKinds::WalltimeMarker; + timespec TS{0, 0}; + clock_gettime(CLOCK_MONOTONIC, &TS); + static_assert(sizeof(TS.tv_sec) == sizeof(time_t), + "tv_sec is not a time_t?!"); + static_assert(sizeof(TS.tv_nsec) == sizeof(int64_t), + "tv_nsec is not an int64_t?!"); + + // Because sizeof(timespec) == 16, we ought to really write just the seconds + // part of the timestamp and discard the high bits of the nanoseconds part + // (as we really only need microsecond precision). + int32_t Micros = TS.tv_nsec / 1000; + std::memcpy(WalltimeMarker.Data, &TS.tv_sec, sizeof(TS.tv_sec)); + std::memcpy(WalltimeMarker.Data + sizeof(TS.tv_sec), &Micros, + sizeof(Micros)); + } + std::memcpy(RecordPtr, Records, sizeof(MetadataRecord) * InitRecordsCount); + RecordPtr += sizeof(MetadataRecord) * InitRecordsCount; +} + +void writeNewCPUIdMetadata(unsigned CPU, uint64_t TSC) XRAY_NEVER_INSTRUMENT { + MetadataRecord NewCPUId; + NewCPUId.Type = RecordType::Metadata; + NewCPUId.RecordKind = MetadataRecord::RecordKinds::NewCPUId; + + // The data for the New CPU will contain the following bytes: + // - CPU ID (uint16_t, 4 bytes) + // - Full TSC (uint64_t, 8 bytes) + // Total = 12 bytes. + std::memcpy(&NewCPUId.Data, &CPU, sizeof(CPU)); + std::memcpy(&NewCPUId.Data[sizeof(uint16_t)], &TSC, sizeof(TSC)); + std::memcpy(RecordPtr, &NewCPUId, sizeof(MetadataRecord)); + RecordPtr += sizeof(MetadataRecord); +} + +void writeEOBMetadata() XRAY_NEVER_INSTRUMENT { + MetadataRecord EOBMeta; + EOBMeta.Type = RecordType::Metadata; + EOBMeta.RecordKind = MetadataRecord::RecordKinds::EndOfBuffer; + // For now we don't write any bytes into the Data field. + std::memcpy(RecordPtr, &EOBMeta, sizeof(MetadataRecord)); + RecordPtr += sizeof(MetadataRecord); +} + +void writeTSCWrapMetadata(uint64_t TSC) XRAY_NEVER_INSTRUMENT { + MetadataRecord TSCWrap; + TSCWrap.Type = RecordType::Metadata; + TSCWrap.RecordKind = MetadataRecord::RecordKinds::TSCWrap; + + // The data for the TSCWrap record contains the following bytes: + // - Full TSC (uint64_t, 8 bytes) + // Total = 8 bytes. + std::memcpy(&TSCWrap.Data, &TSC, sizeof(TSC)); + std::memcpy(RecordPtr, &TSCWrap, sizeof(MetadataRecord)); + RecordPtr += sizeof(MetadataRecord); +} + +constexpr auto MetadataRecSize = sizeof(MetadataRecord); +constexpr auto FunctionRecSize = sizeof(FunctionRecord); + +class ThreadExitBufferCleanup { + std::weak_ptr Buffers; + BufferQueue::Buffer &Buffer; + +public: + explicit ThreadExitBufferCleanup(std::weak_ptr BQ, + BufferQueue::Buffer &Buffer) + XRAY_NEVER_INSTRUMENT : Buffers(BQ), + Buffer(Buffer) {} + + ~ThreadExitBufferCleanup() noexcept XRAY_NEVER_INSTRUMENT { + if (RecordPtr == nullptr) + return; + + // We make sure that upon exit, a thread will write out the EOB + // MetadataRecord in the thread-local log, and also release the buffer to + // the queue. + assert((RecordPtr + MetadataRecSize) - static_cast(Buffer.Buffer) >= + static_cast(MetadataRecSize)); + if (auto BQ = Buffers.lock()) { + writeEOBMetadata(); + if (auto EC = BQ->releaseBuffer(Buffer)) + Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer, + EC.message().c_str()); + return; + } + } +}; + +class RecursionGuard { + bool &Running; + const bool Valid; + +public: + explicit RecursionGuard(bool &R) : Running(R), Valid(!R) { + if (Valid) + Running = true; + } + + RecursionGuard(const RecursionGuard &) = delete; + RecursionGuard(RecursionGuard &&) = delete; + RecursionGuard &operator=(const RecursionGuard &) = delete; + RecursionGuard &operator=(RecursionGuard &&) = delete; + + explicit operator bool() const { return Valid; } + + ~RecursionGuard() noexcept { + if (Valid) + Running = false; + } +}; + +inline bool loggingInitialized() { + return LoggingStatus.load(std::memory_order_acquire) == + XRayLogInitStatus::XRAY_LOG_INITIALIZED; +} + +} // namespace + +void FDRLogging_handleArg0(int32_t FuncId, + XRayEntryType Entry) XRAY_NEVER_INSTRUMENT { + // We want to get the TSC as early as possible, so that we can check whether + // we've seen this CPU before. We also do it before we load anything else, to + // allow for forward progress with the scheduling. + unsigned char CPU; + uint64_t TSC = __xray::readTSC(CPU); + + // Bail out right away if logging is not initialized yet. + if (LoggingStatus.load(std::memory_order_acquire) != + XRayLogInitStatus::XRAY_LOG_INITIALIZED) + return; + + // We use a thread_local variable to keep track of which CPUs we've already + // run, and the TSC times for these CPUs. This allows us to stop repeating the + // CPU field in the function records. + // + // We assume that we'll support only 65536 CPUs for x86_64. + thread_local uint16_t CurrentCPU = std::numeric_limits::max(); + thread_local uint64_t LastTSC = 0; + + // Make sure a thread that's ever called handleArg0 has a thread-local + // live reference to the buffer queue for this particular instance of + // FDRLogging, and that we're going to clean it up when the thread exits. + thread_local auto LocalBQ = BQ; + thread_local ThreadExitBufferCleanup Cleanup(LocalBQ, Buffer); + + // Prevent signal handler recursion, so in case we're already in a log writing + // mode and the signal handler comes in (and is also instrumented) then we + // don't want to be clobbering potentially partial writes already happening in + // the thread. We use a simple thread_local latch to only allow one on-going + // handleArg0 to happen at any given time. + thread_local bool Running = false; + RecursionGuard Guard{Running}; + if (!Guard) { + assert(Running == true && "RecursionGuard is buggy!"); + return; + } + + if (!loggingInitialized() || LocalBQ->finalizing()) { + writeEOBMetadata(); + if (auto EC = BQ->releaseBuffer(Buffer)) { + Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer, + EC.message().c_str()); + return; + } + RecordPtr = nullptr; + } + + if (Buffer.Buffer == nullptr) { + if (auto EC = LocalBQ->getBuffer(Buffer)) { + auto LS = LoggingStatus.load(std::memory_order_acquire); + if (LS != XRayLogInitStatus::XRAY_LOG_FINALIZING && + LS != XRayLogInitStatus::XRAY_LOG_FINALIZED) + Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str()); + return; + } + + setupNewBuffer(Buffer); + } + + if (CurrentCPU == std::numeric_limits::max()) { + // This means this is the first CPU this thread has ever run on. We set the + // current CPU and record this as the first TSC we've seen. + CurrentCPU = CPU; + writeNewCPUIdMetadata(CPU, TSC); + } + + // Before we go setting up writing new function entries, we need to be really + // careful about the pointer math we're doing. This means we need to ensure + // that the record we are about to write is going to fit into the buffer, + // without overflowing the buffer. + // + // To do this properly, we use the following assumptions: + // + // - The least number of bytes we will ever write is 8 + // (sizeof(FunctionRecord)) only if the delta between the previous entry + // and this entry is within 32 bits. + // - The most number of bytes we will ever write is 8 + 16 = 24. This is + // computed by: + // + // sizeof(FunctionRecord) + sizeof(MetadataRecord) + // + // These arise in the following cases: + // + // 1. When the delta between the TSC we get and the previous TSC for the + // same CPU is outside of the uint32_t range, we end up having to + // write a MetadataRecord to indicate a "tsc wrap" before the actual + // FunctionRecord. + // 2. When we learn that we've moved CPUs, we need to write a + // MetadataRecord to indicate a "cpu change", and thus write out the + // current TSC for that CPU before writing out the actual + // FunctionRecord. + // 3. When we learn about a new CPU ID, we need to write down a "new cpu + // id" MetadataRecord before writing out the actual FunctionRecord. + // + // - An End-of-Buffer (EOB) MetadataRecord is 16 bytes. + // + // So the math we need to do is to determine whether writing 24 bytes past the + // current pointer leaves us with enough bytes to write the EOB + // MetadataRecord. If we don't have enough space after writing as much as 24 + // bytes in the end of the buffer, we need to write out the EOB, get a new + // Buffer, set it up properly before doing any further writing. + // + char *BufferStart = static_cast(Buffer.Buffer); + if ((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart < + static_cast(MetadataRecSize)) { + writeEOBMetadata(); + if (auto EC = LocalBQ->releaseBuffer(Buffer)) { + Report("Failed to release buffer at %p; error=%s\n", Buffer.Buffer, + EC.message().c_str()); + return; + } + if (auto EC = LocalBQ->getBuffer(Buffer)) { + Report("Failed to acquire a buffer; error=%s\n", EC.message().c_str()); + return; + } + setupNewBuffer(Buffer); + } + + // By this point, we are now ready to write at most 24 bytes (one metadata + // record and one function record). + BufferStart = static_cast(Buffer.Buffer); + assert((RecordPtr + (MetadataRecSize + FunctionRecSize)) - BufferStart >= + static_cast(MetadataRecSize) && + "Misconfigured BufferQueue provided; Buffer size not large enough."); + + std::aligned_storage::type + AlignedFuncRecordBuffer; + auto &FuncRecord = + *reinterpret_cast(&AlignedFuncRecordBuffer); + FuncRecord.Type = RecordType::Function; + + // Only get the lower 28 bits of the function id. + FuncRecord.FuncId = FuncId | ~(0x03 << 28); + + // Here we compute the TSC Delta. There are a few interesting situations we + // need to account for: + // + // - The thread has migrated to a different CPU. If this is the case, then + // we write down the following records: + // + // 1. A 'NewCPUId' Metadata record. + // 2. A FunctionRecord with a 0 for the TSCDelta field. + // + // - The TSC delta is greater than the 32 bits we can store in a + // FunctionRecord. In this case we write down the following records: + // + // 1. A 'TSCWrap' Metadata record. + // 2. A FunctionRecord with a 0 for the TSCDelta field. + // + // - The TSC delta is representable within the 32 bits we can store in a + // FunctionRecord. In this case we write down just a FunctionRecord with + // the correct TSC delta. + // + FuncRecord.TSCDelta = 0; + if (CPU != CurrentCPU) { + // We've moved to a new CPU. + writeNewCPUIdMetadata(CPU, TSC); + } else { + // If the delta is greater than the range for a uint32_t, then we write out + // the TSC wrap metadata entry with the full TSC, and the TSC for the + // function record be 0. + auto Delta = LastTSC - TSC; + if (Delta > (1ULL << 32) - 1) + writeTSCWrapMetadata(TSC); + else + FuncRecord.TSCDelta = Delta; + } + + // We then update our "LastTSC" and "CurrentCPU" thread-local variables to aid + // us in future computations of this TSC delta value. + LastTSC = TSC; + CurrentCPU = CPU; + + switch (Entry) { + case XRayEntryType::ENTRY: + FuncRecord.RecordKind = FunctionRecord::RecordKinds::FunctionEnter; + break; + case XRayEntryType::EXIT: + FuncRecord.RecordKind = FunctionRecord::RecordKinds::FunctionExit; + break; + case XRayEntryType::TAIL: + FuncRecord.RecordKind = FunctionRecord::RecordKinds::FunctionTailExit; + break; + } + + std::memcpy(RecordPtr, &AlignedFuncRecordBuffer, sizeof(FunctionRecord)); + RecordPtr += sizeof(FunctionRecord); + + // If we've exhausted the buffer by this time, we then release the buffer to + // make sure that other threads may start using this buffer. + if ((RecordPtr + MetadataRecSize) - BufferStart == MetadataRecSize) { + writeEOBMetadata(); + if (auto EC = LocalBQ->releaseBuffer(Buffer)) { + Report("Failed releasing buffer at %p; error=%s\n", Buffer.Buffer, + EC.message().c_str()); + return; + } + RecordPtr = nullptr; + } +} + +} // namespace __xray + +static auto Unused = [] { + using namespace __xray; + if (flags()->xray_fdr_log) { + XRayLogImpl Impl{ + FDRLogging_init, FDRLogging_finalize, FDRLogging_handleArg0, + FDRLogging_flush, + }; + __xray_set_log_impl(Impl); + } + return true; +}(); Index: lib/xray/xray_flags.inc =================================================================== --- lib/xray/xray_flags.inc +++ lib/xray/xray_flags.inc @@ -20,3 +20,5 @@ "Whether to install the naive log implementation.") XRAY_FLAG(const char *, xray_logfile_base, "xray-log.", "Filename base for the xray logfile.") +XRAY_FLAG(bool, xray_fdr_log, false, + "Whether to install the flight data recorder logging implementation.") Index: lib/xray/xray_inmemory_log.cc =================================================================== --- lib/xray/xray_inmemory_log.cc +++ lib/xray/xray_inmemory_log.cc @@ -16,8 +16,6 @@ //===----------------------------------------------------------------------===// #include -#include -#include #include #include #include @@ -39,6 +37,7 @@ #include "xray_defs.h" #include "xray_flags.h" #include "xray_interface_internal.h" +#include "xray_utils.h" // __xray_InMemoryRawLog will use a thread-local aligned buffer capped to a // certain size (32kb by default) and use it as if it were a circular buffer for @@ -53,25 +52,6 @@ std::mutex LogMutex; -static void retryingWriteAll(int Fd, char *Begin, - char *End) XRAY_NEVER_INSTRUMENT { - if (Begin == End) - return; - auto TotalBytes = std::distance(Begin, End); - while (auto Written = write(Fd, Begin, TotalBytes)) { - if (Written < 0) { - if (errno == EINTR) - continue; // Try again. - Report("Failed to write; errno = %d\n", errno); - return; - } - TotalBytes -= Written; - if (TotalBytes == 0) - break; - Begin += Written; - } -} - class ThreadExitFlusher { int Fd; XRayRecord *Start; @@ -102,57 +82,27 @@ using namespace __xray; -void PrintToStdErr(const char *Buffer) XRAY_NEVER_INSTRUMENT { - fprintf(stderr, "%s", Buffer); -} - static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT { - // FIXME: Figure out how to make this less stderr-dependent. - SetPrintfAndReportCallback(PrintToStdErr); - // Open a temporary file once for the log. - static char TmpFilename[256] = {}; - static char TmpWildcardPattern[] = "XXXXXX"; - auto Argv = GetArgv(); - const char *Progname = Argv[0] == nullptr ? "(unknown)" : Argv[0]; - const char *LastSlash = internal_strrchr(Progname, '/'); - - if (LastSlash != nullptr) - Progname = LastSlash + 1; - - const int HalfLength = sizeof(TmpFilename) / 2 - sizeof(TmpWildcardPattern); - int NeededLength = internal_snprintf(TmpFilename, sizeof(TmpFilename), - "%.*s%.*s.%s", - HalfLength, flags()->xray_logfile_base, - HalfLength, Progname, - TmpWildcardPattern); - if (NeededLength > int(sizeof(TmpFilename))) { - Report("XRay log file name too long (%d): %s\n", NeededLength, TmpFilename); - return -1; - } - int Fd = mkstemp(TmpFilename); - if (Fd == -1) { - Report("XRay: Failed opening temporary file '%s'; not logging events.\n", - TmpFilename); + int F = getLogFD(); + auto CPUFrequency = getCPUFrequency(); + if (F == -1) return -1; - } - if (Verbosity()) - fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename); - // Since we're here, we get to write the header. We set it up so that the // header will only be written once, at the start, and let the threads // logging do writes which just append. XRayFileHeader Header; Header.Version = 1; Header.Type = FileTypes::NAIVE_LOG; - Header.CycleFrequency = __xray::cycleFrequency(); + Header.CycleFrequency = + CPUFrequency == -1 ? 0 : static_cast(CPUFrequency); // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' // before setting the values in the header. Header.ConstantTSC = 1; Header.NonstopTSC = 1; - retryingWriteAll(Fd, reinterpret_cast(&Header), + retryingWriteAll(F, reinterpret_cast(&Header), reinterpret_cast(&Header) + sizeof(Header)); - return Fd; + return F; } void __xray_InMemoryRawLog(int32_t FuncId, Index: lib/xray/xray_log_interface.cc =================================================================== --- /dev/null +++ lib/xray/xray_log_interface.cc @@ -0,0 +1,57 @@ +//===-- xray_log_interface.cc ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a function call tracing system. +// +//===----------------------------------------------------------------------===// +#include "xray/xray_log_interface.h" + +#include "xray/xray_interface.h" +#include "xray_defs.h" + +#include +#include + +std::mutex XRayImplMutex; +std::unique_ptr GlobalXRayImpl; + +void __xray_set_log_impl(XRayLogImpl Impl) XRAY_NEVER_INSTRUMENT { + if (Impl.log_init == nullptr || Impl.log_finalize == nullptr || + Impl.handle_arg0 == nullptr || Impl.flush_log == nullptr) { + std::lock_guard Guard(XRayImplMutex); + GlobalXRayImpl.reset(); + return; + } + + std::lock_guard Guard(XRayImplMutex); + GlobalXRayImpl.reset(new XRayLogImpl); + *GlobalXRayImpl = Impl; +} + +XRayLogInitStatus __xray_init(size_t BufferSize, size_t MaxBuffers, void *Args, + size_t ArgsSize) XRAY_NEVER_INSTRUMENT { + std::lock_guard Guard(XRayImplMutex); + if (!GlobalXRayImpl) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + return GlobalXRayImpl->log_init(BufferSize, MaxBuffers, Args, ArgsSize); +} + +XRayLogInitStatus __xray_log_finalize() XRAY_NEVER_INSTRUMENT { + std::lock_guard Guard(XRayImplMutex); + if (!GlobalXRayImpl) + return XRayLogInitStatus::XRAY_LOG_UNINITIALIZED; + return GlobalXRayImpl->log_finalize(); +} + +XRayLogFlushStatus __xray_log_flushLog() XRAY_NEVER_INSTRUMENT { + std::lock_guard Guard(XRayImplMutex); + if (!GlobalXRayImpl) + return XRayLogFlushStatus::XRAY_LOG_NOT_FLUSHING; + return GlobalXRayImpl->flush_log(); +} Index: lib/xray/xray_utils.h =================================================================== --- /dev/null +++ lib/xray/xray_utils.h @@ -0,0 +1,44 @@ +//===-- xray_utils.h --------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +// Some shared utilities for the XRay runtime implementation. +// +//===----------------------------------------------------------------------===// +#ifndef XRAY_UTILS_H +#define XRAY_UTILS_H + +#include +#include + +namespace __xray { + +// Default implementation of the reporting interface for sanitizer errors. +void PrintToStdErr(const char *Buffer); + +// EINTR-safe write routine, provided a file descriptor and a character range. +void retryingWriteAll(int Fd, char *Begin, char *End); + +// Reads a long long value from a provided file. +bool readValueFromFile(const char *Filename, long long *Value); + +// EINTR-safe read routine, providing a file descriptor and a character range. +std::pair retryingReadSome(int Fd, char *Begin, char *End); + +// EINTR-safe open routine, uses flag-provided values for initialising a log +// file. +int getLogFD(); + +// EINTR-safe read of CPU frquency for the current CPU. +long long getCPUFrequency(); + +} // namespace __xray + +#endif // XRAY_UTILS_H Index: lib/xray/xray_utils.cc =================================================================== --- /dev/null +++ lib/xray/xray_utils.cc @@ -0,0 +1,165 @@ +//===-- xray_utils.cc -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file is a part of XRay, a dynamic runtime instrumentation system. +// +//===----------------------------------------------------------------------===// +#include "xray_utils.h" + +#include "sanitizer_common/sanitizer_common.h" +#include "xray_defs.h" +#include "xray_flags.h" +#include +#include +#include +#include +#include +#include +#include + +#if defined(__x86_64__) +#include "xray_x86_64.h" +#elif defined(__arm__) || defined(__aarch64__) +#include "xray_emulate_tsc.h" +#else +#error "Unsupported CPU Architecture" +#endif /* CPU architecture */ + +namespace __xray { + +void PrintToStdErr(const char *Buffer) XRAY_NEVER_INSTRUMENT { + fprintf(stderr, "%s", Buffer); +} + +void retryingWriteAll(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { + if (Begin == End) + return; + auto TotalBytes = std::distance(Begin, End); + while (auto Written = write(Fd, Begin, TotalBytes)) { + if (Written < 0) { + if (errno == EINTR) + continue; // Try again. + Report("Failed to write; errno = %d\n", errno); + return; + } + TotalBytes -= Written; + if (TotalBytes == 0) + break; + Begin += Written; + } +} + +std::pair retryingReadSome(int Fd, char *Begin, + char *End) XRAY_NEVER_INSTRUMENT { + auto BytesToRead = std::distance(Begin, End); + ssize_t BytesRead; + ssize_t TotalBytesRead = 0; + while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { + if (BytesRead == -1) { + if (errno == EINTR) + continue; + Report("Read error; errno = %d\n", errno); + return std::make_pair(TotalBytesRead, false); + } + + TotalBytesRead += BytesRead; + BytesToRead -= BytesRead; + Begin += BytesRead; + } + return std::make_pair(TotalBytesRead, true); +} + +bool readValueFromFile(const char *Filename, + long long *Value) XRAY_NEVER_INSTRUMENT { + int Fd = open(Filename, O_RDONLY | O_CLOEXEC); + if (Fd == -1) + return false; + static constexpr size_t BufSize = 256; + char Line[BufSize] = {}; + ssize_t BytesRead; + bool Success; + std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); + if (!Success) + return false; + close(Fd); + char *End = nullptr; + long long Tmp = internal_simple_strtoll(Line, &End, 10); + bool Result = false; + if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { + *Value = Tmp; + Result = true; + } + return Result; +} + +long long getCPUFrequency() XRAY_NEVER_INSTRUMENT { + // Get the cycle frequency from SysFS on Linux. + long long CPUFrequency = -1; +#if defined(__x86_64__) + if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", + &CPUFrequency)) { + CPUFrequency *= 1000; + } else if (readValueFromFile( + "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &CPUFrequency)) { + CPUFrequency *= 1000; + } else { + Report("Unable to determine CPU frequency for TSC accounting.\n"); + } +#elif defined(__arm__) || defined(__aarch64__) + // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does + // not have a constant frequency like TSC on x86(_64), it may go faster + // or slower depending on CPU turbo or power saving mode. Furthermore, + // to read from CP15 on ARM a kernel modification or a driver is needed. + // We can not require this from users of compiler-rt. + // So on ARM we use clock_gettime() which gives the result in nanoseconds. + // To get the measurements per second, we scale this by the number of + // nanoseconds per second, pretending that the TSC frequency is 1GHz and + // one TSC tick is 1 nanosecond. + CPUFrequency = NanosecondsPerSecond; +#else +#error "Unsupported CPU Architecture" +#endif /* CPU architecture */ + return CPUFrequency; +} + +int getLogFD() XRAY_NEVER_INSTRUMENT { + // FIXME: Figure out how to make this less stderr-dependent. + SetPrintfAndReportCallback(PrintToStdErr); + // Open a temporary file once for the log. + static char TmpFilename[256] = {}; + static char TmpWildcardPattern[] = "XXXXXX"; + auto Argv = GetArgv(); + const char *Progname = Argv[0] == nullptr ? "(unknown)" : Argv[0]; + const char *LastSlash = internal_strrchr(Progname, '/'); + + if (LastSlash != nullptr) + Progname = LastSlash + 1; + + const int HalfLength = sizeof(TmpFilename) / 2 - sizeof(TmpWildcardPattern); + int NeededLength = internal_snprintf( + TmpFilename, sizeof(TmpFilename), "%.*s%.*s.%s", HalfLength, + flags()->xray_logfile_base, HalfLength, Progname, TmpWildcardPattern); + if (NeededLength > int(sizeof(TmpFilename))) { + Report("XRay log file name too long (%d): %s\n", NeededLength, TmpFilename); + return -1; + } + int Fd = mkstemp(TmpFilename); + if (Fd == -1) { + Report("XRay: Failed opening temporary file '%s'; not logging events.\n", + TmpFilename); + return -1; + } + if (Verbosity()) + fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename); + + return Fd; +} + +} // namespace __xray Index: lib/xray/xray_x86_64.h =================================================================== --- lib/xray/xray_x86_64.h +++ lib/xray/xray_x86_64.h @@ -27,6 +27,6 @@ CPU = LongCPU; return TSC; } -} +} // namespace __xray #endif // XRAY_X86_64_H Index: test/xray/CMakeLists.txt =================================================================== --- test/xray/CMakeLists.txt +++ test/xray/CMakeLists.txt @@ -9,7 +9,6 @@ if(NOT COMPILER_RT_STANDALONE_BUILD AND COMPILER_RT_BUILD_XRAY AND COMPILER_RT_HAS_XRAY) list(APPEND XRAY_TEST_DEPS xray) - list(APPEND XRAY_FDR_TEST_DEPS xray-fdr) endif() set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH}) @@ -41,8 +40,3 @@ ${XRAY_TESTSUITES} DEPENDS ${XRAY_TEST_DEPS}) set_target_properties(check-xray PROPERTIES FOLDER "Compiler-RT Misc") - -add_lit_testsuite(check-xray-fdr "Running the XRay flight data recorder tests" - ${XRAY_FDR_TESTSUITES} - DEPENDS ${XRAY_FDR_TEST_DEPS}) -set_target_properties(check-xray-fdr PROPERTIES FOLDER "Compiler-RT Misc") Index: test/xray/Unit/lit.site.cfg.in =================================================================== --- test/xray/Unit/lit.site.cfg.in +++ test/xray/Unit/lit.site.cfg.in @@ -10,3 +10,7 @@ config.test_exec_root = "@COMPILER_RT_BINARY_DIR@/lib/xray/tests" config.test_source_root = config.test_exec_root + +# Do not patch the XRay unit tests pre-main, and also make the error logging +# verbose to get a more accurate error logging mechanism. +config.environment['XRAY_OPTIONS'] = 'patch_premain=false verbose=1'