Index: compiler-rt/trunk/lib/xray/xray_AArch64.cc =================================================================== --- compiler-rt/trunk/lib/xray/xray_AArch64.cc +++ compiler-rt/trunk/lib/xray/xray_AArch64.cc @@ -14,12 +14,26 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_common.h" #include "xray_defs.h" +#include "xray_emulate_tsc.h" #include "xray_interface_internal.h" #include #include namespace __xray { +uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT { + // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does + // not have a constant frequency like TSC on x86[_64]; it may go faster or + // slower depending on CPU's turbo or power saving modes. Furthermore, to + // read from CP15 on ARM a kernel modification or a driver is needed. + // We can not require this from users of compiler-rt. + // So on ARM we use clock_gettime(2) which gives the result in nanoseconds. + // To get the measurements per second, we scale this by the number of + // nanoseconds per second, pretending that the TSC frequency is 1GHz and + // one TSC tick is 1 nanosecond. + return NanosecondsPerSecond; +} + // The machine codes for some instructions used in runtime patching. enum class PatchOpcodes : uint32_t { PO_StpX0X30SP_m16e = 0xA9BF7BE0, // STP X0, X30, [SP, #-16]! Index: compiler-rt/trunk/lib/xray/xray_arm.cc =================================================================== --- compiler-rt/trunk/lib/xray/xray_arm.cc +++ compiler-rt/trunk/lib/xray/xray_arm.cc @@ -14,12 +14,26 @@ //===----------------------------------------------------------------------===// #include "sanitizer_common/sanitizer_common.h" #include "xray_defs.h" +#include "xray_emulate_tsc.h" #include "xray_interface_internal.h" #include #include namespace __xray { +uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT { + // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does + // not have a constant frequency like TSC on x86[_64]; it may go faster or + // slower depending on CPU's turbo or power saving modes. Furthermore, to + // read from CP15 on ARM a kernel modification or a driver is needed. + // We can not require this from users of compiler-rt. + // So on ARM we use clock_gettime(2) which gives the result in nanoseconds. + // To get the measurements per second, we scale this by the number of + // nanoseconds per second, pretending that the TSC frequency is 1GHz and + // one TSC tick is 1 nanosecond. + return NanosecondsPerSecond; +} + // The machine codes for some instructions used in runtime patching. enum class PatchOpcodes : uint32_t { PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr} Index: compiler-rt/trunk/lib/xray/xray_emulate_tsc.h =================================================================== --- compiler-rt/trunk/lib/xray/xray_emulate_tsc.h +++ compiler-rt/trunk/lib/xray/xray_emulate_tsc.h @@ -0,0 +1,23 @@ +#pragma once +#include + +#include "sanitizer_common/sanitizer_internal_defs.h" +#include "xray_defs.h" + +namespace __xray { + +static constexpr uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000; + +ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT { + timespec TS; + int result = clock_gettime(CLOCK_REALTIME, &TS); + if (result != 0) { + Report("clock_gettime(2) returned %d, errno=%d.", result, int(errno)); + TS.tv_sec = 0; + TS.tv_nsec = 0; + } + CPU = 0; + return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; +} + +} Index: compiler-rt/trunk/lib/xray/xray_inmemory_log.cc =================================================================== --- compiler-rt/trunk/lib/xray/xray_inmemory_log.cc +++ compiler-rt/trunk/lib/xray/xray_inmemory_log.cc @@ -26,12 +26,12 @@ #include #if defined(__x86_64__) -#include +#include "xray_x86_64.h" #elif defined(__arm__) || defined(__aarch64__) -static const int64_t NanosecondsPerSecond = 1000LL * 1000 * 1000; +#include "xray_emulate_tsc.h" #else #error "Unsupported CPU Architecture" -#endif /* CPU architecture */ +#endif /* Architecture-specific inline intrinsics */ #include "sanitizer_common/sanitizer_libc.h" #include "xray/xray_records.h" @@ -71,52 +71,6 @@ } } -#if defined(__x86_64__) -static std::pair -retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { - auto BytesToRead = std::distance(Begin, End); - ssize_t BytesRead; - ssize_t TotalBytesRead = 0; - while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { - if (BytesRead == -1) { - if (errno == EINTR) - continue; - Report("Read error; errno = %d\n", errno); - return std::make_pair(TotalBytesRead, false); - } - - TotalBytesRead += BytesRead; - BytesToRead -= BytesRead; - Begin += BytesRead; - } - return std::make_pair(TotalBytesRead, true); -} - -static bool readValueFromFile(const char *Filename, - long long *Value) XRAY_NEVER_INSTRUMENT { - int Fd = open(Filename, O_RDONLY | O_CLOEXEC); - if (Fd == -1) - return false; - static constexpr size_t BufSize = 256; - char Line[BufSize] = {}; - ssize_t BytesRead; - bool Success; - std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); - if (!Success) - return false; - close(Fd); - char *End = nullptr; - long long Tmp = internal_simple_strtoll(Line, &End, 10); - bool Result = false; - if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { - *Value = Tmp; - Result = true; - } - return Result; -} - -#endif /* CPU architecture */ - class ThreadExitFlusher { int Fd; XRayRecord *Start; @@ -151,6 +105,47 @@ fprintf(stderr, "%s", Buffer); } +static int __xray_OpenLogFile() XRAY_NEVER_INSTRUMENT { + // FIXME: Figure out how to make this less stderr-dependent. + SetPrintfAndReportCallback(PrintToStdErr); + // Open a temporary file once for the log. + static char TmpFilename[256] = {}; + static char TmpWildcardPattern[] = "XXXXXX"; + auto E = internal_strncat(TmpFilename, flags()->xray_logfile_base, + sizeof(TmpFilename) - 10); + if (static_cast((E + 6) - TmpFilename) > + (sizeof(TmpFilename) - 1)) { + Report("XRay log file base too long: %s\n", flags()->xray_logfile_base); + return -1; + } + internal_strncat(TmpFilename, TmpWildcardPattern, + sizeof(TmpWildcardPattern) - 1); + int Fd = mkstemp(TmpFilename); + if (Fd == -1) { + Report("XRay: Failed opening temporary file '%s'; not logging events.\n", + TmpFilename); + return -1; + } + if (Verbosity()) + fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename); + + // Since we're here, we get to write the header. We set it up so that the + // header will only be written once, at the start, and let the threads + // logging do writes which just append. + XRayFileHeader Header; + Header.Version = 1; + Header.Type = FileTypes::NAIVE_LOG; + Header.CycleFrequency = __xray::cycleFrequency(); + + // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' + // before setting the values in the header. + Header.ConstantTSC = 1; + Header.NonstopTSC = 1; + retryingWriteAll(Fd, reinterpret_cast(&Header), + reinterpret_cast(&Header) + sizeof(Header)); + return Fd; +} + void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type) XRAY_NEVER_INSTRUMENT { using Buffer = @@ -158,75 +153,7 @@ static constexpr size_t BuffLen = 1024; thread_local static Buffer InMemoryBuffer[BuffLen] = {}; thread_local static size_t Offset = 0; - static int Fd = [] { - // FIXME: Figure out how to make this less stderr-dependent. - SetPrintfAndReportCallback(PrintToStdErr); - // Open a temporary file once for the log. - static char TmpFilename[256] = {}; - static char TmpWildcardPattern[] = "XXXXXX"; - auto E = internal_strncat(TmpFilename, flags()->xray_logfile_base, - sizeof(TmpFilename) - 10); - if (static_cast((E + 6) - TmpFilename) > - (sizeof(TmpFilename) - 1)) { - Report("XRay log file base too long: %s\n", flags()->xray_logfile_base); - return -1; - } - internal_strncat(TmpFilename, TmpWildcardPattern, - sizeof(TmpWildcardPattern) - 1); - int Fd = mkstemp(TmpFilename); - if (Fd == -1) { - Report("XRay: Failed opening temporary file '%s'; not logging events.\n", - TmpFilename); - return -1; - } - if (Verbosity()) - fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename); - - // Get the cycle frequency from SysFS on Linux. - long long CPUFrequency = -1; -#if defined(__x86_64__) - if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", - &CPUFrequency)) { - CPUFrequency *= 1000; - } else if (readValueFromFile( - "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", - &CPUFrequency)) { - CPUFrequency *= 1000; - } else { - Report("Unable to determine CPU frequency for TSC accounting.\n"); - } -#elif defined(__arm__) || defined(__aarch64__) - // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does - // not have a constant frequency like TSC on x86(_64), it may go faster - // or slower depending on CPU turbo or power saving mode. Furthermore, - // to read from CP15 on ARM a kernel modification or a driver is needed. - // We can not require this from users of compiler-rt. - // So on ARM we use clock_gettime() which gives the result in nanoseconds. - // To get the measurements per second, we scale this by the number of - // nanoseconds per second, pretending that the TSC frequency is 1GHz and - // one TSC tick is 1 nanosecond. - CPUFrequency = NanosecondsPerSecond; -#else -#error "Unsupported CPU Architecture" -#endif /* CPU architecture */ - - // Since we're here, we get to write the header. We set it up so that the - // header will only be written once, at the start, and let the threads - // logging do writes which just append. - XRayFileHeader Header; - Header.Version = 1; - Header.Type = FileTypes::NAIVE_LOG; - Header.CycleFrequency = - CPUFrequency == -1 ? 0 : static_cast(CPUFrequency); - - // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' - // before setting the values in the header. - Header.ConstantTSC = 1; - Header.NonstopTSC = 1; - retryingWriteAll(Fd, reinterpret_cast(&Header), - reinterpret_cast(&Header) + sizeof(Header)); - return Fd; - }(); + static int Fd = __xray_OpenLogFile(); if (Fd == -1) return; thread_local __xray::ThreadExitFlusher Flusher( @@ -237,27 +164,7 @@ // through a pointer offset. auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset]; R.RecordType = RecordTypes::NORMAL; -#if defined(__x86_64__) - { - unsigned CPU; - R.TSC = __rdtscp(&CPU); - R.CPU = CPU; - } -#elif defined(__arm__) || defined(__aarch64__) - { - timespec TS; - int result = clock_gettime(CLOCK_REALTIME, &TS); - if (result != 0) { - Report("clock_gettime() returned %d, errno=%d.\n", result, int(errno)); - TS.tv_sec = 0; - TS.tv_nsec = 0; - } - R.TSC = TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; - R.CPU = 0; - } -#else -#error "Unsupported CPU Architecture" -#endif /* CPU architecture */ + R.TSC = __xray::readTSC(R.CPU); R.TId = TId; R.Type = Type; R.FuncId = FuncId; Index: compiler-rt/trunk/lib/xray/xray_interface_internal.h =================================================================== --- compiler-rt/trunk/lib/xray/xray_interface_internal.h +++ compiler-rt/trunk/lib/xray/xray_interface_internal.h @@ -48,6 +48,8 @@ size_t Entries; }; +uint64_t cycleFrequency(); + bool patchFunctionEntry(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); Index: compiler-rt/trunk/lib/xray/xray_x86_64.h =================================================================== --- compiler-rt/trunk/lib/xray/xray_x86_64.h +++ compiler-rt/trunk/lib/xray/xray_x86_64.h @@ -0,0 +1,16 @@ +#pragma once +#include + +#include "sanitizer_common/sanitizer_internal_defs.h" +#include "xray_defs.h" + +namespace __xray { + +ALWAYS_INLINE uint64_t readTSC(uint8_t &CPU) XRAY_NEVER_INSTRUMENT { + unsigned LongCPU; + uint64_t TSC = __rdtscp(&LongCPU); + CPU = LongCPU; + return TSC; +} + +} Index: compiler-rt/trunk/lib/xray/xray_x86_64.cc =================================================================== --- compiler-rt/trunk/lib/xray/xray_x86_64.cc +++ compiler-rt/trunk/lib/xray/xray_x86_64.cc @@ -3,10 +3,71 @@ #include "xray_interface_internal.h" #include #include +#include #include +#include +#include namespace __xray { +static std::pair +retryingReadSome(int Fd, char *Begin, char *End) XRAY_NEVER_INSTRUMENT { + auto BytesToRead = std::distance(Begin, End); + ssize_t BytesRead; + ssize_t TotalBytesRead = 0; + while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { + if (BytesRead == -1) { + if (errno == EINTR) + continue; + Report("Read error; errno = %d\n", errno); + return std::make_pair(TotalBytesRead, false); + } + + TotalBytesRead += BytesRead; + BytesToRead -= BytesRead; + Begin += BytesRead; + } + return std::make_pair(TotalBytesRead, true); +} + +static bool readValueFromFile(const char *Filename, + long long *Value) XRAY_NEVER_INSTRUMENT { + int Fd = open(Filename, O_RDONLY | O_CLOEXEC); + if (Fd == -1) + return false; + static constexpr size_t BufSize = 256; + char Line[BufSize] = {}; + ssize_t BytesRead; + bool Success; + std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); + if (!Success) + return false; + close(Fd); + char *End = nullptr; + long long Tmp = internal_simple_strtoll(Line, &End, 10); + bool Result = false; + if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { + *Value = Tmp; + Result = true; + } + return Result; +} + +uint64_t cycleFrequency() XRAY_NEVER_INSTRUMENT { + long long CPUFrequency = -1; + if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", + &CPUFrequency)) { + CPUFrequency *= 1000; + } else if (readValueFromFile( + "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &CPUFrequency)) { + CPUFrequency *= 1000; + } else { + Report("Unable to determine CPU frequency for TSC accounting.\n"); + } + return CPUFrequency == -1 ? 0 : static_cast(CPUFrequency); +} + static constexpr uint8_t CallOpCode = 0xe8; static constexpr uint16_t MovR10Seq = 0xba41; static constexpr uint16_t Jmp9Seq = 0x09eb;