Index: lib/xray/xray_arm.cc =================================================================== --- lib/xray/xray_arm.cc +++ lib/xray/xray_arm.cc @@ -16,9 +16,36 @@ #include "xray_interface_internal.h" #include #include +#include namespace __xray { +uint64_t cycleFrequency() { + static const uint64_t NanosecondsPerSecond = 1000ULL * 1000 * 1000; + // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does + // not have a constant frequency like TSC on x86(_64), it may go faster + // or slower depending on CPU turbo or power saving mode. Furthermore, + // to read from CP15 on ARM a kernel modification or a driver is needed. + // We can not require this from users of compiler-rt. + // So on ARM we use clock_gettime() which gives the result in nanoseconds. + // To get the measurements per second, we scale this by the number of + // nanoseconds per second, pretending that the TSC frequency is 1GHz and + // one TSC tick is 1 nanosecond. + return NanosecondsPerSecond; +} + +uint64_t readTSC(uint8_t &CPU) { + timespec TS; + int result = clock_gettime(CLOCK_REALTIME, &TS); + if (result != 0) { + Report("clock_gettime() returned %d, errno=%d.", result, int(errno)); + TS.tv_sec = 0; + TS.tv_nsec = 0; + } + CPU = 0; + return TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; +} + // The machine codes for some instructions used in runtime patching. enum class PatchOpcodes : uint32_t { PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr} Index: lib/xray/xray_inmemory_log.cc =================================================================== --- lib/xray/xray_inmemory_log.cc +++ lib/xray/xray_inmemory_log.cc @@ -25,14 +25,6 @@ #include #include -#if defined(__x86_64__) -#include -#elif defined(__arm__) -static const int64_t NanosecondsPerSecond = 1000LL * 1000 * 1000; -#else -#error "Unsupported CPU Architecture" -#endif /* CPU architecture */ - #include "sanitizer_common/sanitizer_libc.h" #include "xray/xray_records.h" #include "xray_flags.h" @@ -68,51 +60,6 @@ } } -#if defined(__x86_64__) -static std::pair retryingReadSome(int Fd, char *Begin, - char *End) { - auto BytesToRead = std::distance(Begin, End); - ssize_t BytesRead; - ssize_t TotalBytesRead = 0; - while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { - if (BytesRead == -1) { - if (errno == EINTR) - continue; - Report("Read error; errno = %d", errno); - return std::make_pair(TotalBytesRead, false); - } - - TotalBytesRead += BytesRead; - BytesToRead -= BytesRead; - Begin += BytesRead; - } - return std::make_pair(TotalBytesRead, true); -} - -static bool readValueFromFile(const char *Filename, long long *Value) { - int Fd = open(Filename, O_RDONLY | O_CLOEXEC); - if (Fd == -1) - return false; - static constexpr size_t BufSize = 256; - char Line[BufSize] = {}; - ssize_t BytesRead; - bool Success; - std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); - if (!Success) - return false; - close(Fd); - char *End = nullptr; - long long Tmp = internal_simple_strtoll(Line, &End, 10); - bool Result = false; - if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { - *Value = Tmp; - Result = true; - } - return Result; -} - -#endif /* CPU architecture */ - class ThreadExitFlusher { int Fd; XRayRecord *Start; @@ -172,42 +119,13 @@ if (Verbosity()) fprintf(stderr, "XRay: Log file in '%s'\n", TmpFilename); - // Get the cycle frequency from SysFS on Linux. - long long CPUFrequency = -1; -#if defined(__x86_64__) - if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", - &CPUFrequency)) { - CPUFrequency *= 1000; - } else if (readValueFromFile( - "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", - &CPUFrequency)) { - CPUFrequency *= 1000; - } else { - Report("Unable to determine CPU frequency for TSC accounting."); - } -#elif defined(__arm__) - // There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does - // not have a constant frequency like TSC on x86(_64), it may go faster - // or slower depending on CPU turbo or power saving mode. Furthermore, - // to read from CP15 on ARM a kernel modification or a driver is needed. - // We can not require this from users of compiler-rt. - // So on ARM we use clock_gettime() which gives the result in nanoseconds. - // To get the measurements per second, we scale this by the number of - // nanoseconds per second, pretending that the TSC frequency is 1GHz and - // one TSC tick is 1 nanosecond. - CPUFrequency = NanosecondsPerSecond; -#else -#error "Unsupported CPU Architecture" -#endif /* CPU architecture */ - // Since we're here, we get to write the header. We set it up so that the // header will only be written once, at the start, and let the threads // logging do writes which just append. XRayFileHeader Header; Header.Version = 1; Header.Type = FileTypes::NAIVE_LOG; - Header.CycleFrequency = - CPUFrequency == -1 ? 0 : static_cast(CPUFrequency); + Header.CycleFrequency = __xray::cycleFrequency(); // FIXME: Actually check whether we have 'constant_tsc' and 'nonstop_tsc' // before setting the values in the header. @@ -227,27 +145,7 @@ // through a pointer offset. auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset]; R.RecordType = RecordTypes::NORMAL; -#if defined(__x86_64__) - { - unsigned CPU; - R.TSC = __rdtscp(&CPU); - R.CPU = CPU; - } -#elif defined(__arm__) - { - timespec TS; - int result = clock_gettime(CLOCK_REALTIME, &TS); - if (result != 0) { - Report("clock_gettime() returned %d, errno=%d.", result, int(errno)); - TS.tv_sec = 0; - TS.tv_nsec = 0; - } - R.TSC = TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec; - R.CPU = 0; - } -#else -#error "Unsupported CPU Architecture" -#endif /* CPU architecture */ + R.TSC = __xray::readTSC(R.CPU); R.TId = TId; R.Type = Type; R.FuncId = FuncId; Index: lib/xray/xray_interface_internal.h =================================================================== --- lib/xray/xray_interface_internal.h +++ lib/xray/xray_interface_internal.h @@ -48,6 +48,9 @@ size_t Entries; }; +uint64_t cycleFrequency(); +uint64_t readTSC(uint8_t &); + bool patchFunctionEntry(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); bool patchFunctionExit(bool Enable, uint32_t FuncId, const XRaySledEntry &Sled); Index: lib/xray/xray_x86_64.cc =================================================================== --- lib/xray/xray_x86_64.cc +++ lib/xray/xray_x86_64.cc @@ -2,10 +2,78 @@ #include "xray_interface_internal.h" #include #include +#include #include +#include +#include +#include namespace __xray { +static std::pair retryingReadSome(int Fd, char *Begin, + char *End) { + auto BytesToRead = std::distance(Begin, End); + ssize_t BytesRead; + ssize_t TotalBytesRead = 0; + while (BytesToRead && (BytesRead = read(Fd, Begin, BytesToRead))) { + if (BytesRead == -1) { + if (errno == EINTR) + continue; + Report("Read error; errno = %d", errno); + return std::make_pair(TotalBytesRead, false); + } + + TotalBytesRead += BytesRead; + BytesToRead -= BytesRead; + Begin += BytesRead; + } + return std::make_pair(TotalBytesRead, true); +} + +static bool readValueFromFile(const char *Filename, long long *Value) { + int Fd = open(Filename, O_RDONLY | O_CLOEXEC); + if (Fd == -1) + return false; + static constexpr size_t BufSize = 256; + char Line[BufSize] = {}; + ssize_t BytesRead; + bool Success; + std::tie(BytesRead, Success) = retryingReadSome(Fd, Line, Line + BufSize); + if (!Success) + return false; + close(Fd); + char *End = nullptr; + long long Tmp = internal_simple_strtoll(Line, &End, 10); + bool Result = false; + if (Line[0] != '\0' && (*End == '\n' || *End == '\0')) { + *Value = Tmp; + Result = true; + } + return Result; +} + +uint64_t cycleFrequency() { + long long CPUFrequency = -1; + if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", + &CPUFrequency)) { + CPUFrequency *= 1000; + } else if (readValueFromFile( + "/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", + &CPUFrequency)) { + CPUFrequency *= 1000; + } else { + Report("Unable to determine CPU frequency for TSC accounting."); + } + return CPUFrequency == -1 ? 0 : static_cast(CPUFrequency); +} + +uint64_t readTSC(uint8_t &CPU) { + unsigned LongCPU; + uint64_t TSC = __rdtscp(&LongCPU); + CPU = LongCPU; + return TSC; +} + static constexpr uint8_t CallOpCode = 0xe8; static constexpr uint16_t MovR10Seq = 0xba41; static constexpr uint16_t Jmp9Seq = 0x09eb;