diff --git a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg
--- a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg
+++ b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg
@@ -19,9 +19,11 @@
       try:
           with open(os.devnull, 'w') as quiet:
+              # '-check-feature-support' is a bare boolean flag. LBR has to be requested
+              # with '-x86-lbr-sample-period', otherwise the support check is skipped.
               check_llvm_exegesis_uops_result = subprocess.call(
-                [llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
+                [llvm_exegesis_exe, '-check-feature-support', '-x86-lbr-sample-period', '123', '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
               check_llvm_exegesis_latency_result = subprocess.call(
-                [llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
+                [llvm_exegesis_exe, '-check-feature-support', '-x86-lbr-sample-period', '123', '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
       except OSError:
           print('could not exec llvm-exegesis')
           config.unsupported = True
diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h
--- a/llvm/tools/llvm-exegesis/lib/Target.h
+++ b/llvm/tools/llvm-exegesis/lib/Target.h
@@ -142,6 +142,11 @@
     return {&Instr};
   }
 
+  // Checks hardware and software support for current benchmark mode.
+  // Returns an error if the target host does not have support to run the
+  // benchmark.
+  virtual Error checkFeatureSupport() const { return Error::success(); }
+
   // Creates a snippet generator for the given mode.
   std::unique_ptr<SnippetGenerator>
   createSnippetGenerator(InstructionBenchmark::ModeE Mode,
diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
--- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp
@@ -674,6 +674,23 @@
     return Arch == Triple::x86_64 || Arch == Triple::x86;
   }
 
+  Error checkFeatureSupport() const override {
+    // LBR is the only feature we conditionally support now.
+    // So if Lbr is not requested, then we should be able to run the benchmarks.
+    if (LbrSamplingPeriod == 0)
+      return Error::success();
+
+#if defined(__linux__) && defined(HAVE_LIBPFM) && \
+    defined(LIBPFM_HAS_FIELD_CYCLES)
+    // If the kernel supports it, the hardware still may not have it.
+    return X86LbrCounter::checkLbrSupport();
+#else
+    return llvm::make_error<llvm::StringError>(
+        "LBR not supported on this kernel and/or platform",
+        llvm::errc::not_supported);
+#endif
+  }
+
   static const unsigned kUnavailableRegisters[4];
 };
 
diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h
--- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h
+++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h
@@ -33,6 +33,10 @@
 
 class X86LbrCounter : public pfm::Counter {
 public:
+  // Does a short sample read. Returns success if it yields at least one
+  // non-zero cycle count, and a not_supported error otherwise.
+  static llvm::Error checkLbrSupport();
+
   explicit X86LbrCounter(pfm::PerfEvent &&Event);
 
   virtual ~X86LbrCounter();
@@ -43,6 +47,11 @@
   readOrError(StringRef FunctionBytes) const override;
 
 private:
+  // Shared read path behind readOrError() and checkLbrSupport(). From/To are
+  // the code range of interest; checkLbrSupport() passes nullptr for both.
+  llvm::Expected<llvm::SmallVector<int64_t, 4>>
+  doReadCounter(const void *From, const void *To) const;
+
   void *MMappedBuffer = nullptr;
 };
 
diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp
--- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp
+++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp
@@ -38,6 +38,12 @@
 static constexpr size_t kBufferPages = 8;
 static const size_t kDataBufferSize = kBufferPages * getpagesize();
 
+// Do a fake use of the argument to prevent it from being optimised out by the
+// compiler.
+template <typename T> static void FakeUse(const T &var) {
+  asm volatile("" : "+m"(const_cast<T &>(var)));
+}
+
 // Waits for the LBR perf events.
 static int pollLbrPerfEvent(const int FileDescriptor) {
   struct pollfd PollFd;
@@ -70,7 +76,6 @@
 static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize,
                                    const void *From, const void *To,
                                    llvm::SmallVector<int64_t, 4> *CycleArray) {
-  assert(From != nullptr && To != nullptr);
   const char *DataPtr = DataBuf;
   while (DataPtr < DataBuf + DataSize) {
     struct perf_event_header Header;
@@ -149,21 +154,46 @@
   ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */);
 }
 
+llvm::Error X86LbrCounter::checkLbrSupport() {
+  // Do a sample read and check if the results contain non-zero values.
+
+  X86LbrCounter counter(X86LbrPerfEvent(123));
+  counter.start();
+
+  // Execute some branches for the LBR to record. The trip count depends on a
+  // runtime value so that the compiler cannot unroll the loop and get rid of
+  // all the branches; the "+ 1" keeps it non-zero for every buffer address.
+  int Limit = 10 * (reinterpret_cast<intptr_t>(counter.MMappedBuffer) % 3 + 1);
+  int Sum = 0;
+  for (int I = 0; I < Limit; ++I) {
+    Sum += I * I;
+    FakeUse(Sum);
+  }
+
+  counter.stop();
+
+  auto ResultOrError = counter.doReadCounter(nullptr, nullptr);
+  if (!ResultOrError) {
+    // A failed read is reported as "not supported" below. The error must be
+    // consumed here, otherwise the Expected is destroyed unchecked.
+    llvm::consumeError(ResultOrError.takeError());
+  } else {
+    // If there is at least one non-zero entry, then LBR is supported.
+    for (const int64_t &Value : ResultOrError.get())
+      if (Value != 0)
+        return Error::success();
+  }
+
+  return llvm::make_error<llvm::StringError>(
+      "LBR format with cycles is not supported on the host.",
+      llvm::errc::not_supported);
+}
+
 llvm::Expected<llvm::SmallVector<int64_t, 4>>
 X86LbrCounter::readOrError(StringRef FunctionBytes) const {
-  // The max number of time-outs/retries before we give up.
-  static constexpr int kMaxTimeouts = 160;
-
   // Disable the event before reading
   ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0);
 
-  // Parses the LBR buffer and fills CycleArray with the sequence of cycle
-  // counts from the buffer.
-  llvm::SmallVector<int64_t, 4> CycleArray;
-  std::unique_ptr<char[]> DataBuf(new char[kDataBufferSize]);
-  int NumTimeouts = 0;
-  int PollResult = 0;
-
   // Find the boundary of the function so that we could filter the LBRs
   // to keep only the relevant records.
   if (FunctionBytes.empty())
@@ -172,6 +202,21 @@
   const void *From = reinterpret_cast<const void *>(FunctionBytes.data());
   const void *To = reinterpret_cast<const void *>(FunctionBytes.data() +
                                                   FunctionBytes.size());
+  return doReadCounter(From, To);
+}
+
+llvm::Expected<llvm::SmallVector<int64_t, 4>>
+X86LbrCounter::doReadCounter(const void *From, const void *To) const {
+  // The max number of time-outs/retries before we give up.
+  static constexpr int kMaxTimeouts = 160;
+
+  // Parses the LBR buffer and fills CycleArray with the sequence of cycle
+  // counts from the buffer.
+  llvm::SmallVector<int64_t, 4> CycleArray;
+  std::unique_ptr<char[]> DataBuf(new char[kDataBufferSize]);
+  int NumTimeouts = 0;
+  int PollResult = 0;
+
   while (PollResult <= 0) {
     PollResult = pollLbrPerfEvent(FileDescriptor);
     if (PollResult > 0)
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -160,11 +160,11 @@
                                       cl::desc(""), cl::cat(AnalysisOptions),
                                       cl::init(""));
 
-static cl::list<std::string>
-    AllowedHostCpus("allowed-host-cpu",
-                    cl::desc("If specified, only run the benchmark if the host "
-                             "CPU matches the names"),
-                    cl::cat(Options), cl::ZeroOrMore);
+static cl::opt<bool> CheckFeatureSupport(
+    "check-feature-support",
+    cl::desc("If true, only run the benchmark if specified support is "
+             "present. Exit with non-zero, otherwise"),
+    cl::cat(Options), cl::init(false));
 
 static cl::opt<bool> AnalysisDisplayUnstableOpcodes(
     "analysis-display-unstable-clusters",
@@ -302,12 +302,8 @@
 
   const LLVMState State(CpuName);
 
-  llvm::StringRef ActualCpu = State.getTargetMachine().getTargetCPU();
-  for (auto Begin = AllowedHostCpus.begin(); Begin != AllowedHostCpus.end();
-       ++Begin) {
-    if (ActualCpu != *Begin)
-      ExitWithError(llvm::Twine("Unexpected host CPU ").concat(ActualCpu));
-  }
+  if (CheckFeatureSupport)
+    ExitOnErr(State.getExegesisTarget().checkFeatureSupport());
 
   const std::unique_ptr<BenchmarkRunner> Runner =
       ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(