diff --git a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg --- a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg +++ b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg @@ -10,7 +10,7 @@ config.unsupported = True else: - # We need libpfm to be installed and the host to be at least skylake. + # We need libpfm to be installed and the host to support the LBR format with cycles. llvm_exegesis_exe = lit.util.which('llvm-exegesis', config.llvm_tools_dir) if not llvm_exegesis_exe: print('llvm-exegesis not found') @@ -18,14 +18,10 @@ else: try: with open(os.devnull, 'w') as quiet: - check_llvm_exegesis_uops_result = subprocess.call( - [llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) check_llvm_exegesis_latency_result = subprocess.call( - [llvm_exegesis_exe, '-allowed-host-cpu', 'skylake', '-allowed-host-cpu', 'skylake-avx512', '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) + [llvm_exegesis_exe, '-mode', 'latency', '-x86-lbr-sample-period', '123', '-repetition-mode', 'loop', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) except OSError: print('could not exec llvm-exegesis') config.unsupported = True - if not check_llvm_exegesis_uops_result == 0: - config.unsupported = True if not check_llvm_exegesis_latency_result == 0: config.unsupported = True diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h --- a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -142,6 +142,11 @@ return {&Instr}; } + // Checks hardware and software support for current benchmark mode. + // Returns an error if the target host does not have support to run the + // benchmark. + virtual Error checkFeatureSupport() const { return Error::success(); } + // Creates a snippet generator for the given mode. 
std::unique_ptr createSnippetGenerator(InstructionBenchmark::ModeE Mode, diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -674,6 +674,23 @@ return Arch == Triple::x86_64 || Arch == Triple::x86; } + Error checkFeatureSupport() const override { + // LBR is the only feature we conditionally support now. + // So if LBR is not requested, then we should be able to run the benchmarks. + if (LbrSamplingPeriod == 0) + return Error::success(); + +#if defined(__linux__) && defined(HAVE_LIBPFM) && \ + defined(LIBPFM_HAS_FIELD_CYCLES) + // If the kernel supports it, the hardware still may not have it. + return X86LbrCounter::checkLbrSupport(); +#else + return llvm::make_error( + "LBR not supported on this kernel and/or platform", + llvm::errc::not_supported); +#endif + } + static const unsigned kUnavailableRegisters[4]; }; diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h --- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h +++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.h @@ -33,6 +33,10 @@ class X86LbrCounter : public pfm::Counter { public: + // Runs a short sampled loop and reads the LBR data back. Returns success if + // at least one non-zero value was read, a not_supported error otherwise. + static llvm::Error checkLbrSupport(); + explicit X86LbrCounter(pfm::PerfEvent &&Event); virtual ~X86LbrCounter(); @@ -43,6 +47,9 @@ readOrError(StringRef FunctionBytes) const override; private: + llvm::Expected> + doReadCounter(const void *From, const void *To) const; + void *MMappedBuffer = nullptr; }; diff --git a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp --- a/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/X86Counter.cpp @@ -21,6 +21,7 @@ #endif // HAVE_LIBPFM #include +#include #include #include #include @@ -35,6 +36,8 @@ namespace llvm { namespace exegesis { +// Number of entries in the LBR. 
+static constexpr int kLbrEntries = 16; static constexpr size_t kBufferPages = 8; static const size_t kDataBufferSize = kBufferPages * getpagesize(); @@ -70,7 +73,6 @@ static llvm::Error parseDataBuffer(const char *DataBuf, size_t DataSize, const void *From, const void *To, llvm::SmallVector *CycleArray) { - assert(From != nullptr && To != nullptr); const char *DataPtr = DataBuf; while (DataPtr < DataBuf + DataSize) { struct perf_event_header Header; @@ -149,21 +151,50 @@ ioctl(FileDescriptor, PERF_EVENT_IOC_REFRESH, 1024 /* kMaxPollsPerFd */); } +llvm::Error X86LbrCounter::checkLbrSupport() { + // Do a sample read and check if the results contain non-zero values. + + X86LbrCounter counter(X86LbrPerfEvent(123)); + counter.start(); + + // Prevent the compiler from unrolling the loop and getting rid of all the + // branches. We need at least kLbrEntries iterations. + int Sum = 0; + int V = 1; + + volatile int *P = &V; + auto TimeLimit = + std::chrono::high_resolution_clock::now() + std::chrono::microseconds(5); + + for (int I = 0; + I < kLbrEntries || std::chrono::high_resolution_clock::now() < TimeLimit; + ++I) { + Sum += *P; + } + + counter.stop(); + + auto ResultOrError = counter.doReadCounter(nullptr, nullptr); + if (!ResultOrError) { + // An Expected in the error state must be consumed before destruction. + consumeError(ResultOrError.takeError()); + } else { + // If there is at least one non-zero entry, then LBR is supported. + for (const int64_t &Value : ResultOrError.get()) + if (Value != 0) + return Error::success(); + } + + return llvm::make_error( + "LBR format with cycles is not supported on the host.", + llvm::errc::not_supported); +} + llvm::Expected> X86LbrCounter::readOrError(StringRef FunctionBytes) const { - // The max number of time-outs/retries before we give up. - static constexpr int kMaxTimeouts = 160; - // Disable the event before reading ioctl(FileDescriptor, PERF_EVENT_IOC_DISABLE, 0); - // Parses the LBR buffer and fills CycleArray with the sequence of cycle - // counts from the buffer. 
- llvm::SmallVector CycleArray; - std::unique_ptr DataBuf(new char[kDataBufferSize]); - int NumTimeouts = 0; - int PollResult = 0; - // Find the boundary of the function so that we could filter the LBRs // to keep only the relevant records. if (FunctionBytes.empty()) @@ -172,6 +203,21 @@ const void *From = reinterpret_cast(FunctionBytes.data()); const void *To = reinterpret_cast(FunctionBytes.data() + FunctionBytes.size()); + return doReadCounter(From, To); +} + +llvm::Expected> +X86LbrCounter::doReadCounter(const void *From, const void *To) const { + // The max number of time-outs/retries before we give up. + static constexpr int kMaxTimeouts = 160; + + // Parses the LBR buffer and fills CycleArray with the sequence of cycle + // counts from the buffer. + llvm::SmallVector CycleArray; + auto DataBuf = std::make_unique(kDataBufferSize); + int NumTimeouts = 0; + int PollResult = 0; + while (PollResult <= 0) { PollResult = pollLbrPerfEvent(FileDescriptor); if (PollResult > 0) diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -160,12 +160,6 @@ cl::desc(""), cl::cat(AnalysisOptions), cl::init("")); -static cl::list - AllowedHostCpus("allowed-host-cpu", - cl::desc("If specified, only run the benchmark if the host " - "CPU matches the names"), - cl::cat(Options), cl::ZeroOrMore); - static cl::opt AnalysisDisplayUnstableOpcodes( "analysis-display-unstable-clusters", cl::desc("if there is more than one benchmark for an opcode, said " @@ -302,12 +296,9 @@ const LLVMState State(CpuName); - llvm::StringRef ActualCpu = State.getTargetMachine().getTargetCPU(); - for (auto Begin = AllowedHostCpus.begin(); Begin != AllowedHostCpus.end(); - ++Begin) { - if (ActualCpu != *Begin) - ExitWithError(llvm::Twine("Unexpected host CPU ").concat(ActualCpu)); - } + // Preliminary check to ensure features needed for requested + // benchmark mode are present on target CPU and/or OS. 
+ ExitOnErr(State.getExegesisTarget().checkFeatureSupport()); const std::unique_ptr Runner = ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(