diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h --- a/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkResult.h @@ -74,7 +74,8 @@ std::string Error; std::string Info; std::vector AssembledSnippet; - + // How to aggregate measurements. + enum ResultAggregationModeE { Min, Max, Mean, MinVariance }; // Read functions. static Expected readYaml(const LLVMState &State, StringRef Filename); diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -21,6 +21,7 @@ #include "LlvmState.h" #include "MCInstrDescView.h" #include "SnippetRepetitor.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Error.h" #include @@ -65,7 +66,11 @@ class FunctionExecutor { public: virtual ~FunctionExecutor(); + // FIXME deprecate this. 
virtual Expected runAndMeasure(const char *Counters) const = 0; + + virtual Expected> + runAndSample(const char *Counters) const = 0; }; protected: diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -46,9 +46,29 @@ private: Expected runAndMeasure(const char *Counters) const override { + auto ResultOrError = runAndSample(Counters); + if (ResultOrError) + return ResultOrError.get()[0]; + return ResultOrError.takeError(); + } + + static void + accumulateCounterValues(const llvm::SmallVector &NewValues, + llvm::SmallVector *Result) { + + const size_t NumValues = std::max(NewValues.size(), Result->size()); + if (NumValues > Result->size()) + Result->resize(NumValues, 0); + for (size_t I = 0, End = NewValues.size(); I < End; ++I) + (*Result)[I] += NewValues[I]; + } + + Expected> + runAndSample(const char *Counters) const override { // We sum counts when there are several counters for a single ProcRes // (e.g. P23 on SandyBridge). - int64_t CounterValue = 0; + llvm::SmallVector CounterValues; + int Reserved = 0; SmallVector CounterNames; StringRef(Counters).split(CounterNames, '+'); char *const ScratchPtr = Scratch->ptr(); @@ -61,6 +81,17 @@ return CounterOrError.takeError(); pfm::Counter *Counter = CounterOrError.get().get(); + if (Reserved == 0) { + Reserved = Counter->numValues(); + CounterValues.reserve(Reserved); + } else if (Reserved != Counter->numValues()) + // It'd be wrong to accumulate vectors of different sizes. 
+ return make_error( + llvm::Twine("Inconsistent number of values for counter ") + .concat(CounterName) + .concat(std::to_string(Counter->numValues())) + .concat(" vs expected of ") + .concat(std::to_string(Reserved))); Scratch->clear(); { CrashRecoveryContext CRC; @@ -75,9 +106,13 @@ if (Crashed) return make_error("snippet crashed while running"); } - CounterValue += Counter->read(); + auto ValueOrError = Counter->readOrError(); + if (!ValueOrError) + return ValueOrError.takeError(); + + accumulateCounterValues(ValueOrError.get(), &CounterValues); } - return CounterValue; + return CounterValues; } const LLVMState &State; diff --git a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h --- a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h +++ b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h @@ -21,13 +21,16 @@ class LatencyBenchmarkRunner : public BenchmarkRunner { public: - LatencyBenchmarkRunner(const LLVMState &State, - InstructionBenchmark::ModeE Mode); + LatencyBenchmarkRunner( + const LLVMState &State, InstructionBenchmark::ModeE Mode, + InstructionBenchmark::ResultAggregationModeE ResultAggMode); ~LatencyBenchmarkRunner() override; private: Expected> runMeasurements(const FunctionExecutor &Executor) const override; + + InstructionBenchmark::ResultAggregationModeE ResultAggMode; }; } // namespace exegesis } // namespace llvm diff --git a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp --- a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp @@ -8,48 +8,135 @@ #include "LatencyBenchmarkRunner.h" -#include "Target.h" #include "BenchmarkRunner.h" +#include "Target.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Error.h" +#include +#include namespace llvm { namespace exegesis { -LatencyBenchmarkRunner::LatencyBenchmarkRunner(const LLVMState &State, - 
InstructionBenchmark::ModeE Mode) +LatencyBenchmarkRunner::LatencyBenchmarkRunner( + const LLVMState &State, InstructionBenchmark::ModeE Mode, + InstructionBenchmark::ResultAggregationModeE ResultAgg) : BenchmarkRunner(State, Mode) { assert((Mode == InstructionBenchmark::Latency || Mode == InstructionBenchmark::InverseThroughput) && "invalid mode"); + ResultAggMode = ResultAgg; } LatencyBenchmarkRunner::~LatencyBenchmarkRunner() = default; +static double computeVariance(const llvm::SmallVector &Values) { + if (Values.empty()) + return 0.0; + double Sum = std::accumulate(Values.begin(), Values.end(), 0.0); + + const double Mean = Sum / Values.size(); + double Ret = 0; + for (const auto &V : Values) { + double Delta = V - Mean; + Ret += Delta * Delta; + } + return Ret / Values.size(); +} + +static int64_t findMin(const llvm::SmallVector &Values) { + if (Values.empty()) + return 0; + return *std::min_element(Values.begin(), Values.end()); +} + +static int64_t findMax(const llvm::SmallVector &Values) { + if (Values.empty()) + return 0; + return *std::max_element(Values.begin(), Values.end()); +} + +static int64_t findMean(const llvm::SmallVector &Values) { + if (Values.empty()) + return 0; + return std::accumulate(Values.begin(), Values.end(), 0.0) / + static_cast(Values.size()); +} + Expected> LatencyBenchmarkRunner::runMeasurements( const FunctionExecutor &Executor) const { // Cycle measurements include some overhead from the kernel. Repeat the - // measure several times and take the minimum value. + // measurement several times and return the aggregated value, as specified by + // ResultAggMode. constexpr const int NumMeasurements = 30; - int64_t MinValue = std::numeric_limits::max(); + llvm::SmallVector AccumulatedValues; + double MinVariance = std::numeric_limits::infinity(); const char *CounterName = State.getPfmCounters().CycleCounter; + // Number of values returned by the most recent run.
+ int ValuesCount = 0; for (size_t I = 0; I < NumMeasurements; ++I) { - auto ExpectedCounterValue = Executor.runAndMeasure(CounterName); - if (!ExpectedCounterValue) - return ExpectedCounterValue.takeError(); - if (*ExpectedCounterValue < MinValue) - MinValue = *ExpectedCounterValue; + auto ExpectedCounterValues = Executor.runAndSample(CounterName); + if (!ExpectedCounterValues) + return ExpectedCounterValues.takeError(); + ValuesCount = ExpectedCounterValues.get().size(); + if (ValuesCount == 1) + AccumulatedValues.push_back(ExpectedCounterValues.get()[0]); + else { + // We'll keep the reading with the lowest variance (i.e., most stable). + double Variance = computeVariance(*ExpectedCounterValues); + if (MinVariance > Variance) { + AccumulatedValues = std::move(ExpectedCounterValues.get()); + MinVariance = Variance; + } + } } - std::vector Result; + + std::string ModeName; switch (Mode) { case InstructionBenchmark::Latency: - Result = {BenchmarkMeasure::Create("latency", MinValue)}; + ModeName = "latency"; break; case InstructionBenchmark::InverseThroughput: - Result = {BenchmarkMeasure::Create("inverse_throughput", MinValue)}; + ModeName = "inverse_throughput"; break; default: break; } - return std::move(Result); + + switch (ResultAggMode) { + case InstructionBenchmark::MinVariance: { + if (ValuesCount == 1) + llvm::errs() << "Each sample only has one value.
result-aggregation-mode " + "of min-variance is probably non-sensical\n"; + std::vector Result; + Result.reserve(AccumulatedValues.size()); + for (const int64_t Value : AccumulatedValues) + Result.push_back(BenchmarkMeasure::Create(ModeName, Value)); + return std::move(Result); + } + case InstructionBenchmark::Min: { + std::vector Result; + Result.push_back( + BenchmarkMeasure::Create(ModeName, findMin(AccumulatedValues))); + return std::move(Result); + } + case InstructionBenchmark::Max: { + std::vector Result; + Result.push_back( + BenchmarkMeasure::Create(ModeName, findMax(AccumulatedValues))); + return std::move(Result); + } + case InstructionBenchmark::Mean: { + std::vector Result; + Result.push_back( + BenchmarkMeasure::Create(ModeName, findMean(AccumulatedValues))); + return std::move(Result); + } + } + return llvm::make_error(llvm::Twine("Unexpected benchmark mode(") + .concat(std::to_string(Mode)) + .concat(" and unexpected ResultAggMode ") + .concat(std::to_string(ResultAggMode))); } } // namespace exegesis diff --git a/llvm/tools/llvm-exegesis/lib/PerfHelper.h b/llvm/tools/llvm-exegesis/lib/PerfHelper.h --- a/llvm/tools/llvm-exegesis/lib/PerfHelper.h +++ b/llvm/tools/llvm-exegesis/lib/PerfHelper.h @@ -15,9 +15,11 @@ #define LLVM_TOOLS_LLVM_EXEGESIS_PERFHELPER_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" #include "llvm/Support/Error.h" + #include #include #include @@ -85,7 +87,9 @@ int64_t read() const; /// Returns the current value of the counter or error if it cannot be read. 
- virtual llvm::Expected readOrError() const; + virtual llvm::Expected> readOrError() const; + + virtual int numValues() const; private: PerfEvent Event; diff --git a/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp b/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp --- a/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp +++ b/llvm/tools/llvm-exegesis/lib/PerfHelper.cpp @@ -119,23 +119,27 @@ int64_t Counter::read() const { auto ValueOrError = readOrError(); - if (ValueOrError) - return ValueOrError.get(); - - errs() << ValueOrError.takeError() << "\n"; + if (ValueOrError) { + if (!ValueOrError.get().empty()) + return ValueOrError.get()[0]; + errs() << "Counter has no reading\n"; + } else + errs() << ValueOrError.takeError() << "\n"; return -1; } -llvm::Expected Counter::readOrError() const { +llvm::Expected> Counter::readOrError() const { int64_t Count = 0; ssize_t ReadSize = ::read(FileDescriptor, &Count, sizeof(Count)); if (ReadSize != sizeof(Count)) return llvm::make_error("Failed to read event counter", llvm::errc::io_error); - - return Count; + llvm::SmallVector Result; + Result.push_back(Count); + return Result; } +int Counter::numValues() const { return 1; } #else Counter::Counter(PerfEvent &&Event) : Event(std::move(Event)) {} @@ -148,11 +152,13 @@ int64_t Counter::read() const { return 42; } -llvm::Expected Counter::readOrError() const { +llvm::Expected> Counter::readOrError() const { return llvm::make_error("Not implemented", llvm::errc::io_error); } +int Counter::numValues() const { return 1; } + #endif } // namespace pfm diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h --- a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -148,9 +148,10 @@ const LLVMState &State, const SnippetGenerator::Options &Opts) const; // Creates a benchmark runner for the given mode. 
- Expected> - createBenchmarkRunner(InstructionBenchmark::ModeE Mode, - const LLVMState &State) const; + Expected> createBenchmarkRunner( + InstructionBenchmark::ModeE Mode, const LLVMState &State, + InstructionBenchmark::ResultAggregationModeE ResultAggMode = + InstructionBenchmark::Min) const; // Returns the ExegesisTarget for the given triple or nullptr if the target // does not exist. @@ -176,9 +177,11 @@ std::unique_ptr virtual createParallelSnippetGenerator( const LLVMState &State, const SnippetGenerator::Options &Opts) const; std::unique_ptr virtual createLatencyBenchmarkRunner( - const LLVMState &State, InstructionBenchmark::ModeE Mode) const; + const LLVMState &State, InstructionBenchmark::ModeE Mode, + InstructionBenchmark::ResultAggregationModeE ResultAggMode) const; std::unique_ptr virtual createUopsBenchmarkRunner( - const LLVMState &State) const; + const LLVMState &State, + InstructionBenchmark::ResultAggregationModeE ResultAggMode) const; const ExegesisTarget *Next = nullptr; const ArrayRef CpuPfmCounters; diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp --- a/llvm/tools/llvm-exegesis/lib/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/Target.cpp @@ -68,8 +68,9 @@ } Expected> -ExegesisTarget::createBenchmarkRunner(InstructionBenchmark::ModeE Mode, - const LLVMState &State) const { +ExegesisTarget::createBenchmarkRunner( + InstructionBenchmark::ModeE Mode, const LLVMState &State, + InstructionBenchmark::ResultAggregationModeE ResultAggMode) const { PfmCountersInfo PfmCounters = State.getPfmCounters(); switch (Mode) { case InstructionBenchmark::Unknown: @@ -85,12 +86,12 @@ .concat(ModeName) .concat("' mode, sched model does not define a cycle counter.")); } - return createLatencyBenchmarkRunner(State, Mode); + return createLatencyBenchmarkRunner(State, Mode, ResultAggMode); case InstructionBenchmark::Uops: if (!PfmCounters.UopsCounter && !PfmCounters.IssueCounters) return make_error("can't run 'uops' mode, 
sched model does not " "define uops or issue counters."); - return createUopsBenchmarkRunner(State); + return createUopsBenchmarkRunner(State, ResultAggMode); } return nullptr; } @@ -106,12 +107,14 @@ } std::unique_ptr ExegesisTarget::createLatencyBenchmarkRunner( - const LLVMState &State, InstructionBenchmark::ModeE Mode) const { - return std::make_unique(State, Mode); + const LLVMState &State, InstructionBenchmark::ModeE Mode, + InstructionBenchmark::ResultAggregationModeE ResultAggMode) const { + return std::make_unique(State, Mode, ResultAggMode); } -std::unique_ptr -ExegesisTarget::createUopsBenchmarkRunner(const LLVMState &State) const { +std::unique_ptr ExegesisTarget::createUopsBenchmarkRunner( + const LLVMState &State, + InstructionBenchmark::ResultAggregationModeE /*unused*/) const { return std::make_unique(State); } diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -83,6 +83,21 @@ clEnumValN(exegesis::InstructionBenchmark::Unknown, "analysis", "Analysis"))); +static cl::opt + ResultAggMode( + "result-aggregation-mode", + cl::desc("How to aggregate multi-values result"), cl::cat(Options), + cl::values(clEnumValN(exegesis::InstructionBenchmark::Min, "min", + "Keep min reading"), + clEnumValN(exegesis::InstructionBenchmark::Max, "max", + "Keep max reading"), + clEnumValN(exegesis::InstructionBenchmark::Mean, "mean", + "Compute mean of all readings"), + clEnumValN(exegesis::InstructionBenchmark::MinVariance, + "min-variance", + "Keep readings set with min-variance")), + cl::init(exegesis::InstructionBenchmark::Min)); + static cl::opt RepetitionMode( "repetition-mode", cl::desc("how to repeat the instruction snippet"), cl::cat(BenchmarkOptions), @@ -281,8 +296,9 @@ const LLVMState State(CpuName); - const std::unique_ptr Runner = ExitOnErr( - State.getExegesisTarget().createBenchmarkRunner(BenchmarkMode, 
State)); + const std::unique_ptr Runner = + ExitOnErr(State.getExegesisTarget().createBenchmarkRunner( + BenchmarkMode, State, ResultAggMode)); if (!Runner) { ExitWithError("cannot create benchmark runner"); }