Index: test/tools/llvm-exegesis/X86/latency-by-opcode-name.s =================================================================== --- test/tools/llvm-exegesis/X86/latency-by-opcode-name.s +++ test/tools/llvm-exegesis/X86/latency-by-opcode-name.s @@ -1,6 +1,6 @@ # RUN: llvm-exegesis -mode=latency -opcode-name=ADD32rr | FileCheck %s CHECK: mode: latency -CHECK-NEXT: key: +CHECK: key: CHECK-NEXT: instructions: CHECK-NEXT: ADD32rr Index: tools/llvm-exegesis/lib/Analysis.cpp =================================================================== --- tools/llvm-exegesis/lib/Analysis.cpp +++ tools/llvm-exegesis/lib/Analysis.cpp @@ -139,7 +139,7 @@ #endif for (const auto &Measurement : Point.Measurements) { OS << kCsvSep; - writeMeasurementValue(OS, Measurement.Value); + writeMeasurementValue(OS, Measurement.PerInstructionValue); } OS << "\n"; } @@ -410,14 +410,14 @@ return false; } // Find the latency. - SchedClassPoint[0].Value = 0.0; + SchedClassPoint[0].PerInstructionValue = 0.0; for (unsigned I = 0; I < SC.SCDesc->NumWriteLatencyEntries; ++I) { const llvm::MCWriteLatencyEntry *const WLE = STI.getWriteLatencyEntry(SC.SCDesc, I); - SchedClassPoint[0].Value = - std::max(SchedClassPoint[0].Value, WLE->Cycles); + SchedClassPoint[0].PerInstructionValue = + std::max(SchedClassPoint[0].PerInstructionValue, WLE->Cycles); } - ClusterCenterPoint[0].Value = Representative[0].avg(); + ClusterCenterPoint[0].PerInstructionValue = Representative[0].avg(); } else if (Mode == InstructionBenchmark::Uops) { for (int I = 0, E = Representative.size(); I < E; ++I) { // Find the pressure on ProcResIdx `Key`. @@ -433,11 +433,11 @@ [ProcResIdx](const std::pair &WPR) { return WPR.first == ProcResIdx; }); - SchedClassPoint[I].Value = + SchedClassPoint[I].PerInstructionValue = ProcResPressureIt == SC.IdealizedProcResPressure.end() ? 0.0 : ProcResPressureIt->second; - ClusterCenterPoint[I].Value = Representative[I].avg(); + ClusterCenterPoint[I].PerInstructionValue = Representative[I].avg(); } } else { llvm::errs() << "unimplemented measurement matching for mode " << Mode Index: tools/llvm-exegesis/lib/BenchmarkResult.h =================================================================== --- tools/llvm-exegesis/lib/BenchmarkResult.h +++ tools/llvm-exegesis/lib/BenchmarkResult.h @@ -39,7 +39,12 @@ struct BenchmarkMeasure { std::string Key; - double Value; + // This is the per-instruction value, i.e. measured quantity scaled per + // instruction. + double PerInstructionValue; + // This is the per-snippet value, i.e. measured quantity for one repetition of + // the whole snippet. + double PerSnippetValue; std::string DebugString; }; Index: tools/llvm-exegesis/lib/BenchmarkResult.cpp =================================================================== --- tools/llvm-exegesis/lib/BenchmarkResult.cpp +++ tools/llvm-exegesis/lib/BenchmarkResult.cpp @@ -156,9 +156,10 @@ // e.g. { "key": "the key", "value": 0123 } template <> struct MappingTraits { static void mapping(IO &Io, exegesis::BenchmarkMeasure &Obj) { - Io.mapRequired("key", Obj.Key); - Io.mapRequired("value", Obj.Value); Io.mapOptional("debug_string", Obj.DebugString); + Io.mapRequired("value", Obj.PerInstructionValue); + Io.mapOptional("snippet_value", Obj.PerSnippetValue); + Io.mapRequired("key", Obj.Key); } static const bool flow = true; }; @@ -302,9 +303,9 @@ Key = BM.Key; assert(Key == BM.Key); ++NumValues; - SumValues += BM.Value; - MaxValue = std::max(MaxValue, BM.Value); - MinValue = std::min(MinValue, BM.Value); + SumValues += BM.PerInstructionValue; + MaxValue = std::max(MaxValue, BM.PerInstructionValue); + MinValue = std::min(MinValue, BM.PerInstructionValue); } } // namespace exegesis Index: tools/llvm-exegesis/lib/BenchmarkRunner.h =================================================================== --- tools/llvm-exegesis/lib/BenchmarkRunner.h +++ tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -69,8 +69,8 @@ private: virtual std::vector - runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch, - const unsigned NumRepetitions) const = 0; + runMeasurements(const ExecutableFunction &EF, + ScratchSpace &Scratch) const = 0; llvm::Expected writeObjectFile(const BenchmarkCode &Configuration, Index: tools/llvm-exegesis/lib/BenchmarkRunner.cpp =================================================================== --- tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -87,7 +87,15 @@ << *ObjectFilePath << "\n"; const ExecutableFunction EF(State.createTargetMachine(), getObjectFromFile(*ObjectFilePath)); - InstrBenchmark.Measurements = runMeasurements(EF, *Scratch, NumRepetitions); + InstrBenchmark.Measurements = runMeasurements(EF, *Scratch); + assert(InstrBenchmark.NumRepetitions > 0 && "invalid NumRepetitions"); + for (BenchmarkMeasure &BM : InstrBenchmark.Measurements) { + // Scale the measurements by instruction. + BM.PerInstructionValue /= InstrBenchmark.NumRepetitions; + // Scale the measurements by snippet. + BM.PerSnippetValue *= static_cast(BC.Instructions.size()) / + InstrBenchmark.NumRepetitions; + } return InstrBenchmark; } Index: tools/llvm-exegesis/lib/Clustering.cpp =================================================================== --- tools/llvm-exegesis/lib/Clustering.cpp +++ tools/llvm-exegesis/lib/Clustering.cpp @@ -53,7 +53,7 @@ const std::vector &Q) const { double DistanceSquared = 0.0; for (size_t I = 0, E = P.size(); I < E; ++I) { - const auto Diff = P[I].Value - Q[I].Value; + const auto Diff = P[I].PerInstructionValue - Q[I].PerInstructionValue; DistanceSquared += Diff * Diff; } return DistanceSquared <= EpsilonSquared_; Index: tools/llvm-exegesis/lib/Latency.h =================================================================== --- tools/llvm-exegesis/lib/Latency.h +++ tools/llvm-exegesis/lib/Latency.h @@ -44,8 +44,8 @@ private: std::vector - runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch, - const unsigned NumRepetitions) const override; + runMeasurements(const ExecutableFunction &EF, + ScratchSpace &Scratch) const override; virtual const char *getCounterName() const; }; Index: tools/llvm-exegesis/lib/Latency.cpp =================================================================== --- tools/llvm-exegesis/lib/Latency.cpp +++ tools/llvm-exegesis/lib/Latency.cpp @@ -109,8 +109,7 @@ std::vector LatencyBenchmarkRunner::runMeasurements(const ExecutableFunction &Function, - ScratchSpace &Scratch, - const unsigned NumRepetitions) const { + ScratchSpace &Scratch) const { // Cycle measurements include some overhead from the kernel. Repeat the // measure several times and take the minimum value. constexpr const int NumMeasurements = 30; @@ -131,7 +130,8 @@ if (Value < MinLatency) MinLatency = Value; } - return {{"latency", static_cast(MinLatency) / NumRepetitions, ""}}; + return {{"latency", static_cast(MinLatency), + static_cast(MinLatency), ""}}; } } // namespace exegesis Index: tools/llvm-exegesis/lib/Uops.h =================================================================== --- tools/llvm-exegesis/lib/Uops.h +++ tools/llvm-exegesis/lib/Uops.h @@ -71,8 +71,8 @@ private: std::vector - runMeasurements(const ExecutableFunction &EF, ScratchSpace &Scratch, - const unsigned NumRepetitions) const override; + runMeasurements(const ExecutableFunction &EF, + ScratchSpace &Scratch) const override; }; } // namespace exegesis Index: tools/llvm-exegesis/lib/Uops.cpp =================================================================== --- tools/llvm-exegesis/lib/Uops.cpp +++ tools/llvm-exegesis/lib/Uops.cpp @@ -252,8 +252,7 @@ std::vector UopsBenchmarkRunner::runMeasurements(const ExecutableFunction &Function, - ScratchSpace &Scratch, - const unsigned NumRepetitions) const { + ScratchSpace &Scratch) const { const auto &SchedModel = State.getSubtargetInfo().getSchedModel(); std::vector Result; @@ -281,7 +280,8 @@ CounterValue += Counter.read(); } Result.push_back({llvm::itostr(ProcResIdx), - static_cast(CounterValue) / NumRepetitions, + static_cast(CounterValue), + static_cast(CounterValue), SchedModel.getProcResource(ProcResIdx)->Name}); } return Result; Index: unittests/tools/llvm-exegesis/ClusteringTest.cpp =================================================================== --- unittests/tools/llvm-exegesis/ClusteringTest.cpp +++ unittests/tools/llvm-exegesis/ClusteringTest.cpp @@ -26,13 +26,18 @@ std::vector Points(6); // Cluster around (x=0, y=1, z=2): points {0, 3}. - Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, "A"}}; - Points[3].Measurements = {{"x", -0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}}; + Points[0].Measurements = { + {"x", 0.01, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, "A"}}; + Points[3].Measurements = { + {"x", -0.01, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}}; // Cluster around (x=1, y=1, z=2): points {1, 4}. - Points[1].Measurements = {{"x", 1.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}}; - Points[4].Measurements = {{"x", 0.99, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}}; + Points[1].Measurements = { + {"x", 1.01, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}}; + Points[4].Measurements = { + {"x", 0.99, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}}; // Cluster around (x=0, y=0, z=0): points {5}, marked as noise. - Points[5].Measurements = {{"x", 0.0, ""}, {"y", 0.01, ""}, {"z", -0.02, ""}}; + Points[5].Measurements = { + {"x", 0.0, 0.0, ""}, {"y", 0.01, 0.0, ""}, {"z", -0.02, 0.0, ""}}; // Error cluster: points {2} Points[2].Error = "oops"; @@ -64,8 +69,9 @@ TEST(ClusteringTest, Clusters3D_InvalidSize) { std::vector Points(6); - Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}}; - Points[1].Measurements = {{"y", 1.02, ""}, {"z", 1.98, ""}}; + Points[0].Measurements = { + {"x", 0.01, 0.0, ""}, {"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}}; + Points[1].Measurements = {{"y", 1.02, 0.0, ""}, {"z", 1.98, 0.0, ""}}; auto Error = InstructionBenchmarkClustering::create(Points, 2, 0.25).takeError(); ASSERT_TRUE((bool)Error); @@ -74,8 +80,8 @@ TEST(ClusteringTest, Clusters3D_InvalidOrder) { std::vector Points(6); - Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}}; - Points[1].Measurements = {{"y", 1.02, ""}, {"x", 1.98, ""}}; + Points[0].Measurements = {{"x", 0.01, 0.0, ""}, {"y", 1.02, 0.0, ""}}; + Points[1].Measurements = {{"y", 1.02, 0.0, ""}, {"x", 1.98, 0.0, ""}}; auto Error = InstructionBenchmarkClustering::create(Points, 2, 0.25).takeError(); ASSERT_TRUE((bool)Error); Index: unittests/tools/llvm-exegesis/X86/BenchmarkResultTest.cpp =================================================================== --- unittests/tools/llvm-exegesis/X86/BenchmarkResultTest.cpp +++ unittests/tools/llvm-exegesis/X86/BenchmarkResultTest.cpp @@ -28,7 +28,8 @@ namespace exegesis { bool operator==(const BenchmarkMeasure &A, const BenchmarkMeasure &B) { - return std::tie(A.Key, A.Value) == std::tie(B.Key, B.Value); + return std::tie(A.Key, A.PerInstructionValue, A.PerSnippetValue) == + std::tie(B.Key, B.PerInstructionValue, B.PerSnippetValue); } static std::string Dump(const llvm::MCInst &McInst) { @@ -72,8 +73,8 @@ ToDisk.CpuName = "cpu_name"; ToDisk.LLVMTriple = "llvm_triple"; ToDisk.NumRepetitions = 1; - ToDisk.Measurements.push_back(BenchmarkMeasure{"a", 1, "debug a"}); - ToDisk.Measurements.push_back(BenchmarkMeasure{"b", 2, ""}); + ToDisk.Measurements.push_back(BenchmarkMeasure{"a", 1, 1, "debug a"}); + ToDisk.Measurements.push_back(BenchmarkMeasure{"b", 2, 2, ""}); ToDisk.Error = "error"; ToDisk.Info = "info"; @@ -122,10 +123,10 @@ TEST(BenchmarkResultTest, BenchmarkMeasureStats) { BenchmarkMeasureStats Stats; - Stats.push(BenchmarkMeasure{"a", 0.5, "debug a"}); - Stats.push(BenchmarkMeasure{"a", 1.5, "debug a"}); - Stats.push(BenchmarkMeasure{"a", -1.0, "debug a"}); - Stats.push(BenchmarkMeasure{"a", 0.0, "debug a"}); + Stats.push(BenchmarkMeasure{"a", 0.5, 0.0, "debug a"}); + Stats.push(BenchmarkMeasure{"a", 1.5, 0.0, "debug a"}); + Stats.push(BenchmarkMeasure{"a", -1.0, 0.0, "debug a"}); + Stats.push(BenchmarkMeasure{"a", 0.0, 0.0, "debug a"}); EXPECT_EQ(Stats.min(), -1.0); EXPECT_EQ(Stats.max(), 1.5); EXPECT_EQ(Stats.avg(), 0.25); // (0.5+1.5-1.0+0.0) / 4