diff --git a/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h b/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h
--- a/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h
+++ b/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h
@@ -49,9 +49,12 @@
 };
 
 struct PerDistributionData {
-  double MedianBytesPerSecond; // Median of samples for this distribution.
-  double Score;                // Normalized score for this distribution.
-  Grade::GradeEnum Grade;      // Grade for this distribution.
+  std::vector<double> BytesPerSecondSamples;
+  double BytesPerSecondMedian;   // Median of samples for this distribution.
+  double BytesPerSecondMean;     // Mean of samples for this distribution.
+  double BytesPerSecondVariance; // Variance of samples for this distribution.
+  double Score;                  // Normalized score for this distribution.
+  Grade::GradeEnum Grade;        // Grade for this distribution.
 };
 
 struct FunctionData {
diff --git a/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp b/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp
--- a/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp
+++ b/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp
@@ -76,29 +76,48 @@
   return BAD;
 }
 
+static double computeUnbiasedSampleVariance(const std::vector<double> &Samples,
+                                            const double SampleMean) {
+  assert(!Samples.empty());
+  if (Samples.size() == 1)
+    return 0;
+  double DiffSquaresSum = 0;
+  for (const double S : Samples) {
+    const double Diff = S - SampleMean;
+    DiffSquaresSum += Diff * Diff;
+  }
+  return DiffSquaresSum / (Samples.size() - 1);
+}
+
+static void processPerDistributionData(PerDistributionData &Data) {
+  auto &Samples = Data.BytesPerSecondSamples;
+  assert(!Samples.empty());
+  // Sample Mean
+  const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
+  Data.BytesPerSecondMean = Sum / Samples.size();
+  // Unbiased Sample Variance
+  Data.BytesPerSecondVariance =
+      computeUnbiasedSampleVariance(Samples, Data.BytesPerSecondMean);
+  // Median
+  const size_t HalfSize = Samples.size() / 2;
+  std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
+  Data.BytesPerSecondMedian = Samples[HalfSize];
+}
+
 std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
-  std::unordered_map<SampleId, std::vector<double>, SampleId::Hasher>
-      BucketedSamples;
-  for (const auto &S : Samples)
-    BucketedSamples[S.Id].push_back(S.BytesPerSecond);
-  std::unordered_map<FunctionId, StringMap<double>, FunctionId::Hasher>
-      Throughputs;
-  for (auto &Pair : BucketedSamples) {
-    const auto &Id = Pair.first;
-    auto &Values = Pair.second;
-    const size_t HalfSize = Values.size() / 2;
-    std::nth_element(Values.begin(), Values.begin() + HalfSize, Values.end());
-    const double MedianValue = Values[HalfSize];
-    Throughputs[Id.Function][Id.Distribution.Name] = MedianValue;
+  std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
+  for (const auto &S : Samples) {
+    auto &Function = Functions[S.Id.Function];
+    auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
+    Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
   }
+
   std::vector<FunctionData> Output;
-  for (auto &Pair : Throughputs) {
-    FunctionData Data;
-    Data.Id = Pair.first;
-    for (const auto &Pair : Pair.second)
-      Data.PerDistributionData[Pair.getKey()].MedianBytesPerSecond =
-          Pair.getValue();
-    Output.push_back(std::move(Data));
+  for (auto &[FunctionId, Function] : Functions) {
+    Function.Id = FunctionId;
+    for (auto &Pair : Function.PerDistributionData)
+      processPerDistributionData(Pair.second);
+    Output.push_back(std::move(Function));
   }
   return Output;
 }
@@ -130,7 +149,7 @@
     const FunctionType Type = Function.Id.Type;
     for (const auto &Pair : Function.PerDistributionData) {
       const auto &Distribution = Pair.getKey();
-      const double Throughput = Pair.getValue().MedianBytesPerSecond;
+      const double Throughput = Pair.getValue().BytesPerSecondMedian;
       const Key K{Type, Distribution};
       ThroughputMinMax[K].update(Throughput);
     }
@@ -140,7 +159,7 @@
     const FunctionType Type = Function.Id.Type;
     for (const auto &Pair : Function.PerDistributionData) {
       const auto &Distribution = Pair.getKey();
-      const double Throughput = Pair.getValue().MedianBytesPerSecond;
+      const double Throughput = Pair.getValue().BytesPerSecondMedian;
       const Key K{Type, Distribution};
       Function.PerDistributionData[Distribution].Score =
           ThroughputMinMax[K].normalize(Throughput);
diff --git a/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp b/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp
--- a/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp
+++ b/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp
@@ -10,6 +10,7 @@
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
+using testing::DoubleNear;
 using testing::ElementsAre;
 using testing::Pair;
 using testing::SizeIs;
@@ -31,8 +32,10 @@
   EXPECT_THAT(Data[0].Id, Foo1);
   EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1));
   // A single value is provided.
-  EXPECT_THAT(
-      Data[0].PerDistributionData.lookup(DistA.Name).MedianBytesPerSecond, 4);
+  const auto &DistributionData = Data[0].PerDistributionData.lookup(DistA.Name);
+  EXPECT_THAT(DistributionData.BytesPerSecondMedian, 4);
+  EXPECT_THAT(DistributionData.BytesPerSecondMean, 4);
+  EXPECT_THAT(DistributionData.BytesPerSecondVariance, 0);
 }
 
 TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsManySamplesSameBucket) {
@@ -48,8 +51,10 @@
   EXPECT_THAT(Data[0].PerDistributionData, SizeIs(1));
   // When multiple values are provided we pick the median one (here median of 4,
   // 5, 5).
-  EXPECT_THAT(
-      Data[0].PerDistributionData.lookup(DistA.Name).MedianBytesPerSecond, 5);
+  const auto &DistributionData = Data[0].PerDistributionData.lookup(DistA.Name);
+  EXPECT_THAT(DistributionData.BytesPerSecondMedian, 5);
+  EXPECT_THAT(DistributionData.BytesPerSecondMean, DoubleNear(4.6, 0.1));
+  EXPECT_THAT(DistributionData.BytesPerSecondVariance, DoubleNear(0.33, 0.01));
 }
 
 TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsServeralFunctionAndDist) {
@@ -86,11 +91,11 @@
       [](const FunctionData &A, const FunctionData &B) { return A.Id < B.Id; });
 
   EXPECT_THAT(Data[0].Id, Foo1);
-  EXPECT_THAT(Data[0].PerDistributionData.lookup("A").MedianBytesPerSecond, 1);
+  EXPECT_THAT(Data[0].PerDistributionData.lookup("A").BytesPerSecondMedian, 1);
   EXPECT_THAT(Data[1].Id, Foo2);
-  EXPECT_THAT(Data[1].PerDistributionData.lookup("A").MedianBytesPerSecond, 2);
+  EXPECT_THAT(Data[1].PerDistributionData.lookup("A").BytesPerSecondMedian, 2);
   EXPECT_THAT(Data[2].Id, Foo3);
-  EXPECT_THAT(Data[2].PerDistributionData.lookup("A").MedianBytesPerSecond, 3);
+  EXPECT_THAT(Data[2].PerDistributionData.lookup("A").BytesPerSecondMedian, 3);
 
   // Normalizes throughput per distribution.
   fillScores(Data);
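
Note on the statistics introduced above: the patch computes the unbiased (Bessel-corrected) sample variance, dividing by N - 1 rather than N, and obtains the median with a partial sort via std::nth_element instead of fully sorting the samples. Below is a minimal standalone sketch of the same computations, reusing the sample values {4, 5, 5} from the unit test; the main function and printed output are illustrative only and not part of the patch.

// Standalone illustration of the statistics computed by
// processPerDistributionData above; not part of the patch itself.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <numeric>
#include <vector>

int main() {
  std::vector<double> Samples = {4, 5, 5}; // Same values as the unit test.

  // Sample mean: 14 / 3 ~= 4.67.
  const double Sum = std::accumulate(Samples.begin(), Samples.end(), 0.0);
  const double Mean = Sum / Samples.size();

  // Unbiased sample variance: divide by N - 1 (Bessel's correction).
  // Here: (0.67^2 + 0.33^2 + 0.33^2) / 2 ~= 0.33.
  double DiffSquaresSum = 0;
  for (const double S : Samples) {
    const double Diff = S - Mean;
    DiffSquaresSum += Diff * Diff;
  }
  const double Variance = DiffSquaresSum / (Samples.size() - 1);

  // Median via partial sort: nth_element places the middle element at
  // Samples[HalfSize] without fully sorting the vector. Here: 5.
  const size_t HalfSize = Samples.size() / 2;
  std::nth_element(Samples.begin(), Samples.begin() + HalfSize, Samples.end());
  const double Median = Samples[HalfSize];

  std::printf("mean=%f variance=%f median=%f\n", Mean, Variance, Median);
  return 0;
}

For an even number of samples this median convention selects the upper of the two middle elements rather than averaging them. Also note that std::nth_element reorders the vector in place; since mean and variance are order-independent, computing them before the median, as processPerDistributionData does, is safe.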