diff --git a/libc/benchmarks/automemcpy/ResultAnalyzer.cpp b/libc/benchmarks/automemcpy/ResultAnalyzer.cpp new file mode 100644 --- /dev/null +++ b/libc/benchmarks/automemcpy/ResultAnalyzer.cpp @@ -0,0 +1,345 @@ +//===-- JSON serialization routines -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This code analyzes the json file produced by the `automemcpy` binary. +// It works as follows: +// - Reads one or more json files. +// - If there are several runs for the same function and distribution, picks the +// median throughput (aka `BytesPerSecond`). +// - Aggregates the throughput per distributions and scores them from worst (0) +// to best (1). +// - Each distribution categorizes each function into one of the following +// categories: EXCELLENT, VERY_GOOD, GOOD, PASSABLE, INADEQUATE, MEDIOCRE, +// BAD. +// - A process similar to the Majority Judgment voting system is used to `elect` +// the best function. The histogram of grades is returned so we can +// distinguish between functions with the same final grade. In the following +// example both functions grade EXCELLENT but we may prefer the second one. +// +// | | EXCELLENT | VERY_GOOD | GOOD | PASSABLE | ... +// |------------|-----------|-----------|------|----------| ... +// | Function_1 | 7 | 1 | 2 | | ... +// | Function_2 | 6 | 4 | | | ... + +#include "benchmarks/LibcFunctionPrototypes.h" +#include "benchmarks/automemcpy/FunctionDescriptor.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/JSON.h" +#include "llvm/Support/MemoryBuffer.h" +#include +#include +#include +#include +#include + +namespace llvm { + +static cl::list InputFilenames(cl::Positional, cl::OneOrMore, + cl::desc("")); + +namespace automemcpy { + +extern ArrayRef getFunctionDescriptors(); + +static StringMap createFunctionDescriptorMap() { + StringMap Descriptors; + for (const NamedFunctionDescriptor &FD : getFunctionDescriptors()) + Descriptors.insert_or_assign(FD.Name, &FD.Desc); + return Descriptors; +} + +static const FunctionDescriptor &getFunctionDescriptor(StringRef FunctionName) { + static StringMap Descriptors = + createFunctionDescriptorMap(); + const auto *FD = Descriptors.lookup(FunctionName); + if (!FD) + report_fatal_error( + Twine("No FunctionDescriptor for ").concat(FunctionName)); + return *FD; +} + +struct FunctionId { + StringRef Name; + FunctionType Type; + COMPARABLE_AND_HASHABLE(FunctionId, Type, Name) +}; + +struct DistributionId { + StringRef Name; + COMPARABLE_AND_HASHABLE(DistributionId, Name) +}; + +struct SampleId { + FunctionId Function; + DistributionId Distribution; + COMPARABLE_AND_HASHABLE(SampleId, Function.Type, Function.Name, + Distribution.Name) +}; + +struct Sample { + SampleId Id; + double BytesPerSecond = 0; +}; + +struct Grade { + enum GradeEnum { + EXCELLENT, + VERY_GOOD, + GOOD, + PASSABLE, + INADEQUATE, + MEDIOCRE, + BAD, + ARRAY_SIZE, + }; + + static StringRef getString(const GradeEnum &GE) { + switch (GE) { + case EXCELLENT: + return "EXCELLENT"; + case VERY_GOOD: + return "VERY_GOOD"; + case GOOD: + return "GOOD"; + case PASSABLE: + return "PASSABLE"; + case INADEQUATE: + return "INADEQUATE"; + case MEDIOCRE: + return "MEDIOCRE"; + case BAD: + return "BAD"; + case ARRAY_SIZE: + report_fatal_error("logic error"); + } + } + + static GradeEnum judge(double Score) { + if (Score >= 6. / 7) + return EXCELLENT; + if (Score >= 5. / 7) + return VERY_GOOD; + if (Score >= 4. / 7) + return GOOD; + if (Score >= 3. / 7) + return PASSABLE; + if (Score >= 2. / 7) + return INADEQUATE; + if (Score >= 1. / 7) + return MEDIOCRE; + return BAD; + } +}; + +// A GradeEnum indexed array with counts for each grade. +using GradeHistogram = std::array; + +void PrintUTF8(raw_ostream &Stream, const GradeHistogram &GH) { + static constexpr std::array kCharacters = { + " ", "▁", "▂", "▃", "▄", "▅", "▆", "▇", "█"}; + + const size_t Max = *std::max_element(GH.begin(), GH.end()); + for (size_t I = 0; I < GH.size(); ++I) { + size_t Index = (float(GH[I]) / Max) * (kCharacters.size() - 1); + Stream << kCharacters.at(Index); + } +} + +struct FunctionData { + FunctionId Id; + StringMap Throughputs; // Median of samples per distribution + StringMap Scores; // Normalized score per distribution + StringMap GradeAttribution; // Grade per distribution + GradeHistogram GradeHisto = {}; // GradeEnum indexed array + Grade::GradeEnum FinalGrade = Grade::BAD; // Overall grade for this function +}; + +static std::vector getThroughputs(ArrayRef Samples) { + std::unordered_map, SampleId::Hasher> + BucketedSamples; + for (const auto &S : Samples) + BucketedSamples[S.Id].push_back(S.BytesPerSecond); + std::unordered_map, FunctionId::Hasher> + Throughputs; + for (auto &Pair : BucketedSamples) { + const auto &Id = Pair.first; + auto &Values = Pair.second; + std::sort(Values.begin(), Values.end()); + const double MedianValue = Values[Values.size() / 2]; + Throughputs[Id.Function][Id.Distribution.Name] = MedianValue; + } + std::vector Output; + for (auto &Pair : Throughputs) { + FunctionData Data; + Data.Id = Pair.first; + Data.Throughputs = std::move(Pair.second); + Output.push_back(std::move(Data)); + } + return Output; +} + +static void fillScores(MutableArrayRef Functions) { + // A key to bucket throughput per function type and distribution. + struct Key { + FunctionType Type; + StringRef Distribution; + + COMPARABLE_AND_HASHABLE(Key, Type, Distribution) + }; + // Tracks minimum and maximum values. + struct MinMax { + double Min = std::numeric_limits::max(); + double Max = std::numeric_limits::min(); + void update(double Value) { + if (Value < Min) + Min = Value; + if (Value > Max) + Max = Value; + } + double normalize(double Value) const { return (Value - Min) / (Max - Min); } + }; + + std::unordered_map ThroughputMinMax; + for (const auto &Function : Functions) { + const FunctionType Type = Function.Id.Type; + for (const auto &Pair : Function.Throughputs) { + const auto &Distribution = Pair.getKey(); + const double Throughput = Pair.getValue(); + const Key K{Type, Distribution}; + ThroughputMinMax[K].update(Throughput); + } + } + for (auto &Function : Functions) { + const FunctionType Type = Function.Id.Type; + for (const auto &Pair : Function.Throughputs) { + const auto &Distribution = Pair.getKey(); + const double Throughput = Pair.getValue(); + const Key K{Type, Distribution}; + Function.Scores[Distribution] = ThroughputMinMax[K].normalize(Throughput); + } + } +} + +static void castVotes(MutableArrayRef Functions) { + for (FunctionData &Function : Functions) + for (const auto &Pair : Function.Scores) { + const StringRef Distribution = Pair.getKey(); + const double Score = Pair.getValue(); + const auto G = Grade::judge(Score); + ++(Function.GradeHisto[G]); + Function.GradeAttribution[Distribution] = G; + } + + for (FunctionData &Function : Functions) { + const auto &GradeHisto = Function.GradeHisto; + const size_t Votes = + std::accumulate(GradeHisto.begin(), GradeHisto.end(), 0U); + const size_t MedianVote = Votes / 2; + size_t CountedVotes = 0; + Grade::GradeEnum MedianGrade = Grade::BAD; + for (size_t I = 0; I < GradeHisto.size(); ++I) { + CountedVotes += GradeHisto[I]; + if (CountedVotes > MedianVote) { + MedianGrade = Grade::GradeEnum(I); + break; + } + } + Function.FinalGrade = MedianGrade; + } +} + +struct JsonFile { + std::vector Samples; +}; + +static StringRef getInternalizedString(StringRef VolatileStr) { + static llvm::StringSet<> StringCache; + return StringCache.insert(VolatileStr).first->getKey(); +} + +bool fromJSON(const json::Value &V, Sample &Out, json::Path P) { + std::string Label; + json::ObjectMapper O(V, P); + if (O && O.map("bytes_per_second", Out.BytesPerSecond) && + O.map("label", Label)) { + const auto LabelPair = StringRef(Label).split(','); + Out.Id.Function.Name = getInternalizedString(LabelPair.first); + Out.Id.Function.Type = getFunctionDescriptor(LabelPair.first).Type; + Out.Id.Distribution.Name = getInternalizedString(LabelPair.second); + return true; + } + return false; +} + +bool fromJSON(const json::Value &V, JsonFile &JF, json::Path P) { + json::ObjectMapper O(V, P); + return O && O.map("benchmarks", JF.Samples); +} + +static ExitOnError ExitOnErr; + +JsonFile parseJsonResults(StringRef Filename) { + auto Buf = ExitOnErr(errorOrToExpected( + MemoryBuffer::getFile(Filename, /*bool IsText=*/true, + /*RequiresNullTerminator=*/false))); + auto JsonValue = ExitOnErr(json::parse(Buf->getBuffer())); + json::Path::Root Root; + JsonFile JF; + if (!fromJSON(JsonValue, JF, Root)) + ExitOnErr(Root.getError()); + return JF; +} + +int Main(int argc, char **argv) { + ExitOnErr.setBanner(std::string(argv[0]) + ": "); + cl::ParseCommandLineOptions(argc, argv, "Automemcpy Json Results Analyzer\n"); + std::vector Samples; + + for (const auto &Filename : InputFilenames) { + auto Result = parseJsonResults(Filename); + llvm::append_range(Samples, Result.Samples); + } + + std::vector Functions = getThroughputs(Samples); + fillScores(Functions); + castVotes(Functions); + + // TODO: Implement tie breaking algorithm. + std::sort(Functions.begin(), Functions.end(), + [](const FunctionData &A, const FunctionData &B) { + return A.FinalGrade < B.FinalGrade; + }); + + // Present data by function type. + std::stable_sort(Functions.begin(), Functions.end(), + [](const FunctionData &A, const FunctionData &B) { + return A.Id.Type < B.Id.Type; + }); + + for (const FunctionData &Function : Functions) { + outs() << formatv("{0,-10}", Grade::getString(Function.FinalGrade)); + outs() << " |"; + PrintUTF8(outs(), Function.GradeHisto); + outs() << "| "; + outs().resetColor(); + outs() << formatv("{0,+25}", Function.Id.Name); + outs() << "\n"; + } + + return EXIT_SUCCESS; +} + +} // namespace automemcpy +} // namespace llvm + +int main(int argc, char **argv) { return llvm::automemcpy::Main(argc, argv); }