diff --git a/libc/benchmarks/CMakeLists.txt b/libc/benchmarks/CMakeLists.txt
--- a/libc/benchmarks/CMakeLists.txt
+++ b/libc/benchmarks/CMakeLists.txt
@@ -171,3 +171,25 @@
 add_libc_multi_impl_benchmark(memset)
 add_libc_multi_impl_benchmark(bzero)
 add_libc_multi_impl_benchmark(memcmp)
+
+#==============================================================================
+# Google Benchmarking tool
+#==============================================================================
+
+# This target uses the Google Benchmark facility to report throughput for llvm
+# libc memory functions compiled for the host machine. This is useful to
+# continuously monitor the performance of the memory functions.
+add_executable(libc.benchmarks.memory_functions.opt_host
+  EXCLUDE_FROM_ALL
+  LibcMemoryGoogleBenchmarkMain.cpp
+)
+
+target_link_libraries(libc.benchmarks.memory_functions.opt_host
+  PRIVATE
+  libc-memory-benchmark
+  libc.src.string.memcmp_opt_host
+  libc.src.string.memcpy_opt_host
+  libc.src.string.memset_opt_host
+  libc.src.string.bzero_opt_host
+  benchmark_main
+)
diff --git a/libc/benchmarks/LibcMemoryBenchmark.h b/libc/benchmarks/LibcMemoryBenchmark.h
--- a/libc/benchmarks/LibcMemoryBenchmark.h
+++ b/libc/benchmarks/LibcMemoryBenchmark.h
@@ -190,7 +190,10 @@
 struct CopyHarness : public ParameterBatch {
   CopyHarness();
 
-  static const ArrayRef<MemorySizeDistribution> Distributions;
+  // Returns the size distributions available for memcpy benchmarks.
+  inline static const ArrayRef<MemorySizeDistribution> getDistributions() {
+    return getMemcpySizeDistributions();
+  }
 
   inline void *Call(ParameterType Parameter,
                     void *(*memcpy)(void *__restrict, const void *__restrict,
@@ -209,7 +212,10 @@
 struct SetHarness : public ParameterBatch {
   SetHarness();
 
-  static const ArrayRef<MemorySizeDistribution> Distributions;
+  // Returns the size distributions available for memset benchmarks.
+  inline static const ArrayRef<MemorySizeDistribution> getDistributions() {
+    return getMemsetSizeDistributions();
+  }
 
   inline void *Call(ParameterType Parameter,
                     void *(*memset)(void *, int, size_t)) {
@@ -231,7 +237,10 @@
 struct ComparisonHarness : public ParameterBatch {
   ComparisonHarness();
 
-  static const ArrayRef<MemorySizeDistribution> Distributions;
+  // Returns the size distributions available for memcmp benchmarks.
+  inline static const ArrayRef<MemorySizeDistribution> getDistributions() {
+    return getMemcmpSizeDistributions();
+  }
 
   inline int Call(ParameterType Parameter,
                   int (*memcmp)(const void *, const void *, size_t)) {
diff --git a/libc/benchmarks/LibcMemoryBenchmark.cpp b/libc/benchmarks/LibcMemoryBenchmark.cpp
--- a/libc/benchmarks/LibcMemoryBenchmark.cpp
+++ b/libc/benchmarks/LibcMemoryBenchmark.cpp
@@ -104,13 +104,6 @@
           .concat(llvm::Twine(BufferSize)));
 }
 
-const ArrayRef<MemorySizeDistribution> CopyHarness::Distributions =
-    getMemcpySizeDistributions();
-const ArrayRef<MemorySizeDistribution> ComparisonHarness::Distributions =
-    getMemcmpSizeDistributions();
-const ArrayRef<MemorySizeDistribution> SetHarness::Distributions =
-    getMemsetSizeDistributions();
-
 CopyHarness::CopyHarness()
     : ParameterBatch(2), SrcBuffer(ParameterBatch::BufferSize),
       DstBuffer(ParameterBatch::BufferSize) {}
diff --git a/libc/benchmarks/LibcMemoryGoogleBenchmarkMain.cpp b/libc/benchmarks/LibcMemoryGoogleBenchmarkMain.cpp
new file mode 100644
--- /dev/null
+++ b/libc/benchmarks/LibcMemoryGoogleBenchmarkMain.cpp
@@ -0,0 +1,110 @@
+// Google Benchmark entry points reporting throughput of the llvm-libc memory
+// functions (memcpy, memcmp, memset, bzero) over each harness' distributions.
+
+#include "LibcBenchmark.h"
+#include "LibcMemoryBenchmark.h"
+#include "MemorySizeDistributions.h"
+#include "benchmark/benchmark.h"
+#include <cstdint>
+#include <random>
+#include <vector>
+
+namespace __llvm_libc {
+
+// Functions under test, expected from the *_opt_host libraries linked in.
+extern void *memcpy(void *__restrict, const void *__restrict, size_t);
+extern void *memset(void *, int, size_t);
+extern void bzero(void *, size_t);
+extern int memcmp(const void *, const void *, size_t);
+
+} // namespace __llvm_libc
+
+using llvm::Align;
+using llvm::ArrayRef;
+using llvm::libc_benchmarks::ComparisonHarness;
+using llvm::libc_benchmarks::CopyHarness;
+using llvm::libc_benchmarks::MemorySizeDistribution;
+using llvm::libc_benchmarks::OffsetDistribution;
+using llvm::libc_benchmarks::SetHarness;
+
+// Alignment handed to the offset sampler.
+static constexpr Align kBenchmarkAlignment = Align::Constant<1>();
+
+// Wraps a harness and fills its parameter batch with randomly sampled offsets
+// and sizes. Sizes are drawn from the distribution selected by
+// `State.range(0)`. Throughput counters are published on destruction.
+template <typename Harness> struct Randomized : public Harness {
+  Randomized(benchmark::State &State)
+      : State(State), Distribution(Harness::getDistributions()[State.range(0)]),
+        Probabilities(Distribution.Probabilities),
+        SizeSampler(Probabilities.begin(), Probabilities.end()),
+        OffsetSampler(Harness::BufferSize, Probabilities.size() - 1,
+                      kBenchmarkAlignment) {
+    for (auto &P : Harness::Parameters) {
+      P.OffsetBytes = OffsetSampler(Gen);
+      P.SizeBytes = SizeSampler(Gen);
+      Harness::checkValid(P);
+    }
+  }
+
+  // Reports the bytes processed, the distribution name and a bytes-per-cycle
+  // rate once the benchmark loop is over.
+  ~Randomized() {
+    const size_t AvgBytesPerIteration =
+        Harness::getBatchBytes() / Harness::BatchSize;
+    const size_t TotalBytes = State.iterations() * AvgBytesPerIteration;
+    State.SetBytesProcessed(TotalBytes);
+    State.SetLabel(Distribution.Name.str());
+    State.counters["bytes_per_cycle"] = benchmark::Counter(
+        TotalBytes / benchmark::CPUInfo::Get().cycles_per_second,
+        benchmark::Counter::kIsRate);
+  }
+
+  // Calls `foo` once per parameter of the batch, keeping each result alive.
+  template <typename Function> inline void runBatch(Function foo) {
+    for (const auto &P : Harness::Parameters)
+      benchmark::DoNotOptimize(Harness::Call(P, foo));
+  }
+
+private:
+  benchmark::State &State;
+  MemorySizeDistribution Distribution;
+  ArrayRef<double> Probabilities;
+  std::discrete_distribution<unsigned> SizeSampler;
+  OffsetDistribution OffsetSampler;
+  std::mt19937_64 Gen; // Default-seeded, so every run samples the same batch.
+};
+
+// Index of the last distribution exposed by `Harness`.
+template <typename Harness> static int64_t getMaxIndex() {
+  return Harness::getDistributions().size() - 1;
+}
+
+// Each benchmark below runs once per distribution index (see DenseRange).
+void BM_Memcpy(benchmark::State &State) {
+  Randomized<CopyHarness> Harness(State);
+  while (State.KeepRunningBatch(Harness.BatchSize))
+    Harness.runBatch(__llvm_libc::memcpy);
+}
+BENCHMARK(BM_Memcpy)->DenseRange(0, getMaxIndex<CopyHarness>());
+
+void BM_Memcmp(benchmark::State &State) {
+  Randomized<ComparisonHarness> Harness(State);
+  while (State.KeepRunningBatch(Harness.BatchSize))
+    Harness.runBatch(__llvm_libc::memcmp);
+}
+BENCHMARK(BM_Memcmp)->DenseRange(0, getMaxIndex<ComparisonHarness>());
+
+void BM_Memset(benchmark::State &State) {
+  Randomized<SetHarness> Harness(State);
+  while (State.KeepRunningBatch(Harness.BatchSize))
+    Harness.runBatch(__llvm_libc::memset);
+}
+BENCHMARK(BM_Memset)->DenseRange(0, getMaxIndex<SetHarness>());
+
+void BM_Bzero(benchmark::State &State) {
+  Randomized<SetHarness> Harness(State);
+  while (State.KeepRunningBatch(Harness.BatchSize))
+    Harness.runBatch(__llvm_libc::bzero);
+}
+BENCHMARK(BM_Bzero)->DenseRange(0, getMaxIndex<SetHarness>());