diff --git a/libc/benchmarks/CMakeLists.txt b/libc/benchmarks/CMakeLists.txt --- a/libc/benchmarks/CMakeLists.txt +++ b/libc/benchmarks/CMakeLists.txt @@ -145,11 +145,27 @@ # Benchmarking tool #============================================================================== -add_executable(libc-benchmark-main - EXCLUDE_FROM_ALL - LibcMemoryBenchmarkMain.cpp -) -foreach(entrypoint_target libc.src.string.memcpy libc.src.string.memset) - get_target_property(entrypoint_object_file ${entrypoint_target} "OBJECT_FILE_RAW") - target_link_libraries(libc-benchmark-main PUBLIC json ${entrypoint_object_file}) -endforeach() +# Benchmark all implementations that can run on the target CPU. +function(add_libc_multi_impl_benchmark name) + get_property(fq_implementations GLOBAL PROPERTY ${name}_implementations) + foreach(fq_config_name IN LISTS fq_implementations) + get_target_property(required_cpu_features ${fq_config_name} REQUIRE_CPU_FEATURES) + cpu_supports(can_run "${required_cpu_features}") + if(can_run) + set(benchmark_name ${fq_config_name}_benchmark) + add_executable(${benchmark_name} + EXCLUDE_FROM_ALL + LibcMemoryBenchmarkMain.cpp + ) + get_target_property(entrypoint_object_file ${fq_config_name} "OBJECT_FILE_RAW") + target_link_libraries(${benchmark_name} PUBLIC json ${entrypoint_object_file}) + string(TOUPPER ${name} name_upper) + target_compile_definitions(${benchmark_name} PRIVATE "-DLIBC_BENCHMARK_FUNCTION_${name_upper}=1" "-DLIBC_BENCHMARK_FUNCTION_NAME=\"${fq_config_name}\"") + else() + message(STATUS "Skipping benchmark for '${fq_config_name}' insufficient host cpu features '${required_cpu_features}'") + endif() + endforeach() +endfunction() + +add_libc_multi_impl_benchmark(memcpy) +add_libc_multi_impl_benchmark(memset) diff --git a/libc/benchmarks/LibcMemoryBenchmarkMain.cpp b/libc/benchmarks/LibcMemoryBenchmarkMain.cpp --- a/libc/benchmarks/LibcMemoryBenchmarkMain.cpp +++ b/libc/benchmarks/LibcMemoryBenchmarkMain.cpp @@ -27,17 +27,9 @@ namespace llvm { namespace libc_benchmarks { -enum Function { memcpy, memset }; - static cl::opt StudyName("study-name", cl::desc("The name for this study"), cl::Required); -static cl::opt - MemoryFunction("function", cl::desc("Sets the function to benchmark:"), - cl::values(clEnumVal(memcpy, "__llvm_libc::memcpy"), - clEnumVal(memset, "__llvm_libc::memset")), - cl::Required); - static cl::opt SizeDistributionName("size-distribution-name", cl::desc("The name of the distribution to use")); @@ -75,12 +67,12 @@ unsigned SizeBytes : 16; // max : 16 KiB - 1 }; -struct MemcpyBenchmark { +#if defined(LIBC_BENCHMARK_FUNCTION_MEMCPY) +struct Benchmark { static constexpr auto GetDistributions = &getMemcpySizeDistributions; static constexpr size_t BufferCount = 2; - static void amend(Study &S) { S.Configuration.Function = "memcpy"; } - MemcpyBenchmark(const size_t BufferSize) + Benchmark(const size_t BufferSize) : SrcBuffer(BufferSize), DstBuffer(BufferSize) {} inline auto functor() { @@ -94,13 +86,12 @@ AlignedBuffer SrcBuffer; AlignedBuffer DstBuffer; }; - -struct MemsetBenchmark { +#elif defined(LIBC_BENCHMARK_FUNCTION_MEMSET) +struct Benchmark { static constexpr auto GetDistributions = &getMemsetSizeDistributions; static constexpr size_t BufferCount = 1; - static void amend(Study &S) { S.Configuration.Function = "memset"; } - MemsetBenchmark(const size_t BufferSize) : DstBuffer(BufferSize) {} + Benchmark(const size_t BufferSize) : DstBuffer(BufferSize) {} inline auto functor() { return [this](ParameterType P) { @@ -112,9 +103,11 @@ AlignedBuffer DstBuffer; }; +#else +#error "Missing LIBC_BENCHMARK_FUNCTION_XXX definition" +#endif -template struct Harness : Benchmark { - using Benchmark::functor; +struct Harness : Benchmark { Harness(const size_t BufferSize, size_t BatchParameterCount, std::function SizeSampler, @@ -140,11 +133,6 @@ std::function OffsetSampler; }; -struct IBenchmark { - virtual ~IBenchmark() {} - virtual Study run() = 0; -}; - size_t getL1DataCacheSize() { const std::vector &CacheInfos = HostState::get().Caches; const auto IsL1DataCache = [](const CacheInfo &CI) { @@ -156,7 +144,7 @@ report_fatal_error("Unable to read L1 Cache Data Size"); } -template struct MemfunctionBenchmark : IBenchmark { +struct MemfunctionBenchmark { MemfunctionBenchmark(int64_t L1Size = getL1DataCacheSize()) : AvailableSize(L1Size - L1LeftAsideBytes - ParameterStorageBytes), BufferSize(AvailableSize / Benchmark::BufferCount), @@ -217,12 +205,10 @@ else SC.SizeDistributionName = SizeDistributionName; SC.AccessAlignment = MaybeAlign(AlignedAccess); - - // Delegate specific flags and configuration. - Benchmark::amend(Study); + SC.Function = LIBC_BENCHMARK_FUNCTION_NAME; } - Study run() override { + Study run() { if (SweepMode) runSweepMode(); else @@ -280,8 +266,7 @@ void runTrials(const BenchmarkOptions &Options, std::function SizeSampler, std::function OffsetSampler) { - Harness B(BufferSize, BatchParameterCount, SizeSampler, - OffsetSampler); + Harness B(BufferSize, BatchParameterCount, SizeSampler, OffsetSampler); for (size_t i = 0; i < NumTrials; ++i) { const BenchmarkResult Result = benchmark(Options, B, B.functor()); Study.Measurements.push_back(Result.BestGuess); @@ -313,15 +298,6 @@ } }; -std::unique_ptr getMemfunctionBenchmark() { - switch (MemoryFunction) { - case memcpy: - return std::make_unique>(); - case memset: - return std::make_unique>(); - } -} - void writeStudy(const Study &S) { std::error_code EC; raw_fd_ostream FOS(Output, EC); @@ -337,8 +313,8 @@ void main() { checkRequirements(); - auto MB = getMemfunctionBenchmark(); - writeStudy(MB->run()); + MemfunctionBenchmark MB; + writeStudy(MB.run()); } } // namespace libc_benchmarks