Index: MicroBenchmarks/CMakeLists.txt
===================================================================
--- MicroBenchmarks/CMakeLists.txt
+++ MicroBenchmarks/CMakeLists.txt
@@ -6,3 +6,4 @@
 add_subdirectory(harris)
 add_subdirectory(ImageProcessing)
 add_subdirectory(LoopInterchange)
+add_subdirectory(MemFunctions)
Index: MicroBenchmarks/MemFunctions/CMakeLists.txt
===================================================================
--- /dev/null
+++ MicroBenchmarks/MemFunctions/CMakeLists.txt
@@ -0,0 +1,5 @@
+llvm_test_run(WORKDIR ${CMAKE_CURRENT_BINARY_DIR})
+
+llvm_test_executable(MemFunctions main.cpp)
+
+target_link_libraries(MemFunctions benchmark)
Index: MicroBenchmarks/MemFunctions/main.cpp
===================================================================
--- /dev/null
+++ MicroBenchmarks/MemFunctions/main.cpp
@@ -0,0 +1,118 @@
+//===- main.cpp - Memory Functions Benchmarks -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Memory functions (memcmp, memcpy, ...) are typically recognized by the
+// compiler and expanded to specific asm patterns when the size is known at
+// compile time. These microbenchmarks help catch potential CodeGen regressions.
+//
+// Note that these microbenchmarks do not represent a typical real-life
+// situation. They are designed to test the LLVM CodeGen. In particular,
+// real-life applications will typically be memory- rather than compute-bound
+// when manipulating memory.
+//
+//===----------------------------------------------------------------------===//
+
+#include "benchmark/benchmark.h"
+
+#include <cstring>
+#include <vector>
+
+// Benchmarks memcmp(p, q, size) where size is known at compile time.
+// The compiler typically inlines this to loads and compares.
+// Template parameters:
+//   kSize - number of bytes compared by each memcmp call; a compile-time
+//           constant so that the call has a known size.
+//   Pred  - functor turning the int returned by memcmp into a bool
+//           (see "Predicates" below).
+//   Mod   - functor that alters one byte, or none, of every kSize-byte
+//           element of the first buffer (see the modifier functors below).
+template <size_t kSize, typename Pred, typename Mod>
+void BM_MemCmp(benchmark::State &state) {
+  static_assert(kSize > 0, "kSize must be positive (it is used as a divisor)");
+
+  // Number of kSize-byte elements that fit in a 4096-byte buffer.
+  constexpr size_t kNumElements = 4096 / kSize;
+
+  // Both buffers are value-initialized by std::vector, i.e. zero-filled.
+  std::vector<char> p_storage(kNumElements * kSize);
+  std::vector<char> q_storage(kNumElements * kSize);
+  char *p = p_storage.data();
+  const char *q = q_storage.data();
+
+  // Let Mod alter each element of p; q is left untouched.
+  for (size_t i = 0; i < kNumElements; ++i)
+    Mod().template Change<kSize>(p + i * kSize);
+
+  benchmark::DoNotOptimize(p_storage);
+  benchmark::DoNotOptimize(q_storage);
+
+  for (auto _ : state) {
+    for (size_t i = 0; i < kNumElements; ++i) {
+      // Feed the memcmp result through Pred and keep the outcome observable.
+      int res = Pred()(memcmp(p + i * kSize, q + i * kSize, kSize));
+      benchmark::DoNotOptimize(res);
+    }
+  }
+  // Each benchmark iteration compares the whole of p_storage against q.
+  state.SetBytesProcessed(p_storage.size() * state.iterations());
+}
+
+// Predicates applied to the value returned by memcmp.
+struct EqZero {
+  bool operator()(int v) const { return v == 0; }
+};
+struct LessThanZero {
+  bool operator()(int v) const { return v < 0; }
+};
+struct GreaterThanZero {
+  bool operator()(int v) const { return v > 0; }
+};
+
+// Byte written by the modifier functors. The conversion of 128 to char is
+// spelled out because char may be signed. memcmp compares bytes as unsigned
+// char, so a modified element compares greater than a zero-filled one.
+constexpr char kChangedByte = static_cast<char>(128);
+
+// Functors to change the first/mid/last or no value of a kSize-byte element
+// starting at p.
+struct None {
+  template <size_t kSize> void Change(char *const p) const {}
+};
+struct First {
+  template <size_t kSize> void Change(char *const p) const {
+    p[0] = kChangedByte;
+  }
+};
+struct Mid {
+  template <size_t kSize> void Change(char *const p) const {
+    p[kSize / 2] = kChangedByte;
+  }
+};
+struct Last {
+  template <size_t kSize> void Change(char *const p) const {
+    p[kSize - 1] = kChangedByte;
+  }
+};
+
+// Registers BM_MemCmp for one (size, predicate) pair with every modifier.
+#define MEMCMP_BENCHMARK_PRED(size, pred)                                      \
+  BENCHMARK_TEMPLATE(BM_MemCmp, size, pred, None)                              \
+      ->Unit(benchmark::kNanosecond);                                          \
+  BENCHMARK_TEMPLATE(BM_MemCmp, size, pred, First)                             \
+      ->Unit(benchmark::kNanosecond);                                          \
+  BENCHMARK_TEMPLATE(BM_MemCmp, size, pred, Mid)                               \
+      ->Unit(benchmark::kNanosecond);                                          \
+  BENCHMARK_TEMPLATE(BM_MemCmp, size, pred, Last)                              \
+      ->Unit(benchmark::kNanosecond);
+
+// Registers BM_MemCmp for one size with every predicate/modifier combination.
+// The registration order is predicate-major: EqZero, LessThanZero,
+// GreaterThanZero, each with None, First, Mid, Last.
+#define MEMCMP_BENCHMARK(size)                                                 \
+  MEMCMP_BENCHMARK_PRED(size, EqZero)                                          \
+  MEMCMP_BENCHMARK_PRED(size, LessThanZero)                                    \
+  MEMCMP_BENCHMARK_PRED(size, GreaterThanZero)
+
+MEMCMP_BENCHMARK(1)
+MEMCMP_BENCHMARK(2)
+MEMCMP_BENCHMARK(3)
+MEMCMP_BENCHMARK(4)
+MEMCMP_BENCHMARK(5)
+MEMCMP_BENCHMARK(6)
+MEMCMP_BENCHMARK(7)
+MEMCMP_BENCHMARK(8)
+MEMCMP_BENCHMARK(15)
+MEMCMP_BENCHMARK(16)
+MEMCMP_BENCHMARK(31)
+MEMCMP_BENCHMARK(32)
+MEMCMP_BENCHMARK(63)
+MEMCMP_BENCHMARK(64)
+
+// Parses the benchmark command-line flags, rejects unrecognized arguments with
+// exit code 1, and otherwise runs the selected benchmarks.
+int main(int argc, char *argv[]) {
+  ::benchmark::Initialize(&argc, argv);
+  if (::benchmark::ReportUnrecognizedArguments(argc, argv))
+    return 1;
+  ::benchmark::RunSpecifiedBenchmarks();
+
+  return 0;
+}