Index: MicroBenchmarks/CMakeLists.txt =================================================================== --- MicroBenchmarks/CMakeLists.txt +++ MicroBenchmarks/CMakeLists.txt @@ -7,4 +7,5 @@ add_subdirectory(harris) add_subdirectory(ImageProcessing) add_subdirectory(LoopInterchange) +add_subdirectory(MatrixTypes) add_subdirectory(MemFunctions) Index: MicroBenchmarks/MatrixTypes/CMakeLists.txt =================================================================== --- /dev/null +++ MicroBenchmarks/MatrixTypes/CMakeLists.txt @@ -0,0 +1,11 @@ +include(CheckCXXCompilerFlag) + +# Enable matrix types extension benchmarks for compilers supporting -fenable-matrix. +check_cxx_compiler_flag(-fenable-matrix COMPILER_HAS_MATRIX_FLAG) +if (COMPILER_HAS_MATRIX_FLAG) + llvm_test_run(WORKDIR ${CMAKE_CURRENT_BINARY_DIR}) + + set_property(SOURCE main.cpp PROPERTY COMPILE_FLAGS -fenable-matrix) + llvm_test_executable(MatrixTypes main.cpp) + target_link_libraries(MatrixTypes benchmark) +endif() Index: MicroBenchmarks/MatrixTypes/main.cpp =================================================================== --- /dev/null +++ MicroBenchmarks/MatrixTypes/main.cpp @@ -0,0 +1,214 @@ +#include +#include + +#include "benchmark/benchmark.h" + +// Micro benchmarks for the matrix types extensions. 
+ +#if __has_extension(matrix_types) + +namespace { + +/* matrix_t: alias for ElementTy carrying the matrix_type(R, C) attribute. */ template +using matrix_t = ElementTy __attribute__((matrix_type(R, C))); + +/* Allocates one matrix_t with new (no initializer is given) and hands ownership to a std::unique_ptr. */ template +std::unique_ptr> allocateMatrix() { + return std::unique_ptr>( + new matrix_t); +} + +/* initRandom, std::uniform_real_distribution overload: assigns every M[I][J] from a freshly default-constructed std::default_random_engine. */ template ::value, + int> = 0> +void initRandom(matrix_t &M) { + std::default_random_engine generator; + std::uniform_real_distribution distribution; + + for (unsigned I = 0; I < R; I++) + for (unsigned J = 0; J < C; J++) + M[I][J] = distribution(generator); +} + +/* initRandom, std::uniform_int_distribution overload: same element-by-element fill, using an integer distribution. */ template < + typename ElementTy, unsigned R, unsigned C, + typename std::enable_if_t::value, int> = 0> +void initRandom(matrix_t &M) { + std::default_random_engine generator; + std::uniform_int_distribution distribution; + + for (unsigned I = 0; I < R; I++) + for (unsigned J = 0; J < C; J++) + M[I][J] = distribution(generator); +} + +/* Benchmark body: X and Y are filled once, then every iteration of the state loop evaluates Z = X * Y. */ template +static void BM_MatrixTypes_Mult(benchmark::State &state) { + auto XPtr = allocateMatrix(); + auto YPtr = allocateMatrix(); + + auto ZPtr = allocateMatrix(); + + matrix_t &X = *XPtr; + matrix_t &Y = *YPtr; + matrix_t &Z = *ZPtr; + + initRandom(X); + initRandom(Y); + for (auto _ : state) { + benchmark::DoNotOptimize(XPtr); + benchmark::DoNotOptimize(YPtr); + benchmark::DoNotOptimize(ZPtr); + Z = X * Y; + } +} + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, float, 3, 3, 3, 3); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, double, 3, 3, 3, 3); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, char, 4, 4, 4, 4); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, float, 4, 4, 4, 4); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, unsigned, 4, 4, 4, 4); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, double, 4, 4, 4, 4); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, long long int, 4, 4, 4, 4); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, float, 3, 2, 2, 5); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, double, 3, 2, 2, 5); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, char, 8, 8, 8, 8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, float, 8, 8, 8, 8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, unsigned, 8, 8, 8, 
8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, double, 8, 8, 8, 8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, long long int, 8, 8, 8, 8); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, float, 12, 8, 8, 14); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, double, 12, 8, 8, 14); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, float, 15, 19, 19, 15); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, double, 15, 19, 19, 15); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, float, 16, 16, 16, 16); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, double, 16, 16, 16, 16); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, float, 32, 32, 32, 32); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, double, 32, 32, 32, 32); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, float, 48, 48, 48, 48); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, double, 48, 48, 48, 48); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, float, 64, 64, 64, 64); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult, double, 64, 64, 64, 64); + +/* Benchmark body: X and Y are filled once, then every iteration evaluates Z = X * __builtin_matrix_transpose(Y). */ template +static void BM_MatrixTypes_Mult_Transpose(benchmark::State &state) { + auto XPtr = allocateMatrix(); + auto YPtr = allocateMatrix(); + + // Y is transposed before multiplying. 
+ auto ZPtr = allocateMatrix(); + + matrix_t &X = *XPtr; + matrix_t &Y = *YPtr; + matrix_t &Z = *ZPtr; + + initRandom(X); + initRandom(Y); + for (auto _ : state) { + benchmark::DoNotOptimize(XPtr); + benchmark::DoNotOptimize(YPtr); + benchmark::DoNotOptimize(ZPtr); + Z = X * __builtin_matrix_transpose(Y); + } +} + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, float, 3, 3, 3, 3); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, double, 3, 3, 3, 3); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, char, 4, 4, 4, 4); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, float, 4, 4, 4, 4); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, unsigned, 4, 4, 4, 4); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, double, 4, 4, 4, 4); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, long long int, 4, 4, 4, 4); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, float, 3, 2, 5, 2); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, double, 3, 2, 5, 2); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, char, 8, 8, 8, 8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, float, 8, 8, 8, 8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, unsigned, 8, 8, 8, 8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, double, 8, 8, 8, 8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, long long int, 8, 8, 8, 8); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, float, 12, 8, 14, 8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, double, 12, 8, 14, 8); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, float, 15, 19, 15, 19); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, double, 15, 19, 15, 19); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, float, 16, 16, 16, 16); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, double, 16, 16, 16, 16); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, float, 32, 32, 32, 32); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Transpose, double, 32, 32, 32, 32); + +/* Benchmark body: X, Y and Z are all filled once, then every iteration adds the product X * Y onto Z. */ template +static void 
BM_MatrixTypes_Mult_Add(benchmark::State &state) { + auto XPtr = allocateMatrix(); + auto YPtr = allocateMatrix(); + + // Z is the accumulator for the product X * Y; Y is not transposed in this benchmark. NOTE(review): the non-square registrations below (3, 2, 5, 2 etc.) match the transpose benchmark's shapes rather than the plain multiply's - confirm X * Y is well-formed for them. + auto ZPtr = allocateMatrix(); + + matrix_t &X = *XPtr; + matrix_t &Y = *YPtr; + matrix_t &Z = *ZPtr; + + initRandom(X); + initRandom(Y); + initRandom(Z); + for (auto _ : state) { + benchmark::DoNotOptimize(XPtr); + benchmark::DoNotOptimize(YPtr); + benchmark::DoNotOptimize(ZPtr); + Z = Z + X * Y; + } +} + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, float, 3, 3, 3, 3); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, double, 3, 3, 3, 3); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, char, 4, 4, 4, 4); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, float, 4, 4, 4, 4); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, unsigned, 4, 4, 4, 4); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, double, 4, 4, 4, 4); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, long long int, 4, 4, 4, 4); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, float, 3, 2, 5, 2); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, double, 3, 2, 5, 2); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, char, 8, 8, 8, 8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, float, 8, 8, 8, 8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, unsigned, 8, 8, 8, 8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, double, 8, 8, 8, 8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, long long int, 8, 8, 8, 8); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, float, 12, 8, 14, 8); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, double, 12, 8, 14, 8); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, float, 15, 19, 15, 19); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, double, 15, 19, 15, 19); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, float, 16, 16, 16, 16); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, double, 16, 16, 16, 16); + +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, float, 32, 32, 32, 32); +BENCHMARK_TEMPLATE(BM_MatrixTypes_Mult_Add, double, 32, 32, 32, 32); + +} // namespace + +#endif + 
+BENCHMARK_MAIN(); // Sits after the #endif, so the benchmark library's main() is still emitted when the matrix_types extension is unavailable and no benchmarks are registered.