diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt --- a/mlir/CMakeLists.txt +++ b/mlir/CMakeLists.txt @@ -62,6 +62,7 @@ add_definitions(-DMLIR_INCLUDE_TESTS) add_subdirectory(unittests) add_subdirectory(test) + add_subdirectory(integration_test) endif() # Tools needs to come late to ensure that MLIR_ALL_LIBS is populated. # Generally things after this point may depend on MLIR_ALL_LIBS or libMLIR.so. diff --git a/mlir/integration_test/CMakeLists.txt b/mlir/integration_test/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/integration_test/CMakeLists.txt @@ -0,0 +1,31 @@ +llvm_canonicalize_cmake_booleans( + LLVM_BUILD_EXAMPLES + ) + +# Passed to lit.site.cfg.py.in to set up the path where to find the libraries. +set(MLIR_RUNNER_UTILS_DIR ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) + +set(MLIR_INTEGRATION_TEST_DEPENDS + FileCheck count not + mlir-cpu-runner + mlir-opt + mlir_runner_utils + mlir_c_runner_utils + ) + +configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py + ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py + ) + +add_lit_testsuite(check-integration-mlir "Running the MLIR integration tests" + ${CMAKE_CURRENT_BINARY_DIR} + DEPENDS ${MLIR_INTEGRATION_TEST_DEPENDS} + ) +set_target_properties(check-integration-mlir PROPERTIES FOLDER "integration_test") + +# Reuse the dependency list that check-integration-mlir above is built with. +add_lit_testsuites(check-integration-mlir ${CMAKE_CURRENT_SOURCE_DIR} + DEPENDS ${MLIR_INTEGRATION_TEST_DEPENDS} +) diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-broadcast.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-broadcast.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-broadcast.mlir @@ -0,0 +1,82 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %i = 
constant 2147483647: i32 + %l = constant 9223372036854775807 : i64 + + %f0 = constant 0.0: f32 + %f1 = constant 1.0: f32 + %f2 = constant 2.0: f32 + %f3 = constant 3.0: f32 + %f4 = constant 4.0: f32 + %f5 = constant 5.0: f32 + + // Test simple broadcasts. + %vi = vector.broadcast %i : i32 to vector<2xi32> + %vl = vector.broadcast %l : i64 to vector<2xi64> + %vf = vector.broadcast %f1 : f32 to vector<2x2x2xf32> + vector.print %vi : vector<2xi32> + vector.print %vl : vector<2xi64> + vector.print %vf : vector<2x2x2xf32> + // CHECK: ( 2147483647, 2147483647 ) + // CHECK: ( 9223372036854775807, 9223372036854775807 ) + // CHECK: ( ( ( 1, 1 ), ( 1, 1 ) ), ( ( 1, 1 ), ( 1, 1 ) ) ) + + // Test "duplication" in leading dimensions. + %v0 = vector.broadcast %f1 : f32 to vector<4xf32> + %v1 = vector.insert %f2, %v0[1] : f32 into vector<4xf32> + %v2 = vector.insert %f3, %v1[2] : f32 into vector<4xf32> + %v3 = vector.insert %f4, %v2[3] : f32 into vector<4xf32> + %v4 = vector.broadcast %v3 : vector<4xf32> to vector<3x4xf32> + %v5 = vector.broadcast %v3 : vector<4xf32> to vector<2x2x4xf32> + vector.print %v3 : vector<4xf32> + vector.print %v4 : vector<3x4xf32> + vector.print %v5 : vector<2x2x4xf32> + // CHECK: ( 1, 2, 3, 4 ) + // CHECK: ( ( 1, 2, 3, 4 ), ( 1, 2, 3, 4 ), ( 1, 2, 3, 4 ) ) + // CHECK: ( ( ( 1, 2, 3, 4 ), ( 1, 2, 3, 4 ) ), ( ( 1, 2, 3, 4 ), ( 1, 2, 3, 4 ) ) ) + + // Test straightforward "stretch" on a 1-D "scalar". + %x = vector.broadcast %f5 : f32 to vector<1xf32> + %y = vector.broadcast %x : vector<1xf32> to vector<8xf32> + vector.print %y : vector<8xf32> + // CHECK: ( 5, 5, 5, 5, 5, 5, 5, 5 ) + + // Test "stretch" in leading dimension. 
+ %s = vector.broadcast %v3 : vector<4xf32> to vector<1x4xf32> + %t = vector.broadcast %s : vector<1x4xf32> to vector<3x4xf32> + vector.print %s : vector<1x4xf32> + vector.print %t : vector<3x4xf32> + // CHECK: ( ( 1, 2, 3, 4 ) ) + // CHECK: ( ( 1, 2, 3, 4 ), ( 1, 2, 3, 4 ), ( 1, 2, 3, 4 ) ) + + // Test "stretch" in trailing dimension. + %a0 = vector.broadcast %f1 : f32 to vector<3x1xf32> + %a1 = vector.insert %f2, %a0[1, 0] : f32 into vector<3x1xf32> + %a2 = vector.insert %f3, %a1[2, 0] : f32 into vector<3x1xf32> + %a3 = vector.broadcast %a2 : vector<3x1xf32> to vector<3x4xf32> + vector.print %a2 : vector<3x1xf32> + vector.print %a3 : vector<3x4xf32> + // CHECK: ( ( 1 ), ( 2 ), ( 3 ) ) + // CHECK: ( ( 1, 1, 1, 1 ), ( 2, 2, 2, 2 ), ( 3, 3, 3, 3 ) ) + + // Test "stretch" in middle dimension. + %m0 = vector.broadcast %f0 : f32 to vector<3x1x2xf32> + %m1 = vector.insert %f1, %m0[0, 0, 1] : f32 into vector<3x1x2xf32> + %m2 = vector.insert %f2, %m1[1, 0, 0] : f32 into vector<3x1x2xf32> + %m3 = vector.insert %f3, %m2[1, 0, 1] : f32 into vector<3x1x2xf32> + %m4 = vector.insert %f4, %m3[2, 0, 0] : f32 into vector<3x1x2xf32> + %m5 = vector.insert %f5, %m4[2, 0, 1] : f32 into vector<3x1x2xf32> + %m6 = vector.broadcast %m5 : vector<3x1x2xf32> to vector<3x4x2xf32> + vector.print %m5 : vector<3x1x2xf32> + vector.print %m6 : vector<3x4x2xf32> + // CHECK: ( ( ( 0, 1 ) ), ( ( 2, 3 ) ), ( ( 4, 5 ) ) ) + // CHECK: ( ( ( 0, 1 ), ( 0, 1 ), ( 0, 1 ), ( 0, 1 ) ), + // CHECK-SAME: ( ( 2, 3 ), ( 2, 3 ), ( 2, 3 ), ( 2, 3 ) ), + // CHECK-SAME: ( ( 4, 5 ), ( 4, 5 ), ( 4, 5 ), ( 4, 5 ) ) ) + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-constant-mask.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-constant-mask.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-constant-mask.mlir @@ -0,0 +1,49 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry 
-entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %0 = vector.constant_mask [4] : vector<8xi1> + vector.print %0 : vector<8xi1> + // CHECK: ( 1, 1, 1, 1, 0, 0, 0, 0 ) + + %1 = vector.constant_mask [1, 3] : vector<4x4xi1> + vector.print %1 : vector<4x4xi1> + // CHECK: ( ( 1, 1, 1, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ) ) + + %2 = vector.constant_mask [2, 2] : vector<4x4xi1> + vector.print %2 : vector<4x4xi1> + // CHECK: ( ( 1, 1, 0, 0 ), ( 1, 1, 0, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ) ) + + %3 = vector.constant_mask [2, 4] : vector<4x4xi1> + vector.print %3 : vector<4x4xi1> + // CHECK: ( ( 1, 1, 1, 1 ), ( 1, 1, 1, 1 ), ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ) ) + + %4 = vector.constant_mask [3, 1] : vector<4x4xi1> + vector.print %4 : vector<4x4xi1> + // CHECK: ( ( 1, 0, 0, 0 ), ( 1, 0, 0, 0 ), ( 1, 0, 0, 0 ), ( 0, 0, 0, 0 ) ) + + %5 = vector.constant_mask [3, 2] : vector<4x4xi1> + vector.print %5 : vector<4x4xi1> + // CHECK: ( ( 1, 1, 0, 0 ), ( 1, 1, 0, 0 ), ( 1, 1, 0, 0 ), ( 0, 0, 0, 0 ) ) + + %6 = vector.constant_mask [4, 3] : vector<4x4xi1> + vector.print %6 : vector<4x4xi1> + // CHECK: ( ( 1, 1, 1, 0 ), ( 1, 1, 1, 0 ), ( 1, 1, 1, 0 ), ( 1, 1, 1, 0 ) ) + + %7 = vector.constant_mask [4, 4] : vector<4x4xi1> + vector.print %7 : vector<4x4xi1> + // CHECK: ( ( 1, 1, 1, 1 ), ( 1, 1, 1, 1 ), ( 1, 1, 1, 1 ), ( 1, 1, 1, 1 ) ) + + %8 = vector.constant_mask [1, 2, 3] : vector<2x3x4xi1> + vector.print %8 : vector<2x3x4xi1> + // CHECK: ( ( ( 1, 1, 1, 0 ), ( 1, 1, 1, 0 ), ( 0, 0, 0, 0 ) ), ( ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ), ( 0, 0, 0, 0 ) ) ) + + %9 = vector.constant_mask [2, 2, 3] : vector<2x3x4xi1> + vector.print %9 : vector<2x3x4xi1> + // CHECK: ( ( ( 1, 1, 1, 0 ), ( 1, 1, 1, 0 ), ( 0, 0, 0, 0 ) ), ( ( 1, 1, 1, 0 ), ( 1, 1, 1, 0 ), ( 0, 0, 0, 0 ) ) ) + + return +} + diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-contraction.mlir 
b/mlir/integration_test/Dialect/Vector/CPU/test-contraction.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-contraction.mlir @@ -0,0 +1,383 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +#dotp_accesses = [ + affine_map<(i) -> (i)>, + affine_map<(i) -> (i)>, + affine_map<(i) -> ()> +] +#dotp_trait = { + indexing_maps = #dotp_accesses, + iterator_types = ["reduction"] +} + +#matvec_accesses = [ + affine_map<(i, j) -> (i, j)>, + affine_map<(i, j) -> (j)>, + affine_map<(i, j) -> (i)> +] +#matvec_trait = { + indexing_maps = #matvec_accesses, + iterator_types = ["parallel", "reduction"] +} + +#mattransvec_accesses = [ + affine_map<(i, j) -> (j, i)>, + affine_map<(i, j) -> (j)>, + affine_map<(i, j) -> (i)> +] +#mattransvec_trait = { + indexing_maps = #mattransvec_accesses, + iterator_types = ["parallel", "reduction"] +} + +#matmat_accesses = [ + affine_map<(i, j, k) -> (i, k)>, + affine_map<(i, j, k) -> (k, j)>, + affine_map<(i, j, k) -> (i, j)> +] +#matmat_trait = { + indexing_maps = #matmat_accesses, + iterator_types = ["parallel", "parallel", "reduction"] +} + +#mattransmat_accesses = [ + affine_map<(i, j, k) -> (k, i)>, + affine_map<(i, j, k) -> (k, j)>, + affine_map<(i, j, k) -> (i, j)> +] +#mattransmat_trait = { + indexing_maps = #mattransmat_accesses, + iterator_types = ["parallel", "parallel", "reduction"] +} + +#matmattrans_accesses = [ + affine_map<(i, j, k) -> (i, k)>, + affine_map<(i, j, k) -> (j, k)>, + affine_map<(i, j, k) -> (i, j)> +] +#matmattrans_trait = { + indexing_maps = #matmattrans_accesses, + iterator_types = ["parallel", "parallel", "reduction"] +} + +#mattransmattrans_accesses = [ + affine_map<(i, j, k) -> (k, i)>, + affine_map<(i, j, k) -> (j, k)>, + affine_map<(i, j, k) -> (i, j)> +] 
+#mattransmattrans_trait = { + indexing_maps = #mattransmattrans_accesses, + iterator_types = ["parallel", "parallel", "reduction"] +} + +#matmat_then_trans_accesses = [ + affine_map<(i, j, k) -> (i, k)>, + affine_map<(i, j, k) -> (k, j)>, + affine_map<(i, j, k) -> (j, i)> +] +#matmat_then_trans_trait = { + indexing_maps = #matmat_then_trans_accesses, + iterator_types = ["parallel", "parallel", "reduction"] +} + +#contract2d_accesses = [ + affine_map<(i, j) -> (i, j)>, + affine_map<(i, j) -> (i, j)>, + affine_map<(i, j) -> ()> +] +#contract2d_trait = { + indexing_maps = #contract2d_accesses, + iterator_types = ["reduction", "reduction"] +} + +#contract2d_alt_accesses = [ + affine_map<(i, j) -> (j, i)>, + affine_map<(i, j) -> (j, i)>, + affine_map<(i, j) -> ()> +] +#contract2d_alt_trait = { + indexing_maps = #contract2d_alt_accesses, + iterator_types = ["reduction", "reduction"] +} + +#contract2d_trans_accesses = [ + affine_map<(i, j) -> (i, j)>, + affine_map<(i, j) -> (j, i)>, + affine_map<(i, j) -> ()> +] +#contract2d_trans_trait = { + indexing_maps = #contract2d_trans_accesses, + iterator_types = ["reduction", "reduction"] +} + +#contract2d_trans_alt_accesses = [ + affine_map<(i, j) -> (j, i)>, + affine_map<(i, j) -> (i, j)>, + affine_map<(i, j) -> ()> +] +#contract2d_trans_alt_trait = { + indexing_maps = #contract2d_trans_alt_accesses, + iterator_types = ["reduction", "reduction"] +} + +#column_major_matmat_accesses = [ + affine_map<(i, j, k) -> (k, j)>, + affine_map<(i, j, k) -> (i, k)>, + affine_map<(i, j, k) -> (j, i)> +] +#column_major_matmat_trait = { + indexing_maps = #column_major_matmat_accesses, + iterator_types = ["parallel", "parallel", "reduction"] +} + +func @entry() { + %f0 = constant 0.0: f32 + %f1 = constant 1.0: f32 + %f2 = constant 2.0: f32 + %f3 = constant 3.0: f32 + %f4 = constant 4.0: f32 + %f5 = constant 5.0: f32 + %f6 = constant 6.0: f32 + %f7 = constant 7.0: f32 + %f8 = constant 8.0: f32 + + // Zero vectors. 
+ %z1 = vector.broadcast %f0 : f32 to vector<2xf32> + %z2 = vector.broadcast %f0 : f32 to vector<2x2xf32> + %z3 = vector.broadcast %f0 : f32 to vector<3x4xf32> + + // Construct test vectors. + %0 = vector.broadcast %f1 : f32 to vector<2xf32> + %a = vector.insert %f2, %0[1] : f32 into vector<2xf32> + %1 = vector.broadcast %f3 : f32 to vector<2xf32> + %b = vector.insert %f4, %1[1] : f32 into vector<2xf32> + %2 = vector.broadcast %f5 : f32 to vector<2xf32> + %c = vector.insert %f6, %2[1] : f32 into vector<2xf32> + %3 = vector.broadcast %f7 : f32 to vector<2xf32> + %d = vector.insert %f8, %3[1] : f32 into vector<2xf32> + + vector.print %a : vector<2xf32> + vector.print %b : vector<2xf32> + vector.print %c : vector<2xf32> + vector.print %d : vector<2xf32> + // + // test vectors: + // + // CHECK: ( 1, 2 ) + // CHECK: ( 3, 4 ) + // CHECK: ( 5, 6 ) + // CHECK: ( 7, 8 ) + + // Construct test matrices. + %4 = vector.broadcast %f0 : f32 to vector<2x2xf32> + %5 = vector.insert %a, %4[0] : vector<2xf32> into vector<2x2xf32> + %A = vector.insert %b, %5[1] : vector<2xf32> into vector<2x2xf32> + %6 = vector.broadcast %f0 : f32 to vector<2x2xf32> + %7 = vector.insert %c, %6[0] : vector<2xf32> into vector<2x2xf32> + %B = vector.insert %d, %7[1] : vector<2xf32> into vector<2x2xf32> + %8 = vector.broadcast %f0 : f32 to vector<3x2xf32> + %9 = vector.insert %a, %8[0] : vector<2xf32> into vector<3x2xf32> + %10 = vector.insert %b, %9[1] : vector<2xf32> into vector<3x2xf32> + %C = vector.insert %c, %10[2] : vector<2xf32> into vector<3x2xf32> + %11 = vector.tuple %A, %B : vector<2x2xf32>, vector<2x2xf32> + %D = vector.insert_slices %11, [2, 2], [1, 1] + : tuple<vector<2x2xf32>, vector<2x2xf32>> into vector<2x4xf32> + + vector.print %A : vector<2x2xf32> + vector.print %B : vector<2x2xf32> + vector.print %C : vector<3x2xf32> + vector.print %D : vector<2x4xf32> + // + // test matrices: + // + // CHECK: ( ( 1, 2 ), ( 3, 4 ) ) + // CHECK: ( ( 5, 6 ), ( 7, 8 ) ) + // CHECK: ( ( 1, 2 ), ( 3, 4 ), ( 5, 6 ) ) + // 
CHECK: ( ( 1, 2, 5, 6 ), ( 3, 4, 7, 8 ) ) + + // Contraction: dot-product a x b + %dp1 = vector.contract #dotp_trait %a, %b, %f0 + : vector<2xf32>, vector<2xf32> into f32 + %dp2 = vector.contract #dotp_trait %a, %b, %f1 + : vector<2xf32>, vector<2xf32> into f32 + + vector.print %dp1 : f32 + vector.print %dp2 : f32 + // + // dot products: + // + // CHECK: 11 + // CHECK: 12 + + // Contraction: matrix-vector A x c + %mv1 = vector.contract #matvec_trait %A, %c, %z1 + : vector<2x2xf32>, vector<2xf32> into vector<2xf32> + %mv2 = vector.contract #matvec_trait %A, %c, %a + : vector<2x2xf32>, vector<2xf32> into vector<2xf32> + + vector.print %mv1 : vector<2xf32> + vector.print %mv2 : vector<2xf32> + // + // matrix x vector: + // + // CHECK: ( 17, 39 ) + // CHECK: ( 18, 41 ) + + // Contraction: matrix-trans-vector A^T x c + %mv3 = vector.contract #mattransvec_trait %A, %c, %z1 + : vector<2x2xf32>, vector<2xf32> into vector<2xf32> + %mv4 = vector.contract #mattransvec_trait %A, %c, %a + : vector<2x2xf32>, vector<2xf32> into vector<2xf32> + + vector.print %mv3 : vector<2xf32> + vector.print %mv4 : vector<2xf32> + // + // matrix x vector: + // + // CHECK: ( 23, 34 ) + // CHECK: ( 24, 36 ) + + // Contraction: matrix-matrix A x B + %mm1 = vector.contract #matmat_trait %A, %B, %z2 + : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> + %mm2 = vector.contract #matmat_trait %A, %B, %A + : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> + + vector.print %mm1 : vector<2x2xf32> + vector.print %mm2 : vector<2x2xf32> + // + // matrix x matrix: + // + // CHECK: ( ( 19, 22 ), ( 43, 50 ) ) + // CHECK: ( ( 20, 24 ), ( 46, 54 ) ) + + // Contraction: matrix-matrix A x B where A, B, C have column-major layout. 
+ // ( 1 * 5 + 3 * 6 = 23, 2 * 5 + 4 * 6 = 34) + // ( 1 * 7 + 3 * 8 = 31, 2 * 7 + 4 * 8 = 46) + // + + // ( ( 1, 2 ), ( 3, 4 ) ) + %llvm_matrix_column_major_mm0 = + vector.contract #column_major_matmat_trait %A, %B, %z2 + : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> + %llvm_matrix_column_major_mm1 = + vector.contract #column_major_matmat_trait %A, %B, %A + : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> + + vector.print %llvm_matrix_column_major_mm0 : vector<2x2xf32> + vector.print %llvm_matrix_column_major_mm1 : vector<2x2xf32> + // + // matrix x matrix: + // + // CHECK: ( ( 23, 31 ), ( 34, 46 ) ) + // CHECK: ( ( 24, 33 ), ( 37, 50 ) ) + + // Contraction: matrix-trans-matrix A^T x B + %mm3 = vector.contract #mattransmat_trait %A, %B, %z2 + : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> + %mm4 = vector.contract #mattransmat_trait %A, %B, %A + : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> + + vector.print %mm3 : vector<2x2xf32> + vector.print %mm4 : vector<2x2xf32> + // + // matrix x matrix: + // + // CHECK: ( ( 26, 30 ), ( 38, 44 ) ) + // CHECK: ( ( 27, 32 ), ( 41, 48 ) ) + + // Contraction: matrix-matrix-trans A x B^T + %mm5 = vector.contract #matmattrans_trait %A, %B, %z2 + : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> + %mm6 = vector.contract #matmattrans_trait %A, %B, %A + : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> + + vector.print %mm5 : vector<2x2xf32> + vector.print %mm6 : vector<2x2xf32> + // + // matrix x matrix: + // + // CHECK: ( ( 17, 23 ), ( 39, 53 ) ) + // CHECK: ( ( 18, 25 ), ( 42, 57 ) ) + + // Contraction: matrix-trans-matrix-trans A^T x B^T + %mm7 = vector.contract #mattransmattrans_trait %A, %B, %z2 + : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> + %mm8 = vector.contract #mattransmattrans_trait %A, %B, %A + : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> + + vector.print %mm7 : vector<2x2xf32> + vector.print %mm8 : vector<2x2xf32> + // + // matrix x matrix: + // + // 
CHECK: ( ( 23, 31 ), ( 34, 46 ) ) + // CHECK: ( ( 24, 33 ), ( 37, 50 ) ) + + // Contraction: matrix-matrix-then-trans (A x B)^T + %mm9 = vector.contract #matmat_then_trans_trait %A, %B, %z2 + : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> + %mm10 = vector.contract #matmat_then_trans_trait %A, %B, %A + : vector<2x2xf32>, vector<2x2xf32> into vector<2x2xf32> + + vector.print %mm9 : vector<2x2xf32> + vector.print %mm10 : vector<2x2xf32> + // + // matrix x matrix: + // + // CHECK: ( ( 19, 43 ), ( 22, 50 ) ) + // CHECK: ( ( 20, 45 ), ( 25, 54 ) ) + + // Contraction: matrix-matrix C x D + %mm11 = vector.contract #matmat_trait %C, %D, %z3 + : vector<3x2xf32>, vector<2x4xf32> into vector<3x4xf32> + %mm12 = vector.contract #matmat_trait %C, %D, %mm11 + : vector<3x2xf32>, vector<2x4xf32> into vector<3x4xf32> + + vector.print %mm11 : vector<3x4xf32> + vector.print %mm12 : vector<3x4xf32> + // CHECK: ( ( 7, 10, 19, 22 ), ( 15, 22, 43, 50 ), ( 23, 34, 67, 78 ) ) + // CHECK: ( ( 14, 20, 38, 44 ), ( 30, 44, 86, 100 ), ( 46, 68, 134, 156 ) ) + + // Contractions in 2D. 
+ %c1 = vector.contract #contract2d_trait %A, %B, %f0 + : vector<2x2xf32>, vector<2x2xf32> into f32 + %c2 = vector.contract #contract2d_trait %A, %B, %f1 + : vector<2x2xf32>, vector<2x2xf32> into f32 + %c3 = vector.contract #contract2d_alt_trait %A, %B, %f0 + : vector<2x2xf32>, vector<2x2xf32> into f32 + %c4 = vector.contract #contract2d_alt_trait %A, %B, %f1 + : vector<2x2xf32>, vector<2x2xf32> into f32 + %c5 = vector.contract #contract2d_trans_trait %A, %B, %f0 + : vector<2x2xf32>, vector<2x2xf32> into f32 + %c6 = vector.contract #contract2d_trans_trait %A, %B, %f1 + : vector<2x2xf32>, vector<2x2xf32> into f32 + %c7 = vector.contract #contract2d_trans_alt_trait %A, %B, %f0 + : vector<2x2xf32>, vector<2x2xf32> into f32 + %c8 = vector.contract #contract2d_trans_alt_trait %A, %B, %f1 + : vector<2x2xf32>, vector<2x2xf32> into f32 + + vector.print %c1 : f32 + vector.print %c2 : f32 + vector.print %c3 : f32 + vector.print %c4 : f32 + vector.print %c5 : f32 + vector.print %c6 : f32 + vector.print %c7 : f32 + vector.print %c8 : f32 + // + // 2D contractions: + // + // CHECK: 70 + // CHECK: 71 + // CHECK: 70 + // CHECK: 71 + // CHECK: 69 + // CHECK: 70 + // CHECK: 69 + // CHECK: 70 + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-extract-slices.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-extract-slices.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-extract-slices.mlir @@ -0,0 +1,79 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %f0 = constant 0.0: f32 + %f1 = constant 1.0: f32 + %f2 = constant 2.0: f32 + %f3 = constant 3.0: f32 + %f4 = constant 4.0: f32 + %f5 = constant 5.0: f32 + %f6 = constant 6.0: f32 + %f7 = constant 7.0: f32 + %f8 = constant 8.0: f32 + %f9 = 
constant 9.0: f32 + %f10 = constant 10.0: f32 + %f11 = constant 11.0: f32 + %f12 = constant 12.0: f32 + %f13 = constant 13.0: f32 + %f14 = constant 14.0: f32 + %f15 = constant 15.0: f32 + + %a0 = vector.broadcast %f0 : f32 to vector<4x4xf32> + %a1 = vector.insert %f0, %a0[0, 0] : f32 into vector<4x4xf32> + %a2 = vector.insert %f1, %a1[0, 1] : f32 into vector<4x4xf32> + %a3 = vector.insert %f2, %a2[0, 2] : f32 into vector<4x4xf32> + %a4 = vector.insert %f3, %a3[0, 3] : f32 into vector<4x4xf32> + %a5 = vector.insert %f4, %a4[1, 0] : f32 into vector<4x4xf32> + %a6 = vector.insert %f5, %a5[1, 1] : f32 into vector<4x4xf32> + %a7 = vector.insert %f6, %a6[1, 2] : f32 into vector<4x4xf32> + %a8 = vector.insert %f7, %a7[1, 3] : f32 into vector<4x4xf32> + %a9 = vector.insert %f8, %a8[2, 0] : f32 into vector<4x4xf32> + %a10 = vector.insert %f9, %a9[2, 1] : f32 into vector<4x4xf32> + %a11 = vector.insert %f10, %a10[2, 2] : f32 into vector<4x4xf32> + %a12 = vector.insert %f11, %a11[2, 3] : f32 into vector<4x4xf32> + %a13 = vector.insert %f12, %a12[3, 0] : f32 into vector<4x4xf32> + %a14 = vector.insert %f13, %a13[3, 1] : f32 into vector<4x4xf32> + %a15 = vector.insert %f14, %a14[3, 2] : f32 into vector<4x4xf32> + %a16 = vector.insert %f15, %a15[3, 3] : f32 into vector<4x4xf32> + + vector.print %a16 : vector<4x4xf32> + // + // test matrix: + // + // CHECK: ( ( 0, 1, 2, 3 ), ( 4, 5, 6, 7 ), ( 8, 9, 10, 11 ), ( 12, 13, 14, 15 ) ) + + // Tile 4x4 with 3x3 as follows: + // + // +--------+--+ + // |0 1 2| 3| + // |4 5 6| 7| + // |8 9 10|11| + // +--------+--+ + // |12 13 14|15| + // +--------+--+ + // + %es = vector.extract_slices %a16, [3, 3], [1, 1] : + vector<4x4xf32> into tuple<vector<3x3xf32>, vector<3x1xf32>, vector<1x3xf32>, vector<1x1xf32>> + + %0 = vector.tuple_get %es, 0 : tuple<vector<3x3xf32>, vector<3x1xf32>, vector<1x3xf32>, vector<1x1xf32>> + %1 = vector.tuple_get %es, 1 : tuple<vector<3x3xf32>, vector<3x1xf32>, vector<1x3xf32>, vector<1x1xf32>> + %2 = vector.tuple_get %es, 2 : tuple<vector<3x3xf32>, vector<3x1xf32>, 
vector<1x3xf32>, vector<1x1xf32>> + %3 = vector.tuple_get %es, 3 : tuple<vector<3x3xf32>, vector<3x1xf32>, vector<1x3xf32>, vector<1x1xf32>> + + vector.print %0 : vector<3x3xf32> + vector.print %1 : vector<3x1xf32> + vector.print %2 : vector<1x3xf32> + vector.print %3 : vector<1x1xf32> + // + // extract slices: + // + // CHECK: ( ( 0, 1, 2 ), ( 4, 5, 6 ), ( 8, 9, 10 ) ) + // CHECK: ( ( 3 ), ( 7 ), ( 11 ) ) + // CHECK: ( ( 12, 13, 14 ) ) + // CHECK: ( ( 15 ) ) + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-extract-strided-slice.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-extract-strided-slice.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-extract-strided-slice.mlir @@ -0,0 +1,32 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %f0 = constant 0.0: f32 + %f1 = constant 1.0: f32 + %f2 = constant 2.0: f32 + %f3 = constant 3.0: f32 + %f4 = constant 4.0: f32 + %v1 = vector.broadcast %f1 : f32 to vector<8xf32> + %v2 = vector.broadcast %f2 : f32 to vector<8xf32> + %v3 = vector.broadcast %f3 : f32 to vector<8xf32> + %v4 = vector.broadcast %f4 : f32 to vector<8xf32> + + %a0 = vector.broadcast %f0 : f32 to vector<4x4x8xf32> + %a1 = vector.insert %v1, %a0[1, 1] : vector<8xf32> into vector<4x4x8xf32> + %a2 = vector.insert %v2, %a1[1, 2] : vector<8xf32> into vector<4x4x8xf32> + %a3 = vector.insert %v3, %a2[2, 1] : vector<8xf32> into vector<4x4x8xf32> + %a4 = vector.insert %v4, %a3[2, 2] : vector<8xf32> into vector<4x4x8xf32> + + %ss = vector.extract_strided_slice %a4 {offsets = [1, 1], sizes = [2, 2], strides = [1, 1]} : vector<4x4x8xf32> to vector<2x2x8xf32> + + vector.print %ss : vector<2x2x8xf32> + // + // extract strided slice: + // + // CHECK: ( ( ( 1, 1, 1, 1, 1, 1, 1, 1 ), ( 2, 2, 2, 
2, 2, 2, 2, 2 ) ), ( ( 3, 3, 3, 3, 3, 3, 3, 3 ), ( 4, 4, 4, 4, 4, 4, 4, 4 ) ) ) + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-flat-transpose-col.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-flat-transpose-col.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-flat-transpose-col.mlir @@ -0,0 +1,78 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -lower-matrix-intrinsics -matrix-allow-contract -matrix-default-layout=column-major \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %f0 = constant 0.0: f64 + %f1 = constant 1.0: f64 + %f2 = constant 2.0: f64 + %f3 = constant 3.0: f64 + %f4 = constant 4.0: f64 + %f5 = constant 5.0: f64 + %f6 = constant 6.0: f64 + %f7 = constant 7.0: f64 + + // Construct test vectors. + %0 = vector.broadcast %f0 : f64 to vector<4xf64> + %1 = vector.insert %f1, %0[1] : f64 into vector<4xf64> + %2 = vector.insert %f2, %1[2] : f64 into vector<4xf64> + %a = vector.insert %f3, %2[3] : f64 into vector<4xf64> + %3 = vector.broadcast %f4 : f64 to vector<4xf64> + %4 = vector.insert %f5, %3[1] : f64 into vector<4xf64> + %5 = vector.insert %f6, %4[2] : f64 into vector<4xf64> + %b = vector.insert %f7, %5[3] : f64 into vector<4xf64> + %6 = vector.broadcast %f0 : f64 to vector<6xf64> + %7 = vector.insert %f1, %6[1] : f64 into vector<6xf64> + %8 = vector.insert %f2, %7[2] : f64 into vector<6xf64> + %9 = vector.insert %f3, %8[3] : f64 into vector<6xf64> + %10 = vector.insert %f4, %9[4] : f64 into vector<6xf64> + %c = vector.insert %f5, %10[5] : f64 into vector<6xf64> + + vector.print %a : vector<4xf64> + vector.print %b : vector<4xf64> + vector.print %c : vector<6xf64> + // + // Test vectors: + // + // CHECK: ( 0, 1, 2, 3 ) + // CHECK: ( 4, 5, 6, 7 ) + // CHECK: ( 0, 1, 2, 3, 4, 5 ) + + // 
Performs matrix transpositions interpreting the vectors as + // flattened column-major 2-D matrices. + // + // ( 0, 2 ) ( 0, 1 ) | /| + // ( 1, 3 ) -> ( 2, 3 ) |/ | column-major! + // + // ( 4, 6 ) ( 4, 5 ) + // ( 5, 7 ) -> ( 6, 7 ) + // + // ( 0, 2, 4 ) ( 0, 1 ) + // ( 1, 3, 5 ) -> ( 2, 3 ) + // ( 4, 5 ) + // + // ( 0, 3 ) ( 0, 1, 2 ) + // ( 1, 4 ) -> ( 3, 4, 5 ) + // ( 2, 5 ) + // + %d = vector.flat_transpose %a { rows = 2: i32, columns = 2: i32 } : vector<4xf64> -> vector<4xf64> + %e = vector.flat_transpose %b { rows = 2: i32, columns = 2: i32 } : vector<4xf64> -> vector<4xf64> + %f = vector.flat_transpose %c { rows = 2: i32, columns = 3: i32 } : vector<6xf64> -> vector<6xf64> + %g = vector.flat_transpose %c { rows = 3: i32, columns = 2: i32 } : vector<6xf64> -> vector<6xf64> + + vector.print %d : vector<4xf64> + vector.print %e : vector<4xf64> + vector.print %f : vector<6xf64> + vector.print %g : vector<6xf64> + // + // Transposed results: + // + // CHECK: ( 0, 2, 1, 3 ) + // CHECK: ( 4, 6, 5, 7 ) + // CHECK: ( 0, 2, 4, 1, 3, 5 ) + // CHECK: ( 0, 3, 1, 4, 2, 5 ) + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-flat-transpose-row.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-flat-transpose-row.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-flat-transpose-row.mlir @@ -0,0 +1,78 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -lower-matrix-intrinsics -matrix-allow-contract -matrix-default-layout=row-major \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %f0 = constant 0.0: f64 + %f1 = constant 1.0: f64 + %f2 = constant 2.0: f64 + %f3 = constant 3.0: f64 + %f4 = constant 4.0: f64 + %f5 = constant 5.0: f64 + %f6 = constant 6.0: f64 + %f7 = constant 7.0: f64 + + // Construct test vectors. 
+ %0 = vector.broadcast %f0 : f64 to vector<4xf64> + %1 = vector.insert %f1, %0[1] : f64 into vector<4xf64> + %2 = vector.insert %f2, %1[2] : f64 into vector<4xf64> + %a = vector.insert %f3, %2[3] : f64 into vector<4xf64> + %3 = vector.broadcast %f4 : f64 to vector<4xf64> + %4 = vector.insert %f5, %3[1] : f64 into vector<4xf64> + %5 = vector.insert %f6, %4[2] : f64 into vector<4xf64> + %b = vector.insert %f7, %5[3] : f64 into vector<4xf64> + %6 = vector.broadcast %f0 : f64 to vector<6xf64> + %7 = vector.insert %f1, %6[1] : f64 into vector<6xf64> + %8 = vector.insert %f2, %7[2] : f64 into vector<6xf64> + %9 = vector.insert %f3, %8[3] : f64 into vector<6xf64> + %10 = vector.insert %f4, %9[4] : f64 into vector<6xf64> + %c = vector.insert %f5, %10[5] : f64 into vector<6xf64> + + vector.print %a : vector<4xf64> + vector.print %b : vector<4xf64> + vector.print %c : vector<6xf64> + // + // Test vectors: + // + // CHECK: ( 0, 1, 2, 3 ) + // CHECK: ( 4, 5, 6, 7 ) + // CHECK: ( 0, 1, 2, 3, 4, 5 ) + + // Performs matrix transpositions interpreting the vectors as + // flattened row-major 2-D matrices. 
+ // + // ( 0, 1 ) ( 0, 2 ) + // ( 2, 3 ) -> ( 1, 3 ) + // + // ( 4, 5 ) ( 4, 6 ) + // ( 6, 7 ) -> ( 5, 7 ) + // + // ( 0, 1, 2 ) ( 0, 3 ) + // ( 3, 4, 5 ) -> ( 1, 4 ) + // ( 2, 5 ) + // + // ( 0, 1 ) ( 0, 2, 4 ) + // ( 2, 3 ) -> ( 1, 3, 5 ) + // ( 4, 5 ) + // + %d = vector.flat_transpose %a { rows = 2: i32, columns = 2: i32 } : vector<4xf64> -> vector<4xf64> + %e = vector.flat_transpose %b { rows = 2: i32, columns = 2: i32 } : vector<4xf64> -> vector<4xf64> + %f = vector.flat_transpose %c { rows = 2: i32, columns = 3: i32 } : vector<6xf64> -> vector<6xf64> + %g = vector.flat_transpose %c { rows = 3: i32, columns = 2: i32 } : vector<6xf64> -> vector<6xf64> + + vector.print %d : vector<4xf64> + vector.print %e : vector<4xf64> + vector.print %f : vector<6xf64> + vector.print %g : vector<6xf64> + // + // Transposed results: + // + // CHECK: ( 0, 2, 1, 3 ) + // CHECK: ( 4, 6, 5, 7 ) + // CHECK: ( 0, 3, 1, 4, 2, 5 ) + // CHECK: ( 0, 2, 4, 1, 3, 5 ) + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-fma.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-fma.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-fma.mlir @@ -0,0 +1,28 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %f1 = constant 1.0: f32 + %f3 = constant 3.0: f32 + %f7 = constant 7.0: f32 + %v1 = vector.broadcast %f1 : f32 to vector<8xf32> + %v3 = vector.broadcast %f3 : f32 to vector<8xf32> + %v7 = vector.broadcast %f7 : f32 to vector<8xf32> + vector.print %v1 : vector<8xf32> + vector.print %v3 : vector<8xf32> + vector.print %v7 : vector<8xf32> + // + // test vectors: + // + // CHECK: ( 1, 1, 1, 1, 1, 1, 1, 1 ) + // CHECK: ( 3, 3, 3, 3, 3, 3, 3, 3 ) + // CHECK: ( 7, 7, 7, 7, 7, 7, 7, 7 ) + + %v = vector.fma %v3, %v7, 
%v1: vector<8xf32> + vector.print %v : vector<8xf32> + // CHECK: ( 22, 22, 22, 22, 22, 22, 22, 22 ) + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-insert-slices.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-insert-slices.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-insert-slices.mlir @@ -0,0 +1,72 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %f0 = constant 0.0: f32 + %f1 = constant 1.0: f32 + %f2 = constant 2.0: f32 + %f3 = constant 3.0: f32 + %f4 = constant 4.0: f32 + %f5 = constant 5.0: f32 + %f6 = constant 6.0: f32 + %f7 = constant 7.0: f32 + %f8 = constant 8.0: f32 + %f9 = constant 9.0: f32 + %f10 = constant 10.0: f32 + %f11 = constant 11.0: f32 + %f12 = constant 12.0: f32 + %f13 = constant 13.0: f32 + %f14 = constant 14.0: f32 + %f15 = constant 15.0: f32 + + %a0 = vector.broadcast %f0 : f32 to vector<3x3xf32> + %a1 = vector.insert %f0, %a0[0, 0] : f32 into vector<3x3xf32> + %a2 = vector.insert %f1, %a1[0, 1] : f32 into vector<3x3xf32> + %a3 = vector.insert %f2, %a2[0, 2] : f32 into vector<3x3xf32> + %a4 = vector.insert %f4, %a3[1, 0] : f32 into vector<3x3xf32> + %a5 = vector.insert %f5, %a4[1, 1] : f32 into vector<3x3xf32> + %a6 = vector.insert %f6, %a5[1, 2] : f32 into vector<3x3xf32> + %a7 = vector.insert %f8, %a6[2, 0] : f32 into vector<3x3xf32> + %a8 = vector.insert %f9, %a7[2, 1] : f32 into vector<3x3xf32> + %a9 = vector.insert %f10, %a8[2, 2] : f32 into vector<3x3xf32> + + %b0 = vector.broadcast %f0 : f32 to vector<3x1xf32> + %b1 = vector.insert %f3, %b0[0, 0] : f32 into vector<3x1xf32> + %b2 = vector.insert %f7, %b1[1, 0] : f32 into vector<3x1xf32> + %b3 = vector.insert %f11, %b2[2, 0] : f32 into vector<3x1xf32> + + %c0 = vector.broadcast %f0 : 
f32 to vector<1x3xf32> + %c1 = vector.insert %f12, %c0[0, 0] : f32 into vector<1x3xf32> + %c2 = vector.insert %f13, %c1[0, 1] : f32 into vector<1x3xf32> + %c3 = vector.insert %f14, %c2[0, 2] : f32 into vector<1x3xf32> + + %d0 = vector.broadcast %f0 : f32 to vector<1x1xf32> + %d1 = vector.insert %f15, %d0[0, 0] : f32 into vector<1x1xf32> + + vector.print %a9 : vector<3x3xf32> + vector.print %b3 : vector<3x1xf32> + vector.print %c3 : vector<1x3xf32> + vector.print %d1 : vector<1x1xf32> + // + // input slices: + // + // CHECK: ( ( 0, 1, 2 ), ( 4, 5, 6 ), ( 8, 9, 10 ) ) + // CHECK: ( ( 3 ), ( 7 ), ( 11 ) ) + // CHECK: ( ( 12, 13, 14 ) ) + // CHECK: ( ( 15 ) ) + + %vt = vector.tuple %a9, %b3, %c3, %d1 : + vector<3x3xf32>, vector<3x1xf32>, vector<1x3xf32>, vector<1x1xf32> + %is = vector.insert_slices %vt, [3, 3], [1, 1] : + tuple<vector<3x3xf32>, vector<3x1xf32>, vector<1x3xf32>, vector<1x1xf32>> into vector<4x4xf32> + + vector.print %is : vector<4x4xf32> + // + // insert slices: + // + // CHECK: ( ( 0, 1, 2, 3 ), ( 4, 5, 6, 7 ), ( 8, 9, 10, 11 ), ( 12, 13, 14, 15 ) ) + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-insert-strided-slice.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-insert-strided-slice.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-insert-strided-slice.mlir @@ -0,0 +1,36 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %f1 = constant 1.0: f32 + %f2 = constant 2.0: f32 + %f3 = constant 3.0: f32 + %f4 = constant 4.0: f32 + %v1 = vector.broadcast %f1 : f32 to vector<4xf32> + %v2 = vector.broadcast %f2 : f32 to vector<3xf32> + %v3 = vector.broadcast %f3 : f32 to vector<4x4xf32> + %v4 = vector.broadcast %f4 : f32 to vector<1xf32> + + %s1 = vector.insert_strided_slice %v1, %v3
{offsets = [2, 0], strides = [1]} : vector<4xf32> into vector<4x4xf32> + %s2 = vector.insert_strided_slice %v2, %s1 {offsets = [1, 1], strides = [1]} : vector<3xf32> into vector<4x4xf32> + %s3 = vector.insert_strided_slice %v2, %s2 {offsets = [0, 0], strides = [1]} : vector<3xf32> into vector<4x4xf32> + %s4 = vector.insert_strided_slice %v4, %s3 {offsets = [3, 3], strides = [1]} : vector<1xf32> into vector<4x4xf32> + + vector.print %v3 : vector<4x4xf32> + vector.print %s1 : vector<4x4xf32> + vector.print %s2 : vector<4x4xf32> + vector.print %s3 : vector<4x4xf32> + vector.print %s4 : vector<4x4xf32> + // + // insert strided slice: + // + // CHECK: ( ( 3, 3, 3, 3 ), ( 3, 3, 3, 3 ), ( 3, 3, 3, 3 ), ( 3, 3, 3, 3 ) ) + // CHECK: ( ( 3, 3, 3, 3 ), ( 3, 3, 3, 3 ), ( 1, 1, 1, 1 ), ( 3, 3, 3, 3 ) ) + // CHECK: ( ( 3, 3, 3, 3 ), ( 3, 2, 2, 2 ), ( 1, 1, 1, 1 ), ( 3, 3, 3, 3 ) ) + // CHECK: ( ( 2, 2, 2, 3 ), ( 3, 2, 2, 2 ), ( 1, 1, 1, 1 ), ( 3, 3, 3, 3 ) ) + // CHECK: ( ( 2, 2, 2, 3 ), ( 3, 2, 2, 2 ), ( 1, 1, 1, 1 ), ( 3, 3, 3, 4 ) ) + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-matrix-multiply-col.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-matrix-multiply-col.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-matrix-multiply-col.mlir @@ -0,0 +1,53 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -lower-matrix-intrinsics -matrix-allow-contract -matrix-default-layout=column-major \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %f0 = constant 0.0: f64 + %f1 = constant 1.0: f64 + %f2 = constant 2.0: f64 + %f3 = constant 3.0: f64 + %f4 = constant 4.0: f64 + %f5 = constant 5.0: f64 + %f6 = constant 6.0: f64 + %f7 = constant 7.0: f64 + + // Construct test vectors. 
+ %0 = vector.broadcast %f0 : f64 to vector<4xf64> + %1 = vector.insert %f1, %0[1] : f64 into vector<4xf64> + %2 = vector.insert %f2, %1[2] : f64 into vector<4xf64> + %a = vector.insert %f3, %2[3] : f64 into vector<4xf64> + %3 = vector.broadcast %f4 : f64 to vector<4xf64> + %4 = vector.insert %f5, %3[1] : f64 into vector<4xf64> + %5 = vector.insert %f6, %4[2] : f64 into vector<4xf64> + %b = vector.insert %f7, %5[3] : f64 into vector<4xf64> + + vector.print %a : vector<4xf64> + vector.print %b : vector<4xf64> + // + // test vectors: + // + // CHECK: ( 0, 1, 2, 3 ) + // CHECK: ( 4, 5, 6, 7 ) + + // Performs matrix x matrix, interpreting the vectors as + // flattened column-major 2-D matrices. + // + // ( 0, 2 ) (4, 6) ( 10, 14 ) | /| + // x = |/ | column-major! + // ( 1, 3 ) (5, 7) ( 19, 27 ) + // + %c = vector.matrix_multiply %a, %b + { lhs_rows = 2: i32, lhs_columns = 2: i32 , rhs_columns = 2: i32 } + : (vector<4xf64>, vector<4xf64>) -> vector<4xf64> + + vector.print %c : vector<4xf64> + // + // matrix x matrix: + // + // CHECK: ( 10, 19, 14, 27 ) + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-matrix-multiply-row.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-matrix-multiply-row.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-matrix-multiply-row.mlir @@ -0,0 +1,53 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -lower-matrix-intrinsics -matrix-allow-contract -matrix-default-layout=row-major \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %f0 = constant 0.0: f64 + %f1 = constant 1.0: f64 + %f2 = constant 2.0: f64 + %f3 = constant 3.0: f64 + %f4 = constant 4.0: f64 + %f5 = constant 5.0: f64 + %f6 = constant 6.0: f64 + %f7 = constant 7.0: f64 + + // Construct test vectors. 
+ %0 = vector.broadcast %f0 : f64 to vector<4xf64> + %1 = vector.insert %f1, %0[1] : f64 into vector<4xf64> + %2 = vector.insert %f2, %1[2] : f64 into vector<4xf64> + %a = vector.insert %f3, %2[3] : f64 into vector<4xf64> + %3 = vector.broadcast %f4 : f64 to vector<4xf64> + %4 = vector.insert %f5, %3[1] : f64 into vector<4xf64> + %5 = vector.insert %f6, %4[2] : f64 into vector<4xf64> + %b = vector.insert %f7, %5[3] : f64 into vector<4xf64> + + vector.print %a : vector<4xf64> + vector.print %b : vector<4xf64> + // + // test vectors: + // + // CHECK: ( 0, 1, 2, 3 ) + // CHECK: ( 4, 5, 6, 7 ) + + // Performs matrix x matrix, interpreting the vectors as + // flattened row-major 2-D matrices. + // + // ( 0, 1 ) (4, 5) ( 6, 7 ) + // x = + // ( 2, 3 ) (6, 7) ( 26, 31 ) + // + %c = vector.matrix_multiply %a, %b + { lhs_rows = 2: i32, lhs_columns = 2: i32 , rhs_columns = 2: i32 } + : (vector<4xf64>, vector<4xf64>) -> vector<4xf64> + + vector.print %c : vector<4xf64> + // + // matrix x matrix: + // + // CHECK: ( 6, 7, 26, 31 ) + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-outerproduct.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-outerproduct.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-outerproduct.mlir @@ -0,0 +1,76 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +!vector_type_A = type vector<8xf32> +!vector_type_B = type vector<8xf32> +!vector_type_C = type vector<8x8xf32> + +!vector_type_X = type vector<2xf32> +!vector_type_Y = type vector<3xf32> +!vector_type_Z = type vector<2x3xf32> + +func @vector_outerproduct_splat_8x8(%fa: f32, %fb: f32, %fc: f32) -> !vector_type_C { + %a = splat %fa: !vector_type_A + %b = splat %fb: !vector_type_B + %c = splat %fc: !vector_type_C + %d = 
vector.outerproduct %a, %b, %c : !vector_type_A, !vector_type_B + return %d: !vector_type_C +} + +func @vector_outerproduct_vec_2x3(%x : !vector_type_X, + %y : !vector_type_Y) -> !vector_type_Z { + %o = vector.outerproduct %x, %y : !vector_type_X, !vector_type_Y + return %o: !vector_type_Z +} + +func @vector_outerproduct_vec_2x3_acc(%x : !vector_type_X, + %y : !vector_type_Y, + %z : !vector_type_Z) -> !vector_type_Z { + %o = vector.outerproduct %x, %y, %z : !vector_type_X, !vector_type_Y + return %o: !vector_type_Z +} + +func @entry() { + %f1 = constant 1.0: f32 + %f2 = constant 2.0: f32 + %f3 = constant 3.0: f32 + %f4 = constant 4.0: f32 + %f5 = constant 5.0: f32 + %f10 = constant 10.0: f32 + + // Simple case, splat scalars into vectors, then take outer product. + %v = call @vector_outerproduct_splat_8x8(%f1, %f2, %f10) + : (f32, f32, f32) -> (!vector_type_C) + vector.print %v : !vector_type_C + // + // outer product 8x8: + // + // CHECK-COUNT-8: ( 12, 12, 12, 12, 12, 12, 12, 12 ) + + // Direct outerproduct on vectors with different size. 
+ %0 = vector.broadcast %f1 : f32 to !vector_type_X + %x = vector.insert %f2, %0[1] : f32 into !vector_type_X + %1 = vector.broadcast %f3 : f32 to !vector_type_Y + %2 = vector.insert %f4, %1[1] : f32 into !vector_type_Y + %y = vector.insert %f5, %2[2] : f32 into !vector_type_Y + + %p = call @vector_outerproduct_vec_2x3(%x, %y) + : (!vector_type_X, !vector_type_Y) -> (!vector_type_Z) + vector.print %p : !vector_type_Z + // + // outer product 2x3: + // + // CHECK: ( ( 3, 4, 5 ), ( 6, 8, 10 ) ) + + %q = call @vector_outerproduct_vec_2x3_acc(%x, %y, %p) + : (!vector_type_X, !vector_type_Y, !vector_type_Z) -> (!vector_type_Z) + vector.print %q : !vector_type_Z + // + // outer product 2x3: + // + // CHECK: ( ( 6, 8, 10 ), ( 12, 16, 20 ) ) + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-reductions-f32.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-reductions-f32.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-reductions-f32.mlir @@ -0,0 +1,50 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + // Construct test vector. 
+ %f1 = constant 1.5: f32 + %f2 = constant 2.0: f32 + %f3 = constant 3.0: f32 + %f4 = constant 4.0: f32 + %f5 = constant 5.0: f32 + %f6 = constant -1.0: f32 + %f7 = constant -2.0: f32 + %f8 = constant -4.0: f32 + %f9 = constant -0.25: f32 + %f10 = constant -16.0: f32 + %v0 = vector.broadcast %f1 : f32 to vector<10xf32> + %v1 = vector.insert %f2, %v0[1] : f32 into vector<10xf32> + %v2 = vector.insert %f3, %v1[2] : f32 into vector<10xf32> + %v3 = vector.insert %f4, %v2[3] : f32 into vector<10xf32> + %v4 = vector.insert %f5, %v3[4] : f32 into vector<10xf32> + %v5 = vector.insert %f6, %v4[5] : f32 into vector<10xf32> + %v6 = vector.insert %f7, %v5[6] : f32 into vector<10xf32> + %v7 = vector.insert %f8, %v6[7] : f32 into vector<10xf32> + %v8 = vector.insert %f9, %v7[8] : f32 into vector<10xf32> + %v9 = vector.insert %f10, %v8[9] : f32 into vector<10xf32> + vector.print %v9 : vector<10xf32> + // + // test vector: + // + // CHECK: ( 1.5, 2, 3, 4, 5, -1, -2, -4, -0.25, -16 ) + + // Various vector reductions. Not full functional unit tests, but + // a simple integration test to see if the code runs end-to-end. 
+ %0 = vector.reduction "add", %v9 : vector<10xf32> into f32 + vector.print %0 : f32 + // CHECK: -7.75 + %1 = vector.reduction "mul", %v9 : vector<10xf32> into f32 + vector.print %1 : f32 + // CHECK: -5760 + %2 = vector.reduction "min", %v9 : vector<10xf32> into f32 + vector.print %2 : f32 + // CHECK: -16 + %3 = vector.reduction "max", %v9 : vector<10xf32> into f32 + vector.print %3 : f32 + // CHECK: 5 + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-reductions-f64.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-reductions-f64.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-reductions-f64.mlir @@ -0,0 +1,50 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + // Construct test vector. + %f1 = constant 1.5: f64 + %f2 = constant 2.0: f64 + %f3 = constant 3.0: f64 + %f4 = constant 4.0: f64 + %f5 = constant 5.0: f64 + %f6 = constant -1.0: f64 + %f7 = constant -2.0: f64 + %f8 = constant -4.0: f64 + %f9 = constant -0.25: f64 + %f10 = constant -16.0: f64 + %v0 = vector.broadcast %f1 : f64 to vector<10xf64> + %v1 = vector.insert %f2, %v0[1] : f64 into vector<10xf64> + %v2 = vector.insert %f3, %v1[2] : f64 into vector<10xf64> + %v3 = vector.insert %f4, %v2[3] : f64 into vector<10xf64> + %v4 = vector.insert %f5, %v3[4] : f64 into vector<10xf64> + %v5 = vector.insert %f6, %v4[5] : f64 into vector<10xf64> + %v6 = vector.insert %f7, %v5[6] : f64 into vector<10xf64> + %v7 = vector.insert %f8, %v6[7] : f64 into vector<10xf64> + %v8 = vector.insert %f9, %v7[8] : f64 into vector<10xf64> + %v9 = vector.insert %f10, %v8[9] : f64 into vector<10xf64> + vector.print %v9 : vector<10xf64> + // + // test vector: + // + // CHECK: ( 1.5, 2, 3, 4, 5, -1, -2, -4, -0.25, -16 ) + + // Various vector 
reductions. Not full functional unit tests, but + // a simple integration test to see if the code runs end-to-end. + %0 = vector.reduction "add", %v9 : vector<10xf64> into f64 + vector.print %0 : f64 + // CHECK: -7.75 + %1 = vector.reduction "mul", %v9 : vector<10xf64> into f64 + vector.print %1 : f64 + // CHECK: -5760 + %2 = vector.reduction "min", %v9 : vector<10xf64> into f64 + vector.print %2 : f64 + // CHECK: -16 + %3 = vector.reduction "max", %v9 : vector<10xf64> into f64 + vector.print %3 : f64 + // CHECK: 5 + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-reductions-i32.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-reductions-i32.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-reductions-i32.mlir @@ -0,0 +1,59 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + // Construct test vector. 
+ %i1 = constant 1: i32 + %i2 = constant 2: i32 + %i3 = constant 3: i32 + %i4 = constant 4: i32 + %i5 = constant 5: i32 + %i6 = constant -1: i32 + %i7 = constant -2: i32 + %i8 = constant -4: i32 + %i9 = constant -80: i32 + %i10 = constant -16: i32 + %v0 = vector.broadcast %i1 : i32 to vector<10xi32> + %v1 = vector.insert %i2, %v0[1] : i32 into vector<10xi32> + %v2 = vector.insert %i3, %v1[2] : i32 into vector<10xi32> + %v3 = vector.insert %i4, %v2[3] : i32 into vector<10xi32> + %v4 = vector.insert %i5, %v3[4] : i32 into vector<10xi32> + %v5 = vector.insert %i6, %v4[5] : i32 into vector<10xi32> + %v6 = vector.insert %i7, %v5[6] : i32 into vector<10xi32> + %v7 = vector.insert %i8, %v6[7] : i32 into vector<10xi32> + %v8 = vector.insert %i9, %v7[8] : i32 into vector<10xi32> + %v9 = vector.insert %i10, %v8[9] : i32 into vector<10xi32> + vector.print %v9 : vector<10xi32> + // + // test vector: + // + // CHECK: ( 1, 2, 3, 4, 5, -1, -2, -4, -80, -16 ) + + // Various vector reductions. Not full functional unit tests, but + // a simple integration test to see if the code runs end-to-end. 
+ %0 = vector.reduction "add", %v9 : vector<10xi32> into i32 + vector.print %0 : i32 + // CHECK: -88 + %1 = vector.reduction "mul", %v9 : vector<10xi32> into i32 + vector.print %1 : i32 + // CHECK: -1228800 + %2 = vector.reduction "min", %v9 : vector<10xi32> into i32 + vector.print %2 : i32 + // CHECK: -80 + %3 = vector.reduction "max", %v9 : vector<10xi32> into i32 + vector.print %3 : i32 + // CHECK: 5 + %4 = vector.reduction "and", %v9 : vector<10xi32> into i32 + vector.print %4 : i32 + // CHECK: 0 + %5 = vector.reduction "or", %v9 : vector<10xi32> into i32 + vector.print %5 : i32 + // CHECK: -1 + %6 = vector.reduction "xor", %v9 : vector<10xi32> into i32 + vector.print %6 : i32 + // CHECK: -68 + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-reductions-i64.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-reductions-i64.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-reductions-i64.mlir @@ -0,0 +1,59 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + // Construct test vector. 
+ %i1 = constant 1: i64 + %i2 = constant 2: i64 + %i3 = constant 3: i64 + %i4 = constant 4: i64 + %i5 = constant 5: i64 + %i6 = constant -1: i64 + %i7 = constant -2: i64 + %i8 = constant -4: i64 + %i9 = constant -80: i64 + %i10 = constant -16: i64 + %v0 = vector.broadcast %i1 : i64 to vector<10xi64> + %v1 = vector.insert %i2, %v0[1] : i64 into vector<10xi64> + %v2 = vector.insert %i3, %v1[2] : i64 into vector<10xi64> + %v3 = vector.insert %i4, %v2[3] : i64 into vector<10xi64> + %v4 = vector.insert %i5, %v3[4] : i64 into vector<10xi64> + %v5 = vector.insert %i6, %v4[5] : i64 into vector<10xi64> + %v6 = vector.insert %i7, %v5[6] : i64 into vector<10xi64> + %v7 = vector.insert %i8, %v6[7] : i64 into vector<10xi64> + %v8 = vector.insert %i9, %v7[8] : i64 into vector<10xi64> + %v9 = vector.insert %i10, %v8[9] : i64 into vector<10xi64> + vector.print %v9 : vector<10xi64> + // + // test vector: + // + // CHECK: ( 1, 2, 3, 4, 5, -1, -2, -4, -80, -16 ) + + // Various vector reductions. Not full functional unit tests, but + // a simple integration test to see if the code runs end-to-end. 
+ %0 = vector.reduction "add", %v9 : vector<10xi64> into i64 + vector.print %0 : i64 + // CHECK: -88 + %1 = vector.reduction "mul", %v9 : vector<10xi64> into i64 + vector.print %1 : i64 + // CHECK: -1228800 + %2 = vector.reduction "min", %v9 : vector<10xi64> into i64 + vector.print %2 : i64 + // CHECK: -80 + %3 = vector.reduction "max", %v9 : vector<10xi64> into i64 + vector.print %3 : i64 + // CHECK: 5 + %4 = vector.reduction "and", %v9 : vector<10xi64> into i64 + vector.print %4 : i64 + // CHECK: 0 + %5 = vector.reduction "or", %v9 : vector<10xi64> into i64 + vector.print %5 : i64 + // CHECK: -1 + %6 = vector.reduction "xor", %v9 : vector<10xi64> into i64 + vector.print %6 : i64 + // CHECK: -68 + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-shape-cast.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-shape-cast.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-shape-cast.mlir @@ -0,0 +1,41 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %f1 = constant 1.0: f32 + %f2 = constant 2.0: f32 + %f3 = constant 3.0: f32 + %f4 = constant 4.0: f32 + %f5 = constant 5.0: f32 + %f6 = constant 6.0: f32 + + // Construct test vector. + %0 = vector.broadcast %f1 : f32 to vector<3x2xf32> + %1 = vector.insert %f2, %0[0, 1] : f32 into vector<3x2xf32> + %2 = vector.insert %f3, %1[1, 0] : f32 into vector<3x2xf32> + %3 = vector.insert %f4, %2[1, 1] : f32 into vector<3x2xf32> + %4 = vector.insert %f5, %3[2, 0] : f32 into vector<3x2xf32> + %x = vector.insert %f6, %4[2, 1] : f32 into vector<3x2xf32> + vector.print %x : vector<3x2xf32> + // CHECK: ( ( 1, 2 ), ( 3, 4 ), ( 5, 6 ) ) + + // Reshapes. 
+ %a = vector.shape_cast %x : vector<3x2xf32> to vector<3x2xf32> + %c = vector.shape_cast %x : vector<3x2xf32> to vector<6xf32> + %d = vector.shape_cast %c : vector<6xf32> to vector<2x3xf32> + %e = vector.shape_cast %c : vector<6xf32> to vector<3x2xf32> + + // Reshaped vectors: + // CHECK: ( ( 1, 2 ), ( 3, 4 ), ( 5, 6 ) ) + // CHECK: ( 1, 2, 3, 4, 5, 6 ) + // CHECK: ( ( 1, 2, 3 ), ( 4, 5, 6 ) ) + // CHECK: ( ( 1, 2 ), ( 3, 4 ), ( 5, 6 ) ) + vector.print %a : vector<3x2xf32> + vector.print %c : vector<6xf32> + vector.print %d : vector<2x3xf32> + vector.print %e : vector<3x2xf32> + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-shuffle.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-shuffle.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-shuffle.mlir @@ -0,0 +1,24 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %f1 = constant 1.0: f32 + %f2 = constant 2.0: f32 + %v1 = vector.broadcast %f1 : f32 to vector<2x4xf32> + %v2 = vector.broadcast %f2 : f32 to vector<2x4xf32> + vector.print %v1 : vector<2x4xf32> + vector.print %v2 : vector<2x4xf32> + // + // test vectors: + // + // CHECK: ( ( 1, 1, 1, 1 ), ( 1, 1, 1, 1 ) ) + // CHECK: ( ( 2, 2, 2, 2 ), ( 2, 2, 2, 2 ) ) + + %v3 = vector.shuffle %v1, %v2 [3, 1, 2] : vector<2x4xf32>, vector<2x4xf32> + vector.print %v3 : vector<3x4xf32> + // CHECK: ( ( 2, 2, 2, 2 ), ( 1, 1, 1, 1 ), ( 2, 2, 2, 2 ) ) + + return +} diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read.mlir @@ -0,0 +1,51 @@ +// RUN: mlir-opt %s -convert-scf-to-std 
-convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @transfer_read_1d(%A : memref<?xf32>, %base: index) { + %fm42 = constant -42.0: f32 + %f = vector.transfer_read %A[%base], %fm42 + {permutation_map = affine_map<(d0) -> (d0)>} : + memref<?xf32>, vector<13xf32> + vector.print %f: vector<13xf32> + return +} + +func @transfer_write_1d(%A : memref<?xf32>, %base: index) { + %f0 = constant 0.0 : f32 + %vf0 = splat %f0 : vector<4xf32> + vector.transfer_write %vf0, %A[%base] + {permutation_map = affine_map<(d0) -> (d0)>} : + vector<4xf32>, memref<?xf32> + return +} + +func @entry() { + %c0 = constant 0: index + %c1 = constant 1: index + %c2 = constant 2: index + %c3 = constant 3: index + %c4 = constant 4: index + %c5 = constant 5: index + %A = alloc(%c5) : memref<?xf32> + scf.for %i = %c0 to %c5 step %c1 { + %i32 = index_cast %i : index to i32 + %fi = sitofp %i32 : i32 to f32 + store %fi, %A[%i] : memref<?xf32> + } + // On input, memory contains [[ 0, 1, 2, 3, 4, xxx garbage xxx ]] + // Read shifted by 2 and pad with -42: + // ( 2, 3, 4, -42, ..., -42) + call @transfer_read_1d(%A, %c2) : (memref<?xf32>, index) -> () + // Write into memory shifted by 3 + // memory contains [[ 0, 1, 2, 0, 0, xxx garbage xxx ]] + call @transfer_write_1d(%A, %c3) : (memref<?xf32>, index) -> () + // Read shifted by 0 and pad with -42: + // ( 0, 1, 2, 0, 0, -42, ..., -42) + call @transfer_read_1d(%A, %c0) : (memref<?xf32>, index) -> () + return +} + +// CHECK: ( 2, 3, 4, -42, -42, -42, -42, -42, -42, -42, -42, -42, -42 ) +// CHECK: ( 0, 1, 2, 0, 0, -42, -42, -42, -42, -42, -42, -42, -42 ) diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-write.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-write.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-write.mlir @@ -0,0 +1,101 @@ +// RUN: mlir-opt %s
-convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @transfer_write16_1d(%A : memref<?xf32>, %base: index) { + %f = constant 16.0 : f32 + %v = splat %f : vector<16xf32> + vector.transfer_write %v, %A[%base] + {permutation_map = affine_map<(d0) -> (d0)>} + : vector<16xf32>, memref<?xf32> + return +} + +func @transfer_write13_1d(%A : memref<?xf32>, %base: index) { + %f = constant 13.0 : f32 + %v = splat %f : vector<13xf32> + vector.transfer_write %v, %A[%base] + {permutation_map = affine_map<(d0) -> (d0)>} + : vector<13xf32>, memref<?xf32> + return +} + +func @transfer_write17_1d(%A : memref<?xf32>, %base: index) { + %f = constant 17.0 : f32 + %v = splat %f : vector<17xf32> + vector.transfer_write %v, %A[%base] + {permutation_map = affine_map<(d0) -> (d0)>} + : vector<17xf32>, memref<?xf32> + return +} + +func @transfer_read_1d(%A : memref<?xf32>) -> vector<32xf32> { + %z = constant 0: index + %f = constant 0.0: f32 + %r = vector.transfer_read %A[%z], %f + {permutation_map = affine_map<(d0) -> (d0)>} + : memref<?xf32>, vector<32xf32> + return %r : vector<32xf32> +} + +func @entry() { + %c0 = constant 0: index + %c1 = constant 1: index + %c32 = constant 32: index + %A = alloc(%c32) {alignment=64} : memref<?xf32> + scf.for %i = %c0 to %c32 step %c1 { + %f = constant 0.0: f32 + store %f, %A[%i] : memref<?xf32> + } + + // On input, memory contains all zeros. + %0 = call @transfer_read_1d(%A) : (memref<?xf32>) -> (vector<32xf32>) + vector.print %0 : vector<32xf32> + + // Overwrite with 16 values of 16 at base 4. + %c4 = constant 4: index + call @transfer_write16_1d(%A, %c4) : (memref<?xf32>, index) -> () + %1 = call @transfer_read_1d(%A) : (memref<?xf32>) -> (vector<32xf32>) + vector.print %1 : vector<32xf32> + + // Overwrite with 13 values of 13 at base 3.
+ %c3 = constant 3: index + call @transfer_write13_1d(%A, %c3) : (memref<?xf32>, index) -> () + %2 = call @transfer_read_1d(%A) : (memref<?xf32>) -> (vector<32xf32>) + vector.print %2 : vector<32xf32> + + // Overwrite with 17 values of 17 at base 3. + %c7 = constant 7: index + call @transfer_write17_1d(%A, %c3) : (memref<?xf32>, index) -> () + %3 = call @transfer_read_1d(%A) : (memref<?xf32>) -> (vector<32xf32>) + vector.print %3 : vector<32xf32> + + // Overwrite with 13 values of 13 at base 8. + %c8 = constant 8: index + call @transfer_write13_1d(%A, %c8) : (memref<?xf32>, index) -> () + %4 = call @transfer_read_1d(%A) : (memref<?xf32>) -> (vector<32xf32>) + vector.print %4 : vector<32xf32> + + // Overwrite with 17 values of 17 at base 14. + %c14 = constant 14: index + call @transfer_write17_1d(%A, %c14) : (memref<?xf32>, index) -> () + %5 = call @transfer_read_1d(%A) : (memref<?xf32>) -> (vector<32xf32>) + vector.print %5 : vector<32xf32> + + // Overwrite with 13 values of 13 at base 19. + %c19 = constant 19: index + call @transfer_write13_1d(%A, %c19) : (memref<?xf32>, index) -> () + %6 = call @transfer_read_1d(%A) : (memref<?xf32>) -> (vector<32xf32>) + vector.print %6 : vector<32xf32> + + return +} + +// CHECK: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) +// CHECK: ( 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) +// CHECK: ( 0, 0, 0, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) +// CHECK: ( 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) +// CHECK: ( 0, 0, 0, 17, 17, 17, 17, 17, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) +// CHECK: ( 0, 0, 0, 17, 17, 17, 17, 17, 13, 13, 13, 13, 13, 13, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 0 ) +// CHECK: ( 0, 0, 0, 17, 17, 17, 17, 17, 13, 13, 13, 13, 13, 13, 17, 17, 17, 17, 17,
13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13 ) diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transpose.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transpose.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Vector/CPU/test-transpose.mlir @@ -0,0 +1,120 @@ +// RUN: mlir-opt %s -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e entry -entry-point-result=void \ +// RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext | \ +// RUN: FileCheck %s + +func @entry() { + %f0 = constant 0.0: f32 + %f1 = constant 1.0: f32 + %f2 = constant 2.0: f32 + %f3 = constant 3.0: f32 + %f4 = constant 4.0: f32 + %f5 = constant 5.0: f32 + %f6 = constant 6.0: f32 + %f7 = constant 7.0: f32 + %f8 = constant 8.0: f32 + + // Construct test vectors and matrices. + %0 = vector.broadcast %f1 : f32 to vector<2xf32> + %a = vector.insert %f2, %0[1] : f32 into vector<2xf32> + %1 = vector.broadcast %f3 : f32 to vector<2xf32> + %b = vector.insert %f4, %1[1] : f32 into vector<2xf32> + %2 = vector.broadcast %f5 : f32 to vector<2xf32> + %c = vector.insert %f6, %2[1] : f32 into vector<2xf32> + %3 = vector.broadcast %f7 : f32 to vector<2xf32> + %d = vector.insert %f8, %3[1] : f32 into vector<2xf32> + %4 = vector.broadcast %f0 : f32 to vector<2x2xf32> + %5 = vector.insert %a, %4[0] : vector<2xf32> into vector<2x2xf32> + %A = vector.insert %b, %5[1] : vector<2xf32> into vector<2x2xf32> + %6 = vector.broadcast %f0 : f32 to vector<2x2xf32> + %7 = vector.insert %c, %6[0] : vector<2xf32> into vector<2x2xf32> + %B = vector.insert %d, %7[1] : vector<2xf32> into vector<2x2xf32> + %8 = vector.broadcast %f0 : f32 to vector<3x2xf32> + %9 = vector.insert %a, %8[0] : vector<2xf32> into vector<3x2xf32> + %10 = vector.insert %b, %9[1] : vector<2xf32> into vector<3x2xf32> + %C = vector.insert %c, %10[2] : vector<2xf32> into vector<3x2xf32> + %11 = vector.tuple %A, %B : vector<2x2xf32>, vector<2x2xf32> + %D = 
vector.insert_slices %11, [2, 2], [1, 1] + : tuple<vector<2x2xf32>, vector<2x2xf32>> into vector<2x4xf32> + + vector.print %A : vector<2x2xf32> + vector.print %B : vector<2x2xf32> + vector.print %C : vector<3x2xf32> + vector.print %D : vector<2x4xf32> + // + // test matrices: + // + // CHECK: ( ( 1, 2 ), ( 3, 4 ) ) + // CHECK: ( ( 5, 6 ), ( 7, 8 ) ) + // CHECK: ( ( 1, 2 ), ( 3, 4 ), ( 5, 6 ) ) + // CHECK: ( ( 1, 2, 5, 6 ), ( 3, 4, 7, 8 ) ) + + %tA = vector.transpose %A, [1, 0] : vector<2x2xf32> to vector<2x2xf32> + %tB = vector.transpose %B, [1, 0] : vector<2x2xf32> to vector<2x2xf32> + %tC = vector.transpose %C, [1, 0] : vector<3x2xf32> to vector<2x3xf32> + %tD = vector.transpose %D, [1, 0] : vector<2x4xf32> to vector<4x2xf32> + + vector.print %tA : vector<2x2xf32> + vector.print %tB : vector<2x2xf32> + vector.print %tC : vector<2x3xf32> + vector.print %tD : vector<4x2xf32> + // + // transposed matrices: + // + // CHECK: ( ( 1, 3 ), ( 2, 4 ) ) + // CHECK: ( ( 5, 7 ), ( 6, 8 ) ) + // CHECK: ( ( 1, 3, 5 ), ( 2, 4, 6 ) ) + // CHECK: ( ( 1, 3 ), ( 2, 4 ), ( 5, 7 ), ( 6, 8 ) ) + + %idD = vector.transpose %D, [0, 1] : vector<2x4xf32> to vector<2x4xf32> + %ttD = vector.transpose %tD, [1, 0] : vector<4x2xf32> to vector<2x4xf32> + + vector.print %idD : vector<2x4xf32> + vector.print %ttD : vector<2x4xf32> + // + // back to original after transpose matrices: + // + // CHECK: ( ( 1, 2, 5, 6 ), ( 3, 4, 7, 8 ) ) + // CHECK: ( ( 1, 2, 5, 6 ), ( 3, 4, 7, 8 ) ) + + // Construct test tensor.
+ %p = vector.broadcast %f1 : f32 to vector<2x2x2xf32> + %q = vector.insert %f2, %p[0, 0, 1] : f32 into vector<2x2x2xf32> + %r = vector.insert %f3, %q[0, 1, 0] : f32 into vector<2x2x2xf32> + %s = vector.insert %f4, %r[0, 1, 1] : f32 into vector<2x2x2xf32> + %t = vector.insert %f5, %s[1, 0, 0] : f32 into vector<2x2x2xf32> + %u = vector.insert %f6, %t[1, 0, 1] : f32 into vector<2x2x2xf32> + %v = vector.insert %f7, %u[1, 1, 0] : f32 into vector<2x2x2xf32> + %w = vector.insert %f8, %v[1, 1, 1] : f32 into vector<2x2x2xf32> + + vector.print %w : vector<2x2x2xf32> + // + // test tensors: + // + // CHECK: ( ( ( 1, 2 ), ( 3, 4 ) ), ( ( 5, 6 ), ( 7, 8 ) ) ) + + %tP = vector.transpose %w, [0, 1, 2] : vector<2x2x2xf32> to vector<2x2x2xf32> + %tQ = vector.transpose %w, [0, 2, 1] : vector<2x2x2xf32> to vector<2x2x2xf32> + %tR = vector.transpose %w, [1, 0, 2] : vector<2x2x2xf32> to vector<2x2x2xf32> + %tS = vector.transpose %w, [2, 0, 1] : vector<2x2x2xf32> to vector<2x2x2xf32> + %tT = vector.transpose %w, [1, 2, 0] : vector<2x2x2xf32> to vector<2x2x2xf32> + %tU = vector.transpose %w, [2, 1, 0] : vector<2x2x2xf32> to vector<2x2x2xf32> + + vector.print %tP : vector<2x2x2xf32> + vector.print %tQ : vector<2x2x2xf32> + vector.print %tR : vector<2x2x2xf32> + vector.print %tS : vector<2x2x2xf32> + vector.print %tT : vector<2x2x2xf32> + vector.print %tU : vector<2x2x2xf32> + // + // transposed tensors: + // + // CHECK: ( ( ( 1, 2 ), ( 3, 4 ) ), ( ( 5, 6 ), ( 7, 8 ) ) ) + // CHECK: ( ( ( 1, 3 ), ( 2, 4 ) ), ( ( 5, 7 ), ( 6, 8 ) ) ) + // CHECK: ( ( ( 1, 2 ), ( 5, 6 ) ), ( ( 3, 4 ), ( 7, 8 ) ) ) + // CHECK: ( ( ( 1, 3 ), ( 5, 7 ) ), ( ( 2, 4 ), ( 6, 8 ) ) ) + // CHECK: ( ( ( 1, 5 ), ( 2, 6 ) ), ( ( 3, 7 ), ( 4, 8 ) ) ) + // CHECK: ( ( ( 1, 5 ), ( 3, 7 ) ), ( ( 2, 6 ), ( 4, 8 ) ) ) + + return +} diff --git a/mlir/integration_test/lit.cfg.py b/mlir/integration_test/lit.cfg.py new file mode 100644 --- /dev/null +++ b/mlir/integration_test/lit.cfg.py @@ -0,0 +1,58 @@ +# -*- Python -*- + 
+import os +import platform +import re +import subprocess +import tempfile + +import lit.formats +import lit.util + +from lit.llvm import llvm_config +from lit.llvm.subst import ToolSubst + +# Configuration file for the 'lit' integration test runner. + +# name: The name of this integration test suite. +config.name = 'MLIR-INTEGRATION' + +config.test_format = lit.formats.ShTest(not llvm_config.use_lit_shell) + +# suffixes: A list of file extensions to treat as integration test files. +config.suffixes = ['.mlir'] + +# test_source_root: The root path where integration tests are located. +config.test_source_root = os.path.dirname(__file__) + +# test_exec_root: The root path where integration tests should be run. +config.test_exec_root = os.path.join(config.mlir_obj_root, 'integration_test') + +config.substitutions.append(('%PATH%', config.environment['PATH'])) +config.substitutions.append(('%shlibext', config.llvm_shlib_ext)) +config.substitutions.append(('%mlir_src_root', config.mlir_src_root)) + +llvm_config.with_system_environment(['HOME', 'INCLUDE', 'LIB', 'TMP', 'TEMP']) + +llvm_config.use_default_substitutions() + +# excludes: A list of files to exclude from the integration testsuite. +config.excludes = ['CMakeLists.txt', 'README.txt', 'LICENSE.txt'] + +# Tweak the PATH to include the tools dir. +llvm_config.with_environment('PATH', config.llvm_tools_dir, append_path=True) +tool_dirs = [config.mlir_tools_dir, config.llvm_tools_dir] +tools = [ + 'mlir-opt', + 'mlir-cpu-runner', +] + +# The following tools are optional.
+tools.extend([ + ToolSubst( + '%mlir_runner_utils_dir', + config.mlir_runner_utils_dir, + unresolved='ignore'), +]) + +llvm_config.add_tool_substitutions(tools, tool_dirs) diff --git a/mlir/integration_test/lit.site.cfg.py.in b/mlir/integration_test/lit.site.cfg.py.in new file mode 100644 --- /dev/null +++ b/mlir/integration_test/lit.site.cfg.py.in @@ -0,0 +1,51 @@ +@LIT_SITE_CFG_IN_HEADER@ + +import sys + +config.host_triple = "@LLVM_HOST_TRIPLE@" +config.target_triple = "@TARGET_TRIPLE@" +config.llvm_src_root = "@LLVM_SOURCE_DIR@" +config.llvm_obj_root = "@LLVM_BINARY_DIR@" +config.llvm_tools_dir = "@LLVM_TOOLS_DIR@" +config.llvm_lib_dir = "@LLVM_LIBRARY_DIR@" +config.llvm_shlib_dir = "@SHLIBDIR@" +config.llvm_shlib_ext = "@SHLIBEXT@" +config.llvm_exe_ext = "@EXEEXT@" +config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" +config.python_executable = "@PYTHON_EXECUTABLE@" +config.gold_executable = "@GOLD_EXECUTABLE@" +config.ld64_executable = "@LD64_EXECUTABLE@" +config.enable_shared = @ENABLE_SHARED@ +config.enable_assertions = @ENABLE_ASSERTIONS@ +config.targets_to_build = "@TARGETS_TO_BUILD@" +config.native_target = "@LLVM_NATIVE_ARCH@" +config.llvm_bindings = "@LLVM_BINDINGS@".split(' ') +config.host_os = "@HOST_OS@" +config.host_cc = "@HOST_CC@" +config.host_cxx = "@HOST_CXX@" +config.host_cmake = "@CMAKE_COMMAND@" +# Note: ldflags can contain double-quoted paths, so we must use single quotes here. +config.host_ldflags = '@HOST_LDFLAGS@' +config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" +config.llvm_host_triple = '@LLVM_HOST_TRIPLE@' +config.host_arch = "@HOST_ARCH@" +config.mlir_src_root = "@MLIR_SOURCE_DIR@" +config.mlir_obj_root = "@MLIR_BINARY_DIR@" +config.mlir_runner_utils_dir = "@MLIR_RUNNER_UTILS_DIR@" +config.mlir_tools_dir = "@MLIR_TOOLS_DIR@" + +# Support substitution of the tools dir and shlib dir with user parameters. +# This is used when we can't determine those dirs at configuration time.
+try: + config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params + config.llvm_shlib_dir = config.llvm_shlib_dir % lit_config.params +except KeyError: + e = sys.exc_info()[1] + key, = e.args + lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key)) + +import lit.llvm +lit.llvm.initialize(lit_config, config) + +# Let the main config (integration_test/lit.cfg.py in the source tree) do the real work. +lit_config.load_config(config, "@MLIR_SOURCE_DIR@/integration_test/lit.cfg.py")