diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-call.mlir @@ -0,0 +1,65 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=4" -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1" -test-conv-vectorization \ +// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=4" -linalg-tile="linalg-tile-sizes=1" \ +// RUN: -test-conv-vectorization -convert-linalg-to-loops \ +// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func @print_memref_f32(memref<*xf32>) + +// Creates and returns a 1-D buffer of size %s1 filled with the value %f +func @alloc_1d_filled_f32(%s1 : index, %f : f32) -> memref<?xf32> { + %buf = alloc(%s1) : memref<?xf32> + linalg.fill(%buf, %f) : memref<?xf32>, f32 + return %buf : memref<?xf32> +} + +func @conv_1d(%arg0: memref<?xf32>, 
%arg1: memref<?xf32>, %arg2: memref<?xf32>) { + linalg.conv_1d %arg0, %arg1, %arg2 : (memref<?xf32>, memref<?xf32>, memref<?xf32>) + return +} + +func @main() { + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter1D = call @alloc_1d_filled_f32(%c3, %val) : (index, f32) -> (memref<?xf32>) + %in1D = call @alloc_1d_filled_f32(%c8, %val) : (index, f32) -> (memref<?xf32>) + %out1D = call @alloc_1d_filled_f32(%c6, %zero) : (index, f32) -> (memref<?xf32>) + + store %f10, %in1D[%c3] : memref<?xf32> + call @conv_1d(%in1D, %filter1D, %out1D) : (memref<?xf32>, memref<?xf32>, memref<?xf32>) -> () + %out1D_ = memref_cast %out1D : memref<?xf32> to memref<*xf32> + call @print_memref_f32(%out1D_): (memref<*xf32>) -> () + + dealloc %filter1D : memref<?xf32> + dealloc %in1D : memref<?xf32> + dealloc %out1D : memref<?xf32> + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [12, 28, 28, 28, 12, 12] diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-ncw-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-ncw-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-ncw-call.mlir @@ -0,0 +1,71 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1" -test-conv-vectorization \ +// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: 
-convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" -linalg-tile="linalg-tile-sizes=1,1,1" \ +// RUN: -test-conv-vectorization -convert-linalg-to-loops \ +// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func @print_memref_f32(memref<*xf32>) + +// Creates and returns a 3-D buffer of size (%s1, %s2, %s3) filled with the value %f +func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref<?x?x?xf32> { + %buf = alloc(%s1, %s2, %s3) : memref<?x?x?xf32> + linalg.fill(%buf, %f) : memref<?x?x?xf32>, f32 + return %buf : memref<?x?x?xf32> +} + +func @conv_1d_ncw(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32>, %arg2: memref<?x?x?xf32>) { + linalg.conv_1d_ncw %arg0, %arg1, %arg2 : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c3, %val) : (index, index, index, f32) -> (memref<?x?x?xf32>) + %in1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c8, %val) : (index, index, index, f32) -> (memref<?x?x?xf32>) + %out1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c6, %zero) : (index, index, index, f32) -> (memref<?x?x?xf32>) + + store %f10, %in1D_ncw[%c0, %c0, %c3] : memref<?x?x?xf32> + call @conv_1d_ncw(%in1D_ncw, %filter1D_ncw, %out1D_ncw) : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) -> () + %out1D_ncw_ = memref_cast %out1D_ncw : memref<?x?x?xf32> to memref<*xf32> + call @print_memref_f32(%out1D_ncw_): (memref<*xf32>) -> () + + dealloc 
%filter1D_ncw : memref<?x?x?xf32> + dealloc %in1D_ncw : memref<?x?x?xf32> + dealloc %out1D_ncw : memref<?x?x?xf32> + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [12, 28, 28, 28, 12, 12] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-nwc-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-nwc-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-1d-nwc-call.mlir @@ -0,0 +1,82 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1" -test-conv-vectorization \ +// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" -linalg-tile="linalg-tile-sizes=1,1,1" \ +// RUN: -test-conv-vectorization -convert-linalg-to-loops \ +// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func @print_memref_f32(memref<*xf32>) + 
+// Creates and returns a 3-D buffer of size (%s1, %s2, %s3) filled with the value %f +func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref<?x?x?xf32> { + %buf = alloc(%s1, %s2, %s3) : memref<?x?x?xf32> + linalg.fill(%buf, %f) : memref<?x?x?xf32>, f32 + return %buf : memref<?x?x?xf32> +} + +func @conv_1d_nwc(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32>, %arg2: memref<?x?x?xf32>) { + linalg.conv_1d_nwc %arg0, %arg1, %arg2 : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter1D_nwc = call @alloc_3d_filled_f32(%c1, %c3, %c1, %val) : (index, index, index, f32) -> (memref<?x?x?xf32>) + %in1D_nwc = call @alloc_3d_filled_f32(%c3, %c8, %c1, %val) : (index, index, index, f32) -> (memref<?x?x?xf32>) + %out1D_nwc = call @alloc_3d_filled_f32(%c3, %c6, %c1, %zero) : (index, index, index, f32) -> (memref<?x?x?xf32>) + + store %f10, %in1D_nwc[%c0, %c3, %c0] : memref<?x?x?xf32> + call @conv_1d_nwc(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) -> () + %out1D_nwc_ = memref_cast %out1D_nwc : memref<?x?x?xf32> to memref<*xf32> + call @print_memref_f32(%out1D_nwc_): (memref<*xf32>) -> () + + dealloc %filter1D_nwc : memref<?x?x?xf32> + dealloc %in1D_nwc : memref<?x?x?xf32> + dealloc %out1D_nwc : memref<?x?x?xf32> + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [12], +// CHECK-COUNT-3: [28], +// CHECK-NEXT: [12], +// CHECK-NEXT: [12] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-5: [12], +// CHECK-NEXT: [12] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-5: [12], +// CHECK-NEXT: [12] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-call.mlir @@ -0,0 
+1,70 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2" -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1" -test-conv-vectorization \ +// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2" -linalg-tile="linalg-tile-sizes=1,1" \ +// RUN: -test-conv-vectorization -convert-linalg-to-loops \ +// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func @print_memref_f32(memref<*xf32>) + +// Creates and returns a 2-D buffer of size (%s1, %s2) filled with the value %f +func @alloc_2d_filled_f32(%s1 : index, %s2 : index, %f : f32) -> memref<?x?xf32> { + %buf = alloc(%s1, %s2) : memref<?x?xf32> + linalg.fill(%buf, %f) : memref<?x?xf32>, f32 + return %buf : memref<?x?xf32> +} + +func @conv_2d(%arg0: memref<?x?xf32>, %arg1: memref<?x?xf32>, %arg2: memref<?x?xf32>) { + linalg.conv_2d %arg0, %arg1, %arg2 : (memref<?x?xf32>, memref<?x?xf32>, memref<?x?xf32>) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : 
index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter2D = call @alloc_2d_filled_f32(%c3, %c3, %val) : (index, index, f32) -> (memref<?x?xf32>) + %in2D = call @alloc_2d_filled_f32(%c8, %c8, %val) : (index, index, f32) -> (memref<?x?xf32>) + %out2D = call @alloc_2d_filled_f32(%c6, %c6, %zero) : (index, index, f32) -> (memref<?x?xf32>) + + store %f10, %in2D[%c0, %c3] : memref<?x?xf32> + call @conv_2d(%in2D, %filter2D, %out2D) : (memref<?x?xf32>, memref<?x?xf32>, memref<?x?xf32>) -> () + %out2D_ = memref_cast %out2D : memref<?x?xf32> to memref<*xf32> + call @print_memref_f32(%out2D_): (memref<*xf32>) -> () + + dealloc %filter2D : memref<?x?xf32> + dealloc %in2D : memref<?x?xf32> + dealloc %out2D : memref<?x?xf32> + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [36, 52, 52, 52, 36, 36], +// CHECK-COUNT-5: [36, 36, 36, 36, 36, 36] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nchw-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nchw-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nchw-call.mlir @@ -0,0 +1,84 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1,1" -test-conv-vectorization \ +// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// 
RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" -linalg-tile="linalg-tile-sizes=1,1,1,1" \ +// RUN: -test-conv-vectorization -convert-linalg-to-loops \ +// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func @print_memref_f32(memref<*xf32>) + +// Creates and returns a 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f +func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref<?x?x?x?xf32> { + %buf = alloc(%s1, %s2, %s3, %s4) : memref<?x?x?x?xf32> + linalg.fill(%buf, %f) : memref<?x?x?x?xf32>, f32 + return %buf : memref<?x?x?x?xf32> +} + +func @conv_2d_nchw(%arg0: memref<?x?x?x?xf32>, %arg1: memref<?x?x?x?xf32>, %arg2: memref<?x?x?x?xf32>) { + linalg.conv_2d_nchw %arg0, %arg1, %arg2 : (memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter2D_nchw = call @alloc_4d_filled_f32(%c1, %c1, %c3, %c3, %val) : (index, index, index, index, f32) -> (memref<?x?x?x?xf32>) + %in2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c8, %c8, %val) : (index, index, index, index, f32) -> (memref<?x?x?x?xf32>) + %out2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (memref<?x?x?x?xf32>) + + store %f10, %in2D_nchw[%c0, %c0, %c0, %c3] : memref<?x?x?x?xf32> + call @conv_2d_nchw(%in2D_nchw, %filter2D_nchw, %out2D_nchw) : (memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>) -> () + %out2D_nchw_ = memref_cast %out2D_nchw : memref<?x?x?x?xf32> to memref<*xf32> + call @print_memref_f32(%out2D_nchw_): 
(memref<*xf32>) -> () + + dealloc %filter2D_nchw : memref<?x?x?x?xf32> + dealloc %in2D_nchw : memref<?x?x?x?xf32> + dealloc %out2D_nchw : memref<?x?x?x?xf32> + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [ +// CHECK-SAME: [36, 52, 52, 52, 36, 36], +// CHECK-COUNT-5: [36, 36, 36, 36, 36, 36] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [36, 36, 36, 36, 36, 36] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [36, 36, 36, 36, 36, 36] +// CHECK-SAME: ] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nhwc-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nhwc-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-2d-nhwc-call.mlir @@ -0,0 +1,130 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1,1" -test-conv-vectorization \ +// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" -linalg-tile="linalg-tile-sizes=1,1,1,1" \ +// RUN: 
-test-conv-vectorization -convert-linalg-to-loops \ +// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func @print_memref_f32(memref<*xf32>) + +// Creates and returns a 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f +func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref<?x?x?x?xf32> { + %buf = alloc(%s1, %s2, %s3, %s4) : memref<?x?x?x?xf32> + linalg.fill(%buf, %f) : memref<?x?x?x?xf32>, f32 + return %buf : memref<?x?x?x?xf32> +} + +func @conv_2d_nhwc(%arg0: memref<?x?x?x?xf32>, %arg1: memref<?x?x?x?xf32>, %arg2: memref<?x?x?x?xf32>) { + linalg.conv_2d_nhwc %arg0, %arg1, %arg2 : (memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter2D_nhwc = call @alloc_4d_filled_f32(%c1, %c3, %c3, %c3, %val) :(index, index, index, index, f32) -> (memref<?x?x?x?xf32>) + %in2D_nhwc = call @alloc_4d_filled_f32(%c3, %c8, %c8, %c3, %val) : (index, index, index, index, f32) -> (memref<?x?x?x?xf32>) + %out2D_nhwc = call @alloc_4d_filled_f32(%c3, %c6, %c6, %c1, %zero) : (index, index, index, index, f32) -> (memref<?x?x?x?xf32>) + + store %f10, %in2D_nhwc[%c0, %c0, %c3, %c0] : memref<?x?x?x?xf32> + call @conv_2d_nhwc(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>) -> () + %out2D_nhwc_ = memref_cast %out2D_nhwc : memref<?x?x?x?xf32> to memref<*xf32> + call @print_memref_f32(%out2D_nhwc_): (memref<*xf32>) -> () + + dealloc %filter2D_nhwc : memref<?x?x?x?xf32> + dealloc %in2D_nhwc : memref<?x?x?x?xf32> + dealloc %out2D_nhwc : memref<?x?x?x?xf32> + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [ +// CHECK-SAME: [108], +// CHECK-COUNT-3: [124], +// CHECK-COUNT-2: 
[108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-call.mlir @@ -0,0 +1,87 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2,2" -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext 
\ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1" -test-conv-vectorization \ +// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,2,2" -linalg-tile="linalg-tile-sizes=1,1,1" \ +// RUN: -test-conv-vectorization -convert-linalg-to-loops \ +// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func @print_memref_f32(memref<*xf32>) + +// Creates and returns a 3-D buffer of size (%s1, %s2, %s3) filled with the value %f +func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref<?x?x?xf32> { + %buf = alloc(%s1, %s2, %s3) : memref<?x?x?xf32> + linalg.fill(%buf, %f) : memref<?x?x?xf32>, f32 + return %buf : memref<?x?x?xf32> +} + +func @conv_3d(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32>, %arg2: memref<?x?x?xf32>) { + linalg.conv_3d %arg0, %arg1, %arg2 : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter3D = call @alloc_3d_filled_f32(%c3, %c3, %c3, %val) : (index, index, index, f32) -> (memref<?x?x?xf32>) + %in3D = call @alloc_3d_filled_f32(%c8, %c8, %c8, %val) : (index, index, index, f32) -> (memref<?x?x?xf32>) + %out3D = call @alloc_3d_filled_f32(%c6, %c6, %c6, %zero) : (index, index, index, f32) -> (memref<?x?x?xf32>) + + store %f10, %in3D[%c0, %c0, %c3] : memref<?x?x?xf32> + call @conv_3d(%in3D, %filter3D, 
%out3D) : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) -> () + %out3D_ = memref_cast %out3D : memref<?x?x?xf32> to memref<*xf32> + call @print_memref_f32(%out3D_): (memref<*xf32>) -> () + + dealloc %filter3D : memref<?x?x?xf32> + dealloc %in3D : memref<?x?x?xf32> + dealloc %out3D : memref<?x?x?xf32> + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [108, 124, 124, 124, 108, 108], +// CHECK-COUNT-5: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ncdhw-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ncdhw-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ncdhw-call.mlir @@ -0,0 +1,91 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1,1,1" -test-conv-vectorization \ +// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ 
+// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" -linalg-tile="linalg-tile-sizes=1,1,1,1,1" \ +// RUN: -test-conv-vectorization -convert-linalg-to-loops \ +// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func @print_memref_f32(memref<*xf32>) + +// Creates and returns a 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f +func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref<?x?x?x?x?xf32> { + %buf = alloc(%s1, %s2, %s3, %s4, %s5) : memref<?x?x?x?x?xf32> + linalg.fill(%buf, %f) : memref<?x?x?x?x?xf32>, f32 + return %buf : memref<?x?x?x?x?xf32> +} + +func @conv_3d_ncdhw(%arg0: memref<?x?x?x?x?xf32>, %arg1: memref<?x?x?x?x?xf32>, %arg2: memref<?x?x?x?x?xf32>) { + linalg.conv_3d_ncdhw %arg0, %arg1, %arg2 : (memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c3, %c3, %c3, %val) : (index, index, index, index, index, f32) -> (memref<?x?x?x?x?xf32>) + %in3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c8, %c8, %c8, %val) : (index, index, index, index, index, f32) -> (memref<?x?x?x?x?xf32>) + %out3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c6, %c6, %c6, %zero) : (index, index, index, index, index, f32) -> (memref<?x?x?x?x?xf32>) + + store %f10, %in3D_ncdhw[%c0, %c0, %c0, %c0, %c3] : memref<?x?x?x?x?xf32> + call @conv_3d_ncdhw(%in3D_ncdhw, %filter3D_ncdhw, %out3D_ncdhw) : (memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>) -> () + %out3D_ncdhw_ = memref_cast 
%out3D_ncdhw : memref<?x?x?x?x?xf32> to memref<*xf32> + call @print_memref_f32(%out3D_ncdhw_): (memref<*xf32>) -> () + + dealloc %filter3D_ncdhw : memref<?x?x?x?x?xf32> + dealloc %in3D_ncdhw : memref<?x?x?x?x?xf32> + dealloc %out3D_ncdhw : memref<?x?x?x?x?xf32> + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [ +// CHECK-SAME: [ +// CHECK-SAME: [108, 124, 124, 124, 108, 108], +// CHECK-COUNT-5: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ] +// CHECK-SAME: ] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ndhwc-call.mlir b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ndhwc-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/Conv/test-conv-3d-ndhwc-call.mlir @@ -0,0 +1,193 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" -convert-linalg-to-loops \ +// RUN: -convert-linalg-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=1,1,1,1,1" -test-conv-vectorization \ +// RUN: -convert-linalg-to-loops -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: 
-convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" -linalg-tile="linalg-tile-sizes=1,1,1,1,1" \ +// RUN: -test-conv-vectorization -convert-linalg-to-loops \ +// RUN: -test-vector-contraction-conversion=vector-outerproduct=0 \ +// RUN: -convert-vector-to-scf -convert-linalg-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func @print_memref_f32(memref<*xf32>) + +// Creates and returns a 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f +func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref<?x?x?x?x?xf32> { + %buf = alloc(%s1, %s2, %s3, %s4, %s5) : memref<?x?x?x?x?xf32> + linalg.fill(%buf, %f) : memref<?x?x?x?x?xf32>, f32 + return %buf : memref<?x?x?x?x?xf32> +} + +func @conv_3d_ndhwc(%arg0: memref<?x?x?x?x?xf32>, %arg1: memref<?x?x?x?x?xf32>, %arg2: memref<?x?x?x?x?xf32>) { + linalg.conv_3d_ndhwc %arg0, %arg1, %arg2 : (memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>) + return +} + + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c3, %c3, %c3, %c1, %val) : (index, index, index, index, index, f32) -> (memref<?x?x?x?x?xf32>) + %in3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c8, %c8, %c8, %c1, %val) : (index, index, index, index, index, f32) -> (memref<?x?x?x?x?xf32>) + %out3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c6, %c6, %c6, %c1, %zero) : (index, index, index, index, index, f32) -> (memref<?x?x?x?x?xf32>) + + store %f10, %in3D_ndhwc[%c0, %c0, %c0, %c3, %c0] : memref<?x?x?x?x?xf32> + call @conv_3d_ndhwc(%in3D_ndhwc, %filter3D_ndhwc, %out3D_ndhwc) : (memref<?x?x?x?x?xf32>, memref<?x?x?x?x?xf32>, 
memref<?x?x?x?x?xf32>) -> () + %out3D_ndhwc_ = memref_cast %out3D_ndhwc : memref<?x?x?x?x?xf32> to memref<*xf32> + call @print_memref_f32(%out3D_ndhwc_): (memref<*xf32>) -> () + + dealloc %filter3D_ndhwc : memref<?x?x?x?x?xf32> + dealloc %in3D_ndhwc : memref<?x?x?x?x?xf32> + dealloc %out3D_ndhwc : memref<?x?x?x?x?xf32> + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [ +// CHECK-SAME: [ +// CHECK-SAME: [108], +// CHECK-COUNT-3: [124], +// CHECK-COUNT-2: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// 
CHECK-SAME: ],
+// CHECK-NEXT: [
+// CHECK-SAME: [
+// CHECK-COUNT-6: [108]
+// CHECK-SAME: ],
+// CHECK-NEXT: [
+// CHECK-COUNT-6: [108]
+// CHECK-SAME: ],
+// CHECK-NEXT: [
+// CHECK-COUNT-6: [108]
+// CHECK-SAME: ],
+// CHECK-NEXT: [
+// CHECK-COUNT-6: [108]
+// CHECK-SAME: ],
+// CHECK-NEXT: [
+// CHECK-COUNT-6: [108]
+// CHECK-SAME: ],
+// CHECK-NEXT: [
+// CHECK-COUNT-6: [108]
+// CHECK-SAME: ]
+// CHECK-SAME: ],
+// CHECK-NEXT: [
+// CHECK-SAME: [
+// CHECK-COUNT-6: [108]
+// CHECK-SAME: ],
+// CHECK-NEXT: [
+// CHECK-COUNT-6: [108]
+// CHECK-SAME: ],
+// CHECK-NEXT: [
+// CHECK-COUNT-6: [108]
+// CHECK-SAME: ],
+// CHECK-NEXT: [
+// CHECK-COUNT-6: [108]
+// CHECK-SAME: ],
+// CHECK-NEXT: [
+// CHECK-COUNT-6: [108]
+// CHECK-SAME: ],
+// CHECK-NEXT: [
+// CHECK-COUNT-6: [108]
+// CHECK-SAME: ]
+// CHECK-SAME: ]
+// CHECK-SAME: ]
+// CHECK-SAME: ]
diff --git a/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir
new file mode 100644
--- /dev/null
+++ b/mlir/test/Conversion/LinalgToVector/linalg-to-vector.mlir
@@ -0,0 +1,172 @@
+// RUN: mlir-opt %s -test-conv-vectorization --cse | FileCheck %s
+
+// Every function below wraps a single convolution whose input and filter are
+// statically shaped. The pass is expected to replace it with two
+// vector.transfer_read ops, one vector.contract that reduces over all
+// vectorized dimensions, and a scalar store into the first output element.
+
+// CHECK-DAG: #[[$map0:.*]] = affine_map<(d0) -> (d0)>
+// CHECK-DAG: #[[$map1:.*]] = affine_map<(d0) -> ()>
+// CHECK-DAG: #[[$map2:.*]] = affine_map<(d0, d1, d2) -> (d1, d2)>
+// CHECK-DAG: #[[$map3:.*]] = affine_map<(d0, d1) -> (d0, d1)>
+// CHECK-DAG: #[[$map4:.*]] = affine_map<(d0, d1) -> ()>
+// CHECK-DAG: #[[$map5:.*]] = affine_map<(d0, d1, d2, d3) -> (d1, d2, d3)>
+// CHECK-DAG: #[[$map6:.*]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+// CHECK-DAG: #[[$map7:.*]] = affine_map<(d0, d1, d2) -> ()>
+// CHECK-DAG: #[[$map8:.*]] = affine_map<(d0, d1, d2, d3, d4) -> (d1, d2, d3, d4)>
+// CHECK-DAG: #[[$map9:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
+// CHECK-DAG: #[[$map10:.*]] = affine_map<(d0, d1, d2, d3) -> ()>
+
+func @conv_1d(%arg0: memref<3xf32>, %arg1: memref<3xf32>, %arg2: memref<?xf32>) {
+  linalg.conv_1d %arg0, %arg1, %arg2 : (memref<3xf32>, memref<3xf32>, memref<?xf32>)
+  return
+}
+
+// CHECK-LABEL: @conv_1d
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3xf32>
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3xf32>
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?xf32>
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[v0:.*]] = vector.transfer_read %[[arg0]][%[[c0]]], %[[cst]] : memref<3xf32>, vector<3xf32>
+// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]]], %[[cst]] : memref<3xf32>, vector<3xf32>
+// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map0]], #[[$map0]], #[[$map1]]], iterator_types = ["reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3xf32>, vector<3xf32> into f32
+// CHECK: store %[[v2]], %[[arg2]][%[[c0]]] : memref<?xf32>
+// CHECK: return
+
+func @conv_1d_ncw(%arg0: memref<1x3x3xf32>, %arg1: memref<1x3x3xf32>, %arg2: memref<?x?x?xf32>) {
+  linalg.conv_1d_ncw %arg0, %arg1, %arg2 : (memref<1x3x3xf32>, memref<1x3x3xf32>, memref<?x?x?xf32>)
+  return
+}
+
+// CHECK-LABEL: @conv_1d_ncw
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3xf32>
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3xf32>
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[v0:.*]] = vector.transfer_read %[[arg0]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3xf32>, vector<3x3xf32>
+// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3xf32>, vector<3x3xf32>
+// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32
+// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref<?x?x?xf32>
+// CHECK: return
+
+
+func @conv_1d_nwc(%arg0: memref<1x3x3xf32>, %arg1: memref<1x3x3xf32>, %arg2: memref<?x?x?xf32>) {
+  linalg.conv_1d_nwc %arg0, %arg1, %arg2 : (memref<1x3x3xf32>, memref<1x3x3xf32>, memref<?x?x?xf32>)
+  return
+}
+
+// CHECK-LABEL: @conv_1d_nwc
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3xf32>
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3xf32>
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[v0:.*]] = vector.transfer_read %[[arg0]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3xf32>, vector<3x3xf32>
+// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3xf32>, vector<3x3xf32>
+// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32
+// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref<?x?x?xf32>
+// CHECK: return
+
+func @conv_2d(%arg0: memref<3x3xf32>, %arg1: memref<3x3xf32>, %arg2: memref<?x?xf32>) {
+  linalg.conv_2d %arg0, %arg1, %arg2 : (memref<3x3xf32>, memref<3x3xf32>, memref<?x?xf32>)
+  return
+}
+
+// CHECK-LABEL: @conv_2d
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3x3xf32>
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3x3xf32>
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?xf32>
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[v0:.*]] = vector.transfer_read %[[arg0]][%[[c0]], %[[c0]]], %[[cst]] : memref<3x3xf32>, vector<3x3xf32>
+// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]]], %[[cst]] : memref<3x3xf32>, vector<3x3xf32>
+// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map3]], #[[$map3]], #[[$map4]]], iterator_types = ["reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3xf32>, vector<3x3xf32> into f32
+// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]]] : memref<?x?xf32>
+// CHECK: return
+
+func @conv_2d_nchw(%arg0: memref<1x3x3x3xf32>, %arg1: memref<1x3x3x3xf32>, %arg2: memref<?x?x?x?xf32>) {
+  linalg.conv_2d_nchw %arg0, %arg1, %arg2 : (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref<?x?x?x?xf32>)
+  return
+}
+
+// CHECK-LABEL: @conv_2d_nchw
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32>
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32>
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?x?xf32>
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[v0:.*]] = vector.transfer_read %[[arg0]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3xf32>, vector<3x3x3xf32>
+// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3xf32>, vector<3x3x3xf32>
+// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = ["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32
+// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref<?x?x?x?xf32>
+// CHECK: return
+
+func @conv_2d_nhwc(%arg0: memref<1x3x3x3xf32>, %arg1: memref<1x3x3x3xf32>, %arg2: memref<?x?x?x?xf32>) {
+  linalg.conv_2d_nhwc %arg0, %arg1, %arg2 : (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref<?x?x?x?xf32>)
+  return
+}
+
+// CHECK-LABEL: @conv_2d_nhwc
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32>
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3xf32>
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?x?xf32>
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[v0:.*]] = vector.transfer_read %[[arg0]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3xf32>, vector<3x3x3xf32>
+// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3xf32>, vector<3x3x3xf32>
+// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = ["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32
+// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref<?x?x?x?xf32>
+// CHECK: return
+
+func @conv_3d(%arg0: memref<3x3x3xf32>, %arg1: memref<3x3x3xf32>, %arg2: memref<?x?x?xf32>) {
+  linalg.conv_3d %arg0, %arg1, %arg2 : (memref<3x3x3xf32>, memref<3x3x3xf32>, memref<?x?x?xf32>)
+  return
+}
+
+// CHECK-LABEL: @conv_3d
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<3x3x3xf32>
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<3x3x3xf32>
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?xf32>
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[v0:.*]] = vector.transfer_read %[[arg0]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<3x3x3xf32>, vector<3x3x3xf32>
+// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<3x3x3xf32>, vector<3x3x3xf32>
+// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map6]], #[[$map6]], #[[$map7]]], iterator_types = ["reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3xf32>, vector<3x3x3xf32> into f32
+// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]]] : memref<?x?x?xf32>
+// CHECK: return
+
+func @conv_3d_ncdhw(%arg0: memref<1x3x3x3x3xf32>, %arg1: memref<1x3x3x3x3xf32>, %arg2: memref<?x?x?x?x?xf32>) {
+  linalg.conv_3d_ncdhw %arg0, %arg1, %arg2 : (memref<1x3x3x3x3xf32>, memref<1x3x3x3x3xf32>, memref<?x?x?x?x?xf32>)
+  return
+}
+
+// CHECK-LABEL: @conv_3d_ncdhw
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32>
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32>
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?x?x?xf32>
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[v0:.*]] = vector.transfer_read %[[arg0]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3x3xf32>, vector<3x3x3x3xf32>
+// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3x3xf32>, vector<3x3x3x3xf32>
+// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map9]], #[[$map9]], #[[$map10]]], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3x3xf32>, vector<3x3x3x3xf32> into f32
+// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref<?x?x?x?x?xf32>
+// CHECK: return
+
+func @conv_3d_ndhwc(%arg0: memref<1x3x3x3x3xf32>, %arg1: memref<1x3x3x3x3xf32>, %arg2: memref<?x?x?x?x?xf32>) {
+  linalg.conv_3d_ndhwc %arg0, %arg1, %arg2 : (memref<1x3x3x3x3xf32>, memref<1x3x3x3x3xf32>, memref<?x?x?x?x?xf32>)
+  return
+}
+
+// CHECK-LABEL: @conv_3d_ndhwc
+// CHECK-SAME: %[[arg0:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32>
+// CHECK-SAME: %[[arg1:[a-zA-Z0-9]+]]: memref<1x3x3x3x3xf32>
+// CHECK-SAME: %[[arg2:[a-zA-Z0-9]+]]: memref<?x?x?x?x?xf32>
+// CHECK: %[[c0:.*]] = constant 0 : index
+// CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32
+// CHECK: %[[v0:.*]] = vector.transfer_read %[[arg0]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3x3xf32>, vector<3x3x3x3xf32>
+// CHECK: %[[v1:.*]] = vector.transfer_read %[[arg1]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]], %[[cst]] : memref<1x3x3x3x3xf32>, vector<3x3x3x3xf32>
+// CHECK: %[[v2:.*]] = vector.contract {indexing_maps = [#[[$map9]], #[[$map9]], #[[$map10]]], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} %[[v0]], %[[v1]], %[[cst]] : vector<3x3x3x3xf32>, vector<3x3x3x3xf32> into f32
+// CHECK: store %[[v2]], %[[arg2]][%[[c0]], %[[c0]], %[[c0]], %[[c0]], %[[c0]]] : memref<?x?x?x?x?xf32>
+// CHECK: return
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -5,6 +5,7 @@
   TestExpandTanh.cpp
   TestCallGraph.cpp
   TestConstantFold.cpp
+  TestConvVectorization.cpp
   TestConvertCallOp.cpp
   TestConvertGPUKernelToCubin.cpp
TestConvertGPUKernelToHsaco.cpp diff --git a/mlir/test/lib/Transforms/TestConvVectorization.cpp b/mlir/test/lib/Transforms/TestConvVectorization.cpp new file mode 100644 --- /dev/null +++ b/mlir/test/lib/Transforms/TestConvVectorization.cpp @@ -0,0 +1,205 @@ +//===- LinalgToVector.cpp - Linalg to Vector dialect conversion -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/Linalg/Utils/Utils.h" +#include "mlir/Dialect/SCF/SCF.h" +#include "mlir/Dialect/StandardOps/IR/Ops.h" +#include "mlir/Dialect/Vector/VectorOps.h" +#include "mlir/IR/AffineExpr.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Transforms/DialectConversion.h" + +using namespace mlir; + +/// Creates zero constant of specified type. +static Value getZero(Location loc, Type type, PatternRewriter &rewriter) { + Attribute zeroAttr = rewriter.getZeroAttr(type); + return rewriter.create(loc, type, zeroAttr); +} + +namespace { + +/// Converts Convolution op into vector contraction. +/// +/// Conversion expects ConvOp to have dimensions marked in the *mask* as +/// false of size 1. This ensures that the ConvOp can be lowered to vector +/// contraction of dimensions marked in the *mask* as true. +/// +/// A good example is ConvNHWCOp which is 2D Conv op with channels as the last +/// dimension. For this op we contract last 3 dimensions. 
+/// The initial op definition looks like this: +/// ``` +/// linalg.conv_2d_nhwc %arg0, %arg1, %arg2 : +/// (memref<1x3x3x3xf32>, memref<1x3x3x3xf32>, memref) +/// ``` +/// This op can be expressed as a dot product between %arg0 (input) and +/// %arg1 (kernel) which is written into first entry of %arg2 (output). This is +/// the ConvOp this pass expects and converts into: +/// ``` +/// #map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)> +/// #map1 = affine_map<(d0, d1, d2) -> ()> +/// ..... +/// %0 = vector.transfer_read %arg0[%c0, %c0, %c0, %c0], %c0_f32 +/// : memref<1x3x3x3xf32>, vector<3x3x3xf32> +/// %1 = vector.transfer_read %arg1[%c0, %c0, %c0, %c0], %c0_f32 +/// : memref<1x3x3x3xf32>, vector<3x3x3xf32> +/// %2 = vector.contract {indexing_maps = [#map0, #map0, #map1], +/// iterator_types = ["reduction", "reduction", "reduction"]} %0, %1, +/// %c0_f32 : vector<3x3x3xf32>, vector<3x3x3xf32> into f32 +/// store %2, %arg2[%c0, %c0, %c0, %c0] : memref +/// ``` +/// where first 2 operations read input and kernel memory buffers into vectors. +/// Subsequently, they are contracted together and the result is written to +/// the first entry of the output buffer. 
+template <typename ConvOp, int N>
+struct ConvOpConversion : public OpRewritePattern<ConvOp> {
+  /// One flag per operand dimension: true when the dimension is folded into
+  /// the vector contraction, false when it is required to have size 1.
+  SmallVector<bool, 4> mask;
+
+  /// The mask is mandatory, so the base-class constructors are deliberately
+  /// not inherited: a pattern built without a mask would index an empty
+  /// vector in matchAndRewrite.
+  ConvOpConversion(MLIRContext *context, SmallVector<bool, 4> msk)
+      : OpRewritePattern<ConvOp>(context), mask(msk) {
+    assert(mask.size() == N && "Mask size does not match rank");
+  }
+
+  /// Replaces `op` with two vector.transfer_read ops (input and kernel), a
+  /// vector.contract reducing over every masked dimension, and a store of
+  /// the scalar result into the first element of the output buffer.
+  ///
+  /// Returns failure, leaving `op` untouched, unless both the input and the
+  /// kernel are statically shaped with rank N, size 1 in every unmasked
+  /// dimension and size `dimSize` in every masked dimension.
+  LogicalResult matchAndRewrite(ConvOp op,
+                                PatternRewriter &rewriter) const override {
+    // Only dimensions of exactly this size are vectorized. `unsigned` is
+    // used instead of the non-standard `uint` typedef for portability.
+    const unsigned dimSize = 3;
+    Location loc = op.getLoc();
+    MLIRContext *context = op.getContext();
+
+    ShapedType inShapeType = op.getInputShapedType(0);
+    ShapedType kShapeType = op.getInputShapedType(1);
+
+    if (!inShapeType.hasStaticShape() || !kShapeType.hasStaticShape())
+      return failure();
+
+    // The loop below indexes both shapes with every i in [0, N).
+    if (inShapeType.getRank() != N || kShapeType.getRank() != N)
+      return failure();
+
+    ArrayRef<int64_t> inShape = inShapeType.getShape();
+    ArrayRef<int64_t> kShape = kShapeType.getShape();
+
+    SmallVector<AffineExpr, 4> mapping;
+    // Fail to apply when the size of not vectorized dimension is not 1 or
+    // when the size of vectorized dimension is not dimSize.
+    for (unsigned i = 0; i < N; i++) {
+      const int64_t expectedSize = mask[i] ? dimSize : 1;
+      if (inShape[i] != expectedSize || kShape[i] != expectedSize)
+        return failure();
+
+      if (mask[i])
+        mapping.push_back(getAffineDimExpr(i, context));
+    }
+
+    Value input = op.getInput(0);
+    Value kernel = op.getInput(1);
+    Value output = op.getOutputBuffer(0);
+
+    unsigned rank = inShapeType.getRank();
+    unsigned numDims = mapping.size();
+    Type elemType = inShapeType.getElementType();
+    Type indexType = rewriter.getIndexType();
+
+    // `map` selects the masked dimensions of the memref for the reads; all
+    // reads and the final store start at index 0 in every dimension.
+    auto map = AffineMap::get(rank, 0, mapping, context);
+    SmallVector<Value, 4> zeros(rank, getZero(loc, indexType, rewriter));
+    auto vecType =
+        VectorType::get(SmallVector<int64_t, 4>(numDims, dimSize), elemType);
+
+    auto inputVec = rewriter.create<vector::TransferReadOp>(loc, vecType, input,
+                                                            zeros, map);
+
+    auto kernelVec = rewriter.create<vector::TransferReadOp>(
+        loc, vecType, kernel, zeros, map);
+
+    auto acc = getZero(loc, elemType, rewriter);
+
+    // Both operands use the identity map and the accumulator is a scalar,
+    // so every dimension of the contraction is a reduction.
+    std::array<AffineMap, 3> indexingMaps{
+        AffineMap::getMultiDimIdentityMap(numDims, context),
+        AffineMap::getMultiDimIdentityMap(numDims, context),
+        AffineMap::get(numDims, 0, {}, context)};
+
+    std::vector<StringRef> iteratorTypes(numDims, "reduction");
+
+    auto result = rewriter.create<vector::ContractionOp>(
+        loc, inputVec, kernelVec, acc,
+        rewriter.getAffineMapArrayAttr(indexingMaps),
+        rewriter.getStrArrayAttr(iteratorTypes));
+
+    rewriter.create<StoreOp>(loc, result, output, ValueRange(zeros));
+    rewriter.eraseOp(op);
+    return success();
+  }
+};
+
+/// Registers one ConvOpConversion pattern per supported convolution op. The
+/// mask of each pattern leaves the leading batch dimension (when the op has
+/// one) out of the contraction and vectorizes every other dimension.
+///
+/// NOTE(review): the op class names below were restored after the template
+/// arguments were lost from the reviewed text; confirm them against the
+/// Linalg named-op definitions.
+void populateLinalgToVectorPatterns(MLIRContext *context,
+                                    OwningRewritePatternList &patterns) {
+  patterns.insert<ConvOpConversion<linalg::ConvWOp, 1>>(
+      context, SmallVector<bool, 4>{true});
+
+  patterns.insert<ConvOpConversion<linalg::ConvNWCOp, 3>>(
+      context, SmallVector<bool, 4>{false, true, true});
+
+  patterns.insert<ConvOpConversion<linalg::ConvNCWOp, 3>>(
+      context, SmallVector<bool, 4>{false, true, true});
+
+  patterns.insert<ConvOpConversion<linalg::ConvHWOp, 2>>(
+      context, SmallVector<bool, 4>{true, true});
+
+  patterns.insert<ConvOpConversion<linalg::ConvNHWCOp, 4>>(
+      context, SmallVector<bool, 4>{false, true, true, true});
+
+  patterns.insert<ConvOpConversion<linalg::ConvNCHWOp, 4>>(
+      context, SmallVector<bool, 4>{false, true, true, true});
+
+  patterns.insert<ConvOpConversion<linalg::ConvDHWOp, 3>>(
+      context, SmallVector<bool, 4>{true, true, true});
+
+  patterns.insert<ConvOpConversion<linalg::ConvNDHWCOp, 5>>(
+      context, SmallVector<bool, 4>{false, true, true, true, true});
+
+  patterns.insert<ConvOpConversion<linalg::ConvNCDHWOp, 5>>(
+      context, SmallVector<bool, 4>{false, true, true, true, true});
+}
+
+/// A test pass converting Linalg convolution ops into Vector ops.
+class TestConvVectorization + : public PassWrapper> { + void runOnOperation() override; + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + registry.insert(); + registry.insert(); + } +}; +} // namespace + +void TestConvVectorization::runOnOperation() { + MLIRContext *context = &getContext(); + ModuleOp module = getOperation(); + + ConversionTarget target(*context); + target.addLegalDialect(); + target.addLegalOp(); + target.addLegalOp(); + + OwningRewritePatternList patterns; + populateLinalgToVectorPatterns(context, patterns); + + if (failed(applyPartialConversion(module, target, patterns))) + return signalPassFailure(); +} + +namespace mlir { +void registerTestConvVectorization() { + PassRegistration testTransformPatternsPass( + "test-conv-vectorization", "Test vectorization of convolutions"); +} +} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -45,6 +45,7 @@ void registerTestBufferPlacementPreparationPass(); void registerTestCallGraphPass(); void registerTestConstantFold(); +void registerTestConvVectorization(); void registerTestConvertGPUKernelToCubinPass(); void registerTestConvertGPUKernelToHsacoPass(); void registerTestDominancePass(); @@ -91,6 +92,7 @@ registerTestAffineLoopUnswitchingPass(); registerTestLoopPermutationPass(); registerTestCallGraphPass(); + registerTestConvVectorization(); registerTestConstantFold(); #if MLIR_CUDA_CONVERSIONS_ENABLED registerTestConvertGPUKernelToCubinPass();