diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc @@ -91,3 +91,102 @@ O(n, f, d, h, w) = std_addf(std_mulf( I(n, c, d + kd, h + kh, w + kw), K(f, c, kd, kh, kw))); } + +ods_def: +def conv_1d_input_nwc_filter_wcf(I: f32(N, W, C), K: f32(KW, C, F)) -> (O: f32(N, W, F)) + attr(strides: 1xi64, dilations: 1xi64) +""" A 1-D convolution given NWC layout input and WCF layout filter. + +Computes a 1-D convolution given 3-D input and filter. The data layout +of input is NWC and the data layout of filter is WCF. + +""" +{ + O(n, w, f) = std_addf( + std_mulf(I(n, w * strides[0] + kw * dilations[0], c), K(kw, c, f))); +} + +ods_def: +def conv_1d_input_ncw_filter_wcf(I: f32(N, C, W), K: f32(KW, C, F)) -> (O: f32(N, F, W)) + attr(strides: 1xi64, dilations: 1xi64) +""" A 1-D convolution given NCW layout input and WCF layout filter. + +Computes a 1-D convolution given 3-D input and filter. The data layout +of input is NCW and the data layout of filter is WCF. + +""" +{ + O(n, f, w) = std_addf( + std_mulf(I(n, c, w * strides[0] + kw * dilations[0]), K(kw, c, f))); +} + +ods_def: +def conv_2d_input_nhwc_filter_hwcf(I: f32(N, H, W, C), K: f32(KH, KW, C, F)) -> (O: f32(N, H, W, F)) + attr(strides: 2xi64, dilations: 2xi64) +""" A 2-D convolution given NHWC layout input and HWCF layout filter. + +Computes a 2-D convolution given 4-D input and filter. The data layout +of input is NHWC and the data layout of filter is HWCF. + +""" +{ + O(n, h, w, f) = + std_addf(std_mulf(I(n, h * strides[0] + kh * dilations[0], + w * strides[1] + kw * dilations[1], c), + K(kh, kw, c, f))); +} + +ods_def: +def conv_2d_input_nchw_filter_hwcf + (I: f32(N, C, H, W), K: f32(KH, KW, C, F)) + -> (O: f32(N, F, H, W)) + attr(strides: 2xi64, dilations: 2xi64) +""" A 2-D convolution given NCHW layout input and HWCF layout filter. + +Computes a 2-D convolution given 4-D input and filter. The data layout +of input is NCHW and the data layout of filter is HWCF. + +""" +{ + O(n, f, h, w) = + std_addf(std_mulf(I(n, c, h * strides[0] + kh * dilations[0], + w * strides[1] + kw * dilations[1]), + K(kh, kw, c, f))); +} + +ods_def: +def conv_3d_input_ndhwc_filter_dhwcf + (I: f32(N, D, H, W, C), K: f32(KD, KH, KW, C, F)) + -> (O: f32(N, D, H, W, F)) + attr(strides: 3xi64, dilations: 3xi64) +""" A 3-D convolution given NDHWC layout input and DHWCF layout filter. + +Computes a 3-D convolution given 5-D input and filter. The data layout +of input is NDHWC and the data layout of filter is DHWCF. + +""" +{ + O(n, d, h, w, f) = + std_addf(std_mulf(I(n, d * strides[0] + kd * dilations[0], + h * strides[1] + kh * dilations[1], + w * strides[2] + kw * dilations[2], c), + K(kd, kh, kw, c, f))); +} + +ods_def: +def conv_3d_input_ncdhw_filter_dhwcf + (I: f32(N, C, D, H, W), K: f32(KD, KH, KW, C, F)) + -> (O: f32(N, F, D, H, W)) + attr(strides: 3xi64, dilations: 3xi64) +""" A 3-D convolution given NCDHW layout input and DHWCF layout filter. + +Computes a 3-D convolution given 5-D input and filter. The data layout +of input is NCDHW and the data layout of filter is DHWCF. + +""" +{ + O(n, f, d, h, w) = std_addf(std_mulf( + I(n, c, d * strides[0] + kd * dilations[0], + h * strides[1] + kh * dilations[1], w * strides[2] + kw * dilations[2]), + K(kd, kh, kw, c, f))); +} diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-input-ncw-filter-wcf-call.mlir @@ -0,0 +1,70 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" -convert-linalg-to-loops -convert-scf-to-std \ +// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func private @print_memref_f32(memref<*xf32>) + +// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f +func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { + %buf = alloc(%s1, %s2, %s3) : memref + linalg.fill(%buf, %f) : memref, f32 + return %buf : memref +} + +func @conv_1d_input_ncw_filter_wcf(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_1d_input_ncw_filter_wcf {dilations = dense<1> : tensor<1xi64>, + strides = dense<1> : tensor<1xi64>} + ins (%arg0, %arg1: memref, memref) + outs (%arg2: memref) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter1D_ncw = call @alloc_3d_filled_f32(%c3, %c1, %c1, %val) : (index, index, index, f32) -> (memref) + %in1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c8, %val) : (index, index, index, f32) -> (memref) + %out1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c6, %zero) : (index, index, index, f32) -> (memref) + + store %f10, %in1D_ncw[%c0, %c0, %c3] : memref + call @conv_1d_input_ncw_filter_wcf(%in1D_ncw, %filter1D_ncw, %out1D_ncw) : (memref, memref, memref) -> () + %out1D_ncw_ = memref_cast %out1D_ncw : memref to memref<*xf32> + call @print_memref_f32(%out1D_ncw_): (memref<*xf32>) -> () + + dealloc %filter1D_ncw : memref + dealloc %in1D_ncw : memref + dealloc %out1D_ncw : memref + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [12, 28, 28, 28, 12, 12] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-1d-input-nwc-filter-wcf-call.mlir @@ -0,0 +1,81 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" -convert-linalg-to-loops -convert-scf-to-std \ +// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func private @print_memref_f32(memref<*xf32>) + +// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f +func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { + %buf = alloc(%s1, %s2, %s3) : memref + linalg.fill(%buf, %f) : memref, f32 + return %buf : memref +} + +func @conv_1d_input_nwc_filter_wcf(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_1d_input_nwc_filter_wcf {dilations = dense<1> : tensor<1xi64>, + strides = dense<1> : tensor<1xi64>} + ins (%arg0, %arg1: memref, memref) + outs (%arg2: memref) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter1D_nwc = call @alloc_3d_filled_f32(%c3, %c1, %c1, %val) : (index, index, index, f32) -> (memref) + %in1D_nwc = call @alloc_3d_filled_f32(%c3, %c8, %c1, %val) : (index, index, index, f32) -> (memref) + %out1D_nwc = call @alloc_3d_filled_f32(%c3, %c6, %c1, %zero) : (index, index, index, f32) -> (memref) + + store %f10, %in1D_nwc[%c0, %c3, %c0] : memref + call @conv_1d_input_nwc_filter_wcf(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (memref, memref, memref) -> () + %out1D_nwc_ = memref_cast %out1D_nwc : memref to memref<*xf32> + call @print_memref_f32(%out1D_nwc_): (memref<*xf32>) -> () + + dealloc %filter1D_nwc : memref + dealloc %in1D_nwc : memref + dealloc %out1D_nwc : memref + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [12], +// CHECK-COUNT-3: [28], +// CHECK-NEXT: [12], +// CHECK-NEXT: [12] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-5: [12], +// CHECK-NEXT: [12] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-5: [12], +// CHECK-NEXT: [12] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-input-nchw-filter-hwcf-call.mlir @@ -0,0 +1,83 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" -convert-linalg-to-loops -convert-scf-to-std \ +// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func private @print_memref_f32(memref<*xf32>) + +// Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f +func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { + %buf = alloc(%s1, %s2, %s3, %s4) : memref + linalg.fill(%buf, %f) : memref, f32 + return %buf : memref +} + +func @conv_2d_input_nchw_filter_hwcf(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_2d_input_nchw_filter_hwcf {dilations = dense<1> : tensor<2xi64>, + strides = dense<1> : tensor<2xi64>} + ins (%arg0, %arg1: memref, memref) + outs (%arg2: memref) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter2D_nchw = call @alloc_4d_filled_f32(%c3, %c3, %c1, %c1, %val) : (index, index, index, index, f32) -> (memref) + %in2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c8, %c8, %val) : (index, index, index, index, f32) -> (memref) + %out2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (memref) + + store %f10, %in2D_nchw[%c0, %c0, %c0, %c3] : memref + call @conv_2d_input_nchw_filter_hwcf(%in2D_nchw, %filter2D_nchw, %out2D_nchw) : (memref, memref, memref) -> () + %out2D_nchw_ = memref_cast %out2D_nchw : memref to memref<*xf32> + call @print_memref_f32(%out2D_nchw_): (memref<*xf32>) -> () + + dealloc %filter2D_nchw : memref + dealloc %in2D_nchw : memref + dealloc %out2D_nchw : memref + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [ +// CHECK-SAME: [36, 52, 52, 52, 36, 36], +// CHECK-COUNT-5: [36, 36, 36, 36, 36, 36] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [36, 36, 36, 36, 36, 36] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [36, 36, 36, 36, 36, 36] +// CHECK-SAME: ] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-2d-input-nhwc-filter-hwcf-call.mlir @@ -0,0 +1,129 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" -convert-linalg-to-loops -convert-scf-to-std \ +// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,3,3,2" \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func private @print_memref_f32(memref<*xf32>) + +// Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f +func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { + %buf = alloc(%s1, %s2, %s3, %s4) : memref + linalg.fill(%buf, %f) : memref, f32 + return %buf : memref +} + +func @conv_2d_input_nhwc_filter_hwcf(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_2d_input_nhwc_filter_hwcf {dilations = dense<1> : tensor<2xi64>, + strides = dense<1> : tensor<2xi64>} + ins (%arg0, %arg1: memref, memref) + outs (%arg2: memref) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter2D_nhwc = call @alloc_4d_filled_f32(%c3, %c3, %c3, %c1, %val) :(index, index, index, index, f32) -> (memref) + %in2D_nhwc = call @alloc_4d_filled_f32(%c3, %c8, %c8, %c3, %val) : (index, index, index, index, f32) -> (memref) + %out2D_nhwc = call @alloc_4d_filled_f32(%c3, %c6, %c6, %c1, %zero) : (index, index, index, index, f32) -> (memref) + + store %f10, %in2D_nhwc[%c0, %c0, %c3, %c0] : memref + call @conv_2d_input_nhwc_filter_hwcf(%in2D_nhwc, %filter2D_nhwc, %out2D_nhwc) : (memref, memref, memref) -> () + %out2D_nhwc_ = memref_cast %out2D_nhwc : memref to memref<*xf32> + call @print_memref_f32(%out2D_nhwc_): (memref<*xf32>) -> () + + dealloc %filter2D_nhwc : memref + dealloc %in2D_nhwc : memref + dealloc %out2D_nhwc : memref + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [ +// CHECK-SAME: [108], +// CHECK-COUNT-3: [124], +// CHECK-COUNT-2: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-input-ncdhw-filter-dhwcf-call.mlir @@ -0,0 +1,90 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" -convert-linalg-to-loops -convert-scf-to-std \ +// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,5,5,5" \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func private @print_memref_f32(memref<*xf32>) + +// Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f +func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { + %buf = alloc(%s1, %s2, %s3, %s4, %s5) : memref + linalg.fill(%buf, %f) : memref, f32 + return %buf : memref +} + +func @conv_3d_input_ncdhw_filter_dhwcf(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_3d_input_ncdhw_filter_dhwcf {dilations = dense<1> : tensor<3xi64>, + strides = dense<1> : tensor<3xi64>} + ins (%arg0, %arg1: memref, memref) + outs (%arg2: memref) + return +} + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter3D_ncdhw = call @alloc_5d_filled_f32(%c3, %c3, %c3, %c1, %c1, %val) : (index, index, index, index, index, f32) -> (memref) + %in3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c8, %c8, %c8, %val) : (index, index, index, index, index, f32) -> (memref) + %out3D_ncdhw = call @alloc_5d_filled_f32(%c1, %c1, %c6, %c6, %c6, %zero) : (index, index, index, index, index, f32) -> (memref) + + store %f10, %in3D_ncdhw[%c0, %c0, %c0, %c0, %c3] : memref + call @conv_3d_input_ncdhw_filter_dhwcf(%in3D_ncdhw, %filter3D_ncdhw, %out3D_ncdhw) : (memref, memref, memref) -> () + %out3D_ncdhw_ = memref_cast %out3D_ncdhw : memref to memref<*xf32> + call @print_memref_f32(%out3D_ncdhw_): (memref<*xf32>) -> () + + dealloc %filter3D_ncdhw : memref + dealloc %in3D_ncdhw : memref + dealloc %out3D_ncdhw : memref + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [ +// CHECK-SAME: [ +// CHECK-SAME: [108, 124, 124, 124, 108, 108], +// CHECK-COUNT-5: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108, 108, 108, 108, 108, 108] +// CHECK-SAME: ] +// CHECK-SAME: ] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir new file mode 100644 --- /dev/null +++ b/mlir/integration_test/Dialect/Linalg/CPU/test-conv-3d-input-ndhwc-filter-dhwcf-call.mlir @@ -0,0 +1,192 @@ +// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" -convert-linalg-to-loops -convert-scf-to-std \ +// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,5,5,5" \ +// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,1,3,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \ +// RUN: mlir-cpu-runner -e main -entry-point-result=void \ +// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \ +// RUN: | FileCheck %s + +func private @print_memref_f32(memref<*xf32>) + +// Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f +func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { + %buf = alloc(%s1, %s2, %s3, %s4, %s5) : memref + linalg.fill(%buf, %f) : memref, f32 + return %buf : memref +} + +func @conv_3d_input_ndhwc_filter_dhwcf(%arg0: memref, %arg1: memref, %arg2: memref) { + linalg.conv_3d_input_ndhwc_filter_dhwcf {dilations = dense<1> : tensor<3xi64>, + strides = dense<1> : tensor<3xi64>} + ins (%arg0, %arg1: memref, memref) + outs (%arg2: memref) + return +} + + +func @main() { + %c0 = constant 0 : index + %c1 = constant 1 : index + %c3 = constant 3 : index + %c6 = constant 6 : index + %c8 = constant 8 : index + %f10 = constant 10.00000e+00 : f32 + %val = constant 2.00000e+00 : f32 + %zero = constant 0.00000e+00 : f32 + + %filter3D_ndhwc = call @alloc_5d_filled_f32(%c3, %c3, %c3, %c1, %c1, %val) : (index, index, index, index, index, f32) -> (memref) + %in3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c8, %c8, %c8, %c1, %val) : (index, index, index, index, index, f32) -> (memref) + %out3D_ndhwc = call @alloc_5d_filled_f32(%c1, %c6, %c6, %c6, %c1, %zero) : (index, index, index, index, index, f32) -> (memref) + + store %f10, %in3D_ndhwc[%c0, %c0, %c0, %c3, %c0] : memref + call @conv_3d_input_ndhwc_filter_dhwcf(%in3D_ndhwc, %filter3D_ndhwc, %out3D_ndhwc) : (memref, memref, memref) -> () + %out3D_ndhwc_ = memref_cast %out3D_ndhwc : memref to memref<*xf32> + call @print_memref_f32(%out3D_ndhwc_): (memref<*xf32>) -> () + + dealloc %filter3D_ndhwc : memref + dealloc %in3D_ndhwc : memref + dealloc %out3D_ndhwc : memref + return +} + +// CHECK: Unranked Memref {{.*}} +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-SAME: [ +// CHECK-SAME: [ +// CHECK-SAME: [108], +// CHECK-COUNT-3: [124], +// CHECK-COUNT-2: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-SAME: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ], +// CHECK-NEXT: [ +// CHECK-COUNT-6: [108] +// CHECK-SAME: ] +// CHECK-SAME: ] +// CHECK-SAME: ] +// CHECK-SAME: ] diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -560,27 +560,39 @@ populateVectorizationPatterns(tiling, promotion, vectorization, tileSizes, context); + populateVectorizationPatterns( + tiling, promotion, vectorization, tileSizes, context); populateVectorizationPatterns(tiling, promotion, vectorization, tileSizes, context); + populateVectorizationPatterns( + tiling, promotion, vectorization, tileSizes, context); populateVectorizationPatterns(tiling, promotion, vectorization, tileSizes, context); populateVectorizationPatterns(tiling, promotion, vectorization, tileSizes, context); + populateVectorizationPatterns( + tiling, promotion, vectorization, tileSizes, context); populateVectorizationPatterns(tiling, promotion, vectorization, tileSizes, context); + populateVectorizationPatterns( + tiling, promotion, vectorization, tileSizes, context); populateVectorizationPatterns(tiling, promotion, vectorization, tileSizes, context); populateVectorizationPatterns( tiling, promotion, vectorization, tileSizes, context); + populateVectorizationPatterns( + tiling, promotion, vectorization, tileSizes, context); populateVectorizationPatterns( tiling, promotion, vectorization, tileSizes, context); + populateVectorizationPatterns( + tiling, promotion, vectorization, tileSizes, context); patterns.push_back(std::move(tiling)); patterns.push_back(std::move(promotion));