diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
@@ -736,6 +736,228 @@
             - !ScalarExpression
               scalar_arg: I
 --- !LinalgOpConfig
+metadata: !LinalgOpMetadata
+  name: pooling_ndhwc_sum
+  cpp_class_name: PoolingNdhwcSumOp
+  doc: |-
+    Performs 3D sum pooling.
+
+    Numeric casting is performed on the input operand, promoting it to the same
+    data type as the accumulator/output.
+structured_op: !LinalgStructuredOpConfig
+  args:
+  - !LinalgOperandDefConfig
+    name: I
+    usage: InputOperand
+    type_var: T1
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+      s13, s14, s15, s16] -> (s0, s1, s2, s3, s4)>
+  - !LinalgOperandDefConfig
+    name: K
+    usage: InputOperand
+    type_var: T2
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+      s13, s14, s15, s16] -> (s5, s6, s7)>
+  - !LinalgOperandDefConfig
+    name: O
+    usage: OutputOperand
+    type_var: U
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+      s13, s14, s15, s16] -> (s0, s8, s9, s10, s4)>
+  - !LinalgOperandDefConfig
+    name: strides
+    usage: IndexAttribute
+    type_var: I64
+    attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
+      s12, s13, s14, s15, s16] -> (s11, s12, s13)>
+  - !LinalgOperandDefConfig
+    name: dilations
+    usage: IndexAttribute
+    type_var: I64
+    attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
+      s12, s13, s14, s15, s16] -> (s14, s15, s16)>
+  indexing_maps: !LinalgIndexingMapsConfig
+    static_indexing_maps:
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
+      s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d0, d1 * s11 + d4 * s14, d2 *
+      s12 + d5 * s15, d3 * s13 + d6 * s16, d7)>
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
+      s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d4, d5, d6)>
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
+      s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d0, d1, d2, d3, d7)>
+  iterator_types:
+  - parallel
+  - parallel
+  - parallel
+  - parallel
+  - reduction
+  - reduction
+  - reduction
+  - parallel
+  assignments:
+  - !ScalarAssign
+    arg: O
+    value: !ScalarExpression
+      scalar_apply:
+        fn_name: add
+        operands:
+        - !ScalarExpression
+          scalar_arg: O
+        - !ScalarExpression
+          symbolic_cast:
+            type_var: U
+            operands:
+            - !ScalarExpression
+              scalar_arg: I
+--- !LinalgOpConfig
+metadata: !LinalgOpMetadata
+  name: pooling_ndhwc_max
+  cpp_class_name: PoolingNdhwcMaxOp
+  doc: |-
+    Performs 3D max pooling.
+
+    Numeric casting is performed on the input operand, promoting it to the same
+    data type as the accumulator/output.
+structured_op: !LinalgStructuredOpConfig
+  args:
+  - !LinalgOperandDefConfig
+    name: I
+    usage: InputOperand
+    type_var: T1
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+      s13, s14, s15, s16] -> (s0, s1, s2, s3, s4)>
+  - !LinalgOperandDefConfig
+    name: K
+    usage: InputOperand
+    type_var: T2
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+      s13, s14, s15, s16] -> (s5, s6, s7)>
+  - !LinalgOperandDefConfig
+    name: O
+    usage: OutputOperand
+    type_var: U
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+      s13, s14, s15, s16] -> (s0, s8, s9, s10, s4)>
+  - !LinalgOperandDefConfig
+    name: strides
+    usage: IndexAttribute
+    type_var: I64
+    attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
+      s12, s13, s14, s15, s16] -> (s11, s12, s13)>
+  - !LinalgOperandDefConfig
+    name: dilations
+    usage: IndexAttribute
+    type_var: I64
+    attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
+      s12, s13, s14, s15, s16] -> (s14, s15, s16)>
+  indexing_maps: !LinalgIndexingMapsConfig
+    static_indexing_maps:
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
+      s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d0, d1 * s11 + d4 * s14, d2 *
+      s12 + d5 * s15, d3 * s13 + d6 * s16, d7)>
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
+      s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d4, d5, d6)>
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
+      s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d0, d1, d2, d3, d7)>
+  iterator_types:
+  - parallel
+  - parallel
+  - parallel
+  - parallel
+  - reduction
+  - reduction
+  - reduction
+  - parallel
+  assignments:
+  - !ScalarAssign
+    arg: O
+    value: !ScalarExpression
+      scalar_apply:
+        fn_name: max
+        operands:
+        - !ScalarExpression
+          scalar_arg: O
+        - !ScalarExpression
+          symbolic_cast:
+            type_var: U
+            operands:
+            - !ScalarExpression
+              scalar_arg: I
+--- !LinalgOpConfig
+metadata: !LinalgOpMetadata
+  name: pooling_ndhwc_min
+  cpp_class_name: PoolingNdhwcMinOp
+  doc: |-
+    Performs 3D min pooling.
+
+    Numeric casting is performed on the input operand, promoting it to the same
+    data type as the accumulator/output.
+structured_op: !LinalgStructuredOpConfig
+  args:
+  - !LinalgOperandDefConfig
+    name: I
+    usage: InputOperand
+    type_var: T1
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+      s13, s14, s15, s16] -> (s0, s1, s2, s3, s4)>
+  - !LinalgOperandDefConfig
+    name: K
+    usage: InputOperand
+    type_var: T2
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+      s13, s14, s15, s16] -> (s5, s6, s7)>
+  - !LinalgOperandDefConfig
+    name: O
+    usage: OutputOperand
+    type_var: U
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12,
+      s13, s14, s15, s16] -> (s0, s8, s9, s10, s4)>
+  - !LinalgOperandDefConfig
+    name: strides
+    usage: IndexAttribute
+    type_var: I64
+    attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
+      s12, s13, s14, s15, s16] -> (s11, s12, s13)>
+  - !LinalgOperandDefConfig
+    name: dilations
+    usage: IndexAttribute
+    type_var: I64
+    attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11,
+      s12, s13, s14, s15, s16] -> (s14, s15, s16)>
+  indexing_maps: !LinalgIndexingMapsConfig
+    static_indexing_maps:
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
+      s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d0, d1 * s11 + d4 * s14, d2 *
+      s12 + d5 * s15, d3 * s13 + d6 * s16, d7)>
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
+      s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d4, d5, d6)>
+    - affine_map<(d0, d1, d2, d3, d4, d5, d6, d7)[s0, s1, s2, s3, s4, s5, s6, s7,
+      s8, s9, s10, s11, s12, s13, s14, s15, s16] -> (d0, d1, d2, d3, d7)>
+  iterator_types:
+  - parallel
+  - parallel
+  - parallel
+  - parallel
+  - reduction
+  - reduction
+  - reduction
+  - parallel
+  assignments:
+  - !ScalarAssign
+    arg: O
+    value: !ScalarExpression
+      scalar_apply:
+        fn_name: min
+        operands:
+        - !ScalarExpression
+          scalar_arg: O
+        - !ScalarExpression
+          symbolic_cast:
+            type_var: U
+            operands:
+            - !ScalarExpression
+              scalar_arg: I
+--- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: fill_rng_2d
   cpp_class_name: FillRng2DOp
diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
--- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
+++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
@@ -184,6 +184,62 @@
                D.c]))


+@linalg_structured_op
+def pooling_ndhwc_sum(
+    I=TensorDef(T1, S.N, S.D, S.H, S.W, S.C),
+    K=TensorDef(T2, S.KD, S.KH, S.KW, index_dims=[D.kd, D.kh, D.kw]),
+    O=TensorDef(U, S.N, S.OD, S.OH, S.OW, S.C, output=True),
+    strides=AttributeDef(S.SD, S.SH, S.SW),
+    dilations=AttributeDef(S.DD, S.DH, S.DW)):
+  """Performs 3D sum pooling.
+
+  Numeric casting is performed on the input operand, promoting it to the same
+  data type as the accumulator/output.
+  """
+  domain(D.n, D.od, D.oh, D.ow, D.kd, D.kh, D.kw, D.c)
+  O[D.n, D.od, D.oh, D.ow, D.c] += cast(
+      U, I[D.n, D.od * S.SD + D.kd * S.DD, D.oh * S.SH + D.kh * S.DH,
+           D.ow * S.SW + D.kw * S.DW, D.c])
+
+
+@linalg_structured_op
+def pooling_ndhwc_max(
+    I=TensorDef(T1, S.N, S.D, S.H, S.W, S.C),
+    K=TensorDef(T2, S.KD, S.KH, S.KW, index_dims=[D.kd, D.kh, D.kw]),
+    O=TensorDef(U, S.N, S.OD, S.OH, S.OW, S.C, output=True),
+    strides=AttributeDef(S.SD, S.SH, S.SW),
+    dilations=AttributeDef(S.DD, S.DH, S.DW)):
+  """Performs 3D max pooling.
+
+  Numeric casting is performed on the input operand, promoting it to the same
+  data type as the accumulator/output.
+  """
+  domain(D.n, D.od, D.oh, D.ow, D.kd, D.kh, D.kw, D.c)
+  O[D.n, D.od, D.oh, D.ow, D.c] = ReduceFn.max(D.kd, D.kh, D.kw)(
+      cast(
+          U, I[D.n, D.od * S.SD + D.kd * S.DD, D.oh * S.SH + D.kh * S.DH,
+               D.ow * S.SW + D.kw * S.DW, D.c]))
+
+
+@linalg_structured_op
+def pooling_ndhwc_min(
+    I=TensorDef(T1, S.N, S.D, S.H, S.W, S.C),
+    K=TensorDef(T2, S.KD, S.KH, S.KW, index_dims=[D.kd, D.kh, D.kw]),
+    O=TensorDef(U, S.N, S.OD, S.OH, S.OW, S.C, output=True),
+    strides=AttributeDef(S.SD, S.SH, S.SW),
+    dilations=AttributeDef(S.DD, S.DH, S.DW)):
+  """Performs 3D min pooling.
+
+  Numeric casting is performed on the input operand, promoting it to the same
+  data type as the accumulator/output.
+  """
+  domain(D.n, D.od, D.oh, D.ow, D.kd, D.kh, D.kw, D.c)
+  O[D.n, D.od, D.oh, D.ow, D.c] = ReduceFn.min(D.kd, D.kh, D.kw)(
+      cast(
+          U, I[D.n, D.od * S.SD + D.kd * S.DD, D.oh * S.SH + D.kh * S.DH,
+               D.ow * S.SW + D.kw * S.DW, D.c]))
+
+
 @linalg_structured_op
 def fill_rng_2d(
     min=ScalarDef(F64),
diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir
--- a/mlir/test/Dialect/Linalg/named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/named-ops.mlir
@@ -518,3 +518,105 @@
     outs(%output: memref<1x2x2x1xf32>)
   return
 }
+
+// -----
+
+// CHECK-LABEL: func @pooling_ndhwc_sum_tensor
+// CHECK: %{{.+}} = linalg.pooling_ndhwc_sum
+// CHECK-SAME: dilations = dense<1> : tensor<3xi64>
+// CHECK-SAME: strides = dense<1> : tensor<3xi64>
+// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+func @pooling_ndhwc_sum_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
+  %fake = linalg.init_tensor [3, 3, 3] : tensor<3x3x3xf32>
+  %init = linalg.init_tensor [1, 2, 2, 2, 1] : tensor<1x2x2x2x1xf32>
+  %cst = constant 0.000000e+00 : f32
+  %fill = linalg.fill(%cst, %init) : f32, tensor<1x2x2x2x1xf32> -> tensor<1x2x2x2x1xf32>
+  %res = linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
+    ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
+    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  return %res : tensor<1x2x2x2x1xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @pooling_ndhwc_sum
+// CHECK: linalg.pooling_ndhwc_sum
+// CHECK-SAME: dilations = dense<1> : tensor<3xi64>
+// CHECK-SAME: strides = dense<1> : tensor<3xi64>
+// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
+// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+func @pooling_ndhwc_sum(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
+  linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
+    ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
+    outs(%output: memref<1x2x2x2x1xf32>)
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @pooling_ndhwc_max_tensor
+// CHECK: %{{.+}} = linalg.pooling_ndhwc_max
+// CHECK-SAME: dilations = dense<1> : tensor<3xi64>
+// CHECK-SAME: strides = dense<1> : tensor<3xi64>
+// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+func @pooling_ndhwc_max_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
+  %fake = linalg.init_tensor [3, 3, 3] : tensor<3x3x3xf32>
+  %init = linalg.init_tensor [1, 2, 2, 2, 1] : tensor<1x2x2x2x1xf32>
+  %cst = constant 0.000000e+00 : f32
+  %fill = linalg.fill(%cst, %init) : f32, tensor<1x2x2x2x1xf32> -> tensor<1x2x2x2x1xf32>
+  %res = linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
+    ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
+    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  return %res : tensor<1x2x2x2x1xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @pooling_ndhwc_max
+// CHECK: linalg.pooling_ndhwc_max
+// CHECK-SAME: dilations = dense<1> : tensor<3xi64>
+// CHECK-SAME: strides = dense<1> : tensor<3xi64>
+// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
+// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+func @pooling_ndhwc_max(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
+  linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
+    ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
+    outs(%output: memref<1x2x2x2x1xf32>)
+  return
+}
+
+// -----
+
+// CHECK-LABEL: func @pooling_ndhwc_min_tensor
+// CHECK: %{{.+}} = linalg.pooling_ndhwc_min
+// CHECK-SAME: dilations = dense<1> : tensor<3xi64>
+// CHECK-SAME: strides = dense<1> : tensor<3xi64>
+// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
+// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+func @pooling_ndhwc_min_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
+  %fake = linalg.init_tensor [3, 3, 3] : tensor<3x3x3xf32>
+  %init = linalg.init_tensor [1, 2, 2, 2, 1] : tensor<1x2x2x2x1xf32>
+  %cst = constant 0.000000e+00 : f32
+  %fill = linalg.fill(%cst, %init) : f32, tensor<1x2x2x2x1xf32> -> tensor<1x2x2x2x1xf32>
+  %res = linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
+    ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
+    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  return %res : tensor<1x2x2x2x1xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @pooling_ndhwc_min
+// CHECK: linalg.pooling_ndhwc_min
+// CHECK-SAME: dilations = dense<1> : tensor<3xi64>
+// CHECK-SAME: strides = dense<1> : tensor<3xi64>
+// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
+// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+func @pooling_ndhwc_min(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
+  linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
+    ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
+    outs(%output: memref<1x2x2x2x1xf32>)
+  return
+}
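
Reading note (not part of the patch): the indexing maps above all encode the same NDHWC pooling arithmetic. For each output element (n, od, oh, ow, c), the three reduction dimensions walk the kernel window (kd, kh, kw) and read the input at (n, od*SD + kd*DD, oh*SH + kh*DH, ow*SW + kw*DW, c), combining with add, max, or min. The plain-Python sketch below is purely illustrative of those semantics under the assumption of in-bounds window accesses; the function and parameter names are made up and do not exist in MLIR.

```python
# Illustrative reference for the semantics of the pooling_ndhwc_* ops above.
# Not part of the patch; all names here are hypothetical.
import math
from itertools import product

def pooling_ndhwc_ref(input, out_shape, kernel, strides, dilations, combine, init):
  """input: nested list indexed [n][d][h][w][c]; kernel: (KD, KH, KW);
  strides: (SD, SH, SW); dilations: (DD, DH, DW); combine: binary reducer."""
  N, OD, OH, OW, C = out_shape
  KD, KH, KW = kernel
  SD, SH, SW = strides
  DD, DH, DW = dilations
  O = [[[[[init for _ in range(C)] for _ in range(OW)]
         for _ in range(OH)] for _ in range(OD)] for _ in range(N)]
  for n, od, oh, ow, c in product(range(N), range(OD), range(OH),
                                  range(OW), range(C)):
    acc = init
    # Reduction over the kernel window, mirroring d4/d5/d6 in the affine maps.
    for kd, kh, kw in product(range(KD), range(KH), range(KW)):
      acc = combine(acc, input[n][od * SD + kd * DD][oh * SH + kh * DH]
                              [ow * SW + kw * DW][c])
    O[n][od][oh][ow][c] = acc
  return O

# Sum pooling corresponds to combine=lambda a, b: a + b with init=0.0,
# max pooling to combine=max with init=-math.inf,
# and min pooling to combine=min with init=math.inf.
```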