diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml @@ -661,7 +661,7 @@ The partial multiplication results are reduced into a 2D output. Numeric casting is performed on the operands to the inner multiply, promoting - them to the same data type as the accumulator/output." + them to the same data type as the accumulator/output. implements: - LinalgContractionOpInterface structured_op: !LinalgStructuredOpConfig @@ -2279,38 +2279,39 @@ name: I kind: input_tensor type_var: T1 - shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s9, s1 * - s2 + s3 * s4, s5 * s6 + s7 * s8)> + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2 + * s3 + s4 * s5, s6 * s7 + s8 * s9)> - !LinalgOperandDefConfig name: K kind: input_tensor type_var: T2 - shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s9, s3, s7)> + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s1, s4, s8)> - !LinalgOperandDefConfig name: O kind: output_tensor type_var: U - shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s9, s1, s5)> + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s0, s1, s2, + s6)> - !LinalgOperandDefConfig name: strides kind: index_attr - index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s2, - s6)> + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s3, + s7)> default_indices: - 1 - 1 - !LinalgOperandDefConfig name: dilations kind: index_attr - index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s4, - s8)> + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (s5, + s9)> default_indices: - 1 - 1 indexing_maps: !LinalgIndexingMapsConfig static_indexing_maps: - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] - -> (d0, d3, d1 * s2 + d4 * s4, d2 * s6 + d5 * s8)> + -> (d0, d3, d1 * s3 + d4 * s5, d2 * s7 + d5 * s9)> - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] -> (d3, d4, d5)> - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9] @@ -3470,6 +3471,497 @@ - !ScalarExpression scalar_arg: I --- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: pooling_nwc_sum + cpp_class_name: PoolingNwcSumOp + doc: |- + Performs sum pooling. + + Layout: + * Input: NWC. + * Kernel: W. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. 
+ implements: + - LinalgConvolutionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + kind: input_tensor + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)> + - !LinalgOperandDefConfig + name: K + kind: input_tensor + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)> + - !LinalgOperandDefConfig + name: O + kind: output_tensor + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)> + - !LinalgOperandDefConfig + name: strides + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)> + default_indices: + - 1 + - !LinalgOperandDefConfig + name: dilations + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)> + default_indices: + - 1 + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4, + d2)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)> + iterator_types: + - parallel + - parallel + - parallel + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_fn: + kind: binary + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: I +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: pooling_ncw_sum + cpp_class_name: PoolingNcwSumOp + doc: |- + Performs sum pooling. + + Layout: + * Input: NCW. + * Kernel: W. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. + implements: + - LinalgConvolutionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + kind: input_tensor + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2 * s3 + s4 * s5)> + - !LinalgOperandDefConfig + name: K + kind: input_tensor + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)> + - !LinalgOperandDefConfig + name: O + kind: output_tensor + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2)> + - !LinalgOperandDefConfig + name: strides + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)> + default_indices: + - 1 + - !LinalgOperandDefConfig + name: dilations + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s5)> + default_indices: + - 1 + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2 * s3 + d3 + * s5)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)> + iterator_types: + - parallel + - parallel + - parallel + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_fn: + kind: binary + fn_name: add + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: I +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: pooling_nwc_max + cpp_class_name: PoolingNwcMaxOp + doc: |- + Performs max pooling. 
+ + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. + implements: + - LinalgConvolutionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + kind: input_tensor + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)> + - !LinalgOperandDefConfig + name: K + kind: input_tensor + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)> + - !LinalgOperandDefConfig + name: O + kind: output_tensor + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)> + - !LinalgOperandDefConfig + name: strides + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)> + default_indices: + - 1 + - !LinalgOperandDefConfig + name: dilations + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)> + default_indices: + - 1 + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4, + d2)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)> + iterator_types: + - parallel + - parallel + - parallel + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_fn: + kind: binary + fn_name: max_signed + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: I +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: pooling_nwc_max_unsigned + cpp_class_name: PoolingNwcMaxUnsignedOp + doc: |- + Performs unsigned max pooling. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. 
+ implements: + - LinalgConvolutionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + kind: input_tensor + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)> + - !LinalgOperandDefConfig + name: K + kind: input_tensor + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)> + - !LinalgOperandDefConfig + name: O + kind: output_tensor + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)> + - !LinalgOperandDefConfig + name: strides + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)> + default_indices: + - 1 + - !LinalgOperandDefConfig + name: dilations + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)> + default_indices: + - 1 + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4, + d2)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)> + iterator_types: + - parallel + - parallel + - parallel + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_fn: + kind: binary + fn_name: max_unsigned + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_unsigned + type_var: U + operands: + - !ScalarExpression + scalar_arg: I +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: pooling_ncw_max + cpp_class_name: PoolingNcwMaxOp + doc: |- + Performs max pooling. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. + implements: + - LinalgConvolutionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + kind: input_tensor + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2 * s3 + s4 * s5)> + - !LinalgOperandDefConfig + name: K + kind: input_tensor + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)> + - !LinalgOperandDefConfig + name: O + kind: output_tensor + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s2)> + - !LinalgOperandDefConfig + name: strides + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)> + default_indices: + - 1 + - !LinalgOperandDefConfig + name: dilations + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s5)> + default_indices: + - 1 + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2 * s3 + d3 + * s5)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)> + iterator_types: + - parallel + - parallel + - parallel + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_fn: + kind: binary + fn_name: max_signed + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: I +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: pooling_nwc_min + cpp_class_name: PoolingNwcMinOp + doc: |- + Performs min pooling. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. 
+ implements: + - LinalgConvolutionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + kind: input_tensor + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)> + - !LinalgOperandDefConfig + name: K + kind: input_tensor + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)> + - !LinalgOperandDefConfig + name: O + kind: output_tensor + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)> + - !LinalgOperandDefConfig + name: strides + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)> + default_indices: + - 1 + - !LinalgOperandDefConfig + name: dilations + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)> + default_indices: + - 1 + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4, + d2)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)> + iterator_types: + - parallel + - parallel + - parallel + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_fn: + kind: binary + fn_name: min_signed + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_signed + type_var: U + operands: + - !ScalarExpression + scalar_arg: I +--- !LinalgOpConfig +metadata: !LinalgOpMetadata + name: pooling_nwc_min_unsigned + cpp_class_name: PoolingNwcMinUnsignedOp + doc: |- + Performs unsigned min pooling. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. 
+ implements: + - LinalgConvolutionOpInterface +structured_op: !LinalgStructuredOpConfig + args: + - !LinalgOperandDefConfig + name: I + kind: input_tensor + type_var: T1 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1 * s2 + s3 * s4, s5)> + - !LinalgOperandDefConfig + name: K + kind: input_tensor + type_var: T2 + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s3)> + - !LinalgOperandDefConfig + name: O + kind: output_tensor + type_var: U + shape_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s0, s1, s5)> + - !LinalgOperandDefConfig + name: strides + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s2)> + default_indices: + - 1 + - !LinalgOperandDefConfig + name: dilations + kind: index_attr + index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5] -> (s4)> + default_indices: + - 1 + indexing_maps: !LinalgIndexingMapsConfig + static_indexing_maps: + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1 * s2 + d3 * s4, + d2)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d3)> + - affine_map<(d0, d1, d2, d3)[s0, s1, s2, s3, s4, s5] -> (d0, d1, d2)> + iterator_types: + - parallel + - parallel + - parallel + - reduction + assignments: + - !ScalarAssign + arg: O + value: !ScalarExpression + scalar_fn: + kind: binary + fn_name: min_unsigned + operands: + - !ScalarExpression + scalar_arg: O + - !ScalarExpression + scalar_fn: + kind: type + fn_name: cast_unsigned + type_var: U + operands: + - !ScalarExpression + scalar_arg: I +--- !LinalgOpConfig metadata: !LinalgOpMetadata name: pooling_ndhwc_sum cpp_class_name: PoolingNdhwcSumOp diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py --- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py @@ -694,7 +694,6 @@ D.ow * S.SW + D.kw * S.DW, D.ic]) * TypeFn.cast_signed( U, K[D.kd, D.kh, D.kw, D.ic, D.cm]) - @linalg_structured_op def pooling_nhwc_sum(I=TensorDef(T1, S.N, S.OH * S.SH + S.KH * S.DH, S.OW * S.SW + S.KW * S.DW, S.C), @@ -838,6 +837,146 @@ D.c] = ReduceFn.min_unsigned[D.kh, D.kw](TypeFn.cast_unsigned( U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c])) +@linalg_structured_op +def pooling_nwc_sum(I=TensorDef(T1, S.N, + S.OW * S.SW + S.KW * S.DW, S.C), + K=TensorDef(T2, S.KW, index_dims=[D.kw]), + O=TensorDef(U, S.N, S.OW, S.C, output=True), + strides=IndexAttrDef(S.SW, default=[1]), + dilations=IndexAttrDef(S.DW, default=[1])): + """Performs sum pooling. + + Layout: + * Input: NWC. + * Kernel: W. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. + """ + implements(ConvolutionOpInterface) + domain(D.n, D.ow, D.c, D.kw) + O[D.n, D.ow, D.c] += TypeFn.cast_signed( + U, I[D.n, D.ow * S.SW + D.kw * S.DW, D.c]) + + +@linalg_structured_op +def pooling_ncw_sum(I=TensorDef(T1, S.N, S.C, + S.OW * S.SW + S.KW * S.DW), + K=TensorDef(T2, S.KW, index_dims=[D.kw]), + O=TensorDef(U, S.N, S.C, S.OW, output=True), + strides=IndexAttrDef(S.SW, default=[1]), + dilations=IndexAttrDef(S.DW, default=[1])): + """Performs sum pooling. + + Layout: + * Input: NCW. + * Kernel: W. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. 
+ """ + implements(ConvolutionOpInterface) + domain(D.n, D.c, D.ow, D.kw) + O[D.n, D.c, D.ow] += TypeFn.cast_signed( + U, I[D.n, D.c, D.ow * S.SW + D.kw * S.DW]) + + +@linalg_structured_op +def pooling_nwc_max(I=TensorDef(T1, S.N, + S.OW * S.SW + S.KW * S.DW, S.C), + K=TensorDef(T2, S.KW, index_dims=[D.kw]), + O=TensorDef(U, S.N, S.OW, S.C, output=True), + strides=IndexAttrDef(S.SW, default=[1]), + dilations=IndexAttrDef(S.DW, default=[1])): + """Performs max pooling. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. + """ + implements(ConvolutionOpInterface) + domain(D.n, D.ow, D.c, D.kw) + O[D.n, D.ow, D.c] = ReduceFn.max_signed[[D.kw]](TypeFn.cast_signed( + U, I[D.n, D.ow * S.SW + D.kw * S.DW, D.c])) + + +@linalg_structured_op +def pooling_nwc_max_unsigned(I=TensorDef(T1, S.N, + S.OW * S.SW + S.KW * S.DW, S.C), + K=TensorDef(T2, + S.KW, + index_dims=[D.kw]), + O=TensorDef(U, S.N, S.OW, S.C, output=True), + strides=IndexAttrDef(S.SW, default=[1]), + dilations=IndexAttrDef(S.DW, default=[1])): + """Performs unsigned max pooling. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. + """ + implements(ConvolutionOpInterface) + domain(D.n, D.ow, D.c, D.kw) + O[D.n, D.ow, + D.c] = ReduceFn.max_unsigned[[D.kw]](TypeFn.cast_unsigned( + U, I[D.n, D.ow * S.SW + D.kw * S.DW, D.c])) + + +@linalg_structured_op +def pooling_ncw_max(I=TensorDef(T1, S.N, S.C, + S.OW * S.SW + S.KW * S.DW), + K=TensorDef(T2, S.KW, index_dims=[D.kw]), + O=TensorDef(U, S.N, S.C, S.OW, output=True), + strides=IndexAttrDef(S.SW, default=[1]), + dilations=IndexAttrDef(S.DW, default=[1])): + """Performs max pooling. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. + """ + implements(ConvolutionOpInterface) + domain(D.n, D.c, D.ow, D.kw) + O[D.n, D.c, D.ow] = ReduceFn.max_signed[[D.kw]](TypeFn.cast_signed( + U, I[D.n, D.c, D.ow * S.SW + D.kw * S.DW,])) + + +@linalg_structured_op +def pooling_nwc_min(I=TensorDef(T1, S.N, + S.OW * S.SW + S.KW * S.DW, S.C), + K=TensorDef(T2, S.KW, index_dims=[D.kw]), + O=TensorDef(U, S.N, S.OW, S.C, output=True), + strides=IndexAttrDef(S.SW, default=[1]), + dilations=IndexAttrDef(S.DW, default=[1])): + """Performs min pooling. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. + """ + implements(ConvolutionOpInterface) + domain(D.n, D.ow, D.c, D.kw) + O[D.n, D.ow, D.c] = ReduceFn.min_signed[[D.kw]](TypeFn.cast_signed( + U, I[D.n, D.ow * S.SW + D.kw * S.DW, D.c])) + + +@linalg_structured_op +def pooling_nwc_min_unsigned(I=TensorDef(T1, S.N, + S.OW * S.SW + S.KW * S.DW, S.C), + K=TensorDef(T2, + S.KW, + index_dims=[D.kw]), + O=TensorDef(U, S.N, S.OW, S.C, output=True), + strides=IndexAttrDef(S.SW, default=[1]), + dilations=IndexAttrDef(S.DW, default=[1])): + """Performs unsigned min pooling. + + Numeric casting is performed on the input operand, promoting it to the same + data type as the accumulator/output. 
+ """ + implements(ConvolutionOpInterface) + domain(D.n, D.ow, D.c, D.kw) + O[D.n, D.ow, + D.c] = ReduceFn.min_unsigned[[D.kw]](TypeFn.cast_unsigned( + U, I[D.n, D.ow * S.SW + D.kw * S.DW, D.c])) + + @linalg_structured_op def pooling_ndhwc_sum(I=TensorDef(T1, S.N, S.OD * S.SD + S.KD * S.DD, diff --git a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir @@ -131,6 +131,20 @@ // ----- +func.func @generalize_pooling_nwc_max_f32(%input : tensor<1x16x1xf32>, %shape: tensor<2xf32>, %output: tensor<1x4x1xf32>) -> tensor<1x4x1xf32> { + %0 = linalg.pooling_nwc_max {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} + ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> + return %0: tensor<1x4x1xf32> +} + +// CHECK-LABEL: @generalize_pooling_nwc_max_f32 +// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[SHAPE_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32) +// CHECK-NEXT: %[[MAX:.+]] = arith.maxf %[[OUT_ARG]], %[[IN_ARG]] : f32 +// CHECK-NEXT: linalg.yield %[[MAX]] : f32 +// CHECK-NEXT: -> tensor<1x4x1xf32> + +// ----- + func.func @generalize_pooling_nhwc_max_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> @@ -143,6 +157,18 @@ // ----- +func.func @generalize_pooling_nwc_max_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { + %0 = linalg.pooling_nwc_max {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + return %0: tensor<1x4x1xi32> +} + +// CHECK-LABEL: @generalize_pooling_nwc_max_i32 +// Verify signed integer maximum. +// CHECK: = arith.maxsi + +// ----- + func.func @generalize_pooling_nhwc_max_unsigned_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_max_unsigned {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> @@ -155,6 +181,18 @@ // ----- +func.func @generalize_pooling_nwc_max_unsigned_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { + %0 = linalg.pooling_nwc_max_unsigned {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + return %0: tensor<1x4x1xi32> +} + +// CHECK-LABEL: @generalize_pooling_nwc_max_unsigned_i32 +// Verify unsigned integer minimum. 
+// CHECK: = arith.maxui + +// ----- + func.func @generalize_pooling_nhwc_min_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> @@ -169,6 +207,20 @@ // ----- +func.func @generalize_pooling_nwc_min_f32(%input : tensor<1x16x1xf32>, %shape: tensor<2xf32>, %output: tensor<1x4x1xf32>) -> tensor<1x4x1xf32> { + %0 = linalg.pooling_nwc_min {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} + ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> + return %0: tensor<1x4x1xf32> +} + +// CHECK-LABEL: @generalize_pooling_nwc_min_f32 +// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[SHAPE_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32) +// CHECK-NEXT: %[[MIN:.+]] = arith.minf %[[OUT_ARG]], %[[IN_ARG]] : f32 +// CHECK-NEXT: linalg.yield %[[MIN]] : f32 +// CHECK-NEXT: -> tensor<1x4x1xf32> + +// ----- + func.func @generalize_pooling_nhwc_min_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> @@ -181,6 +233,18 @@ // ----- +func.func @generalize_pooling_nwc_min_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { + %0 = linalg.pooling_nwc_min {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + return %0: tensor<1x4x1xi32> +} + +// CHECK-LABEL: @generalize_pooling_nwc_min_i32 +// Verify signed integer minimum. +// CHECK: = arith.minsi + +// ----- + func.func @generalize_pooling_nhwc_min_unsigned_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_min_unsigned {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> @@ -193,6 +257,18 @@ // ----- +func.func @generalize_pooling_nwc_min_unsigned_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { + %0 = linalg.pooling_nwc_min_unsigned {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + return %0: tensor<1x4x1xi32> +} + +// CHECK-LABEL: @generalize_pooling_nwc_min_unsigned_i32 +// Verify unsigned integer minimum. 
+// CHECK: = arith.minui + +// ----- + func.func @generalize_pooling_nhwc_sum_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> @@ -207,6 +283,20 @@ // ----- +func.func @generalize_pooling_nwc_sum_f32(%input : tensor<1x16x1xf32>, %shape: tensor<2xf32>, %output: tensor<1x4x1xf32>) -> tensor<1x4x1xf32> { + %0 = linalg.pooling_nwc_sum {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} + ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> + return %0: tensor<1x4x1xf32> +} + +// CHECK-LABEL: @generalize_pooling_nwc_sum_f32 +// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[SHAPE_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32) +// CHECK-NEXT: %[[ADD:.+]] = arith.addf %[[OUT_ARG]], %[[IN_ARG]] : f32 +// CHECK-NEXT: linalg.yield %[[ADD]] : f32 +// CHECK-NEXT: -> tensor<1x4x1xf32> + +// ----- + func.func @generalize_pooling_nhwc_sum_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> @@ -221,6 +311,20 @@ // ----- +func.func @generalize_pooling_nwc_sum_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { + %0 = linalg.pooling_nwc_sum {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + return %0: tensor<1x4x1xi32> +} + +// CHECK-LABEL: @generalize_pooling_nwc_sum_i32 +// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: i32, %[[SHAPE_ARG:.+]]: i32, %[[OUT_ARG:.+]]: i32) +// CHECK-NEXT: %[[ADD:.+]] = arith.addi %[[OUT_ARG]], %[[IN_ARG]] : i32 +// CHECK-NEXT: linalg.yield %[[ADD]] : i32 +// CHECK-NEXT: -> tensor<1x4x1xi32> + +// ----- + func.func @generalize_fill_0d(%value: f64, %O: tensor) -> tensor { %0 = linalg.fill ins(%value: f64) outs(%O : tensor) -> tensor return %0: tensor diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir --- a/mlir/test/Dialect/Linalg/named-ops.mlir +++ b/mlir/test/Dialect/Linalg/named-ops.mlir @@ -422,6 +422,25 @@ // ----- +// CHECK-LABEL: func @pooling_nwc_sum_tensor +// CHECK: %{{.+}} = linalg.pooling_nwc_sum +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xf32>, tensor<3xf32>) +// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> +func.func @pooling_nwc_sum_tensor(%input: tensor<1x4x1xf32>) -> tensor<1x2x1xf32> { + %fake = tensor.empty() : tensor<3xf32> + %init = tensor.empty() : tensor<1x2x1xf32> + %cst = arith.constant 0.000000e+00 : f32 + %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> + %res = linalg.pooling_nwc_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: tensor<1x4x1xf32>, tensor<3xf32>) + outs(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32> + return %res : tensor<1x2x1xf32> +} + +// 
----- + // CHECK-LABEL: func @pooling_nhwc_sum // CHECK: linalg.pooling_nhwc_sum // CHECK-SAME: dilations = dense<1> : tensor<2xi64> @@ -437,6 +456,21 @@ // ----- +// CHECK-LABEL: func @pooling_nwc_sum +// CHECK: linalg.pooling_nwc_sum +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xf32>, memref<3xf32>) +// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xf32>) +func.func @pooling_nwc_sum(%input: memref<1x4x1xf32>, %fake: memref<3xf32>, %output: memref<1x2x1xf32>) { + linalg.pooling_nwc_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: memref<1x4x1xf32>, memref<3xf32>) + outs(%output: memref<1x2x1xf32>) + return +} + +// ----- + // CHECK-LABEL: func @pooling_nchw_sum_tensor // CHECK: %{{.+}} = linalg.pooling_nchw_sum // CHECK-SAME: dilations = dense<1> : tensor<2xi64> @@ -456,6 +490,25 @@ // ----- +// CHECK-LABEL: func @pooling_ncw_sum_tensor +// CHECK: %{{.+}} = linalg.pooling_ncw_sum +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4xf32>, tensor<3xf32>) +// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2xf32>) -> tensor<1x1x2xf32> +func.func @pooling_ncw_sum_tensor(%input: tensor<1x1x4xf32>) -> tensor<1x1x2xf32> { + %fake = tensor.empty() : tensor<3xf32> + %init = tensor.empty() : tensor<1x1x2xf32> + %cst = arith.constant 0.000000e+00 : f32 + %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2xf32>) -> tensor<1x1x2xf32> + %res = linalg.pooling_ncw_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: tensor<1x1x4xf32>, tensor<3xf32>) + outs(%fill: tensor<1x1x2xf32>) -> tensor<1x1x2xf32> + return %res : tensor<1x1x2xf32> +} + +// ----- + // CHECK-LABEL: func @pooling_nchw_sum // CHECK: linalg.pooling_nchw_sum // CHECK-SAME: dilations = dense<1> : tensor<2xi64> @@ -471,6 +524,21 @@ // ----- +// CHECK-LABEL: func @pooling_ncw_sum +// CHECK: linalg.pooling_ncw_sum +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x1x4xf32>, memref<3xf32>) +// CHECK-SAME: outs(%{{.+}} : memref<1x1x2xf32>) +func.func @pooling_ncw_sum(%input: memref<1x1x4xf32>, %fake: memref<3xf32>, %output: memref<1x1x2xf32>) { + linalg.pooling_ncw_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: memref<1x1x4xf32>, memref<3xf32>) + outs(%output: memref<1x1x2xf32>) + return +} + +// ----- + // CHECK-LABEL: func @pooling_nhwc_max_tensor // CHECK: %{{.+}} = linalg.pooling_nhwc_max // CHECK-SAME: dilations = dense<1> : tensor<2xi64> @@ -488,6 +556,24 @@ return %res : tensor<1x2x2x1xf32> } +// ----- +// CHECK-LABEL: func @pooling_nwc_max_tensor +// CHECK: %{{.+}} = linalg.pooling_nwc_max +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xf32>, tensor<3xf32>) +// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> +func.func @pooling_nwc_max_tensor(%input: tensor<1x4x1xf32>) -> tensor<1x2x1xf32> { + %fake = tensor.empty() : tensor<3xf32> + %init = tensor.empty() : tensor<1x2x1xf32> + %cst = arith.constant 0.000000e+00 : f32 + %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> + %res = linalg.pooling_nwc_max {dilations = dense<1> : 
tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: tensor<1x4x1xf32>, tensor<3xf32>) + outs(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32> + return %res : tensor<1x2x1xf32> +} + // ----- // CHECK-LABEL: func @pooling_nchw_max_tensor // CHECK: %{{.+}} = linalg.pooling_nchw_max @@ -507,6 +593,25 @@ return %res : tensor<1x1x2x2xf32> } +// ----- +// CHECK-LABEL: func @pooling_ncw_max_tensor +// CHECK: %{{.+}} = linalg.pooling_ncw_max +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4xf32>, tensor<3xf32>) +// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2xf32>) -> tensor<1x1x2xf32> + +func.func @pooling_ncw_max_tensor(%input: tensor<1x1x4xf32>) -> tensor<1x1x2xf32> { + %fake = tensor.empty() : tensor<3xf32> + %init = tensor.empty() : tensor<1x1x2xf32> + %cst = arith.constant 0.000000e+00 : f32 + %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2xf32>) -> tensor<1x1x2xf32> + %res = linalg.pooling_ncw_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: tensor<1x1x4xf32>, tensor<3xf32>) + outs(%fill: tensor<1x1x2xf32>) -> tensor<1x1x2xf32> + return %res : tensor<1x1x2xf32> +} + // ----- // CHECK-LABEL: func @pooling_nhwc_max @@ -524,6 +629,21 @@ // ----- +// CHECK-LABEL: func @pooling_nwc_max +// CHECK: linalg.pooling_nwc_max +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xf32>, memref<3xf32>) +// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xf32>) +func.func @pooling_nwc_max(%input: memref<1x4x1xf32>, %fake: memref<3xf32>, %output: memref<1x2x1xf32>) { + linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: memref<1x4x1xf32>, memref<3xf32>) + outs(%output: memref<1x2x1xf32>) + return +} + +// ----- + // CHECK-LABEL: func @pooling_nhwc_i8_max_tensor // CHECK: %{{.+}} = linalg.pooling_nhwc_max // CHECK-SAME: dilations = dense<1> : tensor<2xi64> @@ -543,6 +663,25 @@ // ----- +// CHECK-LABEL: func @pooling_nwc_i8_max_tensor +// CHECK: %{{.+}} = linalg.pooling_nwc_max +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xi8>, tensor<3xi8>) +// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi8>) -> tensor<1x2x1xi8> +func.func @pooling_nwc_i8_max_tensor(%input: tensor<1x4x1xi8>) -> tensor<1x2x1xi8> { + %fake = tensor.empty() : tensor<3xi8> + %init = tensor.empty() : tensor<1x2x1xi8> + %cst = arith.constant 0 : i8 + %fill = linalg.fill ins(%cst : i8) outs(%init : tensor<1x2x1xi8>) -> tensor<1x2x1xi8> + %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: tensor<1x4x1xi8>, tensor<3xi8>) + outs(%fill: tensor<1x2x1xi8>) -> tensor<1x2x1xi8> + return %res : tensor<1x2x1xi8> +} + +// ----- + // CHECK-LABEL: func @pooling_nhwc_i8_max // CHECK: linalg.pooling_nhwc_max // CHECK-SAME: dilations = dense<1> : tensor<2xi64> @@ -558,6 +697,21 @@ // ----- +// CHECK-LABEL: func @pooling_nwc_i8_max +// CHECK: linalg.pooling_nwc_max +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xi8>, memref<3xi8>) +// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xi8>) +func.func @pooling_nwc_i8_max(%input: memref<1x4x1xi8>, %fake: 
memref<3xi8>, %output: memref<1x2x1xi8>) { + linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: memref<1x4x1xi8>, memref<3xi8>) + outs(%output: memref<1x2x1xi8>) + return +} + +// ----- + // CHECK-LABEL: func @pooling_nhwc_i16_max_tensor // CHECK: %{{.+}} = linalg.pooling_nhwc_max // CHECK-SAME: dilations = dense<1> : tensor<2xi64> @@ -577,6 +731,25 @@ // ----- +// CHECK-LABEL: func @pooling_nwc_i16_max_tensor +// CHECK: %{{.+}} = linalg.pooling_nwc_max +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xi16>, tensor<3xi16>) +// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi16>) -> tensor<1x2x1xi16> +func.func @pooling_nwc_i16_max_tensor(%input: tensor<1x4x1xi16>) -> tensor<1x2x1xi16> { + %fake = tensor.empty() : tensor<3xi16> + %init = tensor.empty() : tensor<1x2x1xi16> + %cst = arith.constant 0 : i16 + %fill = linalg.fill ins(%cst : i16) outs(%init : tensor<1x2x1xi16>) -> tensor<1x2x1xi16> + %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: tensor<1x4x1xi16>, tensor<3xi16>) + outs(%fill: tensor<1x2x1xi16>) -> tensor<1x2x1xi16> + return %res : tensor<1x2x1xi16> +} + +// ----- + // CHECK-LABEL: func @pooling_nhwc_i16_max // CHECK: linalg.pooling_nhwc_max // CHECK-SAME: dilations = dense<1> : tensor<2xi64> @@ -592,6 +765,21 @@ // ----- +// CHECK-LABEL: func @pooling_nwc_i16_max +// CHECK: linalg.pooling_nwc_max +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xi16>, memref<3xi16>) +// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xi16>) +func.func @pooling_nwc_i16_max(%input: memref<1x4x1xi16>, %fake: memref<3xi16>, %output: memref<1x2x1xi16>) { + linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: memref<1x4x1xi16>, memref<3xi16>) + outs(%output: memref<1x2x1xi16>) + return +} + +// ----- + // CHECK-LABEL: func @pooling_nhwc_i32_max_tensor // CHECK: %{{.+}} = linalg.pooling_nhwc_max // CHECK-SAME: dilations = dense<1> : tensor<2xi64> @@ -611,6 +799,25 @@ // ----- +// CHECK-LABEL: func @pooling_nwc_i32_max_tensor +// CHECK: %{{.+}} = linalg.pooling_nwc_max +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xi32>, tensor<3xi32>) +// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi32>) -> tensor<1x2x1xi32> +func.func @pooling_nwc_i32_max_tensor(%input: tensor<1x4x1xi32>) -> tensor<1x2x1xi32> { + %fake = tensor.empty() : tensor<3xi32> + %init = tensor.empty() : tensor<1x2x1xi32> + %cst = arith.constant 0 : i32 + %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<1x2x1xi32>) -> tensor<1x2x1xi32> + %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: tensor<1x4x1xi32>, tensor<3xi32>) + outs(%fill: tensor<1x2x1xi32>) -> tensor<1x2x1xi32> + return %res : tensor<1x2x1xi32> +} + +// ----- + // CHECK-LABEL: func @pooling_nhwc_i32_max // CHECK: linalg.pooling_nhwc_max // CHECK-SAME: dilations = dense<1> : tensor<2xi64> @@ -624,6 +831,21 @@ return } +// ----- + +// CHECK-LABEL: func @pooling_nwc_i32_max +// CHECK: linalg.pooling_nwc_max +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : 
tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xi32>, memref<3xi32>) +// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xi32>) +func.func @pooling_nwc_i32_max(%input: memref<1x4x1xi32>, %fake: memref<3xi32>, %output: memref<1x2x1xi32>) { + linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: memref<1x4x1xi32>, memref<3xi32>) + outs(%output: memref<1x2x1xi32>) + return +} + // ----- @@ -646,6 +868,25 @@ // ----- +// CHECK-LABEL: func @pooling_nwc_min_tensor +// CHECK: %{{.+}} = linalg.pooling_nwc_min +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xf32>, tensor<3xf32>) +// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> +func.func @pooling_nwc_min_tensor(%input: tensor<1x4x1xf32>) -> tensor<1x2x1xf32> { + %fake = tensor.empty() : tensor<3xf32> + %init = tensor.empty() : tensor<1x2x1xf32> + %cst = arith.constant 0.000000e+00 : f32 + %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> + %res = linalg.pooling_nwc_min {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: tensor<1x4x1xf32>, tensor<3xf32>) + outs(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32> + return %res : tensor<1x2x1xf32> +} + +// ----- + // CHECK-LABEL: func @pooling_nhwc_min // CHECK: linalg.pooling_nhwc_min // CHECK-SAME: dilations = dense<1> : tensor<2xi64> @@ -661,6 +902,21 @@ // ----- +// CHECK-LABEL: func @pooling_nwc_min +// CHECK: linalg.pooling_nwc_min +// CHECK-SAME: dilations = dense<1> : tensor<1xi64> +// CHECK-SAME: strides = dense<1> : tensor<1xi64> +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xf32>, memref<3xf32>) +// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xf32>) +func.func @pooling_nwc_min(%input: memref<1x4x1xf32>, %fake: memref<3xf32>, %output: memref<1x2x1xf32>) { + linalg.pooling_nwc_min {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} + ins(%input, %fake: memref<1x4x1xf32>, memref<3xf32>) + outs(%output: memref<1x2x1xf32>) + return +} + +// ----- + // CHECK-LABEL: func @pooling_ndhwc_sum_tensor // CHECK: %{{.+}} = linalg.pooling_ndhwc_sum // CHECK-SAME: dilations = dense<1> : tensor<3xi64>