diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml
@@ -309,7 +309,11 @@
 metadata: !LinalgOpMetadata
   name: depthwise_conv_2d_input_nhwc_filter_hwc_poly
   cpp_class_name: DepthwiseConv2DInputNhwcFilterHwcPolyOp
-  doc: A depth-wise 2-D convolution operation.
+  doc: |-
+    Performs depth-wise 2-D convolution.
+
+    Numeric casting is performed on the operands to the inner multiply, promoting
+    them to the same data type as the accumulator/output.
 structured_op: !LinalgStructuredOpConfig
   args:
   - !LinalgOperandDefConfig
@@ -317,13 +321,13 @@
     usage: InputOperand
     type_var: T1
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
-      (s0, s6, s7, s3)>
+      (s0, s4, s5, s3)>
   - !LinalgOperandDefConfig
     name: K
     usage: InputOperand
     type_var: T2
     shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
-      (s4, s5, s3)>
+      (s6, s7, s3)>
   - !LinalgOperandDefConfig
     name: O
     usage: OutputOperand
@@ -383,6 +387,77 @@
       - !ScalarExpression
         scalar_arg: K
 --- !LinalgOpConfig
+metadata: !LinalgOpMetadata
+  name: pooling_nhwc_sum_poly
+  cpp_class_name: PoolingNhwcSumPolyOp
+  doc: |-
+    Performs sum pooling.
+
+    Numeric casting is performed on the input operand, promoting it to the same
+    data type as the accumulator/output.
+structured_op: !LinalgStructuredOpConfig
+  args:
+  - !LinalgOperandDefConfig
+    name: I
+    usage: InputOperand
+    type_var: T1
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
+      (s0, s4, s5, s3)>
+  - !LinalgOperandDefConfig
+    name: K
+    usage: InputOperand
+    type_var: T2
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
+      (s10, s11)>
+  - !LinalgOperandDefConfig
+    name: O
+    usage: OutputOperand
+    type_var: U
+    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11] ->
+      (s0, s1, s2, s3)>
+  - !LinalgOperandDefConfig
+    name: strides
+    usage: IndexAttribute
+    type_var: I64
+    attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
+      -> (s6, s7)>
+  - !LinalgOperandDefConfig
+    name: dilations
+    usage: IndexAttribute
+    type_var: I64
+    attribute_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11]
+      -> (s8, s9)>
+  indexing_maps: !LinalgIndexingMapsConfig
+    static_indexing_maps:
+    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9,
+      s10, s11] -> (d2, d3 * s6 + d0 * s8, d4 * s7 + d1 * s9, d5)>
+    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9,
+      s10, s11] -> (d0, d1)>
+    - affine_map<(d0, d1, d2, d3, d4, d5)[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9,
+      s10, s11] -> (d2, d3, d4, d5)>
+  iterator_types:
+  - reduction
+  - reduction
+  - parallel
+  - parallel
+  - parallel
+  - parallel
+  assignments:
+  - !ScalarAssign
+    arg: O
+    value: !ScalarExpression
+      scalar_apply:
+        fn_name: add
+        operands:
+        - !ScalarExpression
+          scalar_arg: O
+        - !ScalarExpression
+          symbolic_cast:
+            type_var: U
+            operands:
+            - !ScalarExpression
+              scalar_arg: I
+--- !LinalgOpConfig
 metadata: !LinalgOpMetadata
   name: fill_rng_2d
   cpp_class_name: FillRng2DOp
diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py
--- a/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py
+++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py
@@ -151,13 +151,15 @@
   def __init__(self,
                kind: OperandKind,
                type_var: TypeVar,
-               size_exprs: Optional[Sequence[AffineExprDef]] = None):
+               size_exprs: Optional[Sequence[AffineExprDef]] = None,
+               index_dims: Optional[Sequence[DimDef]] = None):
     if not isinstance(type_var, TypeVar):
       raise ValueError(
           f"OperandDef requires a TypeVar but got {repr(type_var)}")
     self.owner = None  # type: Optional["LinalgOpDef"]
     self.type_var = type_var
     self.size_exprs = size_exprs
+    self.index_dims = index_dims
     self.kind = kind
     self.name = None  # type: Optional[str]
     self.registered_index = -1  # type: int
@@ -174,7 +176,8 @@
 
   def __repr__(self):
     return (f"{self.name}:OperandDef(kind={self.kind.name}, "
-            f"type={repr(self.type_var)}, size_exprs={self.size_exprs})")
+            f"type={repr(self.type_var)}, size_exprs={self.size_exprs}, "
+            f"index_dims={self.index_dims})")
 
 
 class TensorDef:
@@ -184,15 +187,25 @@
   to the body of the structured op. A unique name identifies the tensor operands
   and an index determines their position in the operation's parameter list. A
   tensor definition takes type, a shape, and an optional flag to mark output
-  tensors.
+  tensors. Additionally, a tuple of index dimensions may be used to map the
+  tensor to the loop dimensions of the operation. This mapping is needed to
+  compute the indexing map of shape-only tensors that have no uses.
   """
 
   def __init__(self,
                type_var: TypeVar,
                *shape: AffineExprDef,
+               index_dims: Optional[Sequence[DimDef]] = None,
                output: bool = False):
+    if index_dims and len(shape) != len(index_dims):
+      raise ValueError(f"Expected the shape rank {len(shape)} to match the "
+                       f"number of index_dims {len(index_dims)}")
+    if index_dims and any(not isinstance(dim, DimDef) for dim in index_dims):
+      raise ValueError(f"TensorDef requires index dims of type DimDef but "
+                       f"got {index_dims}")
     kind = OperandKind.OutputTensor if output else OperandKind.InputTensor
-    self.operand_def = OperandDef(kind, type_var, size_exprs=shape)
+    self.operand_def = OperandDef(
+        kind, type_var, size_exprs=shape, index_dims=index_dims)
 
   def __getitem__(self, dims) -> TensorUse:
     assert self.operand_def.owner, "TensorDef is not attached to an op"
diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/config.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/config.py
--- a/mlir/python/mlir/dialects/linalg/opdsl/lang/config.py
+++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/config.py
@@ -138,12 +138,18 @@
       read_use.collect_scalar_uses(collected_scalar_uses)
       read_use.collect_indices(collected_indices)
 
-    # Collect all attribute definitions
+    # Collect all attribute definitions.
    collected_attr_defs = list()
     for operand in registered_operands:
       if operand.kind == OperandKind.Attribute:
         collected_attr_defs.append(operand)
 
+    # Collect all tensors with manual indexing annotation.
+    collected_index_defs = list()
+    for operand in registered_operands:
+      if operand.index_dims:
+        collected_index_defs.append(operand)
+
     # Add all definitions before uses, so process twice.
     for use in collected_tensor_uses:
       self.add_operand(use.operand_def)
@@ -151,6 +157,10 @@
       self.add_operand(use.operand_def)
     for definition in collected_attr_defs:
       self.add_operand(definition)
+    for definition in collected_index_defs:
+      if definition not in self.operands:
+        self.add_operand(definition)
+      self.add_indexed_operand(definition)
 
     for use in collected_tensor_uses:
       self.add_tensor_use(use)
@@ -158,6 +168,9 @@
     # symbols are known.
     for cuse in self.uses.values():
       cuse.indexing_map = self._normalize_affine_map(cuse.indexing_map)
+    for definition in collected_index_defs:
+      self.operands[definition].indexing_map = self._normalize_affine_map(
+          self.operands[definition].indexing_map)
     for operand_config in self.operands.values():
       if operand_config.shape_map:
         operand_config.shape_map = self._normalize_affine_map(
@@ -278,6 +291,18 @@
     self.operands[operand_def] = OperandDefConfig(
         operand_def, shape_map=affine_map)
 
+  def add_indexed_operand(self, operand_def: OperandDef):
+    with self.context:
+      local_state = AffineBuildState(
+          global_state=self.affine_state, allow_new_symbols=False)
+      exprs = []
+      for expr in operand_def.index_dims:
+        exprs.append(expr.build(state=local_state))
+      self.operands[operand_def].indexing_map = _ir.AffineMap.get(
+          dim_count=local_state.dim_count,
+          symbol_count=local_state.symbol_count,
+          exprs=exprs)
+
   def add_tensor_use(self, tensor_use: TensorUse):
     if tensor_use in self.uses:
       return
diff --git a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
--- a/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
+++ b/mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py
@@ -81,12 +81,32 @@
     O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True),
     strides=AttributeDef(S.SH, S.SW),
     dilations=AttributeDef(S.DH, S.DW)):
-  """A depth-wise 2-D convolution operation."""
+  """Performs depth-wise 2-D convolution.
+
+  Numeric casting is performed on the operands to the inner multiply, promoting
+  them to the same data type as the accumulator/output.
+  """
   O[D.n, D.oh, D.ow, D.c] += cast(
       U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW,
            D.c]) * cast(U, K[D.kh, D.kw, D.c])
 
 
+@linalg_structured_op
+def pooling_nhwc_sum_poly(
+    I=TensorDef(T1, S.N, S.H, S.W, S.C),
+    K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]),
+    O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True),
+    strides=AttributeDef(S.SH, S.SW),
+    dilations=AttributeDef(S.DH, S.DW)):
+  """Performs sum pooling.
+
+  Numeric casting is performed on the input operand, promoting it to the same
+  data type as the accumulator/output.
+ """ + O[D.n, D.oh, D.ow, D.c] += cast( + U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c]) + + @linalg_structured_op def fill_rng_2d( min=ScalarDef(F64), diff --git a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir @@ -60,6 +60,34 @@ // ----- +func @generalize_pooling_nhwc_sum_poly_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { + %0 = linalg.pooling_nhwc_sum_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + return %0: tensor<1x2x4x1xf32> +} + +// CHECK-LABEL: @generalize_pooling_nhwc_sum_poly_f32 +// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: f32, %[[SHAPE_ARG:.+]]: f32, %[[OUT_ARG:.+]]: f32) +// CHECK-NEXT: %[[ADD:.+]] = addf %[[OUT_ARG]], %[[IN_ARG]] : f32 +// CHECK-NEXT: linalg.yield %[[ADD]] : f32 +// CHECK-NEXT: -> tensor<1x2x4x1xf32> + +// ----- + +func @generalize_pooling_nhwc_sum_poly_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { + %0 = linalg.pooling_nhwc_sum_poly {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + return %0: tensor<1x2x4x1xi32> +} + +// CHECK-LABEL: @generalize_pooling_nhwc_sum_poly_i32 +// CHECK: ^{{.*}}(%[[IN_ARG:.+]]: i32, %[[SHAPE_ARG:.+]]: i32, %[[OUT_ARG:.+]]: i32) +// CHECK-NEXT: %[[ADD:.+]] = addi %[[OUT_ARG]], %[[IN_ARG]] : i32 +// CHECK-NEXT: linalg.yield %[[ADD]] : i32 +// CHECK-NEXT: -> tensor<1x2x4x1xi32> + +// ----- + func @generalize_fill_rng_2d_f32(%min: f64, %max: f64, %seed: i32, %O: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) outs(%O : tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py b/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py --- a/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py +++ b/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py @@ -40,7 +40,18 @@ @linalg_structured_op -def fill_rng( +def pooling_poly( + I=TensorDef(T1, S.N, S.H, S.W, S.C), + K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), + O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + O[D.n, D.oh, D.ow, D.c] += cast( + U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c]) + + +@linalg_structured_op +def fill_rng_poly( min=ScalarDef(F64), max=ScalarDef(F64), seed=ScalarDef(I32), @@ -65,16 +76,22 @@ i32 = IntegerType.get_signless(32) with InsertionPoint(module.body): - # Note that these all have the same indexing maps. We verify the first and - # then do more permutation tests on casting and body generation - # behavior. - # CHECK: #[[$MAPA:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)> - # CHECK: #[[$MAPB:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)> - # CHECK: #[[$MAPC:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)> + # Multiplication indexing maps. 
+    # Multiplication indexing maps. We verify only the indexing maps of the
+    # first multiplication and then do additional tests on casting and body
+    # generation behavior.
+    # CHECK: #[[$MUL_MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
+    # CHECK: #[[$MUL_MAP_B:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
+    # CHECK: #[[$MUL_MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
+
+    # Convolution indexing maps.
+    # CHECK: #[[$CONV_MAP_I:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 * 2 + d4, d2 * 4 + d5 * 2, d3)>
+    # CHECK: #[[$CONV_MAP_K:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5, d3)>
+    # CHECK: #[[$CONV_MAP_O:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
 
-    # CHECK: #[[$MAPI:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 * 2 + d4, d2 * 4 + d5 * 2, d3)>
-    # CHECK: #[[$MAPK:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d4, d5, d3)>
-    # CHECK: #[[$MAPO:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3)>
+    # Pooling indexing maps.
+    # CHECK: #[[$POOL_MAP_I:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d3 * 2 + d0, d4 * 4 + d1 * 2, d5)>
+    # CHECK: #[[$POOL_MAP_K:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1)>
+    # CHECK: #[[$POOL_MAP_O:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d2, d3, d4, d5)>
 
     # CHECK-LABEL: func @test_matmul_mono
     # CHECK-SAME: %[[A:.+]]: tensor<4x16xf32>
@@ -82,7 +99,7 @@
 
     # CHECK: %[[INITC:.+]] = linalg.init_tensor [4, 8] : tensor<4x8xf32>
     # CHECK: linalg.generic
-    # CHECK-SAME: indexing_maps = [#[[$MAPA]], #[[$MAPB]], #[[$MAPC]]]
+    # CHECK-SAME: indexing_maps = [#[[$MUL_MAP_A]], #[[$MUL_MAP_B]], #[[$MUL_MAP_C]]]
     # CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
     # CHECK-SAME: ins(%[[A]], %[[B]]
     # CHECK-SAME: outs(%[[INITC]]
@@ -177,28 +194,9 @@
       def test_f64f64f32_matmul(lhs, rhs, init_result):
         return matmul_poly(lhs, rhs, outs=[init_result])
 
-    # CHECK-LABEL: @test_fill_rng
-    # CHECK: ^{{.*}}(%[[MIN:.+]]: f64, %[[MAX:.+]]: f64, %[[SEED:.+]]: i32, %{{.*}}
-    # CHECK-DAG: %[[IDX0:.+]] = linalg.index 0 : index
-    # CHECK-DAG: %[[IDX0_CAST:.+]] = index_cast %[[IDX0]] : index to i32
-    # CHECK-DAG: %[[RND0:.+]] = addi %[[IDX0_CAST]], %[[SEED]] : i32
-    # CHECK-DAG: %[[CST0:.+]] = constant 1103515245 : i64
-    # CHECK-DAG: %[[CST0_CAST:.+]] = trunci %[[CST0]] : i64 to i32
-    # Skip the remaining random number computation and match the scaling logic.
-    # CHECK-DAG: %[[DIFF:.+]] = subf %[[MAX]], %[[MIN]] : f64
-    # CHECK-DAG: %[[CST3:.+]] = constant 2.3283063999999999E-10 : f64
-    # CHECK-DAG: %[[FACT:.+]] = mulf %[[DIFF]], %[[CST3]] : f64
-    # CHECK-DAG: %[[RND4:.+]] = mulf %{{.+}}, %[[FACT]] : f64
-    # CHECK-DAG: %[[RND5:.+]] = addf %[[RND4]], %[[MIN]] : f64
-    # CHECK-DAG: %{{.*}} = fptosi %[[RND5]] : f64 to i32
-    @builtin.FuncOp.from_py_func(f64, f64, i32,
-                                 RankedTensorType.get((4, 16), i32))
-    def test_fill_rng(min, max, seed, init_result):
-      return fill_rng(min, max, seed, outs=[init_result])
-
     # CHECK-LABEL: @test_f32i32_conv
     # CHECK: linalg.generic
-    # CHECK-SAME: indexing_maps = [#[[$MAPI]], #[[$MAPK]], #[[$MAPO]]]
+    # CHECK-SAME: indexing_maps = [#[[$CONV_MAP_I]], #[[$CONV_MAP_K]], #[[$CONV_MAP_O]]]
     # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]
     # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[FILTER:.+]]: f32, %[[OUT:.+]]: i32)
     # CHECK-NEXT: %[[IN_CAST:.+]] = fptosi %[[IN:.+]] : f32 to i32
@@ -215,5 +213,40 @@
       return conv_poly(
           input, filter, outs=[init_result], strides=[2, 4], dilations=[1, 2])
+
+    # CHECK-LABEL: @test_f32i32_pooling
+    # CHECK: linalg.generic
+    # CHECK-SAME: indexing_maps = [#[[$POOL_MAP_I]], #[[$POOL_MAP_K]], #[[$POOL_MAP_O]]]
+    # CHECK-SAME: iterator_types = ["reduction", "reduction", "parallel", "parallel", "parallel", "parallel"]
+    # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[SHAPE:.+]]: f32, %[[OUT:.+]]: i32)
+    # CHECK-NEXT: %[[IN_CAST:.+]] = fptosi %[[IN:.+]] : f32 to i32
+    # CHECK-NEXT: %[[SUM:.+]] = addi %[[OUT]], %[[IN_CAST]] : i32
+    # CHECK-NEXT: linalg.yield %[[SUM]] : i32
+    # CHECK-NEXT: -> tensor<2x4xi32>
+    @builtin.FuncOp.from_py_func(
+        RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2), f32),
+        RankedTensorType.get((2, 4), i32))
+    def test_f32i32_pooling(input, shape, init_result):
+      return pooling_poly(
+          input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2])
+
+    # CHECK-LABEL: @test_i32_fill_rng
+    # CHECK: ^{{.*}}(%[[MIN:.+]]: f64, %[[MAX:.+]]: f64, %[[SEED:.+]]: i32, %{{.*}}
+    # CHECK-DAG: %[[IDX0:.+]] = linalg.index 0 : index
+    # CHECK-DAG: %[[IDX0_CAST:.+]] = index_cast %[[IDX0]] : index to i32
+    # CHECK-DAG: %[[RND0:.+]] = addi %[[IDX0_CAST]], %[[SEED]] : i32
+    # CHECK-DAG: %[[CST0:.+]] = constant 1103515245 : i64
+    # CHECK-DAG: %[[CST0_CAST:.+]] = trunci %[[CST0]] : i64 to i32
+    # Skip the remaining random number computation and match the scaling logic.
+    # CHECK-DAG: %[[DIFF:.+]] = subf %[[MAX]], %[[MIN]] : f64
+    # CHECK-DAG: %[[CST3:.+]] = constant 2.3283063999999999E-10 : f64
+    # CHECK-DAG: %[[FACT:.+]] = mulf %[[DIFF]], %[[CST3]] : f64
+    # CHECK-DAG: %[[RND4:.+]] = mulf %{{.+}}, %[[FACT]] : f64
+    # CHECK-DAG: %[[RND5:.+]] = addf %[[RND4]], %[[MIN]] : f64
+    # CHECK-DAG: %{{.*}} = fptosi %[[RND5]] : f64 to i32
+    @builtin.FuncOp.from_py_func(f64, f64, i32,
+                                 RankedTensorType.get((4, 16), i32))
+    def test_i32_fill_rng(min, max, seed, init_result):
+      return fill_rng_poly(min, max, seed, outs=[init_result])
 
 
 print(module)
diff --git a/mlir/test/python/dialects/linalg/opdsl/shape_maps_iteration.py b/mlir/test/python/dialects/linalg/opdsl/shape_maps_iteration.py
--- a/mlir/test/python/dialects/linalg/opdsl/shape_maps_iteration.py
+++ b/mlir/test/python/dialects/linalg/opdsl/shape_maps_iteration.py
@@ -42,3 +42,23 @@
 @linalg_structured_op
 def dot(A=TensorDef(T, S.M), B=TensorDef(T, S.M), C=TensorDef(U, output=True)):
   C[None] += cast(U, A[D.m]) * cast(U, B[D.m])
+
+# Verifies that the index_dims of shape-only operands translate to correct
+# indexing maps.
+# CHECK: ---
+# CHECK-LABEL: pool
+# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s1)>
+# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s2)>
+# CHECK: shape_map: affine_map<()[s0, s1, s2] -> (s0)>
+# CHECK: static_indexing_maps:
+# CHECK-NEXT: - affine_map<(d0, d1)[s0, s1, s2] -> (d1 * 2 + d0)>
+# CHECK-NEXT: - affine_map<(d0, d1)[s0, s1, s2] -> (d0)>
+# CHECK-NEXT: - affine_map<(d0, d1)[s0, s1, s2] -> (d1)>
+# CHECK: iterator_types:
+# CHECK-NEXT: - reduction
+# CHECK-NEXT: - parallel
+@linalg_structured_op
+def pool(I=TensorDef(T, S.I),
+         K=TensorDef(T, S.K, index_dims=[D.k]),
+         O=TensorDef(U, S.O, output=True)):
+  O[D.o] += cast(U, I[D.o * 2 + D.k])
diff --git a/mlir/test/python/dialects/linalg/opsrun.py b/mlir/test/python/dialects/linalg/opsrun.py
--- a/mlir/test/python/dialects/linalg/opsrun.py
+++ b/mlir/test/python/dialects/linalg/opsrun.py
@@ -82,6 +82,28 @@
 }
 """
 
+pooling_boiler = """
+func @main() -> i32 attributes {llvm.emit_c_interface} {
+  %v0 = constant 0 : i32
+  %v1 = constant 1.0 : f64
+
+  %input = memref.alloc() : memref<1x4x16x1xf64>
+  %shape = memref.alloc() : memref<2x2xf64>
+  %output = memref.alloc() : memref<1x2x4x1xi32>
+  linalg.fill(%v1, %input) : f64, memref<1x4x16x1xf64>
+  linalg.fill(%v1, %shape) : f64, memref<2x2xf64>
+  linalg.fill(%v0, %output) : i32, memref<1x2x4x1xi32>
+
+  call @pooling_on_buffers(%input, %shape, %output) :
+    (memref<1x4x16x1xf64>, memref<2x2xf64>, memref<1x2x4x1xi32>) -> ()
+
+  %c0 = constant 0 : index
+  %0 = memref.load %output[%c0, %c0, %c0, %c0] : memref<1x2x4x1xi32>
+
+  // TODO: FFI-based solution to allow testing and printing with python code.
+  return %0 : i32
+}
+"""
 
 def transform(module, boilerplate):
   import mlir.conversions
@@ -273,3 +295,72 @@
 
 
 test_conv_generic()
+
+
+def test_pooling_builtin():
+  with Context() as ctx, Location.unknown():
+    module = Module.create()
+    f64 = F64Type.get()
+    i32 = IntegerType.get_signless(32)
+    with InsertionPoint(module.body):
+
+      @builtin.FuncOp.from_py_func(
+          MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64),
+          MemRefType.get((1, 2, 4, 1), i32))
+      def pooling_on_buffers(input, shape, output):
+        linalg.pooling_nhwc_sum_poly(
+            input,
+            shape,
+            outs=[output],
+            strides=[2, 4],
+            dilations=[1, 2])
+
+    execution_engine = ExecutionEngine(transform(module, pooling_boiler))
+
+    # TODO: FFI-based solution to allow testing and printing with python code.
+    # Prepare arguments: one result i32.
+    # Arguments must be passed as pointers.
+    c_int_p = ctypes.c_int * 1
+    res = c_int_p(-1)
+    execution_engine.invoke("main", res)
+
+    log("RESULT: ", res[0])
+    # CHECK: RESULT: 4
+
+
+test_pooling_builtin()
+
+
+def test_pooling_generic():
+  with Context() as ctx, Location.unknown():
+    module = Module.create()
+    f64 = F64Type.get()
+    i32 = IntegerType.get_signless(32)
+    with InsertionPoint(module.body):
+
+      @builtin.FuncOp.from_py_func(
+          MemRefType.get((1, 4, 16, 1), f64), MemRefType.get((2, 2), f64),
+          MemRefType.get((1, 2, 4, 1), i32))
+      def pooling_on_buffers(input, shape, output):
+        linalg.pooling_nhwc_sum_poly(
+            input,
+            shape,
+            outs=[output],
+            strides=[2, 4],
+            dilations=[1, 2],
+            emit_generic=True)
+
+    execution_engine = ExecutionEngine(transform(module, pooling_boiler))
+
+    # TODO: FFI-based solution to allow testing and printing with python code.
+    # Prepare arguments: one result i32.
+    # Arguments must be passed as pointers.
+    c_int_p = ctypes.c_int * 1
+    res = c_int_p(-1)
+    execution_engine.invoke("main", res)
+
+    log("RESULT: ", res[0])
+    # CHECK: RESULT: 4
+
+
+test_pooling_generic()
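A usage sketch of the index_dims parameter this patch introduces: a shape-only
operand (K below) never appears in the op body, so index_dims is what binds its
dimensions to reduction loops and lets the config derive an indexing map for it.
The op name in the sketch is hypothetical and is not added by this patch; the
API calls are the ones exercised by the tests above.

```python
# Minimal sketch, assuming the OpDSL star import used by the tests above.
# The op name pooling_1d_sum_poly is hypothetical and not part of this patch.
from mlir.dialects.linalg.opdsl.lang import *


@linalg_structured_op
def pooling_1d_sum_poly(
    I=TensorDef(T1, S.W),
    K=TensorDef(T2, S.KW, index_dims=[D.kw]),
    O=TensorDef(U, S.OW, output=True),
    strides=AttributeDef(S.SW)):
  # K is shape-only: index_dims=[D.kw] maps its single dimension to the kw
  # reduction loop, mirroring the `pool` example in shape_maps_iteration.py.
  O[D.ow] += cast(U, I[D.ow * S.SW + D.kw])
```

This is pooling_nhwc_sum_poly reduced to one spatial dimension; because D.kw
appears only on the right-hand side, it becomes a reduction iterator, and K's
shape symbol S.KW still bounds the iteration domain even though K is never read.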