diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_convolution.py b/mlir/test/python/dialects/linalg/opdsl/emit_convolution.py new file mode 100644 --- /dev/null +++ b/mlir/test/python/dialects/linalg/opdsl/emit_convolution.py @@ -0,0 +1,58 @@ +# RUN: %PYTHON %s | FileCheck %s + +from mlir.ir import * +from mlir.dialects import builtin +from mlir.dialects import linalg +from mlir.dialects import std + +from mlir.dialects.linalg.opdsl.lang import * + +T1 = TV.T1 +T2 = TV.T2 + + +@linalg_structured_op +def conv_poly( + I=TensorDef(T1, S.N, S.IH, S.IW, S.C), + K=TensorDef(T2, S.KH, S.KW, S.C), + O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) + O[D.n, D.oh, D.ow, D.c] += cast( + U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, + D.c]) * cast(U, K[D.kh, D.kw, D.c]) + + +with Context() as ctx, Location.unknown(): + module = Module.create() + f32 = F32Type.get() + i32 = IntegerType.get_signless(32) + with InsertionPoint(module.body): + + # Convolution indexing maps. 
+ # CHECK: #[[$CONV_MAP_I:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 * 2 + d3, d2 * 4 + d4 * 2, d5)> + # CHECK: #[[$CONV_MAP_K:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d4, d5)> + # CHECK: #[[$CONV_MAP_O:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d5)> + + # CHECK-LABEL: @test_f32i32_conv + # CHECK: linalg.generic + # CHECK-SAME: indexing_maps = [#[[$CONV_MAP_I]], #[[$CONV_MAP_K]], #[[$CONV_MAP_O]]] + # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] + # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[FILTER:.+]]: f32, %[[OUT:.+]]: i32) + # CHECK-NEXT: %[[IN_CAST:.+]] = arith.fptosi %[[IN:.+]] : f32 to i32 + # CHECK-NEXT: %[[FILTER_CAST:.+]] = arith.fptosi %[[FILTER:.+]] : f32 to i32 + # CHECK-NEXT: %[[PROD:.+]] = arith.muli %[[IN_CAST]], %[[FILTER_CAST]] : i32 + # CHECK-NEXT: %[[SUM:.+]] = arith.addi %[[OUT]], %[[PROD]] : i32 + # CHECK-NEXT: linalg.yield %[[SUM]] : i32 + # CHECK-NEXT: -> tensor<1x2x4x1xi32> + @builtin.FuncOp.from_py_func( + RankedTensorType.get((1, 4, 16, 1), f32), + RankedTensorType.get((2, 2, 1), f32), + RankedTensorType.get((1, 2, 4, 1), i32)) + def test_f32i32_conv(input, filter, init_result): + return conv_poly( + input, filter, outs=[init_result], strides=[2, 4], dilations=[1, 2]) + + +print(module) diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py new file mode 100644 --- /dev/null +++ b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py @@ -0,0 +1,176 @@ +# RUN: %PYTHON %s | FileCheck %s + +from mlir.ir import * +from mlir.dialects import builtin +from mlir.dialects import linalg +from mlir.dialects import std + +from mlir.dialects.linalg.opdsl.lang import * + +T1 = TV.T1 +T2 = TV.T2 + + +@linalg_structured_op +def matmul_mono( + A=TensorDef(T, S.M, S.K), + B=TensorDef(T, S.K, S.N), + C=TensorDef(T, S.M, S.N, output=True)): + domain(D.m, D.n, D.k) + C[D.m, D.n] += A[D.m, D.k] * B[D.k, 
D.n] + + +@linalg_structured_op +def matmul_poly( + A=TensorDef(T1, S.M, S.K), + B=TensorDef(T2, S.K, S.N), + C=TensorDef(U, S.M, S.N, output=True)): + domain(D.m, D.n, D.k) + C[D.m, D.n] += cast(U, A[D.m, D.k]) * cast(U, B[D.k, D.n]) + + +@linalg_structured_op +def matmul_unsigned_poly( + A=TensorDef(T1, S.M, S.K), + B=TensorDef(T2, S.K, S.N), + C=TensorDef(U, S.M, S.N, output=True)): + domain(D.m, D.n, D.k) + C[D.m, D.n] += cast_unsigned(U, A[D.m, D.k]) * cast_unsigned(U, B[D.k, D.n]) + + +with Context() as ctx, Location.unknown(): + module = Module.create() + f16 = F16Type.get() + f32 = F32Type.get() + f64 = F64Type.get() + i8 = IntegerType.get_signless(8) + i16 = IntegerType.get_signless(16) + i32 = IntegerType.get_signless(32) + with InsertionPoint(module.body): + + # Multiplication indexing maps. We verify only the indexing maps of the + # first multiplication and then do additional tests on casting and body + # generation behavior. + # CHECK: #[[$MUL_MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)> + # CHECK: #[[$MUL_MAP_B:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)> + # CHECK: #[[$MUL_MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)> + + # CHECK-LABEL: func @test_matmul_mono + # CHECK-SAME: %[[A:.+]]: tensor<4x16xf32> + # CHECK-SAME: %[[B:.+]]: tensor<16x8xf32> + # CHECK: %[[INITC:.+]] = linalg.init_tensor [4, 8] : tensor<4x8xf32> + # CHECK: linalg.generic + # CHECK-SAME: indexing_maps = [#[[$MUL_MAP_A]], #[[$MUL_MAP_B]], #[[$MUL_MAP_C]]] + # CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"] + # CHECK-SAME: ins(%[[A]], %[[B]] + # CHECK-SAME: outs(%[[INITC]] + @builtin.FuncOp.from_py_func( + RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8), f32)) + def test_matmul_mono(lhs, rhs): + init_result = linalg.InitTensorOp([4, 8], f32) + return matmul_mono(lhs, rhs, outs=[init_result.result]) + + # CHECK-LABEL: @test_i8i8i32_matmul + # CHECK: ^{{.*}}(%[[A_ARG:.+]]: i8, %[[B_ARG:.+]]: i8, %[[C_ARG:.+]]: i32) + # CHECK-NEXT: 
%[[A_CAST:.+]] = arith.extsi %[[A_ARG]] : i8 to i32 + # CHECK-NEXT: %[[B_CAST:.+]] = arith.extsi %[[B_ARG]] : i8 to i32 + # CHECK-NEXT: %[[MUL:.+]] = arith.muli %[[A_CAST]], %[[B_CAST]] : i32 + # CHECK-NEXT: %[[ADD:.+]] = arith.addi %[[C_ARG]], %[[MUL]] : i32 + # CHECK-NEXT: linalg.yield %[[ADD]] : i32 + # CHECK-NEXT: -> tensor<4x8xi32> + @builtin.FuncOp.from_py_func( + RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i8), + RankedTensorType.get((4, 8), i32)) + def test_i8i8i32_matmul(lhs, rhs, init_result): + return matmul_poly(lhs, rhs, outs=[init_result]) + + # CHECK-LABEL: @test_i8i8i32_matmul_unsigned + # CHECK: = arith.extui + # CHECK: = arith.extui + @builtin.FuncOp.from_py_func( + RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i8), + RankedTensorType.get((4, 8), i32)) + def test_i8i8i32_matmul_unsigned(lhs, rhs, init_result): + return matmul_unsigned_poly(lhs, rhs, outs=[init_result]) + + # CHECK-LABEL: @test_i8i16i32_matmul + # CHECK: ^{{.*}}(%[[A_ARG:.+]]: i8, %[[B_ARG:.+]]: i16, %[[C_ARG:.+]]: i32) + # CHECK-NEXT: %[[A_CAST:.+]] = arith.extsi %[[A_ARG]] : i8 to i32 + # CHECK-NEXT: %[[B_CAST:.+]] = arith.extsi %[[B_ARG]] : i16 to i32 + # CHECK-NEXT: %[[MUL:.+]] = arith.muli %[[A_CAST]], %[[B_CAST]] : i32 + # CHECK-NEXT: %[[ADD:.+]] = arith.addi %[[C_ARG]], %[[MUL]] : i32 + # CHECK-NEXT: linalg.yield %[[ADD]] : i32 + # CHECK-NEXT: -> tensor<4x8xi32> + @builtin.FuncOp.from_py_func( + RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i16), + RankedTensorType.get((4, 8), i32)) + def test_i8i16i32_matmul(lhs, rhs, init_result): + return matmul_poly(lhs, rhs, outs=[init_result]) + + # CHECK-LABEL: @test_i32i32i16_matmul + # CHECK: ^{{.*}}(%[[A_ARG:.+]]: i32, %[[B_ARG:.+]]: i32, %[[C_ARG:.+]]: i16) + # CHECK-NEXT: %[[A_CAST:.+]] = arith.trunci %[[A_ARG]] : i32 to i16 + # CHECK-NEXT: %[[B_CAST:.+]] = arith.trunci %[[B_ARG]] : i32 to i16 + # CHECK-NEXT: %[[MUL:.+]] = arith.muli %[[A_CAST]], %[[B_CAST]] : i16 + # 
CHECK-NEXT: %[[ADD:.+]] = arith.addi %[[C_ARG]], %[[MUL]] : i16 + # CHECK-NEXT: linalg.yield %[[ADD]] : i16 + # CHECK-NEXT: -> tensor<4x8xi16> + @builtin.FuncOp.from_py_func( + RankedTensorType.get((4, 16), i32), RankedTensorType.get((16, 8), i32), + RankedTensorType.get((4, 8), i16)) + def test_i32i32i16_matmul(lhs, rhs, init_result): + return matmul_poly(lhs, rhs, outs=[init_result]) + + # CHECK-LABEL: @test_i8i8f32_matmul + # CHECK: ^{{.*}}(%[[A_ARG:.+]]: i8, %[[B_ARG:.+]]: i8, %[[C_ARG:.+]]: f32) + # CHECK-NEXT: %[[A_CAST:.+]] = arith.sitofp %[[A_ARG]] : i8 to f32 + # CHECK-NEXT: %[[B_CAST:.+]] = arith.sitofp %[[B_ARG]] : i8 to f32 + # CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[A_CAST]], %[[B_CAST]] : f32 + # CHECK-NEXT: %[[ADD:.+]] = arith.addf %[[C_ARG]], %[[MUL]] : f32 + # CHECK-NEXT: linalg.yield %[[ADD]] : f32 + # CHECK-NEXT: -> tensor<4x8xf32> + @builtin.FuncOp.from_py_func( + RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i8), + RankedTensorType.get((4, 8), f32)) + def test_i8i8f32_matmul(lhs, rhs, init_result): + return matmul_poly(lhs, rhs, outs=[init_result]) + + # CHECK-LABEL: @test_i8i8f32_matmul_unsigned + # CHECK: = arith.uitofp + # CHECK: = arith.uitofp + @builtin.FuncOp.from_py_func( + RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i8), + RankedTensorType.get((4, 8), f32)) + def test_i8i8f32_matmul_unsigned(lhs, rhs, init_result): + return matmul_unsigned_poly(lhs, rhs, outs=[init_result]) + + # CHECK-LABEL: @test_f16f16f32_matmul + # CHECK: ^{{.*}}(%[[A_ARG:.+]]: f16, %[[B_ARG:.+]]: f16, %[[C_ARG:.+]]: f32) + # CHECK-NEXT: %[[A_CAST:.+]] = arith.extf %[[A_ARG]] : f16 to f32 + # CHECK-NEXT: %[[B_CAST:.+]] = arith.extf %[[B_ARG]] : f16 to f32 + # CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[A_CAST]], %[[B_CAST]] : f32 + # CHECK-NEXT: %[[ADD:.+]] = arith.addf %[[C_ARG]], %[[MUL]] : f32 + # CHECK-NEXT: linalg.yield %[[ADD]] : f32 + # CHECK-NEXT: -> tensor<4x8xf32> + @builtin.FuncOp.from_py_func( + 
RankedTensorType.get((4, 16), f16), RankedTensorType.get((16, 8), f16), + RankedTensorType.get((4, 8), f32)) + def test_f16f16f32_matmul(lhs, rhs, init_result): + return matmul_poly(lhs, rhs, outs=[init_result]) + + # CHECK-LABEL: @test_f64f64f32_matmul + # CHECK: ^{{.*}}(%[[A_ARG:.+]]: f64, %[[B_ARG:.+]]: f64, %[[C_ARG:.+]]: f32) + # CHECK-NEXT: %[[A_CAST:.+]] = arith.truncf %[[A_ARG]] : f64 to f32 + # CHECK-NEXT: %[[B_CAST:.+]] = arith.truncf %[[B_ARG]] : f64 to f32 + # CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[A_CAST]], %[[B_CAST]] : f32 + # CHECK-NEXT: %[[ADD:.+]] = arith.addf %[[C_ARG]], %[[MUL]] : f32 + # CHECK-NEXT: linalg.yield %[[ADD]] : f32 + # CHECK-NEXT: -> tensor<4x8xf32> + @builtin.FuncOp.from_py_func( + RankedTensorType.get((4, 16), f64), RankedTensorType.get((16, 8), f64), + RankedTensorType.get((4, 8), f32)) + def test_f64f64f32_matmul(lhs, rhs, init_result): + return matmul_poly(lhs, rhs, outs=[init_result]) + + +print(module) diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_misc.py b/mlir/test/python/dialects/linalg/opdsl/emit_misc.py new file mode 100644 --- /dev/null +++ b/mlir/test/python/dialects/linalg/opdsl/emit_misc.py @@ -0,0 +1,93 @@ +# RUN: %PYTHON %s | FileCheck %s + +from mlir.ir import * +from mlir.dialects import builtin +from mlir.dialects import linalg +from mlir.dialects import std + +from mlir.dialects.linalg.opdsl.lang import * + +# This tests miscellaneous features of the emitter that are not tested by the +# matmul, convolution, or pooling tests. The features include: +# - constant defined in the body +# - fixed/predefined types +# - exponential functions +# - custom op names.
+ +@linalg_structured_op +def fill_rng_poly( + min=ScalarDef(F64), + max=ScalarDef(F64), + seed=ScalarDef(I32), + O=TensorDef(T, S.M, S.N, output=True)): + multiplier = cast(I32, const(1103515245)) + increment = cast(I32, const(12345)) + rand1 = (cast(I32, index(D.m)) + seed) * multiplier + increment + rand2 = (cast(I32, index(D.n)) + rand1) * multiplier + increment + inv_range = cast(F64, const(2.3283064e-10)) + offset = cast(F64, const(2147483647)) + scaling = (max - min) * inv_range + O[D.m, D.n] = cast(T, (offset + cast(F64, rand2)) * scaling + min) + + +@linalg_structured_op +def soft_plus_poly( + I=TensorDef(T, S.M, S.N), O=TensorDef(U, S.M, S.N, output=True)): + O[D.m, D.n] = \ + PrimFn.log(cast(U, const(1.0)) + cast(U, PrimFn.exp(I[D.m, D.n]))) + + +@linalg_structured_op(op_name="custom_op_name") +def non_default_op_name(I=TensorDef(T, S.N), O=TensorDef(T, S.N, output=True)): + O[D.n] = I[D.n] + + +with Context() as ctx, Location.unknown(): + module = Module.create() + f32 = F32Type.get() + f64 = F64Type.get() + i32 = IntegerType.get_signless(32) + with InsertionPoint(module.body): + + # CHECK-LABEL: @test_i32_fill_rng + # CHECK: ^{{.*}}(%[[MIN:.+]]: f64, %[[MAX:.+]]: f64, %[[SEED:.+]]: i32, %{{.*}} + # CHECK-DAG: %[[IDX0:.+]] = linalg.index 0 : index + # CHECK-DAG: %[[IDX0_CAST:.+]] = arith.index_cast %[[IDX0]] : index to i32 + # CHECK-DAG: %[[RND0:.+]] = arith.addi %[[IDX0_CAST]], %[[SEED]] : i32 + # CHECK-DAG: %[[CST0:.+]] = arith.constant 1103515245 : i64 + # CHECK-DAG: %[[CST0_CAST:.+]] = arith.trunci %[[CST0]] : i64 to i32 + # Skip the remaining random number computation and match the scaling logic. 
+ # CHECK-DAG: %[[DIFF:.+]] = arith.subf %[[MAX]], %[[MIN]] : f64 + # CHECK-DAG: %[[CST3:.+]] = arith.constant 2.3283063999999999E-10 : f64 + # CHECK-DAG: %[[FACT:.+]] = arith.mulf %[[DIFF]], %[[CST3]] : f64 + # CHECK-DAG: %[[RND4:.+]] = arith.mulf %{{.+}}, %[[FACT]] : f64 + # CHECK-DAG: %[[RND5:.+]] = arith.addf %[[RND4]], %[[MIN]] : f64 + # CHECK-DAG: %{{.*}} = arith.fptosi %[[RND5]] : f64 to i32 + @builtin.FuncOp.from_py_func(f64, f64, i32, + RankedTensorType.get((4, 16), i32)) + def test_i32_fill_rng(min, max, seed, init_result): + return fill_rng_poly(min, max, seed, outs=[init_result]) + + # CHECK-LABEL: @test_f32_soft_plus + # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[OUT:.+]]: f32) + # CHECK-NEXT: %[[C1:.+]] = arith.constant 1.000000e+00 : f64 + # CHECK-NEXT: %[[C1_CAST:.+]] = arith.truncf %[[C1]] : f64 to f32 + # CHECK-NEXT: %[[EXP:.+]] = math.exp %[[IN]] : f32 + # CHECK-NEXT: %[[SUM:.+]] = arith.addf %[[C1_CAST]], %[[EXP]] : f32 + # CHECK-NEXT: %[[LOG:.+]] = math.log %[[SUM]] : f32 + # CHECK-NEXT: linalg.yield %[[LOG]] : f32 + # CHECK-NEXT: -> tensor<4x16xf32> + @builtin.FuncOp.from_py_func( + RankedTensorType.get((4, 16), f32), RankedTensorType.get((4, 16), f32)) + def test_f32_soft_plus(input, init_result): + return soft_plus_poly(input, outs=[init_result]) + + # Just check that we don't assert out on name mismatch. 
+ # CHECK-LABEL: @test_non_default_op_name + @builtin.FuncOp.from_py_func( + RankedTensorType.get((42,), f32), RankedTensorType.get((42,), f32)) + def test_non_default_op_name(input, init_result): + return non_default_op_name(input, outs=[init_result]) + + +print(module) diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_pooling.py b/mlir/test/python/dialects/linalg/opdsl/emit_pooling.py new file mode 100644 --- /dev/null +++ b/mlir/test/python/dialects/linalg/opdsl/emit_pooling.py @@ -0,0 +1,154 @@ +# RUN: %PYTHON %s | FileCheck %s + +from mlir.ir import * +from mlir.dialects import builtin +from mlir.dialects import linalg +from mlir.dialects import std + +from mlir.dialects.linalg.opdsl.lang import * + +T1 = TV.T1 +T2 = TV.T2 + + +@linalg_structured_op +def pooling_max_poly( + I=TensorDef(T1, S.N, S.H, S.W, S.C), + K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), + O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) + O[D.n, D.oh, D.ow, D.c] = ReduceFn.max(D.kh, D.kw)( + cast(U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, + D.c])) + + +@linalg_structured_op +def pooling_max_unsigned_poly( + I=TensorDef(T1, S.N, S.H, S.W, S.C), + K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), + O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) + O[D.n, D.oh, D.ow, D.c] = ReduceFn.max_unsigned(D.kh, D.kw)( + cast_unsigned( + U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c])) + + +@linalg_structured_op +def pooling_min_poly( + I=TensorDef(T1, S.N, S.H, S.W, S.C), + K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), + O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) + O[D.n, 
D.oh, D.ow, D.c] = ReduceFn.min(D.kh, D.kw)( + cast(U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, + D.c])) + + +@linalg_structured_op +def pooling_min_unsigned_poly( + I=TensorDef(T1, S.N, S.H, S.W, S.C), + K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), + O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), + strides=AttributeDef(S.SH, S.SW), + dilations=AttributeDef(S.DH, S.DW)): + domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) + O[D.n, D.oh, D.ow, D.c] = ReduceFn.min_unsigned(D.kh, D.kw)( + cast_unsigned( + U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c])) + + +with Context() as ctx, Location.unknown(): + module = Module.create() + f32 = F32Type.get() + i32 = IntegerType.get_signless(32) + with InsertionPoint(module.body): + + # Pooling indexing maps. + # CHECK: #[[$POOL_MAP_I:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 * 2 + d3, d2 * 4 + d4 * 2, d5)> + # CHECK: #[[$POOL_MAP_K:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d4)> + # CHECK: #[[$POOL_MAP_O:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d5)> + + # CHECK-LABEL: @test_f32i32_max_pooling + # CHECK: linalg.generic + # CHECK-SAME: indexing_maps = [#[[$POOL_MAP_I]], #[[$POOL_MAP_K]], #[[$POOL_MAP_O]]] + # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] + # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[SHAPE:.+]]: f32, %[[OUT:.+]]: i32) + # CHECK-NEXT: %[[IN_CAST:.+]] = arith.fptosi %[[IN:.+]] : f32 to i32 + # CHECK-NEXT: %[[MAX:.+]] = arith.maxsi %[[OUT]], %[[IN_CAST:.+]] : i32 + # CHECK-NEXT: linalg.yield %[[MAX]] : i32 + # CHECK-NEXT: -> tensor<1x2x4x1xi32> + @builtin.FuncOp.from_py_func( + RankedTensorType.get((1, 4, 16, 1), f32), + RankedTensorType.get((2, 2), f32), + RankedTensorType.get((1, 2, 4, 1), i32)) + def test_f32i32_max_pooling(input, shape, init_result): + return pooling_max_poly( + input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) + + # CHECK-LABEL: 
@test_f32i32_max_unsigned_pooling + # CHECK: = arith.fptoui + # CHECK: = arith.maxui + @builtin.FuncOp.from_py_func( + RankedTensorType.get((1, 4, 16, 1), f32), + RankedTensorType.get((2, 2), f32), + RankedTensorType.get((1, 2, 4, 1), i32)) + def test_f32i32_max_unsigned_pooling(input, shape, init_result): + return pooling_max_unsigned_poly( + input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) + + # CHECK-LABEL: @test_f32f32_max_pooling + # CHECK: linalg.generic + # CHECK-SAME: indexing_maps = [#[[$POOL_MAP_I]], #[[$POOL_MAP_K]], #[[$POOL_MAP_O]]] + # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] + # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[SHAPE:.+]]: f32, %[[OUT:.+]]: f32) + # CHECK-NEXT: %[[MAX:.+]] = arith.maxf %[[OUT]], %[[IN:.+]] : f32 + # CHECK-NEXT: linalg.yield %[[MAX]] : f32 + # CHECK-NEXT: -> tensor<1x2x4x1xf32> + @builtin.FuncOp.from_py_func( + RankedTensorType.get((1, 4, 16, 1), f32), + RankedTensorType.get((2, 2), f32), + RankedTensorType.get((1, 2, 4, 1), f32)) + def test_f32f32_max_pooling(input, shape, init_result): + return pooling_max_poly( + input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) + + # CHECK-LABEL: @test_f32i32_min_pooling + # CHECK: = arith.fptosi + # CHECK: = arith.minsi + @builtin.FuncOp.from_py_func( + RankedTensorType.get((1, 4, 16, 1), f32), + RankedTensorType.get((2, 2), f32), + RankedTensorType.get((1, 2, 4, 1), i32)) + def test_f32i32_min_pooling(input, shape, init_result): + return pooling_min_poly( + input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) + + # CHECK-LABEL: @test_f32i32_min_unsigned_pooling + # CHECK: = arith.fptoui + # CHECK: = arith.minui + @builtin.FuncOp.from_py_func( + RankedTensorType.get((1, 4, 16, 1), f32), + RankedTensorType.get((2, 2), f32), + RankedTensorType.get((1, 2, 4, 1), i32)) + def test_f32i32_min_unsigned_pooling(input, shape, init_result): + return pooling_min_unsigned_poly( + input, shape, 
outs=[init_result], strides=[2, 4], dilations=[1, 2]) + + # CHECK-LABEL: @test_f32f32_min_pooling + # CHECK: = arith.minf + @builtin.FuncOp.from_py_func( + RankedTensorType.get((1, 4, 16, 1), f32), + RankedTensorType.get((2, 2), f32), + RankedTensorType.get((1, 2, 4, 1), f32)) + def test_f32f32_min_pooling(input, shape, init_result): + return pooling_min_poly( + input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) + + +print(module) diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py b/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py deleted file mode 100644 --- a/mlir/test/python/dialects/linalg/opdsl/emit_structured_generic.py +++ /dev/null @@ -1,411 +0,0 @@ -# RUN: %PYTHON %s | FileCheck %s - -from mlir.ir import * -from mlir.dialects import builtin -from mlir.dialects import linalg -from mlir.dialects import std - -from mlir.dialects.linalg.opdsl.lang import * - -T1 = TV.T1 -T2 = TV.T2 - - -@linalg_structured_op -def matmul_mono( - A=TensorDef(T, S.M, S.K), - B=TensorDef(T, S.K, S.N), - C=TensorDef(T, S.M, S.N, output=True)): - domain(D.m, D.n, D.k) - C[D.m, D.n] += A[D.m, D.k] * B[D.k, D.n] - - -@linalg_structured_op -def matmul_poly( - A=TensorDef(T1, S.M, S.K), - B=TensorDef(T2, S.K, S.N), - C=TensorDef(U, S.M, S.N, output=True)): - domain(D.m, D.n, D.k) - C[D.m, D.n] += cast(U, A[D.m, D.k]) * cast(U, B[D.k, D.n]) - - -@linalg_structured_op -def matmul_unsigned_poly( - A=TensorDef(T1, S.M, S.K), - B=TensorDef(T2, S.K, S.N), - C=TensorDef(U, S.M, S.N, output=True)): - domain(D.m, D.n, D.k) - C[D.m, D.n] += cast_unsigned(U, A[D.m, D.k]) * cast_unsigned(U, B[D.k, D.n]) - - -@linalg_structured_op -def conv_poly( - I=TensorDef(T1, S.N, S.IH, S.IW, S.C), - K=TensorDef(T2, S.KH, S.KW, S.C), - O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), - strides=AttributeDef(S.SH, S.SW), - dilations=AttributeDef(S.DH, S.DW)): - domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) - O[D.n, D.oh, D.ow, D.c] += cast( - U, I[D.n, 
D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, - D.c]) * cast(U, K[D.kh, D.kw, D.c]) - - -@linalg_structured_op -def pooling_max_poly( - I=TensorDef(T1, S.N, S.H, S.W, S.C), - K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), - O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), - strides=AttributeDef(S.SH, S.SW), - dilations=AttributeDef(S.DH, S.DW)): - domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) - O[D.n, D.oh, D.ow, D.c] = ReduceFn.max(D.kh, D.kw)( - cast(U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, - D.c])) - - -@linalg_structured_op -def pooling_max_unsigned_poly( - I=TensorDef(T1, S.N, S.H, S.W, S.C), - K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), - O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), - strides=AttributeDef(S.SH, S.SW), - dilations=AttributeDef(S.DH, S.DW)): - domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) - O[D.n, D.oh, D.ow, D.c] = ReduceFn.max_unsigned(D.kh, D.kw)( - cast_unsigned( - U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c])) - - -@linalg_structured_op -def pooling_min_poly( - I=TensorDef(T1, S.N, S.H, S.W, S.C), - K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), - O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), - strides=AttributeDef(S.SH, S.SW), - dilations=AttributeDef(S.DH, S.DW)): - domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) - O[D.n, D.oh, D.ow, D.c] = ReduceFn.min(D.kh, D.kw)( - cast(U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, - D.c])) - - -@linalg_structured_op -def pooling_min_unsigned_poly( - I=TensorDef(T1, S.N, S.H, S.W, S.C), - K=TensorDef(T2, S.KH, S.KW, index_dims=[D.kh, D.kw]), - O=TensorDef(U, S.N, S.OH, S.OW, S.C, output=True), - strides=AttributeDef(S.SH, S.SW), - dilations=AttributeDef(S.DH, S.DW)): - domain(D.n, D.oh, D.ow, D.kh, D.kw, D.c) - O[D.n, D.oh, D.ow, D.c] = ReduceFn.min_unsigned(D.kh, D.kw)( - cast_unsigned( - U, I[D.n, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW, D.c])) - - -@linalg_structured_op -def fill_rng_poly( - 
min=ScalarDef(F64), - max=ScalarDef(F64), - seed=ScalarDef(I32), - O=TensorDef(T, S.M, S.N, output=True)): - multiplier = cast(I32, const(1103515245)) - increment = cast(I32, const(12345)) - rand1 = (cast(I32, index(D.m)) + seed) * multiplier + increment - rand2 = (cast(I32, index(D.n)) + rand1) * multiplier + increment - inv_range = cast(F64, const(2.3283064e-10)) - offset = cast(F64, const(2147483647)) - scaling = (max - min) * inv_range - O[D.m, D.n] = cast(T, (offset + cast(F64, rand2)) * scaling + min) - - -@linalg_structured_op -def soft_plus_poly( - I=TensorDef(T, S.M, S.N), O=TensorDef(U, S.M, S.N, output=True)): - O[D.m, D.n] = \ - PrimFn.log(cast(U, const(1.0)) + cast(U, PrimFn.exp(I[D.m, D.n]))) - - -@linalg_structured_op(op_name="custom_op_name") -def non_default_op_name(I=TensorDef(T, S.N), O=TensorDef(T, S.N, output=True)): - O[D.n] = I[D.n] - - -with Context() as ctx, Location.unknown(): - module = Module.create() - f16 = F16Type.get() - f32 = F32Type.get() - f64 = F64Type.get() - i8 = IntegerType.get_signless(8) - i16 = IntegerType.get_signless(16) - i32 = IntegerType.get_signless(32) - with InsertionPoint(module.body): - - # Multiplication indexing maps. We verify only the indexing maps of the - # first multiplication and then do additional tests on casting and body - # generation behavior. - # CHECK: #[[$MUL_MAP_A:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)> - # CHECK: #[[$MUL_MAP_B:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)> - # CHECK: #[[$MUL_MAP_C:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)> - - # Convolution indexing maps. - # CHECK: #[[$CONV_MAP_I:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1 * 2 + d3, d2 * 4 + d4 * 2, d5)> - # CHECK: #[[$CONV_MAP_K:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d4, d5)> - # CHECK: #[[$CONV_MAP_O:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d5)> - - # Pooling indexing maps. 
- # CHECK: #[[$POOL_MAP_K:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d3, d4)> - - # CHECK-LABEL: func @test_matmul_mono - # CHECK-SAME: %[[A:.+]]: tensor<4x16xf32> - # CHECK-SAME: %[[B:.+]]: tensor<16x8xf32> - - # CHECK: %[[INITC:.+]] = linalg.init_tensor [4, 8] : tensor<4x8xf32> - # CHECK: linalg.generic - # CHECK-SAME: indexing_maps = [#[[$MUL_MAP_A]], #[[$MUL_MAP_B]], #[[$MUL_MAP_C]]] - # CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"] - # CHECK-SAME: ins(%[[A]], %[[B]] - # CHECK-SAME: outs(%[[INITC]] - - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8), f32)) - def test_matmul_mono(lhs, rhs): - init_result = linalg.InitTensorOp([4, 8], f32) - return matmul_mono(lhs, rhs, outs=[init_result.result]) - - # CHECK-LABEL: @test_i8i8i32_matmul - # CHECK: ^{{.*}}(%[[A_ARG:.+]]: i8, %[[B_ARG:.+]]: i8, %[[C_ARG:.+]]: i32) - # CHECK-NEXT: %[[A_CAST:.+]] = arith.extsi %[[A_ARG]] : i8 to i32 - # CHECK-NEXT: %[[B_CAST:.+]] = arith.extsi %[[B_ARG]] : i8 to i32 - # CHECK-NEXT: %[[MUL:.+]] = arith.muli %[[A_CAST]], %[[B_CAST]] : i32 - # CHECK-NEXT: %[[ADD:.+]] = arith.addi %[[C_ARG]], %[[MUL]] : i32 - # CHECK-NEXT: linalg.yield %[[ADD]] : i32 - # CHECK-NEXT: -> tensor<4x8xi32> - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i8), - RankedTensorType.get((4, 8), i32)) - def test_i8i8i32_matmul(lhs, rhs, init_result): - return matmul_poly(lhs, rhs, outs=[init_result]) - - # CHECK-LABEL: @test_i8i8i32_matmul_unsigned - # CHECK: = arith.extui - # CHECK: = arith.extui - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i8), - RankedTensorType.get((4, 8), i32)) - def test_i8i8i32_matmul_unsigned(lhs, rhs, init_result): - return matmul_unsigned_poly(lhs, rhs, outs=[init_result]) - - # CHECK-LABEL: @test_i8i16i32_matmul - # CHECK: ^{{.*}}(%[[A_ARG:.+]]: i8, %[[B_ARG:.+]]: i16, %[[C_ARG:.+]]: i32) - # CHECK-NEXT: 
%[[A_CAST:.+]] = arith.extsi %[[A_ARG]] : i8 to i32 - # CHECK-NEXT: %[[B_CAST:.+]] = arith.extsi %[[B_ARG]] : i16 to i32 - # CHECK-NEXT: %[[MUL:.+]] = arith.muli %[[A_CAST]], %[[B_CAST]] : i32 - # CHECK-NEXT: %[[ADD:.+]] = arith.addi %[[C_ARG]], %[[MUL]] : i32 - # CHECK-NEXT: linalg.yield %[[ADD]] : i32 - # CHECK-NEXT: -> tensor<4x8xi32> - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i16), - RankedTensorType.get((4, 8), i32)) - def test_i8i16i32_matmul(lhs, rhs, init_result): - return matmul_poly(lhs, rhs, outs=[init_result]) - - # CHECK-LABEL: @test_i32i32i16_matmul - # CHECK: ^{{.*}}(%[[A_ARG:.+]]: i32, %[[B_ARG:.+]]: i32, %[[C_ARG:.+]]: i16) - # CHECK-NEXT: %[[A_CAST:.+]] = arith.trunci %[[A_ARG]] : i32 to i16 - # CHECK-NEXT: %[[B_CAST:.+]] = arith.trunci %[[B_ARG]] : i32 to i16 - # CHECK-NEXT: %[[MUL:.+]] = arith.muli %[[A_CAST]], %[[B_CAST]] : i16 - # CHECK-NEXT: %[[ADD:.+]] = arith.addi %[[C_ARG]], %[[MUL]] : i16 - # CHECK-NEXT: linalg.yield %[[ADD]] : i16 - # CHECK-NEXT: -> tensor<4x8xi16> - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), i32), RankedTensorType.get((16, 8), i32), - RankedTensorType.get((4, 8), i16)) - def test_i32i32i16_matmul(lhs, rhs, init_result): - return matmul_poly(lhs, rhs, outs=[init_result]) - - # CHECK-LABEL: @test_i8i8f32_matmul - # CHECK: ^{{.*}}(%[[A_ARG:.+]]: i8, %[[B_ARG:.+]]: i8, %[[C_ARG:.+]]: f32) - # CHECK-NEXT: %[[A_CAST:.+]] = arith.sitofp %[[A_ARG]] : i8 to f32 - # CHECK-NEXT: %[[B_CAST:.+]] = arith.sitofp %[[B_ARG]] : i8 to f32 - # CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[A_CAST]], %[[B_CAST]] : f32 - # CHECK-NEXT: %[[ADD:.+]] = arith.addf %[[C_ARG]], %[[MUL]] : f32 - # CHECK-NEXT: linalg.yield %[[ADD]] : f32 - # CHECK-NEXT: -> tensor<4x8xf32> - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i8), - RankedTensorType.get((4, 8), f32)) - def test_i8i8f32_matmul(lhs, rhs, init_result): - return 
matmul_poly(lhs, rhs, outs=[init_result]) - - # CHECK-LABEL: @test_i8i8f32_matmul_unsigned - # CHECK: = arith.uitofp - # CHECK: = arith.uitofp - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), i8), RankedTensorType.get((16, 8), i8), - RankedTensorType.get((4, 8), f32)) - def test_i8i8f32_matmul_unsigned(lhs, rhs, init_result): - return matmul_unsigned_poly(lhs, rhs, outs=[init_result]) - - # CHECK-LABEL: @test_f16f16f32_matmul - # CHECK: ^{{.*}}(%[[A_ARG:.+]]: f16, %[[B_ARG:.+]]: f16, %[[C_ARG:.+]]: f32) - # CHECK-NEXT: %[[A_CAST:.+]] = arith.extf %[[A_ARG]] : f16 to f32 - # CHECK-NEXT: %[[B_CAST:.+]] = arith.extf %[[B_ARG]] : f16 to f32 - # CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[A_CAST]], %[[B_CAST]] : f32 - # CHECK-NEXT: %[[ADD:.+]] = arith.addf %[[C_ARG]], %[[MUL]] : f32 - # CHECK-NEXT: linalg.yield %[[ADD]] : f32 - # CHECK-NEXT: -> tensor<4x8xf32> - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), f16), RankedTensorType.get((16, 8), f16), - RankedTensorType.get((4, 8), f32)) - def test_f16f16f32_matmul(lhs, rhs, init_result): - return matmul_poly(lhs, rhs, outs=[init_result]) - - # CHECK-LABEL: @test_f64f64f32_matmul - # CHECK: ^{{.*}}(%[[A_ARG:.+]]: f64, %[[B_ARG:.+]]: f64, %[[C_ARG:.+]]: f32) - # CHECK-NEXT: %[[A_CAST:.+]] = arith.truncf %[[A_ARG]] : f64 to f32 - # CHECK-NEXT: %[[B_CAST:.+]] = arith.truncf %[[B_ARG]] : f64 to f32 - # CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[A_CAST]], %[[B_CAST]] : f32 - # CHECK-NEXT: %[[ADD:.+]] = arith.addf %[[C_ARG]], %[[MUL]] : f32 - # CHECK-NEXT: linalg.yield %[[ADD]] : f32 - # CHECK-NEXT: -> tensor<4x8xf32> - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), f64), RankedTensorType.get((16, 8), f64), - RankedTensorType.get((4, 8), f32)) - def test_f64f64f32_matmul(lhs, rhs, init_result): - return matmul_poly(lhs, rhs, outs=[init_result]) - - # CHECK-LABEL: @test_f32i32_conv - # CHECK: linalg.generic - # CHECK-SAME: indexing_maps = [#[[$CONV_MAP_I]], #[[$CONV_MAP_K]], 
#[[$CONV_MAP_O]]] - # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] - # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[FILTER:.+]]: f32, %[[OUT:.+]]: i32) - # CHECK-NEXT: %[[IN_CAST:.+]] = arith.fptosi %[[IN:.+]] : f32 to i32 - # CHECK-NEXT: %[[FILTER_CAST:.+]] = arith.fptosi %[[FILTER:.+]] : f32 to i32 - # CHECK-NEXT: %[[PROD:.+]] = arith.muli %[[IN_CAST]], %[[FILTER_CAST]] : i32 - # CHECK-NEXT: %[[SUM:.+]] = arith.addi %[[OUT]], %[[PROD]] : i32 - # CHECK-NEXT: linalg.yield %[[SUM]] : i32 - # CHECK-NEXT: -> tensor<2x4xi32> - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2, 1), - f32), - RankedTensorType.get((2, 4), i32)) - def test_f32i32_conv(input, filter, init_result): - return conv_poly( - input, filter, outs=[init_result], strides=[2, 4], dilations=[1, 2]) - - # CHECK-LABEL: @test_f32i32_max_pooling - # CHECK: linalg.generic - # CHECK-SAME: indexing_maps = [#[[$CONV_MAP_I]], #[[$POOL_MAP_K]], #[[$CONV_MAP_O]]] - # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] - # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[SHAPE:.+]]: f32, %[[OUT:.+]]: i32) - # CHECK-NEXT: %[[IN_CAST:.+]] = arith.fptosi %[[IN:.+]] : f32 to i32 - # CHECK-NEXT: %[[MAX:.+]] = arith.maxsi %[[OUT]], %[[IN_CAST:.+]] : i32 - # CHECK-NEXT: linalg.yield %[[MAX]] : i32 - # CHECK-NEXT: -> tensor<2x4xi32> - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2), f32), - RankedTensorType.get((2, 4), i32)) - def test_f32i32_max_pooling(input, shape, init_result): - return pooling_max_poly( - input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) - - # CHECK-LABEL: @test_f32i32_max_unsigned_pooling - # CHECK: = arith.fptoui - # CHECK: = arith.maxui - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2), f32), - RankedTensorType.get((2, 4), i32)) - def 
test_f32i32_max_unsigned_pooling(input, shape, init_result): - return pooling_max_unsigned_poly( - input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) - - # CHECK-LABEL: @test_f32f32_max_pooling - # CHECK: linalg.generic - # CHECK-SAME: indexing_maps = [#[[$CONV_MAP_I]], #[[$POOL_MAP_K]], #[[$CONV_MAP_O]]] - # CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction", "parallel"] - # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[SHAPE:.+]]: f32, %[[OUT:.+]]: f32) - # CHECK-NEXT: %[[MAX:.+]] = arith.maxf %[[OUT]], %[[IN:.+]] : f32 - # CHECK-NEXT: linalg.yield %[[MAX]] : f32 - # CHECK-NEXT: -> tensor<2x4xf32> - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2), f32), - RankedTensorType.get((2, 4), f32)) - def test_f32f32_max_pooling(input, shape, init_result): - return pooling_max_poly( - input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) - - # CHECK-LABEL: @test_f32i32_min_pooling - # CHECK: = arith.fptosi - # CHECK: = arith.minsi - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2), f32), - RankedTensorType.get((2, 4), i32)) - def test_f32i32_min_pooling(input, shape, init_result): - return pooling_min_poly( - input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) - - # CHECK-LABEL: @test_f32i32_min_unsigned_pooling - # CHECK: = arith.fptoui - # CHECK: = arith.minui - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2), f32), - RankedTensorType.get((2, 4), i32)) - def test_f32i32_min_unsigned_pooling(input, shape, init_result): - return pooling_min_unsigned_poly( - input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) - - # CHECK-LABEL: @test_f32f32_min_pooling - # CHECK: = arith.minf - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), f32), RankedTensorType.get((2, 2), f32), - RankedTensorType.get((2, 4), f32)) - def 
test_f32f32_min_pooling(input, shape, init_result): - return pooling_min_poly( - input, shape, outs=[init_result], strides=[2, 4], dilations=[1, 2]) - - # CHECK-LABEL: @test_i32_fill_rng - # CHECK: ^{{.*}}(%[[MIN:.+]]: f64, %[[MAX:.+]]: f64, %[[SEED:.+]]: i32, %{{.*}} - # CHECK-DAG: %[[IDX0:.+]] = linalg.index 0 : index - # CHECK-DAG: %[[IDX0_CAST:.+]] = arith.index_cast %[[IDX0]] : index to i32 - # CHECK-DAG: %[[RND0:.+]] = arith.addi %[[IDX0_CAST]], %[[SEED]] : i32 - # CHECK-DAG: %[[CST0:.+]] = arith.constant 1103515245 : i64 - # CHECK-DAG: %[[CST0_CAST:.+]] = arith.trunci %[[CST0]] : i64 to i32 - # Skip the remaining random number computation and match the scaling logic. - # CHECK-DAG: %[[DIFF:.+]] = arith.subf %[[MAX]], %[[MIN]] : f64 - # CHECK-DAG: %[[CST3:.+]] = arith.constant 2.3283063999999999E-10 : f64 - # CHECK-DAG: %[[FACT:.+]] = arith.mulf %[[DIFF]], %[[CST3]] : f64 - # CHECK-DAG: %[[RND4:.+]] = arith.mulf %{{.+}}, %[[FACT]] : f64 - # CHECK-DAG: %[[RND5:.+]] = arith.addf %[[RND4]], %[[MIN]] : f64 - # CHECK-DAG: %{{.*}} = arith.fptosi %[[RND5]] : f64 to i32 - @builtin.FuncOp.from_py_func(f64, f64, i32, - RankedTensorType.get((4, 16), i32)) - def test_i32_fill_rng(min, max, seed, init_result): - return fill_rng_poly(min, max, seed, outs=[init_result]) - - # CHECK-LABEL: @test_f32_soft_plus - # CHECK: ^{{.*}}(%[[IN:.+]]: f32, %[[OUT:.+]]: f32) - # CHECK-NEXT: %[[C1:.+]] = arith.constant 1.000000e+00 : f64 - # CHECK-NEXT: %[[C1_CAST:.+]] = arith.truncf %[[C1]] : f64 to f32 - # CHECK-NEXT: %[[EXP:.+]] = math.exp %[[IN]] : f32 - # CHECK-NEXT: %[[SUM:.+]] = arith.addf %[[C1_CAST]], %[[EXP]] : f32 - # CHECK-NEXT: %[[LOG:.+]] = math.log %[[SUM]] : f32 - # CHECK-NEXT: linalg.yield %[[LOG]] : f32 - # CHECK-NEXT: -> tensor<4x16xf32> - @builtin.FuncOp.from_py_func( - RankedTensorType.get((4, 16), f32), RankedTensorType.get((4, 16), f32)) - def test_f32_soft_plus(input, init_result): - return soft_plus_poly(input, outs=[init_result]) - - # Just check that we don't 
assert out on name mismatch. - # CHECK-LABEL: @test_non_default_op_name - @builtin.FuncOp.from_py_func( - RankedTensorType.get((42,), f32), RankedTensorType.get((42,), f32)) - def test_non_default_op_name(input, init_result): - return non_default_op_name(input, outs=[init_result]) - - -# TODO: Fix me! Conv and pooling ops above do not verify, which was uncovered -# when switching to more robust module verification. For now, reverting to the -# old behavior which does not verify on module print. -print(module.operation.get_asm(assume_verified=True))