diff --git a/mlir/docs/Bufferization.md b/mlir/docs/Bufferization.md
--- a/mlir/docs/Bufferization.md
+++ b/mlir/docs/Bufferization.md
@@ -139,7 +139,7 @@
 ```mlir
 #map = affine_map<(i) -> (i)>
 %0 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel"]}
-    outs(%t : tensor<?xf32>) {
+    inits(%t : tensor<?xf32>) {
   ^bb0(%arg0 : f32):
     %cst = arith.constant 0.0 : f32
     linalg.yield %cst : f32
@@ -153,7 +153,7 @@
 ```mlir
 %t = tensor.extract_slice %s [%idx] [%sz] [1] : tensor<?xf32> to tensor<?xf32>
-%0 = linalg.generic ... outs(%t) { ... } -> tensor<?xf32>
+%0 = linalg.generic ... inits(%t) { ... } -> tensor<?xf32>
 %1 = tensor.insert_slice %0 into %s [%idx] [%sz] [1]
     : tensor<?xf32> into tensor<?xf32>
 ```
diff --git a/mlir/docs/Dialects/Linalg/_index.md b/mlir/docs/Dialects/Linalg/_index.md
--- a/mlir/docs/Dialects/Linalg/_index.md
+++ b/mlir/docs/Dialects/Linalg/_index.md
@@ -103,7 +103,7 @@
               %B: memref<?xvector<4xf32>, strided<[2], offset: 1>>) {
   linalg.generic #attrs
     ins(%A: memref<?xf32, strided<[1]>>)
-    outs(%B: memref<?xvector<4xf32>, strided<[2], offset: 1>>) {
+    inits(%B: memref<?xvector<4xf32>, strided<[2], offset: 1>>) {
   ^bb0(%a: f32, %b: vector<4xf32>):
     %c = "some_compute"(%a, %b): (f32, vector<4xf32>) -> (vector<4xf32>)
     linalg.yield %c: vector<4xf32>
@@ -189,7 +189,7 @@
               %B: memref<vector<4xf32>>) {
   linalg.generic #attrs
     ins(%A: memref<8x?xf32, strided<[2, 2], offset: 0>>)
-    outs(%B: memref<vector<4xf32>>) {
+    inits(%B: memref<vector<4xf32>>) {
   ^bb0(%a: f32, %b: vector<4xf32>):
     %c = "some_compute"(%a, %b): (f32, vector<4xf32>) -> (vector<4xf32>)
     linalg.yield %c: vector<4xf32>
@@ -310,7 +310,7 @@
 func.func @example(%A: memref<?xf32>, %B: memref<?xf32>, %C: memref<?xf32>) {
   linalg.generic #attrs
     ins(%A, %B: memref<?xf32>, memref<?xf32>)
-    outs(%C: memref<?xf32>) {
+    inits(%C: memref<?xf32>) {
   ^bb0(%a: f32, %b: f32, %c: f32):
     %d = arith.addf %a, %b : f32
     linalg.yield %d : f32
@@ -383,7 +383,7 @@
 func.func @example(%A: memref<?xf32>, %B: memref<?xf32>, %C: memref<?xf32>) {
   linalg.generic #attrs
     ins(%A, %B: memref<?xf32>, memref<?xf32>)
-    outs(%C: memref<?xf32>) {
+    inits(%C: memref<?xf32>) {
   ^bb0(%a: f32, %b: f32, %c: f32):
     %d = arith.addf %a, %b : f32
     linalg.yield %d : f32
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
@@ -68,7 +68,7 @@
     %c = bufferization.alloc_tensor(%d1, %d2) : tensor<?x?xf32>
     %0 = linalg.matmul
       ins(%a, %b: tensor<?x?xf32>, tensor<?x?xf32>)
-      outs(%c: tensor<?x?xf32>) -> tensor<?x?xf32>
+      inits(%c: tensor<?x?xf32>) -> tensor<?x?xf32>
     return %0 : tensor<?x?xf32>
     ```
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -59,7 +59,7 @@
       #map = affine_map<(i, j) -> (i, j)>
       linalg.generic {indexing_maps = [#map, #map],
                       iterator_types = ["parallel", "parallel"]}
-          outs(%I, %J : memref<?x?xindex>, memref<?x?xindex>) {
+          inits(%I, %J : memref<?x?xindex>, memref<?x?xindex>) {
         ^bb0(%arg0 : index, %arg1 : index):
           // Access the outer iteration dimension i
           %i = linalg.index 0 : index
diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td
@@ -71,7 +71,7 @@
       linalg.generic #trait_attribute
         ins(%A, %B : memref<?x?xf32, stride_specification>,
                      memref<?x?xf32, stride_specification>)
-        outs(%C : memref<?x?xf32, stride_specification>)
+        inits(%C : memref<?x?xf32, stride_specification>)
         attrs = {other-optional-attributes}
         {region}
       ```
@@ -112,7 +112,7 @@
       linalg.generic #matmul_trait
         ins(%A, %B : memref<?x?xf32, stride_specification>,
                      memref<?x?xf32, stride_specification>)
-        outs(%C : memref<?x?xf32, stride_specification>)
+        inits(%C : memref<?x?xf32, stride_specification>)
        {other-optional-attributes} {
        ^bb0(%a: f32, %b: f32, %c: f32) :
          %d = arith.mulf %a, %b: f32
@@ -153,7 +153,7 @@
     ```mlir
     %C = linalg.generic #trait_attribute
            ins(%A, %B : tensor<?x?xf32>, memref<?x?xf32, stride_specification>)
-           outs(%C : tensor<?x?xf32>)
+           inits(%C : tensor<?x?xf32>)
            {other-optional-attributes}
            {region}
            -> (tensor<?x?xf32>)
@@ -249,7 +249,7 @@
       ```
       %add = linalg.map
           ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
-          outs(%init: tensor<64xf32>)
+          inits(%init: tensor<64xf32>)
           (%lhs_elem: f32, %rhs_elem: f32) {
             %0 = arith.addf %lhs_elem, %rhs_elem: f32
             linalg.yield %0: f32
@@ -263,7 +263,7 @@
       ```
       %add = linalg.map { arith.addf }
           ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
-          outs(%init: tensor<64xf32>)
+          inits(%init: tensor<64xf32>)
       ```
   }];
@@ -336,7 +336,7 @@
       ```
       %reduce = linalg.reduce
           ins(%input:tensor<16x32x64xf32>)
-          outs(%init:tensor<16x64xf32>)
+          inits(%init:tensor<16x64xf32>)
           dimensions = [1]
           (%in: f32, %out: f32) {
             %0 = arith.addf %out, %in: f32
@@ -352,7 +352,7 @@
       ```
       %reduce = linalg.reduce { arith.addf }
           ins(%input:tensor<16x32x64xf32>)
-          outs(%init:tensor<16x64xf32>)
+          inits(%init:tensor<16x64xf32>)
           dimensions = [1]
       ```
   }];
@@ -420,7 +420,7 @@
       ```
       %transpose = linalg.transpose
           ins(%input:tensor<16x64xf32>)
-          outs(%init:tensor<64x16xf32>)
+          inits(%init:tensor<64x16xf32>)
           permutation = [1, 0]
       ```
   }];
diff --git a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
--- a/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/TransformOps/LinalgTransformOps.td
@@ -520,7 +520,7 @@
    //                              M   N
    // affine_map<(d0, d1, d2) -> (d0, d1)>
    %0 = linalg.matmul ins(%A, %B: tensor<?x?xf32>, tensor<?x?xf32>)
-                     outs( %C: tensor<?x?xf32>)
+                     inits( %C: tensor<?x?xf32>)
    ```

    Specifying packed_sizes [2, 3, 4] results in tiling the iterator dimensions
@@ -534,7 +534,7 @@
    // affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d3, d4)>
    %0 = linalg.generic_representing_some_higher_d_matmul
          ins(%A, %B: tensor<?x?x2x4xf32>, tensor<?x?x3x4xf32>)
-        outs( %C: tensor<?x?x2x3xf32>)
+        inits( %C: tensor<?x?x2x3xf32>)
    ```
    In particular, note that the second operand `B` has shape `KxNxnxk` (and
    not `KxNxkxn` as one could expect by looking **only** at the operand).
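Note: across all of the hunks above and below, the rename is purely a
surface-syntax change. A minimal sketch of the intended transition behavior
(the `@rename_example` name and the fixed shapes are illustrative, not taken
from this patch); per the `LinalgOps.cpp` parser change further down, both
spellings parse to the same op and the printer emits only `inits`:

```mlir
// Hypothetical example (not from this patch): both ops below parse to the
// same linalg.matmul after this change. `outs` is deprecated but still
// accepted by the parser; printing round-trips both forms to `inits`.
func.func @rename_example(%a: tensor<4x8xf32>, %b: tensor<8x16xf32>,
                          %c: tensor<4x16xf32>) -> tensor<4x16xf32> {
  // New spelling introduced by this patch:
  %0 = linalg.matmul ins(%a, %b : tensor<4x8xf32>, tensor<8x16xf32>)
                     inits(%c : tensor<4x16xf32>) -> tensor<4x16xf32>
  // Deprecated spelling, still parsed during the transition:
  %1 = linalg.matmul ins(%a, %b : tensor<4x8xf32>, tensor<8x16xf32>)
                     outs(%0 : tensor<4x16xf32>) -> tensor<4x16xf32>
  return %1 : tensor<4x16xf32>
}
```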
@@ -1052,7 +1052,7 @@
                                         affine_map<(d0) -> ()>],
                        iterator_types = ["reduction"]}
     ins(%in : tensor<32xf32>)
-    outs(%out : tensor<f32>) {
+    inits(%out : tensor<f32>) {
     ^bb0(%arg1: f32, %arg2: f32):
       %y = arith.addf %arg1, %arg2 : f32
       linalg.yield %y : f32
@@ -1065,11 +1065,11 @@
   %cst = arith.constant 0.000000e+00 : f32
   %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
   %1 = tensor.empty() : tensor<4xf32>
-  %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
+  %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<4xf32>) -> tensor<4xf32>
   %3 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                         affine_map<(d0, d1) -> (d0)>],
                        iterator_types = ["parallel", "reduction"]}
-    ins(%0 : tensor<4x8xf32>) outs(%2 : tensor<4xf32>) {
+    ins(%0 : tensor<4x8xf32>) inits(%2 : tensor<4xf32>) {
     ^bb0(%arg3: f32, %arg4: f32):
       %5 = arith.addf %arg3, %arg4 : f32
       linalg.yield %5 : f32
@@ -1077,7 +1077,7 @@
   %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
                                         affine_map<(d0) -> ()>],
                        iterator_types = ["reduction"]}
-    ins(%3 : tensor<4xf32>) outs(%out : tensor<f32>) {
+    ins(%3 : tensor<4xf32>) inits(%out : tensor<f32>) {
     ^bb0(%arg3: f32, %arg4: f32):
       %5 = arith.addf %arg3, %arg4 : f32
       linalg.yield %5 : f32
@@ -1103,7 +1103,7 @@
   ```
   %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)
-                    outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+                    inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
   ```

   Is transformed to:
@@ -1117,14 +1117,14 @@
   #map5 = affine_map<(d0, d1, d2) -> (d0, d1)>
   %0 = tensor.empty() : tensor<16x32x64xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x32x64xf32>) ->
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<16x32x64xf32>) ->
      tensor<16x32x64xf32>
   %2 = tensor.empty() : tensor<64x4xi1>
   %3 = linalg.generic {indexing_maps = [#map0, #map1, #map2, #map3],
     iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
     ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>, tensor<64x4xi1>)
-    outs(%1 : tensor<16x32x64xf32>) {
+    inits(%1 : tensor<16x32x64xf32>) {
     ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32):
       %5 = arith.mulf %arg3, %arg4 : f32
       %6 = arith.addf %arg6, %5 : f32
@@ -1134,7 +1134,7 @@
   %4 = linalg.generic {indexing_maps = [#map4, #map5],
     iterator_types = ["parallel", "parallel", "reduction"]}
     ins(%3 : tensor<16x32x64xf32>)
-    outs(%C : tensor<16x32xf32>) {
+    inits(%C : tensor<16x32xf32>) {
     ^bb0(%arg3: f32, %arg4: f32):
       %5 = arith.addf %arg3, %arg4 : f32
       linalg.yield %5 : f32
@@ -1209,7 +1209,7 @@
                                         affine_map<(d0, d1) -> (d0)>],
                        iterator_types = ["parallel", "reduction"]}
     ins(%arg0 : tensor<?x?xf32>)
-    outs(%out : tensor<?xf32>) {
+    inits(%out : tensor<?xf32>) {
     ^bb0(%arg7: f32, %arg9: f32):
       %1 = arith.addf %arg7, %arg9 : f32
       linalg.yield %1 : f32
@@ -1221,7 +1221,7 @@
   ```
   %0 = tensor.empty(%dim_1) : tensor<?x5xf32>
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?x5xf32>) -> tensor<?x5xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<?x5xf32>) -> tensor<?x5xf32>
   %2 = scf.for %arg2 = %c0 to %dim_0 step %c5 iter_args(%arg3 = %1) -> (tensor<?x5xf32>) {
     %extracted_slice = tensor.extract_slice %1[0, 0] [%dim, 5] [1, 1] : tensor<?x5xf32> to tensor<?x5xf32>
     %extracted_slice_2 = tensor.extract_slice %arg0[0, %arg2] [%dim, 5] [1, 1] : tensor<?x?xf32> to tensor<?x5xf32>
@@ -1229,7 +1229,7 @@
                                         affine_map<(d0, d1) -> (d0, d1)>],
                        iterator_types = ["parallel", "parallel"]}
     ins(%extracted_slice_2 : tensor<?x5xf32>)
-    outs(%extracted_slice : tensor<?x5xf32>) {
+    inits(%extracted_slice : tensor<?x5xf32>) {
     ^bb0(%in: f32, %out: f32):
       %5 = arith.addf %in, %out : f32
       linalg.yield %5 : f32
@@ -1242,7 +1242,7 @@
affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%2 : tensor) - outs(%arg1 : tensor) { + inits(%arg1 : tensor) { ^bb0(%in: f32, %out: f32): %4 = arith.addf %in, %out : f32 linalg.yield %4 : f32 @@ -1314,7 +1314,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.addf %arg7, %arg9 : f32 linalg.yield %1 : f32 @@ -1326,7 +1326,7 @@ ``` %0 = tensor.empty(%dim_1) : tensor - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor %2 = scf.forall (%arg2) in (%c5) shared_outs(%arg3 = %1) -> (tensor) { %4 = affine.min #map(%arg2)[%dim_0] %5 = affine.max #map1(%4) @@ -1334,7 +1334,7 @@ %6 = affine.apply #map2(%arg2)[%dim_0] %extracted_slice_2 = tensor.extract_slice %arg0[0, %6] [%dim, %5] [1, 1] : tensor to tensor %extracted_slice_3 = tensor.extract_slice %extracted_slice[0] [%dim] [1] : tensor to tensor - %7 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "reduction"]} ins(%extracted_slice_2 : tensor) outs(%extracted_slice_3 : tensor) { + %7 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "reduction"]} ins(%extracted_slice_2 : tensor) inits(%extracted_slice_3 : tensor) { ^bb0(%in: f32, %out: f32): %9 = arith.addf %in, %out : f32 linalg.yield %9 : f32 @@ -1343,7 +1343,7 @@ tensor.parallel_insert_slice %7 into %arg3[0, %arg2] [%dim, 1] [1, 1] : tensor into tensor } } {mapping = []} - %3 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "reduction"]} ins(%2 : tensor) outs(%arg1 : tensor) { + %3 = linalg.generic {indexing_maps = [#map3, #map4], iterator_types = ["parallel", "reduction"]} ins(%2 : tensor) inits(%arg1 : tensor) { ^bb0(%in: f32, %out: f32): %4 = arith.addf %in, %out : f32 linalg.yield %4 : f32 diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -310,7 +310,7 @@ /// is lowered to: /// /// %alloc = memref.alloc -/// linalg.fill ... outs(%alloc) +/// linalg.fill ... inits(%alloc) /// %subview = memref.subview %alloc [%l] [...] 
[1] /// memref.tensor_store %t, %subview /// %0 = bufferization.to_tensor %alloc restrict writable @@ -437,20 +437,20 @@ /// For example, the following op: /// /// linalg.matmul ins(%0, %1 : tensor<128x32xf32>, tensor<32x64xf32>) -/// outs(%2 : tensor<128x64xf32>) +/// inits(%2 : tensor<128x64xf32>) /// /// split along the first dimension at position 42 will result in: /// /// %3 = tensor.extract_slice %0[0, 0][42, 32][1, 1] /// %4 = tensor.extract_slice %2[0, 0][42, 64][1, 1] /// %5 = linalg.matmul ins(%3, %1 : tensor<42x32xf32>, tensor<32x64xf32>) -/// outs(%5 : tensor<42x64xf32>) +/// inits(%5 : tensor<42x64xf32>) /// %6 = tensor.insert_slice %5 into %2[0, 0][42, 64][1, 1] /// /// %7 = tensor.extract_slice %0[42, 0][86, 32][1, 1] /// %8 = tensor.extract_slice %6[42, 0][86, 64][1, 1] /// %9 = linalg.matmul ins(%7, %1 : tensor<86x32xf32>, tensor<32x64xf32>) -/// outs(%8 : tensor<86x64xf32>) +/// inits(%8 : tensor<86x64xf32>) /// tensor.insert_slice %5 into %6[42, 0][86, 64][1, 1] /// /// Note that there is no simplification other than constant propagation applied @@ -782,7 +782,7 @@ /// affine_map<(d0) -> ()>], /// iterator_types = ["reduction"]} /// ins(%in : tensor<32xf32>) -/// outs(%out : tensor) { +/// inits(%out : tensor) { /// ^bb0(%arg1: f32, %arg2: f32): /// %y = arith.addf %arg1, %arg2 : f32 /// linalg.yield %y : f32 @@ -793,11 +793,11 @@ /// %cst = arith.constant 0.000000e+00 : f32 /// %0 = tensor.expand_shape %in [[0, 1]] : tensor<32xf32> into /// tensor<4x8xf32> %1 = tensor.empty [4] : tensor<4xf32> %2 = linalg.fill -/// ins(%cst : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> %3 = +/// ins(%cst : f32) inits(%1 : tensor<4xf32>) -> tensor<4xf32> %3 = /// linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, /// affine_map<(d0, d1) -> (d0)>], /// iterator_types = ["parallel", "reduction"]} -/// ins(%0 : tensor<4x8xf32>) outs(%2 : tensor<4xf32>) { +/// ins(%0 : tensor<4x8xf32>) inits(%2 : tensor<4xf32>) { /// ^bb0(%arg3: f32, %arg5: f32): /// %5 = arith.addf %arg3, %arg4 : f32 /// linalg.yield %5 : f32 @@ -805,7 +805,7 @@ /// %r = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, /// affine_map<(d0) -> ()>], /// iterator_types = ["reduction"]} -/// ins(%3 : tensor<4xf32>) outs(%out : tensor) { +/// ins(%3 : tensor<4xf32>) inits(%out : tensor) { /// ^bb0(%arg3: f32, %arg4: f32): /// %5 = arith.addf %arg3, %arg4 : f32 /// linalg.yield %5 : f32 @@ -829,7 +829,7 @@ /// Example: /// ``` /// %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>) -/// outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> +/// inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> /// ``` /// /// Is transformed to: @@ -843,7 +843,7 @@ /// #map5 = affine_map<(d0, d1, d2) -> (d0, d1)> /// %0 = tensor.empty [16, 32, 64] : tensor<16x32x64xf32> /// %cst = arith.constant 0.000000e+00 : f32 -/// %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<16x32x64xf32>) -> +/// %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<16x32x64xf32>) -> /// tensor<16x32x64xf32> /// %2 = tensor.empty [64, 4] : tensor<64x4xi1> /// @@ -851,7 +851,7 @@ /// iterator_types = ["parallel", "parallel", "parallel", "reduction"]} /// ins(%A, %B, %2 : tensor<16x256xf32>, tensor<256x32xf32>, /// tensor<64x4xi1>) -/// outs(%1 : tensor<16x32x64xf32>) { +/// inits(%1 : tensor<16x32x64xf32>) { /// ^bb0(%arg3: f32, %arg4: f32, %arg5: i1, %arg6: f32): /// %5 = arith.mulf %arg3, %arg4 : f32 /// %6 = arith.addf %arg6, %5 : f32 @@ -861,7 +861,7 @@ /// %4 = linalg.generic {indexing_maps = [#map4, #map5], /// iterator_types 
= ["parallel", "parallel", "reduction"]} // ins(%3 : tensor<16x32x64xf32>) -/// outs(%C : tensor<16x32xf32>) { +/// inits(%C : tensor<16x32xf32>) { /// ^bb0(%arg3: f32, %arg4: f32): /// %5 = arith.addf %arg3, %arg4 : f32 /// linalg.yield %5 : f32 diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -104,7 +104,7 @@ /// ``` /// %0 = tensor.extract_slice %arg0 [%iv0, %iv1] [%sz0, %sz1] /// %1 = tensor.pad %0 low[0, 0] high[...] { tensor.yield %cst } -/// %2 = linalg.matmul ins(...) outs(%1) +/// %2 = linalg.matmul ins(...) inits(%1) /// %3 = tensor.extract_slice %2 [0, 0] [%sz0, %sz1] /// ``` /// makeComposedPadHighOp(source=%3, pad=%cst) returns %2 diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td --- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td @@ -434,11 +434,11 @@ tensor to tensor %sC = tensor.extract_slice %o1[h((%thread_id_1, %thread_id_2))]: tensor to tensor - %sD = matmul ins(%sA, %sB) outs(%sC) + %sD = matmul ins(%sA, %sB) inits(%sC) %spointwise = subtensor %o2[i((%thread_id_1, %thread_id_2))]: tensor to tensor - %sE = add ins(%spointwise) outs(%sD) + %sE = add ins(%spointwise) inits(%sD) scf.forall.in_parallel { scf.forall.parallel_insert_slice %sD into %o1[h((%thread_id_1, %thread_id_2))]: @@ -472,7 +472,7 @@ %sC = tensor.extract_slice %o[%i, %j][%tileSize1, %tileSize2][1, 1] : tensor to tensor - %add = map {"arith.addf"} ins(%sA, %sB) outs(%sC) + %add = map {"arith.addf"} ins(%sA, %sB) inits(%sC) scf.forall.in_parallel { scf.forall.parallel_insert_slice %add into diff --git a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h --- a/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h +++ b/mlir/include/mlir/Dialect/SCF/Transforms/TileUsingInterface.h @@ -113,8 +113,8 @@ /// the tiled loop nest. For example, consider /// /// ```mlir -/// %0 = linalg.matmul ins(...) outs(...) -> tensor -/// %1 = linalg.matmul ins(%0, ..) outs(...) -> tensor +/// %0 = linalg.matmul ins(...) inits(...) -> tensor +/// %1 = linalg.matmul ins(%0, ..) inits(...) -> tensor /// ``` /// /// If `%1` is tiled in a 2D fashion and `%0` is fused with it, the resulting IR @@ -124,7 +124,7 @@ /// %t1_0 = scf.for .... iter_args(%arg0 = ...) { /// %t1_1 = scf.for ... iter_args(%arg1 = %arg0) { /// ... -/// %t1_2 = linalg.matmul ins(...) outs(...) -> tensor +/// %t1_2 = linalg.matmul ins(...) inits(...) -> tensor /// %t1_3 = linalg.matmul ins(%t1_2, ...) /// %t1_4 = tensor.insert_slice %t1_3 into %arg1 ... /// scf.yield %t1_4 @@ -139,7 +139,7 @@ /// ```mlir /// %t2_0 = scf.for .... iter_args(%arg0 = ...) { /// ... -/// %t2_1 = linalg.matmul ins(...) outs(...) -> tensor +/// %t2_1 = linalg.matmul ins(...) inits(...) -> tensor /// %t2_2 = linalg.matmul ins(%t2_1, ...) /// %t2_3 = tensor.insert_slice %t2_2 into %arg0 ... /// scf.yield %t2_3 @@ -180,8 +180,8 @@ /// /// ```mlir /// %0 = -/// %1 = linalg.fill ... outs(%0 : ... ) -/// %2 = linalg.matmul ... outs(%1 : ...) ... +/// %1 = linalg.fill ... inits(%0 : ... ) +/// %2 = linalg.matmul ... inits(%1 : ...) ... /// ``` /// /// it is legal to fuse the fill with the matmul only if the matmul is tiled @@ -192,8 +192,8 @@ /// ```mlir /// %1 = scf.for ... 
iter_args(%arg0 = %0) /// %2 = tensor.extract_slice %arg0 -/// %3 = linalg.fill .. outs(%2 : ... ) -/// %4 = linalg.matmul .. outs(%3 : ...) +/// %3 = linalg.fill .. inits(%2 : ... ) +/// %4 = linalg.matmul .. inits(%3 : ...) /// } /// ``` FailureOr diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -889,7 +889,7 @@ %0 = linalg.generic #trait ins(%A: tensor, %B: tensor) - outs(%C: tensor) { + inits(%C: tensor) { ^bb0(%a: f64, %b: f64, %c: i8) : %result = sparse_tensor.binary %a, %b : f64, f64 to i8 overlap={ @@ -910,7 +910,7 @@ %C = bufferization.alloc_tensor... %1 = linalg.generic #trait ins(%A: tensor, %B: tensor - outs(%C: tensor { + inits(%C: tensor { ^bb0(%a: f64, %b: f64, %c: f64) : %row = linalg.index 0 : index %col = linalg.index 1 : index @@ -943,7 +943,7 @@ %C = bufferization.alloc_tensor... %2 = linalg.generic #trait ins(%A: tensor, %B: tensor - outs(%C: tensor { + inits(%C: tensor { ^bb0(%a: f64, %b: i32, %c: f64) : %result = sparse_tensor.binary %a, %b : f64, i32 to f64 overlap={} @@ -991,7 +991,7 @@ %C = bufferization.alloc_tensor... %0 = linalg.generic #trait ins(%A: tensor) - outs(%C: tensor) { + inits(%C: tensor) { ^bb0(%a: f64, %c: f64) : %result = sparse_tensor.unary %a : f64 to f64 present={ @@ -1069,7 +1069,7 @@ %C = bufferization.alloc_tensor... %0 = linalg.generic #trait ins(%A: tensor) - outs(%C: tensor) { + inits(%C: tensor) { ^bb0(%a: f64, %c: f64) : %result = sparse_tensor.reduce %c, %a, %cf1 : f64 { ^bb0(%arg0: f64, %arg1: f64): @@ -1112,7 +1112,7 @@ %C = bufferization.alloc_tensor... %0 = linalg.generic #trait ins(%A: tensor) - outs(%C: tensor) { + inits(%C: tensor) { ^bb0(%a: f64, %c: f64) : %result = sparse_tensor.select %a : f64 { ^bb0(%arg0: f64): @@ -1130,7 +1130,7 @@ %C = bufferization.alloc_tensor... %0 = linalg.generic #trait ins(%A: tensor) - outs(%C: tensor) { + inits(%C: tensor) { ^bb0(%a: f64, %c: f64) : %row = linalg.index 0 : index %col = linalg.index 1 : index diff --git a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td --- a/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.td @@ -58,7 +58,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #matvec ins(%arga, %argb: tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp --- a/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/EmptyTensorElimination.cpp @@ -107,7 +107,7 @@ /// /// E.g.: /// %0 = tensor.empty() : tensor<10xf32> -/// %1 = linalg.fill ... outs(%0 : tensor<10xf32>) +/// %1 = linalg.fill ... inits(%0 : tensor<10xf32>) /// %2 = tensor.insert_slice %0 into %t ... 
///
/// In the above example, the anchor is the source operand of the insert_slice
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -145,7 +145,9 @@
     return failure();
   }

-  if (succeeded(parser.parseOptionalKeyword("outs"))) {
+  // outs is deprecated.
+  if (succeeded(parser.parseOptionalKeyword("inits")) ||
+      succeeded(parser.parseOptionalKeyword("outs"))) {
     outputsOperandsLoc = parser.getCurrentLocation();
     if (parser.parseLParen() || parser.parseOperandList(outputsOperands) ||
         parser.parseColonTypeList(outputTypes) || parser.parseRParen())
@@ -172,7 +174,7 @@
   if (!inputs.empty())
     p << " ins(" << inputs << " : " << inputs.getTypes() << ")";
   if (!outputs.empty())
-    p << " outs(" << outputs << " : " << outputs.getTypes() << ")";
+    p << " inits(" << outputs << " : " << outputs.getTypes() << ")";
 }

 //===----------------------------------------------------------------------===//
@@ -960,7 +962,7 @@
     OpAsmParser &parser, OperationState &result,
     function_ref<ParseResult(OpAsmParser &, NamedAttrList &)> parseAttrsFn =
         nullptr) {
-  // Parse `ins` and `outs`.
+  // Parse `ins` and `inits`.
   SmallVector<Type, 4> inputTypes, outputTypes;
   if (parseCommonStructuredOpParts(parser, result, inputTypes, outputTypes,
                                    /*addOperandSegmentSizes=*/false))
@@ -1888,7 +1890,7 @@
     OpResult resultValue = castOp.getSource().cast<OpResult>();
     unsigned resultNumber = resultValue.getResultNumber();
     auto resultType = castOp->getResult(0).getType().cast<RankedTensorType>();
-    // Replace the `outs` for the result with a `tensor.cast`. This cast is now
+    // Replace the `inits` for the result with a `tensor.cast`. This cast is now
     // going from a more dynamic shape to a less dynamic shape. If the producer
     // for this cast, i.e.
producer of the out operand, is also an operation // that folds with tensor.cast consumer (like this pattern), the cast will diff --git a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DataLayoutPropagation.cpp @@ -130,7 +130,7 @@ /// %0 = linalg.generic {indexing_maps = [#map1, #map2, #map0], /// iterator_types = ["parallel", "parallel"]} /// ins(%arg0, %arg1 : tensor, tensor) -/// outs(%init : tensor) { +/// inits(%init : tensor) { /// ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): /// %4 = arith.addf %arg3, %arg4 : f32 /// linalg.yield %4 : f32 @@ -263,7 +263,7 @@ /// %3 = linalg.generic {indexing_maps = [#map0, #map0], /// iterator_types = ["parallel", "parallel"]} /// ins(%arg0 : tensor) -/// outs(%2 : tensor) { +/// inits(%2 : tensor) { /// ^bb0(%arg3: f32, %arg4: f32): /// %4 = arith.addf %arg3, %arg3 : f32 /// linalg.yield %4 : f32 @@ -290,7 +290,7 @@ /// %3 = linalg.generic {indexing_maps = [#map2, #map2], /// iterator_types = ["parallel", "parallel", "parallel", "parallel"]} /// ins(%pack : tensor) -/// outs(%arg1 : tensor) { +/// inits(%arg1 : tensor) { /// ^bb0(%in: f32, %out: f32): /// %4 = arith.addf %in, %in : f32 /// linalg.yield %4 : f32 @@ -410,7 +410,7 @@ /// inner_dims_pos = [3] inner_tiles = [32] into %0 /// %2 = linalg.generic {indexing_maps = [#map], /// iterator_types = ["parallel", "parallel", "parallel", "parallel"]} -/// outs(%1 : tensor<12x56x56x64xf32>) { +/// inits(%1 : tensor<12x56x56x64xf32>) { /// ^bb0(%out : f32): /// linalg.yield %out : f32 /// } -> tensor<12x56x56x64xf32> @@ -423,7 +423,7 @@ /// %1 = linalg.generic {indexing_maps = [#map], /// iterator_types = ["parallel", "parallel", "parallel", /// "parallel", "parallel"]} -/// outs(%arg0 : tensor<12x2x56x56x32xf32>) { +/// inits(%arg0 : tensor<12x2x56x56x32xf32>) { /// ^bb0(%out : f32): /// linalg.yield %out : f32 /// } -> tensor<12x2x56x56x32xf32> diff --git a/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp b/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DecomposeLinalgOps.cpp @@ -35,7 +35,7 @@ /// /// ```mlir /// %result:2 = linalg.generic ... ins(%arg0, %arg1, %arg2 : ...) -/// outs(%init0, %init1 : ...) { +/// inits(%init0, %init1 : ...) { /// ^bb0(%b0: ... , %b1: ... , %b2: ... , %b3: ..., %b4: ...): /// %0 = %b0, %b1 : ... /// %1 = %0, %b2 : ... @@ -49,13 +49,13 @@ /// ```mlir /// %init = tensor.empty ... /// %op0:3 = linalg.generic ... ins(%arg0, %arg1, %arg2 : ...) -/// outs(%init0, %init1, %init : ...) +/// inits(%init0, %init1, %init : ...) /// ^bb0(%b0: ... , %b1: ... , %b2: ... , %b3: ..., %b4: ..., %b5: ...): /// %0 = %b0, %b1 : ... /// linalg.yield %0, %..., %0 : ... /// } -> (..., ..., ...) /// %op1:2 = linalg.generic ... ins(%arg0, %arg1, %arg2, %op0#2 : ...) -/// outs(%init0, %init1 : ...) { +/// inits(%init0, %init1 : ...) { /// ^bb0(%b0: ... , %b1: ... , %b2: ... , %b3: ..., %b4: ..., %b5: ...): /// %1 = %b3, %b2 : ... /// linalg.yield %..., %1 : ... @@ -68,13 +68,13 @@ /// ```mlir /// %init = tensor.empty ... /// %op0 = linalg.generic ... ins(%arg0, %arg1, : ...) -/// outs(%init : ...) +/// inits(%init : ...) /// ^bb0(%b0: ... , %b1: ... , %b2: ...): /// %0 = %b0, %b1 : ... /// linalg.yield %0 : ... /// } -> ... /// %op1 = linalg.generic ... ins(%arg2, %op0#2 : ...) 
-/// outs(%init1 : ...) { +/// inits(%init1 : ...) { /// ^bb0(%b0: ... , %b1: ... , %b2: ...): /// %1 = %b1, %b0 : ... /// linalg.yield %..., %1 : ... diff --git a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp @@ -190,7 +190,7 @@ /// %7 = tensor.empty() : tensor /// %8 = linalg.generic #attrs /// ins(%6, %6 : tensor, tensor) - /// outs(%7 : tensor) { + /// inits(%7 : tensor) { /// ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): /// %9 = arith.addi %arg0, %arg1 : i32 /// linalg.yield %9 : i32 diff --git a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp @@ -241,12 +241,12 @@ /// %0 = tensor.empty() : tensor<1x1xf32> /// %1 = linalg.fill /// ins(%cst : f32) -/// outs(%0 : tensor<1x1xf32>) -> tensor<1x1xf32> +/// inits(%0 : tensor<1x1xf32>) -> tensor<1x1xf32> /// %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (0, d0, 0, 0)>, /// affine_map<(d0) -> (0, d0)>], /// iterator_types = ["parallel"]} /// ins(%arg0 : tensor<1x?x1x1xf32>) -/// outs(%1 : tensor<1x1xf32>) { +/// inits(%1 : tensor<1x1xf32>) { /// ^bb0(%in: f32, %out: f32): /// %3 = arith.addf %in, %out : f32 /// linalg.yield %3 : f32 @@ -257,14 +257,14 @@ /// %0 = tensor.empty() : tensor<1x1xf32> /// %1 = linalg.fill /// ins(%cst : f32) -/// outs(%0 : tensor<1x1xf32>) -> tensor<1x1xf32> +/// inits(%0 : tensor<1x1xf32>) -> tensor<1x1xf32> /// %2 = tensor.empty() : tensor<1x1xf32> /// %3 = linalg.generic {indexing_maps = [affine_map<(d0) -> (0, d0, 0, 0)>, /// affine_map<(d0) -> (0, d0)>, /// affine_map<(d0) -> (0, d0)>], /// iterator_types = ["parallel"]} /// ins(%arg0, %1 : tensor<1x?x1x1xf32>, tensor<1x1xf32>) -/// outs(%2 : tensor<1x1xf32>) { +/// inits(%2 : tensor<1x1xf32>) { /// ^bb0(%in: f32, %in_0: f32, %out: f32): /// %4 = arith.addf %in, %in_0 : f32 /// linalg.yield %4 : f32 diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp @@ -1059,7 +1059,7 @@ // %3 = linalg.generic { // indexing_maps = [#map, #map], // iterator_types = ["parallel" ,"parallel"]} -// ins(%1 : tensor) outs(%2 : tensor) {.. } +// ins(%1 : tensor) inits(%2 : tensor) {.. } // ``` // // can be fused by collapsing the dimensions of the iteration space. @@ -1070,7 +1070,7 @@ // %3 = linalg.generic { // indexing_maps = [#map, #map], // iterator_types = ["parallel"]} -// ins(%1 : tensor) outs(%2 : tensor) {.. } +// ins(%1 : tensor) inits(%2 : tensor) {.. } // %4 = tensor.expand_shape %3 [[0, 1]] : tensor into tensor // ``` // @@ -1084,7 +1084,7 @@ // %2 = linalg.generic { // indexing_maps = [#map0, #map1], // iterator_types = ["parallel" ,"parallel"]} -// ins(%1 : tensor) outs(%2 : tensor<4x?xf32>) {.. } +// ins(%1 : tensor) inits(%2 : tensor<4x?xf32>) {.. 
} // ``` // // the reshape cannot be fused with the generic op by collapsing the op diff --git a/mlir/lib/Dialect/Linalg/Transforms/EraseUnusedOperandsAndResults.cpp b/mlir/lib/Dialect/Linalg/Transforms/EraseUnusedOperandsAndResults.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/EraseUnusedOperandsAndResults.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/EraseUnusedOperandsAndResults.cpp @@ -361,7 +361,7 @@ /// Fold uses of duplicate inputs in the body of a linalg.generic. E.g.: /// ``` -/// linalg.generic ins(%a, %b, %a, %b) outs(%a) +/// linalg.generic ins(%a, %b, %a, %b) inits(%a) /// ^bb0(%in0, %in1, %in2, %in3, %out1) /// ``` /// Assuming that all %a and %b have the same index map: diff --git a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp @@ -100,15 +100,15 @@ /// along the `tiledSliceDimIndices` and clone the producer. Consider the case /// of fusion of an output tensor: /// ``` -/// %1 = producer ins(...) outs(%0) -/// %2 = consumer ins(...) outs(%1) +/// %1 = producer ins(...) inits(%0) +/// %2 = consumer ins(...) inits(%1) /// ``` /// When consumer is tiled, %1 appears in the loop iter_args: /// ``` -/// %1 = producer ins(...) outs(%0) +/// %1 = producer ins(...) inits(%0) /// %2 = scf.for ... iter_args(%1) .. (%bbarg) { /// %t1 = tensor.extract_slice %bbarg[..] -/// %t2 = consumer ins(...) outs(%t1) +/// %t2 = consumer ins(...) inits(%t1) /// %r = tensor.insert_slice %t2, %bbarg[...] /// } /// ``` @@ -116,8 +116,8 @@ /// ``` /// %2 = scf.for ... iter_args(%0) .. (%bbarg) { /// %t0 = tensor.extract_slice %bbarg[..] -/// %t1 = producer ins(...) outs(%t0) -/// %t2 = consumer ins(...) outs(%t1) +/// %t1 = producer ins(...) inits(%t0) +/// %t2 = consumer ins(...) inits(%t1) /// %r = tensor.insert_slice %t2, %bbarg[...] /// } /// ``` diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -423,7 +423,7 @@ /// indexing_maps = affine_map<(d0, d1, d2, d3, d4) -> (d1, d3)> /// } /// ins(%0 : tensor<2x3x4xf32>) -/// outs(%1 : tensor<5x6xf32>) +/// inits(%1 : tensor<5x6xf32>) /// ``` /// /// the iteration domain size of the linalg op is 3x5x4x6x2. The first affine diff --git a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp --- a/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/TileUsingInterface.cpp @@ -534,11 +534,11 @@ // // ```mlir // %0 = linalg.init - // %1 = linalg.fill .. outs(%0 : ) + // %1 = linalg.fill .. inits(%0 : ) // %2 = scf.for .. iter_args(%arg0 = %1) { // %3 = scf.for .. iter_args(%arg1 = %arg0) { // %4 = tensor.extract_slice %arg1 [..] - // .. = linalg.matmul .. outs(%4 : ) + // .. = linalg.matmul .. inits(%4 : ) // } // } // ``` @@ -551,8 +551,8 @@ // %2 = scf.for .. iter_args(%arg0 = %1 /* incorrect value */ ) { // %3 = scf.for .. iter_args(%arg1 = %arg0) { // %4 = tensor.extract_slice %0 /*incorrect value */ [..] - // %5 = linalg.fill .. outs(%4 : ) - // .. = linalg.matmul .. outs(%5 : ) + // %5 = linalg.fill .. inits(%4 : ) + // .. = linalg.matmul .. inits(%5 : ) // } // } // ``` @@ -572,8 +572,8 @@ // %1 = scf.for .. iter_args(%arg0 = %0 /* corrected value */ ) { // %2 = scf.for .. 
iter_args(%arg1 = %arg0) { // %3 = tensor.extract_slice %arg1 /* corrected value */ [..] - // %4 = linalg.fill .. outs(%3 : ) - // .. = linalg.matmul .. outs(%4 : ) + // %4 = linalg.fill .. inits(%3 : ) + // .. = linalg.matmul .. inits(%4 : ) // } // } // ``` diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp --- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp @@ -528,7 +528,7 @@ /// /// Is lowered to: /// ``` -/// linalg.map ins() outs(%dest) { +/// linalg.map ins() inits(%dest) { /// %d0 = linalg.index 0 : index /// %d1 = linalg.index 1 : index /// %0 = "some_op"(%d0, %d1) : (index, index) -> (index) diff --git a/mlir/python/mlir/dialects/linalg/__init__.py b/mlir/python/mlir/dialects/linalg/__init__.py --- a/mlir/python/mlir/dialects/linalg/__init__.py +++ b/mlir/python/mlir/dialects/linalg/__init__.py @@ -33,7 +33,7 @@ # `linalg.matmul(lhs, rhs, outs=[out])` creates the following IR: # ``` # %1 = linalg.matmul ins(%arg0, %arg1 : tensor<4x16xf32>, tensor<16x8xf32>) -# outs(%0 : tensor<4x8xf32>) +# inits(%0 : tensor<4x8xf32>) # -> tensor<4x8xf32> # ``` # b. by setting emit_generic=True, the Generic Op Form is emitted, e.g.: @@ -41,7 +41,7 @@ # ``` # %1 = linalg.generic {indexing_maps = [...], iterator_types = [...]} # ins(%arg0, %arg1 : tensor<4x16xf32>, tensor<16x8xf32>) -# outs(%0 : tensor<4x8xf32>) { +# inits(%0 : tensor<4x8xf32>) { # ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): # ... # linalg.yield %3 : f32 diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py --- a/mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/dsl.py @@ -41,7 +41,7 @@ "but none is set. 
Did you mean to call this in an op definition?") -def _prepare_structured_op_outs(outs: StructuredOpOuts) -> ValueList: +def _prepare_structured_op_inits(outs: StructuredOpOuts) -> ValueList: if isinstance(outs, (ir.Operation, ir.OpView)): return _get_op_results_or_values(outs) elif isinstance(outs, ir.OpResultList): @@ -87,7 +87,7 @@ emit_generic or not ctx.is_registered_operation(fully_qualified_name)) op_config = op_configs[0] - out_values = _prepare_structured_op_outs(outs) + out_values = _prepare_structured_op_inits(outs) in_values = [_get_op_result_or_value(i) for i in ins] if op_config.structured_op: if emit_generic: diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py --- a/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py +++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/emitter.py @@ -105,7 +105,7 @@ raise ValueError(f"Expected indexing_maps to use no symbols after " f"replacement and compression but got {indexing_maps}") - outs, out_types = _infer_structured_outs(op_config, in_arg_defs, ins, + outs, out_types = _infer_structured_inits(op_config, in_arg_defs, ins, out_arg_defs, outs) result_types = [t for t in out_types if isa(RankedTensorType, t)] @@ -474,7 +474,7 @@ "Unsupported 'min_unsigned' operands: {lhs}, {rhs}") -def _infer_structured_outs( +def _infer_structured_inits( op_config: LinalgStructuredOpConfig, in_arg_defs: Sequence[OperandDefConfig], ins: Sequence[Value], out_arg_defs: Sequence[OperandDefConfig], diff --git a/mlir/test/Analysis/test-match-reduction.mlir b/mlir/test/Analysis/test-match-reduction.mlir --- a/mlir/test/Analysis/test-match-reduction.mlir +++ b/mlir/test/Analysis/test-match-reduction.mlir @@ -12,7 +12,7 @@ affine_map<(d0) -> (0)>], iterator_types = ["reduction"]} ins(%in0t : tensor) - outs(%out0t : tensor<1xf32>) { + inits(%out0t : tensor<1xf32>) { ^bb0(%in0: f32, %out0: f32): %add = arith.addf %in0, %out0 : f32 linalg.yield %add : f32 @@ -49,7 +49,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%in0t : tensor<4x4xf32>) - outs(%out0t : tensor<4xf32>) { + inits(%out0t : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %cmp = arith.cmpf ogt, %in0, %out0 : f32 %sel = arith.select %cmp, %in0, %out0 : f32 @@ -69,7 +69,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%in0t : tensor<4x4xf32>) - outs(%out0t : tensor<4xf32>) { + inits(%out0t : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %mul = arith.mulf %in0, %in0 : f32 %sub = arith.subf %mul, %in0 : f32 diff --git a/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir b/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir --- a/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir +++ b/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir @@ -7,7 +7,7 @@ // CHECK-SAME: %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { // CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32> // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32> // CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32> func.func 
@generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { @@ -32,7 +32,7 @@ // CHECK: %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32> // CHECK: %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index // CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) inits(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32> // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[OFFSET]], %[[C0]]] [4, %[[DIM1]], 2, %[[DIM3]]] [1, 1, 1, 1] : tensor<4x?x2x?xf32> into tensor<4x?x?x?xf32> // CHECK: return %[[PADDED]] : tensor<4x?x?x?xf32> // CHECK: } diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -4,8 +4,8 @@ func.func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { // CHECK: [[C0:%.+]] = arith.constant 0 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) outs([[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) inits([[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) inits([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) return %0 : tensor<1x5x6xf32> } @@ -17,10 +17,10 @@ func.func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) { // CHECK: [[C0:%.+]] = arith.constant 0 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : i32) outs([[INIT]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> + // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : i32) inits([[INIT]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> // CHECK: [[ONE:%.+]] = arith.constant 1 // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> + // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) inits([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = #tosa.matmul_quant} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) return %0 : tensor<1x5x6xi32> } @@ -33,8 +33,8 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] // CHECK: %[[C0_0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0_0]] : f32) outs(%[[INIT]] : tensor) -> tensor - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor, tensor) outs(%[[FILLED]] : tensor) -> tensor + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0_0]] : f32) inits(%[[INIT]] : tensor) -> tensor + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor, tensor) 
inits(%[[FILLED]] : tensor) -> tensor %0 = "tosa.matmul"(%arg0, %arg1) : (tensor, tensor) -> (tensor) return %0 : tensor } @@ -47,8 +47,8 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]] // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) inits(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) return %0 : tensor<1x5x?xf32> } @@ -59,8 +59,8 @@ func.func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) { // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) inits(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) return %0 : tensor<1x5x6xf32> } @@ -74,12 +74,12 @@ func.func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) { // CHECK: [[INITT:%.+]] = tensor.empty() // CHECK: [[ZERO:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}outs([[INITT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}inits([[INITT]] // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) // CHECK: [[INITB:%.+]] = tensor.empty() - // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) { + // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) inits([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) inits([[INITB]] : tensor<5x6xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield [[ADD]] : f32 @@ -97,14 +97,14 @@ func.func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) { // CHECK: [[INITT:%.+]] = tensor.empty() // CHECK: [[ZERO:%.+]] = arith.constant 
0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}outs([[INITT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}inits([[INITT]] // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) // CHECK: [[INITB:%.+]] = tensor.empty() // CHECK: [[ONE:%.+]] = arith.constant 1 // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]] + // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) inits([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) inits([[INITB]] // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32): // CHECK: [[ADD:%.+]] = arith.addi // CHECK: linalg.yield [[ADD]] : i32 @@ -123,12 +123,12 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] // CHECK: %[[INITT:.+]] = tensor.empty(%[[DIM]]) // CHECK: %[[ZERO:.+]] = arith.constant 0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]]{{.*}}outs(%[[INITT]] + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]]{{.*}}inits(%[[INITT]] // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]> // CHECK: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERM]]) // CHECK: %[[INITB:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor, tensor<3x6xf32>) outs(%[[FILL]] : tensor) -> tensor - // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor) outs(%[[INITB]] : tensor) { + // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor, tensor<3x6xf32>) inits(%[[FILL]] : tensor) -> tensor + // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor) inits(%[[INITB]] : tensor) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: %[[ADD:.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield %[[ADD]] : f32 @@ -143,9 +143,9 @@ func.func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () { // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 // CHECK-DAG: [[INIT:%.+]] = tensor.empty() - // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}outs([[INIT]] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}inits([[INIT]] // CHECK-DAG: [[KERNEL:%.+]] = tensor.empty() - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>) + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) inits([[FILL]] : tensor<1x4x32x62xf32>) %0 = 
"tosa.max_pool2d"(%arg0) {pad = array, kernel = array, stride = array} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>) return } @@ -157,9 +157,9 @@ // CHECK-DAG: tensor.yield [[CONST]] // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32 // CHECK-DAG: [[INIT:%.+]] = tensor.empty() - // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[INITVAL]]{{.*}}outs([[INIT]] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[INITVAL]]{{.*}}inits([[INIT]] // CHECK-DAG: [[KERNEL:%.+]] = tensor.empty() - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>) + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) inits([[FILL]] : tensor<1x4x33x62xf32>) %0 = "tosa.max_pool2d"(%arg0) {pad = array, kernel = array, stride = array} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x33x62xf32>) return } @@ -170,9 +170,9 @@ // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]] // CHECK: %[[CONST:.+]] = arith.constant -3.40282347E+38 // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CONST]]{{.*}}outs(%[[INIT]] + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CONST]]{{.*}}inits(%[[INIT]] // CHECK: %[[KERNEL:.+]] = tensor.empty() - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor<3x3xf32>) outs(%[[FILL]] : tensor) + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor<3x3xf32>) inits(%[[FILL]] : tensor) %0 = "tosa.max_pool2d"(%arg0) {pad = array, kernel = array, stride = array} : (tensor) -> (tensor) return } @@ -213,14 +213,14 @@ // Fill the pooling target: // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<1x5x33x62xf32> - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[EMPTY]] : tensor<1x5x33x62xf32>) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) inits(%[[EMPTY]] : tensor<1x5x33x62xf32>) // Compute the sum padding: // CHECK: %[[KERNEL:.+]] = tensor.empty() : tensor<4x4xf32> // CHECK: %[[POOL:.+]] = linalg.pooling_nhwc_sum // CHECK-SAME: dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} // CHECK-SAME: ins(%[[PAD]], %[[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) - // CHECK-SAME: outs(%[[FILL]] : tensor<1x5x33x62xf32>) + // CHECK-SAME: inits(%[[FILL]] : tensor<1x5x33x62xf32>) // Compute dimension based constants: // CHECK: %[[I1:.+]] = arith.constant 1 : index @@ -236,7 +236,7 @@ // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[POOL]] : tensor<1x5x33x62xf32>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<1x5x33x62xf32>) + // CHECK-SAME: inits(%[[EMPTY]] : tensor<1x5x33x62xf32>) // CHECK: ^bb0(%[[IN:.+]]: f32, %{{.+}}: f32) // CHECK: %[[ZERO:.+]] = arith.constant 0 @@ -297,7 +297,7 @@ // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[POOL:.+]] : tensor<1x5x33x62xi32>) - // CHECK-SAME: outs(%[[EMPTY:.+]] : tensor<1x5x33x62xi8>) + // CHECK-SAME: 
inits(%[[EMPTY:.+]] : tensor<1x5x33x62xi8>) // CHECK: ^bb0(%[[IN:.+]]: i32, %{{.+}}: i8) // Only different behavior is how the division is performed. @@ -344,12 +344,12 @@ // CHECK: tensor.yield %[[F0]] // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[BATCH]]) : tensor - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[EMPTY]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) inits(%[[EMPTY]] : tensor) // CHECK: %[[KERNEL:.+]] = tensor.empty() : tensor<4x4xf32> // CHECK: %[[POOL:.+]] = linalg.pooling_nhwc_sum // CHECK-SAME: dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64> // CHECK-SAME: ins(%[[PADDED]], %[[KERNEL]] : tensor, tensor<4x4xf32>) - // CHECK-SAME: outs(%[[FILL]] : tensor) -> tensor + // CHECK-SAME: inits(%[[FILL]] : tensor) -> tensor // CHECK: %[[EMPTY:.+]] = tensor.empty(%[[BATCH]]) : tensor // CHECK: %[[GENERIC:.+]] = linalg.generic %0 = "tosa.avg_pool2d"(%arg0) {pad = array, kernel = array, stride = array} : (tensor) -> (tensor) @@ -369,8 +369,8 @@ // CHECK: %[[CST:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[B_IN:.+]] = tensor.empty() - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>) + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) inits(%[[FILL]] : tensor<1x45x40x28xf32>) + // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) inits(%[[B_IN]] : tensor<1x45x40x28xf32>) // CHECK: arith.addf // CHECK: linalg.yield %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = array, stride = array, dilation = array} : (tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) @@ -392,8 +392,8 @@ // CHECK: %[[CST:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[B_IN:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor) outs(%[[B_IN]] : tensor) + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor, tensor<3x3x27x28xf32>) inits(%[[FILL]] : tensor) + // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor) inits(%[[B_IN]] : tensor) // CHECK: %[[ADD:.+]] = arith.addf // CHECK: linalg.yield %[[ADD]] : f32 %0 = 
"tosa.conv2d"(%input, %weights, %bias) {pad = array, stride = array, dilation = array} : (tensor, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor) @@ -452,8 +452,8 @@ // CHECK: %[[CST:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[B_IN:.+]] = tensor.empty(%[[H_OUT]], %[[W_OUT]]) - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[WEIGHT]] : tensor<1x?x?x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x?x?x28xf32>) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x?x?x28xf32>) outs(%[[B_IN]] : tensor<1x?x?x28xf32>) + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[WEIGHT]] : tensor<1x?x?x27xf32>, tensor<3x3x27x28xf32>) inits(%[[FILL]] : tensor<1x?x?x28xf32>) + // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x?x?x28xf32>) inits(%[[B_IN]] : tensor<1x?x?x28xf32>) // CHECK: %[[ADD:.+]] = arith.addf // CHECK: linalg.yield %[[ADD]] : f32 %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = array, stride = array, dilation = array} : (tensor<1x?x?x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x?x?x28xf32>) @@ -493,11 +493,11 @@ func.func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) inits([[FILL]] : tensor<1x5x5x3x11xf32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) inits([[OUT]] : tensor<1x5x5x33xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield [[ADD]] : f32 @@ -519,9 +519,9 @@ // CHECK: %[[CST0:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[OUT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[DEPTH:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = 
dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor<3x1x3x11xf32>) outs(%[[FILL]] : tensor) + // CHECK: %[[DEPTH:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor<3x1x3x11xf32>) inits(%[[FILL]] : tensor) // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: %[[BIAS:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[COLLAPSED]] : tensor<33xf32>, tensor) outs(%[[OUT]] : tensor) { + // CHECK: %[[BIAS:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[COLLAPSED]] : tensor<33xf32>, tensor) inits(%[[OUT]] : tensor) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: %[[ADD:.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield %[[ADD]] : f32 @@ -539,11 +539,11 @@ func.func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) inits([[FILL]] : tensor<1x5x5x3x11xf32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) inits([[OUT]] : tensor<1x5x5x33xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield [[ADD]] : f32 @@ -565,13 +565,13 @@ // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() // CHECK: [[C128:%.+]] = arith.constant -128 // CHECK: [[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = 
dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) inits([[FILL]] : tensor<1x12x12x4x128xi32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) inits([[OUT]] : tensor<1x12x12x512xi32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32): // CHECK: [[ADD:%.+]] = arith.addi %[[ARG3]], %[[ARG4]] : i32 // CHECK: linalg.yield [[ADD]] : i32 @@ -589,13 +589,13 @@ func.func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() // CHECK: [[C128:%.+]] = arith.constant -128 // CHECK: [[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) inits([[FILL]] : tensor<1x10x10x4x128xi32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) inits([[OUT]] : tensor<1x10x10x512xi32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32): // CHECK: [[ADD:%.+]] = arith.addi %[[ARG3]], %[[ARG4]] : i32 // CHECK: linalg.yield [[ADD]] : i32 @@ -614,7 +614,7 @@ // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: index, %[[ARG4:[0-9a-zA-Z_]+]]: index, %[[ARG5:[0-9a-zA-Z_]+]]: index, %[[ARG6:[0-9a-zA-Z_]+]]: index): // CHECK: tensor.yield %cst : f32 // CHECK: } : tensor<2x?x?x3xf32> to tensor<2x?x?x3xf32> - // CHECK: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} ins(%[[PADDED]], %arg1 : tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>) outs(%{{.*}} : tensor<2x?x?x3x5xf32>) -> tensor<2x?x?x3x5xf32> + // CHECK: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm 
{dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} ins(%[[PADDED]], %arg1 : tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>) inits(%{{.*}} : tensor<2x?x?x3x5xf32>) -> tensor<2x?x?x3x5xf32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[CONV]] {{\[}}[0], [1], [2], [3, 4]] %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = array, dilation = array, stride = array} : (tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>, tensor<15xf32>) -> tensor<2x?x?x15xf32> return @@ -628,16 +628,16 @@ // CHECK-DAG: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERMS]]) // CHECK-DAG: %[[EMPTY:.+]] = tensor.empty() // CHECK-DAG: %[[ZERO:.+]] = arith.constant 0 - // CHECK-DAG: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]] : f32) outs(%[[EMPTY]] : tensor<1x47x45x43x28xf32>) + // CHECK-DAG: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]] : f32) inits(%[[EMPTY]] : tensor<1x47x45x43x28xf32>) // CHECK-DAG: %[[EMPTY:.+]] = tensor.empty() // CHECK-DAG: %[[CONV3D:.+]] = linalg.conv_3d_ndhwc_dhwcf // CHECK-SAME: {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} // CHECK-SAME: ins(%arg0, %[[TRANSPOSE]] : tensor<1x49x48x47x27xf32>, tensor<3x4x5x27x28xf32>) - // CHECK-SAME: outs(%[[FILL]] : tensor<1x47x45x43x28xf32>) -> tensor<1x47x45x43x28xf32> + // CHECK-SAME: inits(%[[FILL]] : tensor<1x47x45x43x28xf32>) -> tensor<1x47x45x43x28xf32> // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%arg2, %[[CONV3D]] : tensor<28xf32>, tensor<1x47x45x43x28xf32>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<1x47x45x43x28xf32>) { + // CHECK-SAME: inits(%[[EMPTY]] : tensor<1x47x45x43x28xf32>) { // CHECK: ^bb0(%[[A1:.+]]: f32, %[[A2:.+]]: f32, %{{.+}}: f32): // CHECK: %[[ADD:.+]] = arith.addf %[[A1]], %[[A2]] : f32 // CHECK: linalg.yield %[[ADD]] @@ -653,18 +653,18 @@ // CHECK-DAG: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERMS]]) // CHECK-DAG: %[[EMPTY:.+]] = tensor.empty() // CHECK-DAG: %[[ZERO:.+]] = arith.constant 0 - // CHECK-DAG: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]] : i32) outs(%[[EMPTY]] : tensor<1x47x45x43x28xi32>) + // CHECK-DAG: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]] : i32) inits(%[[EMPTY]] : tensor<1x47x45x43x28xi32>) // CHECK-DAG: %[[EMPTY:.+]] = tensor.empty() // CHECK-DAG: %[[IZP:.+]] = arith.constant -128 : i32 // CHECK-DAG: %[[FZP:.+]] = arith.constant 42 : i32 // CHECK-DAG: %[[CONV3D:.+]] = linalg.conv_3d_ndhwc_dhwcf_q // CHECK-SAME: {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} // CHECK-SAME: ins(%arg0, %[[TRANSPOSE]], %[[IZP]], %[[FZP]] : tensor<1x49x48x47x27xi8>, tensor<3x4x5x27x28xi8>, i32, i32) - // CHECK-SAME: outs(%[[FILL]] : tensor<1x47x45x43x28xi32>) -> tensor<1x47x45x43x28xi32> + // CHECK-SAME: inits(%[[FILL]] : tensor<1x47x45x43x28xi32>) -> tensor<1x47x45x43x28xi32> // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: {indexing_maps = [#map, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%arg2, %[[CONV3D]] : tensor<28xi32>, tensor<1x47x45x43x28xi32>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<1x47x45x43x28xi32>) { + // CHECK-SAME: inits(%[[EMPTY]] : tensor<1x47x45x43x28xi32>) { // CHECK: ^bb0(%[[A1:.+]]: i32, %[[A2:.+]]: i32, %{{.+}}: i32): // CHECK: %[[ADD:.+]] = arith.addi %[[A1]], %[[A2]] : i32 // CHECK: linalg.yield %[[ADD]] diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir 
b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir @@ -34,7 +34,7 @@ // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x1x5x7xf32> // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xf32>) outs(%[[EMPTY]] : tensor<3x1x5x7xf32>) + // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xf32>) inits(%[[EMPTY]] : tensor<3x1x5x7xf32>) // CHECK: ^bb0(%[[IN:.+]]: f32, %[[OUT:.+]]: f32): // CHECK: linalg.yield %[[IN]] : f32 %resize = "tosa.resize"(%arg0) {mode = "NEAREST_NEIGHBOR", scale = array, offset = array, border = array} : (tensor<3x1x1x7xf32>) -> tensor<3x1x5x7xf32> @@ -52,7 +52,7 @@ // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x7xi32> // CHECK: %[[RESIZE:.+]] = linalg.generic // CHECK-SAME: {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} - // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) outs(%[[EMPTY]] : tensor<3x7xi32>) + // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) inits(%[[EMPTY]] : tensor<3x7xi32>) // CHECK: ^bb0(%[[IN:.+]]: i8, %[[OUT:.+]]: i32): // CHECK: %[[EXT:.+]] = arith.extsi %[[IN]] : i8 to i32 // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : i32 @@ -68,7 +68,7 @@ // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<3x4x5x7xi32> // CHECK: %[[BROADCAST:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi32>) outs(%[[EMPTY]] : tensor<3x4x5x7xi32>) { + // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi32>) inits(%[[EMPTY]] : tensor<3x4x5x7xi32>) { // CHECK: ^bb0(%[[IN:.+]]: i32, %[[OUT:.+]]: i32): // CHECK: linalg.yield %[[IN]] : i32 %resize = "tosa.resize"(%arg0) {mode = "BILINEAR", scale = array, offset = array, border = array} : (tensor<3x1x1x7xi8>) -> tensor<3x4x5x7xi32> @@ -87,7 +87,7 @@ // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#map, #map] // CHECK-SAME: iterator_types = ["parallel", "parallel"]} - // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) outs(%[[EMPTY]] : tensor<3x7xi32>) { + // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) inits(%[[EMPTY]] : tensor<3x7xi32>) { // CHECK: ^bb0(%[[IN:.+]]: i8, %[[OUT:.+]]: i32): // CHECK: %[[EXT:.+]] = arith.extsi %[[IN]] : i8 to i32 // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : i32 diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -6,7 +6,7 @@ // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]] func.func @test_abs(%arg0: tensor) -> tensor { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins(%[[ARG0]] : tensor) outs([[INIT]] : tensor) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins(%[[ARG0]] : tensor) inits([[INIT]] : tensor) { // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = math.absf %[[ARG1]] // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -26,7 +26,7 @@ // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]] func.func @test_abs(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK: 
[[INIT:%.+]] = tensor.empty() : tensor<2xf32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xf32>) inits([[INIT]] : tensor<2xf32>) { // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = math.absf %[[ARG1]] // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -45,7 +45,7 @@ // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]] func.func @test_abs(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xf32>) outs([[INIT]] : tensor<2x3xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xf32>) inits([[INIT]] : tensor<2x3xf32>) { // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = math.absf %[[ARG1]] // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -97,7 +97,7 @@ func.func @test_broadcast(%arg0: tensor<1xf32>, %arg1: tensor<2xf32>) -> tensor<2xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32> // CHECK: [[RESHAPE:%.+]] = "tosa.reshape"(%[[ARG0]]) - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins([[RESHAPE]], %[[ARG1]] : tensor, tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins([[RESHAPE]], %[[ARG1]] : tensor, tensor<2xf32>) inits([[INIT]] : tensor<2xf32>) { // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32 // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -117,7 +117,7 @@ func.func @test_broadcast_swapped_args(%arg0: tensor<2xf32>, %arg1: tensor<1xf32>) -> tensor<2xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32> // CHECK: [[RESHAPE:%.+]] = "tosa.reshape"(%[[ARG1]]) - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[RESHAPE]] : tensor<2xf32>, tensor) outs([[INIT]] : tensor<2xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[RESHAPE]] : tensor<2xf32>, tensor) inits([[INIT]] : tensor<2xf32>) { // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32 // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -139,7 +139,7 @@ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32> // CHECK: [[RESHAPE1:%.+]] = "tosa.reshape"(%[[ARG0]]) {new_shape = array} // CHECK: [[RESHAPE2:%.+]] = "tosa.reshape"(%[[ARG1]]) {new_shape = array} - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins([[RESHAPE1]], [[RESHAPE2]] : tensor<3xf32>, tensor<2xf32>) outs([[INIT]] : tensor<2x3xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP0]]], iterator_types = ["parallel", 
"parallel"]} ins([[RESHAPE1]], [[RESHAPE2]] : tensor<3xf32>, tensor<2xf32>) inits([[INIT]] : tensor<2x3xf32>) { // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32 // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -560,7 +560,7 @@ func.func @test_transpose(%arg0: tensor<1x2x3xi32>) -> () { %0 = arith.constant dense<[1, 2, 0]> : tensor<3xi32> // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3x1xi32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel"]} ins([[ARG0]] : tensor<1x2x3xi32>) outs([[OUT:%.+]] : tensor<2x3x1xi32>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel"]} ins([[ARG0]] : tensor<1x2x3xi32>) inits([[OUT:%.+]] : tensor<2x3x1xi32>) // CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32) // CHECK: linalg.yield [[ARG1]] // CHECK: } @@ -580,7 +580,7 @@ // CHECK: %[[C1:.+]] = arith.constant 1 // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) : tensor - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x3x4xi32>) outs([[OUT:%.+]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x3x4xi32>) inits([[OUT:%.+]] : tensor) // CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32) // CHECK: linalg.yield [[ARG1]] // CHECK: } @@ -602,7 +602,7 @@ // CHECK: %[[C1:.+]] = arith.constant 1 // CHECK: %[[DIM1:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM0]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs([[OUT:%.+]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits([[OUT:%.+]] : tensor) // CHECK: ^bb0([[ARG1:%.+]]: f32, [[ARG2:%.+]]: f32) // CHECK: linalg.yield [[ARG1]] // CHECK: } @@ -621,8 +621,8 @@ func.func @reduce_float(%arg0: tensor<5x4xf32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<4xf32> // CHECK: [[CST0:%.+]] = arith.constant 0.0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<4xf32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xf32>) inits([[FILL]] : tensor<4xf32>) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield [[RES]] : f32 @@ -631,8 +631,8 @@ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<5xf32> // CHECK: [[CST0:%.+]] = arith.constant 0.0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] 
: tensor<5x4xf32>) outs([[FILL]] : tensor<5xf32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xf32>) inits([[FILL]] : tensor<5xf32>) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield [[RES]] : f32 @@ -671,8 +671,8 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor // CHECK: %[[CST0:.+]] = arith.constant 0.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[FILL]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[FILL]] : tensor) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[RES:.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[RES]] : f32 @@ -691,8 +691,8 @@ func.func @reduce_float_dyn_rank_1(%arg0: tensor) -> () { // CHECK-DAG: %[[INIT:.+]] = tensor.empty() : tensor // CHECK-DAG: %[[CST0:.+]] = arith.constant 0.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction"]} ins(%[[ARG0]] : tensor) outs(%[[FILL]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction"]} ins(%[[ARG0]] : tensor) inits(%[[FILL]] : tensor) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[RES:.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[RES]] : f32 @@ -713,8 +713,8 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<5x?xf32> // CHECK: %[[CST1:.+]] = arith.constant 1.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST1]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[ARG0]] : tensor<5x?x4xf32>) outs(%[[FILL]] : tensor<5x?xf32>) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST1]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[ARG0]] : tensor<5x?x4xf32>) inits(%[[FILL]] : tensor<5x?xf32>) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[RES:.+]] = arith.mulf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[RES]] : f32 @@ -735,8 +735,8 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) // CHECK: %[[CMIN:.+]] = arith.constant -3.40282347E+38 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CMIN]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor) outs(%[[FILL]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill 
ins(%[[CMIN]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor) inits(%[[FILL]] : tensor) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[MAX:.+]] = arith.maxf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[MAX]] : f32 @@ -756,8 +756,8 @@ func.func @reduce_int(%arg0: tensor<5x4xi32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<4xi32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi32>) inits([[FILL]] : tensor<4xi32>) // CHECK: ^bb0(%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) // CHECK: [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32 // CHECK: linalg.yield [[RES]] : i32 @@ -766,8 +766,8 @@ // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<5xi32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xi32>) inits([[FILL]] : tensor<5xi32>) // CHECK: ^bb0(%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) // CHECK: [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32 // CHECK: linalg.yield [[RES]] : i32 @@ -806,8 +806,8 @@ func.func @reduce_bool(%arg0: tensor<5x4xi1>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant true - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi1>) outs([[FILL]] : tensor<4xi1>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi1>) inits([[FILL]] : tensor<4xi1>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i1, %[[ARG2:[0-9a-zA-Z_]+]]: i1) // CHECK: [[RES:%.+]] = arith.andi %[[ARG1]], %[[ARG2]] : i1 // CHECK: linalg.yield [[RES]] : i1 @@ -905,7 +905,7 @@ // CHECK: [[C0:%.+]] = arith.constant 19689 // CHECK: [[C1:%.+]] = arith.constant 15 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) inits([[INIT]] : tensor<2xi8>) // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8): // CHECK: [[C17:%.+]] = arith.constant 17 // CHECK: [[C22:%.+]] = arith.constant 22 @@ -926,7 +926,7 @@ // CHECK: [[C0:%.+]] = 
arith.constant 19689 // CHECK: [[C1:%.+]] = arith.constant 15 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xui8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) inits([[INIT]] : tensor<2xui8>) // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: ui8): // CHECK: [[C17:%.+]] = arith.constant 17 // CHECK: [[C22:%.+]] = arith.constant 22 @@ -959,13 +959,13 @@ // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor) %0 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array, shift = array, scale32 = false, double_round = false, per_channel = false} : (tensor) -> (tensor) // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor) %1 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = array, shift = array, scale32 = false, double_round = false, per_channel = false} : (tensor) -> (tensor) return @@ -983,7 +983,7 @@ // CHECK: %[[C2:.+]] = arith.constant 2 // CHECK: %[[DIM2:.+]] = tensor.dim %[[ARG0]], %[[C2]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM2]]) - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x?x32xi32>) outs(%[[INIT]] : tensor<1x?x?x32xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x?x32xi32>) inits(%[[INIT]] : tensor<1x?x?x32xi8>) %0 = "tosa.rescale"(%arg0) {double_round = true, input_zp = 0 : i32, multiplier = array, output_zp = 0 : i32, per_channel = false, scale32 = true, shift = array} : (tensor<1x?x?x32xi32>) -> tensor<1x?x?x32xi8> return } @@ -998,7 +998,7 @@ // CHECK: [[C0:%.+]] = arith.constant 19689 // CHECK: [[C1:%.+]] = arith.constant 15 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xui8>) outs([[INIT]] : tensor<2xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xui8>) inits([[INIT]] : tensor<2xi8>) // CHECK: ^bb0([[IN:%.+]]: ui8, [[UNUSED:%.+]]: i8): // CHECK: [[C17:%.+]] = arith.constant 17 // CHECK: [[C22:%.+]] = arith.constant 22 @@ -1030,7 
+1030,7 @@ // CHECK: [[MULTIPLIERS:%.+]] = arith.constant dense<[42, 43, 0]> // CHECK: [[SHIFTS:%.+]] = arith.constant dense<[14, 15, 0]> // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) outs([[INIT]] : tensor<3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) inits([[INIT]] : tensor<3xi8>) // CHECK: ^bb0([[IN:%.+]]: i8, [[MULTIPLIER:%.+]]: i32, [[SHIFT:%.+]]: i8, [[UNUSED:%.+]]: i8): // CHECK: [[C243:%.+]] = arith.constant 243 // CHECK: [[C252:%.+]] = arith.constant 252 @@ -1083,7 +1083,7 @@ // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} inits(%[[INIT]] : tensor<5x4xi32>) // CHECK-DAG: %[[I0:.+]] = linalg.index 0 // CHECK-DAG: %[[I1:.+]] = linalg.index 1 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1 @@ -1096,7 +1096,7 @@ // CHECK: %[[C1:.+]] = arith.constant 1 // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} inits(%[[INIT]] : tensor<5x4xi32>) // CHECK-DAG: %[[I0:.+]] = linalg.index 0 // CHECK-DAG: %[[I1:.+]] = linalg.index 1 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1 @@ -1120,7 +1120,7 @@ // CHECK: %[[C0_2:.+]] = arith.constant 0 // CHECK: %[[D0_2:.+]] = tensor.dim %[[ARG0]], %[[C0_2]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[D0_1]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} inits(%[[INIT]] : tensor) // CHECK-DAG: %[[I0:.+]] = linalg.index 0 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1 // CHECK-DAG: %[[RDIM_MINUS_C1:.+]] = arith.subi %[[D0_2]], %[[SUB1]] @@ -1140,21 +1140,21 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor<2x3xi8> func.func @tile(%arg0 : tensor<2x3xi8>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<2x2x1x3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits([[INIT]] : tensor<2x2x1x3xi8>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8 // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: "tosa.reshape"([[GENERIC]]) {new_shape = array} %0 = "tosa.tile"(%arg0) {multiples = array} : (tensor<2x3xi8>) -> (tensor<4x3xi8>) // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: 
[[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<1x2x2x3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits([[INIT]] : tensor<1x2x2x3xi8>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8 // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: "tosa.reshape"([[GENERIC]]) {new_shape = array} %1 = "tosa.tile"(%arg0) {multiples = array} : (tensor<2x3xi8>) -> (tensor<2x6xi8>) // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<5x2x7x3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits([[INIT]] : tensor<5x2x7x3xi8>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8 // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: "tosa.reshape"([[GENERIC]]) {new_shape = array} @@ -1174,7 +1174,7 @@ // CHECK: %[[CST0:.+]] = arith.constant 0 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]] : tensor // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor<2x?x1x3xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor<2x?x1x3xi8>) // CHECK: ^bb0(%[[ARG1:.+]]: i8, // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: "tosa.reshape"(%[[GENERIC]]) {new_shape = array} @@ -1194,7 +1194,7 @@ // CHECK: %[[CST1:.+]] = arith.constant 1 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]] : tensor<2x3xi8> // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs(%[[INIT]] : tensor<2x2x?x3xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits(%[[INIT]] : tensor<2x2x?x3xi8>) // CHECK: ^bb0(%[[ARG1:.+]]: i8, // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: "tosa.reshape"(%[[GENERIC]]) {new_shape = array} @@ -1214,11 +1214,11 @@ func.func @argmax(%arg0 : tensor<3x2xi32>, %arg1 : tensor<6xf32>) -> () { // CHECK: [[IDX_INIT:%.+]] = tensor.empty() // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32 - // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]] + // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}inits([[IDX_INIT]] // CHECK: [[VAL_INIT:%.+]] = tensor.empty() // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648 - // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>) + // 
CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}inits([[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x2xi32>) inits([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32 // CHECK: [[IDX:%.+]] = linalg.index 0 // CHECK: [[CAST:%.+]] = arith.index_cast [[IDX]] @@ -1230,11 +1230,11 @@ // CHECK: [[IDX_INIT:%.+]] = tensor.empty() // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32 - // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]] + // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}inits([[IDX_INIT]] // CHECK: [[VAL_INIT:%.+]] = tensor.empty() // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648 - // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#map, #map2, #map2], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) + // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}inits([[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#map, #map2, #map2], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x2xi32>) inits([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32 // CHECK: [[IDX:%.+]] = linalg.index 1 // CHECK: [[CAST:%.+]] = arith.index_cast [[IDX]] @@ -1266,11 +1266,11 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]] // CHECK: %[[IDX_INIT:.+]] = tensor.empty(%[[DYN]]) // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32 - // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]] + // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}inits(%[[IDX_INIT]] // CHECK: %[[VAL_INIT:.+]] = tensor.empty(%[[DYN]]) // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648 - // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor, tensor) + // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}inits(%[[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x?xi32>) inits(%[[IDX_FILL]], %[[VAL_FILL]] : tensor, tensor) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32 // CHECK: %[[IDX:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[IDX]] @@ -1290,11 +1290,11 @@ func.func @argmax_dyn_axis(%arg0 : tensor<3x?xi32>) -> () { // CHECK: %[[IDX_INIT:.+]] = tensor.empty() // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32 - // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]] + // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}inits(%[[IDX_INIT]] // CHECK: %[[VAL_INIT:.+]] = tensor.empty() // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648 - // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} 
ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) + // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}inits(%[[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x?xi32>) inits(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) // CHECK: %[[IDX:.+]] = linalg.index 1 // CHECK: %[[CAST:.+]] = arith.index_cast %[[IDX]] // CHECK: %[[CMP:.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]] @@ -1312,7 +1312,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]] func.func @gather_float(%arg0: tensor<2x3x2xf32>, %arg1: tensor<2x3xi32>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xf32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) inits(%[[INIT]] : tensor<2x3x2xf32>) // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32) // CHECK: %[[IDX0:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]] @@ -1332,7 +1332,7 @@ // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor) inits(%[[INIT]] : tensor) // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32) // CHECK: %[[IDX0:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]] @@ -1350,7 +1350,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]] func.func @gather_int(%arg0: tensor<2x3x2xi32>, %arg1: tensor<2x3xi32>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) inits(%[[INIT]] : tensor<2x3x2xi32>) // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: i32) // CHECK: %[[IDX0:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]] @@ -1368,7 +1368,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: func.func @table8(%arg0: tensor<6xi8>, %arg1: tensor<512xi8>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) inits(%[[INIT]] : tensor<6xi8>) // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8) // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]] // CHECK: %[[OFFSET:.+]] = arith.constant 128 @@ -1386,7 +1386,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: func.func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () { // CHECK: %[[INIT:.+]] = 
tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi16>) outs(%[[INIT]] : tensor<6xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi16>) inits(%[[INIT]] : tensor<6xi32>) // CHECK: ^bb0(%[[ARG2:.*]]: i16, %[[ARG3:.*]]: i32) // CHECK: %[[EXT_IN:.+]] = arith.extsi %[[ARG2]] // CHECK: %[[C32768:.+]] = arith.constant 32768 @@ -1421,7 +1421,7 @@ // CHECK: %[[CST0:.+]] = arith.constant 0 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor) // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8) // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]] // CHECK: %[[OFFSET:.+]] = arith.constant 128 @@ -1439,7 +1439,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: func.func @table8_dyn_table(%arg0: tensor<6xi8>, %arg1: tensor) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) inits(%[[INIT]] : tensor<6xi8>) // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8) // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]] // CHECK: %[[OFFSET:.+]] = arith.constant 128 diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis-empty-tensor-elimination.mlir @@ -10,7 +10,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"] @@ -37,7 +37,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-analysis.mlir @@ -4,7 +4,7 @@ func.func @unknown_op_aliasing(%f: f32, %f2: f32, %pos: index) -> f32 { %0 = tensor.empty() : tensor<10xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<10xf32>) -> tensor<10xf32> + %1 = linalg.fill ins(%f : f32) inits(%0 : 
tensor<10xf32>) -> tensor<10xf32> // Something must bufferize out-of-place because the op may return an alias // of %1. @@ -12,7 +12,7 @@ %alias = "dummy.dummy_op"(%1) : (tensor<10xf32>) -> (tensor<10xf32>) // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%f2 : f32) outs(%1 : tensor<10xf32>) -> tensor<10xf32> + %2 = linalg.fill ins(%f2 : f32) inits(%1 : tensor<10xf32>) -> tensor<10xf32> %3 = tensor.extract %alias[%pos] : tensor<10xf32> return %3 : f32 } @@ -23,7 +23,7 @@ func.func @unknown_op_writing(%f: f32, %f2: f32, %pos: index) -> f32 { %0 = tensor.empty() : tensor<10xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%f : f32) outs(%0 : tensor<10xf32>) -> tensor<10xf32> + %1 = linalg.fill ins(%f : f32) inits(%0 : tensor<10xf32>) -> tensor<10xf32> // The op may bufferize to a memory write, so it must bufferize out-of-place. // CHECK: "dummy.dummy_op"(%{{.*}}) {__inplace_operands_attr__ = ["false"]} diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-empty-tensor-elimination.mlir @@ -11,7 +11,7 @@ %f0 = arith.constant 0.0: f32 // CHECK: %[[EXTRACT_SLICE_ALLOC:.*]] = memref.alloc(%[[sz]]) - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[EXTRACT_SLICE_ALLOC]] : memref) + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[EXTRACT_SLICE_ALLOC]] : memref) // Alloc is needed for the **first** insert_slice (due to backward traversal during analysis). // CHECK: %[[DIM:.*]] = memref.dim %[[FUNC_ARG]] // This allocs the whole dim to allow for a full clone of t. @@ -20,7 +20,7 @@ // insert_slice. The pass replaces the tensor.empty with an out-of-place // extract_slice. %a = tensor.empty(%sz) : tensor - %f = linalg.fill ins(%f0 : f32) outs(%a : tensor) -> tensor + %f = linalg.fill ins(%f0 : f32) inits(%a : tensor) -> tensor // CHECK: memref.copy %[[FUNC_ARG]], %[[ALLOC]] : memref to memref // CHECK: %[[SV0_ALLOC:.*]] = memref.subview %[[ALLOC]][0] [%[[sz]]] [1] : memref to memref> @@ -52,8 +52,8 @@ // CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] %a = tensor.empty(%sz) : tensor - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW]] : memref) -> tensor + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[T_SUBVIEW]] : memref) -> tensor // Self-copy canonicalizes away later. 
%r1 = tensor.insert_slice %f into %t[42][%sz][1]: tensor into tensor @@ -79,8 +79,8 @@ %iv_i32 = arith.index_cast %iv : index to i32 %f = arith.sitofp %iv_i32 : i32 to f32 - // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[subview]] - %filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) -> tensor<5xf32> + // CHECK: linalg.fill ins(%{{.*}}{{.*}}inits(%[[subview]] + %filled = linalg.fill ins(%f : f32) inits(%blank : tensor<5xf32>) -> tensor<5xf32> // CHECK-NOT: memref.copy %inserted = tensor.insert_slice %filled into %bb[%iv][5][1] : tensor<5xf32> into tensor @@ -109,8 +109,8 @@ %iv_i32 = arith.index_cast %iv : index to i32 %f = arith.sitofp %iv_i32 : i32 to f32 - // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[subview]] - %filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) -> tensor<5xf32> + // CHECK: linalg.fill ins(%{{.*}}{{.*}}inits(%[[subview]] + %filled = linalg.fill ins(%f : f32) inits(%blank : tensor<5xf32>) -> tensor<5xf32> // CHECK-NOT: memref.copy %inserted = tensor.insert_slice %filled into %bb[%idx][5][1] : tensor<5xf32> into tensor @@ -130,7 +130,7 @@ func.func @shape_mismatch(%t: tensor<5x6x128xf32>) -> tensor<5x6x128xf32> { %cst = arith.constant 8.0 : f32 %0 = tensor.empty() : tensor<128xf32> - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<128xf32>) -> tensor<128xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<128xf32>) -> tensor<128xf32> %2 = tensor.expand_shape %1 [[0, 1, 2]] : tensor<128xf32> into tensor<1x1x128xf32> %3 = tensor.insert_slice %2 into %t[2, 3, 0][1, 1, 128][1, 1, 1] @@ -158,8 +158,8 @@ // CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] %a = tensor.empty(%sz) : tensor - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW]] : memref) -> tensor + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[T_SUBVIEW]] : memref) -> tensor // Self-copy canonicalizes away later. scf.forall.in_parallel { @@ -185,15 +185,15 @@ %if = scf.if %c -> tensor { // CHECK: %[[T_SUBVIEW_1:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] %a1 = tensor.empty(%sz) : tensor - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW_1]] : memref) -> tensor + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[T_SUBVIEW_1]] : memref) -> tensor // CHECK: scf.yield %[[T_SUBVIEW_1]] scf.yield %f1 : tensor } else { // CHECK: %[[T_SUBVIEW_2:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] %a2 = tensor.empty(%sz) : tensor - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW_2]] : memref) -> tensor + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[T_SUBVIEW_2]] : memref) -> tensor // CHECK: scf.yield %[[T_SUBVIEW_2]] scf.yield %f2 : tensor } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir @@ -140,10 +140,10 @@ // One alloc for the alloc_tensor, another one because the transfer_write // bufferizes out-of-place. 
// CHECK: %[[m1:.*]] = memref.alloc() {{.*}} : memref<10xf32>
- // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[m1]]
+ // CHECK: linalg.fill ins(%{{.*}}{{.*}}inits(%[[m1]]
// CHECK: %[[filled_tensor:.*]] = bufferization.to_tensor %[[m1]]
%t1 = bufferization.alloc_tensor() : tensor<10xf32>
- %filled = linalg.fill ins(%cst : f32) outs(%t1 : tensor<10xf32>) -> tensor<10xf32>
+ %filled = linalg.fill ins(%cst : f32) inits(%t1 : tensor<10xf32>) -> tensor<10xf32>
// The transfer_write is out-of-place because "dummy_op" may read.
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32>
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir
@@ -51,13 +51,13 @@
// CHECK: %[[call:.*]] = call @return_slice(%[[t]]
// CHECK: %[[alloc:.*]] = memref.alloc
// CHECK: memref.copy %[[call]], %[[alloc]]
-// CHECK: linalg.fill ins({{.*}}) outs(%[[t]]
+// CHECK: linalg.fill ins({{.*}}) inits(%[[t]]
// CHECK: memref.load %[[alloc]]
// CHECK: memref.load %[[t]]
func.func @main(%t: tensor, %sz: index, %idx: index) -> (f32, f32) {
%cst = arith.constant 1.0 : f32
%0 = call @return_slice(%t, %sz) : (tensor, index) -> (tensor)
- %filled = linalg.fill ins(%cst : f32) outs(%t : tensor) -> tensor
+ %filled = linalg.fill ins(%cst : f32) inits(%t : tensor) -> tensor
%r1 = tensor.extract %0[%idx] : tensor
%r2 = tensor.extract %filled[%idx] : tensor
return %r1, %r2 : f32, f32
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir
@@ -75,21 +75,21 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
%C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%B: tensor<4x4xf32>)
+ inits(%B: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// matmul output operand interferes with input operand.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
%D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%B: tensor<4x4xf32>)
+ inits(%B: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// matmul output operand does not interfere with input operand.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%B: tensor<4x4xf32>)
+ inits(%B: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// CHECK: return
@@ -260,7 +260,7 @@
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
@@ -292,7 +292,7 @@
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor
// CHECK: tensor.insert_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
@@ -304,7 +304,7 @@
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor) -> tensor
+ %5 = linalg.fill ins(%cst : f32) inits(%4 : tensor) -> tensor
%3 = vector.transfer_read %1[%idx2], %cst2 : tensor, vector<5xf32>
@@ -336,14 +336,14 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
%D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%B: tensor<4x4xf32>)
+ inits(%B: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// matmul output operand is inplaceable at the function boundary.
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%C: tensor<4x4xf32>)
+ inits(%C: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// CHECK: return
@@ -370,7 +370,7 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
%D = linalg.matmul ins(%B, %C: tensor, tensor)
- outs(%sB: tensor<4x4xf32>)
+ inits(%sB: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// Step 2. %sC forward propagates to an inplace write in %E.
@@ -385,7 +385,7 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%sC: tensor<4x4xf32>)
+ inits(%sC: tensor<4x4xf32>)
  -> tensor<4x4xf32>
return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
@@ -410,7 +410,7 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) outs(%15 : tensor) -> tensor
+ %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) inits(%15 : tensor) -> tensor
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
@@ -450,7 +450,7 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%D = linalg.matmul ins(%B, %C: tensor, tensor)
- outs(%sB: tensor<4x4xf32>)
+ inits(%sB: tensor<4x4xf32>)
  -> tensor<4x4xf32>
// Step 2. %sC forward propagates to an inplace write in %E.
@@ -465,7 +465,7 @@
// CHECK: linalg.matmul
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
%E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
- outs(%sC: tensor<4x4xf32>)
+ inits(%sC: tensor<4x4xf32>)
  -> tensor<4x4xf32>
return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
@@ -504,7 +504,7 @@
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]}
%sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor
%ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32>
- %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
+ %FA = linalg.fill ins(%f0 : f32) inits(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
%rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor
%rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor
@@ -527,7 +527,7 @@
%sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor to tensor
%ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor to tensor<4x?xf32>
%sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
- %FB = linalg.fill ins(%f0 : f32) outs(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32>
+ %FB = linalg.fill ins(%f0 : f32) inits(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32>
%rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
%rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor
%rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor into tensor
@@ -550,7 +550,7 @@
// CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
%sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor to tensor
%ssC = tensor.extract_slice %sC[0, 0][%sz1, 4][1, 1] : tensor to tensor
- %FC = linalg.fill ins(%f0 : f32) outs(%ssC : tensor) -> tensor
+ %FC = linalg.fill ins(%f0 : f32) inits(%ssC : tensor) -> tensor
%rsC = tensor.insert_slice %FC into %sC[0, 0][%sz2, 4][1, 1] : tensor into tensor
%rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor into tensor
@@ -577,12 +577,12 @@
// cannot bufferize inplace.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
- %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
+ %A = linalg.fill ins(%f1 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32>
// 1. Bufferizes inplace: no alias to %A is yet possible.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
+ %B = linalg.fill ins(%f2 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32>
call @foo(%A) : (tensor<64xf32>) -> ()
call @foo(%B) : (tensor<64xf32>) -> ()
@@ -613,12 +613,12 @@
// bufferize inplace.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
- %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
+ %A = linalg.fill ins(%f1 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32>
// 4. Bufferizes inplace: no alias to %A is yet possible.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32>
+ %B = linalg.fill ins(%f2 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32>
// 3. Does not read or write, bufferizes inplace.
// CHECK: scf.for
@@ -638,12 +638,12 @@
// cannot bufferize inplace.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
- %A2 = linalg.fill ins(%f1 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32>
+ %A2 = linalg.fill ins(%f1 : f32) inits(%I2 : tensor<64xf32>) -> tensor<64xf32>
// 1. Bufferizes inplace: no alias to %A2 is yet possible.
// CHECK: fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %B2 = linalg.fill ins(%f2 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32>
+ %B2 = linalg.fill ins(%f2 : f32) inits(%I2 : tensor<64xf32>) -> tensor<64xf32>
call @bar(%A2) : (tensor<64xf32>) -> ()
call @bar(%B2) : (tensor<64xf32>) -> ()
@@ -688,8 +688,8 @@
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
- %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
- %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %8 = linalg.fill ins(%cst_0 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %11 = linalg.fill ins(%cst_1 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
@@ -701,7 +701,7 @@
%sB = tensor.extract_slice %11[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
%r = linalg.matmul ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
- outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [2]
@@ -726,7 +726,7 @@
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
- %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %8 = linalg.fill ins(%cst_0 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
%9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
%10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
@@ -734,7 +734,7 @@
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
// CHECK: vector.transfer_write
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
- %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %11 = linalg.fill ins(%cst_1 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
%12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
%13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>
@@ -748,7 +748,7 @@
%sB = tensor.extract_slice %13[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
%r = linalg.matmul ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
- outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: return
// CHECK-SAME: __equivalent_func_args__ = [2]
@@ -779,7 +779,7 @@
// CHECK: linalg.fill
// CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
- %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32>
+ %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32>
// CHECK: tensor.extract_slice
// CHECK-SAME: {__inplace_operands_attr__ = ["true"]
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir
@@ -211,7 +211,7 @@
func.func @mini_test_case1() -> tensor<10x20xf32> {
%f0 = arith.constant 0.0 : f32
%t = bufferization.alloc_tensor() : tensor<10x20xf32>
- %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<10x20xf32>) -> tensor<10x20xf32>
+ %r = linalg.fill ins(%f0 : f32) inits(%t : tensor<10x20xf32>) -> tensor<10x20xf32>
// expected-error @+1 {{operand #0 may return/yield a new buffer allocation}}
return %r : tensor<10x20xf32>
}
diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir
@@ -244,7 +244,7 @@
// CHECK-NOT: copy
func.func @does_not_read(%t: tensor) -> tensor {
%f0 = arith.constant 0.0 : f32
- %r = linalg.fill ins(%f0 : f32) outs(%t : tensor) -> tensor
+ %r = linalg.fill ins(%f0 : f32) inits(%t : tensor) -> tensor
return %r : tensor
}
@@ -416,12 +416,12 @@
// CHECK-NEXT: %[[C0:.*]] = arith.constant 0{{.*}} : f32
%v0 = arith.constant 0.0 : f32
- // CHECK-NEXT: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref>)
- %d = linalg.fill ins(%v0 : f32) outs(%c : tensor) -> tensor
+ // CHECK-NEXT: linalg.fill ins(%[[C0]] : f32) inits(%[[C]] : memref>)
+ %d = linalg.fill ins(%v0 : f32) inits(%c : tensor) -> tensor
- // CHECK-NEXT: linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, strided<[?], offset: ?>>, memref<64xf32, strided<[?], offset: ?>>) outs(%[[C]] : memref>)
+ // CHECK-NEXT: linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, strided<[?], offset: ?>>, memref<64xf32, strided<[?], offset: ?>>) inits(%[[C]] : memref>)
%e = linalg.dot ins(%a, %b : tensor<64xf32>,tensor<64xf32>)
- outs(%d: tensor) -> tensor
+ inits(%d: tensor) -> tensor
// CHECK-NEXT: return
return %e : tensor
@@ -446,12 +446,12 @@
%B = bufferization.alloc_tensor() : tensor<64xf32>
%C = bufferization.alloc_tensor() : tensor
- // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>)
- // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>)
- // CHECK-DAG: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref)
- %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32>
- %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32>
- %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor
+ // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) inits(%[[A]] : memref<64xf32>)
+ // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) inits(%[[B]] : memref<64xf32>)
+ // CHECK-DAG: linalg.fill ins(%[[C0]] : f32) inits(%[[C]] : memref)
+ %AA = linalg.fill ins(%v1 : f32) inits(%A : tensor<64xf32>) -> tensor<64xf32>
+ %BB = linalg.fill ins(%v2 : f32) inits(%B : tensor<64xf32>) -> tensor<64xf32>
+ %CC = linalg.fill ins(%v0 : f32) inits(%C : tensor) -> tensor
// CHECK-NEXT: call @init_and_dot(%[[cA]], %[[cB]], %[[cC]])
%res = call @init_and_dot(%AA, %BB, %CC) :
diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir
@@ -56,11 +56,11 @@
-> (tensor<5xf32>, tensor<5xf32>) {
// CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : i64} : tensor<5xf32>
- // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<5xf32>)
+ // CHECK: linalg.generic {{.*}} inits(%[[alloc]] : tensor<5xf32>)
%r = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
- outs(%t : tensor<5xf32>) {
+ inits(%t : tensor<5xf32>) {
^bb0(%arg0 : f32) :
linalg.yield %f : f32
} -> tensor<5xf32>
@@ -75,11 +75,11 @@
{
%0 = tensor.extract_slice %t[0][3][1] : tensor<5xf32> to tensor<3xf32>
// CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : i64} : tensor<3xf32>
- // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<3xf32>)
+ // CHECK: linalg.generic {{.*}} inits(%[[alloc]] : tensor<3xf32>)
%r = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
- outs(%0 : tensor<3xf32>) {
+ inits(%0 : tensor<3xf32>) {
^bb0(%arg0 : f32) :
linalg.yield %f : f32
} -> tensor<3xf32>
diff --git a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir
--- a/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/transform-ops.mlir
@@ -113,8 +113,8 @@
// CHECK-SAME: %[[B:.*]]: memref<9x6xf32>,
// CHECK-SAME: %[[C:.*]]: memref<12x6xf32>) -> memref<12x6xf32> {
func.func @matmul(%A: tensor<12x9xf32>, %B: tensor<9x6xf32>, %C: tensor<12x6xf32>) -> tensor<12x6xf32> {
- // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) outs(%[[C]] : memref<12x6xf32>)
- %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) outs(%C: tensor<12x6xf32>) -> tensor<12x6xf32>
+ // CHECK: linalg.matmul ins(%[[A]], %[[B]] : memref<12x9xf32>, memref<9x6xf32>) inits(%[[C]] : memref<12x6xf32>)
+ %D = linalg.matmul ins(%A, %B: tensor<12x9xf32>, tensor<9x6xf32>) inits(%C: tensor<12x6xf32>) -> tensor<12x6xf32>
// CHECK: return %[[C]] : memref<12x6xf32>
return %D : tensor<12x6xf32>
}
diff --git a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
--- a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
+++ b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir
@@ -126,7 +126,7 @@
%name = gpu.launch async[%stream] blocks(%arg3, %arg4, %arg5) in (%arg9 = %one, %arg10 = %one, %arg11 = %one)
threads(%arg6, %arg7, %arg8) in (%arg12 = %one, %arg13 = %one, %arg14 = %one) {
- %t = linalg.matmul ins(%x, %y: tensor<32x32xf32>, tensor<32x32xf32>) outs(%z : tensor<32x32xf32>) -> tensor<32x32xf32>
+ %t = linalg.matmul ins(%x, %y: tensor<32x32xf32>, tensor<32x32xf32>) inits(%z : tensor<32x32xf32>) -> tensor<32x32xf32>
gpu.terminator
}
return
@@ -288,7 +288,7 @@
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%x : memref<32x32xf32>)
- outs(%y : memref<32x32xf32>) {
+ inits(%y : memref<32x32xf32>) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
}
diff --git a/mlir/test/Dialect/LLVM/transform-e2e.mlir b/mlir/test/Dialect/LLVM/transform-e2e.mlir
--- a/mlir/test/Dialect/LLVM/transform-e2e.mlir
+++ b/mlir/test/Dialect/LLVM/transform-e2e.mlir
@@ -7,7 +7,7 @@
// CHECK-NOT: linalg
// CHECK: llvm.intr.fmuladd{{.*}}
%0 = linalg.matmul ins(%arg0, %arg1:
tensor<2x4xf32>, tensor<4x6xf32>)
- outs(%arg2: tensor<2x6xf32>)
+ inits(%arg2: tensor<2x6xf32>)
  -> tensor<2x6xf32>
return %0 : tensor<2x6xf32>
}
diff --git a/mlir/test/Dialect/Linalg/affine.mlir b/mlir/test/Dialect/Linalg/affine.mlir
--- a/mlir/test/Dialect/Linalg/affine.mlir
+++ b/mlir/test/Dialect/Linalg/affine.mlir
@@ -10,7 +10,7 @@
%B = memref.view %arg0[%c0][%K, %N] : memref to memref
%C = memref.view %arg0[%c0][%M, %N] : memref to memref
linalg.matmul ins(%A, %B: memref, memref)
- outs(%C: memref)
+ inits(%C: memref)
return
}
@@ -19,7 +19,7 @@
//----------------------------------------------------------------------------//
func.func @named_batch_matmul(%A: memref, %B: memref, %C: memref) {
linalg.batch_matmul ins(%A, %B: memref, memref)
- outs(%C : memref)
+ inits(%C : memref)
return
}
// CHECK-LABEL: @named_batch_matmul
diff --git a/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir b/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir
--- a/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir
+++ b/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir
@@ -7,7 +7,7 @@
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"] }
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
%add = arith.addf %b0, %b1 : f32
linalg.yield %add : f32
@@ -22,7 +22,7 @@
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[%arg3] [%arg5] [1] : tensor to tensor
// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[%arg2, %arg3] [%arg4, %arg5] [1, 1] : tensor to tensor
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor) outs(%[[SLICE2]] : tensor)
+// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor) inits(%[[SLICE2]] : tensor)
// CHECK: return %[[GENERIC]] : tensor
//-----
@@ -34,7 +34,7 @@
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"] }
ins(%arg0, %arg1 : tensor<16x8xf32>, tensor<8xf32>)
- outs(%arg0 : tensor<16x8xf32>) {
+ inits(%arg0 : tensor<16x8xf32>) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
%add = arith.addf %b0, %b1 : f32
linalg.yield %add : f32
@@ -49,7 +49,7 @@
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[4] [2] [1] : tensor<8xf32> to tensor<2xf32>
// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[8, 4] [4, 2] [1, 1] : tensor<16x8xf32> to tensor<4x2xf32>
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) outs(%[[SLICE2]] : tensor<4x2xf32>)
+// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) inits(%[[SLICE2]] : tensor<4x2xf32>)
// CHECK: return %[[GENERIC]] : tensor<4x2xf32>
//-----
@@ -61,7 +61,7 @@
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"] }
ins(%arg0, %arg1 : tensor, tensor<8xf32>)
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
%add = arith.addf %b0, %b1 : f32
linalg.yield %add : f32
@@ -76,7 +76,7 @@
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[%arg2] [2] [1] : tensor<8xf32> to tensor<2xf32>
// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[8, %arg2] [%arg3, 2] [1, 1] : tensor to tensor
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor<2xf32>) outs(%[[SLICE2]] : tensor)
+// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor<2xf32>) inits(%[[SLICE2]] : tensor)
// CHECK: return %[[GENERIC]] : tensor
//-----
@@ -88,7 +88,7 @@
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"] }
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
%add = arith.addf %b0, %b1 : f32
linalg.yield %add : f32
@@ -103,7 +103,7 @@
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[4] [2] [1] : tensor to tensor<2xf32>
// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[8, 4] [4, 2] [1, 1] : tensor to tensor<4x2xf32>
// CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) outs(%[[SLICE2]] : tensor<4x2xf32>)
+// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) inits(%[[SLICE2]] : tensor<4x2xf32>)
// CHECK: return %[[GENERIC]] : tensor<4x2xf32>
//-----
@@ -112,7 +112,7 @@
%lhs = arith.constant dense<1.0> : tensor<4x4xf32>
%rhs = arith.constant dense<1.0> : tensor<4x4xf32>
%dst = arith.constant dense<[[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 11.0], [12.0, 13.0, 14.0, 15.0]]> : tensor<4x4xf32>
- %0 = linalg.matmul ins(%lhs, %rhs : tensor<4x4xf32>, tensor<4x4xf32>) outs(%dst : tensor<4x4xf32>) -> tensor<4x4xf32>
+ %0 = linalg.matmul ins(%lhs, %rhs : tensor<4x4xf32>, tensor<4x4xf32>) inits(%dst : tensor<4x4xf32>) -> tensor<4x4xf32>
%1 = tensor.extract_slice %0[1,1][2,2][1,1] : tensor<4x4xf32> to tensor<2x2xf32>
return %1 : tensor<2x2xf32>
}
@@ -121,7 +121,7 @@
// CHECK: %[[SLICE0:.+]] = arith.constant dense<1.000000e+00> : tensor<2x4xf32>
// CHECK: %[[SLICE1:.+]] = arith.constant dense<1.000000e+00> : tensor<4x2xf32>
// CHECK: %[[SLICE3:.+]] = tensor.extract_slice %[[CST:.+]][1, 1] [2, 2] [1, 1] : tensor<4x4xf32> to tensor<2x2xf32>
-// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[SLICE0]], %[[SLICE1]] : tensor<2x4xf32>, tensor<4x2xf32>) outs(%[[SLICE3]] : tensor<2x2xf32>) -> tensor<2x2xf32>
+// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[SLICE0]], %[[SLICE1]] : tensor<2x4xf32>, tensor<4x2xf32>) inits(%[[SLICE3]] : tensor<2x2xf32>) -> tensor<2x2xf32>
// CHECK: return %[[MATMUL]] : tensor<2x2xf32>
//-----
@@ -136,12 +136,12 @@
%cst = arith.constant 0.0 : f32
%init = tensor.empty() : tensor<1x112x112x32xf32>
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+ %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%conv = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>)
- outs(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+ inits(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%slice = tensor.extract_slice %conv [0, 64, 64, 16] [1, 32, 32, 16] [1, 1, 1, 1]
: tensor<1x112x112x32xf32> to tensor<1x32x32x16xf32>
@@ -153,8 +153,8 @@
// CHECK: %[[SLICE0:.+]] = tensor.extract_slice %arg0[0, 128, 128, 0] [1, 65, 65, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x65x65x3xf32>
// CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[0, 0, 0, 16] [3, 3, 3, 16] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x16xf32>
// CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[INIT]][0, 64, 64, 16] [1, 32, 32, 16] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x32x32x16xf32>
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST:.+]] : f32) outs(%[[SLICE2]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32>
-// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%[[SLICE0]], %[[SLICE1]] : tensor<1x65x65x3xf32>, tensor<3x3x3x16xf32>) outs(%[[FILL]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32>
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST:.+]] : f32) inits(%[[SLICE2]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32>
+// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%[[SLICE0]], %[[SLICE1]] : tensor<1x65x65x3xf32>, tensor<3x3x3x16xf32>) inits(%[[FILL]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32>
// CHECK: return %[[CONV]] : tensor<1x32x32x16xf32>
//-----
@@ -163,7 +163,7 @@
func.func @rank_reducing_slice(%width : index) -> tensor<1x1x1x?xf32> {
%cst = arith.constant 1.000000e+00 : f32
%init = tensor.empty(%width) : tensor<1x?xf32>
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x?xf32>) -> tensor<1x?xf32>
+ %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x?xf32>) -> tensor<1x?xf32>
%slice = tensor.extract_slice %fill[0, 0] [1, %width] [1, 1] : tensor<1x?xf32> to tensor
%expand = tensor.expand_shape %slice [[0, 1, 2, 3]] : tensor into tensor<1x1x1x?xf32>
return %expand : tensor<1x1x1x?xf32>
diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir
--- a/mlir/test/Dialect/Linalg/bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/bufferize.mlir
@@ -16,7 +16,7 @@
// CHECK-DAG: %[[RESULT_MEMREF:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK: linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]}
// CHECK-SAME: ins(%[[MEMREF]] : memref<4xf32>)
-// CHECK-SAME: outs(%[[RESULT_MEMREF]] : memref<4xf32>) {
+// CHECK-SAME: inits(%[[RESULT_MEMREF]] : memref<4xf32>) {
// CHECK: ^bb0(%[[RESULT1:.*]]: f32, %[[UNUSED:.*]]: f32):
// CHECK: %[[DIM1:.*]] = math.exp %[[RESULT1]] : f32
// CHECK: linalg.yield %[[DIM1]] : f32
@@ -28,7 +28,7 @@
indexing_maps = [#map0, #map0],
iterator_types = ["parallel"]
} ins(%arg0 : tensor<4xf32>)
- outs(%arg0 : tensor<4xf32>) {
+ inits(%arg0 : tensor<4xf32>) {
^bb0(%gen_arg1: f32, %out: f32):
%tmp1 = math.exp %gen_arg1 : f32
linalg.yield %tmp1 : f32
@@ -50,14 +50,14 @@
// CHECK-DAG: %[[OUT_BUF:.*]] = memref.alloc(%[[SIZE]]) {{.*}} : memref
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[MEMREF]] : memref)
-// CHECK-SAME: outs(%[[OUT_BUF]] : memref) {
+// CHECK-SAME: inits(%[[OUT_BUF]] : memref) {
func.func @empty_tensor(%in : tensor, %size: index) -> tensor {
%init = tensor.empty(%size) : tensor
%0 = linalg.generic {
indexing_maps = [#map0, #map0],
iterator_types = ["parallel"]
} ins(%in : tensor)
- outs(%init : tensor) {
+ inits(%init : tensor) {
^bb0(%gen_arg1: f32, %out: f32):
%tmp1 = math.exp %gen_arg1 : f32
linalg.yield %tmp1 : f32
@@ -75,7 +75,7 @@
// CHECK: %[[RESULT1:.*]] = memref.alloc() {{.*}} : memref<4xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%{{.*}} : memref<4xf32>)
-// CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref<4xf32>, memref<4xf32>)
+// CHECK-SAME: inits(%[[RESULT0]], %[[RESULT1]] : memref<4xf32>, memref<4xf32>)
// CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32):
func.func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) {
%0, %1 = linalg.generic {
@@ -108,7 +108,7 @@
// CHECK-DAG: %[[MEMREF_ARG:.*]] = bufferization.to_memref %[[ARG]] : memref
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[MEMREF_ARG]] : memref)
-// CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref, memref)
+// CHECK-SAME: inits(%[[RESULT0]], %[[RESULT1]] : memref, memref)
func.func @dynamic_results(%arg0: tensor) -> (tensor, tensor) {
%0, %1 = linalg.generic {
@@ -146,13 +146,13 @@
// CHECK: memref.copy %[[ARG1_MEMREF]], %[[INIT_BUFFER]] : memref<3x2xf32> to memref<3x2xf32>
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[ARG0_MEMREF]] : memref<2x3x4xvector<3x4xi4>>)
-// CHECK-SAME: outs(%[[INIT_BUFFER]] : memref<3x2xf32>) {
+// CHECK-SAME: inits(%[[INIT_BUFFER]] : memref<3x2xf32>) {
func.func @generic_with_init_tensor(%arg0: tensor<2x3x4xvector<3x4xi4>>, %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>) {
%0 = linalg.generic #trait
ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>)
- outs(%arg1 : tensor<3x2xf32>) {
+ inits(%arg1 : tensor<3x2xf32>) {
^bb(%v0: vector<3x4xi4>, %v1: f32) :
linalg.yield %v1 : f32
} -> tensor<3x2xf32>
@@ -167,10 +167,10 @@
func.func @bufferize_fill(%arg0: tensor) -> tensor {
%c0 = arith.constant 0.0 : f32
// CHECK: %[[ALLOC:.*]] = memref.alloc
- // CHECK: linalg.fill ins(%cst : f32) outs(%[[ALLOC]] : memref)
+ // CHECK: linalg.fill ins(%cst : f32) inits(%[[ALLOC]] : memref)
// CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref
// CHECK: return %[[TENSOR]]
- %0 = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor) -> tensor
+ %0 = linalg.fill ins(%c0 : f32) inits(%arg0 : tensor) -> tensor
return %0 : tensor
}
@@ -179,13 +13,13 @@
// CHECK-LABEL: func @bufferize_dot
func.func @bufferize_dot(%in: tensor<4xf32>, %out: tensor) -> tensor {
%dot = linalg.dot ins(%in, %in : tensor<4xf32>, tensor<4xf32>)
- outs(%out : tensor) -> tensor
+ inits(%out : tensor) -> tensor
return %dot : tensor
// CHECK: %[[ALLOC:.*]] = memref.alloc
// TODO: The copy is not necessary.
// CHECK: memref.copy {{.*}}, %[[ALLOC]]
// CHECK: linalg.dot ins(%{{.*}}, %{{.*}} : memref<4xf32>, memref<4xf32>)
- // CHECK-SAME: outs(%[[ALLOC:.*]] : memref)
+ // CHECK-SAME: inits(%[[ALLOC:.*]] : memref)
// CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref
// CHECK: return %[[OUT_TENSOR]]
}
@@ -202,14 +202,14 @@
// CHECK: %[[collapse:.*]] = tensor.collapse_shape %[[arg0]]
// CHECK: %[[collapse_m:.*]] = bufferization.to_memref %[[collapse]]
// CHECK: %[[alloc:.*]] = memref.alloc()
-// CHECK: linalg.generic {{.*}} ins(%[[collapse_m]] : memref<6xi1>) outs(%[[alloc]] : memref<6xi64>)
+// CHECK: linalg.generic {{.*}} ins(%[[collapse_m]] : memref<6xi1>) inits(%[[alloc]] : memref<6xi64>)
// CHECK: %[[generic_t:.*]] = bufferization.to_tensor %[[alloc]]
// CHECK: %[[call:.*]] = call @csum(%[[generic_t]])
// CHECK: return %[[call]]
func.func public @main(%arg0: tensor<2x3xi1>) -> tensor<6xi64> {
%0 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<2x3xi1> into tensor<6xi1>
%1 = tensor.empty() : tensor<6xi64>
- %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<6xi1>) outs(%1 : tensor<6xi64>) {
+ %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<6xi1>) inits(%1 : tensor<6xi64>) {
^bb0(%arg1: i1, %arg2: i64):
%4 = arith.extui %arg1 : i1 to i64
linalg.yield %4 : i64
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -10,9 +10,9 @@
%2 = memref.view %1[%c0][] : memref to memref<16x16xf32>
%3 = memref.cast %2 : memref<16x16xf32> to memref
- // CHECK: linalg.matmul ins({{.*}}memref<16x16xf32>, memref<16x16xf32>) outs({{.*}}memref<16x16xf32>)
+ // CHECK: linalg.matmul ins({{.*}}memref<16x16xf32>, memref<16x16xf32>) inits({{.*}}memref<16x16xf32>)
linalg.matmul ins(%3, %3: memref, memref)
- outs(%3: memref)
+ inits(%3: memref)
return %3: memref
}
@@ -32,7 +32,7 @@
memref.copy %arg0, %arg0 : memref<0xf32> to memref<0xf32>
// tensor<0xf32> cannot be dce'ed
- %1 = linalg.generic #trait outs(%arg1 : tensor<0xf32>) {
+ %1 = linalg.generic #trait inits(%arg1 : tensor<0xf32>) {
^bb(%0: f32) :
linalg.yield %0 : f32
} -> tensor<0xf32>
@@ -56,9 +56,9 @@
%tc = tensor.cast %c : tensor<3x?xf32> to tensor
// CHECK: linalg.matmul ins({{.*}}tensor<3x4xf32>, tensor<4x?xf32>)
- // CHECK-SAME: outs({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32>
+ // CHECK-SAME: inits({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32>
%0 = linalg.matmul ins(%ta, %tb: tensor, tensor)
- outs(%tc: tensor) -> tensor
+ inits(%tc: tensor) -> tensor
%1 = tensor.cast %0 : tensor to tensor<3x?xf32>
@@ -79,9 +79,9 @@
%tc = tensor.cast %c : tensor<*xf32> to tensor
// CHECK: linalg.matmul ins({{.*}}tensor, tensor)
- // CHECK-SAME: outs({{.*}}tensor) -> tensor
+ // CHECK-SAME: inits({{.*}}tensor) -> tensor
%0 = linalg.matmul ins(%ta, %tb: tensor, tensor)
- outs(%tc: tensor) -> tensor
+ inits(%tc: tensor) -> tensor
// CHECK: tensor.cast
%1 = tensor.cast %0 : tensor to tensor<*xf32>
@@ -98,11 +98,11 @@
func.func @linalg_effects(%a : tensor, %b : memref, %c : tensor) {
// CHECK-NOT: %{{.*}} = linalg.matmul
%t = linalg.matmul ins(%a, %b : tensor, memref)
- outs(%c : tensor) -> tensor
+ inits(%c : tensor) -> tensor
// CHECK: linalg.matmul
linalg.matmul ins(%a, %c : tensor, tensor)
- outs(%b : memref)
+ inits(%b : memref)
return
}
@@ -122,7 +122,7 @@
indexing_maps = [#map, #map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%arg0, %arg1 : tensor, tensor)
- outs(%3, %3 : tensor, tensor) {
+ inits(%3, %3 : tensor, tensor) {
^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg3, %arg2 : f32, f32
} -> (tensor, tensor)
@@ -143,7 +143,7 @@
indexing_maps = [#map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%arg0 : tensor)
- outs(%out : tensor<1x2x3xf32>) {
+ inits(%out : tensor<1x2x3xf32>) {
^bb0(%arg2 : f32, %arg3 : f32):
linalg.yield %arg2 : f32
} -> (tensor<1x2x3xf32>)
@@ -163,7 +163,7 @@
indexing_maps = [#map, #map],
iterator_types = []
} ins(%arg0 : f32)
- outs(%out : tensor) {
+ inits(%out : tensor) {
^bb0(%arg2 : f32, %arg3 : f32):
linalg.yield %arg2 : f32
} -> (tensor)
@@ -187,7 +187,7 @@
^bb1(%arg1 : f32):
%3 = linalg.generic {indexing_maps = [#map, #map],
iterator_types = ["parallel", "parallel"]}
- ins(%arg0 : tensor) outs(%2 : tensor) {
+ ins(%arg0 : tensor) inits(%2 : tensor) {
^bb0(%arg2: f32, %arg3 : f32):
linalg.yield %arg1 : f32
} -> tensor
@@ -215,7 +215,7 @@
{indexing_maps = [#map, #map, #map, #map],
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%2, %2 : tensor, tensor) {
+ inits(%2, %2 : tensor, tensor) {
^bb0(%arg3: f32, %arg4 : f32, %arg5 : f32, %arg6 : f32):
linalg.yield %arg2, %arg4 : f32, f32
} -> (tensor, tensor)
@@ -247,10 +247,10 @@
%c0_i32 = arith.constant 0 : i32
%c0 = arith.constant 0 : index
%cst = arith.constant 0.000000e+00 : f32
- %0 = linalg.fill ins(%c0_i32 : i32) outs(%arg0 : tensor<7x7xi32>) -> tensor<7x7xi32>
+ %0 = linalg.fill ins(%c0_i32 : i32) inits(%arg0 : tensor<7x7xi32>) -> tensor<7x7xi32>
%1 = linalg.matmul ins(%arg1, %arg1: tensor<7x7xf32>, tensor<7x7xf32>)
- outs(%arg1: tensor<7x7xf32>) -> tensor<7x7xf32>
- %2 = linalg.generic #trait outs(%arg0 : tensor<7x7xi32>) {
+ inits(%arg1: tensor<7x7xf32>) -> tensor<7x7xf32>
+ %2 = linalg.generic #trait inits(%arg0 : tensor<7x7xi32>) {
^bb(%3: i32) :
linalg.yield %3 : i32
} -> tensor<7x7xi32>
@@ -270,7 +270,7 @@
%c21 = arith.constant 21 : index
%c42 = arith.constant 42 : index
%0 = tensor.empty(%c21, %c42) : tensor
- %1 = linalg.fill ins(%arg1 : f32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%arg1 : f32) inits(%0 : tensor) -> tensor
%2 = tensor.dim %arg0, %c0 : tensor
%3 = tensor.dim %arg0, %c1 : tensor
%4 = tensor.insert_slice %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor into tensor
@@ -278,7 +278,7 @@
}
// CHECK-LABEL: func @propogate_casts
// CHECK: %[[INIT:.+]] = tensor.empty
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]]
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]]
// CHECK: %[[INSERTED:.+]] = tensor.insert_slice %{{.+}} into %[[FILL]]
// CHECK: %[[RESULT:.+]] = tensor.cast %[[INSERTED]]
// CHECK: return %[[RESULT]]
@@ -302,8 +302,8 @@
%empty = tensor.empty() : tensor<1x2x3x4xf32>
// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape
// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32)
- // CHECK-SAME: outs(%[[COLLAPSE]] : tensor<6x4xf32>)
- %fill = linalg.fill ins(%zero : f32) outs(%empty : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32>
+ // CHECK-SAME: inits(%[[COLLAPSE]] : tensor<6x4xf32>)
+ %fill = linalg.fill ins(%zero : f32) inits(%empty : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32>
%reshape = tensor.collapse_shape %fill [[0, 1, 2], [3]]
: tensor<1x2x3x4xf32> into tensor<6x4xf32>
// CHECK: return %[[FILL]] : tensor<6x4xf32>
@@ -317,8 +317,8 @@
func.func @fold_fill_reshape_dynamic(%arg0 : tensor) -> tensor {
%zero = arith.constant 0.0 : f32
// CHECK: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]]
- %0 = linalg.fill ins(%zero : f32) outs(%arg0 : tensor) -> tensor
- // CHECK: %[[RESULT:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[RESHAPE]]
+ %0 = linalg.fill ins(%zero : f32) inits(%arg0 : tensor) -> tensor
+ // CHECK: %[[RESULT:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[RESHAPE]]
%1 = tensor.collapse_shape %0 [[0, 1, 2], [3, 4]]
: tensor into tensor
// CHECK: return %[[RESULT]]
@@ -334,7 +334,7 @@
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%0 : memref<4x16xf32>)
- outs(%0 : memref<4x16xf32>) {
+ inits(%0 : memref<4x16xf32>) {
^bb0(%arg4: f32, %arg5: f32):
linalg.yield %arg4 : f32
}
@@ -346,12 +346,12 @@
// CHECK-LABEL: func @fold_static_pad_fill
// CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<412x276xf32>
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]]
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}inits(%[[INIT]]
// CHECK: return %[[FILL]]
func.func @fold_static_pad_fill() -> tensor<412x276xf32> {
%f0 = arith.constant 0.0 : f32
%empty = tensor.empty() : tensor<400x273xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
%pad = tensor.pad %fill low[4, 1] high[8, 2] {
^bb0(%arg1: index, %arg2: index):
tensor.yield %f0 : f32
@@ -371,18 +371,18 @@
// CHECK-DAG: %[[I1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
-// CHECK: %[[OF:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[SRC]] : tensor<8x?x16x32xf32>)
+// CHECK: %[[OF:.+]] = linalg.fill ins(%[[F0]] : f32) inits(%[[SRC]] : tensor<8x?x16x32xf32>)
// CHECK: %[[S0:.+]] = affine.apply #[[MAP0]]()[%[[LOW0]]]
// CHECK: %[[DIM1:.+]] = tensor.dim %[[OF]], %[[I1]] : tensor<8x?x16x32xf32>
// CHECK: %[[S1:.+]] = affine.apply #[[MAP1]]()[%[[DIM1]]]
// CHECK: %[[S2:.+]] = affine.apply #[[MAP2]]()[%[[HIGH2]]]
// CHECK: %[[S3:.+]] = affine.apply #[[MAP3]]()[%[[LOW3]], %[[HIGH3]]]
// CHECK: %[[INIT:.+]] = tensor.empty(%[[S0]], %[[S1]], %[[S2]], %[[S3]]) : tensor
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]]
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}inits(%[[INIT]]
// CHECK: return %[[FILL]]
func.func @fold_dynamic_pad_fill(%empty: tensor<8x?x16x32xf32>, %low0: index, %low3: index, %high2: index, %high3: index) -> tensor {
%f0 = arith.constant 0.0 : f32
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x?x16x32xf32>) -> tensor<8x?x16x32xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x?x16x32xf32>) -> tensor<8x?x16x32xf32>
%pad = tensor.pad %fill low[%low0, 8, 7, %low3] high[1, 2, %high2, %high3] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
tensor.yield %f0 : f32
@@ -397,7 +397,7 @@
%f0 = arith.constant 0.0 : f32
%f1 = arith.constant 1.0 : f32
%empty = tensor.empty() : tensor<400x273xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<400x273xf32>) -> tensor<400x273xf32>
// CHECK: tensor.pad
%pad = tensor.pad %fill low[4, 1] high[8, 2] {
^bb0(%arg1: index, %arg2: index):
@@ -427,7 +427,7 @@
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%arg0, %arg1 : tensor<2x3x4xf32>, tensor)
- outs(%3 : tensor) {
+ inits(%3 : tensor) {
^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32):
%9 = arith.addf %arg2, %arg3 : f32
linalg.yield %9 : f32
@@ -437,7 +437,7 @@
// CHECK: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor to tensor<2x3x4xf32>
// CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
- // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
+ // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>)
}
// -----
@@ -458,7 +458,7 @@
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%arg0, %4 : tensor<2x3x4xf32>, tensor<2x?x?xf32>)
- outs(%3 : tensor) {
+ inits(%3 : tensor) {
^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32):
%9 = arith.addf %arg2, %arg3 : f32
linalg.yield %9 : f32
@@ -468,7 +468,7 @@
// CHECK: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor to tensor<2x3x4xf32>
// CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
- // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
+ // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>)
}
// -----
@@ -490,7 +490,7 @@
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%arg0, %5 : tensor, tensor<2x?x?xf32>)
- outs(%4 : tensor<2x3x4xf32>) {
+ inits(%4 : tensor<2x3x4xf32>) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
%9 = arith.addf %arg3, %arg4 : f32
linalg.yield %9 : f32
@@ -500,7 +500,7 @@
// CHECK-NEXT: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor to tensor<2x3x4xf32>
// CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[CAST_ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
- // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
+ // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>)
}
// -----
@@ -524,7 +524,7 @@
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%4, %5 : tensor<2x?x?xf32>, tensor<2x?x?xf32>)
- outs(%3 : tensor) {
+ inits(%3 : tensor) {
^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32):
%9 = arith.addf %arg2, %arg3 : f32
linalg.yield %9 : f32
@@ -533,7 +533,7 @@
return %7: tensor<2x3x4xf32>
// CHECK: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>)
- // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>)
+ // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>)
}
// -----
@@ -548,7 +548,7 @@
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel", "parallel", "parallel"]
} ins(%arg0, %arg1 : tensor, tensor<1x?x?xf32>)
- outs(%0 : tensor) {
+ inits(%0 : tensor) {
^bb0(%arg5: f32, %arg6: f32, %arg7: f32):
%3 = arith.subf %arg5, %arg6 : f32
linalg.yield %3 : f32
@@ -556,7 +556,7 @@
return %2 : tensor
// CHECK: %[[GENERIC_OP:.*]] = linalg.generic
// CHECK-SAME: ins(%{{.*}}, %[[ARG1]] : tensor<1x?x?xf32>, tensor<1x?x?xf32>)
-// CHECK-SAME: outs(%{{.*}} : tensor<1x?x?xf32>)
+// CHECK-SAME: inits(%{{.*}} : tensor<1x?x?xf32>)
// CHECK: tensor.cast %[[GENERIC_OP]] : tensor<1x?x?xf32> to tensor
}
@@ -570,7 +570,7 @@
// CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index
// CHECK-DAG: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[INIT:.+]] = tensor.empty()
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]]
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}inits(%[[INIT]]
// CHECK: %[[OFFSET1:.+]] = affine.apply #[[$MAP]]()[%[[LOW1]]]
// CHECK: %[[D0:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor
// CHECK: %[[D1:.+]] = tensor.dim %[[INPUT]], %[[C1]] : tensor
@@ -584,7 +584,7 @@
tensor.yield %f0 : f32
} : tensor to tensor<8x128x128xf32>
%empty = tensor.empty() : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %pad into %fill[0, 1, 2] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
return %0: tensor<8x384x384xf32>
}
@@ -605,7 +605,7 @@
tensor.yield %f0 : f32
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
%empty = tensor.empty() : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
@@ -624,7 +624,7 @@
tensor.yield %f0 : f32
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
%empty = tensor.empty() : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%1 = tensor.insert_slice %a into %0 [0, 0, 129] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
// Range overlap with %1 at dim#3
@@ -644,7 +644,7 @@
tensor.yield %f0 : f32
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
%empty = tensor.empty() : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%1 = tensor.insert_slice %a into %0 [0, 128, 255] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
// Range overlap with %0 at dim#3
@@ -664,7 +664,7 @@
tensor.yield %f0 : f32
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
%empty = tensor.empty() : tensor<8x384x384xf32>
- %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
// Overlap between %0 and %1 is fine, but overlap with %2 is not.
// CHECK-COUNT-3: tensor.insert_slice
%0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
@@ -687,7 +687,7 @@
} : tensor<7x123x124xf32> to tensor<8x128x128xf32>
%empty = tensor.empty() : tensor<8x384x384xf32>
// Different filling value than padding value.
- %fill = linalg.fill ins(%f1 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
+ %fill = linalg.fill ins(%f1 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32>
%0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
%2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32>
@@ -699,7 +699,7 @@
func.func @fold_linalgop_with_cast_consumer(%arg0 : tensor, %arg1 : tensor,
%arg2 : tensor) -> (tensor<4x8xf32>, tensor) {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
%1 = tensor.cast %0 : tensor to tensor<4x8xf32>
return %1, %0 : tensor<4x8xf32>, tensor
}
@@ -712,7 +712,7 @@
// CHECK-DAG: %[[OUT_CAST:.+]] = tensor.cast %[[ARG2]] : tensor to tensor<4x8xf32>
// CHECK: %[[MATMUL:.+]] = linalg.matmul
// CHECK-SAME: ins(%[[LHS_CAST]], %[[RHS_CAST]] :
-// CHECK-SAME: outs(%[[OUT_CAST]] :
+// CHECK-SAME: inits(%[[OUT_CAST]] :
// CHECK: %[[RESULT_CAST:.+]] = tensor.cast %[[MATMUL]]
// CHECK: return %[[MATMUL]], %[[RESULT_CAST]]
@@ -723,7 +723,7 @@
func.func @linalgop_with_cond_cast_consumer(%arg0 : tensor, %arg1 : tensor,
%arg2 : tensor, %arg3 : i1) -> tensor {
%0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
scf.if %arg3 {
%1 = tensor.cast %0 : tensor to tensor<4x8xf32>
func.call @some_use(%1) : (tensor<4x8xf32>) -> ()
@@ -735,7 +735,7 @@
// CHECK-LABEL: func @linalgop_with_cond_cast_consumer
// CHECK-SAME: (%[[ARG0:.*]]: tensor, %[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor, %[[ARG3:.*]]: i1)
// CHECK: %[[RES:.*]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]] : tensor, tensor)
-// CHECK-SAME: outs(%[[ARG2]] : tensor) -> tensor
+// CHECK-SAME: inits(%[[ARG2]] : tensor) -> tensor
// CHECK: scf.if %[[ARG3]] {
// CHECK: %[[CAST:.*]] = tensor.cast %[[RES]] : tensor to tensor<4x8xf32>
// CHECK: func.call @some_use(%[[CAST]]) : (tensor<4x8xf32>) -> ()
@@ -748,7 +748,7 @@
%arg1 : tensor, %arg2 : tensor) -> (tensor<4x8x12x16xf32>, tensor) {
%0 = linalg.conv_2d_nchw_fchw ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
%1 = tensor.cast %0 : tensor to tensor<4x8x12x16xf32>
return %1, %0 : tensor<4x8x12x16xf32>, tensor
}
@@ -760,7 +760,7 @@
// CHECK: %[[OUT_CAST:.+]] = tensor.cast %[[ARG2]] : tensor to tensor<4x8x12x16xf32>
// CHECK: %[[CONV:.+]] = linalg.conv_2d_nchw_fchw
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] :
-// CHECK-SAME: outs(%[[OUT_CAST]] :
+// CHECK-SAME: inits(%[[OUT_CAST]] :
// CHECK: %[[RESULT_CAST:.+]] = tensor.cast %[[CONV]]
// CHECK: return %[[CONV]], %[[RESULT_CAST]]
@@ -780,7 +780,7 @@
indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2) -> (d1, d2, d0)>,
affine_map<(d0, d1, d2) -> (d2, d1, d0)>]}
- ins(%arg0 : tensor) outs(%empty1, %empty2 : tensor, tensor) {
+ ins(%arg0 : tensor) inits(%empty1, %empty2 : tensor, tensor) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32) :
linalg.yield %b0, %b0 : f32, f32
} -> (tensor, tensor)
@@ -794,7 +794,7 @@
// CHECK-DAG: %[[INIT2:.+]] = tensor.empty() : tensor<3x2x4xf32>
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: ins(%[[CAST]] :
-// CHECK-SAME: outs(%[[INIT2]], %[[INIT1]] :
+// CHECK-SAME: inits(%[[INIT2]], %[[INIT1]] :
// CHECK: %[[RETURN_CAST:.+]] = tensor.cast %[[GENERIC]]#0 : tensor<3x2x4xf32> to tensor
// CHECK: return %[[RETURN_CAST]], %[[GENERIC]]#1
@@ -806,7 +806,7 @@
indexing_maps = [#map, #map],
iterator_types = ["parallel"]
} ins(%arg0 : tensor)
- outs(%arg1 : memref) {
+ inits(%arg1 : memref) {
^bb0(%arg2 : f32, %arg3 : f32):
linalg.yield %arg2 : f32
}
@@ -821,7 +821,7 @@
// CHECK-SAME: indexing_maps = [#map, #map],
// CHECK-SAME: iterator_types = ["parallel"]
// CHECK-SAME: } ins(%[[ARG1]] : tensor)
-// CHECK-SAME: outs(%[[ARG2]] : memref) {
+// CHECK-SAME: inits(%[[ARG2]] : memref) {
// -----
@@ -834,13 +834,13 @@
affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
ins(%1, %1 : memref<4xf32>, memref<4xf32>)
- outs(%0 : tensor<4xf32>) {
+ inits(%0 : tensor<4xf32>) {
^bb0(%in: f32, %in_24: f32, %out: f32):
linalg.yield %in : f32
} -> tensor<4xf32>
%53 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
- outs(%36 : tensor<4xf32>) {
+ inits(%36 : tensor<4xf32>) {
^bb0(%out: f32):
linalg.yield %out : f32
} -> tensor<4xf32>
@@ -856,7 +856,7 @@
indexing_maps = [#map, #map],
iterator_types = ["parallel"]
} ins(%0 : tensor)
- outs(%arg1 : memref) {
+ inits(%arg1 : memref) {
^bb0(%arg2 : f32, %arg3 : f32):
linalg.yield %arg2 : f32
}
@@ -870,4 +870,4 @@
// CHECK-SAME: indexing_maps = [#map, #map],
// CHECK-SAME: iterator_types = ["parallel"]
// CHECK-SAME: } ins(%[[ARG1]] : tensor<5xf32>)
-// CHECK-SAME: outs(%[[ARG2]] : memref) {
+// CHECK-SAME: inits(%[[ARG2]] : memref) {
diff --git a/mlir/test/Dialect/Linalg/collapse-dim.mlir b/mlir/test/Dialect/Linalg/collapse-dim.mlir
--- a/mlir/test/Dialect/Linalg/collapse-dim.mlir
+++ b/mlir/test/Dialect/Linalg/collapse-dim.mlir
@@ -7,7 +7,7 @@
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>,
affine_map<(d0, d1, d2, d3) -> (d0, d1)>],
iterator_types = ["parallel", "parallel", "reduction", "reduction"]}
- ins(%arg0 : tensor<2x32x10x4096xf32>) outs(%arg1 : tensor<2x32xf32>) {
+ ins(%arg0 : tensor<2x32x10x4096xf32>) inits(%arg1 : tensor<2x32xf32>) {
^bb0(%arg3: f32, %arg4: f32):
%1 = arith.addf %arg3, %arg4 : f32
linalg.yield %1 : f32
@@ -22,7 +22,7 @@
// CHECK: %[[T:.*]] = tensor.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : tensor<2x32x10x4096xf32> into tensor<2x32x40960xf32>
// CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]],
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]}
-// CHECK-SAME: ins(%[[T]] : tensor<2x32x40960xf32>) outs(%{{.*}} : tensor<2x32xf32>) {
+// CHECK-SAME: ins(%[[T]] : tensor<2x32x40960xf32>) inits(%{{.*}} : tensor<2x32xf32>) {
// CHECK: } -> tensor<2x32xf32>
// -----
@@ -34,7 +34,7 @@
affine_map<(d0, d1, d2, d3) -> (d1, d0, d2, d3)>,
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
- ins(%arg0 : tensor<32x2x10x4096xf32>) outs(%arg1 : tensor<2x32x10x4096xf32>) {
+ ins(%arg0 : tensor<32x2x10x4096xf32>) inits(%arg1 : tensor<2x32x10x4096xf32>) {
^bb0(%arg3: f32, %arg4: f32):
%1 = arith.addf %arg3, %arg4 : f32
linalg.yield %1 : f32
@@ -50,6 +50,6 @@
// CHECK-DAG: %[[D:.*]] = tensor.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : tensor<2x32x10x4096xf32> into tensor<2x32x40960xf32>
// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]],
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]}
-// CHECK-SAME: ins(%[[S]] : tensor<32x2x40960xf32>) outs(%[[D]] : tensor<2x32x40960xf32>) {
+// CHECK-SAME: ins(%[[S]] : tensor<32x2x40960xf32>) inits(%[[D]] : tensor<2x32x40960xf32>) {
// CHECK: } -> tensor<2x32x40960xf32>
// CHECK: tensor.expand_shape %[[R]] {{\[}}[0], [1], [2, 3]] : tensor<2x32x40960xf32> into tensor<2x32x10x4096xf32>
diff --git a/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir b/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir
--- a/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir
+++ b/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir
@@ -18,7 +18,7 @@
%0 = test.linalg_conv_op {
indexing_maps = [#map, #map],
iterator_types = [#test.iterator_type]}
- ins(%arg0 : tensor) outs(%arg1 : tensor) {
+ ins(%arg0 : tensor) inits(%arg1 : tensor) {
^bb0(%arg2 : f32, %arg3 : f32):
linalg.yield %arg3 : f32
} -> tensor
@@ -37,7 +37,7 @@
iterator_types = [#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -56,7 +56,7 @@
iterator_types = [#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -75,7 +75,7 @@
iterator_types = [#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -94,7 +94,7 @@
iterator_types = [#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -115,7 +115,7 @@
iterator_types = [#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -136,7 +136,7 @@
#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -157,7 +157,7 @@
#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -178,7 +178,7 @@
#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
@@ -198,7 +198,7 @@
iterator_types = [#test.iterator_type,
#test.iterator_type]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
linalg.yield %arg5 : f32
} -> tensor
diff --git a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir
--- a/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir
+++ b/mlir/test/Dialect/Linalg/convert-conv2d-to-img2col.mlir
@@ -11,7 +11,7 @@
%0 = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
ins(%arg0, %arg1: tensor, tensor<3x3x4x16xf32>)
- outs(%arg2: tensor) -> tensor
+ inits(%arg2: tensor) -> tensor
return %0 : tensor
}
@@ -57,7 +57,7 @@
// CHECK-SAME: tensor<3x3x4x16xf32> into tensor<36x16xf32>
// CHECK-DAG: %[[RESHAPED_OUTPUT:.+]] = tensor.collapse_shape %[[OUTPUT]]
// CHECK-SAME: [0, 1, 2], [3]
-// CHECK: %[[MATMUL_RESULT:.+]] = linalg.matmul ins(%[[RESHAPED_INIT_COL_TENSOR]], %[[RESHAPED_FILTER]] : tensor<196x36xf32>, tensor<36x16xf32>) outs(%[[RESHAPED_OUTPUT]] : tensor<196x16xf32>)
+// CHECK: %[[MATMUL_RESULT:.+]] = linalg.matmul ins(%[[RESHAPED_INIT_COL_TENSOR]], %[[RESHAPED_FILTER]] : tensor<196x36xf32>, tensor<36x16xf32>) inits(%[[RESHAPED_OUTPUT]] : tensor<196x16xf32>)
// CHECK: %[[RESULT:.+]] = tensor.expand_shape %[[MATMUL_RESULT]] {{\[}}[0, 1, 2], [3]] : tensor<196x16xf32> into tensor<1x14x14x16xf32>
// CHECK: return %[[RESULT]]
@@ -65,7 +65,7 @@
%0 = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
ins(%arg0, %arg1: tensor<1x16x16x4xf32>, tensor<3x3x4x16xf32>)
- outs(%arg2: tensor<1x14x14x16xf32>) -> tensor<1x14x14x16xf32>
+ inits(%arg2: tensor<1x14x14x16xf32>) -> tensor<1x14x14x16xf32>
return %0 : tensor<1x14x14x16xf32>
}
@@ -94,7 +94,7 @@
// CHECK: %[[INPUT_T:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]
-// CHECK-SAME: ins(%[[INPUT]] : tensor<1x114x114x16xf32>) outs(%[[INPUT_T_INIT]] : tensor<1x16x114x114xf32>) {
+// CHECK-SAME: ins(%[[INPUT]] : tensor<1x114x114x16xf32>) inits(%[[INPUT_T_INIT]] : tensor<1x16x114x114xf32>) {
// CHECK-NEXT: ^bb0(%[[ARG3:.+]]: f32, %[[ARG4:.+]]: f32):
// CHECK-NEXT: linalg.yield %[[ARG3]] : f32
// CHECK-NEXT: } -> tensor<1x16x114x114xf32>
@@ -102,7 +102,7 @@
// CHECK: %[[FILTER_T:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]
-// CHECK-SAME: ins(%[[FILTER]] : tensor<3x3x16xf32>) outs(%[[FILTER_T_INIT]] : tensor<16x3x3xf32>) {
+// CHECK-SAME: ins(%[[FILTER]] : tensor<3x3x16xf32>) inits(%[[FILTER_T_INIT]] : tensor<16x3x3xf32>) {
// CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32):
// CHECK: linalg.yield
// CHECK: } -> tensor<16x3x3xf32>
@@ -110,7 +110,7 @@
// CHECK: %[[OUTPUT_T:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]
-// CHECK-SAME: ins(%[[OUTPUT]] : tensor<1x112x112x16xf32>) outs(%[[INIT_OUTPUT_TENSOR]] : tensor<1x16x112x112xf32>) {
+// CHECK-SAME: ins(%[[OUTPUT]] : tensor<1x112x112x16xf32>) inits(%[[INIT_OUTPUT_TENSOR]] : tensor<1x16x112x112xf32>) {
// CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32):
// CHECK-NEXT: linalg.yield
// CHECK-NEXT: } -> tensor<1x16x112x112xf32>
@@ -118,7 +118,7 @@
// CHECK: %[[COL_TENSOR:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP4]], #[[MAP5]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]
-// CHECK-SAME: ins(%[[INPUT_T]] : tensor<1x16x114x114xf32>) outs(%[[INIT_COL_TENSOR]] : tensor<1x16x112x112x3x3xf32>) {
+// CHECK-SAME: ins(%[[INPUT_T]] : tensor<1x16x114x114xf32>) inits(%[[INIT_COL_TENSOR]] : tensor<1x16x112x112x3x3xf32>) {
// CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32):
// CHECK-NEXT: linalg.yield
// CHECK-NEXT: } -> tensor<1x16x112x112x3x3xf32>
@@ -128,14 +128,14 @@
// CHECK-SAME: tensor<16x3x3xf32> into tensor<16x9xf32>
// CHECK: %[[OUTPUT_T_R:.+]] = tensor.collapse_shape %[[OUTPUT_T]]
// CHECK-SAME: tensor<1x16x112x112xf32> into tensor<16x12544xf32>
-// CHECK: %[[BMV_RESULT:.+]] = linalg.batch_matvec ins(%[[COL_TENSOR_R]], %[[FILTER_T_R]] : tensor<16x12544x9xf32>, tensor<16x9xf32>) outs(%[[OUTPUT_T_R]] : tensor<16x12544xf32>) ->
tensor<16x12544xf32> +// CHECK: %[[BMV_RESULT:.+]] = linalg.batch_matvec ins(%[[COL_TENSOR_R]], %[[FILTER_T_R]] : tensor<16x12544x9xf32>, tensor<16x9xf32>) inits(%[[OUTPUT_T_R]] : tensor<16x12544xf32>) -> tensor<16x12544xf32> // CHECK: %[[RESULT_R:.+]] = tensor.expand_shape %[[BMV_RESULT]] // CHECK-SAME: tensor<16x12544xf32> into tensor<1x16x112x112xf32> // CHECK: %[[RESULT_INIT:.+]] = tensor.empty() : tensor<1x112x112x16xf32> // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP6]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK-SAME: ins(%[[RESULT_R]] : tensor<1x16x112x112xf32>) outs(%[[RESULT_INIT]] : tensor<1x112x112x16xf32>) { +// CHECK-SAME: ins(%[[RESULT_R]] : tensor<1x16x112x112xf32>) inits(%[[RESULT_INIT]] : tensor<1x112x112x16xf32>) { // CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32): // CHECK-NEXT: linalg.yield // CHECK-NEXT: } -> tensor<1x112x112x16xf32> @@ -144,7 +144,7 @@ %0 = linalg.depthwise_conv_2d_nhwc_hwc { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> - } ins(%input, %filter : tensor<1x114x114x16xf32>, tensor<3x3x16xf32>) outs(%output : tensor<1x112x112x16xf32>) -> tensor<1x112x112x16xf32> + } ins(%input, %filter : tensor<1x114x114x16xf32>, tensor<3x3x16xf32>) inits(%output : tensor<1x112x112x16xf32>) -> tensor<1x112x112x16xf32> return %0 : tensor<1x112x112x16xf32> } @@ -169,7 +169,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[INPUT]] : tensor<8x16x16x4xf32>) -// CHECK-SAME: outs(%[[IT]] : tensor<8x14x14x3x3x4xf32>) +// CHECK-SAME: inits(%[[IT]] : tensor<8x14x14x3x3x4xf32>) // CHECK: %[[CS_INPUT:.+]] = tensor.collapse_shape %[[IMG2COL]] {{\[}}[0], [1, 2], [3, 4, 5]] : tensor<8x14x14x3x3x4xf32> into tensor<8x196x36xf32> // CHECK: %[[CS_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0, 1, 2], [3]] : tensor<3x3x4x16xf32> into tensor<36x16xf32> // CHECK: %[[CS_RESULT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1, 2], [3]] : tensor<8x14x14x16xf32> into tensor<8x196x16xf32> @@ -177,7 +177,7 @@ // CHECK-SAME: indexing_maps = [#[[LHSMAP]], #[[RHSMAP]], #[[RESMAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[CS_INPUT]], %[[CS_FILTER]] : tensor<8x196x36xf32>, tensor<36x16xf32>) -// CHECK-SAME: outs(%[[CS_RESULT]] : tensor<8x196x16xf32>) +// CHECK-SAME: inits(%[[CS_RESULT]] : tensor<8x196x16xf32>) // CHECK: ^bb0(%[[ARG0:.+]]: f32, %[[ARG1:.+]]: f32, %[[ARG2:.+]]: f32): // CHECK: %[[MUL:.+]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32 // CHECK: %[[ADD:.+]] = arith.addf %[[MUL]], %[[ARG2]] : f32 @@ -189,7 +189,7 @@ %0 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%arg0, %arg1: tensor<8x16x16x4xf32>, tensor<3x3x4x16xf32>) - outs(%arg2: tensor<8x14x14x16xf32>) -> tensor<8x14x14x16xf32> + inits(%arg2: tensor<8x14x14x16xf32>) -> tensor<8x14x14x16xf32> return %0 : tensor<8x14x14x16xf32> } @@ -214,7 +214,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[INPUT]] : tensor<8x4x16x16xf32>) -// CHECK-SAME: outs(%[[IT]] : tensor<8x4x3x3x14x14xf32>) +// CHECK-SAME: inits(%[[IT]] : tensor<8x4x3x3x14x14xf32>) // CHECK: %[[CS_FILTER:.+]] = tensor.collapse_shape %[[FILTER]] {{\[}}[0], [1, 2, 
3]] : tensor<16x4x3x3xf32> into tensor<16x36xf32> // CHECK: %[[CS_INPUT:.+]] = tensor.collapse_shape %[[IMG2COL]] {{\[}}[0], [1, 2, 3], [4, 5]] : tensor<8x4x3x3x14x14xf32> into tensor<8x36x196xf32> // CHECK: %[[CS_RESULT:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1], [2, 3]] : tensor<8x16x14x14xf32> into tensor<8x16x196xf32> @@ -222,7 +222,7 @@ // CHECK-SAME: indexing_maps = [#[[LHSMAP]], #[[RHSMAP]], #[[RESMAP]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[CS_FILTER]], %[[CS_INPUT]] : tensor<16x36xf32>, tensor<8x36x196xf32>) -// CHECK-SAME: outs(%[[CS_RESULT]] : tensor<8x16x196xf32>) +// CHECK-SAME: inits(%[[CS_RESULT]] : tensor<8x16x196xf32>) // CHECK: ^bb0(%[[ARG0:.+]]: f32, %[[ARG1:.+]]: f32, %[[ARG2:.+]]: f32): // CHECK: %[[MUL:.+]] = arith.mulf %[[ARG0]], %[[ARG1]] : f32 // CHECK: %[[ADD:.+]] = arith.addf %[[MUL]], %[[ARG2]] : f32 @@ -234,7 +234,7 @@ %0 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%arg0, %arg1: tensor<8x4x16x16xf32>, tensor<16x4x3x3xf32>) - outs(%arg2: tensor<8x16x14x14xf32>) -> tensor<8x16x14x14xf32> + inits(%arg2: tensor<8x16x14x14xf32>) -> tensor<8x16x14x14xf32> return %0 : tensor<8x16x14x14xf32> } diff --git a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir --- a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir +++ b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir @@ -10,7 +10,7 @@ // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]], #[[$MAP]]] // CHECK-SAME: iterator_types = [] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[ARG0]] + // CHECK-SAME: inits(%[[ARG0]] // CHECK: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32, %{{.*}}: f32): // CHECK: %[[YIELD:.*]] = arith.addf %[[LHS]], %[[RHS]] : f32 // CHECK: linalg.yield %[[YIELD]] : f32 @@ -29,7 +29,7 @@ // CHECK: linalg.generic // CHECK-SAME: iterator_types = ["parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[ARG0]] + // CHECK-SAME: inits(%[[ARG0]] %0 = arith.addf %arg0, %arg1 : tensor return %0 : tensor } @@ -42,7 +42,7 @@ func.func @exp(%arg0: tensor) -> tensor { // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]] - // CHECK-SAME: outs(%[[ARG0]] + // CHECK-SAME: inits(%[[ARG0]] // CHECK: ^bb0(%[[SCALAR:.*]]: f32, %{{.*}}: f32): // CHECK: %[[YIELD:.*]] = math.exp %[[SCALAR]] : f32 // CHECK: linalg.yield %[[YIELD]] : f32 @@ -60,7 +60,7 @@ func.func @select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] - // CHECK-SAME: outs(%[[ARG1]] + // CHECK-SAME: inits(%[[ARG1]] // CHECK: ^bb0(%[[PRED:.*]]: i1, %[[TRUE_VAL:.*]]: i32, %[[FALSE_VAL:.*]]: i32, %{{.*}}: i32): // CHECK: arith.select %[[PRED]], %[[TRUE_VAL]], %[[FALSE_VAL]] : i32 %0 = arith.select %arg0, %arg1, %arg2 : tensor, tensor @@ -78,7 +78,7 @@ // CHECK: %[[INIT:.*]] = tensor.empty() : tensor // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[INIT]] + // CHECK-SAME: inits(%[[INIT]] // CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: i1): // CHECK: arith.cmpf olt, %{{.*}}, %{{.*}} : f32 %0 = arith.cmpf olt, %arg0, %arg1 : tensor @@ -101,7 +101,7 @@ // CHECK: %[[INIT:.*]] = tensor.empty(%[[D1]], %[[D2]], %[[D5]]) : tensor<4x?x?x8x2x?xi1> // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[INIT]] + // CHECK-SAME: inits(%[[INIT]] // 
CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: i1): // CHECK: arith.cmpf olt, %{{.*}}, %{{.*}} : f32 %0 = arith.cmpf olt, %arg0, %arg1 : tensor<4x?x?x8x2x?xf32> diff --git a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir --- a/mlir/test/Dialect/Linalg/data-layout-propagation.mlir +++ b/mlir/test/Dialect/Linalg/data-layout-propagation.mlir @@ -10,7 +10,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32): %4 = arith.addf %arg3, %arg3 : f32 linalg.yield %4 : f32 @@ -41,7 +41,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // CHECK: return %[[ELEM]] : tensor // ----- @@ -51,7 +51,7 @@ %init = tensor.empty() : tensor<128x256xi32> %elem = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<128x256xi32>) - outs(%init : tensor<128x256xi32>) { + inits(%init : tensor<128x256xi32>) { ^bb0(%arg3: i32, %arg4: i32): %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 @@ -74,7 +74,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // CHECK: return %[[ELEM]] : tensor<4x16x16x32xi32> // ----- @@ -84,7 +84,7 @@ %init = tensor.empty() : tensor<128x256xi32> %elem = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<128x256xi32>) - outs(%init : tensor<128x256xi32>) { + inits(%init : tensor<128x256xi32>) { ^bb0(%arg3: i32, %arg4: i32): %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 @@ -108,7 +108,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // CHECK: return %[[ELEM]] : tensor<16x4x32x16xi32> // ----- @@ -118,7 +118,7 @@ %init = tensor.empty() : tensor<128x256xi32> %elem = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<128x256xi32>) - outs(%init : tensor<128x256xi32>) { + inits(%init : tensor<128x256xi32>) { ^bb0(%arg3: i32, %arg4: i32): %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 @@ -142,7 +142,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // CHECK: return %[[ELEM]] : tensor<16x4x16x32xi32> // ----- @@ -158,7 +158,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map1, #map2, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -195,7 +195,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]], #[[MAP4]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[PACK_ARG0]], 
%[[PACK_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // CHECK: return %[[ELEM]] : tensor // ----- @@ -208,7 +208,7 @@ indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<64xf32>) - outs(%0 : tensor<1x56x57x64xf32>) { + inits(%0 : tensor<1x56x57x64xf32>) { ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<1x56x57x64xf32> @@ -227,7 +227,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // ----- @@ -241,7 +241,7 @@ affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor<100x128x200x256xi32>, tensor<100xi32>, tensor<128xi32>) - outs(%init_transpose : tensor<100x200x128x256xi32>) { + inits(%init_transpose : tensor<100x200x128x256xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %0 = arith.addi %b0, %b1 : i32 %1 = arith.addi %0, %b2 : i32 @@ -273,7 +273,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] // CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // ----- @@ -287,7 +287,7 @@ affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor<100x128x200x256xi32>, tensor<100x1x1x1xi32>, tensor<1x128x1x1xi32>) - outs(%init_transpose : tensor<100x200x128x256xi32>) { + inits(%init_transpose : tensor<100x200x128x256xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %0 = arith.addi %b0, %b1 : i32 %1 = arith.addi %0, %b2 : i32 @@ -319,7 +319,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] // CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // ----- @@ -336,7 +336,7 @@ affine_map<(d0, d1, d2, d3) -> (d0, d2, d1, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor<100x128x200x256xi32>, tensor<100xi32>, tensor<128xi32>) - outs(%init_transpose : tensor<100x200x128x256xi32>) { + inits(%init_transpose : tensor<100x200x128x256xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %0 = arith.addi %b0, %b1 : i32 %1 = arith.addi %0, %b2 : i32 @@ -369,7 +369,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP1]], #[[MAP2]], #[[MAP]]] // CHECK-SAME: ins(%[[PACKED_ARG0]], %[[ARG1]], %[[PACKED_ARG2]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // ----- @@ -377,7 +377,7 @@ func.func @elem_pack_transpose_outer_dims(%arg0: tensor<128x256xi32>, %init: tensor<128x256xi32>) -> tensor<16x4x32x16xi32>{ %elem = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<128x256xi32>) - outs(%init : tensor<128x256xi32>) { + inits(%init : tensor<128x256xi32>) { ^bb0(%arg3: i32, %arg4: i32): %4 = arith.addi %arg3, %arg4 : i32 linalg.yield %4 : i32 @@ -406,7 +406,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK-SAME: outs(%[[PACKED_ARG1]] +// CHECK-SAME: inits(%[[PACKED_ARG1]] // ----- @@ -415,7 +415,7 @@ func.func @unpack_on_output(%arg0: 
tensor<12x2x56x56x32xf32>) -> tensor<12x56x56x64xf32> { %0 = tensor.empty() : tensor<12x56x56x64xf32> %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> - %2 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} outs(%1 : tensor<12x56x56x64xf32>) { + %2 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} inits(%1 : tensor<12x56x56x64xf32>) { ^bb0(%out: f32): %3 = arith.addf %out, %out : f32 linalg.yield %3 : f32 @@ -436,7 +436,7 @@ // CHECK-SAME: into %[[ARG0_EMPTY_PACK]] // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]]] -// CHECK-SAME: outs(%[[PACKED_ARG0]] +// CHECK-SAME: inits(%[[PACKED_ARG0]] // CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_EMPTY_UNPACK]] @@ -448,7 +448,7 @@ func.func @unpack_on_input(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56x56x64xf32>) -> tensor<12x56x56x64xf32> { %0 = tensor.empty() : tensor<12x56x56x64xf32> %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> - %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf32>) { + %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) inits(%init : tensor<12x56x56x64xf32>) { ^bb0(%in: f32, %out: f32): %3 = arith.addf %in, %out : f32 linalg.yield %3 : f32 @@ -475,7 +475,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: ins(%[[ARG0_PACK]] -// CHECK-SAME: outs(%[[ARG1_PACK]] +// CHECK-SAME: inits(%[[ARG1_PACK]] // CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] @@ -487,7 +487,7 @@ func.func @unpack_element_type_change(%arg0: tensor<12x2x56x56x32xf32>, %init: tensor<12x56x56x64xf16>) -> tensor<12x56x56x64xf16> { %0 = tensor.empty() : tensor<12x56x56x64xf32> %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> - %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf16>) { + %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) inits(%init : tensor<12x56x56x64xf16>) { ^bb0(%in: f32, %out: f16): %3 = arith.truncf %in : f32 to f16 linalg.yield %3 : f16 @@ -514,7 +514,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: ins(%[[ARG0_PACK]] -// CHECK-SAME: outs(%[[ARG1_PACK]] +// CHECK-SAME: inits(%[[ARG1_PACK]] // CHECK: %[[ARG0_NEW_EMPTY_UNPACK:.+]] = tensor.empty() : tensor<12x56x56x64xf16> // CHECK: %[[UNPACK:.+]] = tensor.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] @@ -528,7 +528,7 @@ %init = tensor.empty() 
: tensor<12x56x56x64xf32> %0 = tensor.empty() : tensor<12x56x56x64xf32> %1 = tensor.unpack %arg0 outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] into %0 : tensor<12x2x56x56x32xf32> -> tensor<12x56x56x64xf32> - %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) outs(%init : tensor<12x56x56x64xf32>) { + %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%1: tensor<12x56x56x64xf32>) inits(%init : tensor<12x56x56x64xf32>) { ^bb0(%in: f32, %out: f32): %3 = arith.addf %in, %in : f32 linalg.yield %3 : f32 @@ -551,7 +551,7 @@ // CHECK: %[[RES:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: ins(%[[PACKED_ARG0]] -// CHECK-SAME: outs(%[[DEST]] +// CHECK-SAME: inits(%[[DEST]] // CHECK: %[[UNPACKED:.+]] = tensor.unpack %[[RES]] // CHECK-SAME: outer_dims_perm = [0, 3, 1, 2] inner_dims_pos = [3] inner_tiles = [32] // CHECK-SAME: into %[[ARG0_UNPACK_EMPTY]] @@ -629,7 +629,7 @@ %init = tensor.empty() : tensor<128x256xi32> %elem = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor<128x256xi32>) - outs(%init : tensor<128x256xi32>) { + inits(%init : tensor<128x256xi32>) { ^bb0(%arg3: i32, %arg4: i32): %4 = arith.addi %arg3, %arg3 : i32 linalg.yield %4 : i32 @@ -647,7 +647,7 @@ // CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<128x256xi32> // CHECK-NEXT: %[[GEN:.+]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]] -// CHECK-SAME: outs(%[[EMPTY]] +// CHECK-SAME: inits(%[[EMPTY]] // CHECK: %[[ALLOC:.+]] = bufferization.alloc_tensor() : tensor<4x16x16x32xi32> // CHECK-NEXT: %{{.+}} = tensor.pack %[[GEN]] // CHECK-SAME: inner_dims_pos = [1, 0] inner_tiles = [16, 32] @@ -660,7 +660,7 @@ func.func @scalar_tensor(%arg0 : tensor) -> tensor<1x32x7x7x32xf32> { %empty_gen = tensor.empty() : tensor<1x7x7x1024xf32> - %gen = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor) outs(%empty_gen : tensor<1x7x7x1024xf32>) { + %gen = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor) inits(%empty_gen : tensor<1x7x7x1024xf32>) { ^bb0(%in: f32, %out: f32): linalg.yield %in : f32 } -> tensor<1x7x7x1024xf32> @@ -678,4 +678,4 @@ // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]] -// CHECK-SAME: outs(%[[EMPTY]] +// CHECK-SAME: inits(%[[EMPTY]] diff --git a/mlir/test/Dialect/Linalg/decompose-ops.mlir b/mlir/test/Dialect/Linalg/decompose-ops.mlir --- a/mlir/test/Dialect/Linalg/decompose-ops.mlir +++ b/mlir/test/Dialect/Linalg/decompose-ops.mlir @@ -15,7 +15,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor, tensor, tensor) - outs(%init1, %init2 : tensor, tensor) { + inits(%init1, %init2 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : %0 = arith.addf %b0, %b1 : f32 %1 = arith.mulf %0, %b2 : f32 @@ -41,7 +41,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP3]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]], %[[INIT1]] : +// CHECK-SAME: 
inits(%[[INIT1]], %[[INIT2]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -55,7 +55,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP3]], #[[MAP0]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[GENERIC1]]#2 : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B6:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B7:[a-zA-Z0-9_]+]]: f32 @@ -87,7 +87,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -98,7 +98,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP3]], #[[MAP2]], #[[MAP0]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG2]], %[[GENERIC1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT2]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B3:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B4:[a-zA-Z0-9_]+]]: f32 @@ -124,7 +124,7 @@ affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor, tensor, tensor) - outs(%init1, %init2, %init2 : tensor, tensor, tensor) { + inits(%init1, %init2, %init2 : tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32, %b5 : f32) : %0 = arith.addf %b0, %b1 : f32 %1 = arith.mulf %0, %b2 : f32 @@ -150,7 +150,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP0]], #[[MAP3]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]], %[[INIT2]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]], %[[INIT2]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -165,7 +165,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP3]], #[[MAP0]], #[[MAP0]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[GENERIC1]]#3 : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]], %[[INIT2]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]], %[[INIT2]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B7:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B8:[a-zA-Z0-9_]+]]: f32 @@ -195,7 +195,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]], %[[INIT2]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]], %[[INIT2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -206,7 +206,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP3]], #[[MAP2]], #[[MAP0]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG2]], %[[GENERIC1]]#0 : -// CANONICALIZECHECK-SAME: outs(%[[INIT2]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B4:[a-zA-Z0-9_]+]]: 
f32 // CANONICALIZECHECK-SAME: %[[B5:[a-zA-Z0-9_]+]]: f32 @@ -226,7 +226,7 @@ indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor<10x20xf32>, tensor<10xi32>) - outs(%init : tensor<20x10xf64>) { + inits(%init : tensor<20x10xf64>) { ^bb0(%b0 : f32, %b1 : i32, %b2 : f64): %1 = arith.sitofp %b1 : i32 to f64 %2 = arith.extf %b0 : f32 to f64 @@ -248,7 +248,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:.+]]: f32 // CHECK-SAME: %[[B1:.+]]: i32 @@ -260,7 +260,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP0]], #[[MAP2]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[GENERIC0]]#1 : -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B4:.+]]: f32 // CHECK-SAME: %[[B5:.+]]: i32 @@ -273,7 +273,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP0]], #[[MAP0]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[GENERIC0]]#1, %[[GENERIC1]]#1 : -// CHECK-SAME: outs(%[[INIT0]] : +// CHECK-SAME: inits(%[[INIT0]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B9:.+]]: f32 // CHECK-SAME: %[[B10:.+]]: i32 @@ -296,7 +296,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B0:.+]]: i32 // CANONICALIZECHECK-SAME: %[[B1:.+]]: f64 @@ -306,7 +306,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP1]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B2:.+]]: f32 // CANONICALIZECHECK-SAME: %[[B3:.+]]: f64 @@ -316,7 +316,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP1]], #[[MAP2]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[GENERIC0]], %[[GENERIC1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT0]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT0]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B4:[a-zA-Z0-9_]+]]: f64 // CANONICALIZECHECK-SAME: %[[B5:[a-zA-Z0-9_]+]]: f64 @@ -339,7 +339,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2, %arg3 : tensor, tensor) { + inits(%arg2, %arg3 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) : %1 = arith.addf %b0, %b2 : f32 %2 = arith.mulf %b1, %b3 : f32 @@ -360,7 +360,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[ARG2]], %[[ARG3]], %[[ARG2]] : +// CHECK-SAME: inits(%[[ARG2]], %[[ARG3]], %[[ARG2]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: 
f32 // CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: f32 @@ -373,7 +373,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[GENERIC1]]#2 : -// CHECK-SAME: outs(%[[ARG2]], %[[ARG3]] : +// CHECK-SAME: inits(%[[ARG2]], %[[ARG3]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[ARG9:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[ARG10:[a-zA-Z0-9_]+]]: f32 @@ -397,7 +397,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]] : -// CANONICALIZECHECK-SAME: outs(%[[ARG2]] : +// CANONICALIZECHECK-SAME: inits(%[[ARG2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: f32 @@ -407,7 +407,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP1]], #[[MAP1]], #[[MAP3]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG1]], %[[GENERIC1]] : -// CANONICALIZECHECK-SAME: outs(%[[ARG2]], %[[ARG3]] : +// CANONICALIZECHECK-SAME: inits(%[[ARG2]], %[[ARG3]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: f32 diff --git a/mlir/test/Dialect/Linalg/detensorize_0d.mlir b/mlir/test/Dialect/Linalg/detensorize_0d.mlir --- a/mlir/test/Dialect/Linalg/detensorize_0d.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_0d.mlir @@ -6,7 +6,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -25,7 +25,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -34,7 +34,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %5 = arith.mulf %arg3, %arg4 : f32 linalg.yield %5 : f32 @@ -43,7 +43,7 @@ %6 = tensor.empty() : tensor %7 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%1, %4 : tensor, tensor) - outs(%6 : tensor) { + inits(%6 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %5 = arith.divf %arg3, %arg4 : f32 linalg.yield %5 : f32 @@ -65,7 +65,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 %3 = arith.mulf %2, %arg4 : f32 @@ -86,7 +86,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = "foreign.do_something"(%arg3, %arg4) {} : (f32, f32) -> f32 linalg.yield %2 : f32 diff --git a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir 
b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir
@@ -10,7 +10,7 @@
   %3 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []}
     ins(%arg0_t : tensor<i1>)
-    outs(%2 : tensor<i8>) {
+    inits(%2 : tensor<i8>) {
   ^bb0(%arg2: i1, %arg3: i8):
     %10 = arith.extui %arg2 : i1 to i8
     linalg.yield %10 : i8
@@ -23,7 +23,7 @@
   %7 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []}
     ins(%arg1_t, %cst : tensor<i32>, tensor<i32>)
-    outs(%6 : tensor<i32>) {
+    inits(%6 : tensor<i32>) {
   ^bb0(%arg2: i32, %arg3: i32, %arg4: i32):
     %10 = arith.addi %arg2, %arg3 : i32
     linalg.yield %10 : i32
diff --git a/mlir/test/Dialect/Linalg/detensorize_if.mlir b/mlir/test/Dialect/Linalg/detensorize_if.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_if.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_if.mlir
@@ -18,7 +18,7 @@
   %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %1 : tensor<i32>, tensor<i32>)
-    outs(%3 : tensor<i1>) {
+    inits(%3 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
@@ -30,7 +30,7 @@
   %7 = tensor.empty() : tensor<i32>
   %8 = linalg.generic #attrs
     ins(%6, %6 : tensor<i32>, tensor<i32>)
-    outs(%7 : tensor<i32>) {
+    inits(%7 : tensor<i32>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
     %9 = arith.addi %arg0, %arg1 : i32
     linalg.yield %9 : i32
@@ -79,7 +79,7 @@
   %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %1 : tensor<i32>, tensor<i32>)
-    outs(%3 : tensor<i1>) {
+    inits(%3 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
@@ -91,7 +91,7 @@
   %7 = tensor.empty() : tensor<i32>
   %8 = linalg.generic #attrs
     ins(%6, %6 : tensor<i32>, tensor<i32>)
-    outs(%7 : tensor<i32>) {
+    inits(%7 : tensor<i32>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
     %9 = arith.addi %arg0, %arg1 : i32
     linalg.yield %9 : i32
@@ -142,7 +142,7 @@
   %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %1 : tensor<i32>, tensor<i32>)
-    outs(%3 : tensor<i1>) {
+    inits(%3 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
@@ -159,7 +159,7 @@
   %7 = tensor.empty() : tensor<i32>
   %8 = linalg.generic #attrs
     ins(%6, %12 : tensor<i32>, tensor<i32>)
-    outs(%7 : tensor<i32>) {
+    inits(%7 : tensor<i32>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
     %9 = arith.addi %arg0, %arg1 : i32
     linalg.yield %9 : i32
diff --git a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir
@@ -15,7 +15,7 @@
   %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%farg0, %1 : tensor<i32>, tensor<i32>)
-    outs(%3 : tensor<i1>) {
+    inits(%3 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
diff --git a/mlir/test/Dialect/Linalg/detensorize_while.mlir b/mlir/test/Dialect/Linalg/detensorize_while.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_while.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while.mlir
@@ -15,7 +15,7 @@
   %1 = tensor.empty() : tensor<i1>
   %2 = linalg.generic #attrs
     ins(%0, %farg1 : tensor<i32>, tensor<i32>)
-    outs(%1 : tensor<i1>) {
+    inits(%1 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
@@ -27,7 +27,7 @@
   %5 = tensor.empty() : tensor<i32>
   %6 = linalg.generic #attrs
     ins(%4, %4 : tensor<i32>, tensor<i32>)
-    outs(%5 : tensor<i32>) {
+    inits(%5 : tensor<i32>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i32):
     %8 = arith.addi %arg0, %arg1 : i32
     linalg.yield %8 : i32
diff --git a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir
@@ -28,7 +28,7 @@
   %1 = tensor.empty() : tensor<i32>
   %2 = linalg.generic #sum_reduction_attrs
     ins(%0: tensor<10xi32>)
-    outs(%1: tensor<i32>) {
+    inits(%1: tensor<i32>) {
       ^bb(%a: i32, %x: i32):
         %b = arith.addi %x, %a : i32
         linalg.yield %b : i32
@@ -37,7 +37,7 @@
   %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %farg1 : tensor<i32>, tensor<i32>)
-    outs(%3 : tensor<i1>) {
+    inits(%3 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
@@ -49,7 +49,7 @@
   %7 = tensor.empty() : tensor<10xi32>
   %9 = linalg.generic #broadcast_attrs
        ins(%6: tensor<i32>)
-       outs(%7: tensor<10xi32>) {
+       inits(%7: tensor<10xi32>) {
     ^bb(%a: i32, %b: i32) :
       linalg.yield %a : i32
   } -> tensor<10xi32>
@@ -67,7 +67,7 @@
 // DET-ALL: cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>)
 // DET-ALL: ^[[bb1]](%{{.*}}: tensor<10xi32>)
 // DET-ALL: tensor.empty() : tensor<i32>
-// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor<i32>) {
+// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) inits(%{{.*}} : tensor<i32>) {
 // DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32):
 // DET-ALL:   %{{.*}} = arith.addi %{{.*}}, %{{.*}}
 // DET-ALL:   linalg.yield %{{.*}} : i32
@@ -78,7 +78,7 @@
 // DET-ALL: ^[[bb2]](%{{.*}}: i32)
 // DET-ALL: tensor.from_elements %{{.*}} : tensor<i32>
 // DET-ALL: tensor.empty() : tensor<10xi32>
-// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<i32>) outs(%{{.*}} : tensor<10xi32>) {
+// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<i32>) inits(%{{.*}} : tensor<10xi32>) {
 // DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32):
 // DET-ALL:   linalg.yield %{{.*}} : i32
 // DET-ALL: } -> tensor<10xi32>
@@ -92,12 +92,12 @@
 // DET-CF-SAME: (%{{.*}}: tensor<10xi32>, %{{.*}}: tensor<i32>)
 // DET-CF: cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>)
 // DET-CF: ^bb1(%{{.*}}: tensor<10xi32>)
-// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor<i32>) {
+// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) inits(%{{.*}} : tensor<i32>) {
 // DET-CF: tensor.extract %{{.*}}[] : tensor<i32>
 // DET-CF: cmpi slt, %{{.*}}, %{{.*}} : i32
 // DET-CF: cf.cond_br %{{.*}}, ^bb2(%{{.*}} : tensor<i32>), ^bb3(%{{.*}} : tensor<i32>)
 // DET-CF: ^bb2(%{{.*}}: tensor<i32>)
-// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<i32>) outs(%{{.*}} : tensor<10xi32>) {
+// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<i32>) inits(%{{.*}} : tensor<10xi32>) {
 // DET-CF: cf.br ^bb1(%{{.*}} : tensor<10xi32>)
 // DET-CF: ^bb3(%{{.*}}: tensor<i32>)
 // DET-CF: return %{{.*}} : tensor<i32>
diff --git a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
--- a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
+++ b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir
@@ -20,7 +20,7 @@
   %3 = tensor.empty() : tensor<i1>
   %4 = linalg.generic #attrs
     ins(%2, %reshaped1 : tensor<i32>, tensor<i32>)
-    outs(%3 : tensor<i1>) {
+    inits(%3 : tensor<i1>) {
   ^bb0(%arg0: i32, %arg1: i32, %arg2: i1):
     %8 = arith.cmpi slt, %arg0, %arg1 : i32
     linalg.yield %8 : i1
@@ -32,7 +32,7 @@
   %7 = tensor.empty() : tensor
   %8 = linalg.generic #attrs
     ins(%6, %6 : tensor, tensor)
-    outs(%7 : 
tensor) { + inits(%7 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): %9 = arith.addi %arg0, %arg1 : i32 linalg.yield %9 : i32 diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -16,7 +16,7 @@ func.func @drop_one_trip_loops(%arg0 : tensor, %arg1 : f32, %shape: tensor) -> tensor { %0 = linalg.generic #trait ins(%arg0, %arg1 : tensor, f32) - outs(%shape : tensor) { + inits(%shape : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield %arg3 : f32 } -> tensor @@ -62,7 +62,7 @@ func.func @drop_one_trip_loops_all_ones(%arg0 : tensor<1x1x1xf32>, %arg1 : f32, %shape: tensor) -> tensor { %0 = linalg.generic #trait ins(%arg0, %arg1 : tensor<1x1x1xf32>, f32) - outs(%shape : tensor) { + inits(%shape : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield %arg3 : f32 } -> tensor @@ -96,7 +96,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor) - outs(%shape: tensor) { + inits(%shape: tensor) { ^bb0(%arg6 : i32, %arg7 : i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -142,7 +142,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xf32>) - outs(%arg0 : tensor<1x1xf32>) { + inits(%arg0 : tensor<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } -> tensor<1x1xf32> @@ -169,7 +169,7 @@ (%arg0 : tensor<1x1xi32>) -> tensor<1x1xi32>{ %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xi32>) - outs(%arg0 : tensor<1x1xi32>) { + inits(%arg0 : tensor<1x1xi32>) { ^bb0(%arg3: i32, %arg4: i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -202,7 +202,7 @@ func.func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> { %0 = linalg.generic #trait ins(%arg0 : tensor<1x5xf32>) - outs(%shape : tensor<5xf32>) { + inits(%shape : tensor<5xf32>) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } -> tensor<5xf32> @@ -236,7 +236,7 @@ %1 = tensor.expand_shape %arg1 [[0, 1]] : tensor<5xf32> into tensor<5x1xf32> %2 = linalg.generic #trait ins(%0, %1 : tensor<1x5xf32>, tensor<5x1xf32>) - outs(%shape : tensor<5x5xf32>) { + inits(%shape : tensor<5x5xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.addf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -270,7 +270,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xf32>) - outs(%shape : tensor) { + inits(%shape : tensor) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } -> tensor @@ -296,7 +296,7 @@ %1 = tensor.empty() : tensor<1x2x5xf32> %2 = linalg.generic {i64, indexing_maps = [#map1, #map0], iterator_types = ["parallel", "parallel", "parallel"]} - ins(%arg0 : tensor<5xf32>) outs(%1 : tensor<1x2x5xf32>) { + ins(%arg0 : tensor<5xf32>) inits(%1 : tensor<1x2x5xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<1x2x5xf32> @@ -313,11 +313,11 @@ func.func @fold_unit_dim_for_empty_tensor(%input: tensor<1x1000xf32>) -> tensor<1xf32> { %cst = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<1xf32> - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1xf32>) -> tensor<1xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1xf32>) -> tensor<1xf32> %add = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%input : tensor<1x1000xf32>)outs(%fill : tensor<1xf32>) { + ins(%input : 
tensor<1x1000xf32>)inits(%fill : tensor<1xf32>) { ^bb0(%arg1: f32, %arg2: f32): %1823 = arith.addf %arg1, %arg2 : f32 linalg.yield %1823 : f32 @@ -334,12 +334,12 @@ // CHECK: %[[INPUT_RESHAPE:.+]] = tensor.collapse_shape %{{.+}} {{\[}}[0, 1]] : tensor<1x1000xf32> into tensor<1000xf32> // CHECK: %[[INIT:.+]] = tensor.empty() : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor) -> tensor +// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor) -> tensor // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["reduction"] // CHECK-SAME: ins(%[[INPUT_RESHAPE]] : tensor<1000xf32>) -// CHECK-SAME: outs(%[[FILL]] : tensor) +// CHECK-SAME: inits(%[[FILL]] : tensor) // CHECK: %[[GENERIC_RESHAPE:.+]] = tensor.expand_shape %[[GENERIC]] [] : tensor into tensor<1xf32> // CHECK: return %[[GENERIC_RESHAPE:.+]] : tensor<1xf32> @@ -378,13 +378,13 @@ %c3 = arith.constant 3 : index %0 = tensor.dim %arg0, %c3 : tensor<1x?x1x?xf32> %1 = tensor.empty(%0) : tensor<1x?xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x?xf32>) -> tensor<1x?xf32> + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<1x?xf32>) -> tensor<1x?xf32> %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} ins(%arg0 : tensor<1x?x1x?xf32>) - outs(%2 : tensor<1x?xf32>) { + inits(%2 : tensor<1x?xf32>) { ^bb0(%arg1: f32, %arg2: f32): %4 = arith.addf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -397,12 +397,12 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x?xf32> // CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2], [3]] // CHECK: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "reduction"] // CHECK-SAME: ins(%[[RESHAPE]] : tensor) -// CHECK-SAME: outs(%[[FILL]] : tensor) +// CHECK-SAME: inits(%[[FILL]] : tensor) // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[RESULT]] {{\[}}[0, 1]] // CHECK: return %[[RESULT_RESHAPE]] @@ -412,13 +412,13 @@ %cst = arith.constant 1.000000e+00 : f32 %c3 = arith.constant 3 : index %1 = tensor.empty() : tensor<1x1xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} ins(%arg0 : tensor<1x?x1x1xf32>) - outs(%2 : tensor<1x1xf32>) { + inits(%2 : tensor<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32): %4 = arith.addf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -430,13 +430,13 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x1xf32> // CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2, 3] // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1xf32> -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[INIT2:.+]] = tensor.empty() : tensor<1xf32> // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = 
[#[[MAP2]], #[[MAP2]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel"] // CHECK-SAME: ins(%[[RESHAPE]], %[[FILL]] : tensor, tensor<1xf32>) -// CHECK-SAME: outs(%[[INIT2]] : tensor<1xf32>) +// CHECK-SAME: inits(%[[INIT2]] : tensor<1xf32>) // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[RESULT]] {{\[}}[0, 1]] // CHECK: return %[[RESULT_RESHAPE]] @@ -447,13 +447,13 @@ %c2 = arith.constant 2 : index %0 = tensor.dim %arg0, %c2 : tensor %1 = tensor.empty(%0) : tensor - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg1: f32, %arg2: f32): %4 = arith.addf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -466,12 +466,12 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1], [2, 3]] // CHECK: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "reduction"] // CHECK-SAME: ins(%[[RESHAPE]] : tensor) -// CHECK-SAME: outs(%[[FILL]] : tensor) +// CHECK-SAME: inits(%[[FILL]] : tensor) // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[RESULT]] {{\[}}[0, 1]] // CHECK: return %[[RESULT_RESHAPE]] @@ -516,7 +516,7 @@ func.func @drop_one_trip_loops(%arg0 : memref, %arg1 : f32, %shape: memref) -> memref { linalg.generic #trait ins(%arg0, %arg1 : memref, f32) - outs(%shape : memref) { + inits(%shape : memref) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield %arg3 : f32 } @@ -549,7 +549,7 @@ { linalg.generic #trait ins(%arg0 : memref) - outs(%shape: memref) { + inits(%shape: memref) { ^bb0(%arg6 : i32, %arg7 : i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -595,7 +595,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32>) - outs(%arg0 : memref<1x1xf32>) { + inits(%arg0 : memref<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } @@ -622,7 +622,7 @@ (%arg0 : memref<1x1xi32>) -> memref<1x1xi32>{ linalg.generic #trait ins(%arg0 : memref<1x1xi32>) - outs(%arg0 : memref<1x1xi32>) { + inits(%arg0 : memref<1x1xi32>) { ^bb0(%arg3: i32, %arg4: i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -655,7 +655,7 @@ func.func @leading_dim_1_canonicalization(%arg0: memref<1x5xf32>, %shape: memref<5xf32>) -> memref<5xf32> { linalg.generic #trait ins(%arg0 : memref<1x5xf32>) - outs(%shape : memref<5xf32>) { + inits(%shape : memref<5xf32>) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } @@ -689,7 +689,7 @@ %1 = memref.expand_shape %arg1 [[0, 1]] : memref<5xf32> into memref<5x1xf32> linalg.generic #trait ins(%0, %1 : memref<1x5xf32>, memref<5x1xf32>) - outs(%shape : memref<5x5xf32>) { + inits(%shape : memref<5x5xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.addf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -723,7 +723,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32>) - outs(%shape : memref) { + inits(%shape : memref) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } @@ -749,7 +749,7 @@ %1 = memref.alloc() : 
memref<1x2x5xf32> linalg.generic {i64, indexing_maps = [#map1, #map0], iterator_types = ["parallel", "parallel", "parallel"]} - ins(%arg0 : memref<5xf32>) outs(%1 : memref<1x2x5xf32>) { + ins(%arg0 : memref<5xf32>) inits(%1 : memref<1x2x5xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } @@ -761,7 +761,7 @@ // CHECK: %[[ALLOC:.*]] = memref.alloc() : memref<1x2x5xf32> // CHECK: %[[OUT:.*]] = memref.collapse_shape %[[ALLOC]] // CHECK: linalg.generic -// CHECK-SAME: outs(%[[OUT:.*]] : +// CHECK-SAME: inits(%[[OUT:.*]] : // CHECK: %[[RESULT:.*]] = memref.collapse_shape %[[ALLOC]] // CHECK: return %[[RESULT]] @@ -773,7 +773,7 @@ linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%input : memref<1x1000xf32>)outs(%init : memref<1xf32>) { + ins(%input : memref<1x1000xf32>)inits(%init : memref<1xf32>) { ^bb0(%arg1: f32, %arg2: f32): %1823 = arith.addf %arg1, %arg2 : f32 linalg.yield %1823 : f32 @@ -793,7 +793,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["reduction"] // CHECK-SAME: ins(%[[INPUT_RESHAPE]] : memref<1000xf32>) -// CHECK-SAME: outs(%[[INIT_RESHAPE]] : memref) +// CHECK-SAME: inits(%[[INIT_RESHAPE]] : memref) // CHECK: return %[[INIT:.+]] : memref<1xf32> @@ -816,7 +816,7 @@ func.func @input_stays_same(%arg0 : memref>, %arg1 : f32, %shape: memref) -> memref { linalg.generic #trait ins(%arg0, %arg1 : memref>, f32) - outs(%shape : memref) { + inits(%shape : memref) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield %arg3 : f32 } @@ -836,7 +836,7 @@ // CHECK-SAME: {indexing_maps = [#[[MAP1]], #[[MAP2]], #[[MAP3]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : memref>, f32) -// CHECK-SAME: outs(%[[OUT]] : memref) { +// CHECK-SAME: inits(%[[OUT]] : memref) { // CHECK: ^bb0(%{{.*}}: f32, %[[ARG:.*]]: f32, %{{.*}}: f32): // CHECK: linalg.yield %[[ARG]] : f32 // CHECK: } @@ -860,7 +860,7 @@ %0 = tensor.empty() : tensor<8xf32> %1 = linalg.generic #matvec ins(%arg0, %arg1: tensor<8x8xf32, #CSR>, tensor<8xf32>) - outs(%0: tensor<8xf32>) { + inits(%0: tensor<8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %m = arith.mulf %a, %b : f32 %add = arith.addf %x, %m : f32 @@ -882,7 +882,7 @@ %0 = tensor.empty() : tensor<4x2xf32> %res = scf.forall (%arg0, %arg1) in (%c4, %c2) shared_outs(%o = %0) -> (tensor<4x2xf32>) { %1 = tensor.empty() : tensor<1x1xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> scf.forall.in_parallel { // CHECK: tensor.parallel_insert_slice %{{[0-9a-z]*}} into %{{[0-9a-z]*}} // CHECK-SAME: [%{{.*}}, %{{.*}}] [1, 1] [1, 1] : tensor into tensor<4x2xf32> @@ -907,7 +907,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32, 3>) - outs(%arg0 : memref<1x1xf32, 3>) { + inits(%arg0 : memref<1x1xf32, 3>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } diff --git a/mlir/test/Dialect/Linalg/erase-unused-operands-and-results.mlir b/mlir/test/Dialect/Linalg/erase-unused-operands-and-results.mlir --- a/mlir/test/Dialect/Linalg/erase-unused-operands-and-results.mlir +++ b/mlir/test/Dialect/Linalg/erase-unused-operands-and-results.mlir @@ -5,7 +5,7 @@ // CHECK-SAME: (%[[ARG0:.*]]: tensor) -> tensor { // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]] : tensor) -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: 
inits({{.*}} : tensor) { #map0 = affine_map<(d0) -> (d0)> func.func @remove_deadargs_generic_basic(%arg0: tensor) -> (tensor) { %c0 = arith.constant 0 : index @@ -26,7 +26,7 @@ // CHECK-LABEL: func @remove_deadargs_generic_mixedaccess // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-NOT: ins -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0, d1) -> (d0, d1)> #map1 = affine_map<(d0, d1) -> (d1, d0)> func.func @remove_deadargs_generic_mixedaccess(%arg0: tensor) -> (tensor) { @@ -62,7 +62,7 @@ // CHECK: arith.addf %[[BBARG]], %[[BBARG]] %0 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg0 : tensor, tensor) - outs(%arg0 : tensor) attrs = {someattr} { + inits(%arg0 : tensor) attrs = {someattr} { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %1 = arith.addf %arg1, %arg2 : f32 linalg.yield %1 : f32 @@ -85,7 +85,7 @@ // CHECK: linalg.generic{{.*}}[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]] %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg0 : tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %1 = arith.addf %arg1, %arg2 : f32 linalg.yield %1 : f32 @@ -110,7 +110,7 @@ // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]]) %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg0, %arg0 : tensor, tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %1 = "test.elementwise_mappable"(%arg1, %arg2, %arg3) : (f32, f32, f32) -> f32 linalg.yield %1 : f32 @@ -132,7 +132,7 @@ // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]], %[[BBARG1]]) %0 = linalg.generic {indexing_maps = [#map, #map, #map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg1, %arg0, %arg1 : tensor, tensor, tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): %1 = "test.elementwise_mappable"(%arg2, %arg3, %arg4, %arg5) : (f32, f32, f32, f32) -> f32 linalg.yield %1 : f32 @@ -154,7 +154,7 @@ indexing_maps = [#map0, #map1, #map2, #map3, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0, %arg0, %arg0 + inits(%arg0, %arg0, %arg0, %arg0 : tensor, tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : %1 = arith.addf %b0, %b0: f32 @@ -169,7 +169,7 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor) // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]], %[[ARG0]] : // CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1 // ----- @@ -186,7 +186,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["reduction"]} ins(%arg0 : tensor) - outs(%init0, %init1 : tensor, tensor) { + inits(%init0, %init1 : tensor, tensor) { ^bb0(%b0: f32, %b1: f32, %b2: i32): %8 = linalg.index 0 : index %9 = arith.index_cast %8 : index to i32 @@ -205,7 +205,7 @@ // CHECK-DAG: %[[INIT0:.+]] = tensor.empty() : tensor // CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor // CHECK: %[[GENERIC:.+]]:2 = linalg.generic -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK: return %[[GENERIC]]#1 // ----- @@ -214,11 +214,11 @@ func.func 
@loop_dim_operand(%arg0 : tensor) -> tensor { %cst = arith.constant 0 : i32 %init = tensor.empty() : tensor - %fill = linalg.fill ins(%cst : i32) outs(%init : tensor) -> tensor + %fill = linalg.fill ins(%cst : i32) inits(%init : tensor) -> tensor %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>], iterator_types = ["reduction"]} - ins(%arg0 : tensor) outs(%fill : tensor) { + ins(%arg0 : tensor) inits(%fill : tensor) { ^bb0(%b0: f32, %b1: i32): %1 = linalg.index 0 : index %2 = arith.index_cast %1 : index to i32 @@ -239,11 +239,11 @@ %cst = arith.constant 0 : i32 %init1 = tensor.empty(%arg0) : tensor %init = tensor.empty() : tensor - %fill = linalg.fill ins(%cst : i32) outs(%init : tensor) -> tensor + %fill = linalg.fill ins(%cst : i32) inits(%init : tensor) -> tensor %0:2 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>], iterator_types = ["parallel"]} - outs(%init1, %fill : tensor, tensor) { + inits(%init1, %fill : tensor, tensor) { ^bb0(%b0: i32, %b1: i32): %1 = linalg.index 0 : index %2 = arith.index_cast %1 : index to i32 @@ -256,7 +256,7 @@ // CHECK-SAME: %[[ARG0:.+]]: index // CHECK: %[[INIT:.+]] = tensor.empty(%[[ARG0]]) // CHECK: linalg.generic -// CHECK-SAME: outs(%[[INIT]] +// CHECK-SAME: inits(%[[INIT]] // ----- @@ -271,7 +271,7 @@ iterator_types = ["parallel", "reduction"]} ins(%arg4, %arg0, %arg0, %arg1, %arg3, %arg3 : tensor, tensor, tensor, tensor, tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32, %b4 : i32, %b5 : i32, %b6 : i32): %1 = arith.addi %b0, %b1 : i32 %2 = arith.addi %1, %b2 : i32 @@ -297,7 +297,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "reduction"] // CHECK-SAME: ins(%[[ARG4]], %[[ARG0]], %[[ARG1]], %[[ARG3]] : -// CHECK-SAME: outs(%[[ARG2]] : +// CHECK-SAME: inits(%[[ARG2]] : // CHECK: ^{{.+}}(%[[B0:[a-zA-Z0-9]+]]: i32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: i32 // CHECK-SAME: %[[B2:[a-zA-Z0-9_]+]]: i32 @@ -323,7 +323,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0 : tensor, tensor) { + inits(%arg0, %arg0 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %1 = arith.addf %b0, %b0 : f32 linalg.yield %1, %1 : f32, f32 @@ -333,7 +333,7 @@ // CHECK: func @drop_redundant_results // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK: %[[GENERIC:.+]] = linalg.generic -// CHECK-SAME: outs(%[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]] : // CHECK: return %[[GENERIC]] // ----- @@ -357,7 +357,7 @@ indexing_maps = [#map0, #map1, #map2, #map3, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0, %init0, %init0 + inits(%arg0, %arg0, %init0, %init0 : tensor, tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : linalg.yield %b0, %b0, %b3, %b4 : f32, f32, f32, f32 @@ -372,7 +372,7 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor) // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]], %[[ARG0]] : // CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1 // ----- @@ -396,7 +396,7 @@ indexing_maps = [#map0, #map1, #map2, #map3, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0, %init0, %init0 + inits(%arg0, %arg0, %init0, %init0 : tensor, 
tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : %1 = arith.addf %b0, %b0: f32 @@ -414,7 +414,7 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor) // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]], %[[ARG0]] : // CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1 // ----- @@ -437,7 +437,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %init0, %init0 + inits(%arg0, %init0, %init0 : tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) : linalg.yield %b2, %b1, %b3 : f32, f32, f32 @@ -452,7 +452,7 @@ // CHECK: %[[INIT:.+]] = tensor.empty // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[INIT]] : +// CHECK-SAME: inits(%[[ARG0]], %[[INIT]] : // CHECK: return %[[GENERIC]]#0 // ----- @@ -475,7 +475,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %init0, %init0 + inits(%arg0, %init0, %init0 : tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) : %1 = arith.addf %b1, %b2: f32 @@ -492,7 +492,7 @@ // CHECK: %[[INIT:.+]] = tensor.empty // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[INIT]] : +// CHECK-SAME: inits(%[[ARG0]], %[[INIT]] : // CHECK: return %[[GENERIC]]#0 @@ -505,7 +505,7 @@ -> tensor { // CHECK-INPUT: %[[result:.*]] = linalg.generic {indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel"]} - // CHECK-INPUT-SAME: ins(%[[a]] : tensor) outs(%[[b]] : tensor) { + // CHECK-INPUT-SAME: ins(%[[a]] : tensor) inits(%[[b]] : tensor) { // CHECK-INPUT: ^bb0(%[[in:.*]]: f32, %[[out:.*]]: f32): // CHECK-INPUT: %[[add:.*]] = arith.addf %[[in]], %[[out]] // CHECK-INPUT: linalg.yield %[[add]] @@ -513,7 +513,7 @@ // CHECK-INPUT: return %[[result]] %0 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} - ins(%a, %b : tensor, tensor) outs(%b : tensor) { + ins(%a, %b : tensor, tensor) inits(%b : tensor) { ^bb0(%in: f32, %in_2: f32, %out: f32): %16 = arith.addf %in, %in_2 : f32 linalg.yield %16 : f32 diff --git a/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir b/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir --- a/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir +++ b/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir @@ -18,7 +18,7 @@ %0 = test.linalg_fill_op { indexing_maps = [#map0, #map0, #map1], iterator_types = ["parallel"]} - ins(%arg0, %arg0 : f32, f32) outs(%arg1 : tensor) { + ins(%arg0, %arg0 : f32, f32) inits(%arg1 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32): linalg.yield %arg2 : f32 } -> tensor @@ -34,7 +34,7 @@ %0 = test.linalg_fill_op { indexing_maps = [#map1, #map1], iterator_types = ["parallel"]} - ins(%arg0 : tensor) outs(%arg1 : tensor) { + ins(%arg0 : tensor) inits(%arg1 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } -> tensor diff --git a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir --- a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir +++ b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir @@ -15,7 +15,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor) - outs(%shape : tensor) { + 
inits(%shape : tensor) { ^bb0(%arg1 : f32, %arg2 : f32) : linalg.yield %arg1 : f32 } -> tensor @@ -42,7 +42,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xf32>) - outs(%arg0 : tensor<1x1xf32>) { + inits(%arg0 : tensor<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } -> tensor<1x1xf32> @@ -68,7 +68,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32>) - outs(%arg1 : memref<1x1xf32>) { + inits(%arg1 : memref<1x1xf32>) { ^bb0(%arg2: f32, %arg3 : f32) : linalg.yield %arg2 : f32 } @@ -96,7 +96,7 @@ func.func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> { %0 = linalg.generic #trait ins(%arg0 : tensor<1x5xf32>) - outs(%shape : tensor<5xf32>) { + inits(%shape : tensor<5xf32>) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } -> tensor<5xf32> diff --git a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir --- a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir +++ b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir @@ -29,7 +29,7 @@ %c0 = arith.constant 0: index %f0 = arith.constant 0.0: f32 %alloc = memref.alloc() : memref<32 x f32> - linalg.fill ins(%f0 : f32) outs(%alloc : memref<32 x f32>) + linalg.fill ins(%f0 : f32) inits(%alloc : memref<32 x f32>) %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> memref.copy %in, %subview : memref to memref<16 x f32> %0 = vector.transfer_read %alloc[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> @@ -69,7 +69,7 @@ %alloc = memref.alloc() : memref<128 x i8> %view = memref.view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> %subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> - linalg.fill ins(%f0 : f32) outs(%view : memref<32 x f32>) + linalg.fill ins(%f0 : f32) inits(%view : memref<32 x f32>) memref.copy %in, %subview : memref to memref<16 x f32> %0 = vector.transfer_read %view[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> memref.dealloc %alloc : memref<128 x i8> @@ -129,7 +129,7 @@ %f0 = arith.constant 0.0: f32 %f1 = arith.constant 1.0: f32 %alloc = memref.alloc() : memref<32 x f32> - linalg.fill ins(%f0 : f32) outs(%alloc : memref<32 x f32>) + linalg.fill ins(%f0 : f32) inits(%alloc : memref<32 x f32>) %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> memref.copy %in, %subview : memref to memref<16 x f32> "some_interleaved_use"(%subview) : (memref<16 x f32>) -> () diff --git a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir --- a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir +++ b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir @@ -16,7 +16,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor<2x3x4x5x6x7x8x9xi32>, tensor<2x3x4xi32>, tensor<5x6x7x8xi32>) - outs(%init : tensor<2x3x4x5x6x7x8x9xi32>) { + inits(%init : tensor<2x3x4x5x6x7x8x9xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %t0 = arith.addi %b0, %b1 : i32 %t1 = arith.addi %t0, %b2 : i32 @@ -39,7 +39,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1_RESHAPE]], %[[ARG2_RESHAPE]] 
: -// CHECK-SAME: outs(%[[INIT_RESHAPE]] : +// CHECK-SAME: inits(%[[INIT_RESHAPE]] : // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[COLLAPSED_OP]] {{\[}}[0], [1, 2], [3], [4, 5, 6], [7]{{\]}} // CHECK: return %[[RESULT_RESHAPE]] @@ -67,7 +67,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor<2x3x4x5x6x7x8x9xi32>, tensor<2x3x4xi32>, tensor<5x6x7x8xi32>) - outs(%init : tensor<2x3x4x5x6x7x8x9xi32>) { + inits(%init : tensor<2x3x4x5x6x7x8x9xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %iv0 = linalg.index 0: index %iv1 = linalg.index 1: index @@ -129,7 +129,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor<9x7x8x2x3x4x5x6xi32>, tensor<7x8x2xi32>, tensor<6x3x4x5xi32>) - outs(%init : tensor<2x3x4x5x6x7x8x9xi32>) { + inits(%init : tensor<2x3x4x5x6x7x8x9xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %t0 = arith.addi %b0, %b1 : i32 %t1 = arith.addi %t0, %b2 : i32 @@ -153,7 +153,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1_RESHAPE]], %[[ARG2_RESHAPE]] : -// CHECK-SAME: outs(%[[INIT_RESHAPE]] : +// CHECK-SAME: inits(%[[INIT_RESHAPE]] : // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[COLLAPSED_OP]] {{\[}}[0], [1, 2, 3], [4], [5, 6], [7]{{\]}} // CHECK: return %[[RESULT_RESHAPE]] @@ -181,7 +181,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor, tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %iv0 = linalg.index 0: index %iv1 = linalg.index 1: index @@ -229,7 +229,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "reduction", "reduction", "parallel"]} - ins(%0 : tensor<2x6x?x5xf32>) outs(%arg1 : tensor<2x5xf32>) { + ins(%0 : tensor<2x6x?x5xf32>) inits(%arg1 : tensor<2x5xf32>) { ^bb0(%b0 : f32, %b1 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -245,7 +245,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "reduction", "parallel"] // CHECK-SAME: ins(%[[ARG0]] : tensor<2x?x5xf32>) -// CHECK-SAME: outs(%[[ARG1]] : tensor<2x5xf32>) +// CHECK-SAME: inits(%[[ARG1]] : tensor<2x5xf32>) // ----- @@ -258,7 +258,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) outs(%init : tensor<2x3x4x5xf32>) { + ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) inits(%init : tensor<2x3x4x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -285,7 +285,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2xf32>) outs(%init : tensor<2x4x3x5xf32>) { + ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2xf32>) inits(%init : tensor<2x4x3x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %2 = 
arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -312,7 +312,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "reduction", "parallel", "parallel"]} - ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) outs(%init : tensor<2x5xf32>) { + ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) inits(%init : tensor<2x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -341,7 +341,7 @@ %2 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0, %1 : tensor<2x3xf32>, tensor<4x5xf32>) outs(%init : tensor<2x3x4x5xf32>) { + ins(%0, %1 : tensor<2x3xf32>, tensor<4x5xf32>) inits(%init : tensor<2x3x4x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %3 = arith.addf %b0, %b1 : f32 linalg.yield %3 : f32 @@ -358,7 +358,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%{{.+}}: tensor<6x20xf32>) +// CHECK-SAME: inits(%{{.+}}: tensor<6x20xf32>) // CHECK: %[[RESHAPE1:.+]] = tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1, 2]{{\]}} // CHECK: %[[RESHAPE2:.+]] = tensor.expand_shape %[[RESHAPE1]] {{\[}}[0, 1], [2], [3]{{\]}} // CHECK: return %[[RESHAPE2]] @@ -374,7 +374,7 @@ // CONTROL: %[[INIT_RESHAPE:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1], [2, 3]{{\]}} // CONTROL: %[[GENERIC:.+]] = linalg.generic // CONTROL-SAME: ins(%[[EXPAND]], %[[ARG1]] : -// CONTROL-SAME: outs(%[[INIT_RESHAPE]] : +// CONTROL-SAME: inits(%[[INIT_RESHAPE]] : // CONTROL: %[[RESULT:.+]] = tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1], [2, 3]{{\]}} // ----- @@ -387,7 +387,7 @@ %1 = linalg.generic { indexing_maps = [#map, #map], iterator_types = ["parallel"]} - ins(%0: tensor<1xf32>) outs(%init : tensor<1xf32>) { + ins(%0: tensor<1xf32>) inits(%init : tensor<1xf32>) { ^bb0(%b0 : f32, %b1 : f32): linalg.yield %b0: f32 } -> tensor<1xf32> @@ -410,7 +410,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0, %arg1 : tensor, tensor<4x?x?x8xf32>) - outs(%arg1 : tensor<4x?x?x8xf32>) { + inits(%arg1 : tensor<4x?x?x8xf32>) { ^bb0(%b0: f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -430,7 +430,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[COLLAPSE_ARG0]], %[[COLLAPSE_ARG1_0]] : -// CHECK-SAME: outs(%[[COLLAPSE_ARG1_1]] : +// CHECK-SAME: inits(%[[COLLAPSE_ARG1_1]] : // CHECK: %[[EXPAND_GENERIC:.+]] = tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1], [2, 3]{{\]}} // CHECK: return %[[EXPAND_GENERIC]] @@ -448,7 +448,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0 : tensor) outs(%init : tensor) { + ins(%0 : tensor) inits(%init : tensor) { ^bb0(%b0 : i32, %b1 : i32): %2 = linalg.index 0 : index %3 = linalg.index 1 : index @@ -474,7 +474,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]] : -// CHECK-SAME: outs(%[[COLLAPSE_INIT]] : +// CHECK-SAME: inits(%[[COLLAPSE_INIT]] : // CHECK-NEXT: ^bb{{[0-9]}} // CHECK: %[[ID0:.+]] = linalg.index 0 // CHECK-DAG: %[[T0:.+]] = arith.remui %[[ID0]], %[[C4]] @@ -504,7 +504,7 @@ %1 = linalg.generic { 
indexing_maps = [#map0, #map1], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} - ins(%0 : tensor) outs(%init : tensor) { + ins(%0 : tensor) inits(%init : tensor) { ^bb0(%b0 : i32, %b1 : i32): %2 = linalg.index 0 : index %3 = linalg.index 1 : index diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir --- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir @@ -13,7 +13,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -22,7 +22,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { // CHECK: ^{{[a-zA-Z0-9_]*}} // CHECK-SAME: [[ARG0:%[a-zA-Z0-9_]*]] // CHECK-SAME: [[ARG1:%[a-zA-Z0-9_]*]] @@ -55,7 +55,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, f32) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -64,7 +64,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP1]], [[$MAP1]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, f32) - outs(%2 : tensor) { + inits(%2 : tensor) { // CHECK: ^{{[a-zA-Z0-9_]*}} // CHECK-SAME: [[ARG3:%[a-zA-Z0-9_]*]] // CHECK-SAME: [[ARG4:%[a-zA-Z0-9_]*]] @@ -97,7 +97,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -106,7 +106,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP1]], [[$MAP0]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %5 = arith.mulf %arg5, %arg6 : f32 linalg.yield %5 : f32 @@ -131,7 +131,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -140,7 +140,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP1]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%2 : tensor){ + inits(%2 : tensor){ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %5 = arith.mulf %arg5, %arg6 : f32 linalg.yield %5 : f32 @@ -165,7 +165,7 @@ %1 = tensor.empty(%0) : tensor %2 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel"]} ins(%arg0, 
%arg1 : tensor, tensor) - outs(%1 : tensor) { + inits(%1 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.addf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -176,7 +176,7 @@ %4 = tensor.empty(%0, %3) : tensor %5 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%2, %arg2 : tensor, tensor) - outs(%4 : tensor){ + inits(%4 : tensor){ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %6 = arith.mulf %arg5, %arg6 : f32 linalg.yield %6 : f32 @@ -195,7 +195,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []} ins(%arg0, %arg1 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -205,7 +205,7 @@ // CHECK: arith.mulf %2 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []} ins(%1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.mulf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -231,7 +231,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cst, %arg0 : tensor<5xf32>, tensor<5x?x?xf32>) - outs(%2 : tensor<5x?x?xf32>) { + inits(%2 : tensor<5x?x?xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %4 = arith.mulf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -263,7 +263,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cst, %arg0 : tensor, tensor<5x?x?xf32>) - outs(%2 : tensor<5x?x?xf32>) { + inits(%2 : tensor<5x?x?xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %4 = arith.mulf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -291,7 +291,7 @@ indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): %10 = arith.addi %arg2, %arg3 : i32 linalg.yield %10 : i32 @@ -300,7 +300,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%3 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -342,7 +342,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg4: i32, %arg5: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -356,7 +356,7 @@ indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%3, %arg0 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): %10 = arith.addi %arg2, %arg3 : i32 linalg.yield %10 : i32 @@ -396,7 +396,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -410,7 +410,7 @@ indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel"] } ins(%3 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -457,7 +457,7 @@ %1 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel"]} - ins(%arg0 : tensor) outs(%0 : tensor) { + ins(%arg0 : tensor) inits(%0 : tensor) { ^bb0(%arg2 : i32, %arg3 : 
i32): %2 = linalg.index 0 : index %3 = arith.index_cast %2 : index to i32 @@ -471,7 +471,7 @@ {indexing_maps = [#map2, #map3, #map2], iterator_types = ["parallel", "parallel"]} ins(%arg1, %1 : tensor, tensor) - outs(%4 : tensor) { + inits(%4 : tensor) { ^bb0(%arg2 : i32, %arg3 : i32, %arg4: i32): %6 = arith.addi %arg2, %arg3 : i32 linalg.yield %6 : i32 @@ -503,7 +503,7 @@ %1 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} - ins(%arg1 : tensor) outs(%0 : tensor) { + ins(%arg1 : tensor) inits(%0 : tensor) { ^bb0(%arg2: i32, %arg3: f32): %3 = arith.index_cast %arg2 : i32 to index %4 = tensor.extract %arg0[%3, %c0, %c0] : tensor<5x1x1xf32> @@ -514,7 +514,7 @@ {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - ins(%1, %cst : tensor, tensor<10xf32>) outs(%2 : tensor<10xf32>) { + ins(%1, %cst : tensor, tensor<10xf32>) inits(%2 : tensor<10xf32>) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): %4 = arith.mulf %arg2, %arg3 : f32 linalg.yield %4 : f32 @@ -559,7 +559,7 @@ // CHECK: %[[T1:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: ins(%[[ARG0]] : tensor<4xf32>) -// CHECK-SAME: outs(%[[T0]] : tensor<4xf32>) +// CHECK-SAME: inits(%[[T0]] : tensor<4xf32>) // CHECK: ^{{.+}}( // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: f32, %[[ARG2:[a-zA-Z0-9_]+]]: f32) // CHECK: %[[T2:.+]] = arith.addf %[[ARG1]], %[[CST]] @@ -579,7 +579,7 @@ {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor<1x10xf32>, tensor<1x10xf32>) - outs(%init : tensor<1x10xf32>) { + inits(%init : tensor<1x10xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -588,7 +588,7 @@ {indexing_maps = [#map1, #map2], iterator_types = ["reduction"]} ins(%0 : tensor<1x10xf32>) - outs(%arg2 : tensor<1xf32>) { + inits(%arg2 : tensor<1xf32>) { ^bb0(%arg3: f32, %arg4: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -625,7 +625,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] } - outs(%init0 : tensor) { + inits(%init0 : tensor) { ^bb0(%a: f32): linalg.yield %cp5 : f32 } -> tensor @@ -638,7 +638,7 @@ iterator_types = ["parallel", "parallel"] } ins(%0, %1 : tensor, tensor) - outs(%init1 : tensor) { + inits(%init1 : tensor) { ^bb0(%a: f32, %b: f32, %c: f32): %m = arith.mulf %a, %b : f32 linalg.yield %m : f32 @@ -656,7 +656,7 @@ %0 = linalg.generic { indexing_maps = [affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} - outs(%arg0 : tensor<1x8xf64>) { + inits(%arg0 : tensor<1x8xf64>) { ^bb0(%a: f64): %r = func.call @compute1(%a) : (f64) -> f64 linalg.yield %r : f64 @@ -672,7 +672,7 @@ indexing_maps = [affine_map<(i, j) -> (i, j)>, affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} ins(%0 : tensor<1x8xf64>) - outs(%arg1 : tensor<1x8xi32>) { + inits(%arg1 : tensor<1x8xi32>) { ^bb0(%a: f64, %b: i32): %r = func.call @compute2(%a, %b) : (f64, i32) -> i32 linalg.yield %r : i32 @@ -697,7 +697,7 @@ indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%three : tensor<3x2xf32>) outs(%init : tensor<3xf32>) { + ins(%three : tensor<3x2xf32>) inits(%init : tensor<3xf32>) { ^bb0(%arg0 : f32, %arg1 : f32): %0 = arith.addf %arg0, %arg1 : f32 linalg.yield %0 : f32 @@ -714,12 +714,12 @@ } func.func @break_outs_dependency(%arg0 : tensor) 
-> tensor { - %0 = linalg.generic #trait ins(%arg0 : tensor) outs(%arg0 : tensor) { + %0 = linalg.generic #trait ins(%arg0 : tensor) inits(%arg0 : tensor) { ^bb0(%arg1 : f32, %arg2 : f32) : %1 = arith.addf %arg1, %arg1 : f32 linalg.yield %1 : f32 } -> tensor - %2 = linalg.generic #trait ins(%0 : tensor) outs(%0 : tensor) { + %2 = linalg.generic #trait ins(%0 : tensor) inits(%0 : tensor) { ^bb0(%arg1 : f32, %arg2 : f32) : %3 = arith.mulf %arg1, %arg1 : f32 linalg.yield %3 : f32 @@ -734,12 +734,12 @@ // CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty(%[[D0]], %[[D1]]) // CHECK: %[[GENERIC1:.+]] = linalg.generic -// CHECK-SAME: outs(%[[INIT]] : tensor) +// CHECK-SAME: inits(%[[INIT]] : tensor) // CHECK-DAG: %[[D0:.+]] = tensor.dim %[[GENERIC1]], %[[C0]] // CHECK-DAG: %[[D1:.+]] = tensor.dim %[[GENERIC1]], %[[C1]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty(%[[D0]], %[[D1]]) // CHECK: %[[RESULT:.+]] = linalg.generic -// CHECK-SAME: outs(%[[INIT]] : tensor) +// CHECK-SAME: inits(%[[INIT]] : tensor) // ----- @@ -760,7 +760,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %cst, %c42 : tensor, f32, i32) - outs(%0, %1 : tensor, tensor) { + inits(%0, %1 : tensor, tensor) { ^bb0(%arg1 : f32, %arg2 : f32, %arg3 : i32, %arg4 : f32, %arg5 : i32) : %3 = arith.addf %arg1, %arg2 : f32 linalg.yield %3, %arg3 : f32, i32 @@ -785,7 +785,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<3x2xf32> @@ -803,7 +803,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf64>) outs(%init : tensor<3x2xf64>) { + } ins(%input : tensor<2x3xf64>) inits(%init : tensor<3x2xf64>) { ^bb0(%arg1: f64, %arg2: f64): linalg.yield %arg1 : f64 } -> tensor<3x2xf64> @@ -827,7 +827,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%input : tensor<1x2x3x4xi32>) outs(%init : tensor<3x1x4x2xi32>) { + } ins(%input : tensor<1x2x3x4xi32>) inits(%init : tensor<3x1x4x2xi32>) { ^bb0(%arg1: i32, %arg2: i32): linalg.yield %arg1 : i32 } -> tensor<3x1x4x2xi32> @@ -851,7 +851,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%input : tensor<1x2x3x4xi16>) outs(%init : tensor<3x1x4x2xi16>) { + } ins(%input : tensor<1x2x3x4xi16>) inits(%init : tensor<3x1x4x2xi16>) { ^bb0(%arg1: i16, %arg2: i16): linalg.yield %arg1 : i16 } -> tensor<3x1x4x2xi16> @@ -867,7 +867,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<3x2xf32> @@ -884,7 +884,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, 
affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %cst : f32 } -> tensor<3x2xf32> @@ -900,7 +900,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): %add = arith.addf %arg1, %arg1 : f32 linalg.yield %add : f32 @@ -929,16 +929,16 @@ %5 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"] - } ins(%arg0 : tensor) outs(%4 : tensor) { + } ins(%arg0 : tensor) inits(%4 : tensor) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } -> tensor %6 = tensor.empty(%arg1) : tensor - %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor) -> tensor + %7 = linalg.fill ins(%cst : f32) inits(%6 : tensor) -> tensor %8 = linalg.generic { indexing_maps = [#map2, #map3], iterator_types = ["parallel", "reduction"] - } ins(%5 : tensor) outs(%7 : tensor) { + } ins(%5 : tensor) inits(%7 : tensor) { ^bb0(%arg2: f32, %arg3: f32): %9 = arith.maxf %arg2, %arg3 : f32 linalg.yield %9 : f32 @@ -953,7 +953,7 @@ %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%arg0 : tensor<5000xi64>) { + inits(%arg0 : tensor<5000xi64>) { ^bb0(%arg3: i64): // no predecessors %22 = linalg.index 0 : index %23 = arith.index_cast %22 : index to i64 @@ -963,7 +963,7 @@ %2 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1)>], iterator_types = ["parallel", "parallel"]} - ins(%0 : tensor<5000xi64>) outs(%1 : tensor<5000xi32>) { + ins(%0 : tensor<5000xi64>) inits(%1 : tensor<5000xi32>) { ^bb0(%arg3: i64, %arg5: i32): // no predecessors %22 = arith.index_cast %arg3 : i64 to index %23 = tensor.extract %arg1[%22] : tensor<5000xi32> @@ -980,7 +980,7 @@ // CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor<5000xi32> // CHECK: %[[RESULT:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:.+]]: i64 // CHECK-SAME: %[[B1:.+]]: i32 @@ -998,14 +998,14 @@ // CHECK-NOT: linalg.fill // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]] : tensor) -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0) -> (d0)> func.func @fold_fill_generic_basic(%arg0: tensor) -> (tensor) { %c0 = arith.constant 0 : index %cst = arith.constant 7.0 : f32 %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.empty(%0) : tensor - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor %3 = tensor.empty(%0) : tensor - %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) outs (%3:tensor) { + %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) inits (%3:tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): @@ -1022,14 +1022,14 @@ // CHECK-NOT: linalg.fill // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]] : tensor) -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0) -> (d0)> func.func @fold_fill_generic_different_dtype(%arg0: tensor) -> 
(tensor) { %c0 = arith.constant 0 : index %cst = arith.constant 7.0 : f32 %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.empty(%0) : tensor - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor %3 = tensor.empty(%0) : tensor - %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) outs (%3:tensor) { + %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) inits (%3:tensor) { ^bb0(%arg1: f16, %arg2: f16, %arg3: f16): @@ -1045,7 +1045,7 @@ // CHECK-NOT: linalg.fill // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-NOT: ins -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0, d1) -> (d0, d1)> #map1 = affine_map<(d0, d1) -> (d1, d0)> func.func @fold_fill_generic_mixedaccess(%arg0: tensor) -> (tensor) { %c0 = arith.constant 0 : index %c1 = arith.constant 0 : index %cst1 = arith.constant 7.0 : f32 %cst2 = arith.constant 8.0 : f32 @@ -1056,9 +1056,9 @@ %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.dim %arg0, %c1 : tensor %2 = tensor.empty(%0, %1) : tensor - %3 = linalg.fill ins(%cst1 : f32) outs(%2 : tensor) -> tensor + %3 = linalg.fill ins(%cst1 : f32) inits(%2 : tensor) -> tensor %4 = tensor.empty(%1, %0) : tensor - %5 = linalg.fill ins(%cst2 : f32) outs(%4 : tensor) -> tensor + %5 = linalg.fill ins(%cst2 : f32) inits(%4 : tensor) -> tensor %6 = tensor.empty(%0, %1) : tensor - %7 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%3, %5 : tensor, tensor) outs (%6:tensor) { + %7 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%3, %5 : tensor, tensor) inits (%6:tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): @@ -1077,7 +1077,7 @@ %1 = tensor.empty() : tensor %2:2 = linalg.generic { indexing_maps = [#map, #map, #map, #map, #map], iterator_types = []} - ins(%arg0, %arg1, %arg1 : tensor, tensor, tensor) outs(%0, %1 : tensor, tensor) { + ins(%arg0, %arg1, %arg1 : tensor, tensor, tensor) inits(%0, %1 : tensor, tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32, %arg8: f32, %arg9: f32): %4 = arith.addf %arg5, %arg6 : f32 %5 = arith.addf %4, %arg7 : f32 @@ -1085,7 +1085,7 @@ } -> (tensor, tensor) %3 = linalg.generic { indexing_maps = [#map, #map, #map], iterator_types = []} - ins(%2#1, %arg1 : tensor, tensor) outs(%arg4 : tensor) { + ins(%2#1, %arg1 : tensor, tensor) inits(%arg4 : tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %4 = arith.addf %arg5, %arg6 : f32 %5 = arith.addf %4, %arg6 : f32 @@ -1100,7 +1100,7 @@ // CHECK: %[[INIT:.+]] = tensor.empty // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[INIT]] : +// CHECK-SAME: inits(%[[INIT]] : // CHECK-NEXT: ^bb0 // CHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -1126,7 +1126,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -1135,7 +1135,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}} linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%arg8 : memref) { + inits(%arg8 : memref) { // CHECK: ^{{[a-zA-Z0-9_]*}} // CHECK-SAME: [[ARG0:%[a-zA-Z0-9_]*]] // CHECK-SAME: [[ARG1:%[a-zA-Z0-9_]*]] diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir --- a/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir +++ 
b/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir @@ -20,28 +20,28 @@ %init = tensor.empty(%d0, %d1) : tensor %0 = linalg.generic #binary2Dpointwise ins(%arg0, %arg1 : tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32): %1 = arith.mulf %arg6, %arg7 : f32 linalg.yield %1 : f32 } -> tensor %2 = linalg.generic #binary2Dpointwise ins(%arg2, %arg3 : tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32): %3 = arith.mulf %arg6, %arg7 : f32 linalg.yield %3 : f32 } -> tensor %4 = linalg.generic #binary2Dpointwise ins(%arg4, %arg5 : tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32): %5 = arith.mulf %arg6, %arg7 : f32 linalg.yield %5 : f32 } -> tensor %6 = linalg.generic #ternary2Dpointwise ins(%0, %2, %4 : tensor, tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32, %arg9 : f32): %7 = arith.addf %arg6, %arg7 : f32 %8 = arith.addf %7, %arg8 : f32 diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise.mlir --- a/mlir/test/Dialect/Linalg/fusion-elementwise.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise.mlir @@ -6,7 +6,7 @@ %0:2 = linalg.generic { indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} - ins(%arg0 : tensor) outs(%arg0, %arg0 : tensor, tensor) { + ins(%arg0 : tensor) inits(%arg0, %arg0 : tensor, tensor) { ^bb0(%b0: f32, %b1: f32, %b2: f32): %1 = arith.addf %b0, %b0 : f32 %2 = arith.mulf %b0, %b0 : f32 @@ -15,7 +15,7 @@ %3 = linalg.generic { indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} - ins(%0#0, %arg1 : tensor, tensor) outs(%arg0 : tensor) { + ins(%0#0, %arg1 : tensor, tensor) inits(%arg0 : tensor) { ^bb0(%b0: f32, %b1: f32, %b2: f32): %4 = arith.subf %b0, %b1 : f32 linalg.yield %4 : f32 diff --git a/mlir/test/Dialect/Linalg/fusion-multiuse-producer.mlir b/mlir/test/Dialect/Linalg/fusion-multiuse-producer.mlir --- a/mlir/test/Dialect/Linalg/fusion-multiuse-producer.mlir +++ b/mlir/test/Dialect/Linalg/fusion-multiuse-producer.mlir @@ -8,7 +8,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg1, %arg2 : tensor, tensor) { + inits(%arg1, %arg2 : tensor, tensor) { ^bb0(%b0: f32, %b1 : f32, %b2 : f32): %1 = arith.addf %b0, %b1 : f32 linalg.yield %1, %1 : f32, f32 @@ -17,7 +17,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%0#1, %arg3 : tensor, tensor) - outs(%arg4 : tensor) { + inits(%arg4 : tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %3 = arith.mulf %b0, %b1 : f32 linalg.yield %3 : f32 diff --git a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir --- a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir +++ b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir @@ -8,7 +8,7 @@ // CHECK: %[[RI:.*]] = tensor.collapse_shape %[[INIT]] {{\[}}[0, 1], [2]] : tensor into tensor // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP3]], #[[$MAP2]]], // CHECK-SAME: iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[A]], %[[B]] : tensor, tensor<16xf32>) outs(%[[RI]] : tensor) +// CHECK-SAME: ins(%[[A]], %[[B]] : tensor, tensor<16xf32>) inits(%[[RI]] : tensor) // CHECK: %[[RR:.*]] = tensor.expand_shape %[[R]] {{\[}}[0, 1], [2]] : tensor into tensor // CHECK: 
return %[[RR]] : tensor func.func @reshape(%A: tensor, %B: tensor<16xf32>, %init: tensor) -> tensor { @@ -19,7 +19,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0, %B : tensor, tensor<16xf32>) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %s = arith.subf %arg1, %arg2 : f32 linalg.yield %s : f32 @@ -38,7 +38,7 @@ // CHECK: %[[RI:.*]] = tensor.collapse_shape %[[I]] {{\[}}[0, 1], [2]] : tensor<112x112x16xf32> into tensor<12544x16xf32> // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP2]], #[[$MAP3]], #[[$MAP2]]], // CHECK-SAME: iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[A]], %[[B]], %[[C]] : tensor<12544x16xf32>, tensor<12544x16xf32>, tensor<16xf32>) outs(%[[RI]] : tensor<12544x16xf32>) +// CHECK-SAME: ins(%[[A]], %[[B]], %[[C]] : tensor<12544x16xf32>, tensor<12544x16xf32>, tensor<16xf32>) inits(%[[RI]] : tensor<12544x16xf32>) // CHECK: %[[RR:.*]] = tensor.expand_shape %[[R]] {{\[}}[0, 1], [2]] : tensor<12544x16xf32> into tensor<112x112x16xf32> // CHECK: return %[[RR]] : tensor<112x112x16xf32> func.func @reshape_multiple(%A: tensor<12544x16xf32>, %B: tensor<12544x16xf32>, @@ -55,7 +55,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0, %1, %C : tensor<112x112x16xf32>, tensor<112x112x16xf32>, tensor<16xf32>) - outs(%2 : tensor<112x112x16xf32>) { + inits(%2 : tensor<112x112x16xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %s = arith.subf %arg1, %arg2 : f32 %m = arith.mulf %s, %arg3 : f32 @@ -81,7 +81,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%20, %B : tensor<112x112x16xf32>, tensor<112xf32>) - outs(%21 : tensor<112x112x16xf32>) { + inits(%21 : tensor<112x112x16xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %s = arith.subf %arg1, %arg2 : f32 linalg.yield %s : f32 @@ -106,7 +106,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%25, %arg1, %arg2 : tensor<2x3x5xi32>, tensor<5xf32>, tensor<5xf32>) - outs(%26 : tensor<2x3x5xf32>) { + inits(%26 : tensor<2x3x5xf32>) { ^bb0(%arg6: i32, %arg7: f32, %arg8: f32, %arg9: f32): %29 = arith.sitofp %arg6 : i32 to f32 %30 = arith.addf %arg7, %cst_8 : f32 @@ -121,6 +121,6 @@ // CHECK-LABEL: func @type_correctness // CHECK: %[[OP:.+]] = linalg.generic // CHECK-SAME: ins(%{{.+}}, %{{.+}}, %{{.+}} : tensor<6x5xi32>, tensor<5xf32>, tensor<5xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<6x5xf32>) +// CHECK-SAME: inits(%{{.+}} : tensor<6x5xf32>) // CHECK: tensor.expand_shape %[[OP]] // CHECK-SAME: tensor<6x5xf32> into tensor<2x3x5xf32> diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -2,7 +2,7 @@ func.func @generalize_matmul_buffer(%A : memref<16x8xf32>, %B: memref<8x32xf32>, %C: memref<16x32xf32>) { linalg.matmul ins(%A, %B: memref<16x8xf32>, memref<8x32xf32>) - outs(%C: memref<16x32xf32>) + inits(%C: memref<16x32xf32>) return } @@ -20,7 +20,7 @@ // CHECK-SAME: indexing_maps = [#[[A_MAP]], #[[B_MAP]], #[[C_MAP]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"] // CHECK-SAME: ins(%[[A]], %[[B]] -// CHECK-SAME: outs(%[[C]] +// CHECK-SAME: inits(%[[C]] // CHECK: ^{{.*}}(%[[A_ARG:.+]]: f32, %[[B_ARG:.+]]: f32, 
%[[C_ARG:.+]]: f32) // CHECK: %[[MUL:.+]] = arith.mulf %[[A_ARG]], %[[B_ARG]] : f32 @@ -31,7 +31,7 @@ func.func @generalize_matmul_tensor(%A : tensor<16x8xf32>, %B: tensor<8x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xf32>, tensor<8x32xf32>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -39,7 +39,7 @@ // CHECK: linalg.generic // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<16x8xf32>, tensor<8x32xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<16x32xf32>) +// CHECK-SAME: inits(%{{.+}} : tensor<16x32xf32>) // CHECK: ^{{.*}}(%[[A_ARG:.+]]: f32, %[[B_ARG:.+]]: f32, %[[C_ARG:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[A_ARG]], %[[B_ARG]] : f32 @@ -54,7 +54,7 @@ %C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xcomplex>, tensor<8x32xcomplex>) - outs(%C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> + inits(%C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> return %0: tensor<16x32xcomplex> } @@ -62,7 +62,7 @@ // CHECK: linalg.generic // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<16x8xcomplex>, tensor<8x32xcomplex>) -// CHECK-SAME: outs(%{{.+}} : tensor<16x32xcomplex>) +// CHECK-SAME: inits(%{{.+}} : tensor<16x32xcomplex>) // CHECK: ^{{.*}}(%[[A_ARG:.+]]: complex, %[[B_ARG:.+]]: complex, %[[C_ARG:.+]]: complex) // CHECK-NEXT: %[[MUL:.+]] = complex.mul %[[A_ARG]], %[[B_ARG]] : complex @@ -76,7 +76,7 @@ linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) - outs(%output : memref<2x3x4x2x3xf32>) + inits(%output : memref<2x3x4x2x3xf32>) return } @@ -90,7 +90,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<2x3x4x2x3xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<2x3x4x2x3xf32>) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -103,7 +103,7 @@ linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) - outs(%output : memref<2x2x3x2x3xf32>) + inits(%output : memref<2x2x3x2x3xf32>) return } @@ -117,7 +117,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<2x2x3x2x3xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<2x2x3x2x3xf32>) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -129,7 +129,7 @@ func.func @depthwise_conv_2d_nhwc_hwc(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) - outs(%output: memref<1x56x56x96xf32>) + inits(%output: 
memref<1x56x56x96xf32>) return } @@ -143,7 +143,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x113x113x96xf32>, memref<3x3x96xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<1x56x56x96xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<1x56x56x96xf32>) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -169,7 +169,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -195,7 +195,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -205,7 +205,7 @@ // ----- func.func @generalize_fill(%output: memref, %value : f32) { - linalg.fill ins(%value : f32) outs(%output : memref) + linalg.fill ins(%value : f32) inits(%output : memref) return } @@ -219,7 +219,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"]} // CHECK-SAME: ins(%[[VAL]] : f32) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32) // CHECK-NEXT: linalg.yield %[[BBARG0]] : f32 @@ -228,7 +228,7 @@ func.func @generalize_batch_matm_vec(%lhs : memref, %rhs: memref, %out: memref) { linalg.batch_matvec ins(%lhs, %rhs: memref, memref) - outs(%out: memref) + inits(%out: memref) return } // CHECK: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> @@ -241,7 +241,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: i8, %[[BBARG1:.+]]: i8, %[[BBARG2:.+]]: f32) // CHECK: %[[BBARG0_F32:.+]] = arith.sitofp %[[BBARG0]] : i8 to f32 // CHECK: %[[BBARG1_F32:.+]] = arith.sitofp %[[BBARG1]] : i8 to f32 @@ -253,7 +253,7 @@ func.func @batch_reduce_gemm(%lhs: memref<7x8x9xf32>, %rhs: memref<7x9x8xf32>, %out: memref<8x8xf32>) { linalg.batch_reduce_matmul ins(%lhs, %rhs: memref<7x8x9xf32>, memref<7x9x8xf32>) - outs(%out: memref<8x8xf32>) + inits(%out: memref<8x8xf32>) return } @@ -267,7 +267,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["reduction", "parallel", "parallel", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<7x8x9xf32>, memref<7x9x8xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<8x8xf32> +// CHECK-SAME: inits(%{{.+}} : memref<8x8xf32> // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK: 
%[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 // CHECK: %[[ADD:.+]] = arith.addf %[[BBARG2]], %[[MUL]] : f32 diff --git a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir @@ -3,7 +3,7 @@ // Verifies that different argument types are legal. func.func @generalize_matmul_tensor_f16f64f32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -22,7 +22,7 @@ // Verifies that different argument types are legal. func.func @generalize_matmul_tensor_i16i64i32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -43,7 +43,7 @@ func.func @generalize_matmul_tensor_i16i64i32_unsigned(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul {cast = #linalg.type_fn} ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -54,7 +54,7 @@ func.func @generalize_matmul_tensor_i16i64f32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -67,7 +67,7 @@ func.func @generalize_matmul_tensor_f16f64i32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -80,7 +80,7 @@ func.func @generalize_matmul_unsigned_tensor_i16i64i32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -93,7 +93,7 @@ func.func @generalize_matmul_unsigned_tensor_i16i64f32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -106,7 +106,7 @@ func.func @generalize_matmul_unsigned_tensor_f16f64i32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -119,7 +119,7 @@ func.func @generalize_pooling_nhwc_max_f32(%input : tensor<1x4x16x1xf32>, %shape: 
tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) inits(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> return %0: tensor<1x2x4x1xf32> } @@ -133,7 +133,7 @@ func.func @generalize_pooling_nwc_max_f32(%input : tensor<1x16x1xf32>, %shape: tensor<2xf32>, %output: tensor<1x4x1xf32>) -> tensor<1x4x1xf32> { %0 = linalg.pooling_nwc_max {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> + ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) inits(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> return %0: tensor<1x4x1xf32> } @@ -147,7 +147,7 @@ func.func @generalize_pooling_nhwc_max_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -159,7 +159,7 @@ func.func @generalize_pooling_nwc_max_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { %0 = linalg.pooling_nwc_max {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) inits(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> return %0: tensor<1x4x1xi32> } @@ -171,7 +171,7 @@ func.func @generalize_pooling_nhwc_max_unsigned_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_max_unsigned {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -183,7 +183,7 @@ func.func @generalize_pooling_nwc_max_unsigned_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { %0 = linalg.pooling_nwc_max_unsigned {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) inits(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> return %0: tensor<1x4x1xi32> } @@ -195,7 +195,7 @@ func.func @generalize_pooling_nhwc_min_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xf32>, 
tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) inits(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> return %0: tensor<1x2x4x1xf32> } @@ -209,7 +209,7 @@ func.func @generalize_pooling_nwc_min_f32(%input : tensor<1x16x1xf32>, %shape: tensor<2xf32>, %output: tensor<1x4x1xf32>) -> tensor<1x4x1xf32> { %0 = linalg.pooling_nwc_min {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> + ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) inits(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> return %0: tensor<1x4x1xf32> } @@ -223,7 +223,7 @@ func.func @generalize_pooling_nhwc_min_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -235,7 +235,7 @@ func.func @generalize_pooling_nwc_min_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { %0 = linalg.pooling_nwc_min {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) inits(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> return %0: tensor<1x4x1xi32> } @@ -247,7 +247,7 @@ func.func @generalize_pooling_nhwc_min_unsigned_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_min_unsigned {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -259,7 +259,7 @@ func.func @generalize_pooling_nwc_min_unsigned_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { %0 = linalg.pooling_nwc_min_unsigned {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) inits(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> return %0: tensor<1x4x1xi32> } @@ -271,7 +271,7 @@ func.func @generalize_pooling_nhwc_sum_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) inits(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> return %0: tensor<1x2x4x1xf32> } @@ 
-285,7 +285,7 @@ func.func @generalize_pooling_nwc_sum_f32(%input : tensor<1x16x1xf32>, %shape: tensor<2xf32>, %output: tensor<1x4x1xf32>) -> tensor<1x4x1xf32> { %0 = linalg.pooling_nwc_sum {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) outs(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> + ins(%input, %shape : tensor<1x16x1xf32>, tensor<2xf32>) inits(%output : tensor<1x4x1xf32>) -> tensor<1x4x1xf32> return %0: tensor<1x4x1xf32> } @@ -299,7 +299,7 @@ func.func @generalize_pooling_nhwc_sum_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -313,7 +313,7 @@ func.func @generalize_pooling_nwc_sum_i32(%input : tensor<1x16x1xi32>, %shape: tensor<2xi32>, %output: tensor<1x4x1xi32>) -> tensor<1x4x1xi32> { %0 = linalg.pooling_nwc_sum {dilations = dense<[2]> : tensor<1xi64>, strides = dense<[4]> : tensor<1xi64>} - ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) outs(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> + ins(%input, %shape : tensor<1x16x1xi32>, tensor<2xi32>) inits(%output : tensor<1x4x1xi32>) -> tensor<1x4x1xi32> return %0: tensor<1x4x1xi32> } @@ -326,7 +326,7 @@ // ----- func.func @generalize_fill_0d(%value: f64, %O: tensor) -> tensor { - %0 = linalg.fill ins(%value: f64) outs(%O : tensor) -> tensor + %0 = linalg.fill ins(%value: f64) inits(%O : tensor) -> tensor return %0: tensor } @@ -340,7 +340,7 @@ // ----- func.func @generalize_fill_2d(%value: f64, %O: memref<16x32xf32>) { - linalg.fill ins(%value: f64) outs(%O : memref<16x32xf32>) + linalg.fill ins(%value: f64) inits(%O : memref<16x32xf32>) return } @@ -355,7 +355,7 @@ // ----- func.func @generalize_index(%min: f64, %max: f64, %seed: i32, %O: tensor<16x32xf32>) -> tensor<16x32xf32> { - %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) outs(%O : tensor<16x32xf32>) -> tensor<16x32xf32> + %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) inits(%O : tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -368,7 +368,7 @@ // ----- func.func @generalize_const(%min: f64, %max: f64, %seed: i32, %O: tensor<16x32xf32>) -> tensor<16x32xf32> { - %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) outs(%O : tensor<16x32xf32>) -> tensor<16x32xf32> + %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) inits(%O : tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -381,7 +381,7 @@ // Verifies the default value of the fun attribute is an exp op. func.func @generalize_elemwise_exp(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { - %0 = linalg.elemwise_unary ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + %0 = linalg.elemwise_unary ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -393,7 +393,7 @@ // Verifies the fun attribute controls the unary function used. 
func.func @generalize_elemwise_log(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_unary {fun = #linalg.unary_fn}
- ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -405,7 +405,7 @@
// Verifies the fun attribute controls the unary function used.
func.func @generalize_elemwise_abs(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_unary {fun = #linalg.unary_fn}
- ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -417,7 +417,7 @@
// Verifies the fun attribute controls the unary function used.
func.func @generalize_elemwise_ceil(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_unary {fun = #linalg.unary_fn}
- ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -429,7 +429,7 @@
// Verifies the fun attribute controls the unary function used.
func.func @generalize_elemwise_floor(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_unary {fun = #linalg.unary_fn}
- ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -441,7 +441,7 @@
// Verifies the fun attribute controls the unary function used.
func.func @generalize_elemwise_negf(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_unary {fun = #linalg.unary_fn}
- ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -453,7 +453,7 @@
// Verifies the default value of the fun attribute is an add op.
func.func @generalize_elemwise_add(%lhs : tensor<4x8xf32>, %rhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_binary ins(%lhs, %rhs: tensor<4x8xf32>, tensor<4x8xf32>)
- outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -466,7 +466,7 @@
func.func @generalize_elemwise_mul(%lhs : tensor<4x8xf32>, %rhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_binary {fun = #linalg.binary_fn}
ins(%lhs, %rhs: tensor<4x8xf32>, tensor<4x8xf32>)
- outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -479,7 +479,7 @@
func.func @generalize_elemwise_rank_zero(%lhs : tensor, %rhs : tensor, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
%0 = linalg.elemwise_binary {fun = #linalg.binary_fn}
ins(%lhs, %rhs: tensor, tensor)
- outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
@@ -492,7 +492,7 @@
// Verifies the fun attribute controls the binary function used.
func.func @generalize_copy(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> {
- %0 = linalg.copy ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
+ %0 = linalg.copy ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32>
return %0: tensor<4x8xf32>
}
diff --git a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
--- a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
@@ -4,7 +4,7 @@
// CHECK-SAME: %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
// CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32>
-// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32>
+// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32>
// CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32>
// CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32>
func.func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
@@ -29,7 +29,7 @@
// CHECK: %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32>
// CHECK: %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index
// CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32>
-// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32>
+// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) inits(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32>
// CHECK: %[[DIM1_1:.*]] = tensor.dim %[[IN]], %[[C1]] : tensor<4x?x2x?xf32>
// CHECK: %[[DIM3_1:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32>
// CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[OFFSET]], %[[C0]]] [4, %[[DIM1_1]], 2, %[[DIM3_1]]] [1, 1, 1, 1] : tensor<4x?x2x?xf32> into tensor<4x?x?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-pack-tile.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-pack-tile.mlir
--- a/mlir/test/Dialect/Linalg/generalize-tensor-pack-tile.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-tensor-pack-tile.mlir
@@ -20,7 +20,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x32xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]]
-// CHECK-SAME: outs(%[[EMPTY]]
+// CHECK-SAME: inits(%[[EMPTY]]
// CHECK-SAME: permutation = [1, 0]
// CHECK: %{{.+}} = tensor.insert_slice %[[TRANSP]] into %{{.+}}
@@ -49,7 +49,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[PAD]] : tensor<8x2xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<8x2xf32>)
// CHECK-SAME: permutation = [0, 1]
// CHECK: %{{.+}} = tensor.insert_slice %[[TRANSP]] into %{{.+}}
@@ -80,7 +80,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]]
-// CHECK-SAME: outs(%[[EMPTY]]
+// CHECK-SAME: inits(%[[EMPTY]]
// CHECK-SAME: permutation = [0, 1]
// CHECK: %[[SUB_ITER:.+]] = tensor.insert_slice %[[TRANSP]] into %{{[a-zA-Z0-9]+}}
// CHECK-SAME: [0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] : tensor<32x8xf32> into tensor<1x1x32x8xf32>
diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
--- a/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-tensor-pack.mlir
@@ -11,7 +11,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x32xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]] : tensor<32x8xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x32xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<8x32xf32>)
// CHECK-SAME: permutation = [1, 0]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 8, 32] [1, 1, 1, 1, 1, 1]
@@ -35,7 +35,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[PAD]] : tensor<8x2xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<8x2xf32>)
// CHECK-SAME: permutation = [0, 1]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0] [1, 1, 8, 2] [1, 1, 1, 1]
@@ -53,7 +53,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[SRC]] : tensor<32x8xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<32x8xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<32x8xf32>)
// CHECK-SAME: permutation = [0, 1]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1]
@@ -71,7 +71,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<5x7x3xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[SRC]] : tensor<3x5x7xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<5x7x3xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<5x7x3xf32>)
// CHECK-SAME: permutation = [1, 2, 0]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0, 0, 0] [1, 1, 1, 5, 7, 3] [1, 1, 1, 1, 1, 1]
diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-unpack-tile.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-unpack-tile.mlir
--- a/mlir/test/Dialect/Linalg/generalize-tensor-unpack-tile.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-tensor-unpack-tile.mlir
@@ -26,7 +26,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]]
-// CHECK-SAME: outs(%[[EMPTY]]
+// CHECK-SAME: inits(%[[EMPTY]]
// CHECK-SAME: permutation = [1, 0]
// CHECK: %{{.+}} = tensor.insert_slice %[[TRANSP]] into %{{.+}}
@@ -58,7 +58,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]] : tensor<8x2xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<8x2xf32>)
// CHECK-SAME: permutation = [0, 1]
// CHECK: %[[UNPACK_TILE:.+]] = tensor.extract_slice %[[TRANSP]]
// CHECK-SAME: [0, 0] [%[[OUT_I_SZ]], %[[OUT_J_SZ]]] [1, 1]
@@ -95,7 +95,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]]
-// CHECK-SAME: outs(%[[EMPTY]]
+// CHECK-SAME: inits(%[[EMPTY]]
// CHECK-SAME: permutation = [0, 1]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %{{[a-zA-Z0-9]+}}
// CHECK-SAME: [%[[K]], %[[C]]] [32, 8] [1, 1]
diff --git a/mlir/test/Dialect/Linalg/generalize-tensor-unpack.mlir b/mlir/test/Dialect/Linalg/generalize-tensor-unpack.mlir
--- a/mlir/test/Dialect/Linalg/generalize-tensor-unpack.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-tensor-unpack.mlir
@@ -11,7 +11,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]] : tensor<8x32xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<32x8xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<32x8xf32>)
// CHECK-SAME: permutation = [1, 0]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1]
@@ -30,7 +30,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]] : tensor<8x2xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<8x2xf32>)
// CHECK-SAME: permutation = [0, 1]
// They have the same type, so the insert_slice op is folded
// away.
@@ -50,7 +50,7 @@
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]] : tensor<32x8xf32>)
-// CHECK-SAME: outs(%[[EMPTY]] : tensor<32x8xf32>)
+// CHECK-SAME: inits(%[[EMPTY]] : tensor<32x8xf32>)
// CHECK-SAME: permutation = [0, 1]
// They have the same type, so the insert_slice op is folded
// away.
diff --git a/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir b/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir
--- a/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir
+++ b/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir
@@ -12,7 +12,7 @@
%1 = linalg.generic {indexing_maps = [#map2, #map3, #map2],
iterator_types = ["parallel"]}
ins(%arg0, %scalar : tensor<4xf32>, tensor)
- outs(%0 : tensor<4xf32>) {
+ inits(%0 : tensor<4xf32>) {
// CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32)
^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
// CHECK: tensor.extract %[[SCALAR]][]
@@ -37,7 +37,7 @@
%1 = linalg.generic {indexing_maps = [#map2, #map3, #map2],
iterator_types = ["parallel"]}
ins(%arg0, %scalar : tensor<4xf32>, tensor<1xf32>)
- outs(%0 : tensor<4xf32>) {
+ inits(%0 : tensor<4xf32>) {
// CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32)
^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
// CHECK: tensor.extract %[[SCALAR]][%[[ZERO]]]
diff --git a/mlir/test/Dialect/Linalg/inlining.mlir b/mlir/test/Dialect/Linalg/inlining.mlir
--- a/mlir/test/Dialect/Linalg/inlining.mlir
+++ b/mlir/test/Dialect/Linalg/inlining.mlir
@@ -23,7 +23,7 @@
// CHECK: linalg.generic
linalg.generic #trait
ins(%arg0 : memref)
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb(%0 : f32, %1 : f32) :
%2 = arith.addf %0, %0: f32
linalg.yield %2 : f32
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -36,7 +36,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> ()> ],
iterator_types = []}
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb(%0: f32):
linalg.index 2 : index
linalg.yield %0 : f32
@@ -50,7 +50,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> ()> ],
iterator_types = []}
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb(%0: f32):
linalg.index -1 : index
linalg.yield %0 : f32
@@ -74,7 +74,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> ()> ],
iterator_types = []}
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb(%0: f32):
linalg.yield
}
@@ -87,7 +87,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> (0)> ],
iterator_types = ["parallel"]}
- outs(%arg0 : memref<1xi32>) {
+ inits(%arg0 : memref<1xi32>) {
^bb(%i : i32):
linalg.yield %i : i32
}
@@ -100,7 +100,7 @@
linalg.generic {
indexing_maps = [ affine_map<(i) -> (i)> ],
iterator_types = ["random"]}
- outs(%arg0 : memref<1xi32>) {
+ inits(%arg0 : memref<1xi32>) {
^bb(%i : i32):
linalg.yield %i : i32
}
@@ -113,7 +113,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> (0, 0)> ],
iterator_types = []}
- outs(%arg0 : memref(off + i)>>) {
+ inits(%arg0 : memref(off + i)>>) {
^bb(%f : f32):
linalg.yield %f: f32
}
@@ -128,7 +128,7 @@
indexing_maps = [ affine_map<() -> (0)>, affine_map<() -> (0, 0)> ],
iterator_types = []}
ins(%cst : f32)
- outs(%arg0 : memref(off + i)>>) {
+ inits(%arg0 : memref(off + i)>>) {
^bb(%0 : f32, %1 : f32):
linalg.yield %0: f32
}
@@ -141,7 +141,7 @@
linalg.generic {
indexing_maps = [ affine_map<(i) -> (i)> ],
iterator_types = ["parallel"]}
- outs(%arg0 : memref(off + i)>>) {
+ inits(%arg0 : memref(off + i)>>) {
^bb(%0: f32):
%1 = arith.constant 1: i4
linalg.yield %1: i4
@@ -159,7 +159,7 @@
],
iterator_types = ["parallel","parallel"]}
ins(%arg0 : memref(off + i)>>)
- outs(%arg1 : memref(off + i)>>) {
+ inits(%arg1 : memref(off + i)>>) {
^bb(%0: f32, %1: f32):
linalg.yield %1: f32
}
@@ -178,7 +178,7 @@
indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ],
iterator_types = []}
ins(%arg0 : memref)
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb1:
linalg.yield %f0: f32
^bb2:
@@ -195,7 +195,7 @@
indexing_maps = [ affine_map<() -> ()> , affine_map<() -> ()> ],
iterator_types = []}
ins(%arg0 : memref)
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
}
}
@@ -206,7 +206,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ],
iterator_types = []}
- outs(%arg0, %arg0 : memref, memref) {
+ inits(%arg0, %arg0 : memref, memref) {
^bb(%f: f32):
linalg.yield %f: f32
}
@@ -219,7 +219,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> ()> ],
iterator_types = []}
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb(%i: i1):
linalg.yield %i : i1
}
@@ -232,7 +232,7 @@
linalg.generic {
indexing_maps = [ affine_map<() -> ()> ],
iterator_types = []}
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb(%i: i1):
linalg.yield %i : i1
} -> tensor
@@ -245,7 +245,7 @@
linalg.generic {
indexing_maps = [ affine_map<(i) -> (i)> ],
iterator_types = ["parallel"]}
- outs(%arg0 : memref(off + i)>>) {
+ inits(%arg0 : memref(off + i)>>) {
^bb(%i: f32):
%0 = arith.constant 0: i1
linalg.yield %0: i1
@@ -261,7 +261,7 @@
indexing_maps = [ affine_map<(i) -> (i)> , affine_map<(i) -> (i)> ],
iterator_types = ["parallel"]}
ins(%arg0 : memref(off + i)>>)
- outs(%arg1 : tensor) {
+ inits(%arg1 : tensor) {
^bb(%i: f32, %j: f32):
linalg.yield %i: f32
} -> tensor
@@ -274,7 +274,7 @@
linalg.generic {
indexing_maps = [ affine_map<(i, j) -> (i, j)> ],
iterator_types = ["parallel", "parallel"]}
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb(%0: f32) :
%1 = arith.addf %0, %0: f32
}
@@ -298,7 +298,7 @@
func.func @named_ops(%a3: memref, %b3: memref, %c3: memref) {
// expected-error @+1 {{expected operand rank (2) to match the result rank of indexing_map #1 (3)}}
linalg.batch_matmul ins(%a3, %b3: memref, memref)
- outs(%c3 : memref)
+ inits(%c3 : memref)
return
}
@@ -316,7 +316,7 @@
func.func @matching_inits(%m: memref, %t: tensor) {
// expected-error @+1 {{expected type of operand #2 ('tensor') to match type of corresponding result ('tensor')}}
%res = linalg.matmul ins(%m, %m : memref, memref)
- outs(%t : tensor)
+ inits(%t : tensor)
-> tensor
return
}
@@ -327,7 +327,7 @@
{
%0 = tensor.empty(%arg0, %arg1) : tensor
// expected-error @+1 {{expected the number of results (0) to be equal to the number of output tensors (1)}}
- linalg.fill ins(%arg2 : f32) outs(%0 : tensor)
+ linalg.fill ins(%arg2 : f32) inits(%0 : tensor)
}
// -----
@@ -336,7 +336,7 @@
(%arg0 : memref, %arg1 : f32) -> tensor
{
// expected-error @+1 {{expected the number of results (1) to be equal to the number of output tensors (0)}}
- %0 = linalg.fill ins(%arg1 : f32) outs(%arg0 : memref) -> tensor
+ %0 = linalg.fill ins(%arg1 : f32) inits(%arg0 : memref) -> tensor
return %0 : tensor
}
@@ -346,7 +346,7 @@
(%arg0 : tensor, %arg1 : f32) -> memref
{
// expected-error @+1 {{result #0 must be ranked tensor of any type values, but got 'memref'}}
- %0 = linalg.fill ins(%arg1 : f32) outs(%arg0 : tensor) -> memref
+ %0 = linalg.fill ins(%arg1 : f32) inits(%arg0 : tensor) -> memref
return %0 : memref
}
@@ -355,7 +355,7 @@
func.func @invalid_static_matmul(%arg0: memref<2x4xf32>, %arg1: memref<3x4xf32>, %arg2: memref<2x4xf32>) {
// expected-error @+1 {{inferred input/output operand #1 has shape's dimension #0 to be 4, but found 3}}
linalg.matmul ins(%arg0, %arg1 : memref<2x4xf32>, memref<3x4xf32>)
- outs(%arg2 :memref<2x4xf32>)
+ inits(%arg2 :memref<2x4xf32>)
return
}
@@ -366,7 +366,7 @@
linalg.conv_2d_nhwc_hwcf
{ dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
ins(%input, %filter : memref<1x3x4x2xf32>, memref<3x2x2x1xf32>)
- outs(%output : memref<1x2x3x1xf32>)
+ inits(%output : memref<1x2x3x1xf32>)
return
}
@@ -382,7 +382,7 @@
func.func @invalid_reverse(%A: memref<5xf32>, %B: memref<5xf32>) {
// expected-error @+1 {{unexpected result less than 0 at expression #0 in}}
- linalg.generic #attrs ins(%A: memref<5xf32>) outs(%B: memref<5xf32>) {
+ linalg.generic #attrs ins(%A: memref<5xf32>) inits(%B: memref<5xf32>) {
^bb0(%a: f32, %b: f32):
linalg.yield %a : f32
}
@@ -396,7 +396,7 @@
-> tensor<64xf32> {
%add = linalg.map
ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%lhs_elem: f32, %rhs_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
// expected-error @+1{{'linalg.yield' op expected number of yield values (1) to match the number of operands of the enclosing LinalgOp (2)}}
@@ -413,7 +413,7 @@
// expected-error@+1{{'linalg.map' op expects number of operands to match the arity of mapper, but got: 2 and 3}}
%add = linalg.map
ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%lhs_elem: f32, %rhs_elem: f32, %extra_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
@@ -429,7 +429,7 @@
// expected-error@+1{{'linalg.map' op expected element type of input 'f32' to match bbArg type 'f64'}}
%add = linalg.map
ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%lhs_elem: f64, %rhs_elem: f64) {
%0 = arith.addf %lhs_elem, %rhs_elem: f64
linalg.yield %0: f64
@@ -445,7 +445,7 @@
// expected-error@+1{{'linalg.map' op expected shape of input (64, 64) to match shape of output (32)}}
%add = linalg.map
ins(%lhs, %rhs : tensor<64x64xf32>, tensor<64x64xf32>)
- outs(%init:tensor<32xf32>)
+ inits(%init:tensor<32xf32>)
(%lhs_elem: f32, %rhs_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
@@ -461,7 +461,7 @@
// expected-error @+1 {{'linalg.reduce' op init dimensions [16, 64] doesn't match input dimensions after reduction [16, 32]}}
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16x64xf32>)
+ inits(%init:tensor<16x64xf32>)
dimensions = [2]
(%in: f32, %out: f32) {
%0 = arith.addf %in, %out: f32
@@ -477,7 +477,7 @@
// expected-error @+1 {{'linalg.reduce' op dimensions for reduction should be in the range [0, 2].}}
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16x64xf32>)
+ inits(%init:tensor<16x64xf32>)
dimensions = [3]
(%in: f32, %out: f32) {
%0 = arith.addf %in, %out: f32
@@ -493,7 +493,7 @@
// expected-error @+1 {{'linalg.reduce' op attribute 'dimensions' failed to satisfy constraint: i64 dense array attribute should be in increasing order}}
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16xf32>)
+ inits(%init:tensor<16xf32>)
dimensions = [1, 1]
(%in: f32, %out: f32) {
%0 = arith.addf %in, %out: f32
@@ -509,7 +509,7 @@
// expected-error @+1 {{'linalg.reduce' op attribute 'dimensions' failed to satisfy constraint: i64 dense array attribute should be in increasing order}}
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16xf32>)
+ inits(%init:tensor<16xf32>)
dimensions = [2, 1]
(%in: f32, %out: f32) {
%0 = arith.addf %in, %out: f32
@@ -525,7 +525,7 @@
// expected-error @+1 {{'linalg.reduce' op number of dimensions after reduction 1 doesn't match the init rank 2}}
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16x64xf32>)
+ inits(%init:tensor<16x64xf32>)
dimensions = [1, 2]
(%in: f32, %out: f32) {
%0 = arith.addf %in, %out: f32
@@ -543,7 +543,7 @@
// expected-error @+1{{'linalg.reduce' op mismatching number of operands and block arguments}}
%reduce, %reduce2 = linalg.reduce
ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
- outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
+ inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
dimensions = [1]
(%in: f32, %out: f32) {
%0 = arith.addf %in, %out: f32
@@ -561,7 +561,7 @@
// expected-error @+1{{'linalg.reduce' op input element type 'f32' does not match corresponding block argument type 'f64'}}
%reduce, %reduce2 = linalg.reduce
ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
- outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
+ inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>)
dimensions = [1]
(%in1: f32, %in2: f64, %out1: f32, %out2: f64) {
%0 = arith.addf %in1, %out1: f32
@@ -580,7 +580,7 @@
// expected-error @+1{{'linalg.reduce' op output element type 'f64' does not match corresponding block argument type 'f32'}}
%reduce, %reduce2 = linalg.reduce
ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
- outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf64>)
+ inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf64>)
dimensions = [1]
(%in1: f32, %in2: f32, %out1: f32, %out2: f32) {
%0 = arith.addf %in1, %out1: f32
@@ -597,7 +597,7 @@
// expected-error @+1{{'linalg.reduce' op expects all inputs to have the same shapes. Shape at input-index 1 is not equal to the shape at input-index 0.}}
%reduce, %reduce2 = linalg.reduce
ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<17x32x64xf32>)
- outs(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>)
+ inits(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>)
dimensions = [1]
(%in1: f32, %in2: f32, %out1: f32, %out2: f32) {
%0 = arith.addf %in1, %out1: f32
@@ -615,7 +615,7 @@
// expected-error @+1{{'linalg.reduce' op expects all outputs to have the same shapes. Shape at output-index 1 is not equal to the shape at output-index 0.}}
%reduce, %reduce2 = linalg.reduce
ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>)
- outs(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>)
+ inits(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>)
dimensions = [1]
(%in1: f32, %in2: f32, %out1: f32, %out2: f32) {
%0 = arith.addf %in1, %out1: f32
@@ -632,7 +632,7 @@
// expected-error @+1 {{'linalg.transpose' op permutation is not valid}}
%transpose = linalg.transpose
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<32x64x16xf32>)
+ inits(%init:tensor<32x64x16xf32>)
permutation = [1, 1, 2]
func.return %transpose : tensor<32x64x16xf32>
}
@@ -644,7 +644,7 @@
// expected-error @+1 {{'linalg.transpose' op dim(result, 0) = 32 doesn't match dim(input, permutation[0]) = 16}}
%transpose = linalg.transpose
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<32x64x16xf32>)
+ inits(%init:tensor<32x64x16xf32>)
permutation = [0, 1, 2]
func.return %transpose : tensor<32x64x16xf32>
}
@@ -657,7 +657,7 @@
// expected-error @+1 {{'linalg.transpose' op size of permutation 2 does not match the argument rank 3}}
%transpose = linalg.transpose
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<32x64x16xf32>)
+ inits(%init:tensor<32x64x16xf32>)
permutation = [1, 0]
func.return %transpose : tensor<32x64x16xf32>
}
@@ -669,7 +669,7 @@
// expected-error @+1 {{'linalg.transpose' op input rank 2 does not match init rank 3}}
%transpose = linalg.transpose
ins(%input:tensor<16x32xf32>)
- outs(%init:tensor<32x64x16xf32>)
+ inits(%init:tensor<32x64x16xf32>)
permutation = [1, 0, 2]
func.return %transpose : tensor<32x64x16xf32>
}
@@ -682,7 +682,7 @@
// expected-error @+1 {{'linalg.broadcast' op input rank plus added dimensions does not match init rank. }}
%bcast = linalg.broadcast
ins(%input:tensor<4x16xf32>)
- outs(%init:tensor<4x8x16xf32>)
+ inits(%init:tensor<4x8x16xf32>)
dimensions = [1, 2]
func.return %bcast : tensor<4x8x16xf32>
}
@@ -695,7 +695,7 @@
// expected-error @+1 {{'linalg.broadcast' op dimension 0 is out of range. expected range: [0, 2], got: 5}}
%bcast = linalg.broadcast
ins(%input:tensor<4x16xf32>)
- outs(%init:tensor<4x8x16xf32>)
+ inits(%init:tensor<4x8x16xf32>)
dimensions = [5]
func.return %bcast : tensor<4x8x16xf32>
}
@@ -708,7 +708,7 @@
// expected-error @+1 {{'linalg.broadcast' op input dim 0 should match init dim 0. input: 4, init: 5}}
%bcast = linalg.broadcast
ins(%input:tensor<4x16xf32>)
- outs(%init:tensor<5x8x16xf32>)
+ inits(%init:tensor<5x8x16xf32>)
dimensions = [1]
func.return %bcast : tensor<5x8x16xf32>
}
@@ -721,7 +721,7 @@
// expected-error @+1 {{'linalg.broadcast' op input dim 0 should match init dim 0. input: 1, init: 4}}
%bcast = linalg.broadcast
ins(%input:tensor<1x16xf32>)
- outs(%init:tensor<4x?x16xf32>)
+ inits(%init:tensor<4x?x16xf32>)
dimensions = [1]
func.return %bcast : tensor<4x?x16xf32>
}
diff --git a/mlir/test/Dialect/Linalg/library-calls.mlir b/mlir/test/Dialect/Linalg/library-calls.mlir
--- a/mlir/test/Dialect/Linalg/library-calls.mlir
+++ b/mlir/test/Dialect/Linalg/library-calls.mlir
@@ -14,11 +14,11 @@
%C = memref.alloc(%x, %y) : memref
// CHECK: call @linalg_fill_f32_viewsxsxf32({{.*}}) : (f32, memref)
- linalg.fill ins(%f0 : f32) outs(%C : memref)
+ linalg.fill ins(%f0 : f32) inits(%C : memref)
// CHECK: call @linalg_matmul_viewsxsxf32_viewsxsxf32_viewsxsxf32({{.*}}) : (memref, memref, memref) -> ()
linalg.matmul ins(%A, %B: memref, memref)
- outs(%C: memref)
+ inits(%C: memref)
return %C : memref
}
diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir
--- a/mlir/test/Dialect/Linalg/loops.mlir
+++ b/mlir/test/Dialect/Linalg/loops.mlir
@@ -15,7 +15,7 @@
%B = memref.view %arg0[%c0][%K, %N] : memref to memref
%C = memref.view %arg0[%c0][%M, %N] : memref to memref
linalg.matmul ins(%A, %B: memref, memref)
- outs(%C: memref)
+ inits(%C: memref)
return
}
// CHECK-LABEL: func @matmul(%{{.*}}: memref,
@@ -60,7 +60,7 @@
%3 = memref.view %arg0[%c0][%M] : memref to memref
%4 = memref.view %arg0[%c0][%N] : memref to memref
linalg.matvec ins(%2, %3: memref, memref)
- outs(%4 : memref)
+ inits(%4 : memref)
return
}
// CHECK-LABEL: func @matvec(%{{.*}}: memref,
@@ -101,7 +101,7 @@
%2 = memref.view %arg0[%c0][%M] : memref to memref
%3 = memref.view %arg0[%c0][] : memref to memref
linalg.dot ins(%1, %2 : memref, memref)
- outs(%3 : memref)
+ inits(%3 : memref)
return
}
// CHECK-LABEL: func @dot(%{{.*}}: memref,
@@ -135,7 +135,7 @@
%arg3: memref) {
// Verifies that we use the correct arith operations for integers.
linalg.dot ins(%arg0, %arg1 : memref, memref)
- outs(%arg3 : memref)
+ inits(%arg3 : memref)
return
}
// CHECK-LABEL: func @dot_int(
@@ -148,7 +148,7 @@
%arg3: memref) {
// Verifies that we use the correct (saturating) arith operations for booleans.
linalg.dot ins(%arg0, %arg1 : memref, memref)
- outs(%arg3 : memref)
+ inits(%arg3 : memref)
return
}
// CHECK-LABEL: func @dot_bool(
@@ -160,7 +160,7 @@
func.func @dot_view(%arg0: memref>, %arg1: memref>, %arg2: memref) {
linalg.dot ins(%arg0, %arg1 : memref>, memref>)
- outs(%arg2: memref)
+ inits(%arg2: memref)
return
}
// CHECK-LABEL: func @dot_view(
@@ -186,7 +186,7 @@
// CHECKPARALLEL: store %[[res]], %{{.*}}[] : memref
func.func @fill_view(%arg0: memref>, %arg1: f32) {
- linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>)
+ linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>)
return
}
// CHECK-LABEL: func @fill_view(
@@ -200,7 +200,7 @@
// CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}] : memref>
func.func @fill_view0(%arg0: memref, %arg1: f32) {
- linalg.fill ins(%arg1 : f32) outs(%arg0 : memref)
+ linalg.fill ins(%arg1 : f32) inits(%arg0 : memref)
return
}
// CHECK-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) {
@@ -210,7 +210,7 @@
// CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref
func.func @fill_view3(%arg0: memref>, %arg1: f32) {
- linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>)
+ linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>)
return
}
// CHECK-LABEL: func @fill_view3(
@@ -230,7 +230,7 @@
iterator_types = ["parallel"],
indexing_maps = [ affine_map<(i) -> (i)>, affine_map<(i) -> (i)>] }
ins(%arg0: memref>)
- outs(%arg1: memref>) {
+ inits(%arg1: memref>) {
^bb0(%a: f32, %b: f32):
linalg.yield %a : f32
}
@@ -264,7 +264,7 @@
func.func @generic_region(%arg0: memref>, %arg1: memref>, %arg2: memref>) {
linalg.generic #trait2
ins(%arg0: memref>)
- outs(%arg1, %arg2 : memref>,
+ inits(%arg1, %arg2 : memref>,
memref>) {
^bb0(%a: f32, %b: f32, %c: f32):
%d = arith.mulf %a, %b : f32
@@ -309,7 +309,7 @@
%arg2: memref>) {
linalg.generic #trait4
ins(%arg0 : memref>)
- outs(%arg1, %arg2 : memref>,
+ inits(%arg1, %arg2 : memref>,
memref>) {
^bb0(%a: f32, %b: f32, %c: f32):
%i = linalg.index 0 : index
@@ -377,7 +377,7 @@
{
linalg.generic #trait_broadcast
ins(%arg0 : memref)
- outs(%arg1 : memref<3x4xf32>) {
+ inits(%arg1 : memref<3x4xf32>) {
^bb(%a: f32, %b: f32) :
linalg.yield %a : f32
}
@@ -403,7 +403,7 @@
{
linalg.generic #trait_broadcast
ins(%arg0 : f32)
- outs(%arg1 : memref<3x4xf32>) {
+ inits(%arg1 : memref<3x4xf32>) {
^bb(%a: f32, %b: f32) :
linalg.yield %a : f32
}
@@ -427,7 +427,7 @@
{
linalg.generic #trait_broadcast
ins(%arg0 : memref)
- outs(%arg1 : memref<3x4xi32>) {
+ inits(%arg1 : memref<3x4xi32>) {
^bb(%a: i32, %b: i32) :
%i = linalg.index 0 : index
%j = linalg.index 1 : index
@@ -477,7 +477,7 @@
{
linalg.generic #trait_reduce_1D
ins(%arg0 : memref)
- outs(%arg1 : memref) {
+ inits(%arg1 : memref) {
^bb(%a: f32, %b: f32) :
%0 = arith.addf %a, %b : f32
linalg.yield %0 : f32
@@ -523,7 +523,7 @@
{
linalg.generic #trait_reduce_init_1D
ins(%arg0, %arg1 : memref, memref)
- outs(%arg2 : memref) {
+ inits(%arg2 : memref) {
^bb(%a: f32, %b: f32, %c: f32) :
%i = linalg.index 0 : index
%0 = arith.constant 0 : index
@@ -567,7 +567,7 @@
}
func.func @generic_const_init(%arg0: memref) {
%cst = arith.constant 1.0 : f32
- linalg.generic #trait_const_fill outs(%arg0 : memref) {
+ linalg.generic #trait_const_fill inits(%arg0 : memref) {
^bb0(%arg1: f32):
linalg.yield %cst : f32
}
@@ -601,7 +601,7 @@
{
linalg.generic #scalar_trait
ins(%arg0, %arg1 : memref, memref)
- outs(%arg2 : memref) {
+ inits(%arg2 : memref) {
^bb(%a : f32, %b : f32, %c : f32) :
%result = scf.if %arg3 -> (f32) {
scf.yield %a : f32
@@ -643,7 +643,7 @@
//----------------------------------------------------------------------------//
func.func @named_batch_matmul(%A: memref, %B: memref, %C: memref) {
linalg.batch_matmul ins(%A, %B : memref, memref)
- outs(%C : memref)
+ inits(%C : memref)
return
}
// CHECK-LABEL: @named_batch_matmul
@@ -685,7 +685,7 @@
func.func @conv1d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () {
linalg.conv_1d ins(%in, %filter : memref, memref)
- outs(%out : memref)
+ inits(%out : memref)
return
}
@@ -728,7 +728,7 @@
func.func @conv2d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () {
linalg.conv_2d ins(%in, %filter : memref, memref)
- outs(%out: memref)
+ inits(%out: memref)
return
}
// CHECK-LABEL: @conv2d_no_symbols
@@ -781,7 +781,7 @@
func.func @conv3d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () {
linalg.conv_3d ins(%in, %filter : memref, memref)
- outs(%out : memref)
+ inits(%out : memref)
return
}
@@ -856,7 +856,7 @@
iterator_types = ["parallel"],
indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>]}
ins(%0: memref>)
- outs(%1: memref>) {
+ inits(%1: memref>) {
^bb0(%a: i32, %b: i32):
linalg.yield %a : i32
}
diff --git a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
--- a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
+++ b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
@@ -57,7 +57,7 @@
// CHECK: %[[R2c:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP4]], #[[$MAP5]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]
-// CHECK: ins(%{{.*}} : tensor<1x28x28x1xf32>) outs(%{{.*}} : tensor<1x32x32x1xf32>)
+// CHECK: ins(%{{.*}} : tensor<1x28x28x1xf32>) inits(%{{.*}} : tensor<1x32x32x1xf32>)
// CHECK: ^bb0(%[[VAL:.+]]: f32, %{{.*}}: f32)
// CHECK: linalg.yield %[[VAL]] : f32
// CHECK: return %[[R2c:.+]]
diff --git a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
--- a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
+++ b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir
@@ -34,7 +34,7 @@
iterator_types = ["parallel", "parallel"]
}
ins(%arg0: tensor<10x34xf32>)
- outs(%arg1: tensor<10x34xf32>) {
+ inits(%arg1: tensor<10x34xf32>) {
^bb0(%0: f32, %1: f32):
%i = linalg.index 0 : index
%j = linalg.index 1 : index
@@ -61,7 +61,7 @@
// CHECK: %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]])
// CHECK: %[[INSLICE_2:.+]] = tensor.extract_slice %[[SLICE_2_IN]][0, %[[I2]]] [2, 8] [1, 1]
// CHECK: %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1]
- // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>)
+ // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) inits(%[[OUTSLICE_2]] : tensor<2x8xf32>)
// CHECK: %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]]
// CHECK: scf.yield %[[RESPARTIAL]]
@@ -135,7 +135,7 @@
iterator_types = ["parallel", "parallel"]
}
ins(%arg0: tensor<10x34xf32>)
- outs(%arg1: tensor<10x34xf32>) {
+ inits(%arg1: tensor<10x34xf32>) {
^bb0(%0: f32, %1: f32):
%i = linalg.index 0 : index
%j = linalg.index 1 : index
@@ -154,7 +154,7 @@
// CHECK: %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]])
// CHECK: %[[INSLICE_2:.+]] = tensor.extract_slice %[[SLICE_2_IN]][0, %[[I2]]] [2, 8] [1, 1]
// CHECK: %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1]
- // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>)
+ // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) inits(%[[OUTSLICE_2]] : tensor<2x8xf32>)
// CHECK: %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]]
// CHECK: scf.yield %[[RESPARTIAL]]
diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir
--- a/mlir/test/Dialect/Linalg/named-ops.mlir
+++ b/mlir/test/Dialect/Linalg/named-ops.mlir
@@ -4,11 +4,11 @@
func.func @depthwise_conv_1d_nwc_wcm(%input: tensor<1x12x8xf32>, %filter: tensor<3x8x8xf32>) -> tensor<1x10x8x8xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<1x10x8x8xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
+ %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
// CHECK: depthwise_conv_1d_nwc_wcm
%0 = linalg.depthwise_conv_1d_nwc_wcm {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins(%input, %filter : tensor<1x12x8xf32>, tensor<3x8x8xf32>)
- outs(%fill : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
+ inits(%fill : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32>
return %0 : tensor<1x10x8x8xf32>
}
@@ -18,11 +18,11 @@
func.func @depthwise_conv_1d_nwc_wc(%input: tensor<1x12x8xf32>, %filter: tensor<3x8xf32>) -> tensor<1x10x8xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<1x10x8xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
+ %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
// CHECK: depthwise_conv_1d_nwc_wc
%0 = linalg.depthwise_conv_1d_nwc_wc {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins(%input, %filter : tensor<1x12x8xf32>, tensor<3x8xf32>)
- outs(%fill : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
+ inits(%fill : tensor<1x10x8xf32>) -> tensor<1x10x8xf32>
return %0 : tensor<1x10x8xf32>
}
@@ -32,15 +32,15 @@
func.func @depthwise_conv_2d_nhwc_hwcm_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<2x3x4x2x3xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
+ %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
// CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm
// CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>)
- // CHECK-SAME: outs(%{{.+}} : tensor<2x3x4x2x3xf32>)
+ // CHECK-SAME: inits(%{{.+}} : tensor<2x3x4x2x3xf32>)
%0 = linalg.depthwise_conv_2d_nhwc_hwcm
{ dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
ins(%input, %filter : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>)
- outs(%fill : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
+ inits(%fill : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32>
return %0 : tensor<2x3x4x2x3xf32>
}
@@ -49,11 +49,11 @@
// CHECK: linalg.depthwise_conv_2d_nhwc_hwcm
// CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>)
- // CHECK-SAME: outs(%{{.+}} : memref<2x3x4x2x3xf32>)
+ // CHECK-SAME: inits(%{{.+}} : memref<2x3x4x2x3xf32>)
linalg.depthwise_conv_2d_nhwc_hwcm
{ dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>)
- outs(%output : memref<2x3x4x2x3xf32>)
+ inits(%output : memref<2x3x4x2x3xf32>)
return
}
@@ -63,10 +63,10 @@
// CHECK: %{{.+}} = linalg.depthwise_conv_1d_nw
// CHECK-SAME: {dilations = dense<1> : vector<1xi64>, strides = dense<2> : vector<1xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x96xf32>, tensor<3x96xf32>)
- // CHECK-SAME: outs(%{{.+}} : tensor<1x56x96xf32>) -> tensor<1x56x96xf32>
+ // CHECK-SAME: inits(%{{.+}} : tensor<1x56x96xf32>) -> tensor<1x56x96xf32>
%0 = linalg.depthwise_conv_1d_nwc_wc {dilations = dense<1> : vector<1xi64>, strides = dense<2> : vector<1xi64>}
ins(%input, %filter: tensor<1x113x96xf32>, tensor<3x96xf32>)
- outs(%init: tensor<1x56x96xf32>) -> tensor<1x56x96xf32>
+ inits(%init: tensor<1x56x96xf32>) -> tensor<1x56x96xf32>
return %0: tensor<1x56x96xf32>
}
@@ -76,10 +76,10 @@
// CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwc
// CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x113x96xf32>, tensor<3x3x96xf32>)
- // CHECK-SAME: outs(%{{.+}} : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
+ // CHECK-SAME: inits(%{{.+}} : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
%0 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
ins(%input, %filter: tensor<1x113x113x96xf32>, tensor<3x3x96xf32>)
- outs(%init: tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
+ inits(%init: tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32>
return %0: tensor<1x56x56x96xf32>
}
@@ -88,10 +88,10 @@
// CHECK: linalg.depthwise_conv_2d_nhwc_hwc
// CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x113x113x96xf32>, memref<3x3x96xf32>)
- // CHECK-SAME: outs(%{{.+}} : memref<1x56x56x96xf32>)
+ // CHECK-SAME: inits(%{{.+}} : memref<1x56x56x96xf32>)
linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>)
- outs(%output: memref<1x56x56x96xf32>)
+ inits(%output: memref<1x56x56x96xf32>)
return
}
@@ -101,10 +101,10 @@
// CHECK: %{{.+}} = linalg.depthwise_conv_2d_nchw_chw
// CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x96x113x113xf32>, tensor<96x3x3xf32>)
- // CHECK-SAME: outs(%{{.+}} : tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32>
+ // CHECK-SAME: inits(%{{.+}} : tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32>
%0 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
ins(%input, %filter: tensor<1x96x113x113xf32>, tensor<96x3x3xf32>)
- outs(%init: tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32>
+ inits(%init: tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32>
return %0: tensor<1x96x56x56xf32>
}
@@ -113,25 +113,25 @@
// CHECK: linalg.depthwise_conv_2d_nchw_chw
// CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x96x113x113xf32>, memref<96x3x3xf32>)
- // CHECK-SAME: outs(%{{.+}} : memref<1x96x56x56xf32>)
+ // CHECK-SAME: inits(%{{.+}} : memref<1x96x56x56xf32>)
linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
ins(%input, %filter: memref<1x96x113x113xf32>, memref<96x3x3xf32>)
- outs(%output: memref<1x96x56x56xf32>)
+ inits(%output: memref<1x96x56x56xf32>)
return
}
func.func @depthwise_conv_2d_nhwc_hwcm_tensor_dilated(%input: tensor<2x8x9x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x6x7x2x3xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<2x6x7x2x3xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32>
+ %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32>
// CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm
// CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x8x9x2xf32>, tensor<2x2x2x3xf32>)
- // CHECK-SAME: outs(%{{.+}} : tensor<2x6x7x2x3xf32>)
+ // CHECK-SAME: inits(%{{.+}} : tensor<2x6x7x2x3xf32>)
%0 = linalg.depthwise_conv_2d_nhwc_hwcm
{ dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
ins(%input, %filter : tensor<2x8x9x2xf32>, tensor<2x2x2x3xf32>)
- outs(%fill : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32>
+ inits(%fill : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32>
return %0 : tensor<2x6x7x2x3xf32>
}
@@ -140,11 +140,11 @@
// CHECK: linalg.depthwise_conv_2d_nhwc_hwcm
// CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x8x9x2xf32>, memref<2x2x2x3xf32>)
- // CHECK-SAME: outs(%{{.+}} : memref<2x6x7x2x3xf32>)
+ // CHECK-SAME: inits(%{{.+}} : memref<2x6x7x2x3xf32>)
linalg.depthwise_conv_2d_nhwc_hwcm
{ dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> }
ins(%input, %filter : memref<2x8x9x2xf32>, memref<2x2x2x3xf32>)
- outs(%output : memref<2x6x7x2x3xf32>)
+ inits(%output : memref<2x6x7x2x3xf32>)
return
}
@@ -157,7 +157,7 @@
// CHECK-NOT: dilations =
linalg.depthwise_conv_2d_nhwc_hwc
ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>)
- outs(%output: memref<1x56x56x96xf32>)
+ inits(%output: memref<1x56x56x96xf32>)
return
}
@@ -167,7 +167,7 @@
// expected-error @+1 {{op attribute 'strides' failed to satisfy constraint: 64-bit signless int elements attribute of shape [2]}}
linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2.0> : vector<2xf32>}
ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>)
- outs(%output: memref<1x56x56x96xf32>)
+ inits(%output: memref<1x56x56x96xf32>)
return
}
@@ -177,7 +177,7 @@
// expected-error @+1 {{op attribute 'strides' failed to satisfy constraint: 64-bit signless int elements attribute of shape [2]}}
linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<3xi64> }
ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>)
- outs(%output: memref<1x56x56x96xf32>)
+ inits(%output: memref<1x56x56x96xf32>)
return
}
@@ -187,11 +187,11 @@
func.func @depthwise_conv_3d_ndhwc_dhwcm(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<2x3x13x4x6x6xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
+ %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
// CHECK: depthwise_conv_3d_ndhwc_dhwcm
%0 = linalg.depthwise_conv_3d_ndhwc_dhwcm {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
ins(%input, %filter : tensor<2x6x13x12x6xf32>, tensor<2x1x3x6x6xf32>)
- outs(%fill : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
+ inits(%fill : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32>
return %0 : tensor<2x3x13x4x6x6xf32>
}
@@ -201,11 +201,11 @@
func.func @depthwise_conv_3d_ndhwc_dhwc(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6xf32>) -> tensor<2x3x13x4x6xf32> {
%zero = arith.constant 0.000000e+00 : f32
%init = tensor.empty() : tensor<2x3x13x4x6xf32>
- %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
+ %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
// CHECK: depthwise_conv_3d_ndhwc_dhwc
%0 = linalg.depthwise_conv_3d_ndhwc_dhwc {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
ins(%input, %filter : tensor<2x6x13x12x6xf32>, tensor<2x1x3x6xf32>)
- outs(%fill : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
+ inits(%fill : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
return %0 : tensor<2x3x13x4x6xf32>
}
@@ -217,7 +217,7 @@
// CHECK-SAME: dilations = dense<1> : tensor<1xi64>
// CHECK-SAME: strides = dense<1> : tensor<1xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
- // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+ // CHECK-SAME: inits(%{{.+}} : tensor) -> tensor
%0 = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins (%input, %filter: tensor, tensor)
@@ -233,7 +233,7 @@
// CHECK-SAME: dilations = dense<1> : tensor<1xi64>
// CHECK-SAME: strides = dense<1> : tensor<1xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref)
- // CHECK-SAME: outs(%{{.+}} : memref)
+ // CHECK-SAME: inits(%{{.+}} : memref)
linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins (%input, %filter: memref, memref)
@@ -249,7 +249,7 @@
// CHECK-SAME: dilations = dense<1> : tensor<1xi64>
// CHECK-SAME: strides = dense<1> : tensor<1xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
- // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+ // CHECK-SAME: inits(%{{.+}} : tensor) -> tensor
%0 = linalg.conv_1d_ncw_fcw {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins (%input, %filter: tensor, tensor)
@@ -265,7 +265,7 @@
// CHECK-SAME: dilations = dense<1> : tensor<1xi64>
// CHECK-SAME: strides = dense<1> : tensor<1xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref)
- // CHECK-SAME: outs(%{{.+}} : memref)
+ // CHECK-SAME: inits(%{{.+}} : memref)
linalg.conv_1d_ncw_fcw {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
ins (%input, %filter: memref, memref)
@@ -281,7 +281,7 @@
// CHECK-SAME: dilations = dense<1> : tensor<2xi64>
// CHECK-SAME: strides = dense<1> : tensor<2xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
- // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+ // CHECK-SAME: inits(%{{.+}} : tensor) -> tensor
%0 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
ins (%input, %filter: tensor, tensor)
@@ -297,7 +297,7 @@
// CHECK-SAME: dilations = dense<1> : tensor<2xi64>
// CHECK-SAME: strides = dense<1> : tensor<2xi64>
// CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor)
- // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor
+
// CHECK-SAME: inits(%{{.+}} : tensor) -> tensor %0 = linalg.conv_2d_ngchw_fgchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: tensor, tensor) @@ -313,7 +313,7 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) - // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor + // CHECK-SAME: inits(%{{.+}} : tensor) -> tensor %0 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: tensor, tensor) @@ -329,7 +329,7 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor<64x3x3x32xf32>) - // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor + // CHECK-SAME: inits(%{{.+}} : tensor) -> tensor %0 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: tensor, tensor<64x3x3x32xf32>) @@ -345,7 +345,7 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) - // CHECK-SAME: outs(%{{.+}} : memref) + // CHECK-SAME: inits(%{{.+}} : memref) linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: memref, memref) @@ -361,7 +361,7 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) - // CHECK-SAME: outs(%{{.+}} : memref) + // CHECK-SAME: inits(%{{.+}} : memref) linalg.conv_2d_ngchw_fgchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%input, %filter: memref, memref) @@ -377,7 +377,7 @@ // CHECK-SAME: dilations = dense<1> : tensor<3xi64> // CHECK-SAME: strides = dense<1> : tensor<3xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor, tensor) - // CHECK-SAME: outs(%{{.+}} : tensor) -> tensor + // CHECK-SAME: inits(%{{.+}} : tensor) -> tensor %0 = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins (%input, %filter: tensor, tensor) @@ -393,7 +393,7 @@ // CHECK-SAME: dilations = dense<1> : tensor<3xi64> // CHECK-SAME: strides = dense<1> : tensor<3xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) - // CHECK-SAME: outs(%{{.+}} : memref) + // CHECK-SAME: inits(%{{.+}} : memref) linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins (%input, %filter: memref, memref) @@ -408,15 +408,15 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> +// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> func.func @pooling_nhwc_sum_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> { %fake = tensor.empty() : tensor<3x3xf32> %init = tensor.empty() : tensor<1x2x2x1xf32> %cst = arith.constant 0.000000e+00 : f32 - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> %res = linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, 
%fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>) - outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> + inits(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> return %res : tensor<1x2x2x1xf32> } @@ -427,15 +427,15 @@ // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = dense<1> : tensor<1xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xf32>, tensor<3xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> +// CHECK-SAME: inits(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> func.func @pooling_nwc_sum_tensor(%input: tensor<1x4x1xf32>) -> tensor<1x2x1xf32> { %fake = tensor.empty() : tensor<3xf32> %init = tensor.empty() : tensor<1x2x1xf32> %cst = arith.constant 0.000000e+00 : f32 - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32> %res = linalg.pooling_nwc_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %fake: tensor<1x4x1xf32>, tensor<3xf32>) - outs(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32> + inits(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32> return %res : tensor<1x2x1xf32> } @@ -446,11 +446,11 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xf32>, memref<3x3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x1xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x1xf32>) func.func @pooling_nhwc_sum(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %output: memref<1x2x2x1xf32>) { linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, %fake: memref<1x4x4x1xf32>, memref<3x3xf32>) - outs(%output: memref<1x2x2x1xf32>) + inits(%output: memref<1x2x2x1xf32>) return } @@ -461,11 +461,11 @@ // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = dense<1> : tensor<1xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xf32>, memref<3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<1x2x1xf32>) func.func @pooling_nwc_sum(%input: memref<1x4x1xf32>, %fake: memref<3xf32>, %output: memref<1x2x1xf32>) { linalg.pooling_nwc_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %fake: memref<1x4x1xf32>, memref<3xf32>) - outs(%output: memref<1x2x1xf32>) + inits(%output: memref<1x2x1xf32>) return } @@ -476,15 +476,15 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4x4xf32>, tensor<3x3xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32> +// CHECK-SAME: inits(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32> func.func @pooling_nchw_sum_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> { %fake = tensor.empty() : tensor<3x3xf32> %init = tensor.empty() : tensor<1x1x2x2xf32> %cst = arith.constant 0.000000e+00 : f32 - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32> %res = linalg.pooling_nchw_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, %fake: tensor<1x1x4x4xf32>, tensor<3x3xf32>) - outs(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32> + inits(%fill: tensor<1x1x2x2xf32>) -> 
tensor<1x1x2x2xf32> return %res : tensor<1x1x2x2xf32> } @@ -495,15 +495,15 @@ // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = dense<1> : tensor<1xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4xf32>, tensor<3xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2xf32>) -> tensor<1x1x2xf32> +// CHECK-SAME: inits(%{{.+}} : tensor<1x1x2xf32>) -> tensor<1x1x2xf32> func.func @pooling_ncw_sum_tensor(%input: tensor<1x1x4xf32>) -> tensor<1x1x2xf32> { %fake = tensor.empty() : tensor<3xf32> %init = tensor.empty() : tensor<1x1x2xf32> %cst = arith.constant 0.000000e+00 : f32 - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2xf32>) -> tensor<1x1x2xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x1x2xf32>) -> tensor<1x1x2xf32> %res = linalg.pooling_ncw_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %fake: tensor<1x1x4xf32>, tensor<3xf32>) - outs(%fill: tensor<1x1x2xf32>) -> tensor<1x1x2xf32> + inits(%fill: tensor<1x1x2xf32>) -> tensor<1x1x2xf32> return %res : tensor<1x1x2xf32> } @@ -514,11 +514,11 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x1x4x4xf32>, memref<3x3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<1x1x2x2xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<1x1x2x2xf32>) func.func @pooling_nchw_sum(%input: memref<1x1x4x4xf32>, %fake: memref<3x3xf32>, %output: memref<1x1x2x2xf32>) { linalg.pooling_nchw_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, %fake: memref<1x1x4x4xf32>, memref<3x3xf32>) - outs(%output: memref<1x1x2x2xf32>) + inits(%output: memref<1x1x2x2xf32>) return } @@ -529,11 +529,11 @@ // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = dense<1> : tensor<1xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x1x4xf32>, memref<3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<1x1x2xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<1x1x2xf32>) func.func @pooling_ncw_sum(%input: memref<1x1x4xf32>, %fake: memref<3xf32>, %output: memref<1x1x2xf32>) { linalg.pooling_ncw_sum {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %fake: memref<1x1x4xf32>, memref<3xf32>) - outs(%output: memref<1x1x2xf32>) + inits(%output: memref<1x1x2xf32>) return } @@ -544,15 +544,15 @@ // CHECK-SAME: dilations = dense<1> : tensor<2xi64> // CHECK-SAME: strides = dense<1> : tensor<2xi64> // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> +// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> func.func @pooling_nhwc_max_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> { %fake = tensor.empty() : tensor<3x3xf32> %init = tensor.empty() : tensor<1x2x2x1xf32> %cst = arith.constant 0.000000e+00 : f32 - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>) - outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> + inits(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32> return %res : tensor<1x2x2x1xf32> } @@ -562,15 +562,15 @@ // CHECK-SAME: dilations = dense<1> : tensor<1xi64> // CHECK-SAME: strides = 
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xf32>, tensor<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
 func.func @pooling_nwc_max_tensor(%input: tensor<1x4x1xf32>) -> tensor<1x2x1xf32> {
   %fake = tensor.empty() : tensor<3xf32>
   %init = tensor.empty() : tensor<1x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
   %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xf32>, tensor<3xf32>)
-    outs(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+    inits(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
   return %res : tensor<1x2x1xf32>
 }
@@ -580,16 +580,16 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
 func.func @pooling_nchw_max_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x1x2x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   %res = linalg.pooling_nchw_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+    inits(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   return %res : tensor<1x1x2x2xf32>
 }
@@ -599,16 +599,16 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x1x4xf32>, tensor<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
 func.func @pooling_ncw_max_tensor(%input: tensor<1x1x4xf32>) -> tensor<1x1x2xf32> {
   %fake = tensor.empty() : tensor<3xf32>
   %init = tensor.empty() : tensor<1x1x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
   %res = linalg.pooling_ncw_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x1x4xf32>, tensor<3xf32>)
-    outs(%fill: tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
+    inits(%fill: tensor<1x1x2xf32>) -> tensor<1x1x2xf32>
   return %res : tensor<1x1x2xf32>
 }
@@ -619,11 +619,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xf32>, memref<3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x1xf32>)
 func.func @pooling_nhwc_max(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %output: memref<1x2x2x1xf32>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xf32>, memref<3x3xf32>)
-    outs(%output: memref<1x2x2x1xf32>)
+    inits(%output: memref<1x2x2x1xf32>)
   return
 }
@@ -634,11 +634,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xf32>, memref<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x1xf32>)
 func.func @pooling_nwc_max(%input: memref<1x4x1xf32>, %fake: memref<3xf32>, %output: memref<1x2x1xf32>) {
   linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: memref<1x4x1xf32>, memref<3xf32>)
-    outs(%output: memref<1x2x1xf32>)
+    inits(%output: memref<1x2x1xf32>)
   return
 }
@@ -649,15 +649,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi8>, tensor<3x3xi8>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
 func.func @pooling_nhwc_i8_max_tensor(%input: tensor<1x4x4x1xi8>) -> tensor<1x2x2x1xi8> {
   %fake = tensor.empty() : tensor<3x3xi8>
   %init = tensor.empty() : tensor<1x2x2x1xi8>
   %cst = arith.constant 0 : i8
-  %fill = linalg.fill ins(%cst : i8) outs(%init : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+  %fill = linalg.fill ins(%cst : i8) inits(%init : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi8>, tensor<3x3xi8>)
-    outs(%fill: tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+    inits(%fill: tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
   return %res : tensor<1x2x2x1xi8>
 }
@@ -668,15 +668,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xi8>, tensor<3xi8>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
 func.func @pooling_nwc_i8_max_tensor(%input: tensor<1x4x1xi8>) -> tensor<1x2x1xi8> {
   %fake = tensor.empty() : tensor<3xi8>
   %init = tensor.empty() : tensor<1x2x1xi8>
   %cst = arith.constant 0 : i8
-  %fill = linalg.fill ins(%cst : i8) outs(%init : tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
+  %fill = linalg.fill ins(%cst : i8) inits(%init : tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
   %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xi8>, tensor<3xi8>)
-    outs(%fill: tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
+    inits(%fill: tensor<1x2x1xi8>) -> tensor<1x2x1xi8>
   return %res : tensor<1x2x1xi8>
 }
@@ -687,11 +687,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi8>, memref<3x3xi8>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x1xi8>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x1xi8>)
 func.func @pooling_nhwc_i8_max(%input: memref<1x4x4x1xi8>, %fake: memref<3x3xi8>, %output: memref<1x2x2x1xi8>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xi8>, memref<3x3xi8>)
-    outs(%output: memref<1x2x2x1xi8>)
+    inits(%output: memref<1x2x2x1xi8>)
   return
 }
@@ -702,11 +702,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xi8>, memref<3xi8>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xi8>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x1xi8>)
 func.func @pooling_nwc_i8_max(%input: memref<1x4x1xi8>, %fake: memref<3xi8>, %output: memref<1x2x1xi8>) {
   linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: memref<1x4x1xi8>, memref<3xi8>)
-    outs(%output: memref<1x2x1xi8>)
+    inits(%output: memref<1x2x1xi8>)
   return
 }
@@ -717,15 +717,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi16>, tensor<3x3xi16>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
 func.func @pooling_nhwc_i16_max_tensor(%input: tensor<1x4x4x1xi16>) -> tensor<1x2x2x1xi16> {
   %fake = tensor.empty() : tensor<3x3xi16>
   %init = tensor.empty() : tensor<1x2x2x1xi16>
   %cst = arith.constant 0 : i16
-  %fill = linalg.fill ins(%cst : i16) outs(%init : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+  %fill = linalg.fill ins(%cst : i16) inits(%init : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi16>, tensor<3x3xi16>)
-    outs(%fill: tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+    inits(%fill: tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
   return %res : tensor<1x2x2x1xi16>
 }
@@ -736,15 +736,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xi16>, tensor<3xi16>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
 func.func @pooling_nwc_i16_max_tensor(%input: tensor<1x4x1xi16>) -> tensor<1x2x1xi16> {
   %fake = tensor.empty() : tensor<3xi16>
   %init = tensor.empty() : tensor<1x2x1xi16>
   %cst = arith.constant 0 : i16
-  %fill = linalg.fill ins(%cst : i16) outs(%init : tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
+  %fill = linalg.fill ins(%cst : i16) inits(%init : tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
   %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xi16>, tensor<3xi16>)
-    outs(%fill: tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
+    inits(%fill: tensor<1x2x1xi16>) -> tensor<1x2x1xi16>
   return %res : tensor<1x2x1xi16>
 }
@@ -755,11 +755,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi16>, memref<3x3xi16>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x1xi16>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x1xi16>)
 func.func @pooling_nhwc_i16_max(%input: memref<1x4x4x1xi16>, %fake: memref<3x3xi16>, %output: memref<1x2x2x1xi16>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xi16>, memref<3x3xi16>)
-    outs(%output: memref<1x2x2x1xi16>)
+    inits(%output: memref<1x2x2x1xi16>)
   return
 }
@@ -770,11 +770,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xi16>, memref<3xi16>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xi16>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x1xi16>)
 func.func @pooling_nwc_i16_max(%input: memref<1x4x1xi16>, %fake: memref<3xi16>, %output: memref<1x2x1xi16>) {
   linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: memref<1x4x1xi16>, memref<3xi16>)
-    outs(%output: memref<1x2x1xi16>)
+    inits(%output: memref<1x2x1xi16>)
   return
 }
@@ -785,15 +785,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi32>, tensor<3x3xi32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
 func.func @pooling_nhwc_i32_max_tensor(%input: tensor<1x4x4x1xi32>) -> tensor<1x2x2x1xi32> {
   %fake = tensor.empty() : tensor<3x3xi32>
   %init = tensor.empty() : tensor<1x2x2x1xi32>
   %cst = arith.constant 0 : i32
-  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+  %fill = linalg.fill ins(%cst : i32) inits(%init : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi32>, tensor<3x3xi32>)
-    outs(%fill: tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+    inits(%fill: tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
   return %res : tensor<1x2x2x1xi32>
 }
@@ -804,15 +804,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xi32>, tensor<3xi32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
 func.func @pooling_nwc_i32_max_tensor(%input: tensor<1x4x1xi32>) -> tensor<1x2x1xi32> {
   %fake = tensor.empty() : tensor<3xi32>
  %init = tensor.empty() : tensor<1x2x1xi32>
   %cst = arith.constant 0 : i32
-  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
+  %fill = linalg.fill ins(%cst : i32) inits(%init : tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
   %res = linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xi32>, tensor<3xi32>)
-    outs(%fill: tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
+    inits(%fill: tensor<1x2x1xi32>) -> tensor<1x2x1xi32>
   return %res : tensor<1x2x1xi32>
 }
@@ -823,11 +823,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi32>, memref<3x3xi32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x1xi32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x1xi32>)
 func.func @pooling_nhwc_i32_max(%input: memref<1x4x4x1xi32>, %fake: memref<3x3xi32>, %output: memref<1x2x2x1xi32>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xi32>, memref<3x3xi32>)
-    outs(%output: memref<1x2x2x1xi32>)
+    inits(%output: memref<1x2x2x1xi32>)
   return
 }
@@ -838,11 +838,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xi32>, memref<3xi32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xi32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x1xi32>)
 func.func @pooling_nwc_i32_max(%input: memref<1x4x1xi32>, %fake: memref<3xi32>, %output: memref<1x2x1xi32>) {
   linalg.pooling_nwc_max {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: memref<1x4x1xi32>, memref<3xi32>)
-    outs(%output: memref<1x2x1xi32>)
+    inits(%output: memref<1x2x1xi32>)
   return
 }
@@ -854,15 +854,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
 func.func @pooling_nhwc_min_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   return %res : tensor<1x2x2x1xf32>
 }
@@ -873,15 +873,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x1xf32>, tensor<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
 func.func @pooling_nwc_min_tensor(%input: tensor<1x4x1xf32>) -> tensor<1x2x1xf32> {
   %fake = tensor.empty() : tensor<3xf32>
   %init = tensor.empty() : tensor<1x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
   %res = linalg.pooling_nwc_min {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: tensor<1x4x1xf32>, tensor<3xf32>)
-    outs(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
+    inits(%fill: tensor<1x2x1xf32>) -> tensor<1x2x1xf32>
   return %res : tensor<1x2x1xf32>
 }
@@ -892,11 +892,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME: strides = dense<1> : tensor<2xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xf32>, memref<3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x1xf32>)
 func.func @pooling_nhwc_min(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %output: memref<1x2x2x1xf32>) {
   linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xf32>, memref<3x3xf32>)
-    outs(%output: memref<1x2x2x1xf32>)
+    inits(%output: memref<1x2x2x1xf32>)
   return
 }
@@ -907,11 +907,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<1xi64>
 // CHECK-SAME: strides = dense<1> : tensor<1xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x1xf32>, memref<3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x1xf32>)
 func.func @pooling_nwc_min(%input: memref<1x4x1xf32>, %fake: memref<3xf32>, %output: memref<1x2x1xf32>) {
   linalg.pooling_nwc_min {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %fake: memref<1x4x1xf32>, memref<3xf32>)
-    outs(%output: memref<1x2x1xf32>)
+    inits(%output: memref<1x2x1xf32>)
   return
 }
@@ -922,15 +922,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_sum_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -941,11 +941,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x2x1xf32>)
 func.func @pooling_ndhwc_sum(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
   linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-    outs(%output: memref<1x2x2x2x1xf32>)
+    inits(%output: memref<1x2x2x2x1xf32>)
   return
 }
@@ -956,15 +956,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_max_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -975,11 +975,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x2x1xf32>)
 func.func @pooling_ndhwc_max(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
   linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-    outs(%output: memref<1x2x2x2x1xf32>)
+    inits(%output: memref<1x2x2x2x1xf32>)
   return
 }
@@ -990,15 +990,15 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME: inits(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_min_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -1009,11 +1009,11 @@
 // CHECK-SAME: dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME: strides = dense<1> : tensor<3xi64>
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<1x2x2x2x1xf32>)
 func.func @pooling_ndhwc_min(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
   linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-    outs(%output: memref<1x2x2x2x1xf32>)
+    inits(%output: memref<1x2x2x2x1xf32>)
   return
 }
@@ -1056,8 +1056,8 @@
 func.func @batch_reduce_matmul(%arg0: tensor<8x128x256xf32>, %arg1: tensor<8x256x512xf32>, %arg2: tensor<128x512xf32>) -> tensor<128x512xf32> {
   // CHECK: %{{.+}} = linalg.batch_reduce_matmul
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<8x128x256xf32>, tensor<8x256x512xf32>)
-  // CHECK-SAME: outs(%{{.+}} : tensor<128x512xf32>) -> tensor<128x512xf32>
-  %0 = linalg.batch_reduce_matmul ins(%arg0, %arg1 : tensor<8x128x256xf32>, tensor<8x256x512xf32>) outs(%arg2: tensor<128x512xf32>) -> tensor<128x512xf32>
+  // CHECK-SAME: inits(%{{.+}} : tensor<128x512xf32>) -> tensor<128x512xf32>
+  %0 = linalg.batch_reduce_matmul ins(%arg0, %arg1 : tensor<8x128x256xf32>, tensor<8x256x512xf32>) inits(%arg2: tensor<128x512xf32>) -> tensor<128x512xf32>
   return %0: tensor<128x512xf32>
 }
@@ -1066,8 +1066,8 @@
 func.func @batch_reduce_matmul(%arg0: memref, %arg1: memref, %arg2: memref) {
   // CHECK: linalg.batch_reduce_matmul
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME: outs(%{{.+}} : memref)
-  linalg.batch_reduce_matmul ins(%arg0, %arg1 : memref, memref) outs(%arg2: memref)
+  // CHECK-SAME: inits(%{{.+}} : memref)
+  linalg.batch_reduce_matmul ins(%arg0, %arg1 : memref, memref) inits(%arg2: memref)
   return
 }
@@ -1076,9 +1076,9 @@
 // CHECK-LABEL: func @matmul_transpose_b
 // CHECK: linalg.matmul_transpose_b
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<3x5xf32>, memref<7x5xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<3x7xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<3x7xf32>)
 func.func @matmul_transpose_b(%arg0: memref<3x5xf32>, %arg1: memref<7x5xf32>, %arg2: memref<3x7xf32>) {
-  linalg.matmul_transpose_b ins(%arg0, %arg1 : memref<3x5xf32>, memref<7x5xf32>) outs(%arg2: memref<3x7xf32>)
+  linalg.matmul_transpose_b ins(%arg0, %arg1 : memref<3x5xf32>, memref<7x5xf32>) inits(%arg2: memref<3x7xf32>)
   return
 }
@@ -1087,8 +1087,8 @@
 // CHECK-LABEL: func @batchmatmul_transpose_b
 // CHECK: linalg.batch_matmul_transpose_b
 // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x3x5xf32>, memref<2x7x5xf32>)
-// CHECK-SAME: outs(%{{.+}} : memref<2x3x7xf32>)
+// CHECK-SAME: inits(%{{.+}} : memref<2x3x7xf32>)
 func.func @batchmatmul_transpose_b(%arg0: memref<2x3x5xf32>, %arg1: memref<2x7x5xf32>, %arg2: memref<2x3x7xf32>) {
-  linalg.batch_matmul_transpose_b ins(%arg0, %arg1 : memref<2x3x5xf32>, memref<2x7x5xf32>) outs(%arg2: memref<2x3x7xf32>)
+  linalg.batch_matmul_transpose_b ins(%arg0, %arg1 : memref<2x3x5xf32>, memref<2x7x5xf32>) inits(%arg2: memref<2x3x7xf32>)
   return
 }
diff --git a/mlir/test/Dialect/Linalg/namedop_conversion.mlir b/mlir/test/Dialect/Linalg/namedop_conversion.mlir
--- a/mlir/test/Dialect/Linalg/namedop_conversion.mlir
+++ b/mlir/test/Dialect/Linalg/namedop_conversion.mlir
@@ -4,9 +4,9 @@
 func.func @depthwise_conv(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
   // CHECK-DAG: %[[KERNEL:.+]] = tensor.collapse_shape %arg1 {{\[\[}}0], [1], [2, 3]]
   // CHECK-DAG: %[[INIT:.+]] = tensor.collapse_shape %arg2 {{\[\[}}0], [1], [2], [3, 4]]
-  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor) outs(%[[INIT]] : tensor)
+  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor) inits(%[[INIT]] : tensor)
   // CHECK: %[[OUT:.+]] = tensor.expand_shape %[[CONV]] {{\[\[}}0], [1], [2], [3, 4]]
-  %0 = linalg.depthwise_conv_2d_nhwc_hwcm {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.depthwise_conv_2d_nhwc_hwcm {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor) inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
@@ -17,8 +17,8 @@
 func.func @depthwise_conv_q(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3 : i32, %arg4 : i32) -> tensor {
   // CHECK-DAG: %[[KERNEL:.+]] = tensor.collapse_shape %arg1 {{\[\[}}0], [1], [2, 3]]
   // CHECK-DAG: %[[INIT:.+]] = tensor.collapse_shape %arg2 {{\[\[}}0], [1], [2], [3, 4]]
-  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]], %arg3, %arg4 : tensor, tensor, i32, i32) outs(%[[INIT]] : tensor)
+  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]], %arg3, %arg4 : tensor, tensor, i32, i32) inits(%[[INIT]] : tensor)
   // CHECK: %[[OUT:.+]] = tensor.expand_shape %[[CONV]] {{\[\[}}0], [1], [2], [3, 4]]
-  %0 = linalg.depthwise_conv_2d_nhwc_hwcm_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1, %arg3, %arg4 : tensor, tensor, i32, i32) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.depthwise_conv_2d_nhwc_hwcm_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1, %arg3, %arg4 : tensor, tensor, i32, i32) inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
@@ -18,15 +18,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -45,15 +45,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -72,15 +72,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -99,15 +99,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -126,15 +126,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -153,15 +153,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
{__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -180,15 +180,15 @@ %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -207,15 +207,15 @@ %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -234,15 +234,15 @@ %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : 
f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -261,15 +261,15 @@ %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -288,15 +288,15 @@ %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -315,15 +315,15 @@ %0 = bufferization.alloc_tensor() : tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "false"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: 
{__inplace_operands_attr__ = ["true"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -344,13 +344,13 @@ // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -371,13 +371,13 @@ // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> + %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["true"]} %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32> // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]} - %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> return %5 : tensor<256x256xf32> } @@ -397,13 +397,13 @@ // CHECK: {__inplace_operands_attr__ = ["false"]} %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> + %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> 
+ %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -424,13 +424,13 @@
// CHECK: {__inplace_operands_attr__ = ["false"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -453,11 +453,11 @@
// CHECK: {__inplace_operands_attr__ = ["true"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -505,13 +505,13 @@
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -532,13 +532,13 @@
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -559,13 +559,13 @@
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -586,13 +586,13 @@
// CHECK: {__inplace_operands_attr__ = ["false"]}
%4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -615,11 +615,11 @@
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
@@ -642,10 +642,10 @@
// CHECK: {__inplace_operands_attr__ = ["true"]}
%3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+ %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
// CHECK: {__inplace_operands_attr__ = ["none", "true"]}
- %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
// CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
- %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+ %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
return %5 : tensor<256x256xf32>
}
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
@@ -22,8 +22,8 @@
/// Inplaceable, no alloc
// CHECK-NOT: alloc
- // CHECK: linalg.fill ins(%[[F0]] : f32) outs(%[[A]] : memref>)
- %r = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+ // CHECK: linalg.fill ins(%[[F0]] : f32) inits(%[[A]] : memref>)
+ %r = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
// CHECK: return
// CHECK-NOT: tensor
@@ -45,8 +45,8 @@
// CHECK: %[[D0:.*]] = memref.dim %[[A]], {{.*}} : memref>
// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[D0]]) {alignment = 64 : i64} : memref
- // CHECK: linalg.fill ins(%[[F0]] : f32) outs(%[[ALLOC]] : memref)
- %r = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+ // CHECK: linalg.fill ins(%[[F0]] : f32) inits(%[[ALLOC]] : memref)
+ %r = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
// CHECK-NOT: dealloc
// CHECK: return %[[ALLOC]] : memref
@@ -67,14 +67,14 @@
/// Cross-op multiple uses of %A, the first op which has interfering reads must alloc.
// CHECK: %[[ALLOC:.*]] = memref.alloc
- // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[ALLOC]]
- %f = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+ // CHECK: linalg.fill ins({{.*}}{{.*}}inits(%[[ALLOC]]
+ %f = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
/// The second op has no interfering reads and can reuse.
// CHECK-NOT: alloc
- // CHECK: linalg.matmul ins(%[[ALLOC]], %[[ALLOC]]{{.*}}) outs(%[[A]]
+ // CHECK: linalg.matmul ins(%[[ALLOC]], %[[ALLOC]]{{.*}}) inits(%[[A]]
%r = linalg.matmul ins(%f, %f: tensor, tensor)
- outs(%A: tensor)
+ inits(%A: tensor)
-> tensor
// CHECK: memref.dealloc %[[ALLOC]]
@@ -91,7 +91,7 @@
/// Within op multiple uses of %A, must alloc.
// CHECK: alloc
%r = linalg.matmul ins(%A, %A: tensor, tensor)
- outs(%A: tensor)
+ inits(%A: tensor)
-> tensor
// CHECK-NOT: dealloc
return %r: tensor
@@ -181,8 +181,8 @@
tensor<128x192xf32> to tensor<8x16xf32>
// linalg.fill is inplace.
- // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[C_SLICE]]
- %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<8x16xf32>) -> tensor<8x16xf32>
+ // CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[C_SLICE]]
+ %5 = linalg.fill ins(%cst : f32) inits(%4 : tensor<8x16xf32>) -> tensor<8x16xf32>
// CHECK: scf.for %[[K:.*]] =
%6 = scf.for %arg7 = %c0 to %c256 step %c32 iter_args(%arg8 = %5) -> (tensor<8x16xf32>) {
@@ -192,9 +192,9 @@
tensor<256x16xf32> to tensor<32x16xf32>
// linalg.matmul is inplace as well as the enclosing scf.for.
- // CHECK: linalg.matmul ins({{.*}} outs(%[[C_SLICE]]
+ // CHECK: linalg.matmul ins({{.*}} inits(%[[C_SLICE]]
%10 = linalg.matmul ins(%8, %9 : tensor<8x32xf32>, tensor<32x16xf32>)
- outs(%arg8 : tensor<8x16xf32>)
+ inits(%arg8 : tensor<8x16xf32>)
-> tensor<8x16xf32>
scf.yield %10 : tensor<8x16xf32>
}
@@ -231,7 +231,7 @@
%sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor
%ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32>
- %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
+ %FA = linalg.fill ins(%f0 : f32) inits(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
%rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor
%rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor
@@ -250,7 +250,7 @@
indexing_maps = [affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
- ins(%arg1 : tensor) outs(%arg2 : tensor) {
+ ins(%arg1 : tensor) inits(%arg2 : tensor) {
^bb0(%arg3: i32, %arg4 : f32):
%iv1 = linalg.index 1 : index
%1 = arith.index_cast %arg3: i32 to index
@@ -266,7 +266,7 @@
// CHECK-SAME: ) {
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[ARG1]] :
-// CHECK-SAME: outs(%[[ARG2]] :
+// CHECK-SAME: inits(%[[ARG2]] :
// CHECK: %[[YIELD:.+]] = memref.load %[[ARG0]]
// CHECK: linalg.yield %[[YIELD]]
@@ -281,14 +281,14 @@
%s1: index, %s2: index, %cst: f32) -> tensor {
- // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t3]] : {{.*}})
+ // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) inits(%[[t3]] : {{.*}})
%r = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
affine_map<(d0, d1) -> (d1)>,
affine_map<(d0, d1)-> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%t1, %t2 : tensor, tensor)
- outs(%t3 : tensor) {
+ inits(%t3 : tensor) {
^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
%add = arith.addf %arg0, %arg1 : f32
linalg.yield %add : f32
@@ -316,14 +316,14 @@
// Make sure that a copy is inserted here.
// CHECK: %[[ALLOC:.*]] = memref.alloc
// CHECK: memref.copy %[[t0]], %[[ALLOC]]
- // CHECK: linalg.generic {{.*}} outs(%[[ALLOC]] : memref
+ // CHECK: linalg.generic {{.*}} inits(%[[ALLOC]] : memref
- %r0 =linalg.generic #trait outs (%t0 : tensor) {
+ %r0 = linalg.generic #trait inits(%t0 : tensor) {
^bb(%0: f32) :
%a = arith.addf %cst, %0 : f32
linalg.yield %a : f32
} -> (tensor)
- // CHECK: linalg.generic {{.*}} outs(%[[ALLOC]] : memref
+ // CHECK: linalg.generic {{.*}} inits(%[[ALLOC]] : memref
- %r1 = linalg.generic #trait outs (%r0 : tensor) {
+ %r1 = linalg.generic #trait inits(%r0 : tensor) {
^bb(%0: f32) :
linalg.yield %cst : f32
} -> (tensor)
@@ -343,7 +343,7 @@
// CHECK: linalg.map { arith.addf } ins(%[[LHS]], %[[RHS]] : memref<64xf32
%add = linalg.map
ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%lhs_elem: f32, %rhs_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
@@ -360,7 +360,7 @@
// CHECK: linalg.reduce { arith.addf } ins(%[[INPUT]] : memref<16x32x64xf32
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16x64xf32>)
+ inits(%init:tensor<16x64xf32>)
dimensions = [1]
(%in: f32, %out: f32) {
%0 = arith.addf %out, %in: f32
@@ -378,7 +378,7 @@
// CHECK: linalg.transpose ins(%[[ARG0]] : memref<16x32x64xf32
%transpose = linalg.transpose
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<32x64x16xf32>)
+ inits(%init:tensor<32x64x16xf32>)
permutation = [1, 2, 0]
func.return %transpose : tensor<32x64x16xf32>
}
@@ -391,7 +391,7 @@
%init: tensor<8x16x32xf32>) -> tensor<8x16x32xf32> {
%bcast = linalg.broadcast
ins(%input:tensor<8x32xf32>)
- outs(%init:tensor<8x16x32xf32>)
+ inits(%init:tensor<8x16x32xf32>)
dimensions = [1]
func.return %bcast : tensor<8x16x32xf32>
}
diff --git a/mlir/test/Dialect/Linalg/pad_fusion.mlir
--- a/mlir/test/Dialect/Linalg/pad_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/pad_fusion.mlir
@@ -10,7 +10,7 @@
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
- ins(%arg0 : tensor) outs(%init : tensor) {
+ ins(%arg0 : tensor) inits(%init : tensor) {
^bb0(%arg6 : f32, %arg7 : f32):
%1 = arith.mulf %arg6, %arg6 : f32
linalg.yield %1 : f32
@@ -38,13 +38,13 @@
// CHECK-DAG: %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
// CHECK-DAG: %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG2]], %[[ARG4]], %[[SOURCE_D1]]]
// CHECK: %[[INIT:.+]] = tensor.empty(%[[TARGET_D0]], %[[TARGET_D1]])
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}outs(%[[INIT]]
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}inits(%[[INIT]]
// CHECK-DAG: %[[SIZE_D0:.+]] = tensor.dim %[[SOURCE]], %[[C0]]
// CHECK-DAG: %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
// CHECK-SAME: [%[[ARG1]], %[[ARG2]]] [%[[SIZE_D0]], %[[SIZE_D1]]] [1, 1]
// CHECK: %[[SOURCE:.+]] = linalg.generic
-// CHECK-SAME: outs(%[[SLICE]] : tensor)
+// CHECK-SAME: inits(%[[SLICE]] : tensor)
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[SOURCE]] into %[[FILL]]
// CHECK-SAME: [%[[ARG1]], %[[ARG2]]] [%[[SIZE_D0]], %[[SIZE_D1]]] [1, 1]
// CHECK: return %[[RESULT]]
@@ -59,7 +59,7 @@
%0 = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
iterator_types = ["parallel", "parallel"]}
- ins(%arg0 : tensor) outs(%init : tensor<42x?xf32>) {
+ ins(%arg0 : tensor) inits(%init : tensor<42x?xf32>) {
^bb0(%arg4 : f32, %arg5 : f32):
%1 = arith.mulf %arg4, %arg4 : f32
linalg.yield %1 : f32
@@ -82,12 +82,12 @@
// CHECK-DAG: %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
// CHECK-DAG: %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]], %[[SOURCE_D1]]]
// CHECK: %[[INIT:.+]] = tensor.empty(%[[TARGET_D1]])
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ARG3]]{{.*}}outs(%[[INIT]]
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ARG3]]{{.*}}inits(%[[INIT]]
// CHECK-DAG: %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
// CHECK: %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
// CHECK-SAME: [3, %[[ARG1]]] [42, %[[SIZE_D1]]] [1, 1]
// CHECK: %[[SOURCE:.+]] = linalg.generic
-// CHECK-SAME: outs(%[[SLICE]] : tensor<42x?xf32>)
+// CHECK-SAME: inits(%[[SLICE]] : tensor<42x?xf32>)
// CHECK: %[[RESULT:.+]] = tensor.insert_slice %[[SOURCE]] into %[[FILL]]
// CHECK-SAME: [3, %[[ARG1]]] [42, %[[SIZE_D1]]] [1, 1]
// CHECK: return %[[RESULT]]
diff --git a/mlir/test/Dialect/Linalg/parallel-loops.mlir
--- a/mlir/test/Dialect/Linalg/parallel-loops.mlir
+++ b/mlir/test/Dialect/Linalg/parallel-loops.mlir
@@ -8,7 +8,7 @@
indexing_maps = [#map0, #map0, #map0],
iterator_types = ["parallel", "parallel"]}
ins(%lhs, %rhs : memref<2x2xf32>, memref<2x2xf32>)
- outs(%sum : memref<2x2xf32>) {
+ inits(%sum : memref<2x2xf32>) {
^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32):
%0 = arith.addf %lhs_in, %rhs_in : f32
linalg.yield %0 : f32
@@ -41,7 +41,7 @@
func.func @lower_outer_parallel(%A: memref, %B: memref) {
linalg.generic #trait
ins(%A : memref)
- outs(%B : memref) {
+ inits(%B : memref) {
^bb0(%a: f32, %b: f32):
linalg.yield %a: f32
}
@@ -74,7 +74,7 @@
func.func @lower_mixed_parallel(%A: memref, %B: memref) {
linalg.generic #trait
ins(%A : memref)
- outs(%B : memref) {
+ inits(%B : memref) {
^bb0(%a: f32, %b: f32):
linalg.yield %a: f32
}
diff --git a/mlir/test/Dialect/Linalg/promote.mlir
--- a/mlir/test/Dialect/Linalg/promote.mlir
+++ b/mlir/test/Dialect/Linalg/promote.mlir
@@ -25,7 +25,7 @@
linalg.matmul
ins(%11, %14: memref>, memref>)
- outs(%17: memref>)
+ inits(%17: memref>)
}
}
}
@@ -56,7 +56,7 @@
// CHECK: memref.copy %[[vB]], %[[partialB]] : memref> to memref>
// CHECK: memref.copy %[[vC]], %[[partialC]] : memref> to memref>
//
-// CHECK: linalg.matmul ins(%[[partialA]], %[[partialB]]{{.*}} outs(%[[partialC]]
+// CHECK: linalg.matmul ins(%[[partialA]], %[[partialB]]{{.*}} inits(%[[partialC]]
//
// CHECK: memref.copy %[[partialC]], %[[vC]] :
// CHECK: memref> to
@@ -95,7 +95,7 @@
linalg.matmul
ins(%11, %14: memref>, memref>)
- outs(%17: memref>)
+ inits(%17: memref>)
}
}
}
@@ -126,7 +126,7 @@
// CHECK: memref.copy %[[vB_f64]], %[[partialB_f64]] : memref> to memref>
// CHECK: memref.copy %[[vC_f64]], %[[partialC_f64]] : memref> to memref>
//
-// CHECK: linalg.matmul ins(%[[partialA_f64]], %[[partialB_f64]]{{.*}} outs(%[[partialC_f64]]
+// CHECK: linalg.matmul ins(%[[partialA_f64]], %[[partialB_f64]]{{.*}} inits(%[[partialC_f64]]
//
// CHECK: memref.copy %[[partialC_f64]], %[[vC_f64]] :
// CHECK: memref> to
@@ -146,7 +146,7 @@
func.func @gemm_shared(%a : memref, %b : memref, %c : memref) {
linalg.matmul ins(%a, %b: memref, memref)
- outs(%c: memref)
+ inits(%c: memref)
return
}
@@ -177,7 +177,7 @@
// CHECK-NEXT: memref.copy %[[subview_B]], %[[shared_B]] : memref> to memref, #gpu.address_space>
// CHECK-NEXT: gpu.barrier
-// CHECK: linalg.matmul ins(%[[shared_A]], %[[shared_B]]{{.*}} outs(%[[subview_C]]
+// CHECK: linalg.matmul ins(%[[shared_A]], %[[shared_B]]{{.*}} inits(%[[subview_C]]
transform.sequence failures(propagate) {
@@ -193,7 +193,7 @@
func.func @gemm_private(%a : memref, %b : memref, %c : memref) {
linalg.matmul ins(%a, %b: memref, memref)
- outs(%c: memref)
+ inits(%c: memref)
return
}
@@ -219,7 +219,7 @@
// CHECK-NEXT: memref.copy %[[subview_A]], %[[private_A]] : memref> to memref, #gpu.address_space>
// CHECK-NEXT: memref.copy %[[subview_B]], %[[private_B]] : memref> to memref, #gpu.address_space>
-// CHECK: linalg.matmul ins(%[[private_A]], %[[private_B]]{{.*}} outs(%[[subview_C]]
+// CHECK: linalg.matmul ins(%[[private_A]], %[[private_B]]{{.*}} inits(%[[subview_C]]
transform.sequence failures(propagate) {
@@ -258,9 +258,9 @@
// CHECK-COUNT-3: memref.copy
// CHECK: linalg.generic
// CHECK-SAME: ins(%[[a_pro_subview]], %[[b_pro_subview]]
- // CHECK-SAME: outs(%[[c_pro_subview]]
+ // CHECK-SAME: inits(%[[c_pro_subview]]
- linalg.generic {indexing_maps = [#map6, #map7, #map8], iterator_types = ["parallel", "parallel", "reduction"]} ins(%13, %14 : memref>, memref<128x32xf32, strided<[?, ?], offset: ?>>) outs(%9 : memref>) {
+ linalg.generic {indexing_maps = [#map6, #map7, #map8], iterator_types = ["parallel", "parallel", "reduction"]} ins(%13, %14 : memref>, memref<128x32xf32, strided<[?, ?], offset: ?>>) inits(%9 : memref>) {
^bb0(%arg9: f32, %arg10: f32, %arg11: f32):
%15 = arith.mulf %arg9, %arg10 : f32
%16 = arith.addf %arg11, %15 : f32
diff --git a/mlir/test/Dialect/Linalg/promotion_options.mlir
--- a/mlir/test/Dialect/Linalg/promotion_options.mlir
+++ b/mlir/test/Dialect/Linalg/promotion_options.mlir
@@ -3,7 +3,7 @@
func.func @gemm(%a : memref, %b : memref, %c : memref) {
linalg.matmul ins(%a, %b: memref, memref)
- outs(%c: memref)
+ inits(%c: memref)
return
}
@@ -29,7 +29,7 @@
// CHECK: memref.copy %[[svA]], %[[svAA]]
// CHECK: memref.copy %[[svC]], %[[svCC]]
-// CHECK: linalg.matmul ins(%[[VA]], %[[svB]]{{.*}} outs(%[[VC]]
+// CHECK: linalg.matmul ins(%[[VA]], %[[svB]]{{.*}} inits(%[[VC]]
// CHECK: memref.copy %[[svCC]], %[[svC]]
// CHECK: memref.dealloc %[[tmpA]]
// CHECK: memref.dealloc %[[tmpC]]
diff --git a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
--- a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
@@ -11,7 +11,7 @@
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
ins(%0, %arg1 : tensor, tensor)
- outs(%init : tensor) {
+ inits(%init : tensor) {
^bb0(%arg2 : f32, %arg3:f32, %arg4 : f32):
%2 = arith.addf %arg2, %arg3 : f32
linalg.yield %2 : f32
@@ -44,19 +44,19 @@
%fill = linalg.generic {
indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
- outs(%init : tensor) {
+ inits(%init : tensor) {
^bb0(%arg2: f32):
linalg.yield %cst : f32
} -> tensor
%0 = tensor.expand_shape %fill [[0, 1], [2]] : tensor into tensor<1x?x?xf32>
%1 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x?x?xf32>, tensor<1x?x?xf32>)
- outs(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
+ inits(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
return %1 : tensor<1x?x?xf32>
}
// CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)
// CHECK: func @control_consumer_reshape_fusion
// CHECK: %[[FILL:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP]]]
-// CHECK-SAME: outs(%{{.+}} : tensor<1x?x?xf32>)
+// CHECK-SAME: inits(%{{.+}} : tensor<1x?x?xf32>)
// CHECK: linalg.batch_matmul
-// CHECK-SAME: outs(%[[FILL]] : tensor<1x?x?xf32>)
+// CHECK-SAME: inits(%[[FILL]] : tensor<1x?x?xf32>)
diff --git a/mlir/test/Dialect/Linalg/reshape_fusion.mlir
--- a/mlir/test/Dialect/Linalg/reshape_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_fusion.mlir
@@ -14,7 +14,7 @@
indexing_maps = [#map0, #map1, #map2, #map1],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0, %arg1, %arg2 : tensor, tensor, f32)
- outs(%arg1 : tensor) {
+ inits(%arg1 : tensor) {
^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
%1 = arith.mulf %arg3, %arg4 : f32
%2 = arith.addf %1, %arg5 : f32
@@ -38,7 +38,7 @@
// CHECK-SAME: indexing_maps = [#[[MAP5]], #[[MAP6]], #[[MAP7]], #[[MAP6]]]
// CHECK-SAME: ["parallel", "parallel", "parallel", "parallel"]
// CHECK-SAME: ins(%[[ARG0]], %[[T1]], %[[ARG2]] : tensor, tensor, f32)
-// CHECK-SAME: outs(%[[T2]] : tensor)
+// CHECK-SAME: inits(%[[T2]] : tensor)
// CHECK: %[[T4:.+]] = tensor.collapse_shape %[[T3]]
// CHECK-SAME: [0], [1], [2, 3]
// CHECK-SAME: tensor into tensor
@@ -57,7 +57,7 @@
indexing_maps = [#map0, #map0, #map1, #map0],
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1, %arg2 : tensor, tensor, f32)
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
%1 = arith.mulf %arg3, %arg4 : f32
%2 = arith.addf %1, %arg5 : f32
@@ -87,7 +87,7 @@
// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]], #[[MAP3]], #[[MAP2]]]
// CHECK-SAME: ["parallel", "parallel", "parallel", "parallel"]
// CHECK-SAME: ins(%[[T0]], %[[T1]], %[[ARG2]] : tensor, tensor, f32)
-// CHECK-SAME: outs(%[[T2]] : tensor)
+// CHECK-SAME: inits(%[[T2]] : tensor)
// CHECK: return %[[T3]] : tensor
@@ -102,7 +102,7 @@
affine_map<(d0, d1, d2) -> (d0, d2, d1)>],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%a, %b : tensor, tensor)
- outs(%a : tensor) {
+ inits(%a : tensor) {
^bb0(%arg0 : f32, %arg1: f32, %s: f32):
%1 = arith.addf %arg0, %arg1 : f32
linalg.yield %1 : f32
@@ -130,7 +130,7 @@
// CHECK-SAME: indexing_maps = [#[[MAP8]], #[[MAP9]], #[[MAP10]]]
// CHECK-SAME: ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]
// CHECK-SAME: ins(%[[T0]], %[[T1]] : tensor<3x4x?x?x2x?xf32>, tensor<3x4x?x?xf32>)
-// CHECK-SAME: outs(%[[T2]] : tensor)
+// CHECK-SAME: inits(%[[T2]] : tensor)
// CHECK: return %[[T3]] : tensor
// -----
@@ -147,7 +147,7 @@
indexing_maps = [#map0, #map0, #map0],
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %cst : tensor<264x4xf32>, tensor<264x4xf32>)
- outs(%0 : tensor<264x4xf32>) {
+ inits(%0 : tensor<264x4xf32>) {
^bb0(%arg1: f32, %arg2: f32, %s: f32):
%2 = arith.mulf %arg1, %arg2 : f32
linalg.yield %2 : f32
@@ -173,7 +173,7 @@
// CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]], #[[MAP2]]]
// CHECK-SAME: ["parallel", "parallel", "parallel"]
// CHECK-SAME: ins(%[[T0]], %[[CST]] :
-// CHECK-SAME: outs(%[[T1]] : tensor<8x33x4xf32>)
+// CHECK-SAME: inits(%[[T1]] : tensor<8x33x4xf32>)
// CHECK: return %[[T2]] : tensor<8x33x4xf32>
// -----
@@ -190,7 +190,7 @@
indexing_maps = [#map0, #map1, #map1],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0, %arg1 : tensor, tensor)
- outs(%0 : tensor) {
+ inits(%0 : tensor) {
^bb0(%arg3: i32, %arg4: i32, %s: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
@@ -239,7 +239,7 @@
indexing_maps = [#map0, #map0, #map0],
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb0(%arg3: i32, %arg4: i32, %s: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
@@ -288,7 +288,7 @@
affine_map<(d0, d1, d2) -> (d0, d2, d1)>],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%a, %b : tensor<210x6x4xi32>, tensor<210x4xi32>)
- outs(%shape : tensor<6x4x210xi32>) {
+ inits(%shape : tensor<6x4x210xi32>) {
^bb0(%arg3 : i32, %arg4: i32, %s: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
@@ -329,7 +329,7 @@
// CHECK: %[[T4:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
// CHECK-SAME: ins(%[[T1]], %[[T2]] : tensor<5x6x7x2x3x4xi32>, tensor<5x6x7x4xi32>)
-// CHECK-SAME: outs(%[[T3]] : tensor<2x3x4x5x6x7xi32>)
+// CHECK-SAME: inits(%[[T3]] : tensor<2x3x4x5x6x7xi32>)
// CHECK: ^{{.+}}(
// CHECK-SAME: %[[ARG8:[a-zA-Z0-9_]+]]: i32, %[[ARG9:[a-zA-Z0-9_]+]]: i32,
// CHECK-SAME: %[[ARG10:[a-zA-Z0-9_]+]]: i32)
@@ -362,7 +362,7 @@
affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%0 : tensor<264x?xi32>)
- outs(%shape : tensor<264x?x4xi32>) {
+ inits(%shape : tensor<264x?x4xi32>) {
^bb0(%arg1: i32, %s: i32):
%idx0 = linalg.index 0 : index
%idx1 = linalg.index 1 : index
@@ -418,7 +418,7 @@
indexing_maps = [#map0, #map0, #map1],
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg0 : tensor) {
+ inits(%arg0 : tensor) {
^bb0(%arg3: f32, %arg4: f32, %s: f32):
%1 = arith.mulf %arg3, %arg4 : f32
linalg.yield %1 : f32
@@ -446,7 +446,7 @@
// CHECK-SAME: indexing_maps = [#[[MAP4]], #[[MAP4]], #[[MAP5]]]
// CHECK-SAME: ["parallel", "parallel", "parallel", "parallel"]
// CHECK-SAME: ins(%[[T0]], %[[T1]] : tensor, tensor)
-// CHECK-SAME: outs(%[[T2]] : tensor)
+// CHECK-SAME: inits(%[[T2]] : tensor)
// CHECK: return %[[T3]] : tensor
// -----
@@ -459,7 +459,7 @@
%3 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
- ins(%0 : tensor) outs(%2 : tensor) {
+ ins(%0 : tensor) inits(%2 : tensor) {
^bb0(%arg1 : f32, %arg2: f32):
%4 = arith.addf %arg1, %arg1 : f32
linalg.yield %4 : f32
@@ -484,7 +484,7 @@
affine_map<(d0) -> (d0)>],
iterator_types = ["parallel"]}
ins(%0, %arg1 : tensor<2xi64>, tensor)
- outs(%1 : tensor<2xi64>) {
+ inits(%1 : tensor<2xi64>) {
^bb0(%arg4: i64, %arg5: i64, %arg6: i64):
%3 = arith.addi %arg4, %arg5 : i64
linalg.yield %3 : i64
@@ -512,7 +512,7 @@
affine_map<(d0, d1, d2) -> (d2, d0, d1)>],
iterator_types = ["parallel", "parallel", "parallel"]}
ins(%a, %b : tensor, tensor)
- outs(%a, %a : tensor, tensor) {
+ inits(%a, %a : tensor, tensor) {
^bb0(%arg0 : f32, %arg1: f32, %s: f32, %t : f32):
%1 = arith.addf %arg0, %arg1 : f32
linalg.yield %1, %1 : f32, f32
@@ -537,7 +537,7 @@
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]]
// CHECK-SAME: ins(%[[RESHAPE0]], %[[RESHAPE1]] :
-// CHECK-SAME: outs(%[[RESHAPE2]], %[[RESHAPE3]] :
+// CHECK-SAME: inits(%[[RESHAPE2]], %[[RESHAPE3]] :
// CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1
// -----
@@ -551,7 +551,7 @@
indexing_maps = [#map0, #map0, #map0, #map1],
iterator_types = ["parallel", "parallel"]}
ins(%arg0, %arg1 : tensor<512xf32>, tensor<512xf32>)
- outs(%arg2, %arg3 : tensor<512xf32>, tensor<200x512xf32>) {
+ inits(%arg2, %arg3 : tensor<512xf32>, tensor<200x512xf32>) {
^bb0(%arg4: f32, %arg5: f32, %arg6: f32, %arg7: f32):
%2 = arith.addf %arg4, %arg5 : f32
linalg.yield %2, %2 : f32, f32
@@ -571,5 +571,5 @@
// CHECK: %[[GENERIC:.+]]:2 = linalg.generic
// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP0]], #[[MAP0]], #[[MAP1]]]
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] :
-// CHECK-SAME: outs(%[[ARG2]], %[[OUTS]] :
+// CHECK-SAME: inits(%[[ARG2]], %[[OUTS]] :
// CHECK: return %[[GENERIC]]#1
diff --git a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
--- a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
+++ b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
@@ -54,7 +54,7 @@
affine_map<(d0, d1, d2) -> (d0 + d1, d1 - d0)>],
iterator_types = ["parallel", "parallel", "reduction"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) {
+ inits(%arg2 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
%1 = arith.mulf %arg3, %arg4 : f32
%2 = arith.addf %1, %arg5 : f32
@@ -92,7 +92,7 @@
{indexing_maps = [affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
- ins(%arg0 : tensor) outs(%0 : tensor) {
+ ins(%arg0 : tensor) inits(%0 : tensor) {
^bb0(%arg2: f32, %arg3: f32) :
linalg.yield %arg2 : f32
} -> tensor
@@ -111,7 +111,7 @@
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
%1 = tensor.dim %0, %c0 : tensor
%2 = tensor.dim %0, %c1 : tensor
%3 = linalg.generic
@@ -120,7 +120,7 @@
affine_map<(d0, d1, d2) -> (d0, d2)>],
iterator_types = ["parallel", "reduction", "parallel"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%0 : tensor) {
+ inits(%0 : tensor) {
^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
%4 = arith.mulf %arg3, %arg4 : f32
%5 = arith.addf %4, %arg5 : f32
@@ -154,7 +154,7 @@
{indexing_maps = [affine_map<(d0, d1) -> (d0)>,
affine_map<(d0, d1) -> (d0, d1)>],
iterator_types = ["parallel", "parallel"]}
- ins(%arg0 : tensor) outs(%0 : tensor) {
+ ins(%arg0 : tensor) inits(%0 : tensor) {
^bb0(%arg2: f32, %arg3 : f32):
linalg.yield %arg2 : f32
} -> tensor
@@ -179,7 +179,7 @@
indexing_maps = [#map, #map, #map],
iterator_types = ["parallel"]
} ins(%arg_0 : tensor)
- outs(%arg_0, %arg_1 : tensor, tensor) {
+ inits(%arg_0, %arg_1 : tensor, tensor) {
^bb0(%in: f32, %out_0: f32, %out_1: f32):
linalg.yield %in, %in : f32, f32
} -> (tensor, tensor)
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -32,38 +32,38 @@
%arg3: memref) {
linalg.matmul ins(%arg0, %arg0 : memref>, memref>)
- outs(%arg0 : memref>)
+ inits(%arg0 : memref>)
linalg.matvec ins(%arg0, %arg1: memref>, memref>)
- outs(%arg2: memref>)
+ inits(%arg2: memref>)
linalg.dot ins(%arg1, %arg2: memref>, memref>)
- outs(%arg3: memref)
+ inits(%arg3: memref)
return
}
// CHECK-LABEL: func @ops(%
// CHECK: linalg.matmul
// CHECK-SAME: ins(%{{.*}}, %{{.*}} : memref>,
// CHECK-SAME: memref>)
-// CHECK-SAME: outs(%{{.*}} : memref>)
+// CHECK-SAME: inits(%{{.*}} : memref>)
// CHECK: linalg.matvec
// CHECK-SAME: ins(%{{.*}}, %{{.*}}: memref>,
// CHECK-SAME: memref>)
-// CHECK-SAME: outs(%{{.*}}: memref>)
+// CHECK-SAME: inits(%{{.*}}: memref>)
// CHECK: linalg.dot
// CHECK-SAME: ins(%{{.*}}, %{{.*}}: memref>,
// CHECK-SAME: memref>)
-// CHECK-SAME: outs(%{{.*}}: memref)
+// CHECK-SAME: inits(%{{.*}}: memref)
// -----
func.func @fill_view(%arg0: memref>, %arg1: f32) {
- linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>)
+ linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>)
return
}
// CHECK-LABEL: func @fill_view(
// CHECK: %{{.*}}: memref>, %{{.*}}: f32) {
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : memref>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%{{.*}} : memref>)
// -----
@@ -79,12 +79,12 @@
func.func @fill_view3(%arg0: memref>, %arg1: f32) {
- linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>)
+ linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>)
return
}
// CHECK-LABEL: func @fill_view3(
// CHECK: %{{.*}}: memref>, %{{.*}}: f32) {
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : memref>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%{{.*}} : memref>)
// -----
@@ -105,7 +105,7 @@
%cst = arith.constant 0.0 : f32
linalg.generic #trait_0
ins(%arg0, %cst : memref, strided<[?, 1], offset: ?>>, f32)
- outs(%arg1 : memref>)
+ inits(%arg1 : memref>)
attrs = {foo = 1} {
^bb(%0: vector<3x4xi4>, %1: f32, %2: f32) :
linalg.yield %1 : f32
@@ -118,7 +118,7 @@
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"],
// CHECK-SAME: library_call = "some_external_function_name_1"}
// CHECK-SAME: ins({{.*}}, {{.*}} : memref, strided<[?, 1], offset: ?>>, f32)
-// CHECK-SAME: outs({{.*}} : memref>)
+// CHECK-SAME: inits({{.*}} : memref>)
// CHECK-SAME: {foo = 1 : i64}
// -----
@@ -127,7 +127,7 @@
func.func @generic_without_inputs(%arg0 : memref) {
linalg.generic {indexing_maps = [#map0],
iterator_types = ["parallel", "parallel", "parallel"]}
- outs(%arg0 : memref) {
+ inits(%arg0 : memref) {
^bb0(%arg3: f32):
%cst = arith.constant 0.000000e+00 : f32
linalg.yield %cst : f32
@@ -158,7 +158,7 @@
-> (tensor) {
%0 = linalg.generic #trait_1
ins(%arg0, %arg1 : tensor>, tensor)
- outs(%arg1 : tensor)
+ inits(%arg1 : tensor)
attrs = {foo = 1} {
^bb(%0: vector<3x4xi4>, %1: f32, %2: f32) :
%f0 = arith.constant 0.0 : f32
@@ -171,7 +171,7 @@
// CHECK-SAME: indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"],
// CHECK-SAME: library_call = "some_external_function_name_1"}
// CHECK-SAME: ins({{.*}} : tensor>, tensor)
-// CHECK-SAME: outs({{.*}} : tensor)
+// CHECK-SAME: inits({{.*}} : tensor)
// CHECK-SAME: {foo = 1 : i64}
// CHECK: -> tensor
// CHECK: return {{.*}} : tensor
@@ -183,14 +183,14 @@
-> (tensor, tensor) {
%c0 = arith.constant 0 : index
%0 = tensor.empty() : tensor
- %1 = linalg.fill ins(%arg2 : i32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%arg2 : i32) inits(%0 : tensor) -> tensor
%2 = tensor.empty() : tensor
- %3 = linalg.fill ins(%arg2 : i32) outs(%2 : tensor) -> tensor
+ %3 = linalg.fill ins(%arg2 : i32) inits(%2 : tensor) -> tensor
%4:2 = linalg.generic {
indexing_maps = [affine_map<(d0) -> (d0)>,
affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>, affine_map<(d0) -> ()>],
iterator_types = ["reduction"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%1, %3 : tensor, tensor) {
+ inits(%1, %3 : tensor, tensor) {
^bb0(%arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32):
%5 = arith.cmpi sge, %arg3, %arg5 : i32
%6 = arith.select %5, %arg3, %arg5 : i32
@@ -206,7 +206,7 @@
// CHECK-LABEL: func @generic_with_multiple_tensor_outputs
// CHECK: %{{.*}} = linalg.generic {
// CHECK-SAME: ins({{.*}} : tensor, tensor)
-// CHECK-SAME: outs({{.*}} : tensor, tensor)
+// CHECK-SAME: inits({{.*}} : tensor, tensor)
// CHECK: } -> (tensor, tensor)
// -----
@@ -226,7 +226,7 @@
{
%0 = linalg.generic #trait_broadcast
ins(%arg0 : tensor)
- outs(%arg1 : tensor<3x4xf32>) {
+ inits(%arg1 : tensor<3x4xf32>) {
^bb(%a: f32, %b: f32) :
linalg.yield %a : f32
} -> tensor<3x4xf32>
@@ -251,7 +251,7 @@
%arg1: memref>) {
linalg.generic #trait_3
ins(%arg0 : memref, strided<[?, 1], offset: ?>>)
- outs(%arg1 : memref>)
+ inits(%arg1 : memref>)
attrs = {foo = 1} {
^bb(%a: vector<3x4xi4>, %b: f32) :
%0 = linalg.index 0 : index
@@ -267,7 +267,7 @@
// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"],
// CHECK-SAME: library_call = "some_external_function_name_2"
// CHECK-SAME: ins({{.*}} : memref, strided<[?, 1], offset: ?>>)
-// CHECK-SAME: outs({{.*}} : memref>)
+// CHECK-SAME: inits({{.*}} : memref>)
// CHECK-SAME: attrs = {foo = 1 : i64} {
// CHECK: ^{{.*}}(%{{.*}}: vector<3x4xi4>, %{{.*}}: f32):
// CHECK: %{{.*}} = linalg.index 0 : index
@@ -283,10 +283,10 @@
-> (tensor) {
linalg.batch_matmul ins(%a3, %b3: memref, memref)
- outs(%c3: memref)
+ inits(%c3: memref)
%res1 = linalg.batch_matmul ins(%ta3, %tb3: tensor, tensor)
- outs(%tc3: tensor)
+ inits(%tc3: tensor)
-> tensor
return %res1 : tensor
}
@@ -298,10 +298,10 @@
func.func @fill_tensor(%arg0 : index, %arg1 : index, %arg2 : f32) -> tensor {
%0 = tensor.empty(%arg0, %arg1) : tensor
- %1 = linalg.fill ins(%arg2 : f32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%arg2 : f32) inits(%0 : tensor) -> tensor
return %1 : tensor
}
-// CHECK: %{{.+}} = linalg.fill ins(%{{.+}} : f32) outs(%{{.+}} : tensor) -> tensor
+// CHECK: %{{.+}} = linalg.fill ins(%{{.+}} : f32) inits(%{{.+}} : tensor) -> tensor
// -----
@@ -313,7 +313,7 @@
affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
affine_map<(d0, d1, d2) -> (d0, d1)>],
iterator_types = ["parallel", "parallel", "reduction"]}
ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2, %arg3 : tensor, tensor) {
+ inits(%arg2, %arg3 : tensor, tensor) {
^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32):
%1 = arith.mulf %b0, %b1 : f32
%2 = arith.addf %1, %b3 : f32
@@ -328,7 +328,7 @@
func.func @map_no_inputs(%init: tensor<64xf32>) -> tensor<64xf32> {
%add = linalg.map
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
() {
%0 = arith.constant 0.0: f32
linalg.yield %0: f32
@@ -336,7 +336,7 @@
func.return %add : tensor<64xf32>
}
// CHECK-LABEL: func @map_no_inputs
-// CHECK: linalg.map outs
+// CHECK: linalg.map inits
// CHECK-NEXT: () {
// CHECK-NEXT: arith.constant
// CHECK-NEXT: linalg.yield
@@ -348,7 +348,7 @@
%init: tensor<64xf32>) -> tensor<64xf32> {
%add = linalg.map
ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%lhs_elem: f32, %rhs_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
@@ -357,7 +357,7 @@
}
// CHECK-LABEL: func @map_binary
// CHECK: linalg.map { arith.addf } ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// -----
@@ -365,7 +365,7 @@
%init: memref<64xf32>) {
linalg.map
ins(%lhs, %rhs: memref<64xf32>, memref<64xf32>)
- outs(%init:memref<64xf32>)
+ inits(%init:memref<64xf32>)
(%lhs_elem: f32, %rhs_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem: f32
linalg.yield %0: f32
@@ -380,7 +380,7 @@
func.func @map_unary(%input: tensor<64xf32>,
%init: tensor<64xf32>) -> tensor<64xf32> {
%abs = linalg.map
ins(%input:tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%input_elem: f32) {
%0 = math.absf %input_elem: f32
linalg.yield %0: f32
@@ -395,7 +395,7 @@
func.func @map_unary_memref(%input: memref<64xf32>,
%init: memref<64xf32>) {
linalg.map
ins(%input:memref<64xf32>)
- outs(%init:memref<64xf32>)
+ inits(%init:memref<64xf32>)
(%input_elem: f32) {
%0 = math.absf %input_elem: f32
linalg.yield %0: f32
@@ -411,7 +411,7 @@
%init: tensor<16x64xf32>) -> tensor<16x64xf32> {
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16x64xf32>)
+ inits(%init:tensor<16x64xf32>)
dimensions = [1]
(%in: f32, %out: f32) {
%0 = arith.addf %out, %in: f32
@@ -421,7 +421,7 @@
}
// CHECK-LABEL: func @reduce
// CHECK: linalg.reduce { arith.addf } ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: dimensions = [1]
// -----
@@ -430,7 +430,7 @@
%init: memref<16x64xf32>) {
linalg.reduce
ins(%input:memref<16x32x64xf32>)
- outs(%init:memref<16x64xf32>)
+ inits(%init:memref<16x64xf32>)
dimensions = [1]
(%in: f32, %out: f32) {
%0 = arith.addf %out, %in: f32
@@ -440,7 +440,7 @@
}
// CHECK-LABEL: func @reduce
// CHECK: linalg.reduce { arith.addf } ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: dimensions = [1]
// -----
@@ -450,7 +450,7 @@
%init2: tensor<16x64xi64>) -> (tensor<16x64xf32>, tensor<16x64xi64>) {
%reduce, %reduce2 = linalg.reduce
ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xi64>)
- outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xi64>)
+ inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xi64>)
dimensions = [1]
(%in1: f32, %in2: i64, %out1: f32, %out2: i64) {
%0 = arith.addf %in1, %out1: f32
@@ -470,7 +470,7 @@
%init2: memref<16x64xi64>) {
linalg.reduce
ins(%input1, %input2 : memref<16x32x64xf32>, memref<16x32x64xi64>)
- outs(%init1, %init2 : memref<16x64xf32>, memref<16x64xi64>)
+ inits(%init1, %init2 : memref<16x64xf32>, memref<16x64xi64>)
dimensions = [1]
(%in1: f32, %in2: i64, %out1: f32, %out2: i64) {
%0 = arith.addf %in1, %out1: f32
@@ -489,13 +489,13 @@
%init: tensor<32x64x16xf32>) -> tensor<32x64x16xf32> {
%transpose = linalg.transpose
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<32x64x16xf32>)
+ inits(%init:tensor<32x64x16xf32>)
permutation = [1, 2, 0]
func.return %transpose : tensor<32x64x16xf32>
}
// CHECK-LABEL: func @transpose
// CHECK: linalg.transpose ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: permutation
// -----
@@ -504,7 +504,7 @@
%init: memref<32x64x16xf32>) {
linalg.transpose
ins(%input:memref<16x32x64xf32>)
- outs(%init:memref<32x64x16xf32>)
+ inits(%init:memref<32x64x16xf32>)
permutation = [1, 2, 0]
func.return
}
@@ -516,13 +516,13 @@
%init: tensor<8x16x32xf32>) -> tensor<8x16x32xf32> {
%bcast = linalg.broadcast
ins(%input:tensor<8x32xf32>)
- outs(%init:tensor<8x16x32xf32>)
+ inits(%init:tensor<8x16x32xf32>)
dimensions = [1]
func.return %bcast : tensor<8x16x32xf32>
}
// CHECK-LABEL: func @broadcast_static_sizes
// CHECK: linalg.broadcast ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: dimensions
// -----
@@ -532,13 +532,13 @@
-> tensor<8x16x?xf32> {
%bcast = linalg.broadcast
ins(%input:tensor<8x?xf32>)
- outs(%init:tensor<8x16x?xf32>)
+ inits(%init:tensor<8x16x?xf32>)
dimensions = [1]
func.return %bcast : tensor<8x16x?xf32>
}
// CHECK-LABEL: func @broadcast_with_dynamic_sizes
// CHECK: linalg.broadcast ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: dimensions
// -----
@@ -547,14 +547,14 @@
%init: memref<8x16x32xf32>) {
linalg.broadcast
ins(%input:memref<8x32xf32>)
- outs(%init:memref<8x16x32xf32>)
+ inits(%init:memref<8x16x32xf32>)
dimensions = [1]
func.return
}
// CHECK-LABEL: func @broadcast_memref
// CHECK: linalg.broadcast ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: dimensions
// -----
@@ -563,7 +563,7 @@
%init: tensor<64xf32>) -> tensor<64xf32> {
%add = linalg.map
ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
- outs(%init:tensor<64xf32>)
+ inits(%init:tensor<64xf32>)
(%lhs_elem: f32, %rhs_elem: f32) {
%0 = arith.addf %lhs_elem, %rhs_elem fastmath : f32
linalg.yield %0: f32
@@ -575,7 +575,7 @@
// CHECK-NEXT: %[[MAPPED:.*]] = linalg.map
// CHECK-SAME: { arith.addf {fastmath = #arith.fastmath} }
// CHECK-SAME: ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-NEXT: return %[[MAPPED]] : tensor<64xf32>
// -----
@@ -584,7 +584,7 @@
%init: tensor<16x64xf32>) -> tensor<16x64xf32> {
%reduce = linalg.reduce
ins(%input:tensor<16x32x64xf32>)
- outs(%init:tensor<16x64xf32>)
+ inits(%init:tensor<16x64xf32>)
dimensions = [1]
(%in: f32, %out: f32) {
%0 = arith.addf %out, %in fastmath : f32
@@ -596,6 +596,6 @@
// CHECK-NEXT: %[[REDUCED:.*]] = linalg.reduce
// CHECK-SAME: { arith.addf {fastmath = #arith.fastmath} }
// CHECK-SAME: ins
-// CHECK-SAME: outs
+// CHECK-SAME: inits
// CHECK-SAME: dimensions = [1]
// CHECK-NEXT: return %[[REDUCED]] : tensor<16x64xf32>
diff --git a/mlir/test/Dialect/Linalg/standard.mlir
--- a/mlir/test/Dialect/Linalg/standard.mlir
+++ b/mlir/test/Dialect/Linalg/standard.mlir
@@ -5,7 +5,7 @@
%arg2: memref) {
linalg.dot ins(%arg0, %arg1: memref>,
memref>)
- outs(%arg2: memref)
+ inits(%arg2: memref)
return
}
// CHECK-LABEL: func @dot(
@@ -46,7 +46,7 @@
func.func @matmul_vec_impl(%A: !matrix_type_A, %B: !matrix_type_B, %C: !matrix_type_C) {
linalg.generic #matmul_trait
ins(%A, %B : !matrix_type_A, !matrix_type_B)
- outs(%C : !matrix_type_C) {
+ inits(%C : !matrix_type_C) {
^bb0(%a: !vector_type_A, %b: !vector_type_B, %c: !vector_type_C):
%d = vector.outerproduct %a, %b, %c: !vector_type_A, !vector_type_B
linalg.yield %d: !vector_type_C
@@ -65,7 +65,7 @@
// expected-error @below {{failed to legalize}}
%0 = linalg.generic {
indexing_maps = [#map, #map1],
iterator_types = ["parallel", "reduction"]}
- ins(%arg0 : tensor) outs(%arg1 : tensor) {
+ ins(%arg0 : tensor) inits(%arg1 : tensor) {
^bb0(%in: f32, %out: f32):
linalg.yield %in : f32
} -> tensor
@@ -76,6 +76,6 @@
func.func @func(%arg0: tensor<4x8xf32>, %arg1: tensor<4x8xf32>) -> tensor<4x8xf32> {
// expected-error @below {{failed to legalize}}
- %0 = linalg.copy ins(%arg0 : tensor<4x8xf32>) outs(%arg1 : tensor<4x8xf32>) -> tensor<4x8xf32>
+ %0 = linalg.copy ins(%arg0 : tensor<4x8xf32>) inits(%arg1 : tensor<4x8xf32>) -> tensor<4x8xf32>
return %0 : tensor<4x8xf32>
}
diff --git a/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
--- a/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
+++ b/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
@@ -4,11 +4,11 @@
// CHECK-SAME: (%[[INIT:.+]]: tensor, %[[OFFSET0:.+]]: index, %[[SIZE1:.+]]: index)
// CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32
// CHECK: %[[EXT:.+]] = tensor.extract_slice %[[INIT]][%[[OFFSET0]], 8, 4] [1, %[[SIZE1]], 6] [1, 3, 1]
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[EXT]] : tensor) -> tensor
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) inits(%[[EXT]] : tensor) -> tensor
// CHECK: return %[[FILL]]
func.func @swap_fill_insert_slice(%init : tensor, %offset0: index, %size1: index) -> tensor {
%f0 = arith.constant 0.000000e+00 : f32
- %0 = linalg.fill ins(%f0 : f32) outs(%init : tensor) -> tensor
+ %0 = linalg.fill ins(%f0 : f32) inits(%init : tensor) -> tensor
%1 = tensor.extract_slice %0[%offset0, 8, 4] [1, %size1, 6] [1, 3, 1]
: tensor to tensor
return %1: tensor
@@ -21,7 +21,7 @@
// CHECK: tensor.extract_slice
func.func @dont_swap_fill_insert_slice_multi_user(%init : tensor,
%offset0: index, %size1: index) -> (tensor, tensor<2x?x6xf32>) {
%f0 = arith.constant 0.000000e+00 : f32
- %0 = linalg.fill ins(%f0 : f32) outs(%init : tensor) -> tensor
+ %0 = linalg.fill ins(%f0 : f32) inits(%init : tensor) -> tensor
%1 = tensor.extract_slice %0[%offset0, 8, 4] [2, %size1, 6] [1, 3, 1]
: tensor to tensor<2x?x6xf32>
return %0, %1: tensor, tensor<2x?x6xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -2,7 +2,7 @@
func.func @matmul_tensors(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
%t0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor)
- outs(%arg2: tensor)
+ inits(%arg2: tensor)
-> tensor
%c4 = arith.constant 4 : index
@@ -19,7 +19,7 @@
%6 = tensor.extract_slice %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor to tensor
%7 = tensor.extract_slice %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor to tensor<4x?xf32>
%8 = tensor.extract_slice %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor to tensor
- %9 = linalg.matmul ins(%6, %7 : tensor, tensor<4x?xf32>) outs(%8 : tensor) -> tensor
+ %9 = linalg.matmul ins(%6, %7 : tensor, tensor<4x?xf32>) inits(%8 : tensor) -> tensor
%10 = tensor.insert_slice %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1] : tensor into tensor
scf.yield %10 : tensor
}
@@ -50,8 +50,8 @@
// slices of the producing matmul.
// CHECK-DAG: %[[stB2:.*]] = tensor.extract_slice %[[B]][0, %[[K]]] [%[[dB0]], 4] [1, 1] : tensor to tensor
// CHECK-DAG: %[[stC:.*]] = tensor.extract_slice %[[C]][%[[I]], %[[K]]] [2, 4] [1, 1] : tensor to tensor<2x4xf32>
-// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor) outs(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32>
-// CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32>
+// CHECK: %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor) inits(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32>
+// CHECK-NEXT: %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) inits(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32>
// CHECK-NEXT: tensor.insert_slice %[[stG]] into %[[RES]][%[[I]], %[[J]]]
// -----
@@ -66,12 +66,12 @@
%cst = arith.constant 0.0 : f32
%init = tensor.empty() : tensor<1x112x112x32xf32>
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+ %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%conv = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>)
- outs(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+ inits(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
%for0 = scf.for %iv0 = %c0 to %c112 step %c8 iter_args(%arg0 = %fill) -> tensor<1x112x112x32xf32> {
%for1 = scf.for %iv1 = %c0 to %c112 step %c16 iter_args(%arg1 = %arg0) -> tensor<1x112x112x32xf32> {
@@ -87,7 +87,7 @@
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
iterator_types = ["parallel", "parallel", "parallel", "parallel"]
}
- ins(%0, %1 : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>) outs(%2 : tensor<1x8x16x4xf32>) {
+ ins(%0, %1 : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>) inits(%2 : tensor<1x8x16x4xf32>) {
^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
%result = arith.addf %arg3, %arg4 : f32
linalg.yield %result : f32
@@ -110,7 +110,7 @@
// CHECK-SAME: (%[[INPUT:.+]]: tensor<1x225x225x3xf32>, %[[FILTER:.+]]: tensor<3x3x3x32xf32>, %[[ELEM:.+]]: tensor<1x112x112x32xf32>)
// CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x112x112x32xf32>
-// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
// CHECK-NEXT: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG0:.+]] = %[[FILL]])
// CHECK-NEXT: %[[OFFSET_H:.+]] = affine.apply #[[MAP0]](%[[IV0]])
@@ -124,10 +124,10 @@
// CHECK-NEXT: %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf
// CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<1x17x33x3xf32>, tensor<3x3x3x4xf32>)
-// CHECK-SAME: outs(%[[ST_FILL]] : tensor<1x8x16x4xf32>)
+// CHECK-SAME: inits(%[[ST_FILL]] : tensor<1x8x16x4xf32>)
// CHECK-NEXT: %[[ADD:.+]] = linalg.generic
// CHECK-SAME: ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>)
-// CHECK-SAME: outs(%[[ST_ARG2]] : tensor<1x8x16x4xf32>)
+// CHECK-SAME: inits(%[[ST_ARG2]] : tensor<1x8x16x4xf32>)
// CHECK: tensor.insert_slice %[[ADD]] into %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4]
// -----
@@ -148,12 +148,12 @@
%oc = tensor.dim %elementwise, %c3 : tensor
%init = tensor.empty(%n, %oh, %ow, %oc) : tensor
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+ %fill = linalg.fill ins(%cst : f32) inits(%init : tensor) -> tensor
%conv = linalg.conv_2d_nhwc_hwcf
{dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
ins(%input, %filter : tensor, tensor)
- outs(%fill : tensor) -> tensor
+ inits(%fill : tensor) -> tensor
%for0 = scf.for %iv0 = %c0 to %n step %c8 iter_args(%arg0 = %fill) -> tensor {
%for1 = scf.for %iv1 = %c0 to %oh step %c16 iter_args(%arg1 = %arg0) -> tensor {
@@ -174,7 +174,7 @@
affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
iterator_types = ["parallel", "parallel", "parallel", "parallel"]
}
- ins(%0, %1 : tensor, tensor) outs(%2 : tensor) {
+ ins(%0, %1 : tensor, tensor) inits(%2 : tensor) {
^bb0(%arg4: f32, %arg5: f32, %arg6: f32):
%result = arith.addf %arg4, %arg5 : f32
linalg.yield %result : f32
@@ -217,7 +217,7 @@
// CHECK-DAG: %[[ELEM_OC:.+]] = tensor.dim %[[ELEM]], %[[C3]] : tensor
// CHECK: %[[INIT:.+]] = tensor.empty(%[[ELEM_N]], %[[ELEM_OH]], %[[ELEM_OW]], %[[ELEM_OC]]) : tensor
-// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor) -> tensor
+// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor) -> tensor
// CHECK-DAG: %[[FILTER_H:.+]] = tensor.dim %[[FILTER]], %[[C0]] : tensor
// CHECK-DAG: %[[FILTER_W:.+]] = tensor.dim %[[FILTER]], %[[C1]] : tensor
@@ -256,10 +256,10 @@
// CHECK-SAME: [%[[SIZE_INPUT_N]], %[[SIZE_ELEM_OH_2]], %[[SIZE_ELEM_OW_2]], %[[SIZE_ELEM_OC_2]]]
// CHECK-NEXT: %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf
// CHECK-SAME: ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor, tensor)
-// CHECK-SAME: outs(%[[ST_FILL]] : tensor) -> tensor
+// CHECK-SAME: inits(%[[ST_FILL]] : tensor) -> tensor
// CHECK-NEXT: %[[ST_ADD:.+]] = linalg.generic
// CHECK-SAME: ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor, tensor)
-// CHECK-SAME: outs(%[[ST_ARG]] : tensor)
+// CHECK-SAME: inits(%[[ST_ARG]] : tensor)
 // CHECK: tensor.insert_slice %[[ST_ADD]] into %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
 // CHECK-SAME: [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]]
@@ -301,7 +301,7 @@
     tensor.yield %zero : f32
   } : tensor<58x1xf32> to tensor<64x128xf32>
-  %fill = linalg.fill ins(%zero : f32) outs(%large_input : tensor<64x128xf32>) -> tensor<64x128xf32>
+  %fill = linalg.fill ins(%zero : f32) inits(%large_input : tensor<64x128xf32>) -> tensor<64x128xf32>
   %for0 = scf.for %iv0 = %c0 to %d0 step %c16 iter_args(%arg0 = %fill) -> tensor<64x128xf32> {
     %for1 = scf.for %iv1 = %c0 to %d1 step %c32 iter_args(%arg1 = %arg0) -> tensor<64x128xf32> {
@@ -311,7 +311,7 @@
       %add = linalg.generic
         {indexing_maps = [#map, #map, #map],
         iterator_types = ["parallel", "parallel"]}
-        ins(%0, %1 : tensor<16x32xf32>, tensor<16x32xf32>) outs(%2 : tensor<16x32xf32>) {
+        ins(%0, %1 : tensor<16x32xf32>, tensor<16x32xf32>) inits(%2 : tensor<16x32xf32>) {
       ^bb0(%arg4: f32, %arg5: f32, %arg6: f32):
         %result = arith.addf %arg4, %arg5 : f32
         linalg.yield %result : f32
diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir
--- a/mlir/test/Dialect/Linalg/tile-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-conv.mlir
@@ -5,7 +5,7 @@
 // CHECK-DAG: #[[MAP2:.*]] = affine_map<(d0)[s0] -> (d0 + s0 - 1)>
 func.func @conv(%arg0 : memref, %arg1 : memref, %arg2 : memref) {
-  linalg.conv_2d ins(%arg0, %arg1 : memref, memref) outs(%arg2 : memref)
+  linalg.conv_2d ins(%arg0, %arg1 : memref, memref) inits(%arg2 : memref)
   return
 }
@@ -38,4 +38,4 @@
 // CHECK-DAG: %[[SVOUT:.*]] = memref.subview %[[ARG2]][%[[I]], %[[J]]] [%[[T4]], %[[T5]]]
 // CHECK: linalg.conv_2d
 // CHECK-SAME: ins(%[[SVIN]], %[[SVKER]]
-// CHECK-SAME: outs(%[[SVOUT]]
+// CHECK-SAME: inits(%[[SVOUT]]
diff --git a/mlir/test/Dialect/Linalg/tile-indexed.mlir b/mlir/test/Dialect/Linalg/tile-indexed.mlir
--- a/mlir/test/Dialect/Linalg/tile-indexed.mlir
+++ b/mlir/test/Dialect/Linalg/tile-indexed.mlir
@@ -3,7 +3,7 @@
 func.func @indexed_vector(%arg0: memref<50xindex>) {
   linalg.generic {indexing_maps = [affine_map<(i) -> (i)>],
                   iterator_types = ["parallel"]}
-    outs(%arg0 : memref<50xindex>) {
+    inits(%arg0 : memref<50xindex>) {
   ^bb0(%a: index):
     %i = linalg.index 0 : index
     linalg.yield %i : index
@@ -31,7 +31,7 @@
 func.func @indexed_matrix(%arg0: memref<50x50xindex>) {
   linalg.generic {indexing_maps = [affine_map<(i, j) -> (i, j)>],
                   iterator_types = ["parallel", "parallel"]}
-    outs(%arg0 : memref<50x50xindex>) {
+    inits(%arg0 : memref<50x50xindex>) {
   ^bb0(%a: index):
     %i = linalg.index 0 : index
     %j = linalg.index 1 : index
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -14,13 +14,13 @@
 // CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor to tensor
 // CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor to tensor
 // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor)
-// CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor
+// CHECK-SAME: inits(%[[sTC]] : tensor) -> tensor
 // CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor
 // CHECK: scf.yield %[[TD]] : tensor
 // CHECK: scf.yield %[[TD2]] : tensor
 // CHECK: scf.yield %[[TD1]] : tensor
   %0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor)
-               outs(%arg2: tensor)
+               inits(%arg2: tensor)
     -> tensor
 // CHECK: return %[[TD0]] : tensor
@@ -50,7 +50,7 @@
                      affine_map<(d0, d1, d2) -> (d2, d1, d0)>],
     iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0, %arg1 : tensor, tensor)
-    outs(%3 : tensor) {
+    inits(%3 : tensor) {
   ^bb0(%arg2 : f32, %arg3: f32, %arg4: f32):
     %5 = arith.addf %arg2, %arg3 : f32
     linalg.yield %5 : f32
@@ -76,7 +76,7 @@
 // CHECK: %[[STARG2:.+]] = tensor.extract_slice %[[TC2]][{{.+}}] : tensor to tensor
 // CHECK: %[[STRETURN:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[STARG0]], %[[STARG1]] : tensor, tensor)
-// CHECK-SAME: outs(%[[STARG2]] : tensor)
+// CHECK-SAME: inits(%[[STARG2]] : tensor)
 // CHECK: %[[TD:.+]] = tensor.insert_slice %[[STRETURN]] into %[[TC2]]
 // CHECK: scf.yield %[[TD]]
 // CHECK: }
@@ -121,7 +121,7 @@
                      affine_map<(d0, d1, d2) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%1, %arg2 : tensor, tensor)
-    outs(%arg1 : tensor) {
+    inits(%arg1 : tensor) {
   ^bb0(%arg3 : f32, %arg4: f32, %arg5: f32):
     %5 = arith.addf %arg3, %arg5 : f32
     linalg.yield %5 : f32
diff --git a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
--- a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
+++ b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
@@ -19,14 +19,14 @@
 // CHECK: %[[tC:.*]] = tensor.extract_slice %[[C_BLK]]{{.*}} : tensor to tensor
 // CHECK: %[[RES:.*]] = linalg.matmul
 // CHECK-SAME: ins(%[[tA]], %[[tB]] : tensor, tensor)
-  // CHECK-SAME: outs(%[[tC]] : tensor) -> tensor
+  // CHECK-SAME: inits(%[[tC]] : tensor) -> tensor
 // CHECK: scf.forall.in_parallel {
 // CHECK-NEXT: tensor.parallel_insert_slice %[[RES]] into %[[C_BLK]]{{.*}} :
 // CHECK-SAME: tensor into tensor
 // CHECK-NEXT: }
 // CHECK-NEXT: } {mapping = [#gpu.thread, #gpu.thread]}
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                  outs(%C : tensor) -> (tensor)
+                  inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -68,7 +68,7 @@
   %tile_size_1 = "test.dummy"() : () -> (index)
   %tile_size_2 = "test.dummy"() : () -> (index)
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                  outs(%C : tensor) -> (tensor)
+                  inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -107,7 +107,7 @@
 // CHECK: scf.forall.in_parallel
 // CHECK-NEXT: tensor.parallel_insert_slice
   %0 = linalg.matmul ins(%A, %B : tensor<100x200xf32>, tensor<200x300xf32>)
-                  outs(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
+                  inits(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
   return %0 : tensor<100x300xf32>
 }
@@ -148,7 +148,7 @@
 // CHECK: scf.forall.in_parallel
 // CHECK-NEXT: tensor.parallel_insert_slice
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                  outs(%C : tensor) -> (tensor)
+                  inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -184,7 +184,7 @@
 // CHECK: scf.forall.in_parallel
 // CHECK-NEXT: tensor.parallel_insert_slice
   %0 = linalg.matmul ins(%A, %B : tensor<100x200xf32>, tensor<200x300xf32>)
-                  outs(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
+                  inits(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
   return %0 : tensor<100x300xf32>
 }
@@ -202,7 +202,7 @@
   %result = linalg.generic {indexing_maps = [
     affine_map<(d0) -> (d0)>,affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]}
-    ins(%A : tensor<4xf32>) outs(%B1 : tensor<4xf32>) {
+    ins(%A : tensor<4xf32>) inits(%B1 : tensor<4xf32>) {
   ^bb0(%arg3: f32, %arg4: f32):  // no predecessors
     %2 = arith.addf %arg3, %arg3 : f32
     linalg.yield %2 : f32
@@ -256,7 +256,7 @@
 // CHECK-NEXT: tensor.parallel_insert_slice
   %tile_size = "test.dummy"() : () -> (index)
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                  outs(%C : tensor) -> (tensor)
+                  inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -304,7 +304,7 @@
     affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]
   } ins(%IN1, %IN2 : tensor<100xf32>, tensor<100xf32>)
-    outs(%OUT1, %OUT2 : tensor<100xf32>, tensor<100xf32>)
+    inits(%OUT1, %OUT2 : tensor<100xf32>, tensor<100xf32>)
   {
   ^bb0(%a1: f32, %a2: f32, %a3: f32, %a4: f32):
     %1 = arith.addf %a1, %a3 : f32
@@ -356,7 +356,7 @@
     ], iterator_types = ["parallel", "parallel"]
   } ins(%IN1, %IN2, %IN3 : tensor<100xf32>, tensor<100x300xf32>, tensor<300xf32>)
-    outs(%OUT1, %OUT2: tensor<300x100xf32>, tensor<300xf32>) {
+    inits(%OUT1, %OUT2: tensor<300x100xf32>, tensor<300xf32>) {
   ^bb0(%i1: f32, %i2: f32, %i3: f32, %o1: f32, %o2: f32):
     %1 = arith.addf %i1, %o1 : f32
     %2 = arith.addf %i2, %1 : f32
diff --git a/mlir/test/Dialect/Linalg/transform-lower-pack.mlir b/mlir/test/Dialect/Linalg/transform-lower-pack.mlir
--- a/mlir/test/Dialect/Linalg/transform-lower-pack.mlir
+++ b/mlir/test/Dialect/Linalg/transform-lower-pack.mlir
@@ -12,7 +12,7 @@
 // CHECK-SAME: : tensor<136x64x16x16xf32> into tensor<17x8x2x32x16x16xf32>
 // CHECK: linalg.transpose
 // CHECK-SAME: ins(%{{.*}} : tensor<17x8x2x32x16x16xf32>)
-  // CHECK-SAME: outs(%{{.*}} : tensor<17x2x16x16x32x8xf32>)
+  // CHECK-SAME: inits(%{{.*}} : tensor<17x2x16x16x32x8xf32>)
 // CHECK-SAME: permutation = [0, 2, 4, 5, 3, 1]
   %pack = tensor.pack %arg0 padding_value(%cst_0 : f32) inner_dims_pos = [1, 0] inner_tiles = [32, 8] into %arg1 : tensor<129x47x16x16xf32> -> tensor<17x2x16x16x32x8xf32>
@@ -36,7 +36,7 @@
 // CHECK: tensor.empty() : tensor<17x8x2x32x16x16xf32>
 // CHECK: linalg.transpose
 // CHECK-SAME: ins(%{{.*}} : tensor<17x2x16x16x32x8xf32>)
-  // CHECK-SAME: outs(%{{.*}} : tensor<17x8x2x32x16x16xf32>)
+  // CHECK-SAME: inits(%{{.*}} : tensor<17x8x2x32x16x16xf32>)
 // CHECK-SAME: permutation = [0, 5, 1, 4, 2, 3]
 // CHECK: tensor.collapse_shape {{.*}}[0, 1], [2, 3], [4], [5]]
 // CHECK-SAME: : tensor<17x8x2x32x16x16xf32> into tensor<136x64x16x16xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-bufferize-to-allocation.mlir
@@ -12,7 +12,7 @@
 // CHECK-DAG: %[[size0:.*]] = affine.apply #[[$map]]()[%[[h1]], %[[dim0]]]
 // CHECK-DAG: %[[size1:.*]] = affine.apply #[[$map1]]()[%[[l2]], %[[h2]]]
 // CHECK: %[[alloc:.*]] = memref.alloc(%[[size0]], %[[size1]]) : memref
-// CHECK: linalg.fill ins(%[[c50]] : index) outs(%[[alloc]] : memref)
+// CHECK: linalg.fill ins(%[[c50]] : index) inits(%[[alloc]] : memref)
 // CHECK: %[[dim0:.*]] = tensor.dim %[[t]], %[[c0]]
 // CHECK: %[[subview:.*]] = memref.subview %[[alloc]][5, %[[l2]]] [%[[dim0]], 10] [1, 1]
 // CHECK: memref.tensor_store %[[t]], %[[subview]]
diff --git a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
@@ -47,11 +47,11 @@
 // CHECK: %[[SLICERES:.+]] = tensor.extract_slice %[[RES]]
 // CHECK: %[[OPRES:.+]] = linalg.depthwise_conv_1d_nwc_wc
 // CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]]
-  // CHECK-SAME: outs(%[[SLICERES]]
+  // CHECK-SAME: inits(%[[SLICERES]]
 // CHECK: %[[INSERTED:.+]] = tensor.insert_slice %[[OPRES]] into %[[RES]]
   %0 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
         ins(%input, %filter: tensor<1x1x113x96xf32>, tensor<1x3x96xf32>)
-        outs(%init: tensor<1x1x56x96xf32>) -> tensor<1x1x56x96xf32>
+        inits(%init: tensor<1x1x56x96xf32>) -> tensor<1x1x56x96xf32>
 // CHECK: %[[INSERTED]]
   return %0: tensor<1x1x56x96xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
@@ -12,7 +12,7 @@
 func.func @fuse_tileable_op(%arg0: index, %arg1: tensor, %arg2: tensor) -> tensor {
   %cst = arith.constant 4.200000e+01 : f32
   %c0 = arith.constant 0 : index
-  %0 = linalg.fill ins(%cst : f32) outs(%arg1 : tensor) -> tensor
+  %0 = linalg.fill ins(%cst : f32) inits(%arg1 : tensor) -> tensor
   %d0 = tensor.dim %arg1, %c0 : tensor
   %1 = affine.apply #map0()[%d0, %arg0]
@@ -23,11 +23,11 @@
     %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor to tensor
 // CHECK: %[[T0:.*]] = tensor.extract_slice %[[IN]][%{{.*}}] [%{{.*}}] [{{.*}}]
-    // CHECK: %[[T1:.*]] = linalg.fill {{.*}} outs(%[[T0]]
+    // CHECK: %[[T1:.*]] = linalg.fill {{.*}} inits(%[[T0]]
     %6 = tensor.extract_slice %0[%3] [%4] [1] : tensor to tensor
 // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[T1]]
-    %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+    %7 = linalg.elemwise_unary ins(%6 : tensor) inits(%5 : tensor) -> tensor
     scf.forall.in_parallel {
       tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
     }
@@ -74,7 +74,7 @@
     %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor<64xf32> to tensor
 // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[INIT_TENSOR]]
-    %7 = linalg.elemwise_unary ins(%0 : tensor) outs(%5 : tensor) -> tensor
+    %7 = linalg.elemwise_unary ins(%0 : tensor) inits(%5 : tensor) -> tensor
     scf.forall.in_parallel {
       tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor<64xf32>
     }
@@ -108,7 +108,7 @@
 func.func @fuse_tileable_op_rank_reducing(%arg0: index, %arg1: tensor, %arg2: tensor) -> tensor {
   %cst = arith.constant 4.200000e+01 : f32
   %c0 = arith.constant 0 : index
-  %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor) -> tensor
   %d0 = tensor.dim %arg1, %c0 : tensor
 // CHECK: scf.forall {{.*}} -> (tensor) {
@@ -116,7 +116,7 @@
     %5 = tensor.extract_slice %o[%arg3] [1] [1] : tensor to tensor
 // CHECK: tensor.extract_slice %{{.*}}[%{{.*}}] [1] [1] : tensor to tensor<1xf32>
-    // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : tensor<1xf32>) -> tensor<1xf32>
+    // CHECK: linalg.fill ins(%{{.*}} : f32) inits(%{{.*}} : tensor<1xf32>) -> tensor<1xf32>
 // CHECK: tensor.extract_slice %{{.*}}[0] [1] [1] : tensor<1xf32> to tensor
 // CHECK: func.call @foo(%{{.*}}) : (tensor) -> tensor
     %7 = func.call @foo(%5) : (tensor) -> tensor
@@ -154,7 +154,7 @@
 func.func @fuse_tileable_op_through_bbarg(%arg0: index, %arg1: tensor, %arg2: tensor) -> tensor {
   %cst = arith.constant 4.200000e+01 : f32
   %c0 = arith.constant 0 : index
-  %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor) -> tensor
   %d0 = tensor.dim %arg1, %c0 : tensor
   %1 = affine.apply #map0()[%d0, %arg0]
@@ -165,11 +165,11 @@
     %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor to tensor
 // CHECK: %[[T0:.*]] = tensor.extract_slice %[[BBARGOUT]][%{{.*}}] [%{{.*}}] [{{.*}}]
-    // CHECK: %[[T1:.*]] = linalg.fill {{.*}} outs(%[[T0]]
+    // CHECK: %[[T1:.*]] = linalg.fill {{.*}} inits(%[[T0]]
     %6 = tensor.extract_slice %arg1[%3] [%4] [1] : tensor to tensor
-    // CHECK: %[[T2:.*]] = linalg.elemwise_unary {{.*}} outs(%[[T1]]
-    %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+    // CHECK: %[[T2:.*]] = linalg.elemwise_unary {{.*}} inits(%[[T1]]
+    %7 = linalg.elemwise_unary ins(%6 : tensor) inits(%5 : tensor) -> tensor
     scf.forall.in_parallel {
       tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
     }
@@ -208,7 +208,7 @@
   %0:2 = linalg.generic {
     indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]
-  } ins(%in : tensor) outs(%out_1, %out_3 : tensor, tensor) {
+  } ins(%in : tensor) inits(%out_1, %out_3 : tensor, tensor) {
   ^bb0(%a: f32, %b: f32, %c: f32):
     %d = arith.addf %a, %b : f32
     %e = arith.addf %d, %c : f32
@@ -229,7 +229,7 @@
     %6 = tensor.extract_slice %0#0[%3] [%4] [1] : tensor to tensor
 // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[T1]]#0
-    %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+    %7 = linalg.elemwise_unary ins(%6 : tensor) inits(%5 : tensor) -> tensor
     scf.forall.in_parallel {
       tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
     }
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -9,9 +9,9 @@
 // CHECK: linalg.elemwise_binary
 // CHECK: return %[[RES]]
   %0 = linalg.elemwise_unary ins(%arg0 : tensor)
-                             outs(%arg1: tensor) -> tensor
+                             inits(%arg1: tensor) -> tensor
   %1 = linalg.elemwise_binary ins(%0, %arg0 : tensor, tensor)
-                              outs(%arg1: tensor) -> tensor
+                              inits(%arg1: tensor) -> tensor
   return %1 : tensor
 }
@@ -36,9 +36,9 @@
 // CHECK: linalg.elemwise_binary
 // CHECK: return %[[RES]]
   %0 = linalg.elemwise_unary ins(%arg0 : tensor)
-                             outs(%arg1: tensor) -> tensor
+                             inits(%arg1: tensor) -> tensor
   %1 = linalg.elemwise_binary ins(%0, %arg0 : tensor, tensor)
-                              outs(%arg1: tensor) -> tensor
+                              inits(%arg1: tensor) -> tensor
   return %1 : tensor
 }
@@ -66,18 +66,18 @@
 // CHECK: scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %[[C7]] iter_args(%[[FOR_ARG1:.+]] = %[[FOR_ARG0]])
 // CHECK: %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], 0, %[[IV1]]]
 // CHECK: %[[OUT_SLICE1:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
-// CHECK: %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE1]] : tensor)
+// CHECK: %[[FILL:.+]] = linalg.fill {{.+}} inits(%[[OUT_SLICE1]] : tensor)
 // CHECK: scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %[[C4]] iter_args(%[[FOR_ARG2:.+]] = %[[FILL]])
 // CHECK: %[[IN_SLICE:.+]] = tensor.extract_slice %[[OUT_SLICE0]]
 // CHECK: %[[OUT_SLICE2:.+]] = tensor.extract_slice %[[FOR_ARG2]][0, 0]
-// CHECK: linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor) outs(%[[OUT_SLICE2]] : tensor)
+// CHECK: linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor) inits(%[[OUT_SLICE2]] : tensor)
 // CHECK: return %[[RES]]
-  %fill = linalg.fill ins(%five : f32) outs(%init : tensor<12x25xf32>) -> tensor<12x25xf32>
+  %fill = linalg.fill ins(%five : f32) inits(%init : tensor<12x25xf32>) -> tensor<12x25xf32>
   %0 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                      affine_map<(d0, d1, d2) -> (d0, d2)>],
     iterator_types = ["parallel", "reduction", "parallel"]
-  } ins(%input : tensor<12x7x25xf32>) outs(%fill : tensor<12x25xf32>) {
+  } ins(%input : tensor<12x7x25xf32>) inits(%fill : tensor<12x25xf32>) {
   ^bb0(%arg0: f32, %arg1: f32):
     %2 = arith.addf %arg0, %arg1 : f32
     linalg.yield %2 : f32
@@ -105,7 +105,7 @@
   %1 = tensor.unpack %arg0 inner_dims_pos = [0, 1] inner_tiles = [8, 8] into %0 : tensor<16x48x8x8xf32> -> tensor<128x384xf32>
   %2 = linalg.elemwise_unary ins(%1: tensor<128x384xf32>)
-                             outs(%arg1: tensor<128x384xf32>) -> tensor<128x384xf32>
+                             inits(%arg1: tensor<128x384xf32>) -> tensor<128x384xf32>
   return %2 : tensor<128x384xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-generalize.mlir b/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
@@ -6,7 +6,7 @@
 // CHECK-NOT: linalg.elemwise_unary
 // CHECK: linalg.generic
   %0 = linalg.elemwise_unary ins(%arg0 : tensor)
-                             outs(%arg1: tensor) -> tensor
+                             inits(%arg1: tensor) -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-hoist-pad.mlir
@@ -5,7 +5,7 @@
   -> tensor<24x25xf32>
 {
   // expected-note @below {{payload operation}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -40,7 +40,7 @@
   -> tensor<24x25xf32>
 {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -82,7 +82,7 @@
 // CHECK: %[[PADDED:.*]] = tensor.extract_slice %[[PACKED]][%{{.*}}, 0, 0] [1, 5, 12] [1, 1, 1]
 // CHECK-SAME: : tensor<5x5x12xf32> to tensor<5x12xf32>
 // CHECK: linalg.matmul ins(%[[PADDED]]
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -126,7 +126,7 @@
 // CHECK: %[[TRANSPOSED:.*]] = linalg.generic
 // CHECK: -> tensor<5x12xf32>
 // CHECK: linalg.matmul ins(%[[TRANSPOSED]]
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -162,13 +162,13 @@
 // CHECK: %[[PADDED:.*]] = tensor.pad %{{.*}}
 // CHECK: : tensor to tensor<5x25xf32>
 // CHECK: scf.for %{{.*}} iter_args(%[[INNER_PADDED:[0-9a-zA-Z]*]] = %[[PADDED]]) -> (tensor<5x25xf32>)
-  // CHECK: %[[RES:.*]] = linalg.matmul {{.*}} outs(%[[INNER_PADDED]]
+  // CHECK: %[[RES:.*]] = linalg.matmul {{.*}} inits(%[[INNER_PADDED]]
 // CHECK-SAME: : tensor<5x25xf32>
 // CHECK: scf.yield %[[RES]] : tensor<5x25xf32>
 // CHECK: %[[CAST:.*]] = tensor.cast %{{.*}} : tensor<5x25xf32> to tensor
 // CHECK: tensor.insert_slice %[[CAST]] into %{{.*}}[%{{.*}}, 0] [%{{.*}}, 25] [1, 1]
 // CHECK-SAME: : tensor into tensor<24x25xf32>
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-interchange.mlir b/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
@@ -10,7 +10,7 @@
   %0 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } ins(%arg0 : tensor) outs(%arg1 : tensor) {
+  } ins(%arg0 : tensor) inits(%arg1 : tensor) {
   ^bb0(%arg2: f32, %arg3: f32):
     %1 = math.exp %arg2 : f32
     linalg.yield %1 : f32
@@ -28,7 +28,7 @@
 func.func @interchange_matmul(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-match.mlir b/mlir/test/Dialect/Linalg/transform-op-match.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-match.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-match.mlir
@@ -48,7 +48,7 @@
   %1 = linalg.generic {indexing_maps = [#map0, #map1],
                        iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0 : tensor<12x128x32xf32>)
-    outs(%0 : tensor<128x12x32xf32>) {
+    inits(%0 : tensor<128x12x32xf32>) {
   ^bb0(%arg1: f32, %arg2: f32):
     linalg.yield %arg1 : f32
   } -> tensor<128x12x32xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir b/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
@@ -13,7 +13,7 @@
     %arg0: tensor<13x34xf32>, %arg1: tensor<34x42xf32>, %arg2: tensor<13x42xf32>)
   -> tensor<13x42xf32> {
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<13x34xf32>, tensor<34x42xf32>)
-                     outs(%arg2: tensor<13x42xf32>)
+                     inits(%arg2: tensor<13x42xf32>)
                      -> tensor<13x42xf32>
   // The first application computes the total size.
 // CHECK: %{{.*}} = affine.apply #[[$MAP13]]()
@@ -45,7 +45,7 @@
     %arg0: tensor<13x34xf32>, %arg1: tensor<34x42xf32>, %arg2: tensor<13x42xf32>)
   -> tensor<13x42xf32> {
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<13x34xf32>, tensor<34x42xf32>)
-                     outs(%arg2: tensor<13x42xf32>)
+                     inits(%arg2: tensor<13x42xf32>)
                      -> tensor<13x42xf32>
   return %0 : tensor<13x42xf32>
@@ -86,7 +86,7 @@
     %arg0: tensor, %arg1: tensor, %arg2: tensor)
   -> tensor {
   %0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor)
-                     outs(%arg2: tensor)
+                     inits(%arg2: tensor)
                      -> tensor
   return %0 : tensor
@@ -107,7 +107,7 @@
   -> tensor {
   // expected-note @below {{payload op}}
   %0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor)
-                     outs(%arg2: tensor)
+                     inits(%arg2: tensor)
                      -> tensor
   return %0 : tensor
diff --git a/mlir/test/Dialect/Linalg/transform-op-pack.mlir b/mlir/test/Dialect/Linalg/transform-op-pack.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-pack.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pack.mlir
@@ -22,8 +22,8 @@
 // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]]
 // CHECK-SAME: iterator_types = ["parallel", "reduction", "reduction"]
 // CHECK-SAME: ins(%{{.*}} : tensor<3x2x4xf16>)
-  // CHECK-SAME: outs(%{{.*}} : tensor<3xf16>)
-  %2 = linalg.generic #reduction_2d_trait ins(%t0 : tensor<3x7xf16>) outs(%t1 : tensor<3xf16>) {
+  // CHECK-SAME: inits(%{{.*}} : tensor<3xf16>)
+  %2 = linalg.generic #reduction_2d_trait ins(%t0 : tensor<3x7xf16>) inits(%t1 : tensor<3xf16>) {
   ^bb0(%in: f16, %out: f16):
     %3 = arith.addf %in, %out : f16
     linalg.yield %3 : f16
@@ -64,8 +64,8 @@
 // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]]
 // CHECK-SAME: iterator_types = ["reduction", "parallel", "reduction"]
 // CHECK-SAME: ins(%{{.*}} : tensor<3x2x4xf16>)
-  // CHECK-SAME: outs(%{{.*}} : tensor<3xf16>)
-  %2 = linalg.generic #col_reduction_2d_trait ins(%t0 : tensor<7x3xf16>) outs(%t1 : tensor<3xf16>) {
+  // CHECK-SAME: inits(%{{.*}} : tensor<3xf16>)
+  %2 = linalg.generic #col_reduction_2d_trait ins(%t0 : tensor<7x3xf16>) inits(%t1 : tensor<3xf16>) {
   ^bb0(%in: f16, %out: f16):
     %3 = arith.addf %in, %out : f16
     linalg.yield %3 : f16
@@ -119,8 +119,8 @@
 // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]]
 // CHECK-SAME: iterator_types = ["parallel", "reduction", "reduction"]
 // CHECK-SAME: ins(%{{.*}} : tensor)
-  // CHECK-SAME: outs(%{{.*}} : tensor)
-  %2 = linalg.generic #reduction_2d_trait ins(%t0 : tensor) outs(%t1 : tensor) {
+  // CHECK-SAME: inits(%{{.*}} : tensor)
+  %2 = linalg.generic #reduction_2d_trait ins(%t0 : tensor) inits(%t1 : tensor) {
   ^bb0(%in: f16, %out: f16):
     %3 = arith.addf %in, %out : f16
     linalg.yield %3 : f16
@@ -165,8 +165,8 @@
 // CHECK-SAME: indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]]]
 // CHECK-SAME: iterator_types = ["parallel", "reduction", "parallel", "reduction"]
 // CHECK-SAME: ins(%{{.*}} : tensor)
-  // CHECK-SAME: outs(%{{.*}} : tensor)
-  %2 = linalg.generic #reduction_2d_trait ins(%t0 : tensor) outs(%t1 : tensor) {
+  // CHECK-SAME: inits(%{{.*}} : tensor)
+  %2 = linalg.generic #reduction_2d_trait ins(%t0 : tensor) inits(%t1 : tensor) {
   ^bb0(%in: f16, %out: f16):
     %3 = arith.addf %in, %out : f16
     linalg.yield %3 : f16
@@ -209,9 +209,9 @@
 // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]}
 // CHECK-SAME: ins(%{{.*}} : tensor, tensor)
-  // CHECK-SAME: outs(%{{.*}} : tensor)
+  // CHECK-SAME: inits(%{{.*}} : tensor)
   %0 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
 // CHECK: tensor.unpack %{{.*}} outer_dims_perm = [1, 0] inner_dims_pos = [1, 0] inner_tiles = [3, 2]
@@ -258,9 +258,9 @@
 // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "parallel", "reduction"]}
 // CHECK-SAME: ins(%{{.*}} : tensor<14x64x28x28x8xf32>, tensor<256x64x1x1x4x8xf32>)
-  // CHECK-SAME: outs(%{{.*}} : tensor<14x256x28x28x4xf32>)
+  // CHECK-SAME: inits(%{{.*}} : tensor<14x256x28x28x4xf32>)
   %0 = linalg.conv_2d_nchw_fchw ins(%i, %f: tensor<14x512x28x28xf32>, tensor<1024x512x1x1xf32>)
-                                outs(%o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32>
+                                inits(%o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32>
 // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [4]
 // CHECK-SAME: : tensor<14x256x28x28x4xf32> -> tensor<14x1024x28x28xf32>
@@ -298,7 +298,7 @@
 // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction", "reduction", "parallel", "reduction"]}
 // CHECK-SAME: ins(%{{.*}} : tensor, tensor<1x?x?x?x4x6xf32>)
-  // CHECK-SAME: outs(%{{.*}} : tensor)
+  // CHECK-SAME: inits(%{{.*}} : tensor)
   %0 = linalg.conv_2d_nhwc_hwcf ins (%input, %filter: tensor, tensor<1x?x?x?xf32>)
                                 outs (%init: tensor) -> tensor
@@ -344,9 +344,9 @@
 // CHECK: linalg.generic {indexing_maps = [#[[$PACKED_MAP_0]], #[[$PACKED_MAP_1]], #[[$PACKED_MAP_2]]]
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "parallel", "reduction"]}
 // CHECK-SAME: ins(%{{.*}} : tensor, tensor)
-  // CHECK-SAME: outs(%{{.*}} : tensor)
+  // CHECK-SAME: inits(%{{.*}} : tensor)
   %0 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
 // CHECK: tensor.unpack %{{.*}} inner_dims_pos = [1] inner_tiles = [%[[TS]]] into %[[C]]
@@ -367,7 +367,7 @@
 func.func @conv_cant_pack(%i: tensor<14x512x28x28xf32>, %f: tensor<1024x512x1x1xf32>, %o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32> {
   %0 = linalg.conv_2d_nchw_fchw ins(%i, %f: tensor<14x512x28x28xf32>, tensor<1024x512x1x1xf32>)
-                                outs(%o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32>
+                                inits(%o: tensor<14x1024x28x28xf32>) -> tensor<14x1024x28x28xf32>
   return %0: tensor<14x1024x28x28xf32>
 }
@@ -385,10 +385,10 @@
 func.func @matmul(%A: tensor, %B: tensor, %C: tensor)
     -> (tensor, tensor) {
   %0 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
   %1 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
   return %0, %1 : tensor, tensor
 }
@@ -407,7 +407,7 @@
 func.func @matmul(%A: tensor, %B: tensor, %C: tensor) -> tensor {
   %0 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
   return %0 : tensor
 }
@@ -484,7 +484,7 @@
   %0 = tensor.pack %source inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %dest : tensor<128x256xf32> -> tensor<4x16x32x16xf32>
   %f0 = arith.constant 0.0 : f32
   %1 = tensor.empty() : tensor
-  %2 = linalg.fill ins(%f0: f32) outs(%1 : tensor) -> tensor
+  %2 = linalg.fill ins(%f0: f32) inits(%1 : tensor) -> tensor
   return
 }
@@ -506,7 +506,7 @@
   %b = tensor.unpack %a inner_dims_pos = [0, 1] inner_tiles = [32, 16] into %source : tensor<4x16x32x16xf32> -> tensor<128x256xf32>
   %f0 = arith.constant 0.0 : f32
   %1 = tensor.empty() : tensor
-  %2 = linalg.fill ins(%f0: f32) outs(%1 : tensor) -> tensor
+  %2 = linalg.fill ins(%f0: f32) inits(%1 : tensor) -> tensor
   return
 }
@@ -526,7 +526,7 @@
 func.func @no_matching_pack(%source: tensor<16xf32>) {
   %f0 = arith.constant 0.0 : f32
   %1 = tensor.empty() : tensor<4x4xf32>
-  %2 = linalg.fill ins(%f0: f32) outs(%1 : tensor<4x4xf32>) -> tensor<4x4xf32>
+  %2 = linalg.fill ins(%f0: f32) inits(%1 : tensor<4x4xf32>) -> tensor<4x4xf32>
   %b = tensor.unpack %2 inner_dims_pos = [0] inner_tiles = [4] into %source : tensor<4x4xf32> -> tensor<16xf32>
   return
 }
@@ -547,7 +547,7 @@
 func.func @invalid_outer_perm(%A: tensor, %B: tensor, %C: tensor)
     -> tensor {
   %0 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
   return %0 : tensor
 }
@@ -573,7 +573,7 @@
 func.func @invalid_inner_perm(%A: tensor, %B: tensor, %C: tensor)
     -> tensor {
   %0 = linalg.matmul ins(%A, %B: tensor, tensor)
-                     outs(%C: tensor)
+                     inits(%C: tensor)
                      -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-pad.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
@@ -25,8 +25,8 @@
 // CHECK: %[[T5:.*]] = linalg.matmul
 // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
-  // CHECK-SAME: outs(%[[T2]] : tensor<4x5xf32>)
-  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
+  // CHECK-SAME: inits(%[[T2]] : tensor<4x5xf32>)
+  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
   %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
   func.return %5 : tensor<24x25xf32>
 }
@@ -67,8 +67,8 @@
 // CHECK: %[[T5:.*]] = linalg.matmul
 // CHECK-SAME: ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
-  // CHECK-SAME: outs(%[[T2]] : tensor<4x5xf32>)
-  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
+  // CHECK-SAME: inits(%[[T2]] : tensor<4x5xf32>)
+  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
   %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
   func.return %5 : tensor<24x25xf32>
 }
@@ -89,7 +89,7 @@
     %arg1: tensor<12x25xf32>,
     %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -110,7 +110,7 @@
     %arg1: tensor<12x25xf32>,
     %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -132,7 +132,7 @@
     %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   // This is attached to an error that is silenceable and is not reported by this transform
   //   {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-rewrite-in-destination-passing-style.mlir b/mlir/test/Dialect/Linalg/transform-op-rewrite-in-destination-passing-style.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-rewrite-in-destination-passing-style.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-rewrite-in-destination-passing-style.mlir
@@ -78,7 +78,7 @@
 // CHECK: %[[empty:.*]] = tensor.empty(%[[s1]], %[[s2]]) : tensor
 // CHECK: %[[generic:.*]] = linalg.generic
 // CHECK-SAME: {indexing_maps = [#[[$map]]], iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: outs(%[[empty]] : tensor) {
+// CHECK-SAME: inits(%[[empty]] : tensor) {
 // CHECK: %[[i0:.*]] = linalg.index 0
 // CHECK: %[[i1:.*]] = linalg.index 1
 // CHECK: %[[added:.*]] = arith.addi %[[i0]], %[[i1]]
@@ -116,7 +116,7 @@
 // CHECK: %[[empty:.*]] = tensor.empty(%[[size0]], %[[size1]]) : tensor
 // CHECK: %[[generic:.*]] = linalg.generic
 // CHECK-SAME: {indexing_maps = [#[[$map2]]], iterator_types = ["parallel", "parallel"]}
-// CHECK-SAME: outs(%[[empty]] : tensor) {
+// CHECK-SAME: inits(%[[empty]] : tensor) {
 // CHECK: %[[i0:.*]] = linalg.index 0
 // CHECK: %[[i1:.*]] = linalg.index 1
 // CHECK: %[[mul:.*]] = arith.muli %[[i0]], %[[i1]]
@@ -155,7 +155,7 @@
 // CHECK-DAG: %[[size0:.*]] = affine.apply #[[$map]]()[%[[h1]], %[[dim0]]]
 // CHECK-DAG: %[[size1:.*]] = affine.apply #[[$map1]]()[%[[l2]], %[[h2]]]
 // CHECK: %[[empty:.*]] = tensor.empty(%[[size0]], %[[size1]]) : tensor
-// CHECK: %[[filled:.*]] = linalg.fill ins(%[[c50]] : index) outs(%[[empty]] : tensor)
+// CHECK: %[[filled:.*]] = linalg.fill ins(%[[c50]] : index) inits(%[[empty]] : tensor)
 // CHECK-DAG: %[[dim0:.*]] = tensor.dim %[[t1]], %[[c0]]
 // CHECK: %[[inserted:.*]] = tensor.insert_slice %[[t1]] into %[[filled]][5, %[[l2]]] [%[[dim0]], 10] [1, 1] : tensor into tensor
 // CHECK: return %[[inserted]]
@@ -188,7 +188,7 @@
 // CHECK-DAG: %[[size0:.*]] = affine.apply #[[$map]]()[%[[h1]], %[[dim0]]]
 // CHECK-DAG: %[[size1:.*]] = affine.apply #[[$map1]]()[%[[l2]], %[[h2]]]
 // CHECK: %[[empty:.*]] = tensor.empty(%[[size0]], %[[size1]]) : tensor
-// CHECK: %[[filled:.*]] = linalg.fill ins(%[[padding]] : index) outs(%[[empty]] : tensor)
+// CHECK: %[[filled:.*]] = linalg.fill ins(%[[padding]] : index) inits(%[[empty]] : tensor)
 // CHECK-DAG: %[[dim0:.*]] = tensor.dim %[[t1]], %[[c0]]
 // CHECK: %[[inserted:.*]] = tensor.insert_slice %[[t1]] into %[[filled]][5, %[[l2]]] [%[[dim0]], 10] [1, 1] : tensor into tensor
 // CHECK: return %[[inserted]]
@@ -217,7 +217,7 @@
 // CHECK-NOT: generic
 // CHECK-NOT: insert_slice
 // CHECK: %[[alloc_tensor:.*]] = bufferization.alloc_tensor(%{{.*}}) : tensor
-// CHECK: %[[copied:.*]] = linalg.copy ins(%[[t1]] : tensor) outs(%[[alloc_tensor]] : tensor) -> tensor
+// CHECK: %[[copied:.*]] = linalg.copy ins(%[[t1]] : tensor) inits(%[[alloc_tensor]] : tensor) -> tensor
 // CHECK: return %[[copied]]
 func.func @tensor_pad_nofold(%t1: tensor, %padding: index) -> tensor {
diff --git a/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir b/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-scalarize.mlir
@@ -12,7 +12,7 @@
 // CHECK: scf.yield %[[INS_2]] : tensor
 // CHECK: %[[INS_1:.*]] = tensor.insert_slice %[[RES_LOOP_2]] into %{{.*}}, 25] [1, 1] : tensor into tensor<24x25xf32>
 // CHECK: scf.yield %[[INS_1]] : tensor<24x25xf32>
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
 // CHECK: return %[[RES_LOOP_1]] : tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
@@ -7,14 +7,14 @@
 // CHECK: linalg.generic
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]
 // CHECK-SAME: ins(%{{[a-zA-Z0-9]*}}, %{{[a-zA-Z0-9]*}}, %{{[a-zA-Z0-9]*}} : tensor, tensor<256x32xf32>, tensor<64x4xi1>)
-  // CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor) {
+  // CHECK-SAME: inits(%{{[a-zA-Z0-9]*}} : tensor) {
 // CHECK: linalg.generic
 // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
 // CHECK-SAME: ins(%{{[a-zA-Z0-9]*}} : tensor)
-  // CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor) {
+  // CHECK-SAME: inits(%{{[a-zA-Z0-9]*}} : tensor) {
   %0 = linalg.matmul ins(%A, %B: tensor, tensor<256x32xf32>)
-                     outs(%C: tensor) -> tensor
+                     inits(%C: tensor) -> tensor
   return %0: tensor
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
@@ -2,7 +2,7 @@
 func.func @matmul_split(%A : tensor<16x256xf32>, %B: tensor<256x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
   %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)
-                     outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+                     inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
   return %0: tensor<16x32xf32>
 }
@@ -16,16 +16,16 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x4x64xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<4x64x32xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
 // CHECK-SAME: , iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x4x64xf32>, tensor<4x64x32xf32>) outs(%[[F]] : tensor<16x32x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x4x64xf32>, tensor<4x64x32xf32>) inits(%[[F]] : tensor<16x32x4xf32>) {
 // CHECK: arith.mulf
 // CHECK: arith.addf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<16x32x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]],
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) outs(%{{.*}} : tensor<16x32xf32>) {
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) inits(%{{.*}} : tensor<16x32xf32>) {
 // CHECK: arith.addf
 // CHECK: linalg.yield %{{.*}} : f32
 // CHECK: } -> tensor<16x32xf32>
@@ -45,7 +45,7 @@
                                           affine_map<(d0) -> ()>],
                          iterator_types = ["reduction"]}
   ins(%arg0, %arg1 : tensor<32xf32>, tensor)
-  outs(%out : tensor) {
+  inits(%out : tensor) {
   ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
     %40 = arith.subf %arg7, %arg8 : f32
     %41 = math.exp %40 : f32
@@ -64,16 +64,16 @@
 // CHECK-DAG: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
 // CHECK: %[[G:.*]] = linalg.generic
 // CHECK: {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
-// CHECK: iterator_types = ["parallel", "reduction"]} ins(%[[I1]], %{{.*}} : tensor<4x8xf32>, tensor) outs(%[[F]] : tensor<4xf32>) {
+// CHECK: iterator_types = ["parallel", "reduction"]} ins(%[[I1]], %{{.*}} : tensor<4x8xf32>, tensor) inits(%[[F]] : tensor<4xf32>) {
 // CHECK: arith.subf
 // CHECK: math.exp
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<4xf32>
-// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) outs(%{{.*}} : tensor) {
+// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) inits(%{{.*}} : tensor) {
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor
@@ -97,7 +97,7 @@
       affine_map<(d0, d1, d2) -> (d2, d0)>
     ],
     iterator_types = ["parallel", "reduction", "parallel"]
-  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) {
+  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) inits(%output : tensor<5x2xf32>) {
   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
     %3 = arith.addf %arg0, %arg1 : f32
     %4 = arith.maxf %3, %arg2 : f32
@@ -116,15 +116,15 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<4x8x2xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x4x8xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<4x8x2xf32>, tensor<5x4x8xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<4x8x2xf32>, tensor<5x4x8xf32>) inits(%[[F]] : tensor<5x2x4xf32>) {
 // CHECK: arith.addf
 // CHECK: arith.maxf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<5x2x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]}
-// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) {
+// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) inits(%{{.*}} : tensor<5x2xf32>) {
 // CHECK: arith.maxf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<5x2xf32>
@@ -140,7 +140,7 @@
 func.func @matmul_split(%A : tensor<16x256xf32>, %B: tensor<256x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
   %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)
-                     outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+                     inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
   return %0: tensor<16x32xf32>
 }
@@ -154,16 +154,16 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x64x4xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<64x4x32xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
 // CHECK-SAME: , iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x64x4xf32>, tensor<64x4x32xf32>) outs(%[[F]] : tensor<16x32x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x64x4xf32>, tensor<64x4x32xf32>) inits(%[[F]] : tensor<16x32x4xf32>) {
 // CHECK: arith.mulf
 // CHECK: arith.addf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<16x32x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]],
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) outs(%{{.*}} : tensor<16x32xf32>) {
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) inits(%{{.*}} : tensor<16x32xf32>) {
 // CHECK: arith.addf
 // CHECK: linalg.yield %{{.*}} : f32
 // CHECK: } -> tensor<16x32xf32>
@@ -183,7 +183,7 @@
                                           affine_map<(d0) -> ()>],
                          iterator_types = ["reduction"]}
   ins(%arg0, %arg1 : tensor<32xf32>, tensor)
-  outs(%out : tensor) {
+  inits(%out : tensor) {
   ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
     %40 = arith.subf %arg7, %arg8 : f32
     %41 = math.exp %40 : f32
@@ -202,16 +202,16 @@
 // CHECK-DAG: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<8x4xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
 // CHECK: %[[G:.*]] = linalg.generic
 // CHECK: {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
-// CHECK: iterator_types = ["reduction", "parallel"]} ins(%[[I1]], %{{.*}} : tensor<8x4xf32>, tensor) outs(%[[F]] : tensor<4xf32>) {
+// CHECK: iterator_types = ["reduction", "parallel"]} ins(%[[I1]], %{{.*}} : tensor<8x4xf32>, tensor) inits(%[[F]] : tensor<4xf32>) {
 // CHECK: arith.subf
 // CHECK: math.exp
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<4xf32>
-// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) outs(%{{.*}} : tensor) {
+// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) inits(%{{.*}} : tensor) {
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor
@@ -235,7 +235,7 @@
       affine_map<(d0, d1, d2) -> (d2, d0)>
     ],
     iterator_types = ["parallel", "reduction", "parallel"]
-  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) {
+  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) inits(%output : tensor<5x2xf32>) {
   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
     %3 = arith.addf %arg0, %arg1 : f32
     %4 = arith.minf %3, %arg2 : f32
@@ -254,15 +254,15 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<8x4x2xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x8x4xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) inits(%[[F]] : tensor<5x2x4xf32>) {
 // CHECK: arith.addf
 // CHECK: arith.minf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<5x2x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]}
-// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) {
+// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) inits(%{{.*}} : tensor<5x2xf32>) {
 // CHECK: arith.minf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<5x2xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-split.mlir b/mlir/test/Dialect/Linalg/transform-op-split.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split.mlir
@@ -17,7 +17,7 @@
   // CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT]][0] [42] [1] : tensor<100xf32> to tensor<42xf32>
   // CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic
   // CHECK: ins(%[[IN_SLICE_LOW]]
-  // CHECK: outs(%[[OUT_SLICE_LOW]]
+  // CHECK: inits(%[[OUT_SLICE_LOW]]
   // CHECK: linalg.index 0
   // CHECK: func.call @elem
   // CHECK: %[[RES_PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [42] [1]
@@ -26,7 +26,7 @@
   // CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[RES_PARTIAL]][42] [58] [1] : tensor<100xf32> to tensor<58xf32>
   // CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic
   // CHECK: ins(%[[IN_SLICE_HIGH]]
-  // CHECK: outs(%[[OUT_SLICE_HIGH]]
+  // CHECK: inits(%[[OUT_SLICE_HIGH]]
   // CHECK: %[[IDX:.+]] = linalg.index 0
   // CHECK: affine.apply #[[$ADD_42_MAP]](%[[IDX]])
   // CHECK: func.call @elem
@@ -35,7 +35,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
+  ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) {
   ^bb0(%0: f32, %1: f32):
     %i = linalg.index 0 : index
     %call_res = func.call @elem(%0, %i, %i) : (f32, index, index) -> f32
@@ -63,14 +63,14 @@
   // the splitting altogether.
   // CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic
   // CHECK: ins(%[[IN]]
-  // CHECK: outs(%[[OUT]]
+  // CHECK: inits(%[[OUT]]
   // CHECK: linalg.index 0
   // CHECK: func.call @elem
   %0 = linalg.generic {
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<10xf32>) outs(%arg1: tensor<10xf32>) {
+  ins(%arg0: tensor<10xf32>) inits(%arg1: tensor<10xf32>) {
   ^bb0(%0: f32, %1: f32):
     %i = linalg.index 0 : index
     %call_res = func.call @elem(%0, %i, %i) : (f32, index, index) -> f32
@@ -102,7 +102,7 @@
   // CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT:.+]][0] [%[[SPLIT_LOW]]] [1] : tensor<100xf32> to tensor
   // CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic
   // CHECK: ins(%[[IN_SLICE_LOW]]
-  // CHECK: outs(%[[OUT_SLICE_LOW]]
+  // CHECK: inits(%[[OUT_SLICE_LOW]]
   // CHECK: %[[PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [%[[SPLIT_LOW]]] [1]
   //
   // CHECK: %[[SPLIT_HIGH_2:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
@@ -111,7 +111,7 @@
   // CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[PARTIAL:.+]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_3]]] [1] : tensor<100xf32> to tensor
   // CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic
   // CHECK: ins(%[[IN_SLICE_HIGH]]
-  // CHECK: outs(%[[OUT_SLICE_HIGH]]
+  // CHECK: inits(%[[OUT_SLICE_HIGH]]
   // CHECK: %[[SPLIT_HIGH_4:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
   // CHECK: tensor.insert_slice %[[RES_SLICE_HIGH]] into %[[PARTIAL]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_4]]] [1]
   %0 = func.call @get_size() : () -> index
@@ -119,7 +119,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
+  ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) {
   ^bb0(%3: f32, %4: f32):
     %5 = arith.addf %3, %4 : f32
     linalg.yield %5 : f32
@@ -147,7 +147,7 @@
   // CHECK: %[[OUT_1:.+]] = tensor.extract_slice %[[OUT:.+]][0, 0]
   // CHECK: %[[RES_1:.+]] = linalg.generic
   // CHECK-SAME: ins(%[[IN_1]] : tensor<4x34xf32>)
-  // CHECK-SAME: outs(%[[OUT_1]] : tensor<4x34xf32>)
+  // CHECK-SAME: inits(%[[OUT_1]] : tensor<4x34xf32>)
   // CHECK: %[[PARTIAL_1:.+]] = tensor.insert_slice %[[RES_1]] into %[[OUT]]
   //
   // CHECK: %[[IN_2:.+]] = tensor.extract_slice %[[IN]]
@@ -158,14 +158,14 @@
   // CHECK: %[[OUT_21:.+]] = tensor.extract_slice %[[OUT_2]]
   // CHECK: %[[RES_21:.+]] = linalg.generic
   // CHECK-SAME: ins(%[[IN_21]] : tensor<6x16xf32>)
-  // CHECK-SAME: outs(%[[OUT_21]] : tensor<6x16xf32>)
+  // CHECK-SAME: inits(%[[OUT_21]] : tensor<6x16xf32>)
   // CHECK: %[[PARTIAL_21:.+]] = tensor.insert_slice %[[RES_21]] into %[[OUT_2]]
   //
   // CHECK: %[[IN_22:.+]] = tensor.extract_slice %[[IN_2]]
   // CHECK: %[[OUT_22:.+]] = tensor.extract_slice %[[PARTIAL_21]]
   // CHECK: %[[RES_22:.+]] = linalg.generic
   // CHECK-SAME: ins(%[[IN_22]] : tensor<6x18xf32>)
-  // CHECK-SAME: outs(%[[OUT_22]] : tensor<6x18xf32>)
+  // CHECK-SAME: inits(%[[OUT_22]] : tensor<6x18xf32>)
   // CHECK: %[[PARTIAL_22:.+]] = tensor.insert_slice %[[RES_22]] into %[[PARTIAL_21]]
   // CHECK: %[[PARTIAL_2:.+]] = tensor.insert_slice %[[PARTIAL_22]] into %[[PARTIAL_1]]
   %0 = linalg.generic {
@@ -174,7 +174,7 @@
     iterator_types = ["parallel", "parallel"]
   }
   ins(%arg0: tensor<10x34xf32>)
-  outs(%arg1: tensor<10x34xf32>) {
+  inits(%arg1: tensor<10x34xf32>) {
   ^bb0(%0: f32, %1: f32):
     %i = linalg.index 0 : index
     %j = linalg.index 1 : index
@@ -211,7 +211,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
+  ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) {
   ^bb0(%3: f32, %4: f32):
     linalg.yield %3 : f32
   } -> tensor<100xf32>
@@ -235,7 +235,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
+  ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) {
   ^bb0(%3: f32, %4: f32):
     linalg.yield %3 : f32
   } -> tensor<100xf32>
@@ -271,7 +271,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
+  ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) {
   ^bb0(%0: f32, %1: f32):
     linalg.yield %0 : f32
   } -> tensor<100xf32>
@@ -299,7 +299,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) {
+  ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) {
   ^bb0(%arg4: f32, %arg5: f32):
     %i = linalg.index 0 : index
     %call_res = func.call @elem(%arg4, %i, %i) : (f32, index, index) -> f32
@@ -310,7 +310,7 @@
     indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>],
     iterator_types = ["parallel"]
   }
-  ins(%arg2: tensor<200xf32>) outs(%arg3: tensor<200xf32>) {
+  ins(%arg2: tensor<200xf32>) inits(%arg3: tensor<200xf32>) {
   ^bb0(%arg4: f32, %arg5: f32):
     %i = linalg.index 0 : index
     %call_res = func.call @elem(%arg4, %i, %i) : (f32, index, index) -> f32
diff --git a/mlir/test/Dialect/Linalg/transform-op-tile.mlir b/mlir/test/Dialect/Linalg/transform-op-tile.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-tile.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-tile.mlir
@@ -21,13 +21,13 @@
 // CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32>
 // CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32>
 // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<4x4xf32>, tensor<4x4xf32>)
-// CHECK-SAME: outs(%[[sTC]] : tensor<4x4xf32>) -> tensor<4x4xf32>
+// CHECK-SAME: inits(%[[sTC]] : tensor<4x4xf32>) -> tensor<4x4xf32>
 // CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor<4x4xf32> into tensor<128x128xf32>
 // CHECK: scf.yield %[[TD]] : tensor<128x128xf32>
 // CHECK: scf.yield %[[TD2]] : tensor<128x128xf32>
 // CHECK: scf.yield %[[TD1]] : tensor<128x128xf32>
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
-                     outs(%arg2: tensor<128x128xf32>)
+                     inits(%arg2: tensor<128x128xf32>)
                      -> tensor<128x128xf32>
 // CHECK: return %[[TD0]] : tensor<128x128xf32>
@@ -60,14 +60,14 @@
 // CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<128x128xf32> to tensor<4x?xf32>
 // CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<128x128xf32> to tensor
 // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor<4x?xf32>)
-// CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor
+// CHECK-SAME: inits(%[[sTC]] : tensor) -> tensor
 // CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor<128x128xf32>
 // CHECK: scf.yield %[[TD]] : tensor<128x128xf32>
 // CHECK: scf.yield %[[TD2]] : tensor<128x128xf32>
 // CHECK: scf.yield %[[TD1]] : tensor<128x128xf32>
   %sz = func.call @get_dynamic_tile_size() : () -> index
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
-                     outs(%arg2: tensor<128x128xf32>)
+                     inits(%arg2: tensor<128x128xf32>)
inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> // CHECK: return %[[TD0]] : tensor<128x128xf32> @@ -91,10 +91,10 @@ %arg0: tensor<128x128xf32>, %arg1: tensor<128x128xf32>, %arg2: tensor<128x128xf32>) -> (tensor<128x128xf32>, tensor<128x128xf32>) { %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> %1 = linalg.matmul ins(%0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> return %0, %1 : tensor<128x128xf32>, tensor<128x128xf32> } @@ -116,10 +116,10 @@ %arg0: tensor<128x128xf32>, %arg1: tensor<128x128xf32>, %arg2: tensor<128x128xf32>) -> (tensor<128x128xf32>, tensor<128x128xf32>) { %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> %1 = linalg.matmul ins(%0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> return %0, %1 : tensor<128x128xf32>, tensor<128x128xf32> } diff --git a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir --- a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir @@ -12,7 +12,7 @@ // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]] // CHECK: vector.transfer_write %[[vR]], %[[C]] - %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> func.return %0 : tensor<24x25xf32> } @@ -57,7 +57,7 @@ // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]] // CHECK: vector.transfer_write %[[vR]], %[[C]] - %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> %9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> return %9 : tensor<24x25xf32> } @@ -105,7 +105,7 @@ // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]] // CHECK: vector.transfer_write %[[vR]], %[[C]] - %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> %9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> return %9 : tensor<24x25xf32> } @@ -123,7 +123,7 @@ %arg1: tensor<12x25xf32>, %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> { // expected-note @below {{non-isolated target}} - %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> func.return %0 : tensor<24x25xf32> } diff --git 
a/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir b/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir --- a/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir +++ b/mlir/test/Dialect/Linalg/transform-pack-greedily.mlir @@ -15,8 +15,8 @@ // CHECK-SAME: indexing_maps = [#[[$mk_kkmm]], #[[$kn_kknn]], #[[$mn_mmnn]]] // CHECK-SAME: ["reduction", "parallel", "parallel", "reduction", "parallel", "parallel"]} // CHECK-SAME: ins(%{{.*}} : tensor<128x8x32x8xf32>, tensor<8x8x32x16xf32>) - // CHECK-SAME: outs(%{{.*}} : tensor<128x8x8x16xf32>) - %0 = linalg.matmul ins(%A, %B : !A_mk, !B_kn) outs(%C : !C_mn) -> !C_mn + // CHECK-SAME: inits(%{{.*}} : tensor<128x8x8x16xf32>) + %0 = linalg.matmul ins(%A, %B : !A_mk, !B_kn) inits(%C : !C_mn) -> !C_mn return %0 : !C_mn } @@ -56,8 +56,8 @@ // CHECK-SAME: indexing_maps = [#[[$mk_kkmm]], #[[$kn_kknn]], #[[$mn_mmnn]]] // CHECK-SAME: ["reduction", "parallel", "parallel", "reduction", "parallel", "parallel"]} // CHECK-SAME: ins(%{{.*}} : tensor<128x8x32x8xf32>, tensor<8x8x32x16xf32>) - // CHECK-SAME: outs(%{{.*}} : tensor<8x128x8x16xf32>) - %0 = linalg.generic #mkn_trait ins(%A, %B : !A_mk, !B_nk) outs(%C : !C_nm) { + // CHECK-SAME: inits(%{{.*}} : tensor<8x128x8x16xf32>) + %0 = linalg.generic #mkn_trait ins(%A, %B : !A_mk, !B_nk) inits(%C : !C_nm) { ^bb0(%a: f32, %b: f32, %c: f32): %d = arith.mulf %a, %b : f32 %e = arith.addf %c, %d : f32 @@ -101,8 +101,8 @@ // CHECK-SAME: indexing_maps = [#[[$mk_kkmm]], #[[$kn_kknn]], #[[$mn_mmnn]]] // CHECK-SAME: ["reduction", "parallel", "parallel", "reduction", "parallel", "parallel"]} // CHECK-SAME: ins(%{{.*}} : tensor<128x8x32x8xf32>, tensor<8x8x32x16xf32>) - // CHECK-SAME: outs(%{{.*}} : tensor<8x128x8x16xf32>) - %0 = linalg.generic #mkn_trait ins(%A, %B : !A_mk, !B_nk) outs(%C : !C_nm) { + // CHECK-SAME: inits(%{{.*}} : tensor<8x128x8x16xf32>) + %0 = linalg.generic #mkn_trait ins(%A, %B : !A_mk, !B_nk) inits(%C : !C_nm) { ^bb0(%a: f32, %b: f32, %c: f32): %d = arith.mulf %a, %b : f32 %e = arith.addf %c, %d : f32 @@ -146,8 +146,8 @@ // CHECK-SAME: indexing_maps = [#[[$bmkm2_kkmm]], #[[$nkb_kknn]], #[[$nbm_mmnn]]] // CHECK-SAME: ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction", "parallel", "parallel"]} // CHECK-SAME: ins(%{{.*}} : tensor<42x128x8x33x32x8xf32>, tensor<8x8x42x32x16xf32>) - // CHECK-SAME: outs(%{{.*}} : tensor<8x42x128x8x16xf32>) - %0 = linalg.generic #mkn_trait ins(%A, %B : !A_bmkm2, !B_nkb) outs(%C : !C_nbm) { + // CHECK-SAME: inits(%{{.*}} : tensor<8x42x128x8x16xf32>) + %0 = linalg.generic #mkn_trait ins(%A, %B : !A_bmkm2, !B_nkb) inits(%C : !C_nbm) { ^bb0(%a: f32, %b: f32, %c: f32): %d = arith.mulf %a, %b : f32 %e = arith.addf %c, %d : f32 @@ -182,11 +182,11 @@ // CHECK-SAME: indexing_maps = [#[[$M1]], #[[$M2]], #[[$M3]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction", "reduction", "reduction", "parallel", "parallel", "reduction", "parallel", "parallel"] // CHECK-SAME: ins(%{{.*}} : tensor, tensor<1x2x3x3x32x16xf32>) - // CHECK-SAME: outs(%{{.*}} : tensor) + // CHECK-SAME: inits(%{{.*}} : tensor) %0 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%arg0, %c0: tensor, tensor<16x47x3x3xf32>) - outs(%arg2: tensor) -> tensor + inits(%arg2: tensor) -> tensor return %0 : tensor } @@ -210,11 +210,11 @@ // CHECK: linalg.map %mapped = linalg.map { arith.addf } ins(%arg0, %arg1 : tensor<10x100xf32>, tensor<10x100xf32>) - outs(%map_init : tensor<10x100xf32>) + inits(%map_init : tensor<10x100xf32>) // 
CHECK: linalg.reduce %res = linalg.reduce { arith.addf } ins(%mapped: tensor<10x100xf32>) - outs(%output: tensor<10xf32>) + inits(%output: tensor<10xf32>) dimensions = [1] return %res : tensor<10xf32> } diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir --- a/mlir/test/Dialect/Linalg/transform-patterns.mlir +++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir @@ -5,7 +5,7 @@ %v: memref) { linalg.dot ins(%x, %y: memref>, memref>) - outs(%v: memref) + inits(%v: memref) return } @@ -29,7 +29,7 @@ linalg.matvec ins(%A, %x: memref>, memref>) - outs(%y: memref>) + inits(%y: memref>) return } @@ -47,7 +47,7 @@ // CHECK: scf.for {{.*}} step %[[c6]] // CHECK: linalg.matvec // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) // ----- @@ -56,7 +56,7 @@ %C: memref>) { linalg.matmul ins(%A, %B: memref>, memref>) - outs(%C: memref>) + inits(%C: memref>) return } @@ -97,7 +97,7 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4]] { // CHECK: linalg.matmul // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) // ----- @@ -124,7 +124,7 @@ linalg.generic #generic_matmul_trait ins(%A, %B : memref>, memref>) - outs(%C : memref>) { + inits(%C : memref>) { ^bb(%a: f32, %b: f32, %c: f32): %d = arith.mulf %a, %b: f32 %e = arith.addf %c, %d: f32 @@ -155,7 +155,7 @@ %y: memref>) { linalg.matvec ins(%A, %x: memref>, memref>) - outs(%y: memref>) + inits(%y: memref>) return } @@ -173,7 +173,7 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c5]] // CHECK: linalg.matvec // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) // ----- @@ -182,7 +182,7 @@ %C: memref>) { linalg.matmul ins(%A, %B: memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) return } @@ -216,4 +216,4 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c40]] { // CHECK: linalg.matmul // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) diff --git a/mlir/test/Dialect/Linalg/transform-promotion.mlir b/mlir/test/Dialect/Linalg/transform-promotion.mlir --- a/mlir/test/Dialect/Linalg/transform-promotion.mlir +++ b/mlir/test/Dialect/Linalg/transform-promotion.mlir @@ -22,7 +22,7 @@ memref> to memref> linalg.matmul ins(%3, %4: memref>, memref>) - outs(%5: memref>) + inits(%5: memref>) } } } @@ -56,7 +56,7 @@ // CHECK: memref.copy %[[s2]], %[[l2]] : memref to memref // CHECK: linalg.matmul // CHECK-SAME: ins(%[[v0]], %[[v1]] : memref, memref) -// CHECK-SAME: outs(%[[v2]] : memref) +// CHECK-SAME: inits(%[[v2]] : memref) transform.sequence failures(propagate) { ^bb0(%arg1: !pdl.operation): @@ -89,7 +89,7 @@ linalg.matmul {__internal_linalg_transform__ = "_promote_first_view_"} ins(%3, %4: memref>, memref>) - outs(%5: memref>) + inits(%5: memref>) } } } @@ -116,7 +116,7 @@ // CHECK-NOT: memref.copy // CHECK: linalg.matmul // CHECK-SAME: ins(%[[v0]], %[[s1]] : memref, memref>) -// CHECK-SAME: outs(%[[s2]] : memref>) +// CHECK-SAME: inits(%[[s2]] : memref>) transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): @@ -138,7 +138,7 @@ %3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1] : memref> to memref> linalg.fill - ins(%cf : f32) outs(%3 : memref>) + ins(%cf : f32) inits(%3 : memref>) return } // CHECK-LABEL: func @aligned_promote_fill @@ -147,9 +147,9 @@ // CHECK: %[[a0:.*]] = memref.alloc() {alignment = 32 : i64} : memref<32000000xi8> // CHECK: 
%[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<32000000xi8> to memref // CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref> -// CHECK: linalg.fill ins({{.*}} : f32) outs(%[[v0]] : memref) +// CHECK: linalg.fill ins({{.*}} : f32) inits(%[[v0]] : memref) // CHECK: memref.copy %[[s0]], %[[l0]] : memref to memref -// CHECK: linalg.fill ins(%[[cf]] : f32) outs(%[[v0]] : memref) +// CHECK: linalg.fill ins(%[[cf]] : f32) inits(%[[v0]] : memref) transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): @@ -172,7 +172,7 @@ %3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1] : memref, strided<[?, 1], offset: ?>> to memref, strided<[?, ?], offset: ?>> linalg.fill ins(%cc : complex) - outs(%3 : memref, strided<[?, ?], offset: ?>>) + inits(%3 : memref, strided<[?, ?], offset: ?>>) return } // CHECK-LABEL: func @aligned_promote_fill_complex @@ -181,9 +181,9 @@ // CHECK: %[[a0:.*]] = memref.alloc() {alignment = 32 : i64} : memref<64000000xi8> // CHECK: %[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<64000000xi8> to memref> // CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref> to memref, strided<[?, 1], offset: ?>> -// CHECK: linalg.fill ins({{.*}} : complex) outs(%[[v0]] : memref>) +// CHECK: linalg.fill ins({{.*}} : complex) inits(%[[v0]] : memref>) // CHECK: memref.copy %[[s0]], %[[l0]] : memref, strided{{.*}}> to memref, strided{{.*}}> -// CHECK: linalg.fill ins(%[[cc]] : complex) outs(%[[v0]] : memref>) +// CHECK: linalg.fill ins(%[[cc]] : complex) inits(%[[v0]] : memref>) transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): diff --git a/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir b/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir --- a/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir +++ b/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir @@ -17,11 +17,11 @@ %5 = linalg.fill {__producer__} ins(%cst : f32) - outs(%D : tensor) -> tensor + inits(%D : tensor) -> tensor %6 = linalg.matmul {__producer__} ins(%A, %B : tensor, tensor) - outs(%5 : tensor) -> tensor + inits(%5 : tensor) -> tensor %7 = linalg.generic {__root__, indexing_maps = [affine_map<(d0, d1) -> (d0)>, @@ -30,7 +30,7 @@ iterator_types = ["parallel", "parallel"] } ins(%C, %6 : tensor, tensor) - outs(%D : tensor) { + inits(%D : tensor) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): %16 = arith.maxf %arg3, %cst : f32 %17 = arith.cmpf ogt, %arg2, %cst : f32 @@ -74,11 +74,11 @@ %5 = linalg.fill {__producer__} ins(%cst : f32) - outs(%D : tensor) -> tensor + inits(%D : tensor) -> tensor %6 = linalg.matmul {__producer__} ins(%A, %B : tensor, tensor) - outs(%5 : tensor) -> tensor + inits(%5 : tensor) -> tensor %7 = linalg.generic {__root__, indexing_maps = [affine_map<(d0, d1) -> (d0)>, @@ -87,7 +87,7 @@ iterator_types = ["parallel", "parallel"] } ins(%C, %6 : tensor, tensor) - outs(%D : tensor) { + inits(%D : tensor) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): %16 = arith.maxf %arg3, %cst : f32 %17 = arith.cmpf ogt, %arg2, %cst : f32 diff --git a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir --- a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir +++ b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir @@ -5,7 +5,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.mulf %arg7, %arg7 : f32 %2 = arith.addf %1, %arg9 : f32 @@ 
-33,12 +33,12 @@ // CHECK-DAG: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor // CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor // CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor -// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor +// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) inits(%[[E]] : tensor) -> tensor // CHECK: %[[L:.*]] = scf.for %[[K:.*]] = %[[C0]] to %[[D1]] step %[[C5]] iter_args(%[[ARG3:.*]] = %[[F]]) -> (tensor) { // CHECK: %[[PS:.*]] = affine.min #[[MAP2]](%[[K]])[%[[D1]]] // CHECK: %[[EXT2:.*]] = tensor.extract_slice %[[ARG0]][0, %[[K:.*]]] [%[[D0]], %[[PS]]] [1, 1] : tensor to tensor // CHECK: %[[EXT:.*]] = tensor.extract_slice %[[ARG3]][0, 0] [%[[D0]], %[[PS]]] [1, 1] : tensor to tensor -// CHECK: %[[PR:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[EXT2]] : tensor) outs(%[[EXT]] : tensor) { +// CHECK: %[[PR:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[EXT2]] : tensor) inits(%[[EXT]] : tensor) { // CHECK: arith.mulf // CHECK: arith.addf // CHECK: linalg.yield @@ -48,7 +48,7 @@ // CHECK: %[[INS:.*]] = tensor.insert_slice %[[PR]] into %[[ARG3]][0, 0] [%[[D3]], %[[D4]]] [1, 1] : tensor into tensor // CHECK: scf.yield %[[INS]] : tensor // CHECK: } -// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) outs(%[[ARG1]] : tensor) { +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) inits(%[[ARG1]] : tensor) { // CHECK: arith.addf // CHECK: linalg.yield // CHECK: } -> tensor @@ -61,7 +61,7 @@ affine_map<(d0, d1) -> (d1)>], iterator_types = ["reduction", "parallel"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %42 = arith.addf %arg7, %arg9 : f32 linalg.yield %42 : f32 @@ -97,7 +97,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.mulf %arg7, %arg7 : f32 %2 = arith.addf %1, %arg9 : f32 @@ -126,7 +126,7 @@ // CHECK-DAG: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor // CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor // CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor -// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor +// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) inits(%[[E]] : tensor) -> tensor // CHECK: %[[L:.*]] = scf.forall (%[[IV:.+]]) in (5) shared_outs(%[[ARG3:.+]] = %[[F]]) -> (tensor) { // CHECK-DAG: %[[TS0:.+]] = affine.min #[[MAP0]](%[[IV]])[%[[D1]]] // CHECK-DAG: %[[TS1:.+]] = affine.max #[[MAP1]](%[[TS0]]) @@ -134,7 +134,7 @@ // CHECK: %[[TINDEX:.+]] = affine.apply #[[MAP2]](%[[IV]])[%[[D1]]] // CHECK: %[[INCHUNK:.+]] = tensor.extract_slice %[[ARG0]][0, %[[TINDEX]]] [%[[D0]], %[[TS1]]] [1, 1] : tensor to tensor // CHECK: %[[TEMPEXT:.+]] = tensor.extract_slice %[[ET]][0] [%[[D0]]] [1] : tensor to tensor -// CHECK: %[[PARTIAL:.+]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "reduction"]} ins(%[[INCHUNK]] : tensor) outs(%[[TEMPEXT]] : tensor) { +// CHECK: %[[PARTIAL:.+]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "reduction"]} ins(%[[INCHUNK]] : tensor) 
inits(%[[TEMPEXT]] : tensor) { // CHECK: arith.mulf // CHECK: arith.addf // CHECK: linalg.yield @@ -143,7 +143,7 @@ // CHECK: tensor.parallel_insert_slice %[[PARTIAL]] into %[[ARG3]][0, %[[IV]]] [%[[D0]], 1] [1, 1] : tensor into tensor // CHECK: } // CHECK: } -// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) outs(%[[ARG1]] : tensor) { +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) inits(%[[ARG1]] : tensor) { // CHECK: arith.addf // CHECK: linalg.yield // CHECK: } -> tensor @@ -154,7 +154,7 @@ func.func @matmul_tile_parallel( %A: tensor, %B: tensor, %out: tensor) -> tensor { %matmul = linalg.matmul ins(%A, %B: tensor, tensor) - outs(%out: tensor) -> tensor + inits(%out: tensor) -> tensor return %matmul : tensor } @@ -180,7 +180,7 @@ // CHECK-DAG: %[[D3:.*]] = tensor.dim %[[ARG2]], %[[C0]] : tensor // CHECK-DAG: %[[D4:.*]] = tensor.dim %[[ARG2]], %[[C1]] : tensor // CHECK: %[[E:.*]] = tensor.empty(%[[D3]], %[[D4]]) : tensor -// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor +// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) inits(%[[E]] : tensor) -> tensor // CHECK: %[[L:.*]] = scf.forall (%[[IV:.+]]) in (5) shared_outs(%[[ARG3:.+]] = %[[F]]) -> (tensor) { // CHECK-DAG: %[[TS0:.+]] = affine.min #[[MAP0]](%[[IV]])[%[[D1]]] // CHECK-DAG: %[[TS1:.+]] = affine.max #[[MAP1]](%[[TS0]]) @@ -189,12 +189,12 @@ // CHECK: %[[INCHUNKA:.+]] = tensor.extract_slice %[[ARG0]][0, %[[TINDEX]]] [%[[D0]], %[[TS1]]] [1, 1] : tensor to tensor // CHECK: %[[INCHUNKB:.+]] = tensor.extract_slice %[[ARG1]][%[[TINDEX]], 0] [%[[TS1]], %[[D2]]] [1, 1] : tensor to tensor // CHECK: %[[TEMPEXT:.+]] = tensor.extract_slice %[[ET]][0, 0] [%[[D0]], %[[D2]]] [1, 1] : tensor to tensor -// CHECK: %[[PARTIAL:.+]] = linalg.matmul ins(%[[INCHUNKA]], %[[INCHUNKB]] : tensor, tensor) outs(%[[TEMPEXT]] : tensor) -> tensor +// CHECK: %[[PARTIAL:.+]] = linalg.matmul ins(%[[INCHUNKA]], %[[INCHUNKB]] : tensor, tensor) inits(%[[TEMPEXT]] : tensor) -> tensor // CHECK: scf.forall.in_parallel { // CHECK: tensor.parallel_insert_slice %[[PARTIAL]] into %[[ARG3]][0, 0, %[[IV]]] [%[[D0]], %[[D2]], 1] [1, 1, 1] : tensor into tensor // CHECK: } // CHECK: } -// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[L]] : tensor) outs(%[[ARG2]] : tensor) { +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP3]], #[[MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[L]] : tensor) inits(%[[ARG2]] : tensor) { // CHECK: arith.addf // CHECK: linalg.yield // CHECK: } -> tensor @@ -208,7 +208,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.mulf %arg7, %arg7 : f32 %2 = arith.addf %1, %arg9 : f32 @@ -237,7 +237,7 @@ // CHECK-DAG: %[[D0:.*]] = tensor.dim %[[ARG0]], %[[C0]] : tensor // CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor // CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor -// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor +// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) inits(%[[E]] : tensor) -> tensor // CHECK: %[[L:.*]] = scf.forall (%[[IV:.+]]) in (5) shared_outs(%[[ARG3:.+]] = %[[F]]) -> (tensor) { // CHECK: %[[ET:.+]] = 
tensor.extract_slice %[[ARG3:.+]][0, %[[IV]]] [%[[D0]], 1] [1, 1] : tensor to tensor // CHECK: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor @@ -247,7 +247,7 @@ // CHECK: %[[D3:.+]] = tensor.dim %[[ACC]], %[[C0]] : tensor // CHECK: %[[INCHUNK:.+]] = tensor.extract_slice %[[ARG0]][0, %[[IV1]]] [%[[D0]], %[[TS0]]] [1, 1] : tensor to tensor // CHECK: %[[TEMPEXT:.+]] = tensor.extract_slice %[[ACC]][0] [%[[D3]]] [1] : tensor to tensor -// CHECK: %[[PARTIAL:.+]] = linalg.generic {indexing_maps = [#[[MAP2]], #[[MAP3]]], iterator_types = ["parallel", "reduction"]} ins(%[[INCHUNK]] : tensor) outs(%[[TEMPEXT]] : tensor) { +// CHECK: %[[PARTIAL:.+]] = linalg.generic {indexing_maps = [#[[MAP2]], #[[MAP3]]], iterator_types = ["parallel", "reduction"]} ins(%[[INCHUNK]] : tensor) inits(%[[TEMPEXT]] : tensor) { // CHECK: arith.mulf // CHECK: arith.addf // CHECK: linalg.yield @@ -259,7 +259,7 @@ // CHECK: tensor.parallel_insert_slice %[[CARRY]] into %[[ARG3]][0, %[[IV]]] [%[[D0]], 1] [1, 1] : tensor into tensor // CHECK: } // CHECK: } -// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP2]], #[[MAP3]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) outs(%[[ARG1]] : tensor) { +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP2]], #[[MAP3]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) inits(%[[ARG1]] : tensor) { // CHECK: arith.addf // CHECK: linalg.yield // CHECK: } -> tensor @@ -273,7 +273,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.mulf %arg7, %arg7 : f32 %2 = arith.addf %1, %arg9 : f32 @@ -310,7 +310,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.mulf %arg7, %arg7 : f32 %2 = arith.addf %1, %arg9 : f32 @@ -337,7 +337,7 @@ func.func @fail_for_float_neutral(%arg0: tensor, %arg1: tensor) -> tensor { // expected-error @below {{'linalg.generic' op Failed to get an identity value for the reduction operation.}} // expected-note @below {{when applied to this op}} - %0 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) outs(%arg1 : tensor) { + %0 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) inits(%arg1 : tensor) { ^bb0(%in: f32, %out: f32): %1 = llvm.fmul %in, %in : f32 %2 = llvm.fadd %1, %out : f32 diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -6,7 +6,7 @@ // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584xf32> // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [0] : vector<1584xf32> to f32 linalg.dot ins(%A, %B: memref<1584xf32>, memref<1584xf32>) - outs(%C: memref) + inits(%C: memref) return } @@ -25,7 +25,7 @@ // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584x1584xf32> // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [1] : vector<1584x1584xf32> to vector<1584xf32> linalg.matvec ins(%A, %B: memref<1584x1584xf32>, memref<1584xf32>) - outs(%C: memref<1584xf32>) + inits(%C: memref<1584xf32>) return } @@ -43,7 +43,7 @@ // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584x1584x1584xf32> // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [2] : vector<1584x1584x1584xf32> to 
vector<1584x1584xf32> linalg.matmul ins(%A, %B: memref<1584x1584xf32>, memref<1584x1584xf32>) - outs(%C: memref<1584x1584xf32>) + inits(%C: memref<1584x1584xf32>) return } @@ -62,7 +62,7 @@ // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [3] : vector<1584x1584x1584x1584xf32> to vector<1584x1584x1584xf32> linalg.batch_matmul ins(%A, %B: memref<1584x1584x1584xf32>, memref<1584x1584x1584xf32>) - outs(%C: memref<1584x1584x1584xf32>) + inits(%C: memref<1584x1584x1584xf32>) return } @@ -97,7 +97,7 @@ // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xf32>, memref<8x32xf32> linalg.generic #matmul_trait ins(%A, %B : memref<8x16xf32>, memref<16x32xf32>) - outs(%C : memref<8x32xf32>) { + inits(%C : memref<8x32xf32>) { ^bb(%a: f32, %b: f32, %c: f32) : %d = arith.mulf %a, %b: f32 %e = arith.addf %c, %d: f32 @@ -137,7 +137,7 @@ // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xf32>, memref<32x8xf32> linalg.generic #matmul_transpose_out_trait ins(%A, %B : memref<8x16xf32>, memref<16x32xf32>) - outs(%C : memref<32x8xf32>) { + inits(%C : memref<32x8xf32>) { ^bb(%a: f32, %b: f32, %c: f32) : %d = arith.mulf %a, %b: f32 %e = arith.addf %c, %d: f32 @@ -166,7 +166,7 @@ %1 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor<12x128x32xf32>) - outs(%0 : tensor<128x12x32xf32>) { + inits(%0 : tensor<128x12x32xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<128x12x32xf32> @@ -204,7 +204,7 @@ // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xi32>, memref<8x32xi32> linalg.generic #matmul_trait ins(%A, %B : memref<8x16xi32>, memref<16x32xi32>) - outs(%C : memref<8x32xi32>) { + inits(%C : memref<8x32xi32>) { ^bb(%a: i32, %b: i32, %c: i32) : %d = arith.muli %a, %b: i32 %e = arith.addi %c, %d: i32 @@ -229,7 +229,7 @@ // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [2] : vector<8x32x16xf32> to vector<8x32xf32> linalg.matmul ins(%A, %B: memref<8x16xf32>, memref<16x32xf32>) - outs(%C: memref<8x32xf32>) + inits(%C: memref<8x32xf32>) return } @@ -250,7 +250,7 @@ indexing_maps = [affine_map<(m, n) -> ()>, affine_map<(m, n) -> (m, n)>], iterator_types = ["parallel", "parallel"]} ins(%arg0 : f32) - outs(%A: memref<8x16xf32>) { + inits(%A: memref<8x16xf32>) { ^bb(%0: f32, %1: f32) : linalg.yield %0 : f32 } @@ -274,7 +274,7 @@ indexing_maps = [affine_map<(m, n) -> ()>, affine_map<(m, n) -> (m, n)>], iterator_types = ["parallel", "parallel"]} ins(%arg0 : complex) - outs(%A: memref<8x16xcomplex>) { + inits(%A: memref<8x16xcomplex>) { ^bb(%0: complex, %1: complex) : linalg.yield %0 : complex } @@ -297,7 +297,7 @@ %1 = linalg.generic {indexing_maps = [#map0, #map0], iterator_types = ["parallel"]} ins(%arg0 : tensor<5xf32>) - outs(%0 : tensor<5xi32>) { + inits(%0 : tensor<5xi32>) { ^bb0(%arg1: f32, %arg2: i32): %2 = linalg.index 0 : index %11 = affine.apply affine_map<() -> (123)>() @@ -339,7 +339,7 @@ func.func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) { // CHECK: %[[V:.*]] = vector.broadcast {{.*}} : f32 to vector<8x16xf32> // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32> - linalg.fill ins(%arg0 : f32) outs(%A : memref<8x16xf32>) + linalg.fill ins(%arg0 : f32) inits(%A : memref<8x16xf32>) return } @@ -357,7 +357,7 @@ // CHECK-SAME: (%[[M:.*]]: memref, %[[val:.*]]: f32) // CHECK: %[[VEC:.*]] = vector.broadcast %[[val]] : f32 to vector // CHECK: vector.transfer_write %[[VEC]], %[[M]][] : vector, memref - linalg.fill ins(%arg0 : f32) outs(%A : 
memref) + linalg.fill ins(%arg0 : f32) inits(%A : memref) return } @@ -432,7 +432,7 @@ indexing_maps = [ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - outs(%arg0: memref<1x2x4x8xindex>) { + inits(%arg0: memref<1x2x4x8xindex>) { ^bb0(%arg1: index): // CHECK: %[[BCST:.*]] = vector.broadcast %[[CST0]] : vector<8xindex> to vector<1x2x4x8xindex> // CHECK: vector.transfer_write %[[BCST]], %[[ARG0]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {{.*}} : vector<1x2x4x8xindex>, memref<1x2x4x8xindex> @@ -460,7 +460,7 @@ indexing_maps = [ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - outs(%arg0: memref<1x2x4x8xindex>) { + inits(%arg0: memref<1x2x4x8xindex>) { ^bb0(%arg1: index): // CHECK: %[[BCST:.*]] = vector.broadcast %[[CST0]] : vector<2xindex> to vector<1x8x4x2xindex> // CHECK: %[[TRAN:.*]] = vector.transpose %[[BCST]], [0, 3, 2, 1] : vector<1x8x4x2xindex> to vector<1x2x4x8xindex> @@ -508,7 +508,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg1, %arg2: memref<4x256xf32>, memref<256xf32>) - outs( + inits( %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0 : memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, @@ -590,7 +590,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg1, %arg2: tensor<4x256xf32>, tensor<256xf32>) - outs( + inits( %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0 : tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, @@ -681,7 +681,7 @@ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%B, %A, %A, %B: memref<4x4xf32>, memref<4xf32>, memref<4xf32>, memref<4x4xf32>) - outs(%C : memref<4x4x4x4xf32>) { + inits(%C : memref<4x4x4x4xf32>) { ^bb0(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %s = arith.subf %arg0, %arg1 : f32 %a = arith.addf %arg2, %s : f32 @@ -725,7 +725,7 @@ %C: memref<16x14x7x8xf32>, %D: memref<7x14x8x16xf32>) { linalg.generic #matmul_trait ins(%A, %B, %C : memref<14x7xf32>, memref<16x14xf32>, memref<16x14x7x8xf32>) - outs(%D : memref<7x14x8x16xf32>) { + inits(%D : memref<7x14x8x16xf32>) { ^bb(%a: f32, %b: f32, %c: f32, %d: f32) : %e = arith.addf %a, %b: f32 %f = arith.addf %e, %c: f32 @@ -760,7 +760,7 @@ // CHECK: %[[R:.*]] = vector.multi_reduction , %[[MUL]], %[[V2]] [2] : vector<8x12x4xf32> to vector<8x12xf32> // CHECK: %[[W:.*]] = vector.transfer_write %[[R]], %[[ARG2]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x12xf32>, tensor<8x12xf32> %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x4xf32>, tensor<4x12xf32>) - outs(%arg2: tensor<8x12xf32>) + inits(%arg2: tensor<8x12xf32>) -> tensor<8x12xf32> // CHECK: return %[[W]] : tensor<8x12xf32> return %0 : tensor<8x12xf32> @@ -847,7 +847,7 @@ // CHECK: %[[V4:.*]] = arith.addi %[[DIM3]], %[[C3]] : index // CHECK: %[[V5:.*]] = arith.addi %[[V4]], %[[C2]] : index // CHECK: %[[INIT:.*]] = tensor.empty(%[[V1]], %[[V2]], %[[V5]]) : tensor<6x?x?x?xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) outs(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) inits(%[[INIT]] : tensor<6x?x?x?xf32>) -> 
tensor<6x?x?x?xf32> // CHECK: %[[SRCDIM:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32> // CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32> // CHECK: return %[[RESULT]] @@ -1105,7 +1105,7 @@ affine_map<(d0, d1, d2) -> (d0, d1)> ], iterator_types = ["parallel", "parallel", "reduction"] - } ins(%input : tensor<4x16x8xf32>) outs(%output : tensor<4x16xf32>) { + } ins(%input : tensor<4x16x8xf32>) inits(%output : tensor<4x16xf32>) { ^bb0(%arg0: f32, %arg1: f32): %1 = math.exp %arg0 : f32 %2 = arith.addf %1, %arg1 : f32 @@ -1148,7 +1148,7 @@ affine_map<(d0, d1, d2, d3) -> (d3, d0)> ], iterator_types = ["parallel", "reduction", "reduction", "parallel"] - } ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) outs(%output : tensor<5x2xf32>) { + } ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) inits(%output : tensor<5x2xf32>) { ^bb0(%arg0: f32, %arg1: f32, %arg2: f32): %1 = math.exp %arg0 : f32 %2 = math.exp %arg1 : f32 @@ -1177,11 +1177,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32> %ident = arith.constant -3.40282e+38 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%ident : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { + ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %max = arith.maxf %in0, %out0 : f32 linalg.yield %max : f32 @@ -1208,11 +1208,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32> %maxf32 = arith.constant 3.40282e+38 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%maxf32 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%maxf32 : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { + ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %min = arith.minf %out0, %in0 : f32 linalg.yield %min : f32 @@ -1238,11 +1238,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32> %ident = arith.constant 1.0 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%ident : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { + ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %mul = arith.mulf %in0, %out0 : f32 linalg.yield %mul : f32 @@ -1268,11 +1268,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1> %ident = arith.constant false %init = tensor.empty() : tensor<4xi1> - %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1> + %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1> %red = linalg.generic {indexing_maps = 
[affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { + ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) { ^bb0(%in0: i1, %out0: i1): %or = arith.ori %in0, %out0 : i1 linalg.yield %or : i1 @@ -1298,11 +1298,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1> %ident = arith.constant true %init = tensor.empty() : tensor<4xi1> - %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1> + %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { + ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) { ^bb0(%in0: i1, %out0: i1): %and = arith.andi %in0, %out0 : i1 linalg.yield %and : i1 @@ -1328,11 +1328,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1> %ident = arith.constant false %init = tensor.empty() : tensor<4xi1> - %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1> + %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { + ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) { ^bb0(%in0: i1, %out0: i1): %xor = arith.xori %in0, %out0 : i1 linalg.yield %xor : i1 @@ -1360,13 +1360,13 @@ // CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true]} : vector<4x4xf32>, tensor<4x4xf32> %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<4x4xf32> - %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4x4xf32>) -> tensor<4x4xf32> + %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<4x4xf32>) -> tensor<4x4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, 0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>) - outs(%fill : tensor<4x4xf32>) { + inits(%fill : tensor<4x4xf32>) { ^bb0(%arg7: f32, %arg8: f32, %arg9: f32): %40 = arith.subf %arg7, %arg8 : f32 linalg.yield %40 : f32 @@ -1396,13 +1396,13 @@ // CHECK: vector.transfer_write {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<4xf32> %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, 0)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>) - outs(%fill : tensor<4xf32>) { + inits(%fill : tensor<4xf32>) { ^bb0(%arg7: f32, %arg8: f32, %arg9: f32): %40 = arith.subf %arg7, %arg8 : f32 %41 = math.exp %40 : f32 @@ -1437,7 +1437,7 @@ // CHECK: %[[init:.*]] = tensor.empty() : tensor %0 = tensor.empty() : tensor - %1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%f0 : f32) inits(%0 : tensor) -> tensor // CHECK: %[[r:.*]] = vector.transfer_read %[[A]][%[[C0]]] // CHECK-SAME: : tensor<32xf32>, vector<32xf32> // CHECK: %[[f0:.*]] = 
vector.extractelement %[[vF0]][] : vector @@ -1451,7 +1451,7 @@ affine_map<(d0) -> ()>], iterator_types = ["reduction"]} ins(%arg0 : tensor<32xf32>) - outs(%1 : tensor) { + inits(%1 : tensor) { ^bb0(%a: f32, %b: f32): %3 = arith.addf %a, %b : f32 linalg.yield %3 : f32 @@ -1478,13 +1478,13 @@ func.func @not_projected_permutation(%arg0: tensor<8x8xf32>) -> tensor<6x6x3x3xf32> { %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<6x6x3x3xf32> - %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32> + %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32> // CHECK: linalg.generic %result = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0 + d2, d1 + d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<8x8xf32>) - outs(%fill : tensor<6x6x3x3xf32>) { + inits(%fill : tensor<6x6x3x3xf32>) { ^bb0(%arg7: f32, %arg9: f32): linalg.yield %arg7 : f32 } -> tensor<6x6x3x3xf32> @@ -1509,7 +1509,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<2x4x8xf32>, tensor<2x4xf32>) - outs(%arg2, %arg3 : tensor<2x4x8xf32>, tensor<2x4xf32>) { + inits(%arg2, %arg3 : tensor<2x4x8xf32>, tensor<2x4xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32): %1 = arith.mulf %b0, %b1 : f32 %2 = arith.addf %1, %b3 : f32 @@ -1545,7 +1545,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%arg1 : tensor<4x3xi32>) outs(%arg2 : tensor<4x7x3x2xf32>) { + } ins(%arg1 : tensor<4x3xi32>) inits(%arg2 : tensor<4x7x3x2xf32>) { ^bb0(%arg3: i32, %arg4: f32): %2 = arith.index_cast %arg3 : i32 to index %3 = tensor.extract %arg0[%2] : tensor<3xf32> @@ -1582,7 +1582,7 @@ %2 = linalg.generic { indexing_maps = [#map1], iterator_types = ["parallel", "parallel", "parallel"] - } outs(%arg2 : tensor<1x1x3xf32>) { + } inits(%arg2 : tensor<1x1x3xf32>) { ^bb0(%arg4: f32): %7 = tensor.extract %arg0[%c0, %c1] : tensor<3x3xf32> linalg.yield %7 : f32 @@ -1616,7 +1616,7 @@ %1 = linalg.generic { indexing_maps = [#map1], iterator_types = ["parallel", "parallel", "parallel"] - } outs(%arg2 : tensor<1x1x3xf32>) { + } inits(%arg2 : tensor<1x1x3xf32>) { ^bb0(%arg4: f32): %2 = linalg.index 0 : index %3 = linalg.index 1 : index @@ -1655,7 +1655,7 @@ %25 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } outs(%extracted_slice : tensor<1x4xf32>) { + } inits(%extracted_slice : tensor<1x4xf32>) { ^bb0(%out: f32): %26 = linalg.index 0 : index %27 = arith.addi %arg0, %26 : index @@ -1707,7 +1707,7 @@ %2 = linalg.generic { indexing_maps = [#map0, #map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%arg1, %arg2, %arg3 : tensor<4x3xi32>, tensor<4x3xi32>, tensor<4x7x2xf32>) outs(%arg4 : tensor<4x7x3x2xf32>) { + } ins(%arg1, %arg2, %arg3 : tensor<4x3xi32>, tensor<4x3xi32>, tensor<4x7x2xf32>) inits(%arg4 : tensor<4x7x3x2xf32>) { ^bb0(%arg5: i32, %arg6: i32, %arg7: f32, %arg8: f32): %3 = arith.index_cast %arg5 : i32 to index %4 = arith.index_cast %arg6 : i32 to index @@ -1752,7 +1752,7 @@ %c5 = arith.constant 5 : index %c0 = arith.constant 0 : index %0 = tensor.empty() : tensor<5xf32> - %1 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel"]} outs(%0 : 
tensor<5xf32>) { + %1 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel"]} inits(%0 : tensor<5xf32>) { ^bb0(%out: f32): %2 = linalg.index 0 : index %extracted = tensor.extract %arg1[%2] : tensor<5xi32> @@ -1798,7 +1798,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } outs(%extracted_slice : tensor<1x4xf32>) { + } inits(%extracted_slice : tensor<1x4xf32>) { ^bb0(%out: f32): %2 = linalg.index 1 : index %3 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg0) @@ -1840,7 +1840,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } outs(%extracted_slice : tensor<1x4xf32>) { + } inits(%extracted_slice : tensor<1x4xf32>) { ^bb0(%out: f32): %2 = linalg.index 1 : index %3 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg0) @@ -1884,7 +1884,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } outs(%extracted_slice : tensor<1x4xf32>) { + } inits(%extracted_slice : tensor<1x4xf32>) { ^bb0(%out: f32): %2 = linalg.index 1 : index %3 = arith.maxsi %2, %c79 : index @@ -1924,7 +1924,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } outs(%extracted_slice : tensor<1x4xf32>) { + } inits(%extracted_slice : tensor<1x4xf32>) { ^bb0(%out: f32): %2 = linalg.index 0 : index %3 = linalg.index 1 : index @@ -1962,7 +1962,7 @@ #map = affine_map<(d0) -> (d0)> func.func @vectorize_nd_tensor_extract_block_arg(%arg0: tensor<5x6xf32>, %arg1: tensor<5xindex>) -> tensor<5xf32> { %0 = tensor.empty() : tensor<5xf32> - %1 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg1: tensor<5xindex>) outs(%0 : tensor<5xf32>) { + %1 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg1: tensor<5xindex>) inits(%0 : tensor<5xf32>) { ^bb0(%in: index, %out: f32): %2 = linalg.index 0 : index %extracted_0 = tensor.extract %arg0[%in, %2] : tensor<5x6xf32> @@ -2001,7 +2001,7 @@ func.func @vectorize_map(%arg0: memref<64xf32>, %arg1: memref<64xf32>, %arg2: memref<64xf32>) { linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>) - outs(%arg2 : memref<64xf32>) + inits(%arg2 : memref<64xf32>) (%in: f32, %in_0: f32) { %0 = arith.addf %in, %in_0 : f32 linalg.yield %0 : f32 @@ -2025,7 +2025,7 @@ func.func @vectorize_transpose(%arg0: memref<16x32x64xf32>, %arg1: memref<32x64x16xf32>) { linalg.transpose ins(%arg0 : memref<16x32x64xf32>) - outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0] + inits(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0] return } // CHECK-LABEL: func @vectorize_transpose @@ -2044,7 +2044,7 @@ func.func @vectorize_reduce(%arg0: memref<16x32x64xf32>, %arg1: memref<16x64xf32>) { linalg.reduce ins(%arg0 : memref<16x32x64xf32>) - outs(%arg1 : memref<16x64xf32>) dimensions = [1] + inits(%arg1 : memref<16x64xf32>) dimensions = [1] (%in: f32, %init: f32) { %0 = arith.addf %in, %init : f32 linalg.yield %0 : f32 @@ -2072,7 +2072,7 @@ affine_map<(d0) -> (d0)>], iterator_types = ["parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb(%in0: f32, %in1: f32, %out: f32) : %0 = arith.addf %in0, %in1 : f32 linalg.yield %0 : f32 @@ -2106,7 +2106,7 @@ affine_map<(d0) -> (d0)>], iterator_types = ["parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { 
@@ -62,7 +62,7 @@
 // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [3] : vector<1584x1584x1584x1584xf32> to vector<1584x1584x1584xf32>
   linalg.batch_matmul
     ins(%A, %B: memref<1584x1584x1584xf32>, memref<1584x1584x1584xf32>)
-    outs(%C: memref<1584x1584x1584xf32>)
+    inits(%C: memref<1584x1584x1584xf32>)
   return
 }
@@ -97,7 +97,7 @@
 // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xf32>, memref<8x32xf32>
   linalg.generic #matmul_trait
     ins(%A, %B : memref<8x16xf32>, memref<16x32xf32>)
-    outs(%C : memref<8x32xf32>) {
+    inits(%C : memref<8x32xf32>) {
   ^bb(%a: f32, %b: f32, %c: f32) :
     %d = arith.mulf %a, %b: f32
     %e = arith.addf %c, %d: f32
@@ -137,7 +137,7 @@
 // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xf32>, memref<32x8xf32>
   linalg.generic #matmul_transpose_out_trait
     ins(%A, %B : memref<8x16xf32>, memref<16x32xf32>)
-    outs(%C : memref<32x8xf32>) {
+    inits(%C : memref<32x8xf32>) {
   ^bb(%a: f32, %b: f32, %c: f32) :
     %d = arith.mulf %a, %b: f32
     %e = arith.addf %c, %d: f32
@@ -166,7 +166,7 @@
   %1 = linalg.generic {indexing_maps = [#map0, #map1],
                        iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0 : tensor<12x128x32xf32>)
-    outs(%0 : tensor<128x12x32xf32>) {
+    inits(%0 : tensor<128x12x32xf32>) {
   ^bb0(%arg1: f32, %arg2: f32):
     linalg.yield %arg1 : f32
   } -> tensor<128x12x32xf32>
@@ -204,7 +204,7 @@
 // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xi32>, memref<8x32xi32>
   linalg.generic #matmul_trait
     ins(%A, %B : memref<8x16xi32>, memref<16x32xi32>)
-    outs(%C : memref<8x32xi32>) {
+    inits(%C : memref<8x32xi32>) {
   ^bb(%a: i32, %b: i32, %c: i32) :
     %d = arith.muli %a, %b: i32
     %e = arith.addi %c, %d: i32
@@ -229,7 +229,7 @@
 // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [2] : vector<8x32x16xf32> to vector<8x32xf32>
   linalg.matmul
     ins(%A, %B: memref<8x16xf32>, memref<16x32xf32>)
-    outs(%C: memref<8x32xf32>)
+    inits(%C: memref<8x32xf32>)
   return
 }
@@ -250,7 +250,7 @@
     indexing_maps = [affine_map<(m, n) -> ()>, affine_map<(m, n) -> (m, n)>],
     iterator_types = ["parallel", "parallel"]}
    ins(%arg0 : f32)
-   outs(%A: memref<8x16xf32>) {
+   inits(%A: memref<8x16xf32>) {
   ^bb(%0: f32, %1: f32) :
     linalg.yield %0 : f32
   }
@@ -274,7 +274,7 @@
     indexing_maps = [affine_map<(m, n) -> ()>, affine_map<(m, n) -> (m, n)>],
     iterator_types = ["parallel", "parallel"]}
    ins(%arg0 : complex)
-   outs(%A: memref<8x16xcomplex>) {
+   inits(%A: memref<8x16xcomplex>) {
   ^bb(%0: complex, %1: complex) :
     linalg.yield %0 : complex
   }
@@ -297,7 +297,7 @@
   %1 = linalg.generic {indexing_maps = [#map0, #map0],
                        iterator_types = ["parallel"]}
     ins(%arg0 : tensor<5xf32>)
-    outs(%0 : tensor<5xi32>) {
+    inits(%0 : tensor<5xi32>) {
   ^bb0(%arg1: f32, %arg2: i32):
     %2 = linalg.index 0 : index
     %11 = affine.apply affine_map<() -> (123)>()
@@ -339,7 +339,7 @@
 func.func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) {
   // CHECK: %[[V:.*]] = vector.broadcast {{.*}} : f32 to vector<8x16xf32>
   // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32>
-  linalg.fill ins(%arg0 : f32) outs(%A : memref<8x16xf32>)
+  linalg.fill ins(%arg0 : f32) inits(%A : memref<8x16xf32>)
   return
 }
@@ -357,7 +357,7 @@
 // CHECK-SAME: (%[[M:.*]]: memref, %[[val:.*]]: f32)
 //      CHECK: %[[VEC:.*]] = vector.broadcast %[[val]] : f32 to vector
 //      CHECK: vector.transfer_write %[[VEC]], %[[M]][] : vector, memref
-  linalg.fill ins(%arg0 : f32) outs(%A : memref)
+  linalg.fill ins(%arg0 : f32) inits(%A : memref)
   return
 }
@@ -432,7 +432,7 @@
     indexing_maps = [
       affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
     iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
-  outs(%arg0: memref<1x2x4x8xindex>) {
+  inits(%arg0: memref<1x2x4x8xindex>) {
   ^bb0(%arg1: index):
   // CHECK: %[[BCST:.*]] = vector.broadcast %[[CST0]] : vector<8xindex> to vector<1x2x4x8xindex>
   // CHECK: vector.transfer_write %[[BCST]], %[[ARG0]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {{.*}} : vector<1x2x4x8xindex>, memref<1x2x4x8xindex>
@@ -460,7 +460,7 @@
     indexing_maps = [
      affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
    iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
-  outs(%arg0: memref<1x2x4x8xindex>) {
+  inits(%arg0: memref<1x2x4x8xindex>) {
   ^bb0(%arg1: index):
   // CHECK: %[[BCST:.*]] = vector.broadcast %[[CST0]] : vector<2xindex> to vector<1x8x4x2xindex>
  // CHECK: %[[TRAN:.*]] = vector.transpose %[[BCST]], [0, 3, 2, 1] : vector<1x8x4x2xindex> to vector<1x2x4x8xindex>
@@ -508,7 +508,7 @@
       affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]}
     ins(%arg1, %arg2: memref<4x256xf32>, memref<256xf32>)
-    outs(
+    inits(
       %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0 :
       memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>,
       memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>,
@@ -590,7 +590,7 @@
       affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]}
     ins(%arg1, %arg2: tensor<4x256xf32>, tensor<256xf32>)
-    outs(
+    inits(
       %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0 :
       tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>,
       tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>,
@@ -681,7 +681,7 @@
       affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
     iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
     ins(%B, %A, %A, %B: memref<4x4xf32>, memref<4xf32>, memref<4xf32>, memref<4x4xf32>)
-    outs(%C : memref<4x4x4x4xf32>) {
+    inits(%C : memref<4x4x4x4xf32>) {
   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
     %s = arith.subf %arg0, %arg1 : f32
     %a = arith.addf %arg2, %s : f32
@@ -725,7 +725,7 @@
   %C: memref<16x14x7x8xf32>, %D: memref<7x14x8x16xf32>) {
   linalg.generic #matmul_trait
     ins(%A, %B, %C : memref<14x7xf32>, memref<16x14xf32>, memref<16x14x7x8xf32>)
-    outs(%D : memref<7x14x8x16xf32>) {
+    inits(%D : memref<7x14x8x16xf32>) {
   ^bb(%a: f32, %b: f32, %c: f32, %d: f32) :
     %e = arith.addf %a, %b: f32
     %f = arith.addf %e, %c: f32
@@ -760,7 +760,7 @@
 // CHECK: %[[R:.*]] = vector.multi_reduction , %[[MUL]], %[[V2]] [2] : vector<8x12x4xf32> to vector<8x12xf32>
 // CHECK: %[[W:.*]] = vector.transfer_write %[[R]], %[[ARG2]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x12xf32>, tensor<8x12xf32>
   %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x4xf32>, tensor<4x12xf32>)
-                     outs(%arg2: tensor<8x12xf32>)
+                     inits(%arg2: tensor<8x12xf32>)
     -> tensor<8x12xf32>
   // CHECK: return %[[W]] : tensor<8x12xf32>
   return %0 : tensor<8x12xf32>
@@ -847,7 +847,7 @@
 //      CHECK: %[[V4:.*]] = arith.addi %[[DIM3]], %[[C3]] : index
 //      CHECK: %[[V5:.*]] = arith.addi %[[V4]], %[[C2]] : index
 //      CHECK: %[[INIT:.*]] = tensor.empty(%[[V1]], %[[V2]], %[[V5]]) : tensor<6x?x?x?xf32>
-//      CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) outs(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32>
+//      CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) inits(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32>
 //      CHECK: %[[SRCDIM:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32>
 //      CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32>
 //      CHECK: return %[[RESULT]]
@@ -1105,7 +1105,7 @@
       affine_map<(d0, d1, d2) -> (d0, d1)>
     ],
     iterator_types = ["parallel", "parallel", "reduction"]
-  } ins(%input : tensor<4x16x8xf32>) outs(%output : tensor<4x16xf32>) {
+  } ins(%input : tensor<4x16x8xf32>) inits(%output : tensor<4x16xf32>) {
   ^bb0(%arg0: f32, %arg1: f32):
     %1 = math.exp %arg0 : f32
     %2 = arith.addf %1, %arg1 : f32
@@ -1148,7 +1148,7 @@
       affine_map<(d0, d1, d2, d3) -> (d3, d0)>
     ],
     iterator_types = ["parallel", "reduction", "reduction", "parallel"]
-  } ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) outs(%output : tensor<5x2xf32>) {
+  } ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) inits(%output : tensor<5x2xf32>) {
   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
     %1 = math.exp %arg0 : f32
     %2 = math.exp %arg1 : f32
@@ -1177,11 +1177,11 @@
 // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
   %ident = arith.constant -3.40282e+38 : f32
   %init = tensor.empty() : tensor<4xf32>
-  %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
+  %fill = linalg.fill ins(%ident : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
-                        ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) {
+                        ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) {
   ^bb0(%in0: f32, %out0: f32):
     %max = arith.maxf %in0, %out0 : f32
     linalg.yield %max : f32
@@ -1208,11 +1208,11 @@
 // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
   %maxf32 = arith.constant 3.40282e+38 : f32
   %init = tensor.empty() : tensor<4xf32>
-  %fill = linalg.fill ins(%maxf32 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
+  %fill = linalg.fill ins(%maxf32 : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
-                        ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) {
+                        ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) {
   ^bb0(%in0: f32, %out0: f32):
     %min = arith.minf %out0, %in0 : f32
     linalg.yield %min : f32
@@ -1238,11 +1238,11 @@
 // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32>
   %ident = arith.constant 1.0 : f32
   %init = tensor.empty() : tensor<4xf32>
-  %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
+  %fill = linalg.fill ins(%ident : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
-                        ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) {
+                        ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) {
   ^bb0(%in0: f32, %out0: f32):
     %mul = arith.mulf %in0, %out0 : f32
     linalg.yield %mul : f32
@@ -1268,11 +1268,11 @@
 // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
   %ident = arith.constant false
   %init = tensor.empty() : tensor<4xi1>
-  %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
+  %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
-                        ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
+                        ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) {
   ^bb0(%in0: i1, %out0: i1):
     %or = arith.ori %in0, %out0 : i1
     linalg.yield %or : i1
@@ -1298,11 +1298,11 @@
 // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
   %ident = arith.constant true
   %init = tensor.empty() : tensor<4xi1>
-  %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
+  %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
-                        ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
+                        ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) {
   ^bb0(%in0: i1, %out0: i1):
     %and = arith.andi %in0, %out0 : i1
     linalg.yield %and : i1
@@ -1328,11 +1328,11 @@
 // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1>
   %ident = arith.constant false
   %init = tensor.empty() : tensor<4xi1>
-  %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1>
+  %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
-                        ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) {
+                        ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) {
   ^bb0(%in0: i1, %out0: i1):
     %xor = arith.xori %in0, %out0 : i1
     linalg.yield %xor : i1
@@ -1360,13 +1360,13 @@
 // CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true]} : vector<4x4xf32>, tensor<4x4xf32>
   %c0 = arith.constant 0.0 : f32
   %init = tensor.empty() : tensor<4x4xf32>
-  %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4x4xf32>) -> tensor<4x4xf32>
+  %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<4x4xf32>) -> tensor<4x4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0, 0)>,
                                           affine_map<(d0, d1) -> (d0, d1)>],
                          iterator_types = ["parallel", "parallel"]}
                         ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>)
-                        outs(%fill : tensor<4x4xf32>) {
+                        inits(%fill : tensor<4x4xf32>) {
   ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
     %40 = arith.subf %arg7, %arg8 : f32
     linalg.yield %40 : f32
@@ -1396,13 +1396,13 @@
 // CHECK: vector.transfer_write {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<4xf32>
   %c0 = arith.constant 0.0 : f32
   %init = tensor.empty() : tensor<4xf32>
-  %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32>
+  %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32>
   %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                                           affine_map<(d0, d1) -> (d0, 0)>,
                                           affine_map<(d0, d1) -> (d0)>],
                          iterator_types = ["parallel", "reduction"]}
                         ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>)
-                        outs(%fill : tensor<4xf32>) {
+                        inits(%fill : tensor<4xf32>) {
   ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
     %40 = arith.subf %arg7, %arg8 : f32
     %41 = math.exp %40 : f32
@@ -1437,7 +1437,7 @@
   //      CHECK: %[[init:.*]] = tensor.empty() : tensor
   %0 = tensor.empty() : tensor
-  %1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor) -> tensor
+  %1 = linalg.fill ins(%f0 : f32) inits(%0 : tensor) -> tensor
   //      CHECK: %[[r:.*]] = vector.transfer_read %[[A]][%[[C0]]]
   // CHECK-SAME:   : tensor<32xf32>, vector<32xf32>
   //      CHECK: %[[f0:.*]] = vector.extractelement %[[vF0]][] : vector
@@ -1451,7 +1451,7 @@
                                          affine_map<(d0) -> ()>],
          iterator_types = ["reduction"]}
          ins(%arg0 : tensor<32xf32>)
-         outs(%1 : tensor) {
+         inits(%1 : tensor) {
     ^bb0(%a: f32, %b: f32):
       %3 = arith.addf %a, %b : f32
       linalg.yield %3 : f32
@@ -1478,13 +1478,13 @@
 func.func @not_projected_permutation(%arg0: tensor<8x8xf32>) -> tensor<6x6x3x3xf32> {
   %c0 = arith.constant 0.0 : f32
   %init = tensor.empty() : tensor<6x6x3x3xf32>
-  %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32>
+  %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32>
   // CHECK: linalg.generic
   %result = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0 + d2, d1 + d3)>,
                                              affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
                             iterator_types = ["parallel", "parallel", "parallel", "parallel"]}
     ins(%arg0 : tensor<8x8xf32>)
-    outs(%fill : tensor<6x6x3x3xf32>) {
+    inits(%fill : tensor<6x6x3x3xf32>) {
   ^bb0(%arg7: f32, %arg9: f32):
     linalg.yield %arg7 : f32
   } -> tensor<6x6x3x3xf32>
@@ -1509,7 +1509,7 @@
       affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel", "reduction"]}
     ins(%arg0, %arg1 : tensor<2x4x8xf32>, tensor<2x4xf32>)
-    outs(%arg2, %arg3 : tensor<2x4x8xf32>, tensor<2x4xf32>) {
+    inits(%arg2, %arg3 : tensor<2x4x8xf32>, tensor<2x4xf32>) {
   ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32):
     %1 = arith.mulf %b0, %b1 : f32
     %2 = arith.addf %1, %b3 : f32
@@ -1545,7 +1545,7 @@
   %1 = linalg.generic {
     indexing_maps = [#map0, #map1],
     iterator_types = ["parallel", "parallel", "parallel", "parallel"]
-  } ins(%arg1 : tensor<4x3xi32>) outs(%arg2 : tensor<4x7x3x2xf32>) {
+  } ins(%arg1 : tensor<4x3xi32>) inits(%arg2 : tensor<4x7x3x2xf32>) {
   ^bb0(%arg3: i32, %arg4: f32):
     %2 = arith.index_cast %arg3 : i32 to index
    %3 = tensor.extract %arg0[%2] : tensor<3xf32>
@@ -1582,7 +1582,7 @@
   %2 = linalg.generic {
     indexing_maps = [#map1],
     iterator_types = ["parallel", "parallel", "parallel"]
-  } outs(%arg2 : tensor<1x1x3xf32>) {
+  } inits(%arg2 : tensor<1x1x3xf32>) {
   ^bb0(%arg4: f32):
     %7 = tensor.extract %arg0[%c0, %c1] : tensor<3x3xf32>
     linalg.yield %7 : f32
@@ -1616,7 +1616,7 @@
   %1 = linalg.generic {
     indexing_maps = [#map1],
     iterator_types = ["parallel", "parallel", "parallel"]
-  } outs(%arg2 : tensor<1x1x3xf32>) {
+  } inits(%arg2 : tensor<1x1x3xf32>) {
   ^bb0(%arg4: f32):
     %2 = linalg.index 0 : index
     %3 = linalg.index 1 : index
@@ -1655,7 +1655,7 @@
   %25 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } outs(%extracted_slice : tensor<1x4xf32>) {
+  } inits(%extracted_slice : tensor<1x4xf32>) {
   ^bb0(%out: f32):
     %26 = linalg.index 0 : index
     %27 = arith.addi %arg0, %26 : index
@@ -1707,7 +1707,7 @@
   %2 = linalg.generic {
     indexing_maps = [#map0, #map0, #map1, #map2],
     iterator_types = ["parallel", "parallel", "parallel", "parallel"]
-  } ins(%arg1, %arg2, %arg3 : tensor<4x3xi32>, tensor<4x3xi32>, tensor<4x7x2xf32>) outs(%arg4 : tensor<4x7x3x2xf32>) {
+  } ins(%arg1, %arg2, %arg3 : tensor<4x3xi32>, tensor<4x3xi32>, tensor<4x7x2xf32>) inits(%arg4 : tensor<4x7x3x2xf32>) {
   ^bb0(%arg5: i32, %arg6: i32, %arg7: f32, %arg8: f32):
     %3 = arith.index_cast %arg5 : i32 to index
     %4 = arith.index_cast %arg6 : i32 to index
@@ -1752,7 +1752,7 @@
   %c5 = arith.constant 5 : index
   %c0 = arith.constant 0 : index
   %0 = tensor.empty() : tensor<5xf32>
-  %1 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel"]} outs(%0 : tensor<5xf32>) {
+  %1 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel"]} inits(%0 : tensor<5xf32>) {
   ^bb0(%out: f32):
     %2 = linalg.index 0 : index
     %extracted = tensor.extract %arg1[%2] : tensor<5xi32>
@@ -1798,7 +1798,7 @@
   %1 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } outs(%extracted_slice : tensor<1x4xf32>) {
+  } inits(%extracted_slice : tensor<1x4xf32>) {
   ^bb0(%out: f32):
     %2 = linalg.index 1 : index
     %3 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg0)
@@ -1840,7 +1840,7 @@
   %1 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } outs(%extracted_slice : tensor<1x4xf32>) {
+  } inits(%extracted_slice : tensor<1x4xf32>) {
   ^bb0(%out: f32):
     %2 = linalg.index 1 : index
     %3 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%2, %arg0)
@@ -1884,7 +1884,7 @@
   %1 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } outs(%extracted_slice : tensor<1x4xf32>) {
+  } inits(%extracted_slice : tensor<1x4xf32>) {
   ^bb0(%out: f32):
     %2 = linalg.index 1 : index
     %3 = arith.maxsi %2, %c79 : index
@@ -1924,7 +1924,7 @@
   %1 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } outs(%extracted_slice : tensor<1x4xf32>) {
+  } inits(%extracted_slice : tensor<1x4xf32>) {
   ^bb0(%out: f32):
     %2 = linalg.index 0 : index
     %3 = linalg.index 1 : index
@@ -1962,7 +1962,7 @@
 #map = affine_map<(d0) -> (d0)>
 func.func @vectorize_nd_tensor_extract_block_arg(%arg0: tensor<5x6xf32>, %arg1: tensor<5xindex>) -> tensor<5xf32> {
   %0 = tensor.empty() : tensor<5xf32>
-  %1 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg1: tensor<5xindex>) outs(%0 : tensor<5xf32>) {
+  %1 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%arg1: tensor<5xindex>) inits(%0 : tensor<5xf32>) {
   ^bb0(%in: index, %out: f32):
     %2 = linalg.index 0 : index
     %extracted_0 = tensor.extract %arg0[%in, %2] : tensor<5x6xf32>
@@ -2001,7 +2001,7 @@
 func.func @vectorize_map(%arg0: memref<64xf32>,
     %arg1: memref<64xf32>, %arg2: memref<64xf32>) {
   linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>)
-             outs(%arg2 : memref<64xf32>)
+             inits(%arg2 : memref<64xf32>)
     (%in: f32, %in_0: f32) {
       %0 = arith.addf %in, %in_0 : f32
       linalg.yield %0 : f32
@@ -2025,7 +2025,7 @@
 func.func @vectorize_transpose(%arg0: memref<16x32x64xf32>,
                                %arg1: memref<32x64x16xf32>) {
   linalg.transpose ins(%arg0 : memref<16x32x64xf32>)
-                   outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0]
+                   inits(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0]
   return
 }
 // CHECK-LABEL: func @vectorize_transpose
@@ -2044,7 +2044,7 @@
 func.func @vectorize_reduce(%arg0: memref<16x32x64xf32>,
                             %arg1: memref<16x64xf32>) {
   linalg.reduce ins(%arg0 : memref<16x32x64xf32>)
-                outs(%arg1 : memref<16x64xf32>) dimensions = [1]
+                inits(%arg1 : memref<16x64xf32>) dimensions = [1]
     (%in: f32, %init: f32) {
       %0 = arith.addf %in, %init : f32
       linalg.yield %0 : f32
@@ -2072,7 +2072,7 @@
                      affine_map<(d0) -> (d0)>],
    iterator_types = ["parallel"]
   } ins(%arg0, %arg1 : tensor, tensor)
-    outs(%arg2 : tensor) {
+    inits(%arg2 : tensor) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
@@ -2106,7 +2106,7 @@
                      affine_map<(d0) -> (d0)>],
    iterator_types = ["parallel"]
   } ins(%arg0, %arg1 : tensor, tensor)
-    outs(%arg2 : tensor) {
+    inits(%arg2 : tensor) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
@@ -2140,7 +2140,7 @@
                      affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]
   } ins(%arg0, %arg1 : tensor, tensor)
-    outs(%arg2 : tensor) {
+    inits(%arg2 : tensor) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
@@ -2178,7 +2178,7 @@
                      affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]
   } ins(%arg0, %arg1 : tensor, tensor)
-    outs(%arg2 : tensor) {
+    inits(%arg2 : tensor) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
@@ -2213,7 +2213,7 @@
                      affine_map<(d0, d1) -> (d0)>],
    iterator_types = ["parallel", "reduction"]
   } ins(%arg0 : tensor)
-    outs(%arg1 : tensor) {
+    inits(%arg1 : tensor) {
   ^bb(%in: f32, %out: f32) :
     %0 = arith.addf %in, %out : f32
     linalg.yield %0 : f32
@@ -2251,7 +2251,7 @@
                      affine_map<(d0, d1, d2) -> (d2, d1)>],
    iterator_types = ["reduction", "parallel", "parallel"]
   } ins(%arg0 : tensor)
-    outs(%arg1 : tensor) {
+    inits(%arg1 : tensor) {
   ^bb(%in: f32, %out: f32) :
     %0 = arith.addf %in, %out : f32
     linalg.yield %0 : f32
@@ -2295,7 +2295,7 @@
     %expanded = tensor.expand_shape %extracted_slice [[0, 1]] : tensor into tensor<1x?xf32>
     %extracted_slice_0 = tensor.extract_slice %arg0[0, %arg3] [1, %arg2] [1, 1] : tensor<1x?xf32> to tensor
     %extracted_slice_1 = tensor.extract_slice %expanded[0, %arg3] [1, %arg2] [1, 1] : tensor<1x?xf32> to tensor
-    %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%extracted_slice_0 : tensor) outs(%extracted_slice_1 : tensor) {
+    %2 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%extracted_slice_0 : tensor) inits(%extracted_slice_1 : tensor) {
    ^bb0(%in: f32, %out: f32):
       %3 = arith.addf %in, %out : f32
       linalg.yield %3 : f32
@@ -2325,8 +2325,8 @@
   %cst_6 = arith.constant 4.000000e+00 : f32
   %1 = scf.for %arg0 = %c0 to %c64 step %c4 iter_args(%arg1 = %input) -> (tensor<120x64xf32>) {
     %extracted_slice = tensor.extract_slice %arg1[%c0, %arg0] [1, 4] [1, 1] : tensor<120x64xf32> to tensor<1x4xf32>
-    %10 = linalg.fill {__internal_linalg_transform__ = "1"} ins(%cst_6 : f32) outs(%extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32>
-    %11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} outs(%10 : tensor<1x4xf32>) {
+    %10 = linalg.fill {__internal_linalg_transform__ = "1"} ins(%cst_6 : f32) inits(%extracted_slice : tensor<1x4xf32>) -> tensor<1x4xf32>
+    %11 = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} inits(%10 : tensor<1x4xf32>) {
     ^bb0(%out: f32):
       %12 = linalg.index 0 : index
       %13 = arith.addi %arg0, %12 : index
@@ -2362,7 +2362,7 @@
   %0 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>,
                                         affine_map<(d0) -> ()>],
                        iterator_types = ["reduction"]}
-    ins(%arg0 : tensor<0xf32>) outs(%arg1 : tensor) {
+    ins(%arg0 : tensor<0xf32>) inits(%arg1 : tensor) {
   ^bb0(%in: f32, %out: f32):
     %12 = arith.addf %out, %in : f32
     linalg.yield %12 : f32
@@ -2387,7 +2387,7 @@
                      affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]
   } ins(%arg0, %arg1 : tensor<8x?xf32>, tensor<8x?xf32>)
-    outs(%arg2 : tensor<8x?xf32>) {
+    inits(%arg2 : tensor<8x?xf32>) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
@@ -2428,7 +2428,7 @@
   %2 = linalg.generic {
     indexing_maps = [#map1],
    iterator_types = ["parallel", "parallel"]
-  } outs(%arg1 : tensor) {
+  } inits(%arg1 : tensor) {
   ^bb0(%arg3: f32):
     %7 = tensor.extract %arg0[%c0, %c1] : tensor
     linalg.yield %7 : f32
@@ -2479,7 +2479,7 @@
                      affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]
   } ins(%arg0, %arg1 : tensor<8x32xf32>, tensor<8x32xf32>)
-    outs(%arg2 : tensor<8x32xf32>) {
+    inits(%arg2 : tensor<8x32xf32>) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
@@ -2506,7 +2506,7 @@
                      affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]
   } ins(%arg0, %arg1 : tensor<8x30xf32>, tensor<8x30xf32>)
-    outs(%arg2 : tensor<8x30xf32>) {
+    inits(%arg2 : tensor<8x30xf32>) {
   ^bb(%in0: f32, %in1: f32, %out: f32) :
     %0 = arith.addf %in0, %in1 : f32
     linalg.yield %0 : f32
diff --git a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir
--- a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir
+++ b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir
@@ -4,7 +4,7 @@
   linalg.conv_1d_nwc_wcf
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x6x3xf32>, memref<1x3x8xf32>)
-    outs(%output : memref<4x2x8xf32>)
+    inits(%output : memref<4x2x8xf32>)
   return
 }
@@ -67,7 +67,7 @@
   linalg.conv_1d_nwc_wcf
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x6x3xi8>, memref<1x3x8xi8>)
-    outs(%output : memref<4x2x8xi32>)
+    inits(%output : memref<4x2x8xi32>)
   return
 }
@@ -129,7 +129,7 @@
   linalg.conv_1d_nwc_wcf
     {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x6x3xf32>, memref<2x3x8xf32>)
-    outs(%output : memref<4x2x8xf32>)
+    inits(%output : memref<4x2x8xf32>)
   return
 }
@@ -206,7 +206,7 @@
   linalg.conv_1d_nwc_wcf
     {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %filter : memref<4x6x3xf32>, memref<2x3x8xf32>)
-    outs(%output : memref<4x2x8xf32>)
+    inits(%output : memref<4x2x8xf32>)
   return
 }
@@ -255,7 +255,7 @@
   linalg.conv_1d_ncw_fcw
    {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
    ins(%input, %filter : memref<4x3x6xf32>, memref<8x3x1xf32>)
-   outs(%output : memref<4x8x2xf32>)
+   inits(%output : memref<4x8x2xf32>)
   return
 }
@@ -324,7 +324,7 @@
   linalg.conv_1d_ncw_fcw
    {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
    ins(%input, %filter : memref<4x3x6xf32>, memref<8x3x2xf32>)
-   outs(%output : memref<4x8x2xf32>)
+   inits(%output : memref<4x8x2xf32>)
   return
 }
@@ -409,7 +409,7 @@
   linalg.conv_1d_ncw_fcw
    {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
    ins(%input, %filter : memref<4x3x6xf32>, memref<8x3x2xf32>)
-   outs(%output : memref<4x8x2xf32>)
+   inits(%output : memref<4x8x2xf32>)
   return
 }
@@ -465,7 +465,7 @@
 func.func @conv1d_8_tensor(%input: tensor<11xf32>, %filter: tensor<4xf32>, %output: tensor<8xf32>) -> tensor<8xf32> {
   %0 = linalg.conv_1d ins(%input, %filter : tensor<11xf32>, tensor<4xf32>)
-                      outs(%output : tensor<8xf32>) -> tensor<8xf32>
+                      inits(%output : tensor<8xf32>) -> tensor<8xf32>
   return %0 : tensor<8xf32>
 }
@@ -520,7 +520,7 @@
   linalg.depthwise_conv_1d_nwc_wc
     {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %filter : memref<3x5x4xf32>, memref<2x4xf32>)
-    outs(%output : memref<3x2x4xf32>)
+    inits(%output : memref<3x2x4xf32>)
   return
 }
@@ -561,7 +561,7 @@
   linalg.depthwise_conv_1d_nwc_wc
    {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
    ins(%input, %filter : memref<3x5x4xi8>, memref<2x4xi8>)
-   outs(%output : memref<3x2x4xi32>)
+   inits(%output : memref<3x2x4xi32>)
   return
 }
@@ -606,7 +606,7 @@
   linalg.conv_1d_nwc_wcf
     {dilations = dense<1> : vector<1xi64>, strides = dense<1> : vector<1xi64>}
     ins(%input, %filter : memref<1x2x3xf16>, memref<1x3x2xf16>)
-    outs(%output : memref<1x2x2xf32>)
+    inits(%output : memref<1x2x2xf32>)
   return
 }
@@ -631,7 +631,7 @@
   linalg.pooling_nwc_sum
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x4x3xf32>, memref<1xf32>)
-    outs(%output : memref<4x2x3xf32>)
+    inits(%output : memref<4x2x3xf32>)
   return
 }
@@ -657,7 +657,7 @@
   linalg.pooling_nwc_max
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x4x3xf32>, memref<1xf32>)
-    outs(%output : memref<4x2x3xf32>)
+    inits(%output : memref<4x2x3xf32>)
   return
 }
@@ -685,7 +685,7 @@
   linalg.pooling_nwc_sum
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x4x3xi8>, memref<1xi8>)
-    outs(%output : memref<4x2x3xi32>)
+    inits(%output : memref<4x2x3xi32>)
   return
 }
@@ -717,7 +717,7 @@
   linalg.pooling_nwc_max
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x4x3xi8>, memref<1xi8>)
-    outs(%output : memref<4x2x3xi32>)
+    inits(%output : memref<4x2x3xi32>)
   return
 }
@@ -747,7 +747,7 @@
   linalg.pooling_nwc_sum
     {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x6x3xf32>, memref<2xf32>)
-    outs(%output : memref<4x2x3xf32>)
+    inits(%output : memref<4x2x3xf32>)
   return
 }
@@ -778,7 +778,7 @@
   linalg.pooling_ncw_sum
     {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x3x4xf32>, memref<1xf32>)
-    outs(%output : memref<4x3x2xf32>)
+    inits(%output : memref<4x3x2xf32>)
   return
 }
@@ -808,7 +808,7 @@
   linalg.pooling_nwc_sum
     {dilations = dense<1> : vector<1xi64>, strides = dense<1> : vector<1xi64>}
     ins(%input, %filter : memref<1x2x3xf16>, memref<1xf16>)
-    outs(%output : memref<1x2x3xf32>)
+    inits(%output : memref<1x2x3xf32>)
   return
 }
@@ -829,7 +829,7 @@
   linalg.pooling_nwc_sum
     {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %filter : memref<4x4x3xf32>, memref<2xf32>)
-    outs(%output : memref<4x2x3xf32>)
+    inits(%output : memref<4x2x3xf32>)
   return
 }
@@ -852,7 +852,7 @@
   linalg.pooling_ncw_sum
     {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>}
     ins(%input, %filter : memref<4x3x6xf32>, memref<2xf32>)
-    outs(%output : memref<4x3x2xf32>)
+    inits(%output : memref<4x3x2xf32>)
   return
 }
@@ -885,7 +885,7 @@
   linalg.pooling_ncw_sum
     {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins(%input, %filter : memref<4x2x5xf32>, memref<2xf32>)
-    outs(%output : memref<4x2x3xf32>)
+    inits(%output : memref<4x2x3xf32>)
   return
 }
diff --git a/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir b/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir
--- a/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir
+++ b/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir
@@ -5,9 +5,9 @@
   %c2 = arith.constant 2 : index
   %cst_0 = arith.constant -0.000000e+00 : f32
   %0 = memref.alloc() : memref<128x384xf32>
-  linalg.fill ins(%cst_0 : f32) outs(%0 : memref<128x384xf32>)
+  linalg.fill ins(%cst_0 : f32) inits(%0 : memref<128x384xf32>)
   %2 = memref.alloc() : memref<128xf32>
-  linalg.fill ins(%cst_0 : f32) outs(%2 : memref<128xf32>)
+  linalg.fill ins(%cst_0 : f32) inits(%2 : memref<128xf32>)
   scf.forall (%arg0) in (%c2) {
     %7 = affine.min affine_map<(d0) -> (d0 * -64 + 128, 64)>(%arg0)
     %8 = affine.max affine_map<(d0) -> (0, d0)>(%7)
@@ -21,12 +21,12 @@
     %12 = memref.subview %2[%9] [%10] [1] : memref<128xf32> to memref (d0 + s0)>>
-    // CHECK: linalg.generic {{.*}} ins(%{{.*}} : memref) outs(%{{.*}} : memref)
+    // CHECK: linalg.generic {{.*}} ins(%{{.*}} : memref) inits(%{{.*}} : memref)
     linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
                     iterator_types = ["parallel", "reduction"]}
                     ins(%11 : memref (d0 * 384 + s0 + d1)>>)
-                    outs(%12 : memref (d0 + s0)>>) {
+                    inits(%12 : memref (d0 + s0)>>) {
     ^bb0(%arg1: f32, %arg2: f32):
       %14 = arith.addf %arg1, %arg2 : f32
       linalg.yield %14 : f32
diff --git a/mlir/test/Dialect/SCF/loop-pipelining.mlir b/mlir/test/Dialect/SCF/loop-pipelining.mlir
--- a/mlir/test/Dialect/SCF/loop-pipelining.mlir
+++ b/mlir/test/Dialect/SCF/loop-pipelining.mlir
@@ -566,7 +566,7 @@
 // CHECK-SAME:     iter_args(%[[IA:.+]] = %[[PAV0]], %[[IB:.+]] = %[[PBV0:.+]])
 //      CHECK:   %[[CV:.+]] = memref.subview %[[ARG2]]
 //      CHECK:   linalg.generic
-// CHECK-SAME:     ins(%[[IA]], %[[IB]], %{{.*}} : {{.*}}) outs(%[[CV]] :
+// CHECK-SAME:     ins(%[[IA]], %[[IB]], %{{.*}} : {{.*}}) inits(%[[CV]] :
 //      CHECK:   %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]]
 //      CHECK:   %[[ASV:.+]] = memref.subview %[[ARG0]][%[[NEXT]]] [8] [1] :
 //      CHECK:   %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]] :
@@ -581,7 +581,7 @@
 //      CHECK: }
 //      CHECK: %[[CV:.+]] = memref.subview %[[ARG2]][%[[C3]]] [8] [1] :
 //      CHECK: linalg.generic
-// CHECK-SAME:   ins(%[[R]]#0, %[[R]]#1, %{{.*}} : {{.*}}) outs(%[[CV]] :
+// CHECK-SAME:   ins(%[[R]]#0, %[[R]]#1, %{{.*}} : {{.*}}) inits(%[[CV]] :
 #map = affine_map<(d0)[s0]->(d0 + s0)>
@@ -615,7 +615,7 @@
   %C_view = memref.subview %result[%i0][8][1] { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : memref to memref<8xf32, #map>
   %scalar = arith.addf %cf, %cf {__test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1} : f32
   linalg.generic #linalg_attrs ins(%a_buf_view, %b_buf_view, %scalar : memref<8xf32, #map>, memref<8xf32, #map>, f32)
-    outs(%C_view: memref<8xf32, #map>) {
+    inits(%C_view: memref<8xf32, #map>) {
   ^bb0(%a: f32, %b: f32, %s: f32, %c: f32):
     %add = arith.addf %a, %b : f32
     %accum = arith.addf %add, %c : f32
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
--- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir
@@ -620,9 +620,9 @@
     // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]}
     %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32>
     // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
-    %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor<1xf32>) -> tensor<1xf32>
+    %7 = linalg.fill ins(%cst : f32) inits(%6 : tensor<1xf32>) -> tensor<1xf32>
     // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
-    %8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<1xf32>) -> tensor<1xf32>
+    %8 = linalg.fill ins(%cst : f32) inits(%7 : tensor<1xf32>) -> tensor<1xf32>
     scf.forall.in_parallel {
       // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]}
@@ -647,14 +647,14 @@
   %0 = bufferization.alloc_tensor() : tensor<4xf32>
   // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<4xf32>) -> tensor<4xf32>
   %2 = scf.for %arg5 = %arg2 to %arg3 step %arg4 iter_args(%arg6 = %arg1) -> (tensor<4xf32>) {
     // CHECK: tensor.extract {{.*}} {__inplace_operands_attr__ = ["true", "none"]}
     %4 = tensor.extract %1[%arg4] : tensor<4xf32>
     vector.print %4 : f32
     // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
-    %5 = linalg.fill ins(%cst2 : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32>
+    %5 = linalg.fill ins(%cst2 : f32) inits(%0 : tensor<4xf32>) -> tensor<4xf32>
     scf.yield %5 : tensor<4xf32>
   }
@@ -677,14 +677,14 @@
   %0 = bufferization.alloc_tensor() : tensor<4xf32>
   // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<4xf32>) -> tensor<4xf32>
   %2 = scf.for %arg5 = %arg2 to %arg3 step %arg4 iter_args(%arg6 = %arg1) -> (tensor<4xf32>) {
     // CHECK: tensor.extract {{.*}} {__inplace_operands_attr__ = ["true", "none"]}
     %4 = tensor.extract %1[%arg4] : tensor<4xf32>
     vector.print %4 : f32
     // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]}
-    %5 = linalg.fill ins(%cst2 : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
+    %5 = linalg.fill ins(%cst2 : f32) inits(%1 : tensor<4xf32>) -> tensor<4xf32>
     scf.yield %5 : tensor<4xf32>
   }
@@ -693,7 +693,7 @@
   %6 = tensor.extract %1[%arg4] : tensor<4xf32>
   vector.print %6 : f32
   // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
-  %7 = linalg.fill ins(%cst3 : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32>
+  %7 = linalg.fill ins(%cst3 : f32) inits(%1 : tensor<4xf32>) -> tensor<4xf32>
   return %2, %7 : tensor<4xf32>, tensor<4xf32>
 }
@@ -768,7 +768,7 @@
     // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true"]}
     %2 = tensor.extract_slice %t[0][4][1] : tensor<10xf32> to tensor<4xf32>
     // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]}
-    %filled = linalg.fill ins(%cst : f32) outs(%2 : tensor<4xf32>) -> tensor<4xf32>
+    %filled = linalg.fill ins(%cst : f32) inits(%2 : tensor<4xf32>) -> tensor<4xf32>
     %3 = tensor.extract %filled[%a] : tensor<4xf32>
     vector.print %3 : f32
   }
diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir
@@ -55,7 +55,7 @@
   // CHECK: %[[clone:.*]] = bufferization.clone %[[alloc]]
   // CHECK: scf.for {{.*}} iter_args(%{{.*}} = %[[clone]])
   %0 = scf.for %iv = %lb to %ub step %c1 iter_args(%1 = %A) -> tensor {
-    %r = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor
+    %r = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor
     scf.yield %B : tensor
   }
   %1 = tensor.extract %0[%c1] : tensor
@@ -547,8 +547,8 @@
   %2 = scf.forall (%arg3) in (%idx2) shared_outs(%o = %arg2) -> (tensor) {
       // CHECK: %[[subview:.*]] = memref.subview %[[arg2]][5] [%[[idx]]] [1]
       %6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor to tensor
-      // CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview]] : memref) -> tensor
+      // CHECK: linalg.fill ins(%{{.*}}) inits(%[[subview]] : memref) -> tensor
       // Self-copy will DCE away later.
    // CHECK: memref.copy %[[subview]], %[[subview]]
@@ -594,8 +594,8 @@
   // CHECK: %[[subview1:.*]] = memref.subview %[[alloc1]][5] [%[[idx]]] [1]
   %6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor to tensor
-  // CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview1]] : memref) -> tensor
+  // CHECK: linalg.fill ins(%{{.*}}) inits(%[[subview1]] : memref) -> tensor
   // Now the copy of the actual insert_slice. (It will fold away.)
   // CHECK: memref.copy %[[subview1]], %[[subview1]]
@@ -637,8 +637,8 @@
   %6 = tensor.extract_slice %arg1[0, %4] [8, 4] [1, 1] : tensor<8x8xf32> to tensor<8x4xf32>
   %7 = tensor.extract_slice %o[%1, %4] [4, 4] [1, 1] : tensor<8x8xf32> to tensor<4x4xf32>
-  // CHECK: linalg.matmul ins({{.*}}memref<4x8xf32, strided<[?, ?], offset: ?>>, memref<8x4xf32, strided<[?, ?], offset: ?>>) outs({{.*}} : memref<4x4xf32, strided<[?, ?], offset: ?>>)
-  %8 = linalg.matmul ins(%3, %6 : tensor<4x8xf32>, tensor<8x4xf32>) outs(%7 : tensor<4x4xf32>) -> tensor<4x4xf32>
+  // CHECK: linalg.matmul ins({{.*}}memref<4x8xf32, strided<[?, ?], offset: ?>>, memref<8x4xf32, strided<[?, ?], offset: ?>>) inits({{.*}} : memref<4x4xf32, strided<[?, ?], offset: ?>>)
+  %8 = linalg.matmul ins(%3, %6 : tensor<4x8xf32>, tensor<8x4xf32>) inits(%7 : tensor<4x4xf32>) -> tensor<4x4xf32>
   scf.forall.in_parallel {
     tensor.parallel_insert_slice %8 into %o[%1, %4] [4, 4] [1, 1] : tensor<4x4xf32> into tensor<8x8xf32>
   }
@@ -719,8 +719,8 @@
   %c0 = arith.constant 0 : index
   // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32, 1>
   %alloc = bufferization.alloc_tensor() {memory_space = 1 : i64} : tensor<5xf32>
-  // CHECK: linalg.fill {{.*}} outs(%[[alloc]] : memref<5xf32, 1>)
-  %filled = linalg.fill ins(%cst : f32) outs(%alloc : tensor<5xf32>) -> tensor<5xf32>
+  // CHECK: linalg.fill {{.*}} inits(%[[alloc]] : memref<5xf32, 1>)
+  %filled = linalg.fill ins(%cst : f32) inits(%alloc : tensor<5xf32>) -> tensor<5xf32>
   // CHECK: scf.if %{{.*}} -> (memref<5xf32, 1>) {
   %1 = scf.if %c -> tensor<5xf32> {
     // CHECK: %[[cloned:.*]] = bufferization.clone %[[alloc]]
diff --git a/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir b/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir
--- a/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir
+++ b/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir
@@ -51,7 +51,7 @@
 // CHECK: scf.yield %[[B]] : memref
 // CHECK: }
 // CHECK: %[[S:.*]] = memref.subview %[[M]]{{\[}}%[[S1]]] {{\[}}%[[D]]] [1]
-// CHECK: linalg.fill ins(%[[C]] : f64) outs(%[[S]]
+// CHECK: linalg.fill ins(%[[C]] : f64) inits(%[[S]]
 // CHECK: return %[[M]], %[[S2]] : memref, index
 func.func @sparse_push_back_n(%arg0: index, %arg1: memref, %arg2: f64, %arg3: index) -> (memref, index) {
   %0:2 = sparse_tensor.push_back %arg0, %arg1, %arg2, %arg3 : index, memref, f64, index
diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir
--- a/mlir/test/Dialect/SparseTensor/codegen.mlir
+++ b/mlir/test/Dialect/SparseTensor/codegen.mlir
@@ -377,8 +377,8 @@
 // CHECK: %[[B:.*]] = memref.alloc() : memref<8xi1>
 // CHECK: %[[C:.*]] = memref.alloc() : memref<8xindex>
 // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<8xindex> to memref
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<8xf64>)
-// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<8xi1>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<8xf64>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<8xi1>)
 // CHECK: return %[[D]] : memref
 func.func @sparse_expansion1() -> memref {
   %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSR>
@@ -392,8 +392,8 @@
 // CHECK: %[[B:.*]] = memref.alloc() : memref<4xi1>
 // CHECK: %[[C:.*]] = memref.alloc() : memref<4xindex>
 // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<4xindex> to memref
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<4xf64>)
-// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<4xi1>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<4xf64>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<4xi1>)
 // CHECK: return %[[D]] : memref
 func.func @sparse_expansion2() -> memref {
   %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSC>
@@ -408,8 +408,8 @@
 // CHECK: %[[V:.*]] = memref.alloc(%[[D0]]) : memref
 // CHECK: %[[B:.*]] = memref.alloc(%[[D0]]) : memref
 // CHECK: %[[D:.*]] = memref.alloc(%[[D0]]) : memref
-// CHECK: linalg.fill ins(%{{.*}} : f64) outs(%[[V]] : memref)
-// CHECK: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref)
+// CHECK: linalg.fill ins(%{{.*}} : f64) inits(%[[V]] : memref)
+// CHECK: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref)
 // CHECK: return %[[D]] : memref
 func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref {
   %0 = bufferization.alloc_tensor(%arg0, %arg1) : tensor
diff --git a/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir b/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir
--- a/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir
+++ b/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir
@@ -9,13 +9,13 @@
 // CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
 // CHECK: %[[VAL_4:.*]] = memref.alloc() : memref<16xindex>
 // CHECK: %[[VAL_5:.*]] = memref.cast %[[VAL_4]] : memref<16xindex> to memref
-// CHECK: linalg.fill ins(%[[VAL_3]] : index) outs(%[[VAL_4]] : memref<16xindex>)
+// CHECK: linalg.fill ins(%[[VAL_3]] : index) inits(%[[VAL_4]] : memref<16xindex>)
 // CHECK: %[[VAL_6:.*]] = memref.alloc() : memref<16xindex>
 // CHECK: %[[VAL_7:.*]] = memref.cast %[[VAL_6]] : memref<16xindex> to memref
-// CHECK: linalg.fill ins(%[[VAL_3]] : index) outs(%[[VAL_6]] : memref<16xindex>)
+// CHECK: linalg.fill ins(%[[VAL_3]] : index) inits(%[[VAL_6]] : memref<16xindex>)
 // CHECK: %[[VAL_8:.*]] = memref.alloc() : memref<16xf64>
 // CHECK: %[[VAL_9:.*]] = memref.cast %[[VAL_8]] : memref<16xf64> to memref
-// CHECK: linalg.fill ins(%[[VAL_2]] : f64) outs(%[[VAL_8]] : memref<16xf64>)
+// CHECK: linalg.fill ins(%[[VAL_2]] : f64) inits(%[[VAL_8]] : memref<16xf64>)
 // CHECK: %[[VAL_10:.*]] = sparse_tensor.storage_specifier.init : !sparse_tensor.storage_specifier
 // CHECK: %[[VAL_12:.*]] = sparse_tensor.storage_specifier.set %[[VAL_10]] lvl_sz at 0 with %[[VAL_0]] : !sparse_tensor.storage_specifier
 // CHECK: %[[VAL_14:.*]] = sparse_tensor.storage_specifier.get %[[VAL_12]] pos_mem_sz at 0 : !sparse_tensor.storage_specifier
diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir
--- a/mlir/test/Dialect/SparseTensor/conversion.mlir
+++ b/mlir/test/Dialect/SparseTensor/conversion.mlir
@@ -332,8 +332,8 @@
 // CHECK: %[[B:.*]] = memref.alloc() : memref<8xi1>
 // CHECK: %[[C:.*]] = memref.alloc() : memref<8xindex>
 // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<8xindex> to memref
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<8xf64>)
-// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<8xi1>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<8xf64>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<8xi1>)
 // CHECK: return %[[D]] : memref
 func.func @sparse_expansion1() -> memref {
   %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSR>
@@ -348,8 +348,8 @@
 // CHECK: %[[B:.*]] = memref.alloc() : memref<4xi1>
 // CHECK: %[[C:.*]] = memref.alloc() : memref<4xindex>
 // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<4xindex> to memref
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<4xf64>)
-// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<4xi1>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<4xf64>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<4xi1>)
 // CHECK: return %[[D]] : memref
 func.func @sparse_expansion2() -> memref {
   %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSC>
@@ -365,8 +365,8 @@
 // CHECK: %[[A:.*]] = memref.alloc(%[[S]]) : memref
 // CHECK: %[[B:.*]] = memref.alloc(%[[S]]) : memref
 // CHECK: %[[C:.*]] = memref.alloc(%[[S]]) : memref
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref)
-// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref)
 // CHECK: return %[[C]] : memref
 func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref {
   %0 = bufferization.alloc_tensor(%arg0, %arg1) : tensor
diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir
--- a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir
+++ b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir
@@ -41,7 +41,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<13xi32>
-// CHECK-DAG: linalg.fill ins(%[[C0]] : i32) outs(%[[M]] : memref<13xi32>)
+// CHECK-DAG: linalg.fill ins(%[[C0]] : i32) inits(%[[M]] : memref<13xi32>)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
@@ -83,7 +83,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref
-// CHECK-DAG: linalg.fill ins(%[[C0]] : i32) outs(%[[M]] : memref)
+// CHECK-DAG: linalg.fill ins(%[[C0]] : i32) inits(%[[M]] : memref)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
@@ -130,7 +130,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x4xf64>
-// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x4xf64>)
+// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref<2x4xf64>)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
@@ -148,7 +148,7 @@
 // CHECK-RWT-SAME: %[[A:.*]]: tensor<2x4xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>>) -> tensor<2x4xf64> {
 // CHECK-RWT: %[[F0:.*]] = arith.constant 0.000000e+00 : f64
 // CHECK-RWT: %[[B:.*]] = memref.alloc() : memref<2x4xf64>
-// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) outs(%[[B]]
+// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) inits(%[[B]]
 // CHECK-RWT: sparse_tensor.foreach in %[[A]]
 // CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64):
 // CHECK-RWT: memref.store %[[FV]], %[[B]]{{\[}}%[[FI0]], %[[FI1]]]
@@ -190,7 +190,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref
-// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref)
+// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
@@ -238,7 +238,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI1]]) : memref<2x?xf64>
-// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x?xf64>)
+// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref<2x?xf64>)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
@@ -286,7 +286,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<2xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]], %[[SizeI1]]) : memref
-// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref)
+// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
@@ -308,7 +308,7 @@
 // CHECK-RWT: %[[D0:.*]] = tensor.dim %[[A]], %[[C0]]
 // CHECK-RWT: %[[D1:.*]] = tensor.dim %[[A]], %[[C1]]
 // CHECK-RWT: %[[B:.*]] = memref.alloc(%[[D0]], %[[D1]])
-// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) outs(%[[B]]
+// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) inits(%[[B]]
 // CHECK-RWT: sparse_tensor.foreach in %[[A]]
 // CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64):
 // CHECK-RWT: memref.store %[[FV]], %[[B]]{{\[}}%[[FI0]], %[[FI1]]]
@@ -354,7 +354,7 @@
 // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<3xindex> to memref
 // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref
 // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x3x4xf64>
-// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x3x4xf64>)
+// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref<2x3x4xf64>)
 // CHECK: scf.while : () -> () {
 // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1
 // CHECK: scf.condition(%[[Cond]])
diff --git a/mlir/test/Dialect/SparseTensor/dense.mlir b/mlir/test/Dialect/SparseTensor/dense.mlir
--- a/mlir/test/Dialect/SparseTensor/dense.mlir
+++ b/mlir/test/Dialect/SparseTensor/dense.mlir
@@ -59,7 +59,7 @@
   %c = arith.constant 1.0 : f32
   %0 = linalg.generic #trait_2d
     ins(%arga: tensor<32x16xf32, #DenseMatrix>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
   ^bb(%a: f32, %x: f32):
     %1 = arith.addf %a, %c : f32
     linalg.yield %1 : f32
@@ -99,7 +99,7 @@
   %c = arith.constant 1.0 : f32
   %0 = linalg.generic #trait_2d
     ins(%arga: tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32, #DenseMatrix>) {
+    inits(%argx: tensor<32x16xf32, #DenseMatrix>) {
   ^bb(%a: f32, %x: f32):
     %1 = arith.addf %a, %c : f32
     linalg.yield %1 : f32
@@ -145,7 +145,7 @@
   -> tensor<32x16xf32, #DenseMatrix> {
   %0 = linalg.generic #trait_3d
     ins(%arga: tensor<32x16x8xf32>)
-    outs(%argx: tensor<32x16xf32, #DenseMatrix>) {
+    inits(%argx: tensor<32x16xf32, #DenseMatrix>) {
   ^bb(%a: f32, %x: f32):
     %1 = arith.addf %x, %a : f32
     linalg.yield %1 : f32
diff --git a/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir b/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir
--- a/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir
+++ b/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir
@@ -59,12 +59,12 @@
   -> (tensor<10xf32>, tensor<10xf32>) {
   // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[argb]]) {bufferization.escape = [false]} : tensor<10xf32>
-  // CHECK: linalg.generic {{.*}} outs(%[[alloc]]
+  // CHECK: linalg.generic {{.*}} inits(%[[alloc]]
   // CHECK-FUNC: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[argb]]) {bufferization.escape = [true]} : tensor<10xf32>
-  // CHECK-FUNC: linalg.generic {{.*}} outs(%[[alloc]]
+  // CHECK-FUNC: linalg.generic {{.*}} inits(%[[alloc]]
   %0 = linalg.generic #trait
     ins(%arga: tensor<10xf32, #SV>)
-    outs(%argb: tensor<10xf32>) {
+    inits(%argb: tensor<10xf32>) {
   ^bb(%a: f32, %x : f32):
     %up = arith.addf %a, %x : f32
     linalg.yield %up : f32
diff --git a/mlir/test/Dialect/SparseTensor/one_trip.mlir b/mlir/test/Dialect/SparseTensor/one_trip.mlir
--- a/mlir/test/Dialect/SparseTensor/one_trip.mlir
+++ b/mlir/test/Dialect/SparseTensor/one_trip.mlir
@@ -25,7 +25,7 @@
 func.func @sparse_scale(%argx: tensor<1x1xf32, #Dense>) -> tensor<1x1xf32, #Dense> {
   %c = arith.constant 2.0 : f32
   %0 = linalg.generic #trait_scale
-    outs(%argx: tensor<1x1xf32, #Dense>) {
+    inits(%argx: tensor<1x1xf32, #Dense>) {
   ^bb(%x: f32):
     %1 = arith.mulf %x, %c : f32
     linalg.yield %1 : f32
diff --git a/mlir/test/Dialect/SparseTensor/rejected.mlir b/mlir/test/Dialect/SparseTensor/rejected.mlir
--- a/mlir/test/Dialect/SparseTensor/rejected.mlir
+++ b/mlir/test/Dialect/SparseTensor/rejected.mlir
@@ -27,7 +27,7 @@
   -> tensor {
   %0 = linalg.generic #trait
     ins(%arga: tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
   ^bb(%a: i32, %x: i32):
     // NOTE: `subi %a, %x` is the reason why the program is rejected by the sparse compiler.
    // It is because we do not allow `-outTensor` in reduction loops as it creates cyclic
diff --git a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir
--- a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir
+++ b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir
@@ -81,7 +81,7 @@
 func.func @sparse_scale(%argx: tensor) -> tensor {
   %c = arith.constant 2.0 : f32
   %0 = linalg.generic #trait_scale
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%x: f32):
    %1 = arith.mulf %x, %c : f32
    linalg.yield %1 : f32
@@ -145,7 +145,7 @@
   %argx: tensor<32xf64>) -> tensor<32xf64> {
   %0 = linalg.generic #trait_matvec
     ins(%arga, %argb : tensor<32x64xf64, #SortedCOO>, tensor<64xf64>)
-    outs(%argx: tensor<32xf64>) {
+    inits(%argx: tensor<32xf64>) {
  ^bb(%A: f64, %b: f64, %x: f64):
    %0 = arith.mulf %A, %b : f64
    %1 = arith.addf %x, %0 : f64
@@ -171,7 +171,7 @@
 // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref>
 // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref
 // CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x64xf64>
-// CHECK: linalg.fill ins(%[[VAL_4]] : f64) outs(%[[VAL_15]] : memref<32x64xf64>)
+// CHECK: linalg.fill ins(%[[VAL_4]] : f64) inits(%[[VAL_15]] : memref<32x64xf64>)
 // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref
 // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref
 // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref
@@ -266,7 +266,7 @@
   %argz: tensor<32x64xf64>) -> tensor<32x64xf64> {
   %0 = linalg.generic #trait_mul
     ins(%argx, %argy : tensor<32x64xf64, #SortedCOO>, tensor<32x64xf64, #SortedCOO>)
-    outs(%argz: tensor<32x64xf64>) {
+    inits(%argz: tensor<32x64xf64>) {
  ^bb(%x: f64, %y: f64, %z: f64):
    %1 = arith.mulf %x, %y : f64
    linalg.yield %1 : f64
diff --git a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir
@@ -22,7 +22,7 @@
 // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
 // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_8]] : memref<32xf32>)
 // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
 // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref
 // CHECK: %[[VAL_11:.*]] = arith.addf %[[VAL_10]], %[[VAL_1]] : f32
@@ -34,7 +34,7 @@
 func.func @add_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #DV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.addf %a, %argb : f32
    linalg.yield %0 : f32
@@ -52,7 +52,7 @@
 // CHECK: %[[VAL_INITTENSOR:.*]] = tensor.empty() : tensor<32xf32>
 // CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref
 // CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_INITTENSOR]] : memref<32xf32>
-// CHECK: linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_7]] : memref<32xf32>)
+// CHECK: linalg.fill ins(%[[VAL_3]] : f32) inits(%[[VAL_7]] : memref<32xf32>)
 // CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] {
 // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_8]]] : memref
 // CHECK: %[[VAL_10:.*]] = arith.addf %[[VAL_9]], %[[VAL_1]] : f32
@@ -65,7 +65,7 @@
   %u = tensor.empty() : tensor<32xf32>
   %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #DV>)
-    outs(%u: tensor<32xf32>) {
+    inits(%u: tensor<32xf32>) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.addf %a, %argb : f32
    linalg.yield %0 : f32
@@ -82,7 +82,7 @@
 // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index
 // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_8]] : memref<32xf32>)
 // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
 // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref
 // CHECK: %[[VAL_11:.*]] = arith.mulf %[[VAL_10]], %[[VAL_1]] : f32
@@ -94,7 +94,7 @@
 func.func @mul_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #DV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.mulf %a, %argb : f32
    linalg.yield %0 : f32
@@ -116,7 +116,7 @@
 // CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_11]] : memref<32xf32>)
 // CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
 // CHECK: %[[VAL_17:.*]] = arith.cmpi ult, %[[VAL_15]], %[[VAL_13]] : index
 // CHECK: scf.condition(%[[VAL_17]]) %[[VAL_15]], %[[VAL_16]] : index, index
@@ -149,7 +149,7 @@
 func.func @add_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.addf %a, %argb : f32
    linalg.yield %0 : f32
@@ -168,7 +168,7 @@
 // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]]
 // CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref
 // CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_8]] : memref<32xf32>)
 // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_3]] {
 // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_11]]] : memref
 // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref
@@ -186,7 +186,7 @@
 func.func @repeated_add_s(%arga: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.addf %a, %a : f32 // same tensor
    %1 = arith.addf %a, %a : f32 // should yield
@@ -206,7 +206,7 @@
 // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_9]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] {
@@ -221,7 +221,7 @@
 func.func @mul_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait1
     ins(%arga: tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.mulf %a, %argb : f32
    linalg.yield %0 : f32
@@ -249,7 +249,7 @@
 // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_9]] : memref<32xf32>)
 // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
 // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref
 // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32>
@@ -262,7 +262,7 @@
 func.func @add_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #DV>, tensor<32xf32>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -280,7 +280,7 @@
 // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_9]] : memref<32xf32>)
 // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
 // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref
 // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32>
@@ -293,7 +293,7 @@
 func.func @mul_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #DV>, tensor<32xf32>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -314,7 +314,7 @@
 // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref
 // CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
@@ -352,7 +352,7 @@
 func.func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32>, tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -371,7 +371,7 @@
 // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref
 // CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] {
@@ -387,7 +387,7 @@
 func.func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32>, tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -408,7 +408,7 @@
 // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref
 // CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
@@ -446,7 +446,7 @@
 func.func @add_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -465,7 +465,7 @@
 // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32>
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] {
@@ -481,7 +481,7 @@
 func.func @mul_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -502,7 +502,7 @@
 // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref
@@ -564,7 +564,7 @@
 func.func @add_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -585,7 +585,7 @@
 // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>)
 // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref
@@ -625,7 +625,7 @@
 func.func @mul_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32, #SV>)
-    outs(%argx: tensor<32xf32>) {
+    inits(%argx: tensor<32xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -647,7 +647,7 @@
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<16xf32>)
 // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref
 // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref
@@ -716,7 +716,7 @@
   // Kernel "x(i) = a(i) * c + b(i) * c".
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>)
-    outs(%argx: tensor<16xf32>) {
+    inits(%argx: tensor<16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %argc : f32
    %1 = arith.mulf %b, %argc : f32
@@ -740,7 +740,7 @@
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<16xf32>)
 // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref
 // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref
 // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref
@@ -809,7 +809,7 @@
   // Same kernel, but now expressed as "x(i) = (a(i) + b(i)) * c".
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>)
-    outs(%argx: tensor<16xf32>) {
+    inits(%argx: tensor<16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    %1 = arith.mulf %0, %argc : f32
@@ -850,7 +850,7 @@
 func.func @sum_reduction(%arga: tensor, %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sum_reduction
     ins(%arga: tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.addf %x, %a : f32
    linalg.yield %0 : f32
@@ -954,7 +954,7 @@
   // as two separate reductions kernels.
   %0 = linalg.generic #trait_sum_reduction2
     ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    %1 = arith.addf %x, %0 : f32
@@ -1067,7 +1067,7 @@
   // as two separate reductions kernels.
  %0 = linalg.generic #trait_sum_reduction_inv
    ins(%arga, %argb, %argc : tensor<16xf32, #SV>, tensor, tensor<16xf32, #SV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f32, %b: f32, %c: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    %1 = arith.addf %0, %c : f32
@@ -1108,7 +1108,7 @@
 // CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_3]] : tensor> to memref
 // CHECK-DAG: %[[VAL_16:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor
 // CHECK-DAG: %[[VAL_18:.*]] = bufferization.to_memref %[[VAL_4]]
-// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_18]] : memref)
+// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[VAL_18]] : memref)
 // CHECK-DAG: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref
 // CHECK-DAG: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref
 // CHECK-DAG: %[[VAL_21:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref
@@ -1246,7 +1246,7 @@
   %argx: tensor) -> tensor {
   %r = linalg.generic #trait_four_tensors
     ins(%arga, %argb, %argc, %argd: tensor, tensor, tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f64, %b: f64, %c: f64, %d: f64, %x: f64):
    %0 = arith.addf %a, %b : f64
    %1 = arith.addf %c, %d : f64
@@ -1559,7 +1559,7 @@
   %argc: tensor, %argx: tensor) ->tensor{
   %0 = linalg.generic #trait_red3s
     ins(%arga, %argb, %argc: tensor, tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f64,%b: f64,%c: f64,%x: f64):
    %0 = arith.addf %x, %a : f64
    %1 = arith.addf %0, %b : f64
diff --git a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir
@@ -27,7 +27,7 @@
 // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32x16xf32>)
 // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
 // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]], %[[VAL_4]] : index
@@ -44,7 +44,7 @@
 func.func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tdd>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -63,7 +63,7 @@
 // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32x16xf32>)
 // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] {
 // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]], %[[VAL_4]] : index
@@ -80,7 +80,7 @@
 func.func @mul_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tdd>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -102,7 +102,7 @@
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<32x16xf32>)
 // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_7]] {
 // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref
 // CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_14]], %[[VAL_7]] : index
@@ -143,7 +143,7 @@
 func.func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tds>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -163,7 +163,7 @@
 // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_11]] : memref<32x16xf32>)
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] {
 // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref
 // CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index
@@ -182,7 +182,7 @@
 func.func @mul_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tds>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -204,7 +204,7 @@
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<32x16xf32>)
 // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref
 // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref
 // CHECK: %[[VAL_16:.*]]:2 = scf.while (%[[VAL_17:.*]] = %[[VAL_14]], %[[VAL_18:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) {
@@ -250,7 +250,7 @@
 func.func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -270,7 +270,7 @@
 // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_11]] : memref<32x16xf32>)
 // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref
 // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_5]] {
@@ -290,7 +290,7 @@
 func.func @mul_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -314,7 +314,7 @@
 // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_15]] : memref<32x16xf32>)
 // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref
 // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref
 // CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_16]], %[[VAL_20:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) {
@@ -384,7 +384,7 @@
 func.func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -405,7 +405,7 @@
 // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32>
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32x16xf32>)
 // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_4]] {
@@ -427,7 +427,7 @@
 func.func @mul_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -452,7 +452,7 @@
 // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_16]] : memref<32x16xf32>)
 // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref
@@ -591,7 +591,7 @@
 func.func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32, #Tss>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -616,7 +616,7 @@
 // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_16]] : memref<32x16xf32>)
 // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref
 // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref
@@ -687,7 +687,7 @@
 func.func @mul_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32, #Tss>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -711,7 +711,7 @@
 // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_15]] : memref<32x16xf32>)
 // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref
 // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref
 // CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_16]], %[[VAL_20:.*]] = %[[VAL_5]]) : (index, index) -> (index, index) {
@@ -793,7 +793,7 @@
 func.func @add_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32, #Tds>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -815,7 +815,7 @@
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32>
-// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>)
+// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<32x16xf32>)
 // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref
 // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref
 // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_14]] to %[[VAL_15]] step %[[VAL_5]] {
@@ -839,7 +839,7 @@
 func.func @mul_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> {
   %0 = linalg.generic #trait2
     ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32, #Tds>)
-    outs(%argx: tensor<32x16xf32>) {
+    inits(%argx: tensor<32x16xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -890,7 +890,7 @@
 func.func @matvec(%argA: tensor<16x32xf32, #Tds>, %argb: tensor<32xf32>, %argx: tensor<16xf32>) -> tensor<16xf32> {
   %0 = linalg.generic #trait_matvec
     ins(%argA, %argb: tensor<16x32xf32, #Tds>, tensor<32xf32>)
-    outs(%argx: tensor<16xf32>) {
+    inits(%argx: tensor<16xf32>) {
  ^bb(%A: f32, %b: f32, %x: f32):
    %0 = arith.mulf %A, %b : f32
    %1 = arith.addf %0, %x : f32
@@ -936,7 +936,7 @@
 func.func @sum_reduction(%arga: tensor<10x20xf32, #Tds>, %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sum_reduction
     ins(%arga: tensor<10x20xf32, #Tds>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f32, %x: f32):
    %0 = arith.addf %x, %a : f32
    linalg.yield %0 : f32
@@ -964,7 +964,7 @@
 // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref
 // CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor>
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref
-// CHECK: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_11]] : memref)
+// CHECK: linalg.fill ins(%{{.*}} : f64) inits(%[[VAL_11]] : memref)
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_4]] {
 // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_12]]] : memref
 // CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_4]] : index
@@ -983,7 +983,7 @@
   %0 = arith.constant 2.0 : f64
   %1 = linalg.generic #trait_scale
     ins(%arga: tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f64, %x: f64):
    %2 = arith.mulf %a, %0 : f64
    linalg.yield %2 : f64
@@ -1048,7 +1048,7 @@
   %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sampled_dense_dense
     ins(%args, %arga, %argb: tensor, tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%s: f32, %a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    %1 = arith.mulf %s, %0 : f32
@@ -1272,7 +1272,7 @@
               tensor, tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
  ^bb(%a: f32, %b: f32, %c: f32, %d: f32, %e: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    %1 = arith.mulf %0, %d : f32
diff --git a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir
@@ -35,7 +35,7 @@
 // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
-// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>)
+// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_11]] : memref<32x16x8xf32>)
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
 // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
 // CHECK: %[[VAL_14:.*]] = arith.muli %[[VAL_12]], %[[VAL_4]] : index
@@ -56,7 +56,7 @@
 func.func @add_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
   %0 = linalg.generic #trait3
     ins(%arga, %argb: tensor<32x16x8xf32, #Tddd>, tensor<32x16x8xf32>)
-    outs(%argx: tensor<32x16x8xf32>) {
+    inits(%argx: tensor<32x16x8xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -77,7 +77,7 @@
 // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
-// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>)
+// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_11]] : memref<32x16x8xf32>)
 // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] {
 // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
 // CHECK: %[[VAL_14:.*]] = arith.muli %[[VAL_12]], %[[VAL_4]] : index
@@ -98,7 +98,7 @@
 func.func @mul_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
   %0 = linalg.generic #trait3
     ins(%arga, %argb: tensor<32x16x8xf32, #Tddd>, tensor<32x16x8xf32>)
-    outs(%argx: tensor<32x16x8xf32>) {
+    inits(%argx: tensor<32x16x8xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -122,7 +122,7 @@
 // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
 // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
-// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>)
+// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_15]] : memref<32x16x8xf32>)
 // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_9]] {
 // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_9]] {
 // CHECK: %[[VAL_18:.*]] = arith.muli %[[VAL_16]], %[[VAL_5]] : index
@@ -167,7 +167,7 @@
 func.func @add_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
   %0 = linalg.generic #trait3
     ins(%arga, %argb: tensor<32x16x8xf32, #Tdds>, tensor<32x16x8xf32>)
-    outs(%argx: tensor<32x16x8xf32>) {
+    inits(%argx: tensor<32x16x8xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -189,7 +189,7 @@
 // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ] }>> to memref
 // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
 // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
-// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>)
+// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_13]] : memref<32x16x8xf32>)
 // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] {
 // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_6]] to %[[VAL_5]] step %[[VAL_7]] {
 // CHECK: %[[VAL_16:.*]] = arith.muli %[[VAL_14]], %[[VAL_5]] : index
@@ -212,7 +212,7 @@
 func.func @mul_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
   %0 = linalg.generic #trait3
     ins(%arga, %argb: tensor<32x16x8xf32, #Tdds>, tensor<32x16x8xf32>)
-    outs(%argx: tensor<32x16x8xf32>) {
+    inits(%argx: tensor<32x16x8xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.mulf %a, %b : f32
    linalg.yield %0 : f32
@@ -236,7 +236,7 @@
 // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
 // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
-// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>)
+// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>)
 // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_8]] {
 // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_15]]] : memref
 // CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_8]] : index
@@ -285,7 +285,7 @@
 func.func @add_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> {
   %0 = linalg.generic #trait3
     ins(%arga, %argb: tensor<32x16x8xf32, #Tdsd>, tensor<32x16x8xf32>)
-    outs(%argx: tensor<32x16x8xf32>) {
+    inits(%argx: tensor<32x16x8xf32>) {
  ^bb(%a: f32, %b: f32, %x: f32):
    %0 = arith.addf %a, %b : f32
    linalg.yield %0 : f32
@@ -307,7 +307,7 @@
 // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ] }>> to memref
 // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32>
 // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32>
-// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>)
+// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_12]] : memref<32x16x8xf32>)
 // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] {
 // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref
 // CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_6]] : index
@@ -330,7
@@ func.func @mul_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdsd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -356,7 +356,7 @@ // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_17]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_8]] to %[[VAL_4]] step %[[VAL_9]] { // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_9]] : index @@ -429,7 +429,7 @@ func.func @add_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -452,7 +452,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref // CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_6]] : index @@ -477,7 +477,7 @@ func.func @mul_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -501,7 +501,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_15]], %[[VAL_19:.*]] = %[[VAL_7]]) : (index, 
index) -> (index, index) { @@ -555,7 +555,7 @@ func.func @add_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsdd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -577,7 +577,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_12]] : memref<32x16x8xf32>) // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_6]] { @@ -601,7 +601,7 @@ func.func @mul_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsdd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -627,7 +627,7 @@ // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_17]] : memref<32x16x8xf32>) // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_20:.*]]:2 = scf.while (%[[VAL_21:.*]] = %[[VAL_18]], %[[VAL_22:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { @@ -705,7 +705,7 @@ func.func @add_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsds>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -728,7 +728,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref 
// CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_15]] to %[[VAL_16]] step %[[VAL_6]] { @@ -754,7 +754,7 @@ func.func @mul_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsds>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -780,7 +780,7 @@ // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_16]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_16]] : memref<32x16x8xf32>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_17]], %[[VAL_21:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) { @@ -862,7 +862,7 @@ func.func @add_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tssd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -885,7 +885,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_13]] : memref<32x16x8xf32>) // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_14]] to %[[VAL_15]] step %[[VAL_5]] { @@ -911,7 +911,7 @@ func.func @mul_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tssd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -939,7 +939,7 @@ // CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_19:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_19]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_19]] : memref<32x16x8xf32>) // CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref // CHECK: 
%[[VAL_21:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_22:.*]]:2 = scf.while (%[[VAL_23:.*]] = %[[VAL_20]], %[[VAL_24:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { @@ -1045,7 +1045,7 @@ func.func @add_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -1069,7 +1069,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_15]] : memref<32x16x8xf32>) // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_16]] to %[[VAL_17]] step %[[VAL_5]] { @@ -1097,7 +1097,7 @@ func.func @mul_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -1163,7 +1163,7 @@ %argd: tensor) -> tensor { %0 = linalg.generic #trait_kernel_3d ins(%argb, %argc, %argd: tensor, tensor, tensor) - outs(%arga: tensor) { + inits(%arga: tensor) { ^bb(%b: f32, %c: f32, %d: f32, %a: f32): %0 = arith.mulf %b, %c : f32 %1 = arith.mulf %0, %d : f32 @@ -1219,7 +1219,7 @@ func.func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction ins(%arga: tensor<10x20x30xf32, #Tsss>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 @@ -1273,7 +1273,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction_inv ins(%arga, %argb: tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -1308,7 +1308,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<30xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : memref<10x20x30xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<10x20x30xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_13]] : memref<10x20x30xf32>) // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_14]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { @@ -1330,7 +1330,7 @@ %argx: tensor<10x20x30xf32>) -> tensor<10x20x30xf32> { %0 = linalg.generic #trait_invariants ins(%arga, %argb, %argc : tensor<10xf32, #Td>, tensor<20xf32>, tensor<30xf32>) - 
outs(%argx: tensor<10x20x30xf32>) { + inits(%argx: tensor<10x20x30xf32>) { ^bb(%a: f32, %b: f32, %c: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.mulf %0, %c : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir @@ -47,7 +47,7 @@ %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga, %argb: tensor<32xf32, #SpVec>, tensor<4xf32>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -98,7 +98,7 @@ %argx = bufferization.alloc_tensor() : tensor<32xf32, #SpVec> %0 = linalg.generic #trait1 ins(%arga, %argb: tensor<32xf32, #SpVec>, tensor<4xf32, #SpVec>) - outs(%argx: tensor<32xf32, #SpVec>) { + inits(%argx: tensor<32xf32, #SpVec>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -135,7 +135,7 @@ %argx = bufferization.alloc_tensor() : tensor<32xf32, #EncDenseVec> %0 = linalg.generic #trait1 ins(%arga, %argb: tensor<32xf32, #EncDenseVec>, tensor<4xf32, #EncDenseVec>) - outs(%argx: tensor<32xf32, #EncDenseVec>) { + inits(%argx: tensor<32xf32, #EncDenseVec>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -167,7 +167,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<34xi32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi32> -// CHECK: linalg.fill ins(%[[ZERO]] : i32) outs(%[[VAL_11]] : memref<32xi32>) +// CHECK: linalg.fill ins(%[[ZERO]] : i32) inits(%[[VAL_11]] : memref<32xi32>) // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_4]] { @@ -186,7 +186,7 @@ %argx: tensor<32xi32>) -> tensor<32xi32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi32, #SpVec>, tensor<34xi32>) - outs(%argx: tensor<32xi32>) { + inits(%argx: tensor<32xi32>) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.andi %a, %b : i32 linalg.yield %0 : i32 @@ -237,7 +237,7 @@ %argx = bufferization.alloc_tensor() : tensor<32xi32, #SpVec> %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi32, #SpVec>, tensor<34xi32, #SpVec>) - outs(%argx: tensor<32xi32, #SpVec>) { + inits(%argx: tensor<32xi32, #SpVec>) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.andi %a, %b : i32 linalg.yield %0 : i32 @@ -293,7 +293,7 @@ %argx: tensor<32x16xf64>) -> tensor<32x16xf64> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16xf64, #CSR>, tensor<34x19xf64>) - outs(%argx: tensor<32x16xf64>) { + inits(%argx: tensor<32x16xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 %1 = arith.addf %x, %0 : f64 @@ -363,7 +363,7 @@ %argx = bufferization.alloc_tensor() : tensor<32x16xf64, #CSR> %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16xf64, #CSR>, tensor<34x19xf64, #CSR>) - outs(%argx: tensor<32x16xf64, #CSR>) { + inits(%argx: tensor<32x16xf64, #CSR>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 %1 = arith.addf %x, %0 : f64 @@ -427,7 +427,7 @@ %argx: tensor<32x16xf64>) -> tensor<32x16xf64> { %0 = linalg.generic #trait4 ins(%arga, %argb: tensor<34x16xf64, 
#CSR>, tensor<32x19xf64, #Row>) - outs(%argx: tensor<32x16xf64>) { + inits(%argx: tensor<32x16xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 %1 = arith.addf %x, %0 : f64 @@ -488,7 +488,7 @@ %argx: tensor<32x16xf64>) -> tensor<32x16xf64> { %0 = linalg.generic #trait5 ins(%arga, %argb: tensor<34x16xf64, #CSR>, tensor<32x19xf64, #Row>) - outs(%argx: tensor<32x16xf64>) { + inits(%argx: tensor<32x16xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir b/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir @@ -46,7 +46,7 @@ func.func public @main(%arg0: tensor<4x5xi32, #DCSR>) -> tensor<4x3x5xi32, #SparseTensor> { %0 = bufferization.alloc_tensor() : tensor<4x3x5xi32, #SparseTensor> %1 = linalg.generic #trait - ins(%arg0 : tensor<4x5xi32, #DCSR>) outs(%0 : tensor<4x3x5xi32, #SparseTensor>) { + ins(%arg0 : tensor<4x5xi32, #DCSR>) inits(%0 : tensor<4x3x5xi32, #SparseTensor>) { ^bb0(%in: i32, %out: i32): linalg.yield %in : i32 } -> tensor<4x3x5xi32, #SparseTensor> diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -26,7 +26,7 @@ // CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<5x4xf64> -// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<5x4xf64>) +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) inits(%[[TMP_0]] : memref<5x4xf64>) // CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { // CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { // CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<2x4xf64> @@ -267,7 +267,7 @@ // CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<4x5xf64> -// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<4x5xf64>) +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) inits(%[[TMP_0]] : memref<4x5xf64>) // CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { // CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { // CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<4x2xf64> @@ -330,7 +330,7 @@ // CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index // CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<3x5xf64> // CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<3x5xf64> to memref -// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<3x5xf64>) +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) inits(%[[TMP_0]] : memref<3x5xf64>) // CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c3]] step %[[TMP_c1]] { // CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { // CHECK: %[[TMP_13:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<3x2xf64> diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir +++ 
b/mlir/test/Dialect/SparseTensor/sparse_concat_codegen.mlir @@ -189,7 +189,7 @@ // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK-DAG: %[[TMP_d0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK: %[[A:.*]] = memref.alloc(%[[TMP_c9]], %[[TMP_c4]]) : memref -// CHECK: linalg.fill ins(%[[TMP_d0]] : f64) outs(%[[A]] : memref) +// CHECK: linalg.fill ins(%[[TMP_d0]] : f64) inits(%[[A]] : memref) // CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor // CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor // CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor diff --git a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir @@ -54,8 +54,8 @@ // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: } @@ -72,7 +72,7 @@ %v = bufferization.alloc_tensor(%n) : tensor %0 = linalg.generic #rowsum ins(%arga: tensor) - outs(%v: tensor) { + inits(%v: tensor) { ^bb(%a: f64, %x: f64): %1 = arith.addf %x, %a : f64 linalg.yield %1 : f64 @@ -105,8 +105,8 @@ // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[C4]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[C4]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[C4]]) : memref -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C8]] step %[[C1]] {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { @@ -124,7 +124,7 @@ %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSR> %D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSR>, tensor<2x4xf64, #CSR>) - outs(%C: tensor<8x4xf64, #CSR>) -> tensor<8x4xf64, #CSR> + inits(%C: tensor<8x4xf64, #CSR>) -> tensor<8x4xf64, #CSR> return %D: tensor<8x4xf64, #CSR> } @@ -153,8 +153,8 @@ // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[C8]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[C8]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[C8]]) : memref -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C1]] {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { @@ -172,6 +172,6 @@ %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC> %D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>) - outs(%C: 
tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> + inits(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> return %D: tensor<8x4xf64, #CSC> } diff --git a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir @@ -39,8 +39,8 @@ // CHECK: %[[VAL_23:.*]] = memref.cast %[[VAL_22]] : memref<300xi1> to memref // CHECK: %[[VAL_24:.*]] = memref.alloc() : memref<300xindex> // CHECK: %[[VAL_25:.*]] = memref.cast %[[VAL_24]] : memref<300xindex> to memref -// CHECK: linalg.fill ins(%[[F0]] : f64) outs(%[[VAL_20]] : memref<300xf64>) -// CHECK: linalg.fill ins(%[[False]] : i1) outs(%[[VAL_22]] : memref<300xi1>) +// CHECK: linalg.fill ins(%[[F0]] : f64) inits(%[[VAL_20]] : memref<300xf64>) +// CHECK: linalg.fill ins(%[[False]] : i1) inits(%[[VAL_22]] : memref<300xi1>) // CHECK: %[[VAL_26:.*]] = call @sparsePositions0(%[[Arg0]], %[[I0]]) : (!llvm.ptr, index) -> memref // CHECK: %[[VAL_27:.*]] = call @sparseCoordinates0(%[[Arg0]], %[[I0]]) : (!llvm.ptr, index) -> memref // CHECK: %[[VAL_28:.*]] = call @sparsePositions0(%[[Arg0]], %[[I1]]) : (!llvm.ptr, index) -> memref @@ -124,8 +124,8 @@ %0 = bufferization.alloc_tensor() : tensor<100x300xf64, #DCSR> %cst = arith.constant 0.000000e+00 : f64 %1 = linalg.fill ins(%cst : f64) - outs(%0 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> + inits(%0 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<100x200xf64, #DCSR>, tensor<200x300xf64, #DCSR>) - outs(%1 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> + inits(%1 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> return %2 : tensor<100x300xf64, #DCSR> } diff --git a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir @@ -54,7 +54,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = math.absf %a : f64 linalg.yield %0 : f64 @@ -86,7 +86,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = math.ceil %a : f64 linalg.yield %0 : f64 @@ -118,7 +118,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = math.floor %a : f64 linalg.yield %0 : f64 @@ -150,7 +150,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = arith.negf %a : f64 linalg.yield %0 : f64 @@ -210,7 +210,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.addf %a, %b : f64 linalg.yield %0 : f64 @@ -272,7 +272,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + 
inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.subf %a, %b : f64 linalg.yield %0 : f64 @@ -308,7 +308,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 linalg.yield %0 : f64 @@ -342,7 +342,7 @@ %c = arith.constant 2.0 : f64 %0 = linalg.generic #traitc ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = arith.divf %a, %c : f64 linalg.yield %0 : f64 @@ -382,7 +382,7 @@ %xinp = bufferization.alloc_tensor() : tensor<32xf64, #SV> %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%xinp: tensor<32xf64, #SV>) { + inits(%xinp: tensor<32xf64, #SV>) { ^bb(%a: f64, %x: f64): %0 = math.absf %a : f64 %1 = math.ceil %0 : f64 @@ -423,7 +423,7 @@ %init = bufferization.alloc_tensor() : tensor<32xcomplex, #SV> %0 = linalg.generic #traitc ins(%arg0: tensor<32xcomplex, #SV>) - outs(%init: tensor<32xcomplex, #SV>) { + inits(%init: tensor<32xcomplex, #SV>) { ^bb(%a: complex, %x: complex): %0 = complex.div %a, %c : complex linalg.yield %0 : complex diff --git a/mlir/test/Dialect/SparseTensor/sparse_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_index.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_index.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_index.mlir @@ -55,7 +55,7 @@ %init = bufferization.alloc_tensor(%0, %1) : tensor %r = linalg.generic #trait ins(%arga: tensor) - outs(%init: tensor) { + inits(%init: tensor) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -112,7 +112,7 @@ %init = bufferization.alloc_tensor(%0, %1) : tensor %r = linalg.generic #trait ins(%arga: tensor) - outs(%init: tensor) { + inits(%init: tensor) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir @@ -74,7 +74,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.addi %a, %b : i64 linalg.yield %0 : i64 @@ -137,7 +137,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.subi %a, %b : i64 linalg.yield %0 : i64 @@ -173,7 +173,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.muli %a, %b : i64 linalg.yield %0 : i64 @@ -207,7 +207,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.divsi %a, %c : i64 linalg.yield %0 : i64 @@ -241,7 +241,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.divui %a, %c 
: i64 linalg.yield %0 : i64 @@ -277,7 +277,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.andi %a, %b : i64 linalg.yield %0 : i64 @@ -337,7 +337,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.ori %a, %b : i64 linalg.yield %0 : i64 @@ -397,7 +397,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.xori %a, %b : i64 linalg.yield %0 : i64 @@ -431,7 +431,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.shrsi %a, %c : i64 linalg.yield %0 : i64 @@ -465,7 +465,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.shrui %a, %c : i64 linalg.yield %0 : i64 @@ -499,7 +499,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.shli %a, %c : i64 linalg.yield %0 : i64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir @@ -47,7 +47,7 @@ %c: tensor<10x30xf32>) -> tensor<10x30xf32> { %0 = linalg.matmul ins(%a, %b: tensor<10x20xf32, #DCSR>, tensor<20x30xf32>) - outs(%c: tensor<10x30xf32>) -> tensor<10x30xf32> + inits(%c: tensor<10x30xf32>) -> tensor<10x30xf32> return %0 : tensor<10x30xf32> } @@ -93,7 +93,7 @@ %c: tensor<10x30xf32>) -> tensor<10x30xf32> { %0 = linalg.matmul ins(%a, %b: tensor<10x20xf32>, tensor<20x30xf32,#DCSR>) - outs(%c: tensor<10x30xf32>) -> tensor<10x30xf32> + inits(%c: tensor<10x30xf32>) -> tensor<10x30xf32> return %0 : tensor<10x30xf32> } @@ -191,7 +191,7 @@ %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR> %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #DCSR>, tensor<8x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> + inits(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> return %D: tensor<4x4xf64, #DCSR> } @@ -292,7 +292,7 @@ %c2 = arith.constant 2 : i32 %0 = linalg.quantized_matmul ins(%input1, %input2, %c2, %c0 : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32) - outs(%output : tensor<5x6xi64>) -> tensor<5x6xi64> + inits(%output : tensor<5x6xi64>) -> tensor<5x6xi64> return %0: tensor<5x6xi64> } @@ -354,6 +354,6 @@ %x: tensor) -> tensor { %dot = linalg.dot ins(%a, %b: tensor<1024xf32, #SparseVector>, tensor<1024xf32, #SparseVector>) - outs(%x: tensor) -> tensor + inits(%x: tensor) -> tensor return %dot : tensor } diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir @@ -115,7 +115,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { 
%0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #CSR>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir @@ -115,7 +115,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #CSC>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir @@ -115,7 +115,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #CSR>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir b/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir @@ -84,8 +84,8 @@ // CHECK: %[[VAL_31:.*]] = m // CHECK: %[[VAL_32:.*]] = memref.alloc() : memref<4xindex> // CHECK: %[[VAL_33:.*]] = memref.cast %[[VAL_32]] : memref<4xindex> to memref -// CHECK: linalg.fill ins(%[[VAL_9]] : f64) outs(%[[VAL_30]] : memref<4xf64>) -// CHECK: linalg.fill ins(%[[VAL_12]] : i1) outs(%[[VAL_31]] : memref<4xi1>) +// CHECK: linalg.fill ins(%[[VAL_9]] : f64) inits(%[[VAL_30]] : memref<4xf64>) +// CHECK: linalg.fill ins(%[[VAL_12]] : i1) inits(%[[VAL_31]] : memref<4xi1>) // CHECK: %[[VAL_34:.*]]:4 = scf.for %[[VAL_35:.*]] = %[[VAL_10]] to %[[VAL_8]] step %[[VAL_11]] iter_args(%[[VAL_36:.*]] = %[[VAL_27]], %[[VAL_37:.*]] = %[[VAL_17]], %[[VAL_38:.*]] = %[[VAL_19]], %[[VAL_39:.*]] = %[[VAL_29]]) -> (memref, memref, memref, !sparse_tensor.storage_specifier // CHECK: %[[VAL_40:.*]] = memref.load %[[VAL_0]]{{\[}}%[[VAL_35]]] : memref // CHECK: %[[VAL_41:.*]] = arith.addi %[[VAL_35]], %[[VAL_11]] : index @@ -148,6 +148,6 @@ %C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR> %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #CSR>, tensor<8x4xf64, #CSR>) - outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> + inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> return %D: tensor<4x4xf64, #CSR> } diff --git a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir @@ -40,7 +40,7 @@ // CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense", "compressed", "compressed", "dense", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense", "compressed", 
"compressed", "dense", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_2]] : memref<10x20x30x40x50x60x70x80xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32> +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32> // CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_11]] to %[[VAL_10]] step %[[VAL_12]] { // CHECK: scf.for %[[VAL_22:.*]] = %[[VAL_11]] to %[[VAL_9]] step %[[VAL_12]] { // CHECK: %[[VAL_23:.*]] = arith.muli %[[VAL_21]], %[[VAL_9]] : index @@ -89,7 +89,7 @@ %0 = linalg.generic #trait_mul ins(%arga, %argb: tensor<10x20x30x40x50x60x70x80xf32>, tensor<80x70x60x50x40x30x20x10xf32, #SparseTensor>) - outs(%argx: tensor<10x20x30x40x50x60x70x80xf32>) { + inits(%argx: tensor<10x20x30x40x50x60x70x80xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_out.mlir b/mlir/test/Dialect/SparseTensor/sparse_out.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_out.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_out.mlir @@ -48,7 +48,7 @@ func.func @sparse_simply_dynamic1(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR> { %c = arith.constant 2.0 : f32 %0 = linalg.generic #trait_scale_inpl - outs(%argx: tensor<32x16xf32, #DCSR>) { + inits(%argx: tensor<32x16xf32, #DCSR>) { ^bb(%x: f32): %1 = arith.mulf %x, %c : f32 linalg.yield %1 : f32 @@ -81,7 +81,7 @@ // CHECK: } func.func @sparse_simply_dynamic2(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR> { %0 = linalg.generic #trait_scale_inpl - outs(%argx: tensor<32x16xf32, #DCSR>) { + inits(%argx: tensor<32x16xf32, #DCSR>) { ^bb(%x: f32): %1 = arith.addf %x, %x : f32 linalg.yield %1 : f32 @@ -129,7 +129,7 @@ %xm = bufferization.alloc_tensor() : tensor<10x20xf32, #DCSR> %0 = linalg.generic #trait_scale ins(%arga: tensor<10x20xf32, #CSR>) - outs(%xm: tensor<10x20xf32, #DCSR>) { + inits(%xm: tensor<10x20xf32, #DCSR>) { ^bb(%a: f32, %x: f32): %1 = arith.mulf %a, %s : f32 linalg.yield %1 : f32 @@ -292,7 +292,7 @@ %0 = linalg.generic #trait_sumred ins(%arga, %argb: tensor, tensor) - outs(%xinit: tensor) { + inits(%xinit: tensor) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.muli %a, %b : i32 %1 = arith.addi %x, %0 : i32 @@ -407,7 +407,7 @@ %0 = linalg.generic #trait_matmat ins(%arga, %argb: tensor, tensor) - outs(%cinit: tensor) { + inits(%cinit: tensor) { ^bb(%a: f32, %b: f32, %c: f32): %1 = arith.mulf %a, %b : f32 %2 = arith.addf %c, %1 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir b/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir @@ -20,7 +20,7 @@ // CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<10xf32> -// CHECK: linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_8]] : memref<10xf32>) +// CHECK: linalg.fill ins(%[[VAL_3]] : f32) inits(%[[VAL_8]] : memref<10xf32>) // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] { @@ -36,7 +36,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = 
linalg.generic #trait ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: i32, %x : f32): %cst = arith.sitofp %a : i32 to f32 linalg.yield %cst : f32 @@ -54,7 +54,7 @@ // CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_4]] : memref<10xf32> -// CHECK: linalg.fill ins(%[[VAL_2]] : f32) outs(%[[VAL_8]] : memref<10xf32>) +// CHECK: linalg.fill ins(%[[VAL_2]] : f32) inits(%[[VAL_8]] : memref<10xf32>) // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_1]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_3]] { @@ -70,7 +70,7 @@ %m = bufferization.alloc_tensor() : tensor<10xf32> %0 = linalg.generic #trait ins(%arga: tensor<10xi32, #SV>) - outs(%m: tensor<10xf32>) { + inits(%m: tensor<10xf32>) { ^bb(%a: i32, %x : f32): %cst = arith.sitofp %a : i32 to f32 linalg.yield %cst : f32 @@ -103,7 +103,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = linalg.generic #trait ins(%arga: tensor<10xf32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: f32, %x : f32): %up = arith.addf %a, %x : f32 linalg.yield %up : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir @@ -61,7 +61,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_dd ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %scale : f32 linalg.yield %0 : f32 @@ -109,7 +109,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_ss ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %scale : f32 linalg.yield %0 : f32 @@ -159,7 +159,7 @@ %argx: tensor<16xf32>) -> tensor<16xf32> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>) - outs(%argx: tensor<16xf32>) { + inits(%argx: tensor<16xf32>) { ^bb(%A: f32, %b: f32, %x: f32): %0 = arith.mulf %A, %b : f32 %1 = arith.addf %0, %x : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir @@ -53,7 +53,7 @@ %argx: tensor<16xf32>) -> tensor<16xf32> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>) - outs(%argx: tensor<16xf32>) { + inits(%argx: tensor<16xf32>) { ^bb(%A: f32, %b: f32, %x: f32): %0 = arith.mulf %A, %b : f32 %1 = arith.addf %0, %x : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir @@ -25,7 +25,7 @@ // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20x30x10xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) 
outs(%[[VAL_9]] : memref<20x30x10xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_9]] : memref<20x30x10xf32>) // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_10]], %[[VAL_4]] : index @@ -45,7 +45,7 @@ %argx: tensor<20x30x10xf32>) -> tensor<20x30x10xf32> { %0 = linalg.generic #trait ins(%arga: tensor<10x20x30xf32, #X>) - outs(%argx: tensor<20x30x10xf32>) { + inits(%argx: tensor<20x30x10xf32>) { ^bb(%a : f32, %x: f32): linalg.yield %a : f32 } -> tensor<20x30x10xf32> @@ -64,7 +64,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor> // CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_10]] : memref) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_10]] : memref) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_4]] { // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_4]] { // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_7]], %[[VAL_11]] : index @@ -84,7 +84,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a : f32, %x: f32): linalg.yield %a : f32 } -> tensor diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir @@ -83,7 +83,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a : f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reshape_dot.mlir @@ -78,7 +78,7 @@ func.func @sparse_reshape_fused(%arg0: tensor<5x6xf32>, %arg1: tensor<6x2x3xf32, #COO_3D>) -> tensor { %collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32, #COO_3D> into tensor<6x6xf32, #COO_2D> %0 = tensor.empty() : tensor<5x6xf32> - %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32, #COO_2D>) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32> + %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32, #COO_2D>) inits(%0 : tensor<5x6xf32>) -> tensor<5x6xf32> %expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32> %ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor return %ret1 : tensor diff --git a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir @@ -68,7 +68,7 @@ %c = arith.constant 2.2 : f32 %0 = linalg.generic #trait ins(%arga, %argp, %argq: tensor<32x16xf32, #SparseMatrix>, tensor, f32) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %p: f32, %q: f32, %x: f32): %0 = arith.mulf %a, %p : f32 // scalar tensor argument %1 = arith.mulf %0, %q : f32 // scalar argument diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir 
b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir @@ -32,7 +32,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst : f64) - outs(%0 : tensor<1024x1024xf64>) { + inits(%0 : tensor<1024x1024xf64>) { ^bb0(%a: f64, %x: f64): linalg.yield %a : f64 } -> tensor<1024x1024xf64> @@ -49,7 +49,7 @@ %0 = bufferization.alloc_tensor() : tensor<32xf64> %1 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%0 : tensor<32xf64>) { + inits(%0 : tensor<32xf64>) { ^bb0(%x: f64): linalg.yield %cst : f64 } -> tensor<32xf64> @@ -105,7 +105,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -114,7 +114,7 @@ // Sample the result with elements-wise multiplication with sparse matrix. %3 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 @@ -187,7 +187,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -197,7 +197,7 @@ %3 = bufferization.alloc_tensor() : tensor<8x8xf64, #SM> %4 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%3 : tensor<8x8xf64, #SM>) { + inits(%3 : tensor<8x8xf64, #SM>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir b/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir @@ -83,7 +83,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -93,7 +93,7 @@ %3 = bufferization.alloc_tensor() : tensor<8x8xf64, #SM> %4 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%3 : tensor<8x8xf64, #SM>) { + inits(%3 : tensor<8x8xf64, #SM>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_storage.mlir b/mlir/test/Dialect/SparseTensor/sparse_storage.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_storage.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_storage.mlir @@ -40,7 +40,7 @@ func.func @mul64(%arga: tensor<32xf64, #SparseVector64>, %argb: tensor<32xf64>, %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_mul ins(%arga, %argb: tensor<32xf64, #SparseVector64>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 linalg.yield %0 : f64 @@ -69,7 +69,7 @@ func.func @mul32(%arga: tensor<32xf64, #SparseVector32>, %argb: 
%0 = linalg.generic #trait_mul
ins(%arga, %argb: tensor<32xf64, #SparseVector32>, tensor<32xf64>)
- outs(%argx: tensor<32xf64>) {
+ inits(%argx: tensor<32xf64>) {
^bb(%a: f64, %b: f64, %x: f64):
%0 = arith.mulf %a, %b : f64
linalg.yield %0 : f64
diff --git a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir
@@ -50,7 +50,7 @@
%i = bufferization.alloc_tensor() : tensor<4x3xf64, #DCSR>
%0 = linalg.generic #transpose_trait
ins(%arga: tensor<3x4xf64, #DCSR>)
- outs(%i: tensor<4x3xf64, #DCSR>) {
+ inits(%i: tensor<4x3xf64, #DCSR>) {
^bb(%a: f64, %x: f64):
linalg.yield %a : f64
} -> tensor<4x3xf64, #DCSR>
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir
@@ -75,7 +75,7 @@
func.func @scale_d(%arga: tensor<1024xf32, #DenseVector>, %b: f32, %argx: tensor<1024xf32>) -> tensor<1024xf32> {
%0 = linalg.generic #trait_scale_d
ins(%arga: tensor<1024xf32, #DenseVector>)
- outs(%argx: tensor<1024xf32>) {
+ inits(%argx: tensor<1024xf32>) {
^bb(%a: f32, %x: f32):
%0 = arith.mulf %a, %b : f32
linalg.yield %0 : f32
@@ -199,7 +199,7 @@
%argx: tensor<1024xf32>) -> tensor<1024xf32> {
%0 = linalg.generic #trait_mul_s
ins(%arga, %argb: tensor<1024xf32, #SparseVector>, tensor<1024xf32>)
- outs(%argx: tensor<1024xf32>) {
+ inits(%argx: tensor<1024xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.mulf %a, %b : f32
linalg.yield %0 : f32
@@ -297,7 +297,7 @@
%argx: tensor) -> tensor {
%0 = linalg.generic #trait_reduction_d
ins(%arga, %argb: tensor<1024xf32, #DenseVector>, tensor<1024xf32>)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.mulf %a, %b : f32
%1 = arith.addf %x, %0 : f32
@@ -438,7 +438,7 @@
%argx: tensor<512x1024xf32>) -> tensor<512x1024xf32> {
%0 = linalg.generic #trait_mul_ds
ins(%arga, %argb: tensor<512x1024xf32, #SparseMatrix>, tensor<512x1024xf32>)
- outs(%argx: tensor<512x1024xf32>) {
+ inits(%argx: tensor<512x1024xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.mulf %a, %b : f32
linalg.yield %0 : f32
@@ -552,7 +552,7 @@
%argx: tensor<33x64xf64>) -> tensor<33x64xf64> {
%0 = linalg.generic #trait_affine
ins(%arga: tensor<32x64xf64, #SparseMatrix>)
- outs(%argx: tensor<33x64xf64>) {
+ inits(%argx: tensor<33x64xf64>) {
^bb(%a: f64, %x: f64):
%0 = arith.addf %x, %a : f64
linalg.yield %0 : f64
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir
@@ -112,7 +112,7 @@
%0 = linalg.generic #trait
ins(%arga, %argb: tensor<64x32xf64, #SparseMatrix>, tensor<64x32xf64, #SparseMatrix>)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f64, %b: f64, %x: f64):
%m = arith.addf %a, %b : f64
%t = arith.addf %x, %m : f64
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir
@@ -29,7 +29,7 @@
// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : memref<8xi64>
-// CHECK: linalg.fill ins(%[[VAL_4]] : i64) outs(%[[VAL_11]] : memref<8xi64>)
+// CHECK: linalg.fill ins(%[[VAL_4]] : i64) inits(%[[VAL_11]] : memref<8xi64>)
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref
// CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_1]] {
@@ -48,7 +48,7 @@
%init = tensor.empty() : tensor<8xi64>
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
- outs(%init: tensor<8xi64>) {
+ inits(%init: tensor<8xi64>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%ii = arith.index_cast %i : index to i64
@@ -71,7 +71,7 @@
// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref
// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : memref<8xi64>
-// CHECK: linalg.fill ins(%[[VAL_3]] : i64) outs(%[[VAL_11]] : memref<8xi64>)
+// CHECK: linalg.fill ins(%[[VAL_3]] : i64) inits(%[[VAL_11]] : memref<8xi64>)
// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref
// CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref
// CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
@@ -113,7 +113,7 @@
%init = tensor.empty() : tensor<8xi64>
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
- outs(%init: tensor<8xi64>) {
+ inits(%init: tensor<8xi64>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%ii = arith.index_cast %i : index to i64
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_mv.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_mv.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_mv.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_mv.mlir
@@ -21,7 +21,7 @@
%argx: tensor) -> tensor {
%x = linalg.generic #matvec
ins(%arga, %argb: tensor, tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.mulf %a, %b : f32
%1 = arith.addf %x, %0 : f32
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_ops.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_ops.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_ops.mlir
@@ -56,7 +56,7 @@
%t = arith.constant 1 : i32
%0 = linalg.generic #trait
ins(%arga, %argb: tensor<1024xf32, #DenseVector>, tensor<1024xf32, #DenseVector>)
- outs(%init: tensor<1024xf32>) {
+ inits(%init: tensor<1024xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = math.absf %a : f32
%1 = math.ceil %0 : f32
diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir
--- a/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir
+++ b/mlir/test/Dialect/SparseTensor/sparse_vector_peeled.mlir
@@ -54,7 +54,7 @@
func.func @mul_s(%arga: tensor<1024xf32, #SparseVector>, %argb: tensor<1024xf32>, %argx: tensor<1024xf32>) -> tensor<1024xf32> {
%0 = linalg.generic #trait_mul_s
ins(%arga, %argb: tensor<1024xf32, #SparseVector>, tensor<1024xf32>)
- outs(%argx: tensor<1024xf32>) {
+ inits(%argx: tensor<1024xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.mulf %a, %b : f32
linalg.yield %0 : f32
diff --git a/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir b/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir
--- a/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir
+++ b/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir
@@ -101,7 +101,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f64, %x: f64):
%t = arith.mulf %x, %a: f64
linalg.yield %t : f64
@@ -191,7 +191,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f64, %x: f64):
%t = arith.mulf %x, %a: f64
linalg.yield %t : f64
@@ -272,7 +272,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i13, %x: i13):
%t = arith.ori %x, %a: i13
linalg.yield %t : i13
@@ -350,7 +350,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i13, %x: i13):
%t = arith.ori %a, %x: i13
linalg.yield %t : i13
@@ -425,7 +425,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i32, %x: i32):
%t = arith.subi %x, %a: i32
linalg.yield %t : i32
@@ -503,7 +503,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i32, %x: i32):
%t = arith.xori %x, %a: i32
linalg.yield %t : i32
@@ -577,7 +577,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i32, %x: i32):
%t = arith.andi %x, %a: i32
linalg.yield %t : i32
@@ -652,7 +652,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i32, %x: i32):
%t = arith.muli %x, %a: i32
linalg.yield %t : i32
@@ -726,7 +726,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i32, %x: i32):
%t = arith.addi %x, %a: i32
linalg.yield %t : i32
@@ -800,7 +800,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f32, %x: f32):
%t = arith.subf %x, %a: f32
linalg.yield %t : f32
@@ -874,7 +874,7 @@
-> tensor {
%0 = linalg.generic #trait
ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: f32, %x: f32):
%t = arith.addf %x, %a: f32
linalg.yield %t : f32
diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir
--- a/mlir/test/Dialect/Tensor/bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/bufferize.mlir
@@ -200,7 +200,7 @@
// CHECK-DAG: %[[ALLOC:.*]] = memref.alloc(%[[DYNAMIC_EXTENT]]) {{.*}} : memref
// CHECK: %[[ALLOC_T:.*]] = bufferization.to_tensor %[[ALLOC]]
// CHECK: %[[MAPPED:.*]] = linalg.map
-// CHECK: outs(%[[ALLOC_T]] : tensor)
+// CHECK: inits(%[[ALLOC_T]] : tensor)
// CHECK: %[[INDEX:.*]] = linalg.index 0 : index
// CHECK: %[[ELEM:.*]] = memref.dim %[[ARG_M]], %[[INDEX]] : memref<*xf32>
// CHECK: linalg.yield %[[ELEM]]
@@ -226,7 +226,7 @@
// CHECK: %[[ALLOC:.*]] = memref.alloc(%[[DYNAMIC_EXTENT]]) {{.*}} : memref<16x?xindex>
// CHECK: %[[ALLOC_T:.*]] = bufferization.to_tensor %[[ALLOC]]
// CHECK: %[[MAPPED:.*]] = linalg.map
-// CHECK: outs(%[[ALLOC_T]] : tensor<16x?xindex>)
+// CHECK: inits(%[[ALLOC_T]] : tensor<16x?xindex>)
// CHECK: %[[INDEX0:.*]] = linalg.index 0
// CHECK: %[[INDEX1:.*]] = linalg.index 1
// CHECK: %[[ADD:.*]] = arith.addi %[[INDEX0]], %[[INDEX1]]
@@ -563,7 +563,7 @@
// CHECK: %[[alloc:.*]] = memref.alloc(%[[size0]], %[[size1]]) {{.*}} : memref
// CHECK: %[[alloc_t:.*]] = bufferization.to_tensor %[[alloc]]
// CHECK: %[[mapped:.*]] = linalg.map
- // CHECK: outs(%[[alloc_t]] : tensor)
+ // CHECK: inits(%[[alloc_t]] : tensor)
// CHECK: %[[index0:.*]] = linalg.index 0
// CHECK: %[[index1:.*]] = linalg.index 1
// CHECK: %[[mul:.*]] = arith.muli %[[index0]], %[[index1]]
diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir
@@ -70,8 +70,8 @@
%r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor
/// Overwrite A inplace.
- // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]]
- %r1 = linalg.fill ins(%f0 : f32) outs(%r0 : tensor) -> tensor
+ // CHECK: linalg.fill ins({{.*}}{{.*}}inits(%[[A]]
+ %r1 = linalg.fill ins(%f0 : f32) inits(%r0 : tensor) -> tensor
// CHECK: return
// CHECK-NOT: tensor
@@ -90,8 +90,8 @@
{
%f0 = arith.constant 0.0 : f32
- // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]]
- %r0 = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+ // CHECK: linalg.fill ins({{.*}}{{.*}}inits(%[[A]]
+ %r0 = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
// CHECK-NOT: alloc
// CHECK: %[[SV_A:.*]] = memref.subview %[[A]]
@@ -255,7 +255,7 @@
// CHECK: memref.alloc
%cst = arith.constant 4.200000e+01 : f32
// CHECK: linalg.fill
- %0 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>) -> tensor<10xf32>
+ %0 = linalg.fill ins(%cst : f32) inits(%t : tensor<10xf32>) -> tensor<10xf32>
// CHECK: memref.copy
%1 = tensor.insert_slice %0 into %t[0][10][1] : tensor<10xf32> into tensor<10xf32>
return %1 : tensor<10xf32>
@@ -273,7 +273,7 @@
{memory_space = 3 : i64} : tensor
// CHECK: %[[padded_alloc:.*]] = memref.alloc() {{.*}} : memref<15xf32, 3>
// CHECK: linalg.map
- // CHECK: outs(%[[padded_alloc]] : memref<15xf32, 3>)
+ // CHECK: inits(%[[padded_alloc]] : memref<15xf32, 3>)
// CHECK: linalg.yield %{{.*}}
// CHECK: }
// CHECK: %[[subview:.*]] = memref.subview {{.*}} : memref<15xf32, 3> to memref, 3>
@@ -297,8 +297,8 @@
%cst = arith.constant 0.0 : f32
%c0 = arith.constant 0 : index
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32>
- // CHECK: linalg.fill {{.*}} outs(%[[alloc]] : memref<10xf32>)
- %1 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>) -> tensor<10xf32>
+ // CHECK: linalg.fill {{.*}} inits(%[[alloc]] : memref<10xf32>)
+ %1 = linalg.fill ins(%cst : f32) inits(%t : tensor<10xf32>) -> tensor<10xf32>
// Read %1 so that it does not DCE away.
%vec = vector.transfer_read %1[%c0], %cst : tensor<10xf32>, vector<10xf32>
@@ -318,8 +318,8 @@
func.func @insert_slice_full_overwrite(%t: tensor<10xf32>, %b: tensor<10xf32>) -> tensor<10xf32> {
%cst = arith.constant 0.0 : f32
%c0 = arith.constant 0 : index
- // CHECK: linalg.fill {{.*}} outs(%[[t]] : memref<10xf32,{{.*}}>)
- %1 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>) -> tensor<10xf32>
+ // CHECK: linalg.fill {{.*}} inits(%[[t]] : memref<10xf32,{{.*}}>)
+ %1 = linalg.fill ins(%cst : f32) inits(%t : tensor<10xf32>) -> tensor<10xf32>
// Read %1 so that it does not DCE away.
%vec = vector.transfer_read %1[%c0], %cst : tensor<10xf32>, vector<10xf32>
diff --git a/mlir/test/Dialect/Transform/selective-targeting.mlir b/mlir/test/Dialect/Transform/selective-targeting.mlir
--- a/mlir/test/Dialect/Transform/selective-targeting.mlir
+++ b/mlir/test/Dialect/Transform/selective-targeting.mlir
@@ -12,7 +12,7 @@
// CHECK-SAME: -> tensor<4x4xf32>
%0 = linalg.matmul { test.attrA }
ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg2: tensor<128x128xf32>)
+ inits(%arg2: tensor<128x128xf32>)
-> tensor<128x128xf32>
func.return %0 : tensor<128x128xf32>
}
@@ -30,7 +30,7 @@
// CHECK: vector.transfer_write
%0 = linalg.matmul { test.attrA, test.attrC }
ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg2: tensor<128x128xf32>)
+ inits(%arg2: tensor<128x128xf32>)
-> tensor<128x128xf32>
func.return %0 : tensor<128x128xf32>
}
@@ -47,7 +47,7 @@
// CHECK: vector.transfer_write
%0 = linalg.matmul { test.attrC }
ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg2: tensor<128x128xf32>)
+ inits(%arg2: tensor<128x128xf32>)
-> tensor<128x128xf32>
func.return %0 : tensor<128x128xf32>
}
@@ -94,7 +94,7 @@
// CHECK: vector.contract
%0 = linalg.matmul {test.attrA}
ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg2: tensor<128x128xf32>)
+ inits(%arg2: tensor<128x128xf32>)
-> tensor<128x128xf32>
func.return %0 : tensor<128x128xf32>
}
@@ -105,7 +105,7 @@
-> tensor<128x128xf32> {
// CHECK: linalg.matmul
%0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg2: tensor<128x128xf32>)
+ inits(%arg2: tensor<128x128xf32>)
-> tensor<128x128xf32>
func.return %0 : tensor<128x128xf32>
}
@@ -139,11 +139,11 @@
// CHECK: vector.contract
%0 = linalg.matmul {test.attrA}
ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg2: tensor<128x128xf32>)
+ inits(%arg2: tensor<128x128xf32>)
-> tensor<128x128xf32>
// CHECK: vector.contract
%1 = linalg.matmul ins(%arg0, %0: tensor<128x128xf32>, tensor<128x128xf32>)
- outs(%arg3: tensor<128x128xf32>)
+ inits(%arg3: tensor<128x128xf32>)
-> tensor<128x128xf32>
return %1 : tensor<128x128xf32>
}
diff --git a/mlir/test/Dialect/Vector/transform-vector.mlir b/mlir/test/Dialect/Vector/transform-vector.mlir
--- a/mlir/test/Dialect/Vector/transform-vector.mlir
+++ b/mlir/test/Dialect/Vector/transform-vector.mlir
@@ -8,7 +8,7 @@
// CHECK: vector.extract {{.*}} : vector<8x4xf32>
// CHECK: vector.store {{.*}} : memref<8x32xf32>, vector<4xf32>
%0 = linalg.matmul ins(%arg0, %arg1: tensor<8x16xf32>, tensor<16x32xf32>)
- outs(%arg2: tensor<8x32xf32>)
+ inits(%arg2: tensor<8x32xf32>)
-> tensor<8x32xf32>
return %0 : tensor<8x32xf32>
}
diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
--- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
+++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir
@@ -72,7 +72,7 @@
// LINALG: scf.yield %[[A]], %[[i]], %[[j]] : memref, index, index
// LINALG: } else {
// slow path, fill tmp alloc and yield a memref_casted version of it
- // LINALG: linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>)
+ // LINALG: linalg.fill ins(%cst : f32) inits(%[[alloc]] : memref<4x8xf32>)
// LINALG: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref
// LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[d0]], %[[i]], %[[c4]])
// LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]])
@@ -165,7 +165,7 @@
// LINALG-SAME: memref>, index, index
// LINALG: } else {
// slow path, fill tmp alloc and yield a memref_casted version of it
- // LINALG: linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>)
+ // LINALG: linalg.fill ins(%cst : f32) inits(%[[alloc]] : memref<4x8xf32>)
// LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[c7]], %[[i]], %[[c4]])
// LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]])
// LINALG: %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1]
diff --git a/mlir/test/IR/slice.mlir b/mlir/test/IR/slice.mlir
--- a/mlir/test/IR/slice.mlir
+++ b/mlir/test/IR/slice.mlir
@@ -6,9 +6,9 @@
%c = memref.alloc(%arg0, %arg1) : memref
%d = memref.alloc(%arg0, %arg1) : memref
linalg.matmul ins(%a, %b : memref, memref)
- outs(%c : memref)
+ inits(%c : memref)
linalg.matmul ins(%a, %b : memref, memref)
- outs(%d : memref)
+ inits(%d : memref)
memref.dealloc %c : memref
memref.dealloc %b : memref
memref.dealloc %a : memref
diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
--- a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
@@ -43,7 +43,7 @@
iterator_types = ["parallel", "parallel"]
}
ins(%lhs, %rhs : memref, memref)
- outs(%sum : memref)
+ inits(%sum : memref)
{
^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32):
%0 = arith.addf %lhs_in, %rhs_in : f32
@@ -68,8 +68,8 @@
%RHS10 = memref.alloc() {alignment = 64} : memref<1x10xf32>
%DST10 = memref.alloc() {alignment = 64} : memref<1x10xf32>
- linalg.fill ins(%f1 : f32) outs(%LHS10 : memref<1x10xf32>)
- linalg.fill ins(%f1 : f32) outs(%RHS10 : memref<1x10xf32>)
+ linalg.fill ins(%f1 : f32) inits(%LHS10 : memref<1x10xf32>)
+ linalg.fill ins(%f1 : f32) inits(%RHS10 : memref<1x10xf32>)
%LHS = memref.cast %LHS10 : memref<1x10xf32> to memref
%RHS = memref.cast %RHS10 : memref<1x10xf32> to memref
diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
--- a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
@@ -90,8 +90,8 @@
%RHS10 = memref.alloc() {alignment = 64} : memref<1x10xf32>
%DST10 = memref.alloc() {alignment = 64} : memref<1x10xf32>
- linalg.fill ins(%f1 : f32) outs(%LHS10 : memref<1x10xf32>)
- linalg.fill ins(%f1 : f32) outs(%RHS10 : memref<1x10xf32>)
+ linalg.fill ins(%f1 : f32) inits(%LHS10 : memref<1x10xf32>)
+ linalg.fill ins(%f1 : f32) inits(%RHS10 : memref<1x10xf32>)
%LHS = memref.cast %LHS10 : memref<1x10xf32> to memref
%RHS = memref.cast %RHS10 : memref<1x10xf32> to memref
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir
@@ -12,9 +12,9 @@
%x = memref.dim %A, %c0 : memref
%y = memref.dim %B, %c1 : memref
%C = memref.alloc(%x, %y) : memref
- linalg.fill ins(%f0 : f32) outs(%C : memref)
+ linalg.fill ins(%f0 : f32) inits(%C : memref)
linalg.matmul ins(%A, %B: memref, memref)
- outs(%C: memref)
+ inits(%C: memref)
return %C : memref
}
@@ -26,12 +26,12 @@
%x = memref.dim %A, %c1 : memref
%n = memref.dim %B, %c1 : memref
%C = memref.alloc(%m, %n) : memref
- linalg.fill ins(%f0 : f32) outs(%C : memref)
+ linalg.fill ins(%f0 : f32) inits(%C : memref)
scf.for %i = %c0 to %n step %c1 {
%b = memref.subview %B[0, %i][%x, 1][1, 1] : memref to memref>
%c = memref.subview %C[0, %i][%m, 1][1, 1] : memref to memref>
linalg.matvec ins(%A, %b: memref, memref>)
- outs(%c: memref>)
+ inits(%c: memref>)
}
return %C : memref
}
@@ -46,8 +46,8 @@
%val2 = arith.constant 17.0 : f32
%A = memref.alloc(%m, %x) : memref
%B = memref.alloc(%x, %n) : memref
- linalg.fill ins(%val1 : f32) outs(%A : memref)
- linalg.fill ins(%val2 : f32) outs(%B : memref)
+ linalg.fill ins(%val1 : f32) inits(%A : memref)
+ linalg.fill ins(%val2 : f32) inits(%B : memref)
memref.store %val1, %B[%c0, %c0] : memref
%C1 = call @matmul(%A, %B) : (memref, memref) -> memref
%C2 = call @matvec(%A, %B) : (memref, memref) -> memref
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir
@@ -14,7 +14,7 @@
// Creates and returns a 1-D buffer of size %s1 filled with the value %f
func.func @alloc_1d_filled_f32(%s1 : index, %f : f32) -> memref {
%buf = memref.alloc(%s1) : memref
- linalg.fill ins(%f : f32) outs(%buf : memref)
+ linalg.fill ins(%f : f32) inits(%buf : memref)
return %buf : memref
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir
@@ -14,7 +14,7 @@
// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f
func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref {
%buf = memref.alloc(%s1, %s2, %s3) : memref
- linalg.fill ins(%f : f32) outs(%buf : memref)
+ linalg.fill ins(%f : f32) inits(%buf : memref)
return %buf : memref
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir
@@ -14,7 +14,7 @@
// Creates and returns a 2-D buffer of size (%s1, %s2) filled with the value %f
func.func @alloc_2d_filled_f32(%s1 : index, %s2 : index, %f : f32) -> memref {
%buf = memref.alloc(%s1, %s2) : memref
- linalg.fill ins(%f : f32) outs(%buf : memref)
+ linalg.fill ins(%f : f32) inits(%buf : memref)
return %buf : memref
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir
@@ -14,7 +14,7 @@
// Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f
func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref {
%buf = memref.alloc(%s1, %s2, %s3, %s4) : memref
- linalg.fill ins(%f : f32) outs(%buf : memref)
+ linalg.fill ins(%f : f32) inits(%buf : memref)
return %buf : memref
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir
@@ -14,7 +14,7 @@
// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f
func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref {
%buf = memref.alloc(%s1, %s2, %s3) : memref
- linalg.fill ins(%f : f32) outs(%buf : memref)
+ linalg.fill ins(%f : f32) inits(%buf : memref)
return %buf : memref
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir
@@ -14,7 +14,7 @@
// Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f
func.func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref {
%buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref
- linalg.fill ins(%f : f32) outs(%buf : memref)
+ linalg.fill ins(%f : f32) inits(%buf : memref)
return %buf : memref
}
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir
@@ -14,7 +14,7 @@
%cst = arith.constant 0.000000e+00 : f32
%c2 = arith.constant 2 : index
%c0 = arith.constant 0 : index
- %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor
+ %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor) -> tensor
%1 = affine.apply #map0(%c0, %c64)[%c2]
%2 = bufferization.alloc_tensor(%1) : tensor
%3 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %2) -> (tensor) {
@@ -61,7 +61,7 @@
%13 = tensor.extract_slice %6[%12, 0] [1, 2] [1, 1] : tensor to tensor<2xf32>
%14 = affine.apply #map1(%arg3, %c0)[%c2]
%15 = tensor.extract_slice %3[%14, 0] [1, 2] [1, 1] : tensor to tensor<2xf32>
- %16 = linalg.dot ins(%13, %15 : tensor<2xf32>, tensor<2xf32>) outs(%arg4 : tensor) -> tensor
+ %16 = linalg.dot ins(%13, %15 : tensor<2xf32>, tensor<2xf32>) inits(%arg4 : tensor) -> tensor
// %AA = tensor.cast %13 : tensor<2xf32> to tensor<*xf32>
// call @printMemrefF32(%AA) : (tensor<*xf32>) -> ()
@@ -83,9 +83,9 @@
%A = bufferization.alloc_tensor() : tensor<64xf32>
%B = bufferization.alloc_tensor() : tensor<64xf32>
%C = bufferization.alloc_tensor() : tensor
- %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32>
- %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32>
- %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor
+ %AA = linalg.fill ins(%v1 : f32) inits(%A : tensor<64xf32>) -> tensor<64xf32>
+ %BB = linalg.fill ins(%v2 : f32) inits(%B : tensor<64xf32>) -> tensor<64xf32>
+ %CC = linalg.fill ins(%v0 : f32) inits(%C : tensor) -> tensor
%res = call @init_and_dot(%AA, %BB, %CC) : (tensor<64xf32>, tensor<64xf32>, tensor) -> tensor
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir
@@ -23,7 +23,7 @@
%C = arith.constant dense<1000.0> : tensor<2x4xf32>
%D = linalg.matmul ins(%A, %B: tensor<2x3xf32>, tensor<3x4xf32>)
- outs(%C: tensor<2x4xf32>) -> tensor<2x4xf32>
+ inits(%C: tensor<2x4xf32>) -> tensor<2x4xf32>
%unranked = tensor.cast %D : tensor<2x4xf32> to tensor<*xf32>
call @printMemrefF32(%unranked) : (tensor<*xf32>) -> ()
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir
@@ -73,7 +73,7 @@
%init = bufferization.alloc_tensor(%d0, %d1) : tensor
%0 = linalg.generic #trait_assign
ins(%arga: tensor)
- outs(%init: tensor) {
+ inits(%init: tensor) {
^bb(%a: f64, %x: f64):
%0 = arith.mulf %a, %c2 : f64
linalg.yield %0 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir
@@ -39,7 +39,7 @@
%xv = bufferization.alloc_tensor (%d) : tensor
%0 = linalg.generic #trait_vec_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: bf16, %b: bf16, %x: bf16):
%1 = sparse_tensor.binary %a, %b : bf16, bf16 to bf16
overlap={
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir
@@ -48,7 +48,7 @@
%xv = bufferization.alloc_tensor (%d) : tensor
%0 = linalg.generic #trait_vec_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f16, %b: f16, %x: f16):
%1 = sparse_tensor.binary %a, %b : f16, f16 to f16
overlap={
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/reshape_dot.mlir
@@ -27,8 +27,8 @@
%collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32, #COO_3D> into tensor<6x6xf32, #COO_2D>
%0 = tensor.empty() : tensor<5x6xf32>
%cst = arith.constant 0.000000e+00 : f32
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
- %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32, #COO_2D>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32, #COO_2D>) inits(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
%expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
%ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor
return %ret1 : tensor
@@ -38,8 +38,8 @@
%collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32, #COO_3D> into tensor<6x6xf32, #COO_2D>
%0 = tensor.empty() : tensor<5x6xf32>
%cst = arith.constant 0.000000e+00 : f32
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
- %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32, #COO_2D>, tensor<6x6xf32, #COO_2D>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32, #COO_2D>, tensor<6x6xf32, #COO_2D>) inits(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
%expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
%ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor
return %ret1 : tensor
@@ -49,8 +49,8 @@
%collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32> into tensor<6x6xf32>
%0 = tensor.empty() : tensor<5x6xf32>
%cst = arith.constant 0.000000e+00 : f32
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
- %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32>) inits(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
%expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
%ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor
return %ret1 : tensor
@@ -61,8 +61,8 @@
%collapsed = tensor.collapse_shape %arg1 [[0, 1], [2]] : tensor<2x3x6xf32, #COO_3D> into tensor<6x6xf32, #COO_2D>
%0 = tensor.empty() : tensor<5x6xf32>
%cst = arith.constant 0.000000e+00 : f32
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
- %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32, #COO_2D>, tensor<6x6xf32, #COO_2D>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<5x6xf32>) -> tensor<5x6xf32>
+ %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32, #COO_2D>, tensor<6x6xf32, #COO_2D>) inits(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
%expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
%ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor
return %ret1 : tensor
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir
@@ -45,7 +45,7 @@
%xin = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_op
ins(%arg0: tensor)
- outs(%xin: tensor) {
+ inits(%xin: tensor) {
^bb0(%a: f64, %x: f64) :
%result = math.absf %a : f64
linalg.yield %result : f64
@@ -60,7 +60,7 @@
%xin = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_op
ins(%arg0: tensor)
- outs(%xin: tensor) {
+ inits(%xin: tensor) {
^bb0(%a: i32, %x: i32) :
%result = math.absi %a : i32
linalg.yield %result : i32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir
@@ -72,7 +72,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_vec_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: i32, %b: i32, %x: i32):
%1 = sparse_tensor.binary %a, %b : i32, i32 to i32
overlap={
@@ -96,7 +96,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_vec_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f64, %b: f64, %x: f64):
%1 = sparse_tensor.binary %a, %b : f64, f64 to f64
overlap={
@@ -120,7 +120,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_vec_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f64, %b: f64, %x: f64):
%1 = sparse_tensor.binary %a, %b : f64, f64 to f64
overlap={}
@@ -138,7 +138,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_vec_scale
ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f64, %x: i32):
%idx = linalg.index 0 : index
%1 = sparse_tensor.binary %a, %idx : f64, index to i32
@@ -165,7 +165,7 @@
%xv = bufferization.alloc_tensor(%d0, %d1) : tensor
%0 = linalg.generic #trait_mat_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f64, %b: f64, %x: f64):
%1 = sparse_tensor.binary %a, %b: f64, f64 to f64
overlap={
@@ -191,7 +191,7 @@
%0 = linalg.generic #trait_mat_op
ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) {
+ inits(%C: tensor<4x4xf64, #DCSR>) {
^bb0(%a: f64, %b: f64, %c: f64) :
%result = sparse_tensor.binary %a, %b : f64, f64 to f64
overlap={
@@ -213,7 +213,7 @@
%0 = linalg.generic #trait_mat_op
ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) {
+ inits(%C: tensor<4x4xf64, #DCSR>) {
^bb0(%a: f64, %b: f64, %c: f64) :
%result = sparse_tensor.binary %a, %b : f64, f64 to f64
overlap={
@@ -241,7 +241,7 @@
%0 = linalg.generic #trait_mat_op
ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) {
+ inits(%C: tensor<4x4xf64, #DCSR>) {
^bb0(%a: f64, %b: f64, %c: f64) :
%row = linalg.index 0 : index
%col = linalg.index 1 : index
@@ -278,7 +278,7 @@
%0 = linalg.generic #trait_mat_op
ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) {
+ inits(%C: tensor<4x4xf64, #DCSR>) {
^bb0(%a: f64, %b: f64, %c: f64) :
%result = sparse_tensor.binary %a, %b : f64, f64 to f64
overlap={
@@ -323,7 +323,7 @@
%0 = linalg.generic #trait_mat_op
ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
- outs(%C: tensor<4x4xi8, #DCSR>) {
+ inits(%C: tensor<4x4xi8, #DCSR>) {
^bb0(%a: f64, %b: f64, %c: i8) :
%result = sparse_tensor.binary %a, %b : f64, f64 to i8
overlap={
@@ -346,7 +346,7 @@
%0 = linalg.generic #trait_mat_op
ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) {
+ inits(%C: tensor<4x4xf64, #DCSR>) {
^bb0(%a: f64, %b: f64, %c: f64) :
%result = sparse_tensor.binary %a, %b : f64, f64 to f64
overlap={}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir
@@ -63,7 +63,7 @@
%argb: tensor<10xf32>) -> tensor<10xf32> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xi32, #SV>)
- outs(%argb: tensor<10xf32>) {
+ inits(%argb: tensor<10xf32>) {
^bb(%a: i32, %x : f32):
%cst = arith.sitofp %a : i32 to f32
linalg.yield %cst : f32
@@ -74,7 +74,7 @@
%argb: tensor<10xf32>) -> tensor<10xf32> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xi32, #SV>)
- outs(%argb: tensor<10xf32>) {
+ inits(%argb: tensor<10xf32>) {
^bb(%a: i32, %x : f32):
%cst = arith.uitofp %a : i32 to f32
linalg.yield %cst : f32
@@ -85,7 +85,7 @@
%argb: tensor<10xi32>) -> tensor<10xi32> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xf32, #SV>)
- outs(%argb: tensor<10xi32>) {
+ inits(%argb: tensor<10xi32>) {
^bb(%a: f32, %x : i32):
%cst = arith.fptosi %a : f32 to i32
linalg.yield %cst : i32
@@ -96,7 +96,7 @@
%argb: tensor<10xi32>) -> tensor<10xi32> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xf64, #SV>)
- outs(%argb: tensor<10xi32>) {
+ inits(%argb: tensor<10xi32>) {
^bb(%a: f64, %x : i32):
%cst = arith.fptoui %a : f64 to i32
linalg.yield %cst : i32
@@ -107,7 +107,7 @@
%argb: tensor<10xf64>) -> tensor<10xf64> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xf32, #SV>)
- outs(%argb: tensor<10xf64>) {
+ inits(%argb: tensor<10xf64>) {
^bb(%a: f32, %x : f64):
%cst = arith.extf %a : f32 to f64
linalg.yield %cst : f64
@@ -118,7 +118,7 @@
%argb: tensor<10xf32>) -> tensor<10xf32> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xf64, #SV>)
- outs(%argb: tensor<10xf32>) {
+ inits(%argb: tensor<10xf32>) {
^bb(%a: f64, %x : f32):
%cst = arith.truncf %a : f64 to f32
linalg.yield %cst : f32
@@ -129,7 +129,7 @@
%argb: tensor<10xi64>) -> tensor<10xi64> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xi32, #SV>)
- outs(%argb: tensor<10xi64>) {
+ inits(%argb: tensor<10xi64>) {
^bb(%a: i32, %x : i64):
%cst = arith.extsi %a : i32 to i64
linalg.yield %cst : i64
@@ -140,7 +140,7 @@
%argb: tensor<10xi64>) -> tensor<10xi64> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xi32, #SV>)
- outs(%argb: tensor<10xi64>) {
+ inits(%argb: tensor<10xi64>) {
^bb(%a: i32, %x : i64):
%cst = arith.extui %a : i32 to i64
linalg.yield %cst : i64
@@ -151,7 +151,7 @@
%argb: tensor<10xi8>) -> tensor<10xi8> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xi32, #SV>)
- outs(%argb: tensor<10xi8>) {
+ inits(%argb: tensor<10xi8>) {
^bb(%a: i32, %x : i8):
%cst = arith.trunci %a : i32 to i8
linalg.yield %cst : i8
@@ -162,7 +162,7 @@
%argb: tensor<10xi32>) -> tensor<10xi32> {
%0 = linalg.generic #trait_cast
ins(%arga: tensor<10xf32, #SV>)
- outs(%argb: tensor<10xi32>) {
+ inits(%argb: tensor<10xi32>) {
^bb(%a: f32, %x : i32):
%cst = arith.bitcast %a : f32 to i32
linalg.yield %cst : i32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir
@@ -48,7 +48,7 @@
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %b: complex, %x: complex):
%1 = complex.add %a, %b : complex
linalg.yield %1 : complex
@@ -65,7 +65,7 @@
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %b: complex, %x: complex):
%1 = complex.mul %a, %b : complex
linalg.yield %1 : complex
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir
@@ -48,7 +48,7 @@
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %b: complex, %x: complex):
%1 = complex.add %a, %b : complex
linalg.yield %1 : complex
@@ -65,7 +65,7 @@
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %b: complex, %x: complex):
%1 = complex.mul %a, %b : complex
linalg.yield %1 : complex
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir
@@ -57,7 +57,7 @@
%0 = linalg.generic #trait_op2
ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %b: complex, %x: complex):
%1 = complex.neg %b : complex
%2 = complex.sub %a, %1 : complex
@@ -73,7 +73,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector>
%0 = linalg.generic #trait_op1
ins(%arga: tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %x: complex):
%1 = complex.sin %a : complex
linalg.yield %1 : complex
@@ -88,7 +88,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector>
%0 = linalg.generic #trait_op1
ins(%arga: tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %x: complex):
%1 = complex.sqrt %a : complex
linalg.yield %1 : complex
@@ -103,7 +103,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector>
%0 = linalg.generic #trait_op1
ins(%arga: tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %x: complex):
%1 = complex.tanh %a : complex
linalg.yield %1 : complex
@@ -118,7 +118,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector>
%0 = linalg.generic #trait_op1
ins(%arga: tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %x: complex):
%1 = complex.log1p %a : complex
%2 = complex.expm1 %1 : complex
@@ -135,7 +135,7 @@
%c = complex.constant [2.0 : f64, 0.0 : f64] : complex
%0 = linalg.generic #trait_op1
ins(%arga: tensor, #SparseVector>)
- outs(%xv: tensor, #SparseVector>) {
+ inits(%xv: tensor, #SparseVector>) {
^bb(%a: complex, %x: complex):
%1 = complex.div %a, %c : complex
linalg.yield %1 : complex
@@ -150,7 +150,7 @@
%xv = bufferization.alloc_tensor(%d) : tensor
%0 = linalg.generic #trait_op1
ins(%arga: tensor, #SparseVector>)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: complex, %x: f64):
%1 = complex.abs %a : complex
linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_1d_nwc_wcf.mlir
@@ -43,7 +43,7 @@
// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f
func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> tensor {
%buf = bufferization.alloc_tensor(%s1, %s2, %s3) : tensor
- %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor
+ %ret = linalg.fill ins(%f : f32) inits(%buf : tensor) -> tensor
return %ret : tensor
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_2d_nhwc_hwcf.mlir
@@ -42,7 +42,7 @@
// Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f
func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> tensor {
%buf = bufferization.alloc_tensor(%s1, %s2, %s3, %s4) : tensor
- %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor
+ %ret = linalg.fill ins(%f : f32) inits(%buf : tensor) -> tensor
return %ret : tensor
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d.mlir
@@ -42,7 +42,7 @@
// Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f
func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> tensor {
%buf = bufferization.alloc_tensor(%s1, %s2, %s3) : tensor
- %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor
+ %ret = linalg.fill ins(%f : f32) inits(%buf : tensor) -> tensor
return %ret : tensor
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_conv_3d_ndhwc_dhwcf.mlir
@@ -42,7 +42,7 @@
// Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f
func.func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> tensor {
%buf = bufferization.alloc_tensor(%s1, %s2, %s3, %s4, %s5) : tensor
- %ret = linalg.fill ins(%f : f32) outs(%buf : tensor) -> tensor
+ %ret = linalg.fill ins(%f : f32) inits(%buf : tensor) -> tensor
return %ret : tensor
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_coo_test.mlir
@@ -52,11 +52,11 @@
%zero = arith.constant 0.000000e+00 : f32
%init = linalg.fill ins(%zero : f32)
- outs(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
+ inits(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
%0 = linalg.generic #trait
ins(%arga, %argb: tensor<8x8xf32, #CSR>, tensor<8x8xf32, #SortedCOO>)
- outs(%init: tensor<8x8xf32>) {
+ inits(%init: tensor<8x8xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.addf %a, %b : f32
linalg.yield %0 : f32
@@ -71,11 +71,11 @@
%zero = arith.constant 0.000000e+00 : f32
%init = linalg.fill ins(%zero : f32)
- outs(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
+ inits(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
%0 = linalg.generic #trait
ins(%arga, %argb: tensor<8x8xf32, #SortedCOO>, tensor<8x8xf32, #SortedCOO>)
- outs(%init: tensor<8x8xf32>) {
+ inits(%init: tensor<8x8xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.addf %a, %b : f32
linalg.yield %0 : f32
@@ -90,11 +90,11 @@
%zero = arith.constant 0.000000e+00 : f32
%init = linalg.fill ins(%zero : f32)
- outs(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
+ inits(%empty : tensor<8x8xf32>) -> tensor<8x8xf32>
%0 = linalg.generic #trait
ins(%arga, %argb: tensor<8x8xf32>, tensor<8x8xf32, #SortedCOO>)
- outs(%init: tensor<8x8xf32>) {
+ inits(%init: tensor<8x8xf32>) {
^bb(%a: f32, %b: f32, %x: f32):
%0 = arith.addf %a, %b : f32
linalg.yield %0 : f32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir
@@ -38,7 +38,7 @@
%x: tensor) -> tensor {
%dot = linalg.dot ins(%a, %b: tensor<1024xf32, #SparseVector>, tensor<1024xf32, #SparseVector>)
- outs(%x: tensor) -> tensor
+ inits(%x: tensor) -> tensor
return %dot : tensor
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir
@@ -43,7 +43,7 @@
%C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC>
%D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>)
- outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
+ inits(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC>
return %D: tensor<8x4xf64, #CSC>
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir
@@ -63,7 +63,7 @@
-> tensor<7x3xf64> {
%0 = linalg.generic #trait_flatten
ins(%arga: tensor<7x3x3x3x3x3x5x3xf64, #SparseTensor>)
- outs(%argx: tensor<7x3xf64>) {
+ inits(%argx: tensor<7x3xf64>) {
^bb(%a: f64, %x: f64):
%0 = arith.addf %x, %a : f64
linalg.yield %0 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir
@@ -66,7 +66,7 @@
%init = bufferization.alloc_tensor() : tensor<8xi64, #SparseVector>
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
- outs(%init: tensor<8xi64, #SparseVector>) {
+ inits(%init: tensor<8xi64, #SparseVector>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%ii = arith.index_cast %i : index to i64
@@ -84,7 +84,7 @@
%init = bufferization.alloc_tensor() : tensor<8xi64, #SparseVector>
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
- outs(%init: tensor<8xi64, #SparseVector>) {
+ inits(%init: tensor<8xi64, #SparseVector>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%ii = arith.index_cast %i : index to i64
@@ -102,7 +102,7 @@
%init = bufferization.alloc_tensor() : tensor<3x4xi64, #SparseMatrix>
%r = linalg.generic #trait_2d
ins(%arga: tensor<3x4xi64, #SparseMatrix>)
- outs(%init: tensor<3x4xi64, #SparseMatrix>) {
+ inits(%init: tensor<3x4xi64, #SparseMatrix>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%j = linalg.index 1 : index
@@ -123,7 +123,7 @@
%init = bufferization.alloc_tensor() : tensor<3x4xi64, #SparseMatrix>
%r = linalg.generic #trait_2d
ins(%arga: tensor<3x4xi64, #SparseMatrix>)
- outs(%init: tensor<3x4xi64, #SparseMatrix>) {
+ inits(%init: tensor<3x4xi64, #SparseMatrix>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%j = linalg.index 1 : index
@@ -141,7 +141,7 @@
%0 = bufferization.alloc_tensor() : tensor<2x3xf32, #SparseMatrix>
%1 = linalg.generic #trait_2d
ins(%arg0 : tensor<2x3xf32, #SparseMatrix>)
- outs(%0 : tensor<2x3xf32, #SparseMatrix>) {
+ inits(%0 : tensor<2x3xf32, #SparseMatrix>) {
^bb0(%arg1: f32, %arg2: f32):
%2 = linalg.index 0 : index
%3 = arith.index_cast %2 : index to i64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir
@@ -64,7 +64,7 @@
%out: tensor<8xi64>) -> tensor<8xi64> {
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
- outs(%out: tensor<8xi64>) {
+ inits(%out: tensor<8xi64>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%ii = arith.index_cast %i : index to i64
@@ -81,7 +81,7 @@
%out: tensor<8xi64>) -> tensor<8xi64> {
%r = linalg.generic #trait_1d
ins(%arga: tensor<8xi64, #SparseVector>)
- outs(%out: tensor<8xi64>) {
+ inits(%out: tensor<8xi64>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%ii = arith.index_cast %i : index to i64
@@ -98,7 +98,7 @@
%out: tensor<3x4xi64>) -> tensor<3x4xi64> {
%r = linalg.generic #trait_2d
ins(%arga: tensor<3x4xi64, #SparseMatrix>)
- outs(%out: tensor<3x4xi64>) {
+ inits(%out: tensor<3x4xi64>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%j = linalg.index 1 : index
@@ -118,7 +118,7 @@
%out: tensor<3x4xi64>) -> tensor<3x4xi64> {
%r = linalg.generic #trait_2d
ins(%arga: tensor<3x4xi64, #SparseMatrix>)
- outs(%out: tensor<3x4xi64>) {
+ inits(%out: tensor<3x4xi64>) {
^bb(%a: i64, %x: i64):
%i = linalg.index 0 : index
%j = linalg.index 1 : index
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir
@@ -57,7 +57,7 @@
%C: tensor<4x4xf64>) -> tensor<4x4xf64> {
%D = linalg.matmul ins(%A, %B: tensor<4x8xf64>, tensor<8x4xf64>)
- outs(%C: tensor<4x4xf64>) -> tensor<4x4xf64>
+ inits(%C: tensor<4x4xf64>) -> tensor<4x4xf64>
return %D: tensor<4x4xf64>
}
@@ -69,7 +69,7 @@
%C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR>
%D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #CSR>, tensor<8x4xf64, #CSR>)
- outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
+ inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
return %D: tensor<4x4xf64, #CSR>
}
@@ -81,7 +81,7 @@
%C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
%D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #DCSR>, tensor<8x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
+ inits(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
return %D: tensor<4x4xf64, #DCSR>
}
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul_slice.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul_slice.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul_slice.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul_slice.mlir
@@ -61,7 +61,7 @@
%C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR>
%D = linalg.matmul ins(%A, %B: tensor<4x4xf64, #CSR_SLICE_dyn>, tensor<4x4xf64, #DCSR_SLICE_dyn>)
- outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
+ inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
return %D: tensor<4x4xf64, #CSR>
}
@@ -73,7 +73,7 @@
%C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR>
%D = linalg.matmul ins(%A, %B: tensor<4x4xf64, #CSR_SLICE_1>, tensor<4x4xf64, #DCSR_SLICE_1>)
- outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
+ inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
return %D: tensor<4x4xf64, #CSR>
}
@@ -85,7 +85,7 @@
%C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR>
%D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #CSR_SLICE>, tensor<8x4xf64, #CSR>)
- outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
+ inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR>
return %D: tensor<4x4xf64, #CSR>
}
@@ -97,7 +97,7 @@
%C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR>
%D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #DCSR_SLICE>, tensor<8x4xf64, #DCSR>)
- outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
+ inits(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR>
return %D: tensor<4x4xf64, #DCSR>
}
@@ -215,7 +215,7 @@
%d = bufferization.alloc_tensor() copy(%zero) : tensor<4x4xf64>
%r = linalg.matmul ins(%ds2, %ds1: tensor<4x4xf64>, tensor<4x4xf64>)
- outs(%d: tensor<4x4xf64>) -> tensor<4x4xf64>
+ inits(%d: tensor<4x4xf64>) -> tensor<4x4xf64>
%du = tensor.cast %r : tensor<4x4xf64> to tensor<*xf64>
call @printMemrefF64(%du) : (tensor<*xf64>) -> ()
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir
@@ -69,7 +69,7 @@
%xm = bufferization.alloc_tensor(%d0, %d1) : tensor
%0 = linalg.generic #trait_scale
ins(%arga: tensor)
- outs(%xm: tensor) {
+ inits(%xm: tensor) {
^bb(%a: f64, %x: f64):
%1 = arith.mulf %a, %s : f64
linalg.yield %1 : f64
@@ -81,7 +81,7 @@
func.func @matrix_scale_inplace(%argx: tensor) -> tensor {
%s = arith.constant 2.0 : f64
%0 = linalg.generic #trait_scale_inpl
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%x: f64):
%1 = arith.mulf %x, %s : f64
linalg.yield %1 : f64
@@ -99,7 +99,7 @@
%xv = bufferization.alloc_tensor(%d0, %d1) : tensor
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f64, %b: f64, %x: f64):
%1 = arith.addf %a, %b : f64
linalg.yield %1 : f64
@@ -117,7 +117,7 @@
%xv = bufferization.alloc_tensor(%d0, %d1) : tensor
%0 = linalg.generic #trait_op
ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
^bb(%a: f64, %b: f64, %x: f64):
%1 = arith.mulf %a, %b : f64
linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir
@@ -69,7 +69,7 @@
-> tensor {
%0 = linalg.generic #matvec
ins(%arga, %argb: tensor, tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
^bb(%a: i32, %b: i32, %x: i32):
%0 = arith.muli %a, %b : i32
%1 = arith.addi %x, %0 : i32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir
a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir @@ -65,7 +65,7 @@ %0 = linalg.generic #mttkrp ins(%argb, %argc, %argd: tensor, tensor, tensor) - outs(%arga: tensor) { + inits(%arga: tensor) { ^bb(%b: f64, %c: f64, %d: f64, %a: f64): %0 = arith.mulf %b, %c : f64 %1 = arith.mulf %d, %0 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir @@ -47,7 +47,7 @@ %argx = bufferization.alloc_tensor() : tensor<32x16xf32, #DCSR> %0 = linalg.generic #trait_mult_elt ins(%arga, %argb: tensor<32x16xf32, #DCSR>, tensor<32x16xf32, #DCSR>) - outs(%argx: tensor<32x16xf32, #DCSR>) { + inits(%argx: tensor<32x16xf32, #DCSR>) { ^bb(%a: f32, %b: f32, %x: f32): %1 = arith.mulf %a, %b : f32 linalg.yield %1 : f32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir @@ -56,7 +56,7 @@ %0 = linalg.generic #redsum ins(%arga, %argb: tensor, tensor) - outs(%xinit: tensor) { + inits(%xinit: tensor) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.muli %a, %b : i32 %1 = arith.addi %x, %0 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir @@ -58,7 +58,7 @@ func.func @kernel_eltwise_mult(%argx: tensor) -> tensor { %0 = linalg.generic #eltwise_mult - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%x: f64): %0 = arith.mulf %x, %x : f64 linalg.yield %0 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir @@ -41,7 +41,7 @@ %c2 = arith.constant 2 : i32 %0 = linalg.quantized_matmul ins(%input1, %input2, %c2, %c0 : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32) - outs(%output : tensor<5x6xi32>) -> tensor<5x6xi32> + inits(%output : tensor<5x6xi32>) -> tensor<5x6xi32> return %0: tensor<5x6xi32> } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir @@ -45,7 +45,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: complex, %x: f32): %1 = complex.re %a : complex linalg.yield %1 : f32 @@ -60,7 +60,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: complex, %x: f32): %1 = complex.im %a : complex linalg.yield %1 : f32 diff --git 
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir
@@ -59,7 +59,7 @@
   %xm = bufferization.alloc_tensor(%d0, %d1) : tensor
   %0 = linalg.generic #trait_matmul
     ins(%arga, %argb: tensor, tensor)
-    outs(%xm: tensor) {
+    inits(%xm: tensor) {
     ^bb(%a: f64, %b: f64, %output: f64):
       %1 = sparse_tensor.binary %a, %b : f64, f64 to f64
         overlap = {
@@ -90,7 +90,7 @@
   %xm = bufferization.alloc_tensor(%d0, %d1) : tensor
   %0 = linalg.generic #trait_matmul
     ins(%arga, %argb: tensor, tensor)
-    outs(%xm: tensor) {
+    inits(%xm: tensor) {
     ^bb(%a: f64, %b: f64, %output: f64):
       %1 = sparse_tensor.binary %a, %b : f64, f64 to f64
         overlap = {
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir
@@ -47,7 +47,7 @@
   %xv = bufferization.alloc_tensor(%d0): tensor
   %0 = linalg.generic #trait_mat_reduce_rowwise
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64):
       %1 = sparse_tensor.reduce %a, %b, %cf1 : f64 {
         ^bb0(%x: f64, %y: f64):
@@ -66,7 +66,7 @@
   %xv = bufferization.alloc_tensor(%d0): tensor
   %0 = linalg.generic #trait_mat_reduce_rowwise
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64):
       %1 = sparse_tensor.reduce %a, %b, %cf1 : f64 {
         ^bb0(%x: f64, %y: f64):
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir
@@ -46,7 +46,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xi32, #SV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: i32, %x: i32):
       %0 = arith.addi %x, %a : i32
       linalg.yield %0 : i32
@@ -58,7 +58,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xf32, #SV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: f32, %x: f32):
       %0 = arith.addf %x, %a : f32
       linalg.yield %0 : f32
@@ -70,7 +70,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xi32, #DV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: i32, %x: i32):
       %0 = arith.andi %x, %a : i32
       linalg.yield %0 : i32
@@ -82,7 +82,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xi32, #SV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: i32, %x: i32):
       %0 = arith.ori %x, %a : i32
       linalg.yield %0 : i32
@@ -94,7 +94,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xi32, #SV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: i32, %x: i32):
       %0 = arith.xori %x, %a : i32
       linalg.yield %0 : i32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions_prod.mlir
@@ -38,7 +38,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xi32, #DV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: i32, %x: i32):
       %0 = arith.muli %x, %a : i32
       linalg.yield %0 : i32
@@ -50,7 +50,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_reduction
     ins(%arga: tensor<32xf32, #DV>)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: f32, %x: f32):
       %0 = arith.mulf %x, %a : f32
       linalg.yield %0 : f32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir
@@ -62,7 +62,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sampled_dense_dense
     ins(%args, %arga, %argb: tensor, tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%s: f32, %a: f32, %b: f32, %x: f32):
       %0 = arith.mulf %a, %b : f32
       %1 = arith.mulf %s, %0 : f32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir
@@ -72,7 +72,7 @@
   %2 = linalg.generic #trait_sampled_dense_dense
     ins(%args, %arga, %argb: tensor<8x8xf64, #SM>, tensor<8x8xf64>, tensor<8x8xf64>)
-    outs(%1: tensor<8x8xf64>) {
+    inits(%1: tensor<8x8xf64>) {
     ^bb(%s: f64, %a: f64, %b: f64, %x: f64):
       %p = arith.mulf %a, %b : f64
       %q = arith.mulf %s, %p : f64
@@ -93,7 +93,7 @@
   %1 = arith.constant dense<0.0> : tensor<8x8xf64>
   %2 = linalg.generic #trait_matmul
     ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>)
-    outs(%1 : tensor<8x8xf64>) {
+    inits(%1 : tensor<8x8xf64>) {
     ^bb0(%a: f64, %b: f64, %x: f64):
       %p = arith.mulf %a, %b : f64
       %q = arith.addf %x, %p : f64
@@ -102,7 +102,7 @@
   // Sample the result with elements-wise multiplication with sparse matrix.
   %3 = linalg.generic #trait_scale
     ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>)
-    outs(%1 : tensor<8x8xf64>) {
+    inits(%1 : tensor<8x8xf64>) {
     ^bb0(%t: f64, %s: f64, %x: f64):
       %r = arith.mulf %t, %s : f64
       linalg.yield %r : f64
@@ -121,7 +121,7 @@
   %2 = linalg.generic #trait_sampled_dense_dense
     ins(%args, %arga, %argb: tensor<8x8xf64, #SM>, tensor<8x8xf64>, tensor<8x8xf64>)
-    outs(%1: tensor<8x8xf64, #SM>) {
+    inits(%1: tensor<8x8xf64, #SM>) {
     ^bb(%s: f64, %a: f64, %b: f64, %x: f64):
       %p = arith.mulf %a, %b : f64
       %q = arith.mulf %s, %p : f64
@@ -143,7 +143,7 @@
   %1 = arith.constant dense<0.0> : tensor<8x8xf64>
   %2 = linalg.generic #trait_matmul
     ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>)
-    outs(%1 : tensor<8x8xf64>) {
+    inits(%1 : tensor<8x8xf64>) {
     ^bb0(%a: f64, %b: f64, %x: f64):
       %p = arith.mulf %a, %b : f64
       %q = arith.addf %x, %p : f64
@@ -153,7 +153,7 @@
   %3 = bufferization.alloc_tensor() : tensor<8x8xf64, #SM>
   %4 = linalg.generic #trait_scale
     ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>)
-    outs(%3 : tensor<8x8xf64, #SM>) {
+    inits(%3 : tensor<8x8xf64, #SM>) {
     ^bb0(%t: f64, %s: f64, %x: f64):
       %r = arith.mulf %t, %s : f64
       linalg.yield %r : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir
@@ -44,7 +44,7 @@
 func.func @sparse_scale(%argx: tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR> {
   %c = arith.constant 2.0 : f32
   %0 = linalg.generic #trait_scale
-    outs(%argx: tensor<8x8xf32, #CSR>) {
+    inits(%argx: tensor<8x8xf32, #CSR>) {
     ^bb(%x: f32):
       %1 = arith.mulf %x, %c : f32
       linalg.yield %1 : f32
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir
@@ -36,7 +36,7 @@
      indexing_maps = [#map, #map, #map],
      iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0, %cst_3 : tensor<2x3x4xf64, #SparseMatrix>, tensor<2x3x4xf64>)
-    outs(%2 : tensor<2x3x4xf64, #SparseMatrix>) {
+    inits(%2 : tensor<2x3x4xf64, #SparseMatrix>) {
     ^bb0(%arg1: f64, %arg2: f64, %arg3: f64):
       %4 = arith.subf %arg1, %arg2 : f64
       linalg.yield %4 : f64
@@ -50,7 +50,7 @@
      indexing_maps = [#map, #map, #map],
      iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0, %cst_3 : tensor<2x3x4xf64, #SparseMatrix>, tensor<2x3x4xf64>)
-    outs(%2 : tensor<2x3x4xf64, #SparseMatrix>) {
+    inits(%2 : tensor<2x3x4xf64, #SparseMatrix>) {
     ^bb0(%arg1: f64, %arg2: f64, %arg3: f64):
       %4 = arith.addf %arg1, %arg2 : f64
       linalg.yield %4 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir
@@ -56,7 +56,7 @@
   %xv = bufferization.alloc_tensor(%d0): tensor
   %0 = linalg.generic #trait_vec_select
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64):
       %1 = sparse_tensor.select %a : f64 {
         ^bb0(%x: f64):
@@ -76,7 +76,7 @@
   %xv = bufferization.alloc_tensor(%d0, %d1): tensor
   %0 = linalg.generic #trait_mat_select
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64):
       %row = linalg.index 0 : index
       %col = linalg.index 1 : index
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir
@@ -51,7 +51,7 @@
   %xin = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_op
     ins(%arg0: tensor)
-    outs(%xin: tensor) {
+    inits(%xin: tensor) {
     ^bb0(%a: f64, %x: f64) :
       %result = sparse_tensor.unary %a : f64 to f64
         present={
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
@@ -69,7 +69,7 @@
     -> tensor {
   %c = arith.constant 2.0 : f64
   %0 = linalg.generic #trait_scale
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%x: f64):
       %1 = arith.mulf %x, %c : f64
       linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
@@ -59,7 +59,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #spmm
     ins(%arga, %argb: tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %0 = arith.mulf %a, %b : f64
       %1 = arith.addf %x, %0 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
@@ -59,7 +59,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sum_reduce
     ins(%arga: tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: f64, %x: f64):
       %0 = arith.addf %x, %a : f64
       linalg.yield %0 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir
@@ -40,7 +40,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sum_reduce
     ins(%arga: tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: bf16, %x: bf16):
       %0 = arith.addf %x, %a : bf16
       linalg.yield %0 : bf16
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
@@ -59,7 +59,7 @@
     %argx: tensor>) -> tensor> {
   %0 = linalg.generic #trait_sum_reduce
     ins(%arga: tensor, #SparseMatrix>)
-    outs(%argx: tensor>) {
+    inits(%argx: tensor>) {
     ^bb(%a: complex, %x: complex):
       %0 = complex.add %x, %a : complex
       linalg.yield %0 : complex
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir
@@ -49,7 +49,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_sum_reduce
     ins(%arga: tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: f16, %x: f16):
       %0 = arith.addf %x, %a : f16
       linalg.yield %0 : f16
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir
@@ -44,7 +44,7 @@
 func.func @sparse_tanh(%vec: tensor) -> tensor {
   %0 = linalg.generic #trait_op
-    outs(%vec: tensor) {
+    inits(%vec: tensor) {
     ^bb(%x: f64):
       %1 = math.tanh %x : f64
       linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir
@@ -54,7 +54,7 @@
   %xt = bufferization.alloc_tensor(%d0, %d1, %d2) : tensor
   %0 = linalg.generic #trait_mul
     ins(%arga, %argb: tensor, tensor)
-    outs(%xt: tensor) {
+    inits(%xt: tensor) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %1 = arith.mulf %a, %b : f64
       linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir
@@ -54,7 +54,7 @@
   %xm = bufferization.alloc_tensor(%d0, %d1, %d2) : tensor
   %0 = linalg.generic #trait_scale
     ins(%arga: tensor)
-    outs(%xm: tensor) {
+    inits(%xm: tensor) {
     ^bb(%a: f64, %x: f64):
       %1 = arith.mulf %a, %s : f64
       linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir
@@ -60,7 +60,7 @@
   %i = bufferization.alloc_tensor() : tensor<4x3xf64, #DCSR>
   %0 = linalg.generic #transpose_trait
     ins(%t: tensor<3x4xf64, #DCSC>)
-    outs(%i: tensor<4x3xf64, #DCSR>) {
+    inits(%i: tensor<4x3xf64, #DCSR>) {
     ^bb(%a: f64, %x: f64):
       linalg.yield %a : f64
   } -> tensor<4x3xf64, #DCSR>
@@ -79,7 +79,7 @@
   %i = bufferization.alloc_tensor() : tensor<4x3xf64, #DCSR>
   %0 = linalg.generic #transpose_trait
     ins(%arga: tensor<3x4xf64, #DCSR>)
-    outs(%i: tensor<4x3xf64, #DCSR>) {
+    inits(%i: tensor<4x3xf64, #DCSR>) {
     ^bb(%a: f64, %x: f64):
       linalg.yield %a : f64
   } -> tensor<4x3xf64, #DCSR>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose_coo.mlir
@@ -42,7 +42,7 @@
      affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel"]}
     ins(%arga : tensor<10x5xf32, #SortedCOO>)
-    outs(%0 : tensor<5x10xf32, #SortedCOO>) {
+    inits(%0 : tensor<5x10xf32, #SortedCOO>) {
     ^bb0(%in: f32, %out: f32):
       linalg.yield %in : f32
   } -> tensor<5x10xf32, #SortedCOO>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
@@ -57,7 +57,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_vec_scale
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %x: i32):
       %1 = sparse_tensor.unary %a : f64 to i32
         present={}
@@ -77,7 +77,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_vec_scale
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %x: f64):
       %1 = sparse_tensor.unary %a : f64 to f64
         present={
@@ -100,7 +100,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_vec_scale
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %x: f64):
       %idx = linalg.index 0 : index
       %1 = sparse_tensor.unary %a : f64 to f64
@@ -128,7 +128,7 @@
   %xv = bufferization.alloc_tensor(%d0, %d1) : tensor
   %0 = linalg.generic #trait_mat_scale
     ins(%argx: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %x: f64):
       %1 = sparse_tensor.unary %a: f64 to f64
         present={
@@ -155,7 +155,7 @@
   %xv = bufferization.alloc_tensor(%d0, %d1) : tensor
   %0 = linalg.generic #trait_mat_scale
     ins(%argx: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %x: f64):
       %row = linalg.index 0 : index
       %col = linalg.index 1 : index
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir
@@ -75,7 +75,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_scale
     ins(%arga: tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %x: f64):
       %1 = arith.mulf %a, %s : f64
       linalg.yield %1 : f64
@@ -87,7 +87,7 @@
 func.func @vector_scale_inplace(%argx: tensor) -> tensor {
   %s = arith.constant 2.0 : f64
   %0 = linalg.generic #trait_scale_inpl
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%x: f64):
       %1 = arith.mulf %x, %s : f64
       linalg.yield %1 : f64
@@ -103,7 +103,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_op
     ins(%arga, %argb: tensor, tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %1 = arith.addf %a, %b : f64
       linalg.yield %1 : f64
@@ -119,7 +119,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_op
     ins(%arga, %argb: tensor, tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %1 = arith.mulf %a, %b : f64
       linalg.yield %1 : f64
@@ -135,7 +135,7 @@
   %xv = bufferization.alloc_tensor(%d) : tensor
   %0 = linalg.generic #trait_op
     ins(%arga, %argb: tensor, tensor)
-    outs(%xv: tensor) {
+    inits(%xv: tensor) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %1 = arith.mulf %a, %b : f64
       linalg.yield %1 : f64
@@ -149,7 +149,7 @@
     %argx: tensor) -> tensor {
   %0 = linalg.generic #trait_dot
     ins(%arga, %argb: tensor, tensor)
-    outs(%argx: tensor) {
+    inits(%argx: tensor) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %1 = arith.mulf %a, %b : f64
       %2 = arith.addf %x, %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py
--- a/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py
@@ -38,7 +38,7 @@
   %argx = bufferization.alloc_tensor() : tensor<3x4xf64, #DCSR>
   %0 = linalg.generic #trait_add_elt
     ins(%arga, %argb: tensor<3x4xf64, #DCSR>, tensor<3x4xf64, #DCSR>)
-    outs(%argx: tensor<3x4xf64, #DCSR>) {
+    inits(%argx: tensor<3x4xf64, #DCSR>) {
     ^bb(%a: f64, %b: f64, %x: f64):
       %1 = arith.addf %a, %b : f64
       linalg.yield %1 : f64
diff --git a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
@@ -3,7 +3,7 @@
 func.func @gemm(%arg0 : memref, %arg1 : memref,
     %arg2 : memref) {
   linalg.matmul ins(%arg0, %arg1 : memref, memref)
-      outs(%arg2 : memref)
+      inits(%arg2 : memref)
   return
 }
 // CHECK-LABEL: func @gemm
@@ -34,7 +34,7 @@
      affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1, d0)>],
      iterator_types = ["parallel", "parallel"]}
     ins(%arg0, %arg1, %arg2 : memref<200x300xi32>, memref<300xi16>, memref<200xi8>)
-    outs(%arg3 : memref<300x200xi64>) {
+    inits(%arg3 : memref<300x200xi64>) {
     ^bb0(%b0 : i32, %b1 : i16, %b2 : i8, %b3 : i64):
       %0 = linalg.index 0 : index
       %1 = arith.index_cast %0 : index to i16
@@ -84,7 +84,7 @@
      strides = dense<[1, 2]> : tensor<2xi64>,
      dilations = dense<[3, 4]> : tensor<2xi64>}
     ins(%arg0, %arg1 : memref, memref)
-    outs(%arg2 : memref)
+    inits(%arg2 : memref)
   return
 }
 // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d1 + d4 * 3)>
@@ -128,7 +128,7 @@
      strides = dense<[1, 2]> : tensor<2xi64>,
      dilations = dense<[3, 4]> : tensor<2xi64>}
     ins(%arg0, %arg1 : memref, memref)
-    outs(%arg2 : memref)
+    inits(%arg2 : memref)
   return
 }
 // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1 + d4 * 3)>
@@ -165,7 +165,7 @@
 func.func @map(%lhs: memref<64xf32>,
     %rhs: memref<64xf32>, %out: memref<64xf32>) {
   linalg.map ins(%lhs, %rhs : memref<64xf32>, memref<64xf32>)
-      outs(%out : memref<64xf32>)
+      inits(%out : memref<64xf32>)
      (%in: f32, %in_0: f32) {
        %0 = arith.addf %in, %in_0 : f32
        linalg.yield %0 : f32
@@ -192,7 +192,7 @@
 func.func @transpose(%arg0: memref<16x32x64xf32>,
     %arg1: memref<32x64x16xf32>) {
   linalg.transpose ins(%arg0 : memref<16x32x64xf32>)
-      outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0]
+      inits(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0]
   return
 }
 // CHECK-LABEL: func.func @transpose(
@@ -216,7 +216,7 @@
 func.func @reduce(%arg0: memref<16x32x64xf32>,
     %arg1: memref<16x64xf32>) {
   linalg.reduce ins(%arg0 : memref<16x32x64xf32>)
-      outs(%arg1 : memref<16x64xf32>) dimensions = [1]
+      inits(%arg1 : memref<16x64xf32>) dimensions = [1]
      (%in: f32, %init: f32) {
        %0 = arith.addf %in, %init : f32
        linalg.yield %0 : f32
@@ -247,7 +247,7 @@
     %init: memref<8x16x32xf32>) {
   linalg.broadcast ins(%input:memref<8x32xf32>)
-      outs(%init:memref<8x16x32xf32>)
+      inits(%init:memref<8x16x32xf32>)
      dimensions = [1]
   func.return
 }
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -7,10 +7,10 @@
   %d0 = tensor.dim %arg0, %c0 : tensor
   %d1 = tensor.dim %arg1, %c1 : tensor
   %init = tensor.empty(%d0, %d1) : tensor
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor) -> tensor
   %gemm = linalg.matmul {__internal_linalg_transform__ = "fusion"}
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%fill : tensor) -> tensor
+     inits(%fill : tensor) -> tensor
   return %gemm : tensor
 }
 // CHECK: func.func @gemm_fill_fusion(
@@ -25,10 +25,10 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -42,15 +42,15 @@
   %d0 = tensor.dim %arg0, %c0 : tensor
   %d1 = tensor.dim %arg1, %c1 : tensor
   %init = tensor.empty(%d0, %d1) : tensor
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor) -> tensor
   %gemm = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
-      outs(%fill : tensor) -> tensor
+      inits(%fill : tensor) -> tensor
   %generic = linalg.generic {
      __internal_linalg_transform__ = "fusion",
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel"]}
-      ins(%gemm, %arg2 : tensor, tensor) outs(%init : tensor) {
+      ins(%gemm, %arg2 : tensor, tensor) inits(%init : tensor) {
     ^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
       %add = arith.addf %b0, %b1 : f32
       linalg.yield %add : f32
@@ -70,15 +70,15 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[INIT]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK-DAG: %[[BIAS_TILE:.+]] = tensor.extract_slice %[[ARG2]][%[[IV1]]]
 // CHECK-DAG: %[[OUTS_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GEMM_TILE]], %[[BIAS_TILE]] :
-// CHECK-SAME: outs(%[[OUTS_TILE]] :
+// CHECK-SAME: inits(%[[OUTS_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -91,14 +91,14 @@
   %d0 = tensor.dim %lhs0, %c0 : tensor
   %d1 = tensor.dim %rhs0, %c1 : tensor
   %init0 = tensor.empty(%d0, %d1) : tensor
-  %fill0 = linalg.fill ins(%cst : f32) outs(%init0 : tensor) -> tensor
+  %fill0 = linalg.fill ins(%cst : f32) inits(%init0 : tensor) -> tensor
   %gemm0 = linalg.matmul
-      ins(%lhs0, %rhs0 : tensor, tensor) outs(%fill0 : tensor) -> tensor
+      ins(%lhs0, %rhs0 : tensor, tensor) inits(%fill0 : tensor) -> tensor
   %d2 = tensor.dim %rhs1, %c1 : tensor
   %init1 = tensor.empty(%d0, %d2) : tensor
-  %fill1 = linalg.fill ins(%cst : f32) outs(%init1 : tensor) -> tensor
+  %fill1 = linalg.fill ins(%cst : f32) inits(%init1 : tensor) -> tensor
   %gemm1 = linalg.matmul {__internal_linalg_transform__ = "gemm_fusion"}
-      ins(%gemm0, %rhs1 : tensor, tensor) outs(%fill1 : tensor) -> tensor
+      ins(%gemm0, %rhs1 : tensor, tensor) inits(%fill1 : tensor) -> tensor
   return %gemm1 : tensor
 }
 // CHECK: func.func @gemm_gemm_fusion(
@@ -118,17 +118,17 @@
 // CHECK-DAG: %[[RHS0_TILE:.+]] = tensor.extract_slice %[[RHS0]][0, 0]
 // CHECK-DAG: %[[INIT0_TILE:.+]] = tensor.extract_slice %[[INIT0]][%[[IV]], 0]
 // CHECK: %[[FILL0_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]] :
 // CHECK: %[[GEMM0_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS0_TILE]], %[[RHS0_TILE]] :
-// CHECK-SAME: outs(%[[FILL0_TILE]] :
+// CHECK-SAME: inits(%[[FILL0_TILE]] :
 // CHECK-DAG: %[[RHS1_TILE:.+]] = tensor.extract_slice %[[RHS1]][0, 0]
 // CHECK-DAG: %[[INIT1_TILE:.+]] = tensor.extract_slice %[[ITERARG]][%[[IV]], 0]
 // CHECK: %[[FILL1_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT1_TILE]] :
+// CHECK-SAME: inits(%[[INIT1_TILE]] :
 // CHECK: %[[GEMM1_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[GEMM0_TILE]], %[[RHS1_TILE]] :
-// CHECK-SAME: outs(%[[FILL1_TILE]] :
+// CHECK-SAME: inits(%[[FILL1_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GEMM1_TILE]] into %[[ITERARG]][%[[IV]], 0]
 // CHECK: scf.yield %[[INSERT]]
@@ -141,16 +141,16 @@
   %d0 = tensor.dim %arg0, %c0 : tensor
   %d1 = tensor.dim %arg1, %c1 : tensor
   %init0 = tensor.empty(%d0, %d1) : tensor
-  %fill = linalg.fill ins(%cst : f32) outs(%init0 : tensor) -> tensor
+  %fill = linalg.fill ins(%cst : f32) inits(%init0 : tensor) -> tensor
   %gemm = linalg.matmul
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%fill : tensor) -> tensor
+     inits(%fill : tensor) -> tensor
   %init1 = tensor.empty(%d1, %d0) : tensor
   %transpose = linalg.generic {
      __internal_linalg_transform__ = "fusion",
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
      iterator_types = ["parallel", "parallel"]}
-      ins(%gemm : tensor) outs(%init1 : tensor) {
+      ins(%gemm : tensor) inits(%init1 : tensor) {
     ^bb0(%b0 : f32, %b1 : f32):
       linalg.yield %b0 : f32
   } -> tensor
@@ -173,14 +173,14 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[INIT0_TILE:.+]] = tensor.extract_slice %[[INIT0]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK-DAG: %[[OUTS_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GEMM_TILE]] :
-// CHECK-SAME: outs(%[[OUTS_TILE]] :
+// CHECK-SAME: inits(%[[OUTS_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -193,15 +193,15 @@
   %d1 = tensor.dim %arg1, %c1 : tensor
   %cst = arith.constant 0.0 : f32
   %0 = tensor.empty(%d0, %d1) : tensor
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor
   %2 = linalg.matmul
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%1 : tensor) -> tensor
+     inits(%1 : tensor) -> tensor
   %3 = linalg.generic {
      __internal_linalg_transform__ = "gemm_interchange_fusion",
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel"]}
-      ins(%2 : tensor) outs(%0 : tensor) {
+      ins(%2 : tensor) inits(%0 : tensor) {
     ^bb0(%b0 : f32, %b1 : f32):
       %4 = arith.addf %b0, %b0 : f32
       linalg.yield %4 : f32
@@ -220,14 +220,14 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV0]]]
 // CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[INIT]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK: %[[INIT_TILE_2:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GEMM_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE_2]] :
+// CHECK-SAME: inits(%[[INIT_TILE_2]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -240,7 +240,7 @@
   %0 = tensor.dim %arg2, %c0 : tensor
   %1 = tensor.dim %arg2, %c1 : tensor
   %2 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg2 : tensor) -> tensor
+      inits(%arg2 : tensor) -> tensor
   %3 = tensor.dim %2, %c0 : tensor
   %4 = tensor.dim %2, %c1 : tensor
   %5 = tensor.empty(%3, %4) : tensor
@@ -251,7 +251,7 @@
      iterator_types = ["parallel", "parallel"],
      __internal_linalg_transform__ = "gemm_plus_gemm_fusion"}
     ins(%2, %2 : tensor, tensor)
-    outs(%5 : tensor) {
+    inits(%5 : tensor) {
     ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32) :
       %7 = arith.addf %arg3, %arg4 : f32
       linalg.yield %7 : f32
@@ -273,11 +273,11 @@
 // CHECK-DAG: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[MATMUL:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[ST_ARG0]], %[[ST_ARG1]] :
-// CHECK-SAME: outs(%[[ST_ARG2]] :
+// CHECK-SAME: inits(%[[ST_ARG2]] :
 // CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[ST_RESULT:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[MATMUL]], %[[MATMUL]] :
-// CHECK-SAME: outs(%[[ST_ARG6]] :
+// CHECK-SAME: inits(%[[ST_ARG6]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[ST_RESULT]]
 // CHECK-SAME: into %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[UPDATE]]
@@ -293,7 +293,7 @@
   %0 = tensor.dim %arg2, %c0 : tensor
   %1 = tensor.dim %arg2, %c1 : tensor
   %2 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg2 : tensor) -> tensor
+      inits(%arg2 : tensor) -> tensor
   %3 = tensor.dim %2, %c0 : tensor
   %4 = tensor.dim %2, %c1 : tensor
   %5 = tensor.empty(%3, %4) : tensor
@@ -304,7 +304,7 @@
      iterator_types = ["parallel", "parallel"],
      __internal_linalg_transform__ = "gemm_plus_gemm_fusion"}
     ins(%2, %2 : tensor, tensor)
-    outs(%5 : tensor) {
+    inits(%5 : tensor) {
     ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32) :
       %7 = arith.addf %arg3, %arg4 : f32
       linalg.yield %7 : f32
@@ -325,17 +325,17 @@
 // CHECK: %[[LHS:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[ST_ARG0]], %[[ST_ARG1]]
 // CHECK-SAME: : tensor, tensor)
-// CHECK-SAME: outs(%[[ST_ARG2]] : tensor)
+// CHECK-SAME: inits(%[[ST_ARG2]] : tensor)
 // CHECK-DAG: %[[STR_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV1]], 0]
 // CHECK-DAG: %[[STR_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV0]]]
 // CHECK-DAG: %[[STR_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[RHS:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[STR_ARG0]], %[[STR_ARG1]] :
-// CHECK-SAME: outs(%[[STR_ARG2]] :
+// CHECK-SAME: inits(%[[STR_ARG2]] :
 // CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[ST_RESULT:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[LHS]], %[[RHS]] :
-// CHECK-SAME: outs(%[[ST_ARG6]] :
+// CHECK-SAME: inits(%[[ST_ARG6]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[ST_RESULT]]
 // CHECK-SAME: into %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[UPDATE]]
@@ -348,13 +348,13 @@
     %arg2: tensor, %arg3: tensor, %arg4: tensor,
     %arg5: tensor, %arg6: tensor) -> tensor {
   %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg2 : tensor) -> tensor // [M, N0] * [N0, N1]
+      inits(%arg2 : tensor) -> tensor // [M, N0] * [N0, N1]
   %1 = linalg.matmul ins(%0, %arg3 : tensor, tensor)
-      outs(%arg4 : tensor) -> tensor // [M, N1] * [N1, N2]
+      inits(%arg4 : tensor) -> tensor // [M, N1] * [N1, N2]
   %2 = linalg.matmul {__internal_linalg_transform__ = "gemm_sequence_fusion"}
      ins(%1, %arg5 : tensor, tensor)
-     outs(%arg6 : tensor) -> tensor // [M, N2] * [N2, N3]
+     inits(%arg6 : tensor) -> tensor // [M, N2] * [N2, N3]
   return %2 : tensor
 }
@@ -383,16 +383,16 @@
 // CHECK-DAG: %[[SLICE_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, 0] [%[[N0]], %[[N1]]]
 // CHECK-DAG: %[[SLICE_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV]], 0] [%[[TILE_M]], %[[N1]]]
 // CHECK-DAG: %[[TILE_GEMM1:.+]] = linalg.matmul ins(%[[SLICE_ARG0]], %[[SLICE_ARG1]] :
-// CHECK-SAME: outs(%[[SLICE_ARG2]] :
+// CHECK-SAME: inits(%[[SLICE_ARG2]] :
 // CHECK-DAG: %[[SLICE_ARG3:.+]] = tensor.extract_slice %[[ARG3]][0, 0] [%[[N1]], %[[N2]]]
 // CHECK-DAG: %[[SLICE_ARG4:.+]] = tensor.extract_slice %[[ARG4]][%[[IV]], 0] [%[[TILE_M]], %[[N2]]]
 // CHECK-DAG: %[[TILE_GEMM2:.+]] = linalg.matmul ins(%[[TILE_GEMM1]], %[[SLICE_ARG3]] :
-// CHECK-SAME: outs(%[[SLICE_ARG4]] :
+// CHECK-SAME: inits(%[[SLICE_ARG4]] :
 // CHECK-DAG: %[[SLICE_ARG5:.+]] = tensor.extract_slice %[[ARG5]][0, 0] [%[[N2]], %[[N3]]]
 // CHECK-DAG: %[[SLICE_ARG6:.+]] = tensor.extract_slice %[[ARG8]][%[[IV]], 0] [%[[TILE_M]], %[[N3]]]
 // CHECK-DAG: %[[TILE_GEMM3:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[TILE_GEMM2]], %[[SLICE_ARG5]] :
-// CHECK-SAME: outs(%[[SLICE_ARG6]] :
+// CHECK-SAME: inits(%[[SLICE_ARG6]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[TILE_GEMM3]] into %[[ARG8]][%[[IV]], 0] [%[[TILE_M]], %[[N3]]]
 // CHECK: scf.yield %[[UPDATE]]
@@ -402,22 +402,22 @@
   %cst = arith.constant 0.000000e+00 : f32
   %cst_0 = arith.constant 0xFF800000 : f32
   %0 = tensor.empty() : tensor<30xf32>
-  %1 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<30xf32>) -> tensor<30xf32>
+  %1 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<30xf32>) -> tensor<30xf32>
   %2 = linalg.generic {
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>],
      iterator_types = ["parallel", "reduction"]}
-      ins(%arg0 : tensor<30x3xf32>) outs(%1 : tensor<30xf32>) {
+      ins(%arg0 : tensor<30x3xf32>) inits(%1 : tensor<30xf32>) {
     ^bb0(%arg1: f32, %arg2: f32):
       %8 = arith.maxf %arg2, %arg1 : f32
       linalg.yield %8 : f32
   } -> tensor<30xf32>
   %3 = tensor.empty() : tensor<30x3xf32>
-  %4 = linalg.fill ins(%cst : f32) outs(%0 : tensor<30xf32>) -> tensor<30xf32>
+  %4 = linalg.fill ins(%cst : f32) inits(%0 : tensor<30xf32>) -> tensor<30xf32>
   %5:2 = linalg.generic {
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "reduction"]}
-      ins(%arg0, %2 : tensor<30x3xf32>, tensor<30xf32>) outs(%4, %3 : tensor<30xf32>, tensor<30x3xf32>) {
+      ins(%arg0, %2 : tensor<30x3xf32>, tensor<30xf32>) inits(%4, %3 : tensor<30xf32>, tensor<30x3xf32>) {
     ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32):
       %8 = arith.subf %arg1, %arg2 : f32
       %9 = math.exp %8 : f32
@@ -429,7 +429,7 @@
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel"]}
-      ins(%5#1, %5#0 : tensor<30x3xf32>, tensor<30xf32>) outs(%3 : tensor<30x3xf32>) {
+      ins(%5#1, %5#0 : tensor<30x3xf32>, tensor<30xf32>) inits(%3 : tensor<30x3xf32>) {
    ^bb0(%arg1: f32, %arg2: f32, %arg3: f32):
      %8 = arith.divf %arg1, %arg2 : f32
      linalg.yield %8 : f32
@@ -444,20 +444,20 @@
 // CHECK-DAG: %[[ARG0_SLICE:.+]] = tensor.extract_slice %[[ARG0]][%[[IV]], 0]
 // CHECK-DAG: %[[INIT0_SLICE:.+]] = tensor.extract_slice %[[INIT0]][%[[IV]]]
 // CHECK: %[[FILL0:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_SLICE]] :
+// CHECK-SAME: inits(%[[INIT0_SLICE]] :
 // CHECK: %[[GENERIC0:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[ARG0_SLICE]] :
-// CHECK-SAME: outs(%[[FILL0]] :
+// CHECK-SAME: inits(%[[FILL0]] :
 // CHECK: %[[FILL1:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_SLICE]] :
+// CHECK-SAME: inits(%[[INIT0_SLICE]] :
 // CHECK: %[[INIT1_SLICE:.+]] = tensor.extract_slice %[[INIT1]][%[[IV]], 0]
 // CHECK: %[[GENERIC1:.+]]:2 = linalg.generic
 // CHECK-SAME: ins(%[[ARG0_SLICE]], %[[GENERIC0]] :
-// CHECK-SAME: outs(%[[FILL1]], %[[INIT1_SLICE]] :
+// CHECK-SAME: inits(%[[FILL1]], %[[INIT1_SLICE]] :
 // CHECK: %[[ITERARG0_SLICE:.+]] = tensor.extract_slice %[[ITERARG0]][%[[IV]], 0]
 // CHECK: %[[GENERIC2:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GENERIC1]]#1, %[[GENERIC1]]#0 :
-// CHECK-SAME: outs(%[[ITERARG0_SLICE]] :
+// CHECK-SAME: inits(%[[ITERARG0_SLICE]] :
 // CHECK-DAG: %[[INSERTSLICE:.+]] = tensor.insert_slice %[[GENERIC2]] into %[[ITERARG0]][%[[IV]], 0]
 // CHECK: scf.yield %[[INSERTSLICE]]
 // CHECK: return %[[RESULT]]
diff --git a/mlir/test/Interfaces/TilingInterface/tile-fuse-and-yield-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-fuse-and-yield-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-fuse-and-yield-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-fuse-and-yield-using-interface.mlir
@@ -8,13 +8,13 @@
   %cst = arith.constant 0.0 : f32
   %d0 = tensor.dim %lhs0, %c0 : tensor
   %d1 = tensor.dim %rhs0, %c1 : tensor
-  %fill0 = linalg.fill ins(%cst : f32) outs(%init0 : tensor) -> tensor
+  %fill0 = linalg.fill ins(%cst : f32) inits(%init0 : tensor) -> tensor
   %gemm0 = linalg.matmul
-      ins(%lhs0, %rhs0 : tensor, tensor) outs(%fill0 : tensor) -> tensor
+      ins(%lhs0, %rhs0 : tensor, tensor) inits(%fill0 : tensor) -> tensor
   %d2 = tensor.dim %rhs1, %c1 : tensor
-  %fill1 = linalg.fill ins(%cst : f32) outs(%init1 : tensor) -> tensor
+  %fill1 = linalg.fill ins(%cst : f32) inits(%init1 : tensor) -> tensor
   %gemm1 = linalg.matmul {__internal_linalg_transform__ = "gemm_sequence_fusion_and_yield"}
-      ins(%gemm0, %rhs1 : tensor, tensor) outs(%fill1 : tensor) -> tensor
+      ins(%gemm0, %rhs1 : tensor, tensor) inits(%fill1 : tensor) -> tensor
   return %gemm0, %gemm1 : tensor, tensor
 }
 // CHECK: func.func @gemm_gemm_fusion_yield_both(
@@ -31,17 +31,17 @@
 // CHECK-DAG: %[[RHS0_TILE:.+]] = tensor.extract_slice %[[RHS0]][0, 0]
 // CHECK-DAG: %[[INIT0_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV]], 0]
 // CHECK: %[[FILL0_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]] :
 // CHECK: %[[GEMM0_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS0_TILE]], %[[RHS0_TILE]] :
-// CHECK-SAME: outs(%[[FILL0_TILE]] :
+// CHECK-SAME: inits(%[[FILL0_TILE]] :
 // CHECK-DAG: %[[RHS1_TILE:.+]] = tensor.extract_slice %[[RHS1]][0, 0]
 // CHECK-DAG: %[[INIT1_TILE:.+]] = tensor.extract_slice %[[ITERARG0]][%[[IV]], 0]
 // CHECK: %[[FILL1_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT1_TILE]] :
+// CHECK-SAME: inits(%[[INIT1_TILE]] :
 // CHECK: %[[GEMM1_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[GEMM0_TILE]], %[[RHS1_TILE]] :
-// CHECK-SAME: outs(%[[FILL1_TILE]] :
+// CHECK-SAME: inits(%[[FILL1_TILE]] :
 // CHECK: %[[INSERT0:.+]] = tensor.insert_slice %[[GEMM1_TILE]] into %[[ITERARG0]][%[[IV]], 0]
 // CHECK: %[[INSERT1:.+]] = tensor.insert_slice %[[GEMM0_TILE]] into %[[ITERARG1]][%[[IV]], 0]
 // CHECK: scf.yield %[[INSERT0]], %[[INSERT1]]
diff --git a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
@@ -4,7 +4,7 @@
     %arg2 : tensor) -> tensor {
   %0 = linalg.matmul {__internal_linalg_transform__ = "simple_gemm"}
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%arg2 : tensor) -> tensor
+     inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -34,7 +34,7 @@
 // CHECK-SAME: [%[[IV0]], %[[IV1]]] [%[[TS_Y]], %[[TS_X]]] [1, 1]
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[INIT1]]
 // CHECK-SAME: [%[[IV0]], %[[IV1]]] [%[[TS_Y]], %[[TS_X]]] [1, 1]
 // CHECK: scf.yield %[[UPDATE]]
@@ -47,7 +47,7 @@
     %arg2 : memref) {
   linalg.matmul {__internal_linalg_transform__ = "simple_gemm_memref"}
      ins(%arg0, %arg1 : memref, memref)
-     outs(%arg2 : memref)
+     inits(%arg2 : memref)
   return
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -79,7 +79,7 @@
 // CHECK-SAME: [%[[IV0]], %[[IV1]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
 // CHECK: linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[OUT_TILE]] :
+// CHECK-SAME: inits(%[[OUT_TILE]] :
 // -----
@@ -94,7 +94,7 @@
      iterator_types = ["parallel", "parallel", "parallel"]}
     {__internal_linalg_transform__ = "parallel_generic_transpose"}
     ins(%arg0 : tensor<128x200x300xf32>)
-    outs(%init0, %init1 : tensor<128x300x200xf32>, tensor<300x128x200xf32>) {
+    inits(%init0, %init1 : tensor<128x300x200xf32>, tensor<300x128x200xf32>) {
    ^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
      linalg.yield %b0, %b0 : f32, f32
   } -> (tensor<128x300x200xf32>, tensor<300x128x200xf32>)
@@ -123,7 +123,7 @@
 // CHECK-SAME: [%[[IV1]], %[[IV0]], 0] [20, %[[TS_Y]], 200] [1, 1, 1]
 // CHECK: %[[RESULT_TILE:.+]]:2 = linalg.generic
 // CHECK-SAME: ins(%[[ARG_TILE]] :
-// CHECK-SAME: outs(%[[INIT0_TILE]], %[[INIT1_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]], %[[INIT1_TILE]] :
 // CHECK: %[[UPDATE0:.+]] = tensor.insert_slice %[[RESULT_TILE]]#0 into %[[ARG3]]
 // CHECK-SAME: [%[[IV0]], %[[IV1]], 0] [%[[TS_Y]], 20, 200] [1, 1, 1]
 // CHECK: %[[UPDATE1:.+]] = tensor.insert_slice %[[RESULT_TILE]]#1 into %[[ARG4]]
@@ -141,7 +141,7 @@
      dilation = dense<[4, 5]> : tensor<2xi64>,
      __internal_linalg_transform__ = "simple_conv"}
     ins(%arg0, %arg1 : tensor, tensor)
-    outs(%arg2 : tensor) -> tensor
+    inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -187,7 +187,7 @@
 // CHECK: %[[CONV_TILE:.+]] = linalg.conv_2d_nhwc_hwcf
 // CHECK-SAME: dilation = dense<[4, 5]> : tensor<2xi64>, strides = dense<[2, 3]> : tensor<2xi64>
 // CHECK-SAME: ins(%[[INPUT_TILE]], %[[FILTER_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: tensor.insert_slice %[[CONV_TILE]] into %[[INIT2]]
 // CHECK-SAME: [0, 0, 0, 0] [%[[N]], %[[R]], %[[S]], %[[F]]]
@@ -207,7 +207,7 @@
      iterator_types = ["parallel", "parallel"]}
     {__internal_linalg_transform__ = "indexed_semantics"}
     ins(%arg0: tensor)
-    outs(%arg1: tensor) {
+    inits(%arg1: tensor) {
    ^bb0(%arg2: f32, %arg3: f32):
      // CHECK: %[[INDEX0:.+]] = linalg.index 0
      // CHECK: %[[INDEX0_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX0]], %[[I0]])
@@ -231,7 +231,7 @@
     %arg2 : tensor) -> tensor {
   %0 = linalg.matmul {__internal_linalg_transform__ = "gemm_interchange"}
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%arg2 : tensor) -> tensor
+     inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)>
@@ -266,7 +266,7 @@
 // CHECK-SAME: [%[[IV2]], %[[IV0]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[INIT2]]
 // CHECK-SAME: [%[[IV2]], %[[IV0]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
 // CHECK: scf.yield %[[UPDATE]]
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -2832,7 +2832,7 @@
   let assemblyFormat = [{
     attr-dict
    (`ins` `(` $inputs^ `:` type($inputs) `)`)?
-    `outs` `(` $outputs `:` type($outputs) `)`
+    `inits` `(` $outputs `:` type($outputs) `)`
    $region (`->` type($results)^)?
  }];
@@ -2894,7 +2894,7 @@
   let assemblyFormat = [{
     attr-dict
    (`ins` `(` $inputs^ `:` type($inputs) `)`)?
-    `outs` `(` $outputs `:` type($outputs) `)`
+    `inits` `(` $outputs `:` type($outputs) `)`
    $region (`->` type($results)^)?
  }];
diff --git a/mlir/test/mlir-cpu-runner/async.mlir b/mlir/test/mlir-cpu-runner/async.mlir
--- a/mlir/test/mlir-cpu-runner/async.mlir
+++ b/mlir/test/mlir-cpu-runner/async.mlir
@@ -25,7 +25,7 @@
   %c4 = arith.constant 4.0 : f32
   %A = memref.alloc() : memref<4xf32>
-  linalg.fill ins(%c0 : f32) outs(%A : memref<4xf32>)
+  linalg.fill ins(%c0 : f32) inits(%A : memref<4xf32>)
   // CHECK: [0, 0, 0, 0]
   %U = memref.cast %A : memref<4xf32> to memref<*xf32>
diff --git a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
--- a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
+++ b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
@@ -7,14 +7,14 @@
   %cf1 = arith.constant 1.00000e+00 : f32
-  linalg.fill ins(%cf1 : f32) outs(%A : memref<16x16xf32>)
-  linalg.fill ins(%cf1 : f32) outs(%B : memref<16x16xf32>)
+  linalg.fill ins(%cf1 : f32) inits(%A : memref<16x16xf32>)
+  linalg.fill ins(%cf1 : f32) inits(%B : memref<16x16xf32>)
   %num_reps = arith.constant 5 : index
   %t_start = call @rtclock() : () -> f64
   affine.for %arg0 = 0 to %num_reps {
-    linalg.fill ins(%cf1 : f32) outs(%C : memref<16x16xf32>)
+    linalg.fill ins(%cf1 : f32) inits(%C : memref<16x16xf32>)
     func.call @sgemm_naive(%A, %B, %C) : (memref<16x16xf32>, memref<16x16xf32>, memref<16x16xf32>) -> ()
   }
   %t_end = call @rtclock() : () -> f64
diff --git a/mlir/test/mlir-cpu-runner/unranked-memref.mlir b/mlir/test/mlir-cpu-runner/unranked-memref.mlir
--- a/mlir/test/mlir-cpu-runner/unranked-memref.mlir
+++ b/mlir/test/mlir-cpu-runner/unranked-memref.mlir
@@ -42,18 +42,18 @@
   %f10 = arith.constant 10.00000e+00 : f32
   %V = memref.cast %A : memref<10x3xf32, 0> to memref
-  linalg.fill ins(%f10 : f32) outs(%V : memref)
+  linalg.fill ins(%f10 : f32) inits(%V : memref)
   %U = memref.cast %A : memref<10x3xf32, 0> to memref<*xf32>
   call @printMemrefF32(%U) : (memref<*xf32>) -> ()
   %V2 = memref.cast %U : memref<*xf32> to memref
-  linalg.fill ins(%f5 : f32) outs(%V2 : memref)
+  linalg.fill ins(%f5 : f32) inits(%V2 : memref)
   %U2 = memref.cast %V2 : memref to memref<*xf32>
   call @printMemrefF32(%U2) : (memref<*xf32>) -> ()
   %V3 = memref.cast %V2 : memref to memref<*xf32>
   %V4 = memref.cast %V3 : memref<*xf32> to memref
-  linalg.fill ins(%f2 : f32) outs(%V4 : memref)
+  linalg.fill ins(%f2 : f32) inits(%V4 : memref)
   %U3 = memref.cast %V2 : memref to memref<*xf32>
   call @printMemrefF32(%U3) : (memref<*xf32>) -> ()
@@ -79,7 +79,7 @@
 func.func @return_two_var_memref_caller() {
   %0 = memref.alloca() : memref<4x3xf32>
   %c0f32 = arith.constant 1.0 : f32
-  linalg.fill ins(%c0f32 : f32) outs(%0 : memref<4x3xf32>)
+  linalg.fill ins(%c0f32 : f32) inits(%0 : memref<4x3xf32>)
   %1:2 = call @return_two_var_memref(%0) : (memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>)
   call @printMemrefF32(%1#0) : (memref<*xf32>) -> ()
   call @printMemrefF32(%1#1) : (memref<*xf32>) -> ()
@@ -94,7 +94,7 @@
 func.func @return_var_memref_caller() {
   %0 = memref.alloca() : memref<4x3xf32>
   %c0f32 = arith.constant 1.0 : f32
-  linalg.fill ins(%c0f32 : f32) outs(%0 : memref<4x3xf32>)
+  linalg.fill ins(%c0f32 : f32) inits(%0 : memref<4x3xf32>)
   %1 = call @return_var_memref(%0) : (memref<4x3xf32>) -> memref<*xf32>
   call @printMemrefF32(%1) : (memref<*xf32>) -> ()
   return
diff --git a/mlir/test/mlir-cpu-runner/utils.mlir b/mlir/test/mlir-cpu-runner/utils.mlir
--- a/mlir/test/mlir-cpu-runner/utils.mlir
+++ b/mlir/test/mlir-cpu-runner/utils.mlir
@@ -19,7 +19,7 @@
   %f = arith.constant 2.00000e+00 : f32
   %A = memref.alloc() : memref<16xf32>
   %B = memref.cast %A: memref<16xf32> to memref
-  linalg.fill ins(%f : f32) outs(%B : memref)
+  linalg.fill ins(%f : f32) inits(%B : memref)
   %U = memref.cast %B : memref to memref<*xf32>
   call @printMemrefF32(%U): (memref<*xf32>) -> ()
   memref.dealloc %A : memref<16xf32>
@@ -33,7 +33,7 @@
   %f4 = arith.constant 4.00000e+00 : f32
   %A = memref.alloc() : memref<3x4x5xf32>
   %B = memref.cast %A: memref<3x4x5xf32> to memref
-  linalg.fill ins(%f : f32) outs(%B : memref)
+  linalg.fill ins(%f : f32) inits(%B : memref)
   %c2 = arith.constant 2 : index
   memref.store %f4, %B[%c2, %c2, %c2]: memref
diff --git a/mlir/test/mlir-opt/async.mlir b/mlir/test/mlir-opt/async.mlir
--- a/mlir/test/mlir-opt/async.mlir
+++ b/mlir/test/mlir-opt/async.mlir
@@ -20,7 +20,7 @@
   %c4 = arith.constant 4.0 : f32
   %A = memref.alloc() : memref<4xf32>
-  linalg.fill ins(%c0 : f32) outs(%A : memref<4xf32>)
+  linalg.fill ins(%c0 : f32) inits(%A : memref<4xf32>)
   %U = memref.cast %A : memref<4xf32> to memref<*xf32>
   call @printMemrefF32(%U): (memref<*xf32>) -> ()
diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
--- a/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
+++ b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
@@ -56,7 +56,7 @@
   # CHECK-SAME: indexing_maps = [#[[$MUL_MAP_A]], #[[$MUL_MAP_B]], #[[$MUL_MAP_C]]]
   # CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
   # CHECK-SAME: ins(%[[A]], %[[B]]
-  # CHECK-SAME: outs(%[[INITC]]
+  # CHECK-SAME: inits(%[[INITC]]
   @func.FuncOp.from_py_func(
       RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8), f32))
   def test_matmul_mono(lhs, rhs):
diff --git a/mlir/test/python/dialects/linalg/ops.py b/mlir/test/python/dialects/linalg/ops.py
--- a/mlir/test/python/dialects/linalg/ops.py
+++ b/mlir/test/python/dialects/linalg/ops.py
@@ -21,7 +21,7 @@
   # CHECK-LABEL: func @fill_tensor
   # CHECK-SAME: %[[OUT:[0-9a-z]+]]: tensor<12x?xf32>
   # CHECK-NEXT: %[[CST:.*]] = arith.constant 0.0{{.*}} : f32
-  # CHECK-NEXT: %[[RES:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : tensor<12x?xf32>) -> tensor<12x?xf32>
+  # CHECK-NEXT: %[[RES:.*]] = linalg.fill ins(%[[CST]] : f32) inits(%[[OUT]] : tensor<12x?xf32>) -> tensor<12x?xf32>
   # CHECK-NEXT: return %[[RES]] : tensor<12x?xf32>
   @func.FuncOp.from_py_func(
       RankedTensorType.get((12, ShapedType.get_dynamic_size()), f32))
@@ -32,7 +32,7 @@
   # CHECK-LABEL: func @fill_buffer
   # CHECK-SAME: %[[OUT:[0-9a-z]+]]: memref<12x?xf32>
   # CHECK-NEXT: %[[CST:.*]] = arith.constant 0.0{{.*}} : f32
-  # CHECK-NEXT: linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : memref<12x?xf32>)
+  # CHECK-NEXT: linalg.fill ins(%[[CST]] : f32) inits(%[[OUT]] : memref<12x?xf32>)
   # CHECK-NEXT: return
   @func.FuncOp.from_py_func(
       MemRefType.get((12, ShapedType.get_dynamic_size()), f32))
@@ -59,12 +59,12 @@
   # CHECK: linalg.elemwise_unary
   # CHECK-SAME: cast = #linalg.type_fn
   # CHECK-SAME: fun = #linalg.unary_fn
-  # CHECK-SAME: ins(%{{.*}} : tensor<4x8xf32>) outs(%{{.*}} : tensor<4x8xf32>)
+  # CHECK-SAME: ins(%{{.*}} : tensor<4x8xf32>) inits(%{{.*}} : tensor<4x8xf32>)
   unary_result = linalg.elemwise_unary(lhs, outs=[init_result.result])
   # CHECK: linalg.elemwise_binary
   # CHECK-SAME: cast = #linalg.type_fn
   # CHECK-SAME: fun = #linalg.binary_fn
-  # CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<4x8xf32>, tensor<4x8xf32>) outs(%{{.*}} : tensor<4x8xf32>)
+  # CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<4x8xf32>, tensor<4x8xf32>) inits(%{{.*}} : tensor<4x8xf32>)
   # CHECK: return
   binary_result = linalg.elemwise_binary(
       lhs,
@@ -144,7 +144,7 @@
   init = tensor.EmptyOp([4, 8], f32)
   # CHECK: linalg.matmul
   # CHECK: ins(%[[LHS]], %[[RHS]]
-  # CHECK: outs(%[[INIT]]
+  # CHECK: inits(%[[INIT]]
   return linalg.matmul(lhs, rhs, outs=init)
 print(module)
diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py
--- a/mlir/test/python/integration/dialects/linalg/opsrun.py
+++ b/mlir/test/python/integration/dialects/linalg/opsrun.py
@@ -29,10 +29,10 @@
   %rhs = memref.alloc() : memref<4x8xf32>
   %O0 = memref.alloc() : memref<4x8xf32>
   %O1 = memref.alloc() : memref<4x8xf32>
-  linalg.fill ins(%v1 : f32) outs(%lhs : memref)
-  linalg.fill ins(%v2 : f32) outs(%rhs : memref<4x8xf32>)
-  linalg.fill ins(%v0 : f32) outs(%O0 : memref<4x8xf32>)
-  linalg.fill ins(%v0 : f32) outs(%O1 : memref<4x8xf32>)
+  linalg.fill ins(%v1 : f32) inits(%lhs : memref)
+  linalg.fill ins(%v2 : f32) inits(%rhs : memref<4x8xf32>)
+  linalg.fill ins(%v0 : f32) inits(%O0 : memref<4x8xf32>)
+  linalg.fill ins(%v0 : f32) inits(%O1 : memref<4x8xf32>)
   call @elemwise_exp_add_on_buffers(%lhs, %rhs, %O0)
     : (memref, memref<4x8xf32>, memref<4x8xf32>) -> ()
@@ -60,10 +60,10 @@
   %B = memref.alloc() : memref<16x8xf32>
   %C0 = memref.alloc() : memref<4x8xf32>
   %C1 = memref.alloc() : memref<4x8xf32>
-  linalg.fill ins(%v1 : i8) outs(%A : memref<4x16xi8>)
-  linalg.fill ins(%v2 : f32) outs(%B : memref<16x8xf32>)
-  linalg.fill ins(%v0 : f32) outs(%C0 : memref<4x8xf32>)
-  linalg.fill ins(%v0 : f32) outs(%C1 : memref<4x8xf32>)
+  linalg.fill ins(%v1 : i8) inits(%A : memref<4x16xi8>)
+  linalg.fill ins(%v2 : f32) inits(%B : memref<16x8xf32>)
+  linalg.fill ins(%v0 : f32) inits(%C0 : memref<4x8xf32>)
+  linalg.fill ins(%v0 : f32) inits(%C1 : memref<4x8xf32>)
   call @matmul_signed_on_buffers(%A, %B, %C0)
     : (memref<4x16xi8>, memref<16x8xf32>, memref<4x8xf32>) -> ()
@@ -137,9 +137,9 @@
   %input = memref.alloc() : memref<1x4x16x1xf64>
   %filter = memref.alloc() : memref<2x2x1xf64>
   %output = memref.alloc() : memref<1x2x4x1xi32>
-  linalg.fill ins(%v1 : f64) outs(%input : memref<1x4x16x1xf64>)
-  linalg.fill ins(%v2 : f64) outs(%filter : memref<2x2x1xf64>)
-  linalg.fill ins(%v0 : i32) outs(%output : memref<1x2x4x1xi32>)
+  linalg.fill ins(%v1 : f64) inits(%input : memref<1x4x16x1xf64>)
+  linalg.fill ins(%v2 : f64) inits(%filter : memref<2x2x1xf64>)
+  linalg.fill ins(%v0 : i32) inits(%output : memref<1x2x4x1xi32>)
   call @conv_on_buffers(%input, %filter, %output)
     : (memref<1x4x16x1xf64>, memref<2x2x1xf64>, memref<1x2x4x1xi32>) -> ()
@@ -163,9 +163,9 @@
   %input = memref.alloc() : memref<1x4x16x1xf64>
   %shape = memref.alloc() : memref<2x2xf64>
   %output = memref.alloc() : memref<1x2x4x1xi32>
-  linalg.fill ins(%v1 : f64) outs(%input : memref<1x4x16x1xf64>)
-  linalg.fill ins(%v1 : f64) outs(%shape : memref<2x2xf64>)
-  linalg.fill ins(%v0 : i32) outs(%output : memref<1x2x4x1xi32>)
+  linalg.fill ins(%v1 : f64) inits(%input : memref<1x4x16x1xf64>)
+  linalg.fill ins(%v1 : f64) inits(%shape : memref<2x2xf64>)
+  linalg.fill ins(%v0 : i32) inits(%output : memref<1x2x4x1xi32>)
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index