diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -71,7 +71,7 @@ linalg.generic #trait_attribute ins(%A, %B : memref<?x?xf32, stride_specification>, memref<?x?xf32, stride_specification>) - outs(%C : memref<?x?xf32, stride_specification>) + inits(%C : memref<?x?xf32, stride_specification>) attrs = {other-optional-attributes} {region} ``` @@ -112,7 +112,7 @@ linalg.generic #matmul_trait ins(%A, %B : memref<?x?xf32, stride_specification>, memref<?x?xf32, stride_specification>) - outs(%C : memref<?x?xf32, stride_specification>) + inits(%C : memref<?x?xf32, stride_specification>) {other-optional-attributes} { ^bb0(%a: f32, %b: f32, %c: f32) : %d = arith.mulf %a, %b: f32 @@ -153,7 +153,7 @@ ```mlir %C = linalg.generic #trait_attribute ins(%A, %B : tensor<?x?xf32>, memref<?x?xf32, stride_specification>) - outs(%C : tensor<?x?xf32>) + inits(%C : tensor<?x?xf32>) {other-optional-attributes} {region} -> (tensor<?x?xf32>) @@ -249,7 +249,7 @@ ``` %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) - outs(%init: tensor<64xf32>) + inits(%init: tensor<64xf32>) (%lhs_elem: f32, %rhs_elem: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 @@ -326,7 +326,7 @@ ``` %reduce = linalg.reduce ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<16x64xf32>) + inits(%init:tensor<16x64xf32>) dimensions = [1] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -398,7 +398,7 @@ ``` %transpose = linalg.transpose ins(%input:tensor<16x64xf32>) - outs(%init:tensor<64x16xf32>) + inits(%init:tensor<64x16xf32>) permutation = [1, 0] ``` }]; diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -143,7 +143,7 @@ return failure(); } - if (succeeded(parser.parseOptionalKeyword("outs"))) { + if (succeeded(parser.parseOptionalKeyword("inits"))) { outputsOperandsLoc = parser.getCurrentLocation(); if (parser.parseLParen() || parser.parseOperandList(outputsOperands) || parser.parseColonTypeList(outputTypes) || parser.parseRParen()) @@ -170,7 +170,7 @@ if (!inputs.empty()) p << " ins(" << inputs << " : " << inputs.getTypes() << ")"; if (!outputs.empty()) - p << " outs(" << outputs << " : " << outputs.getTypes() << ")"; + p << " inits(" << outputs << " : " << outputs.getTypes() << ")"; } static void printCommonStructuredOpPartsWithNewLine(OpAsmPrinter &p, @@ -182,7 +182,7 @@ } if (!outputs.empty()) { p.printNewline(); - p << "outs(" << outputs << " : " << outputs.getTypes() << ")"; + p << "inits(" << outputs << " : " << outputs.getTypes() << ")"; } } //===----------------------------------------------------------------------===// @@ -685,7 +685,7 @@ for (Value v : getRegionInputArgs()) setNameFn(v, "in"); for (Value v : getRegionOutputArgs()) - setNameFn(v, "out"); + setNameFn(v, "init"); } void GenericOp::build( @@ -2177,7 +2177,7 @@ OpResult resultValue = castOp.getSource().cast<OpResult>(); unsigned resultNumber = resultValue.getResultNumber(); auto resultType = castOp->getResult(0).getType().cast<RankedTensorType>(); - // Replace the `outs` for the result with a `tensor.cast`. This cast is now + // Replace the `inits` for the result with a `tensor.cast`. This cast is now // going from a more dynamic shape to a less dynamic shape. If the producer // for this cast, i.e.
producer of the out operand, is also an operation // that folds with tensor.cast consumer (like this pattern), the cast will diff --git a/mlir/test/Analysis/test-match-reduction.mlir b/mlir/test/Analysis/test-match-reduction.mlir --- a/mlir/test/Analysis/test-match-reduction.mlir +++ b/mlir/test/Analysis/test-match-reduction.mlir @@ -12,7 +12,7 @@ affine_map<(d0) -> (0)>], iterator_types = ["reduction"]} ins(%in0t : tensor<?xf32>) - outs(%out0t : tensor<1xf32>) { + inits(%out0t : tensor<1xf32>) { ^bb0(%in0: f32, %out0: f32): %add = arith.addf %in0, %out0 : f32 linalg.yield %add : f32 @@ -49,7 +49,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%in0t : tensor<4x4xf32>) - outs(%out0t : tensor<4xf32>) { + inits(%out0t : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %cmp = arith.cmpf ogt, %in0, %out0 : f32 %sel = arith.select %cmp, %in0, %out0 : f32 @@ -69,7 +69,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%in0t : tensor<4x4xf32>) - outs(%out0t : tensor<4xf32>) { + inits(%out0t : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %mul = arith.mulf %in0, %in0 : f32 %sub = arith.subf %mul, %in0 : f32 diff --git a/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir b/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir --- a/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir +++ b/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir @@ -49,7 +49,7 @@ } { linalg.generic #single_workgroup_reduction_trait ins(%input : memref<16xi32, #spirv.storage_class<StorageBuffer>>) - outs(%output : memref<1xi32, #spirv.storage_class<StorageBuffer>>) { + inits(%output : memref<1xi32, #spirv.storage_class<StorageBuffer>>) { ^bb(%in: i32, %out: i32): %sum = arith.addi %in, %out : i32 linalg.yield %sum : i32 @@ -78,7 +78,7 @@ // expected-error @+1 {{failed to legalize operation 'linalg.generic'}} linalg.generic #single_workgroup_reduction_trait ins(%input : memref<16xi32, #spirv.storage_class<StorageBuffer>>) - outs(%output : memref<1xi32, #spirv.storage_class<StorageBuffer>>) { + inits(%output : memref<1xi32, #spirv.storage_class<StorageBuffer>>) { ^bb(%in: i32, %out: i32): %sum = arith.addi %in, %out : i32 linalg.yield %sum : i32 @@ -109,7 +109,7 @@ // expected-error @+1 {{failed to legalize operation 'linalg.generic'}} linalg.generic #single_workgroup_reduction_trait ins(%input : memref<16xi32, #spirv.storage_class<StorageBuffer>>) - outs(%output : memref<1xi32, #spirv.storage_class<StorageBuffer>>) { + inits(%output : memref<1xi32, #spirv.storage_class<StorageBuffer>>) { ^bb(%in: i32, %out: i32): %sum = arith.addi %in, %out : i32 linalg.yield %sum : i32 @@ -140,7 +140,7 @@ // expected-error @+1 {{failed to legalize operation 'linalg.generic'}} linalg.generic #single_workgroup_reduction_trait ins(%input : memref<16x8xi32, #spirv.storage_class<StorageBuffer>>) - outs(%output : memref<16xi32, #spirv.storage_class<StorageBuffer>>) { + inits(%output : memref<16xi32, #spirv.storage_class<StorageBuffer>>) { ^bb(%in: i32, %out: i32): %sum = arith.addi %in, %out : i32 linalg.yield %sum : i32 diff --git a/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir b/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir --- a/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir +++ b/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir @@ -7,7 +7,7 @@ // CHECK-SAME: %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { // CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32> +// CHECK: %[[FILL:.*]] =
linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32> // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32> // CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32> func.func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { @@ -32,7 +32,7 @@ // CHECK: %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32> // CHECK: %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index // CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) inits(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32> // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[OFFSET]], %[[C0]]] [4, %[[DIM1]], 2, %[[DIM3]]] [1, 1, 1, 1] : tensor<4x?x2x?xf32> into tensor<4x?x?x?xf32> // CHECK: return %[[PADDED]] : tensor<4x?x?x?xf32> // CHECK: } diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -4,8 +4,8 @@ func.func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { // CHECK: [[C0:%.+]] = arith.constant 0 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) outs([[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) inits([[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) inits([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) return %0 : tensor<1x5x6xf32> } @@ -17,10 +17,10 @@ func.func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) { // CHECK: [[C0:%.+]] = arith.constant 0 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : i32) outs([[INIT]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> + // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : i32) inits([[INIT]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> // CHECK: [[ONE:%.+]] = arith.constant 1 // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> + // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) inits([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = #tosa.matmul_quant} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) return %0 : tensor<1x5x6xi32> } @@ -33,8 +33,8 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] // CHECK: %[[C0_0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[FILLED:.+]] = linalg.fill 
ins(%[[C0_0]] : f32) outs(%[[INIT]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x5x3xf32>, tensor<?x3x6xf32>) outs(%[[FILLED]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32> + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0_0]] : f32) inits(%[[INIT]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x5x3xf32>, tensor<?x3x6xf32>) inits(%[[FILLED]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<?x5x3xf32>, tensor<?x3x6xf32>) -> (tensor<?x5x6xf32>) return %0 : tensor<?x5x6xf32> } @@ -47,8 +47,8 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]] // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) inits(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) return %0 : tensor<1x5x?xf32> } @@ -59,8 +59,8 @@ func.func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) { // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) inits(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) return %0 : tensor<1x5x6xf32> } @@ -74,12 +74,12 @@ func.func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) { // CHECK: [[INITT:%.+]] = tensor.empty() // CHECK: [[ZERO:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}outs([[INITT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}inits([[INITT]] // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) // CHECK: [[INITB:%.+]] = tensor.empty() - // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) { + // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) inits([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) inits([[INITB]] : tensor<5x6xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): //
CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield [[ADD]] : f32 @@ -97,14 +97,14 @@ func.func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) { // CHECK: [[INITT:%.+]] = tensor.empty() // CHECK: [[ZERO:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}outs([[INITT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}inits([[INITT]] // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) // CHECK: [[INITB:%.+]] = tensor.empty() // CHECK: [[ONE:%.+]] = arith.constant 1 // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]] + // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) inits([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) inits([[INITB]] // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32): // CHECK: [[ADD:%.+]] = arith.addi // CHECK: linalg.yield [[ADD]] : i32 @@ -123,12 +123,12 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] // CHECK: %[[INITT:.+]] = tensor.empty(%[[DIM]]) // CHECK: %[[ZERO:.+]] = arith.constant 0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]]{{.*}}outs(%[[INITT]] + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]]{{.*}}inits(%[[INITT]] // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]> // CHECK: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERM]]) // CHECK: %[[INITB:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor<?x3xf32>, tensor<3x6xf32>) outs(%[[FILL]] : tensor<?x6xf32>) -> tensor<?x6xf32> - // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor<?x6xf32>) outs(%[[INITB]] : tensor<?x6xf32>) { + // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor<?x3xf32>, tensor<3x6xf32>) inits(%[[FILL]] : tensor<?x6xf32>) -> tensor<?x6xf32> + // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor<?x6xf32>) inits(%[[INITB]] : tensor<?x6xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: %[[ADD:.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield %[[ADD]] : f32 @@ -143,9 +143,9 @@ func.func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () { // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 // CHECK-DAG: [[INIT:%.+]] = tensor.empty() - // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}outs([[INIT]] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}inits([[INIT]] // CHECK-DAG: [[KERNEL:%.+]] = tensor.empty() - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>}
ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>) + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) inits([[FILL]] : tensor<1x4x32x62xf32>) %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>) return } @@ -157,9 +157,9 @@ // CHECK-DAG: tensor.yield [[CONST]] // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32 // CHECK-DAG: [[INIT:%.+]] = tensor.empty() - // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[INITVAL]]{{.*}}outs([[INIT]] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[INITVAL]]{{.*}}inits([[INIT]] // CHECK-DAG: [[KERNEL:%.+]] = tensor.empty() - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>) + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) inits([[FILL]] : tensor<1x4x33x62xf32>) %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x33x62xf32>) return } @@ -170,9 +170,9 @@ // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]] // CHECK: %[[CONST:.+]] = arith.constant -3.40282347E+38 // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CONST]]{{.*}}outs(%[[INIT]] + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CONST]]{{.*}}inits(%[[INIT]] // CHECK: %[[KERNEL:.+]] = tensor.empty() - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %[[KERNEL]] : tensor<?x6x34x62xf32>, tensor<3x3xf32>) outs(%[[FILL]] : tensor<?x4x32x62xf32>) + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %[[KERNEL]] : tensor<?x6x34x62xf32>, tensor<3x3xf32>) inits(%[[FILL]] : tensor<?x4x32x62xf32>) %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<?x6x34x62xf32>) -> (tensor<?x4x32x62xf32>) return } @@ -209,11 +209,11 @@ // CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] // CHECK: [[CONST:%.+]] = arith.constant 0 // CHECK: [[POOLINIT:%.+]] = tensor.empty() - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}outs([[POOLINIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}inits([[POOLINIT]] // CHECK: [[KERNEL:%.+]] = tensor.empty() - // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>) + // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) inits([[FILL]] : tensor<1x5x33x62xf32>) // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] :
tensor<1x5x33x62xf32>) inits([[INIT]] : tensor<1x5x33x62xf32>) // CHECK: ^bb0(%[[BBARG1:[a-zA-Z0-9_]+]]: f32, // CHECK: [[ZERO:%.0]] = arith.constant 0 // CHECK: [[ONE:%.+]] = arith.constant 1 @@ -273,9 +273,9 @@ // CHECK: %[[POOLINIT:.+]] = tensor.empty(%[[BATCH]]) // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[KERNEL:.+]] = tensor.empty() - // CHECK: %[[POOL:.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%[[PAD]], %[[KERNEL]] : tensor, tensor<4x4xf32>) outs(%[[FILL]] : tensor) + // CHECK: %[[POOL:.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%[[PAD]], %[[KERNEL]] : tensor, tensor<4x4xf32>) inits(%[[FILL]] : tensor) // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[POOL]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[POOL]] : tensor) inits(%[[INIT]] : tensor) %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor) -> (tensor) return %0 : tensor } @@ -353,8 +353,8 @@ // CHECK: %[[CST:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[B_IN:.+]] = tensor.empty() - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>) + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) inits(%[[FILL]] : tensor<1x45x40x28xf32>) + // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) inits(%[[B_IN]] : tensor<1x45x40x28xf32>) // CHECK: arith.addf // CHECK: linalg.yield %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) @@ -376,8 +376,8 @@ // CHECK: %[[CST:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[B_IN:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor) outs(%[[B_IN]] : tensor) + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor, tensor<3x3x27x28xf32>) inits(%[[FILL]] : tensor) + // CHECK: %[[B:.+]] = linalg.generic 
{indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor) inits(%[[B_IN]] : tensor) // CHECK: %[[ADD:.+]] = arith.addf // CHECK: linalg.yield %[[ADD]] : f32 %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor) @@ -436,8 +436,8 @@ // CHECK: %[[CST:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[B_IN:.+]] = tensor.empty(%[[H_OUT]], %[[W_OUT]]) - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[WEIGHT]] : tensor<1x?x?x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x?x?x28xf32>) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x?x?x28xf32>) outs(%[[B_IN]] : tensor<1x?x?x28xf32>) + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[WEIGHT]] : tensor<1x?x?x27xf32>, tensor<3x3x27x28xf32>) inits(%[[FILL]] : tensor<1x?x?x28xf32>) + // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x?x?x28xf32>) inits(%[[B_IN]] : tensor<1x?x?x28xf32>) // CHECK: %[[ADD:.+]] = arith.addf // CHECK: linalg.yield %[[ADD]] : f32 %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor<1x?x?x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x?x?x28xf32>) @@ -477,11 +477,11 @@ func.func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) inits([[FILL]] : tensor<1x5x5x3x11xf32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) inits([[OUT]] : tensor<1x5x5x33xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: 
linalg.yield [[ADD]] : f32 @@ -503,9 +503,9 @@ // CHECK: %[[CST0:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[OUT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[DEPTH:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor<3x1x3x11xf32>) outs(%[[FILL]] : tensor) + // CHECK: %[[DEPTH:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor<3x1x3x11xf32>) inits(%[[FILL]] : tensor) // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: %[[BIAS:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[COLLAPSED]] : tensor<33xf32>, tensor) outs(%[[OUT]] : tensor) { + // CHECK: %[[BIAS:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[COLLAPSED]] : tensor<33xf32>, tensor) inits(%[[OUT]] : tensor) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: %[[ADD:.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield %[[ADD]] : f32 @@ -523,11 +523,11 @@ func.func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) inits([[FILL]] : tensor<1x5x5x3x11xf32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) inits([[OUT]] : tensor<1x5x5x33xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield [[ADD]] : f32 @@ -549,13 +549,13 @@ // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() // CHECK: [[C128:%.+]] = arith.constant -128 // CHECK: [[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q 
{dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) inits([[FILL]] : tensor<1x12x12x4x128xi32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) inits([[OUT]] : tensor<1x12x12x512xi32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32): // CHECK: [[ADD:%.+]] = arith.addi %[[ARG3]], %[[ARG4]] : i32 // CHECK: linalg.yield [[ADD]] : i32 @@ -573,13 +573,13 @@ func.func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() // CHECK: [[C128:%.+]] = arith.constant -128 // CHECK: [[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) inits([[FILL]] : tensor<1x10x10x4x128xi32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) inits([[OUT]] : tensor<1x10x10x512xi32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32): // CHECK: [[ADD:%.+]] = arith.addi %[[ARG3]], %[[ARG4]] : i32 // CHECK: linalg.yield [[ADD]] : i32 @@ -598,7 +598,7 @@ // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: index, %[[ARG4:[0-9a-zA-Z_]+]]: index, %[[ARG5:[0-9a-zA-Z_]+]]: index, %[[ARG6:[0-9a-zA-Z_]+]]: index): // CHECK: tensor.yield %cst : f32 // CHECK: } : tensor<2x?x?x3xf32> to tensor<2x?x?x3xf32> - // CHECK: %[[CONV:.+]] = 
linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} ins(%[[PADDED]], %arg1 : tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>) outs(%{{.*}} : tensor<2x?x?x3x5xf32>) -> tensor<2x?x?x3x5xf32> + // CHECK: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} ins(%[[PADDED]], %arg1 : tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>) inits(%{{.*}} : tensor<2x?x?x3x5xf32>) -> tensor<2x?x?x3x5xf32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[CONV]] {{\[}}[0], [1], [2], [3, 4]] %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [1, 2, 3, 4], dilation = [2, 1], stride = [1, 2]} : (tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>, tensor<15xf32>) -> tensor<2x?x?x15xf32> return diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir @@ -11,7 +11,7 @@ // CHECK-SAME: indexing_maps = [#map, #map1] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xf32>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<3x15x13x7xf32>) + // CHECK-SAME: inits(%[[EMPTY]] : tensor<3x15x13x7xf32>) // CHECK-NEXT: ^bb0(%[[IN:.+]]: f32, %[[OUT:.+]]: f32): // CHECK: linalg.yield %[[IN]] %resize = "tosa.resize"(%arg0) {mode = "NEAREST_NEIGHBOR", scale = [2, 2, 1, 1], offset = [0, 0], border = [0, 0]} : (tensor<3x1x1x7xf32>) -> tensor<3x15x13x7xf32> @@ -33,7 +33,7 @@ // CHECK-SAME: indexing_maps = [#map, #map1] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xf32>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<3x15x13x7xf32>) + // CHECK-SAME: inits(%[[EMPTY]] : tensor<3x15x13x7xf32>) // CHECK-NEXT: ^bb0(%[[IN:.+]]: f32, %[[OUT:.+]]: f32): // CHECK: linalg.yield %[[IN]] %resize = "tosa.resize"(%arg0) {mode = "BILINEAR", scale = [2, 2, 1, 1], offset = [0, 0], border = [0, 0]} : (tensor<3x1x1x7xf32>) -> tensor<3x15x13x7xf32> @@ -55,7 +55,7 @@ // CHECK-SAME: indexing_maps = [#map, #map1] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<3x15x13x7xi8>) + // CHECK-SAME: inits(%[[EMPTY]] : tensor<3x15x13x7xi8>) // CHECK-NEXT: ^bb0(%[[IN:.+]]: i8, %[[OUT:.+]]: i8): // CHECK: linalg.yield %[[IN]] %resize = "tosa.resize"(%arg0) {mode = "NEAREST_NEIGHBOR", scale = [2, 2, 1, 1], offset = [0, 0], border = [0, 0]} : (tensor<3x1x1x7xi8>) -> tensor<3x15x13x7xi8> @@ -77,7 +77,7 @@ // CHECK-SAME: indexing_maps = [#map, #map1] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<3x15x13x7xi32>) + // CHECK-SAME: inits(%[[EMPTY]] : tensor<3x15x13x7xi32>) // CHECK-NEXT: ^bb0(%[[IN:.+]]: i8, %[[OUT:.+]]: i32): // CHECK: %[[EXT:.+]] = arith.extsi %[[IN]] : i8 to i32 // CHECK: linalg.yield %[[EXT]] @@ -100,7 +100,7 @@ // CHECK-SAME: indexing_maps = [#map, #map1] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<3x15x13x7xi32>) + // CHECK-SAME: inits(%[[EMPTY]] : tensor<3x15x13x7xi32>) // CHECK-NEXT: 
^bb0(%[[IN:.+]]: i8, %[[OUT:.+]]: i32): // CHECK: %[[EXT:.+]] = arith.extsi %[[IN]] : i8 to i32 // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : i32 diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -6,7 +6,7 @@ // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]] func.func @test_abs(%arg0: tensor<f32>) -> tensor<f32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<f32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins(%[[ARG0]] : tensor<f32>) outs([[INIT]] : tensor<f32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins(%[[ARG0]] : tensor<f32>) inits([[INIT]] : tensor<f32>) { // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = math.absf %[[ARG1]] // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -26,7 +26,7 @@ // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]] func.func @test_abs(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xf32>) inits([[INIT]] : tensor<2xf32>) { // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = math.absf %[[ARG1]] // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -45,7 +45,7 @@ // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]] func.func @test_abs(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xf32>) outs([[INIT]] : tensor<2x3xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xf32>) inits([[INIT]] : tensor<2x3xf32>) { // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = math.absf %[[ARG1]] // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -97,7 +97,7 @@ func.func @test_broadcast(%arg0: tensor<1xf32>, %arg1: tensor<2xf32>) -> tensor<2xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32> // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %[[ARG0]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins([[RESHAPE]], %[[ARG1]] : tensor<f32>, tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins([[RESHAPE]], %[[ARG1]] : tensor<f32>, tensor<2xf32>) inits([[INIT]] : tensor<2xf32>) { // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32 // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -117,7 +117,7 @@ func.func @test_broadcast_swapped_args(%arg0: tensor<2xf32>, %arg1: tensor<1xf32>) -> tensor<2xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32> // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %[[ARG1]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps =
[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[RESHAPE]] : tensor<2xf32>, tensor) outs([[INIT]] : tensor<2xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[RESHAPE]] : tensor<2xf32>, tensor) inits([[INIT]] : tensor<2xf32>) { // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32 // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -139,7 +139,7 @@ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32> // CHECK: [[RESHAPE1:%.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1]] // CHECK: [[RESHAPE2:%.+]] = tensor.collapse_shape %[[ARG1]] {{\[}}[0, 1]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins([[RESHAPE1]], [[RESHAPE2]] : tensor<3xf32>, tensor<2xf32>) outs([[INIT]] : tensor<2x3xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins([[RESHAPE1]], [[RESHAPE2]] : tensor<3xf32>, tensor<2xf32>) inits([[INIT]] : tensor<2x3xf32>) { // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32 // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -654,7 +654,7 @@ func.func @test_transpose(%arg0: tensor<1x2x3xi32>) -> () { %0 = arith.constant dense<[1, 2, 0]> : tensor<3xi32> // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3x1xi32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel"]} ins([[ARG0]] : tensor<1x2x3xi32>) outs([[OUT:%.+]] : tensor<2x3x1xi32>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel"]} ins([[ARG0]] : tensor<1x2x3xi32>) inits([[OUT:%.+]] : tensor<2x3x1xi32>) // CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32) // CHECK: linalg.yield [[ARG1]] // CHECK: } @@ -674,7 +674,7 @@ // CHECK: %[[C1:.+]] = arith.constant 1 // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) : tensor - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x3x4xi32>) outs([[OUT:%.+]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x3x4xi32>) inits([[OUT:%.+]] : tensor) // CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32) // CHECK: linalg.yield [[ARG1]] // CHECK: } @@ -696,7 +696,7 @@ // CHECK: %[[C1:.+]] = arith.constant 1 // CHECK: %[[DIM1:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM0]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs([[OUT:%.+]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits([[OUT:%.+]] : tensor) // CHECK: ^bb0([[ARG1:%.+]]: f32, [[ARG2:%.+]]: f32) // CHECK: linalg.yield [[ARG1]] // CHECK: } @@ -715,8 +715,8 @@ func.func 
@reduce_float(%arg0: tensor<5x4xf32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<4xf32> // CHECK: [[CST0:%.+]] = arith.constant 0.0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<4xf32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xf32>) inits([[FILL]] : tensor<4xf32>) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield [[RES]] : f32 @@ -725,8 +725,8 @@ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<5xf32> // CHECK: [[CST0:%.+]] = arith.constant 0.0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<5xf32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xf32>) inits([[FILL]] : tensor<5xf32>) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield [[RES]] : f32 @@ -765,8 +765,8 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor // CHECK: %[[CST0:.+]] = arith.constant 0.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[FILL]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[FILL]] : tensor) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[RES:.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[RES]] : f32 @@ -785,8 +785,8 @@ func.func @reduce_float_dyn_rank_1(%arg0: tensor) -> () { // CHECK-DAG: %[[INIT:.+]] = tensor.empty() : tensor // CHECK-DAG: %[[CST0:.+]] = arith.constant 0.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction"]} ins(%[[ARG0]] : tensor) outs(%[[FILL]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction"]} ins(%[[ARG0]] : tensor) inits(%[[FILL]] : tensor) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[RES:.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[RES]] : f32 @@ -807,8 +807,8 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<5x?xf32> // CHECK: %[[CST1:.+]] = arith.constant 1.0 - // CHECK: %[[FILL:.+]] = linalg.fill 
ins(%[[CST1]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[ARG0]] : tensor<5x?x4xf32>) outs(%[[FILL]] : tensor<5x?xf32>) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST1]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[ARG0]] : tensor<5x?x4xf32>) inits(%[[FILL]] : tensor<5x?xf32>) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[RES:.+]] = arith.mulf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[RES]] : f32 @@ -829,8 +829,8 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) // CHECK: %[[CMIN:.+]] = arith.constant -3.40282347E+38 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CMIN]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor) outs(%[[FILL]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CMIN]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor) inits(%[[FILL]] : tensor) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[MAX:.+]] = arith.maxf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[MAX]] : f32 @@ -850,8 +850,8 @@ func.func @reduce_int(%arg0: tensor<5x4xi32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<4xi32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi32>) inits([[FILL]] : tensor<4xi32>) // CHECK: ^bb0(%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) // CHECK: [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32 // CHECK: linalg.yield [[RES]] : i32 @@ -860,8 +860,8 @@ // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<5xi32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xi32>) inits([[FILL]] : tensor<5xi32>) // CHECK: ^bb0(%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) // CHECK: [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32 // CHECK: linalg.yield [[RES]] : i32 @@ -900,8 +900,8 @@ func.func @reduce_bool(%arg0: tensor<5x4xi1>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant true - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi1>) 
outs([[FILL]] : tensor<4xi1>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi1>) inits([[FILL]] : tensor<4xi1>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i1, %[[ARG2:[0-9a-zA-Z_]+]]: i1) // CHECK: [[RES:%.+]] = arith.andi %[[ARG1]], %[[ARG2]] : i1 // CHECK: linalg.yield [[RES]] : i1 @@ -930,7 +930,7 @@ // CHECK: [[IDX1:%.+]] = arith.constant 1 : index // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<11x1xf32> // CHECK: [[CST:%.+]] = arith.constant 0.0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}inits([[INIT]] // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %[[ARG0]] into [[FILL]][0, 0] [5, 1] [1, 1] // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %[[ARG1]] into [[INSERT0]][5, 0] [6, 1] [1, 1] %0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<5x1xf32>, tensor<6x1xf32>) -> (tensor<11x1xf32>) @@ -942,7 +942,7 @@ // CHECK: [[IDX1:%.+]] = arith.constant 1 : index // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<5x2xf32> // CHECK: [[CST:%.+]] = arith.constant 0.0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}inits([[INIT]] // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %[[ARG0]] into [[FILL]][0, 0] [5, 1] [1, 1] // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %[[ARG0]] into [[INSERT0]][0, 1] [5, 1] [1, 1] %1 = "tosa.concat"(%arg0, %arg0) { axis = 1 : i64} : (tensor<5x1xf32>, tensor<5x1xf32>) -> (tensor<5x2xf32>) @@ -965,7 +965,7 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[IDX1_2]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<11x?xf32> // CHECK: %[[CST:.+]] = arith.constant 0.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}outs(%[[INIT]] + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}inits(%[[INIT]] // CHECK: %[[INSERT0:.+]] = tensor.insert_slice %[[ARG0]] into %[[FILL]][0, 0] [5, %[[SIZE]]] [1, 1] // CHECK: %[[INSERT1:.+]] = tensor.insert_slice %[[ARG1]] into %[[INSERT0]][5, 0] [6, %[[SIZE]]] [1, 1] %0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<5x?xf32>, tensor<6x?xf32>) -> (tensor<11x?xf32>) @@ -988,7 +988,7 @@ // CHECK: %[[IDX1:.+]] = arith.constant 1 : index // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor // CHECK: %[[CST:.+]] = arith.constant 0.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}outs(%[[INIT]] + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}inits(%[[INIT]] // CHECK: %[[DYN1:.+]] = tensor.dim %[[ARG0]], %[[AXIS]] // CHECK: %[[INSERT0:.+]] = tensor.insert_slice %[[ARG0]] into %[[FILL]][0, 0] [%[[DYN1]], 3] [1, 1] // CHECK: %[[SUM:.+]] = arith.addi %[[OFFSET]], %[[DYN1]] @@ -1007,7 +1007,7 @@ // CHECK: [[C0:%.+]] = arith.constant 19689 // CHECK: [[C1:%.+]] = arith.constant 15 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) inits([[INIT]] : tensor<2xi8>) // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8): // CHECK: [[C17:%.+]] = arith.constant 17 // CHECK: [[C22:%.+]] = arith.constant 22 @@ -1028,7 +1028,7 @@ // CHECK: [[C0:%.+]] = 
arith.constant 19689 // CHECK: [[C1:%.+]] = arith.constant 15 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xui8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) inits([[INIT]] : tensor<2xui8>) // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: ui8): // CHECK: [[C17:%.+]] = arith.constant 17 // CHECK: [[C22:%.+]] = arith.constant 22 @@ -1061,13 +1061,13 @@ // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor) %0 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor) -> (tensor) // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor) %1 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor) -> (tensor) return @@ -1085,7 +1085,7 @@ // CHECK: %[[C2:.+]] = arith.constant 2 // CHECK: %[[DIM2:.+]] = tensor.dim %[[ARG0]], %[[C2]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM2]]) - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x?x32xi32>) outs(%[[INIT]] : tensor<1x?x?x32xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x?x32xi32>) inits(%[[INIT]] : tensor<1x?x?x32xi8>) %0 = "tosa.rescale"(%arg0) {double_round = true, input_zp = 0 : i32, multiplier = [1376784203 : i32], output_zp = 0 : i32, per_channel = false, scale32 = true, shift = [38 : i32]} : (tensor<1x?x?x32xi32>) -> tensor<1x?x?x32xi8> return } @@ -1100,7 +1100,7 @@ // CHECK: [[C0:%.+]] = arith.constant 19689 // CHECK: [[C1:%.+]] = arith.constant 15 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xui8>) outs([[INIT]] : tensor<2xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xui8>) inits([[INIT]] : tensor<2xi8>) // CHECK: ^bb0([[IN:%.+]]: ui8, [[UNUSED:%.+]]: i8): // CHECK: [[C17:%.+]] = arith.constant 17 // 
CHECK: [[C22:%.+]] = arith.constant 22 @@ -1132,7 +1132,7 @@ // CHECK: [[MULTIPLIERS:%.+]] = arith.constant dense<[42, 43, 0]> // CHECK: [[SHIFTS:%.+]] = arith.constant dense<[14, 15, 0]> // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) outs([[INIT]] : tensor<3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) inits([[INIT]] : tensor<3xi8>) // CHECK: ^bb0([[IN:%.+]]: i8, [[MULTIPLIER:%.+]]: i32, [[SHIFT:%.+]]: i8, [[UNUSED:%.+]]: i8): // CHECK: [[C243:%.+]] = arith.constant 243 // CHECK: [[C252:%.+]] = arith.constant 252 @@ -1185,7 +1185,7 @@ // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} inits(%[[INIT]] : tensor<5x4xi32>) // CHECK-DAG: %[[I0:.+]] = linalg.index 0 // CHECK-DAG: %[[I1:.+]] = linalg.index 1 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1 @@ -1198,7 +1198,7 @@ // CHECK: %[[C1:.+]] = arith.constant 1 // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} inits(%[[INIT]] : tensor<5x4xi32>) // CHECK-DAG: %[[I0:.+]] = linalg.index 0 // CHECK-DAG: %[[I1:.+]] = linalg.index 1 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1 @@ -1222,7 +1222,7 @@ // CHECK: %[[C0_2:.+]] = arith.constant 0 // CHECK: %[[D0_2:.+]] = tensor.dim %[[ARG0]], %[[C0_2]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[D0_1]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} inits(%[[INIT]] : tensor) // CHECK-DAG: %[[I0:.+]] = linalg.index 0 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1 // CHECK-DAG: %[[RDIM_MINUS_C1:.+]] = arith.subi %[[D0_2]], %[[SUB1]] @@ -1242,21 +1242,21 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor<2x3xi8> func.func @tile(%arg0 : tensor<2x3xi8>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<2x2x1x3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits([[INIT]] : tensor<2x2x1x3xi8>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8 // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1, 2], [3]] %0 = "tosa.tile"(%arg0) {multiples = [2, 1]} : (tensor<2x3xi8>) -> (tensor<4x3xi8>) // 
CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<1x2x2x3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits([[INIT]] : tensor<1x2x2x3xi8>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8 // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1], [2, 3]] %1 = "tosa.tile"(%arg0) {multiples = [1, 2]} : (tensor<2x3xi8>) -> (tensor<2x6xi8>) // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<5x2x7x3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits([[INIT]] : tensor<5x2x7x3xi8>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8 // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1], [2, 3]] @@ -1276,7 +1276,7 @@ // CHECK: %[[CST0:.+]] = arith.constant 0 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]] : tensor // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor<2x?x1x3xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor<2x?x1x3xi8>) // CHECK: ^bb0(%[[ARG1:.+]]: i8, // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[GENERIC]] {{\[}}[0, 1, 2, 3]] @@ -1297,7 +1297,7 @@ // CHECK: %[[CST1:.+]] = arith.constant 1 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]] : tensor<2x3xi8> // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs(%[[INIT]] : tensor<2x2x?x3xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits(%[[INIT]] : tensor<2x2x?x3xi8>) // CHECK: ^bb0(%[[ARG1:.+]]: i8, // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[GENERIC]] {{\[}}[0, 1, 2, 3]] @@ -1405,11 +1405,11 @@ func.func @argmax(%arg0 : tensor<3x2xi32>, %arg1 : tensor<6xf32>) -> () { // CHECK: [[IDX_INIT:%.+]] = tensor.empty() // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32 - // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]] + // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}inits([[IDX_INIT]] // CHECK: [[VAL_INIT:%.+]] = tensor.empty() // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648 - // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = 
["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>) + // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}inits([[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x2xi32>) inits([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32 // CHECK: [[IDX:%.+]] = linalg.index 0 // CHECK: [[CAST:%.+]] = arith.index_cast [[IDX]] @@ -1421,11 +1421,11 @@ // CHECK: [[IDX_INIT:%.+]] = tensor.empty() // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32 - // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]] + // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}inits([[IDX_INIT]] // CHECK: [[VAL_INIT:%.+]] = tensor.empty() // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648 - // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#map, #map2, #map2], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) + // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}inits([[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#map, #map2, #map2], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x2xi32>) inits([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32 // CHECK: [[IDX:%.+]] = linalg.index 1 // CHECK: [[CAST:%.+]] = arith.index_cast [[IDX]] @@ -1457,11 +1457,11 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]] // CHECK: %[[IDX_INIT:.+]] = tensor.empty(%[[DYN]]) // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32 - // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]] + // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}inits(%[[IDX_INIT]] // CHECK: %[[VAL_INIT:.+]] = tensor.empty(%[[DYN]]) // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648 - // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor, tensor) + // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}inits(%[[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x?xi32>) inits(%[[IDX_FILL]], %[[VAL_FILL]] : tensor, tensor) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32 // CHECK: %[[IDX:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[IDX]] @@ -1481,11 +1481,11 @@ func.func @argmax_dyn_axis(%arg0 : tensor<3x?xi32>) -> () { // CHECK: %[[IDX_INIT:.+]] = tensor.empty() // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32 - // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]] + // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}inits(%[[IDX_INIT]] // CHECK: %[[VAL_INIT:.+]] = tensor.empty() // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648 - // CHECK: %[[VAL_FILL:.+]] = linalg.fill 
ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) + // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}inits(%[[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x?xi32>) inits(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) // CHECK: %[[IDX:.+]] = linalg.index 1 // CHECK: %[[CAST:.+]] = arith.index_cast %[[IDX]] // CHECK: %[[CMP:.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]] @@ -1503,7 +1503,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]] func.func @gather_float(%arg0: tensor<2x3x2xf32>, %arg1: tensor<2x3xi32>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xf32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) inits(%[[INIT]] : tensor<2x3x2xf32>) // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32) // CHECK: %[[IDX0:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]] @@ -1523,7 +1523,7 @@ // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor) inits(%[[INIT]] : tensor) // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32) // CHECK: %[[IDX0:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]] @@ -1541,7 +1541,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]] func.func @gather_int(%arg0: tensor<2x3x2xi32>, %arg1: tensor<2x3xi32>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) inits(%[[INIT]] : tensor<2x3x2xi32>) // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: i32) // CHECK: %[[IDX0:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]] @@ -1559,7 +1559,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: func.func @table8(%arg0: tensor<6xi8>, %arg1: tensor<512xi8>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) inits(%[[INIT]] : tensor<6xi8>) // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8) // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]] // CHECK: %[[OFFSET:.+]] = arith.constant 128 @@ 
-1577,7 +1577,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: func.func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi16>) outs(%[[INIT]] : tensor<6xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi16>) inits(%[[INIT]] : tensor<6xi32>) // CHECK: ^bb0(%[[ARG2:.*]]: i16, %[[ARG3:.*]]: i32) // CHECK: %[[EXT_IN:.+]] = arith.extsi %[[ARG2]] // CHECK: %[[C32768:.+]] = arith.constant 32768 @@ -1612,7 +1612,7 @@ // CHECK: %[[CST0:.+]] = arith.constant 0 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor) // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8) // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]] // CHECK: %[[OFFSET:.+]] = arith.constant 128 @@ -1630,7 +1630,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: func.func @table8_dyn_table(%arg0: tensor<6xi8>, %arg1: tensor) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) inits(%[[INIT]] : tensor<6xi8>) // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8) // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]] // CHECK: %[[OFFSET:.+]] = arith.constant 128 diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir @@ -11,7 +11,7 @@ %f0 = arith.constant 0.0: f32 // CHECK: %[[EXTRACT_SLICE_ALLOC:.*]] = memref.alloc(%[[sz]]) - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[EXTRACT_SLICE_ALLOC]] : memref) + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[EXTRACT_SLICE_ALLOC]] : memref) // Alloc is needed for the **first** insert_slice (due to backward traversal during analysis). // CHECK: %[[DIM:.*]] = memref.dim %[[FUNC_ARG]] // This allocs the whole dim to allow for a full clone of t. @@ -20,7 +20,7 @@ // insert_slice. AllocTensorOp replaces the alloc_tensor with an out-of-place // extract_slice. 
%a = bufferization.alloc_tensor(%sz) : tensor - %f = linalg.fill ins(%f0 : f32) outs(%a : tensor) -> tensor + %f = linalg.fill ins(%f0 : f32) inits(%a : tensor) -> tensor // CHECK: memref.copy %[[FUNC_ARG]], %[[ALLOC]] : memref to memref // CHECK: %[[SV0_ALLOC:.*]] = memref.subview %[[ALLOC]][0] [%[[sz]]] [1] : memref to memref> @@ -52,8 +52,8 @@ // CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] %a = bufferization.alloc_tensor(%sz) : tensor - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW]] : memref) -> tensor + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[T_SUBVIEW]] : memref) -> tensor // Self-copy canonicalizes away later. %r1 = tensor.insert_slice %f into %t[42][%sz][1]: tensor into tensor @@ -79,8 +79,8 @@ %iv_i32 = arith.index_cast %iv : index to i32 %f = arith.sitofp %iv_i32 : i32 to f32 - // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[subview]] - %filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) -> tensor<5xf32> + // CHECK: linalg.fill ins(%{{.*}}{{.*}}inits(%[[subview]] + %filled = linalg.fill ins(%f : f32) inits(%blank : tensor<5xf32>) -> tensor<5xf32> // CHECK-NOT: memref.copy %inserted = tensor.insert_slice %filled into %bb[%iv][5][1] : tensor<5xf32> into tensor @@ -109,8 +109,8 @@ %iv_i32 = arith.index_cast %iv : index to i32 %f = arith.sitofp %iv_i32 : i32 to f32 - // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[subview]] - %filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) -> tensor<5xf32> + // CHECK: linalg.fill ins(%{{.*}}{{.*}}inits(%[[subview]] + %filled = linalg.fill ins(%f : f32) inits(%blank : tensor<5xf32>) -> tensor<5xf32> // CHECK-NOT: memref.copy %inserted = tensor.insert_slice %filled into %bb[%idx][5][1] : tensor<5xf32> into tensor @@ -130,7 +130,7 @@ func.func @shape_mismatch(%t: tensor<5x6x128xf32>) -> tensor<5x6x128xf32> { %cst = arith.constant 8.0 : f32 %0 = bufferization.alloc_tensor() : tensor<128xf32> - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<128xf32>) -> tensor<128xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<128xf32>) -> tensor<128xf32> %2 = tensor.expand_shape %1 [[0, 1, 2]] : tensor<128xf32> into tensor<1x1x128xf32> %3 = tensor.insert_slice %2 into %t[2, 3, 0][1, 1, 128][1, 1, 1] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir @@ -137,10 +137,10 @@ // One alloc for the alloc_tensor, another one because the transfer_write // bufferizes out-of-place. // CHECK: %[[m1:.*]] = memref.alloc() {{.*}} : memref<10xf32> - // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[m1]] + // CHECK: linalg.fill ins(%{{.*}}{{.*}}inits(%[[m1]] // CHECK: %[[filled_tensor:.*]] = bufferization.to_tensor %[[m1]] %t1 = bufferization.alloc_tensor() : tensor<10xf32> - %filled = linalg.fill ins(%cst : f32) outs(%t1 : tensor<10xf32>) -> tensor<10xf32> + %filled = linalg.fill ins(%cst : f32) inits(%t1 : tensor<10xf32>) -> tensor<10xf32> // The transfer_write is out-of-place because "dummy_op" may read. 
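// For orientation, a hedged sketch of the bufferized form these CHECK lines
// describe (the %buf name is illustrative, not taken from the test): once the
// init operand is a memref, linalg.fill writes in place and returns nothing.
//
//   %buf = memref.alloc() : memref<10xf32>
//   linalg.fill ins(%cst : f32) inits(%buf : memref<10xf32>)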
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32> diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir @@ -51,13 +51,13 @@ // CHECK: %[[call:.*]] = call @return_slice(%[[t]] // CHECK: %[[alloc:.*]] = memref.alloc // CHECK: memref.copy %[[call]], %[[alloc]] -// CHECK: linalg.fill ins({{.*}}) outs(%[[t]] +// CHECK: linalg.fill ins({{.*}}) inits(%[[t]] // CHECK: memref.load %[[alloc]] // CHECK: memref.load %[[t]] func.func @main(%t: tensor, %sz: index, %idx: index) -> (f32, f32) { %cst = arith.constant 1.0 : f32 %0 = call @return_slice(%t, %sz) : (tensor, index) -> (tensor) - %filled = linalg.fill ins(%cst : f32) outs(%t : tensor) -> tensor + %filled = linalg.fill ins(%cst : f32) inits(%t : tensor) -> tensor %r1 = tensor.extract %0[%idx] : tensor %r2 = tensor.extract %filled[%idx] : tensor return %r1, %r2 : f32, f32 diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir @@ -75,21 +75,21 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%B: tensor<4x4xf32>) + inits(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // matmul output operand interferes with input operand. // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%B: tensor<4x4xf32>) + inits(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // matmul output operand does not interfere with input operand.
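// A hedged reading of the analysis results above (reasoning only; the ops are
// the ones from this test): the init operand is marked "false" whenever the
// init tensor is still live as an ins operand, so bufferization must copy it:
//
//   %D = linalg.matmul ins(%B, %A : tensor<4x4xf32>, tensor<4x4xf32>)
//                      inits(%B : tensor<4x4xf32>) -> tensor<4x4xf32>
//
// An init that no ins operand aliases, as in %E below, is written in place.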
// CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%B: tensor<4x4xf32>) + inits(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // CHECK: return @@ -260,7 +260,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} @@ -292,7 +292,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} @@ -304,7 +304,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor) -> tensor + %5 = linalg.fill ins(%cst : f32) inits(%4 : tensor) -> tensor %3 = vector.transfer_read %1[%idx2], %cst2 : tensor, vector<5xf32> @@ -336,14 +336,14 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%B: tensor<4x4xf32>) + inits(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // matmul output operand is inplaceable at the function boundary. // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%C: tensor<4x4xf32>) + inits(%C: tensor<4x4xf32>) -> tensor<4x4xf32> // CHECK: return @@ -370,7 +370,7 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%B, %C: tensor, tensor) - outs(%sB: tensor<4x4xf32>) + inits(%sB: tensor<4x4xf32>) -> tensor<4x4xf32> // Step 2. %sC forward propagates to an inplace write in %E. @@ -385,7 +385,7 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%sC: tensor<4x4xf32>) + inits(%sC: tensor<4x4xf32>) -> tensor<4x4xf32> return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> @@ -410,7 +410,7 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} - %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) outs(%15 : tensor) -> tensor + %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) inits(%15 : tensor) -> tensor // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]} @@ -450,7 +450,7 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %D = linalg.matmul ins(%B, %C: tensor, tensor) - outs(%sB: tensor<4x4xf32>) + inits(%sB: tensor<4x4xf32>) -> tensor<4x4xf32> // Step 2. %sC forward propagates to an inplace write in %E. 
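// A minimal sketch of the slice-update pattern these hunks exercise (sizes
// assumed 4x4 and 8x8 purely for illustration): a matmul may write its init
// slice in place when nothing later reads the overwritten region of %C.
//
//   %sC = tensor.extract_slice %C[0, 0] [4, 4] [1, 1] : tensor<8x8xf32> to tensor<4x4xf32>
//   %E = linalg.matmul ins(%A, %sB : tensor<4x4xf32>, tensor<4x4xf32>)
//                      inits(%sC : tensor<4x4xf32>) -> tensor<4x4xf32>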
@@ -465,7 +465,7 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%sC: tensor<4x4xf32>) + inits(%sC: tensor<4x4xf32>) -> tensor<4x4xf32> return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> @@ -504,7 +504,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]} %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> - %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32> + %FA = linalg.fill ins(%f0 : f32) inits(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32> %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor @@ -527,7 +527,7 @@ %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor to tensor<4x?xf32> %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32> - %FB = linalg.fill ins(%f0 : f32) outs(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32> + %FB = linalg.fill ins(%f0 : f32) inits(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32> %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32> %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor into tensor @@ -550,7 +550,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssC = tensor.extract_slice %sC[0, 0][%sz1, 4][1, 1] : tensor to tensor - %FC = linalg.fill ins(%f0 : f32) outs(%ssC : tensor) -> tensor + %FC = linalg.fill ins(%f0 : f32) inits(%ssC : tensor) -> tensor %rsC = tensor.insert_slice %FC into %sC[0, 0][%sz2, 4][1, 1] : tensor into tensor %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor into tensor @@ -577,12 +577,12 @@ // cannot bufferize inplace. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} - %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> + %A = linalg.fill ins(%f1 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32> // 1. Bufferizes inplace: no alias to %A is yet possible. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> + %B = linalg.fill ins(%f2 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32> call @foo(%A) : (tensor<64xf32>) -> () call @foo(%B) : (tensor<64xf32>) -> () @@ -613,12 +613,12 @@ // bufferize inplace. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} - %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> + %A = linalg.fill ins(%f1 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32> // 4. Bufferizes inplace: no alias to %A is yet possible. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> + %B = linalg.fill ins(%f2 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32> // 3. Does not read or write, bufferizes inplace. // CHECK: scf.for @@ -638,12 +638,12 @@ // cannot bufferize inplace. 
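// Hedged illustration of the conflict called out here (the ops mirror this
// test; the attribute values are the ones the analysis assigns): two fills
// into the same init interfere, so only one of them can reuse the buffer,
// and the backward traversal hands it to the later fill.
//
//   %A = linalg.fill ins(%f1 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32>  // "false": out of place
//   %B = linalg.fill ins(%f2 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32>  // "true": in place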
// CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} - %A2 = linalg.fill ins(%f1 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32> + %A2 = linalg.fill ins(%f1 : f32) inits(%I2 : tensor<64xf32>) -> tensor<64xf32> // 1. Bufferizes inplace: no alias to %A2 is yet possible. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %B2 = linalg.fill ins(%f2 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32> + %B2 = linalg.fill ins(%f2 : f32) inits(%I2 : tensor<64xf32>) -> tensor<64xf32> call @bar(%A2) : (tensor<64xf32>) -> () call @bar(%B2) : (tensor<64xf32>) -> () @@ -688,8 +688,8 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> - %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> + %8 = linalg.fill ins(%cst_0 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> + %11 = linalg.fill ins(%cst_1 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} @@ -701,7 +701,7 @@ %sB = tensor.extract_slice %11[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32> %r = linalg.matmul ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>) - outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: return // CHECK-SAME: __equivalent_func_args__ = [2] @@ -726,7 +726,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} // CHECK: vector.transfer_write // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"] - %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> + %8 = linalg.fill ins(%cst_0 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> %9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32> %10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32> @@ -734,7 +734,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} // CHECK: vector.transfer_write // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"] - %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> + %11 = linalg.fill ins(%cst_1 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> %12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32> %13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32> @@ -748,7 +748,7 @@ %sB = tensor.extract_slice %13[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32> %r = linalg.matmul ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>) - outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: return // CHECK-SAME: __equivalent_func_args__ = [2] @@ -779,7 +779,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] - %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32> + %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32> // CHECK: tensor.extract_slice // CHECK-SAME: 
{__inplace_operands_attr__ = ["true"] @@ -857,7 +857,7 @@ // CHECK: linalg.generic // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"] %o:2 = linalg.generic #trait ins(%t1 : tensor) - outs (%t2, %t2 : tensor, tensor) { + inits (%t2, %t2 : tensor, tensor) { ^bb(%0: f32, %1: f32, %2 : f32) : linalg.yield %0, %0 : f32, f32 } -> (tensor, tensor) @@ -892,7 +892,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false", "false"] %o:3 = linalg.generic #trait ins(%t1 : tensor) - outs (%t2, %t2, %t2 : tensor, tensor, tensor) { + inits (%t2, %t2, %t2 : tensor, tensor, tensor) { ^bb(%0: f32, %1: f32, %2 : f32, %3 : f32) : linalg.yield %0, %0, %0 : f32, f32, f32 } -> (tensor, tensor, tensor) diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir @@ -222,7 +222,7 @@ func.func @mini_test_case1() -> tensor<10x20xf32> { %f0 = arith.constant 0.0 : f32 %t = bufferization.alloc_tensor() : tensor<10x20xf32> - %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<10x20xf32>) -> tensor<10x20xf32> + %r = linalg.fill ins(%f0 : f32) inits(%t : tensor<10x20xf32>) -> tensor<10x20xf32> // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} return %r : tensor<10x20xf32> } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -244,7 +244,7 @@ // CHECK-NOT: copy func.func @does_not_read(%t: tensor) -> tensor { %f0 = arith.constant 0.0 : f32 - %r = linalg.fill ins(%f0 : f32) outs(%t : tensor) -> tensor + %r = linalg.fill ins(%f0 : f32) inits(%t : tensor) -> tensor return %r : tensor } @@ -416,12 +416,12 @@ // CHECK-NEXT: %[[C0:.*]] = arith.constant 0{{.*}} : f32 %v0 = arith.constant 0.0 : f32 - // CHECK-NEXT: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref>) - %d = linalg.fill ins(%v0 : f32) outs(%c : tensor) -> tensor + // CHECK-NEXT: linalg.fill ins(%[[C0]] : f32) inits(%[[C]] : memref>) + %d = linalg.fill ins(%v0 : f32) inits(%c : tensor) -> tensor - // CHECK-NEXT: linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, strided<[?], offset: ?>>, memref<64xf32, strided<[?], offset: ?>>) outs(%[[C]] : memref>) + // CHECK-NEXT: linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, strided<[?], offset: ?>>, memref<64xf32, strided<[?], offset: ?>>) inits(%[[C]] : memref>) %e = linalg.dot ins(%a, %b : tensor<64xf32>,tensor<64xf32>) - outs(%d: tensor) -> tensor + inits(%d: tensor) -> tensor // CHECK-NEXT: return return %e : tensor @@ -446,12 +446,12 @@ %B = bufferization.alloc_tensor() : tensor<64xf32> %C = bufferization.alloc_tensor() : tensor - // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>) - // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>) - // CHECK-DAG: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref) - %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32> - %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32> - %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor + // CHECK-DAG: 
linalg.fill ins(%[[C1]] : f32) inits(%[[A]] : memref<64xf32>) + // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) inits(%[[B]] : memref<64xf32>) + // CHECK-DAG: linalg.fill ins(%[[C0]] : f32) inits(%[[C]] : memref) + %AA = linalg.fill ins(%v1 : f32) inits(%A : tensor<64xf32>) -> tensor<64xf32> + %BB = linalg.fill ins(%v2 : f32) inits(%B : tensor<64xf32>) -> tensor<64xf32> + %CC = linalg.fill ins(%v0 : f32) inits(%C : tensor) -> tensor // CHECK-NEXT: call @init_and_dot(%[[cA]], %[[cB]], %[[cC]]) %res = call @init_and_dot(%AA, %BB, %CC) : diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir @@ -56,11 +56,11 @@ -> (tensor<5xf32>, tensor<5xf32>) { // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<5xf32> - // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<5xf32>) + // CHECK: linalg.generic {{.*}} inits(%[[alloc]] : tensor<5xf32>) %r = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%t : tensor<5xf32>) { + inits(%t : tensor<5xf32>) { ^bb0(%arg0 : f32) : linalg.yield %f : f32 } -> tensor<5xf32> @@ -75,11 +75,11 @@ { %0 = tensor.extract_slice %t[0][3][1] : tensor<5xf32> to tensor<3xf32> // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<3xf32> - // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<3xf32>) + // CHECK: linalg.generic {{.*}} inits(%[[alloc]] : tensor<3xf32>) %r = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%0 : tensor<3xf32>) { + inits(%0 : tensor<3xf32>) { ^bb0(%arg0 : f32) : linalg.yield %f : f32 } -> tensor<3xf32> diff --git a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir --- a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir +++ b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir @@ -151,7 +151,7 @@ %name = gpu.launch async[%stream] blocks(%arg3, %arg4, %arg5) in (%arg9 = %one, %arg10 = %one, %arg11 = %one) threads(%arg6, %arg7, %arg8) in (%arg12 = %one, %arg13 = %one, %arg14 = %one) { - %t = linalg.matmul ins(%x, %y: tensor<32x32xf32>, tensor<32x32xf32>) outs(%z : tensor<32x32xf32>) -> tensor<32x32xf32> + %t = linalg.matmul ins(%x, %y: tensor<32x32xf32>, tensor<32x32xf32>) inits(%z : tensor<32x32xf32>) -> tensor<32x32xf32> gpu.terminator } return diff --git a/mlir/test/Dialect/Linalg/affine.mlir b/mlir/test/Dialect/Linalg/affine.mlir --- a/mlir/test/Dialect/Linalg/affine.mlir +++ b/mlir/test/Dialect/Linalg/affine.mlir @@ -10,7 +10,7 @@ %B = memref.view %arg0[%c0][%K, %N] : memref to memref %C = memref.view %arg0[%c0][%M, %N] : memref to memref linalg.matmul ins(%A, %B: memref, memref) - outs(%C: memref) + inits(%C: memref) return } @@ -19,7 +19,7 @@ //----------------------------------------------------------------------------// func.func @named_batch_matmul(%A: memref, %B: memref, %C: memref) { linalg.batch_matmul ins(%A, %B: memref, memref) - outs(%C : memref) + inits(%C : memref) return } // CHECK-LABEL: @named_batch_matmul diff --git a/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir b/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir --- a/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir +++ 
b/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir @@ -7,7 +7,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %add = arith.addf %b0, %b1 : f32 linalg.yield %add : f32 @@ -22,7 +22,7 @@ // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[%arg3] [%arg5] [1] : tensor to tensor // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[%arg2, %arg3] [%arg4, %arg5] [1, 1] : tensor to tensor // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor) outs(%[[SLICE2]] : tensor) +// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor) inits(%[[SLICE2]] : tensor) // CHECK: return %[[GENERIC]] : tensor //----- @@ -34,7 +34,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] } ins(%arg0, %arg1 : tensor<16x8xf32>, tensor<8xf32>) - outs(%arg0 : tensor<16x8xf32>) { + inits(%arg0 : tensor<16x8xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %add = arith.addf %b0, %b1 : f32 linalg.yield %add : f32 @@ -49,7 +49,7 @@ // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[4] [2] [1] : tensor<8xf32> to tensor<2xf32> // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[8, 4] [4, 2] [1, 1] : tensor<16x8xf32> to tensor<4x2xf32> // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) outs(%[[SLICE2]] : tensor<4x2xf32>) +// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) inits(%[[SLICE2]] : tensor<4x2xf32>) // CHECK: return %[[GENERIC]] : tensor<4x2xf32> //----- @@ -61,7 +61,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor<8xf32>) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %add = arith.addf %b0, %b1 : f32 linalg.yield %add : f32 @@ -76,7 +76,7 @@ // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[%arg2] [2] [1] : tensor<8xf32> to tensor<2xf32> // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[8, %arg2] [%arg3, 2] [1, 1] : tensor to tensor // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor<2xf32>) outs(%[[SLICE2]] : tensor) +// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor<2xf32>) inits(%[[SLICE2]] : tensor) // CHECK: return %[[GENERIC]] : tensor //----- @@ -88,7 +88,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %add = arith.addf %b0, %b1 : f32 linalg.yield %add : f32 @@ -103,7 +103,7 @@ // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[4] [2] [1] : tensor to tensor<2xf32> // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[8, 4] [4, 2] [1, 1] : tensor to tensor<4x2xf32> // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) outs(%[[SLICE2]] : tensor<4x2xf32>) +// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) 
inits(%[[SLICE2]] : tensor<4x2xf32>) // CHECK: return %[[GENERIC]] : tensor<4x2xf32> //----- @@ -112,7 +112,7 @@ %lhs = arith.constant dense<1.0> : tensor<4x4xf32> %rhs = arith.constant dense<1.0> : tensor<4x4xf32> %dst = arith.constant dense<[[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 11.0], [12.0, 13.0, 14.0, 15.0]]> : tensor<4x4xf32> - %0 = linalg.matmul ins(%lhs, %rhs : tensor<4x4xf32>, tensor<4x4xf32>) outs(%dst : tensor<4x4xf32>) -> tensor<4x4xf32> + %0 = linalg.matmul ins(%lhs, %rhs : tensor<4x4xf32>, tensor<4x4xf32>) inits(%dst : tensor<4x4xf32>) -> tensor<4x4xf32> %1 = tensor.extract_slice %0[1,1][2,2][1,1] : tensor<4x4xf32> to tensor<2x2xf32> return %1 : tensor<2x2xf32> } @@ -121,7 +121,7 @@ // CHECK: %[[SLICE0:.+]] = arith.constant dense<1.000000e+00> : tensor<2x4xf32> // CHECK: %[[SLICE1:.+]] = arith.constant dense<1.000000e+00> : tensor<4x2xf32> // CHECK: %[[SLICE3:.+]] = tensor.extract_slice %[[CST:.+]][1, 1] [2, 2] [1, 1] : tensor<4x4xf32> to tensor<2x2xf32> -// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[SLICE0]], %[[SLICE1]] : tensor<2x4xf32>, tensor<4x2xf32>) outs(%[[SLICE3]] : tensor<2x2xf32>) -> tensor<2x2xf32> +// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[SLICE0]], %[[SLICE1]] : tensor<2x4xf32>, tensor<4x2xf32>) inits(%[[SLICE3]] : tensor<2x2xf32>) -> tensor<2x2xf32> // CHECK: return %[[MATMUL]] : tensor<2x2xf32> //----- @@ -136,12 +136,12 @@ %cst = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<1x112x112x32xf32> - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> %conv = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) - outs(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> + inits(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> %slice = tensor.extract_slice %conv [0, 64, 64, 16] [1, 32, 32, 16] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x32x32x16xf32> @@ -153,8 +153,8 @@ // CHECK: %[[SLICE0:.+]] = tensor.extract_slice %arg0[0, 128, 128, 0] [1, 65, 65, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x65x65x3xf32> // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[0, 0, 0, 16] [3, 3, 3, 16] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x16xf32> // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[INIT]][0, 64, 64, 16] [1, 32, 32, 16] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x32x32x16xf32> -// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST:.+]] : f32) outs(%[[SLICE2]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32> -// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%[[SLICE0]], %[[SLICE1]] : tensor<1x65x65x3xf32>, tensor<3x3x3x16xf32>) outs(%[[FILL]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32> +// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST:.+]] : f32) inits(%[[SLICE2]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32> +// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%[[SLICE0]], %[[SLICE1]] : tensor<1x65x65x3xf32>, tensor<3x3x3x16xf32>) inits(%[[FILL]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32> // CHECK: return %[[CONV]] : tensor<1x32x32x16xf32> //----- @@ -163,7 +163,7 @@ func.func @rank_reducing_slice(%width : index) -> 
tensor<1x1x1x?xf32> { %cst = arith.constant 1.000000e+00 : f32 %init = tensor.empty(%width) : tensor<1x?xf32> - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x?xf32>) -> tensor<1x?xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x?xf32>) -> tensor<1x?xf32> %slice = tensor.extract_slice %fill[0, 0] [1, %width] [1, 1] : tensor<1x?xf32> to tensor %expand = tensor.expand_shape %slice [[0, 1, 2, 3]] : tensor into tensor<1x1x1x?xf32> return %expand : tensor<1x1x1x?xf32> diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir --- a/mlir/test/Dialect/Linalg/bufferize.mlir +++ b/mlir/test/Dialect/Linalg/bufferize.mlir @@ -16,7 +16,7 @@ // CHECK-DAG: %[[RESULT_MEMREF:.*]] = memref.alloc() {{.*}} : memref<4xf32> // CHECK: linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} // CHECK-SAME: ins(%[[MEMREF]] : memref<4xf32>) -// CHECK-SAME: outs(%[[RESULT_MEMREF]] : memref<4xf32>) { +// CHECK-SAME: inits(%[[RESULT_MEMREF]] : memref<4xf32>) { // CHECK: ^bb0(%[[RESULT1:.*]]: f32, %[[UNUSED:.*]]: f32): // CHECK: %[[DIM1:.*]] = math.exp %[[RESULT1]] : f32 // CHECK: linalg.yield %[[DIM1]] : f32 @@ -28,7 +28,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel"] } ins(%arg0 : tensor<4xf32>) - outs(%arg0 : tensor<4xf32>) { + inits(%arg0 : tensor<4xf32>) { ^bb0(%gen_arg1: f32, %out: f32): %tmp1 = math.exp %gen_arg1 : f32 linalg.yield %tmp1 : f32 @@ -50,14 +50,14 @@ // CHECK-DAG: %[[OUT_BUF:.*]] = memref.alloc(%[[SIZE]]) {{.*}} : memref // CHECK: linalg.generic // CHECK-SAME: ins(%[[MEMREF]] : memref) -// CHECK-SAME: outs(%[[OUT_BUF]] : memref) { +// CHECK-SAME: inits(%[[OUT_BUF]] : memref) { func.func @empty_tensor(%in : tensor, %size: index) -> tensor { %init = tensor.empty(%size) : tensor %0 = linalg.generic { indexing_maps = [#map0, #map0], iterator_types = ["parallel"] } ins(%in : tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%gen_arg1: f32, %out: f32): %tmp1 = math.exp %gen_arg1 : f32 linalg.yield %tmp1 : f32 @@ -75,14 +75,14 @@ // CHECK: %[[RESULT1:.*]] = memref.alloc() {{.*}} : memref<4xf32> // CHECK: linalg.generic // CHECK-SAME: ins(%{{.*}} : memref<4xf32>) -// CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref<4xf32>, memref<4xf32>) +// CHECK-SAME: inits(%[[RESULT0]], %[[RESULT1]] : memref<4xf32>, memref<4xf32>) // CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32): func.func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) { %0, %1 = linalg.generic { indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"] } ins(%arg0 : tensor<4xf32>) - outs (%arg0, %arg0 : tensor<4xf32>, tensor<4xf32>) { + inits (%arg0, %arg0 : tensor<4xf32>, tensor<4xf32>) { ^bb0(%gen_arg1: f32, %out1: f32, %out2: f32): %tmp1 = math.exp %gen_arg1 : f32 linalg.yield %tmp1, %tmp1 : f32, f32 @@ -108,14 +108,14 @@ // CHECK-DAG: %[[MEMREF_ARG:.*]] = bufferization.to_memref %[[ARG]] : memref // CHECK: linalg.generic // CHECK-SAME: ins(%[[MEMREF_ARG]] : memref) -// CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref, memref) +// CHECK-SAME: inits(%[[RESULT0]], %[[RESULT1]] : memref, memref) func.func @dynamic_results(%arg0: tensor) -> (tensor, tensor) { %0, %1 = linalg.generic { indexing_maps = [#map_2d, #map_2d, #map_2d], iterator_types = ["parallel", "parallel"] } ins(%arg0 : tensor) - outs (%arg0, %arg0 : tensor, tensor) { + inits (%arg0, %arg0 : tensor, tensor) { ^bb0(%gen_arg1: f32, %out1: f32, %out2: f32): %tmp1 = math.exp %gen_arg1 : f32 linalg.yield %tmp1, %tmp1 : 
f32, f32 @@ -146,13 +146,13 @@ // CHECK: memref.copy %[[ARG1_MEMREF]], %[[INIT_BUFFER]] : memref<3x2xf32> to memref<3x2xf32> // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0_MEMREF]] : memref<2x3x4xvector<3x4xi4>>) -// CHECK-SAME: outs(%[[INIT_BUFFER]] : memref<3x2xf32>) { +// CHECK-SAME: inits(%[[INIT_BUFFER]] : memref<3x2xf32>) { func.func @generic_with_init_tensor(%arg0: tensor<2x3x4xvector<3x4xi4>>, %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>) { %0 = linalg.generic #trait ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>) - outs(%arg1 : tensor<3x2xf32>) { + inits(%arg1 : tensor<3x2xf32>) { ^bb(%v0: vector<3x4xi4>, %v1: f32) : linalg.yield %v1 : f32 } -> tensor<3x2xf32> @@ -167,10 +167,10 @@ func.func @bufferize_fill(%arg0: tensor) -> tensor { %c0 = arith.constant 0.0 : f32 // CHECK: %[[ALLOC:.*]] = memref.alloc - // CHECK: linalg.fill ins(%cst : f32) outs(%[[ALLOC]] : memref) + // CHECK: linalg.fill ins(%cst : f32) inits(%[[ALLOC]] : memref) // CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref // CHECK: return %[[TENSOR]] - %0 = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor) -> tensor + %0 = linalg.fill ins(%c0 : f32) inits(%arg0 : tensor) -> tensor return %0 : tensor } @@ -179,13 +179,13 @@ // CHECK-LABEL: func @bufferize_dot func.func @bufferize_dot(%in: tensor<4xf32>, %out: tensor) -> tensor { %dot = linalg.dot ins(%in, %in : tensor<4xf32>, tensor<4xf32>) - outs(%out : tensor) -> tensor + inits(%out : tensor) -> tensor return %dot : tensor // CHECK: %[[ALLOC:.*]] = memref.alloc // TODO: The copy is not necessary. // CHECK: memref.copy {{.*}}, %[[ALLOC]] // CHECK: linalg.dot ins(%{{.*}}, %{{.*}} : memref<4xf32>, memref<4xf32>) - // CHECK-SAME: outs(%[[ALLOC:.*]] : memref) + // CHECK-SAME: inits(%[[ALLOC:.*]] : memref) // CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref // CHECK: return %[[OUT_TENSOR]] } @@ -202,14 +202,14 @@ // CHECK: %[[collapse:.*]] = tensor.collapse_shape %[[arg0]] // CHECK: %[[collapse_m:.*]] = bufferization.to_memref %[[collapse]] // CHECK: %[[alloc:.*]] = memref.alloc() -// CHECK: linalg.generic {{.*}} ins(%[[collapse_m]] : memref<6xi1>) outs(%[[alloc]] : memref<6xi64>) +// CHECK: linalg.generic {{.*}} ins(%[[collapse_m]] : memref<6xi1>) inits(%[[alloc]] : memref<6xi64>) // CHECK: %[[generic_t:.*]] = bufferization.to_tensor %[[alloc]] // CHECK: %[[call:.*]] = call @csum(%[[generic_t]]) // CHECK: return %[[call]] func.func public @main(%arg0: tensor<2x3xi1>) -> tensor<6xi64> { %0 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<2x3xi1> into tensor<6xi1> %1 = tensor.empty() : tensor<6xi64> - %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<6xi1>) outs(%1 : tensor<6xi64>) { + %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<6xi1>) inits(%1 : tensor<6xi64>) { ^bb0(%arg1: i1, %arg2: i64): %4 = arith.extui %arg1 : i1 to i64 linalg.yield %4 : i64 diff --git a/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir b/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir --- a/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir @@ -13,7 +13,7 @@ // CHECK: arith.addf %[[BBARG]], %[[BBARG]] %0 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg0 : tensor, tensor) - outs(%arg0 : tensor) attrs = {someattr} { + inits(%arg0 : tensor) 
attrs = {someattr} { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %1 = arith.addf %arg1, %arg2 : f32 linalg.yield %1 : f32 @@ -36,7 +36,7 @@ // CHECK: linalg.generic{{.*}}[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]] %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg0 : tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %1 = arith.addf %arg1, %arg2 : f32 linalg.yield %1 : f32 @@ -61,7 +61,7 @@ // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]]) %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg0, %arg0 : tensor, tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %1 = "test.elementwise_mappable"(%arg1, %arg2, %arg3) : (f32, f32, f32) -> f32 linalg.yield %1 : f32 @@ -83,7 +83,7 @@ // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]], %[[BBARG1]]) %0 = linalg.generic {indexing_maps = [#map, #map, #map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg1, %arg0, %arg1 : tensor, tensor, tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): %1 = "test.elementwise_mappable"(%arg2, %arg3, %arg4, %arg5) : (f32, f32, f32, f32) -> f32 linalg.yield %1 : f32 @@ -105,7 +105,7 @@ indexing_maps = [#map0, #map1, #map2, #map3, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0, %arg0, %arg0 + inits(%arg0, %arg0, %arg0, %arg0 : tensor, tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : %1 = arith.addf %b0, %b0: f32 @@ -120,7 +120,7 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor) // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]], %[[ARG0]] : // CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1 // ----- @@ -137,7 +137,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["reduction"]} ins(%arg0 : tensor) - outs(%init0, %init1 : tensor, tensor) { + inits(%init0, %init1 : tensor, tensor) { ^bb0(%b0: f32, %b1: f32, %b2: i32): %8 = linalg.index 0 : index %9 = arith.index_cast %8 : index to i32 @@ -156,7 +156,7 @@ // CHECK-DAG: %[[INIT0:.+]] = tensor.empty() : tensor // CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor // CHECK: %[[GENERIC:.+]]:2 = linalg.generic -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK: return %[[GENERIC]]#1 // ----- @@ -165,11 +165,11 @@ func.func @loop_dim_operand(%arg0 : tensor) -> tensor { %cst = arith.constant 0 : i32 %init = tensor.empty() : tensor - %fill = linalg.fill ins(%cst : i32) outs(%init : tensor) -> tensor + %fill = linalg.fill ins(%cst : i32) inits(%init : tensor) -> tensor %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>], iterator_types = ["reduction"]} - ins(%arg0 : tensor) outs(%fill : tensor) { + ins(%arg0 : tensor) inits(%fill : tensor) { ^bb0(%b0: f32, %b1: i32): %1 = linalg.index 0 : index %2 = arith.index_cast %1 : index to i32 @@ -190,11 +190,11 @@ %cst = arith.constant 0 : i32 %init1 = tensor.empty(%arg0) : tensor %init = tensor.empty() : tensor - %fill = linalg.fill ins(%cst : i32) outs(%init : tensor) -> tensor + %fill = linalg.fill ins(%cst : i32) inits(%init : 
tensor) -> tensor %0:2 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>], iterator_types = ["parallel"]} - outs(%init1, %fill : tensor, tensor) { + inits(%init1, %fill : tensor, tensor) { ^bb0(%b0: i32, %b1: i32): %1 = linalg.index 0 : index %2 = arith.index_cast %1 : index to i32 @@ -207,7 +207,7 @@ // CHECK-SAME: %[[ARG0:.+]]: index // CHECK: %[[INIT:.+]] = tensor.empty(%[[ARG0]]) // CHECK: linalg.generic -// CHECK-SAME: outs(%[[INIT]] +// CHECK-SAME: inits(%[[INIT]] // ----- @@ -222,7 +222,7 @@ iterator_types = ["parallel", "reduction"]} ins(%arg4, %arg0, %arg0, %arg1, %arg3, %arg3 : tensor, tensor, tensor, tensor, tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32, %b4 : i32, %b5 : i32, %b6 : i32): %1 = arith.addi %b0, %b1 : i32 %2 = arith.addi %1, %b2 : i32 @@ -248,7 +248,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "reduction"] // CHECK-SAME: ins(%[[ARG4]], %[[ARG0]], %[[ARG1]], %[[ARG3]] : -// CHECK-SAME: outs(%[[ARG2]] : +// CHECK-SAME: inits(%[[ARG2]] : // CHECK: ^{{.+}}(%[[B0:[a-zA-Z0-9]+]]: i32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: i32 // CHECK-SAME: %[[B2:[a-zA-Z0-9_]+]]: i32 @@ -274,7 +274,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0 : tensor, tensor) { + inits(%arg0, %arg0 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %1 = arith.addf %b0, %b0 : f32 linalg.yield %1, %1 : f32, f32 @@ -284,7 +284,7 @@ // CHECK: func @drop_redundant_results // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK: %[[GENERIC:.+]] = linalg.generic -// CHECK-SAME: outs(%[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]] : // CHECK: return %[[GENERIC]] // ----- @@ -308,7 +308,7 @@ indexing_maps = [#map0, #map1, #map2, #map3, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0, %init0, %init0 + inits(%arg0, %arg0, %init0, %init0 : tensor, tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : linalg.yield %b0, %b0, %b3, %b4 : f32, f32, f32, f32 @@ -323,7 +323,7 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor) // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]], %[[ARG0]] : // CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1 // ----- @@ -347,7 +347,7 @@ indexing_maps = [#map0, #map1, #map2, #map3, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0, %init0, %init0 + inits(%arg0, %arg0, %init0, %init0 : tensor, tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : %1 = arith.addf %b0, %b0: f32 @@ -365,7 +365,7 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor) // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]], %[[ARG0]] : // CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1 // ----- @@ -388,7 +388,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %init0, %init0 + inits(%arg0, %init0, %init0 : tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) : linalg.yield %b2, %b1, %b3 : f32, f32, f32 @@ -403,7 +403,7 @@ // CHECK: %[[INIT:.+]] 
= tensor.empty // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[INIT]] : +// CHECK-SAME: inits(%[[ARG0]], %[[INIT]] : // CHECK: return %[[GENERIC]]#0 // ----- @@ -426,7 +426,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %init0, %init0 + inits(%arg0, %init0, %init0 : tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) : %1 = arith.addf %b1, %b2: f32 @@ -443,5 +443,5 @@ // CHECK: %[[INIT:.+]] = tensor.empty // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[INIT]] : +// CHECK-SAME: inits(%[[ARG0]], %[[INIT]] : // CHECK: return %[[GENERIC]]#0 diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -10,9 +10,9 @@ %2 = memref.view %1[%c0][] : memref to memref<16x16xf32> %3 = memref.cast %2 : memref<16x16xf32> to memref - // CHECK: linalg.matmul ins({{.*}}memref<16x16xf32>, memref<16x16xf32>) outs({{.*}}memref<16x16xf32>) + // CHECK: linalg.matmul ins({{.*}}memref<16x16xf32>, memref<16x16xf32>) inits({{.*}}memref<16x16xf32>) linalg.matmul ins(%3, %3: memref, memref) - outs(%3: memref) + inits(%3: memref) return %3: memref } @@ -32,7 +32,7 @@ memref.copy %arg0, %arg0 : memref<0xf32> to memref<0xf32> // tensor<0xf32> cannot be dce'ed - %1 = linalg.generic #trait outs(%arg1 : tensor<0xf32>) { + %1 = linalg.generic #trait inits(%arg1 : tensor<0xf32>) { ^bb(%0: f32) : linalg.yield %0 : f32 } -> tensor<0xf32> @@ -57,9 +57,9 @@ %tc = tensor.cast %c : tensor<3x?xf32> to tensor // CHECK: linalg.matmul ins({{.*}}tensor<3x4xf32>, tensor<4x?xf32>) - // CHECK-SAME: outs({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32> + // CHECK-SAME: inits({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32> %0 = linalg.matmul ins(%ta, %tb: tensor, tensor) - outs(%tc: tensor) -> tensor + inits(%tc: tensor) -> tensor %1 = tensor.cast %0 : tensor to tensor<3x?xf32> @@ -75,11 +75,11 @@ func.func @linalg_effects(%a : tensor, %b : memref, %c : tensor) { // CHECK-NOT: %{{.*}} = linalg.matmul %t = linalg.matmul ins(%a, %b : tensor, memref) - outs(%c : tensor) -> tensor + inits(%c : tensor) -> tensor // CHECK: linalg.matmul linalg.matmul ins(%a, %c : tensor, tensor) - outs(%b : memref) + inits(%b : memref) return } @@ -99,7 +99,7 @@ indexing_maps = [#map, #map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%3, %3 : tensor, tensor) { + inits(%3, %3 : tensor, tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg3, %arg2 : f32, f32 } -> (tensor, tensor) @@ -120,7 +120,7 @@ indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%arg0 : tensor) - outs(%out : tensor<1x2x3xf32>) { + inits(%out : tensor<1x2x3xf32>) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } -> (tensor<1x2x3xf32>) @@ -140,7 +140,7 @@ indexing_maps = [#map, #map], iterator_types = [] } ins(%arg0 : f32) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } -> (tensor) @@ -164,7 +164,7 @@ ^bb1(%arg1 : f32): %3 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} - ins(%arg0 : tensor) outs(%2 : tensor) { + ins(%arg0 : tensor) inits(%2 
: tensor) { ^bb0(%arg2: f32, %arg3 : f32): linalg.yield %arg1 : f32 } -> tensor @@ -192,7 +192,7 @@ {indexing_maps = [#map, #map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2, %2 : tensor, tensor) { + inits(%2, %2 : tensor, tensor) { ^bb0(%arg3: f32, %arg4 : f32, %arg5 : f32, %arg6 : f32): linalg.yield %arg2, %arg4 : f32, f32 } -> (tensor, tensor) @@ -224,10 +224,10 @@ %c0_i32 = arith.constant 0 : i32 %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.fill ins(%c0_i32 : i32) outs(%arg0 : tensor<7x7xi32>) -> tensor<7x7xi32> + %0 = linalg.fill ins(%c0_i32 : i32) inits(%arg0 : tensor<7x7xi32>) -> tensor<7x7xi32> %1 = linalg.matmul ins(%arg1, %arg1: tensor<7x7xf32>, tensor<7x7xf32>) - outs(%arg1: tensor<7x7xf32>) -> tensor<7x7xf32> - %2 = linalg.generic #trait outs(%arg0 : tensor<7x7xi32>) { + inits(%arg1: tensor<7x7xf32>) -> tensor<7x7xf32> + %2 = linalg.generic #trait inits(%arg0 : tensor<7x7xi32>) { ^bb(%3: i32) : linalg.yield %3 : i32 } -> tensor<7x7xi32> @@ -247,7 +247,7 @@ %c21 = arith.constant 21 : index %c42 = arith.constant 42 : index %0 = tensor.empty(%c21, %c42) : tensor - %1 = linalg.fill ins(%arg1 : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%arg1 : f32) inits(%0 : tensor) -> tensor %2 = tensor.dim %arg0, %c0 : tensor %3 = tensor.dim %arg0, %c1 : tensor %4 = tensor.insert_slice %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor into tensor @@ -255,7 +255,7 @@ } // CHECK-LABEL: func @propogate_casts // CHECK: %[[INIT:.+]] = tensor.empty -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[INSERTED:.+]] = tensor.insert_slice %{{.+}} into %[[FILL]] // CHECK: %[[RESULT:.+]] = tensor.cast %[[INSERTED]] // CHECK: return %[[RESULT]] @@ -278,7 +278,7 @@ // CHECK-SAME: (%[[ARG0:.*]]: tensor) -> tensor { // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]] : tensor) -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0) -> (d0)> func.func @remove_deadargs_generic_basic(%arg0: tensor) -> (tensor) { %c0 = arith.constant 0 : index @@ -286,7 +286,7 @@ %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.empty(%0) : tensor %2 = tensor.empty(%0) : tensor - %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %1 : tensor, tensor) outs (%2:tensor) { + %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %1 : tensor, tensor) inits (%2:tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %4 = arith.addf %arg1, %cst : f32 linalg.yield %4 : f32 @@ -299,7 +299,7 @@ // CHECK-LABEL: func @remove_deadargs_generic_mixedaccess // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-NOT: ins -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0, d1) -> (d0, d1)> #map1 = affine_map<(d0, d1) -> (d1, d0)> func.func @remove_deadargs_generic_mixedaccess(%arg0: tensor) -> (tensor) { @@ -312,7 +312,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = tensor.empty(%1, %0) : tensor %4 = tensor.empty(%0, %1) : tensor - %5 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%2, %3 : tensor, tensor) outs (%4:tensor) { + %5 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%2, %3 : tensor, tensor) inits (%4:tensor) { 
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %6 = arith.divf %cst1, %cst2 : f32 linalg.yield %6 : f32 @@ -326,8 +326,8 @@ %zero = arith.constant 0.0 : f32 // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<6x4xf32> %empty = tensor.empty() : tensor<1x2x3x4xf32> - // CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<6x4xf32>) -> tensor<6x4xf32> - %fill = linalg.fill ins(%zero : f32) outs(%empty : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32> + // CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor<6x4xf32>) -> tensor<6x4xf32> + %fill = linalg.fill ins(%zero : f32) inits(%empty : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32> %reshape = tensor.collapse_shape %fill [[0, 1, 2], [3]] : tensor<1x2x3x4xf32> into tensor<6x4xf32> // CHECK: return %[[FILL]] : tensor<6x4xf32> @@ -341,8 +341,8 @@ func.func @fold_fill_reshape_dynamic(%arg0 : tensor) -> tensor { %zero = arith.constant 0.0 : f32 // CHECK: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] - %0 = linalg.fill ins(%zero : f32) outs(%arg0 : tensor) -> tensor - // CHECK: %[[RESULT:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[RESHAPE]] + %0 = linalg.fill ins(%zero : f32) inits(%arg0 : tensor) -> tensor + // CHECK: %[[RESULT:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[RESHAPE]] %1 = tensor.collapse_shape %0 [[0, 1, 2], [3, 4]] : tensor into tensor // CHECK: return %[[RESULT]] @@ -358,7 +358,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%0 : memref<4x16xf32>) - outs(%0 : memref<4x16xf32>) { + inits(%0 : memref<4x16xf32>) { ^bb0(%arg4: f32, %arg5: f32): linalg.yield %arg4 : f32 } @@ -370,12 +370,12 @@ // CHECK-LABEL: func @fold_static_pad_fill // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<412x276xf32> -// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}inits(%[[INIT]] // CHECK: return %[[FILL]] func.func @fold_static_pad_fill() -> tensor<412x276xf32> { %f0 = arith.constant 0.0 : f32 %empty = tensor.empty() : tensor<400x273xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<400x273xf32>) -> tensor<400x273xf32> %pad = tensor.pad %fill low[4, 1] high[8, 2] { ^bb0(%arg1: index, %arg2: index): tensor.yield %f0 : f32 @@ -395,18 +395,18 @@ // CHECK-DAG: %[[I1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[OF:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[SRC]] : tensor<8x?x16x32xf32>) +// CHECK: %[[OF:.+]] = linalg.fill ins(%[[F0]] : f32) inits(%[[SRC]] : tensor<8x?x16x32xf32>) // CHECK: %[[S0:.+]] = affine.apply #[[MAP0]]()[%[[LOW0]]] // CHECK: %[[DIM1:.+]] = tensor.dim %[[OF]], %[[I1]] : tensor<8x?x16x32xf32> // CHECK: %[[S1:.+]] = affine.apply #[[MAP1]]()[%[[DIM1]]] // CHECK: %[[S2:.+]] = affine.apply #[[MAP2]]()[%[[HIGH2]]] // CHECK: %[[S3:.+]] = affine.apply #[[MAP3]]()[%[[LOW3]], %[[HIGH3]]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[S0]], %[[S1]], %[[S2]], %[[S3]]) : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}inits(%[[INIT]] // CHECK: return %[[FILL]] func.func @fold_dynamic_pad_fill(%empty: tensor<8x?x16x32xf32>, %low0: index, %low3: index, %high2: index, %high3: index) -> tensor { %f0 = arith.constant 0.0 : f32 - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x?x16x32xf32>) -> 
tensor<8x?x16x32xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x?x16x32xf32>) -> tensor<8x?x16x32xf32> %pad = tensor.pad %fill low[%low0, 8, 7, %low3] high[1, 2, %high2, %high3] { ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): tensor.yield %f0 : f32 @@ -421,7 +421,7 @@ %f0 = arith.constant 0.0 : f32 %f1 = arith.constant 1.0 : f32 %empty = tensor.empty() : tensor<400x273xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<400x273xf32>) -> tensor<400x273xf32> // CHECK: tensor.pad %pad = tensor.pad %fill low[4, 1] high[8, 2] { ^bb0(%arg1: index, %arg2: index): @@ -451,7 +451,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%arg0, %arg1 : tensor<2x3x4xf32>, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32): %9 = arith.addf %arg2, %arg3 : f32 linalg.yield %9 : f32 @@ -461,7 +461,7 @@ // CHECK: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor to tensor<2x3x4xf32> // CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>) - // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>) + // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>) } // ----- @@ -482,7 +482,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%arg0, %4 : tensor<2x3x4xf32>, tensor<2x?x?xf32>) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32): %9 = arith.addf %arg2, %arg3 : f32 linalg.yield %9 : f32 @@ -492,7 +492,7 @@ // CHECK: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor to tensor<2x3x4xf32> // CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>) - // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>) + // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>) } // ----- @@ -514,7 +514,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%arg0, %5 : tensor, tensor<2x?x?xf32>) - outs(%4 : tensor<2x3x4xf32>) { + inits(%4 : tensor<2x3x4xf32>) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): %9 = arith.addf %arg3, %arg4 : f32 linalg.yield %9 : f32 @@ -524,7 +524,7 @@ // CHECK-NEXT: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor to tensor<2x3x4xf32> // CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[CAST_ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>) - // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>) + // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>) } // ----- @@ -548,7 +548,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%4, %5 : tensor<2x?x?xf32>, tensor<2x?x?xf32>) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32): %9 = arith.addf %arg2, %arg3 : f32 linalg.yield %9 : f32 @@ -557,7 +557,7 @@ return %7: tensor<2x3x4xf32> // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>) - // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>) + // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>) } // ----- @@ -572,7 +572,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor<1x?x?xf32>) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %3 = 
arith.subf %arg5, %arg6 : f32 linalg.yield %3 : f32 @@ -580,7 +580,7 @@ return %2 : tensor // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%{{.*}}, %[[ARG1]] : tensor<1x?x?xf32>, tensor<1x?x?xf32>) -// CHECK-SAME: outs(%{{.*}} : tensor<1x?x?xf32>) +// CHECK-SAME: inits(%{{.*}} : tensor<1x?x?xf32>) // CHECK: tensor.cast %[[GENERIC_OP]] : tensor<1x?x?xf32> to tensor } @@ -594,7 +594,7 @@ // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index // CHECK-DAG: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[INIT:.+]] = tensor.empty() -// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}inits(%[[INIT]] // CHECK: %[[OFFSET1:.+]] = affine.apply #[[$MAP]]()[%[[LOW1]]] // CHECK: %[[D0:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor // CHECK: %[[D1:.+]] = tensor.dim %[[INPUT]], %[[C1]] : tensor @@ -608,7 +608,7 @@ tensor.yield %f0 : f32 } : tensor to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> %0 = tensor.insert_slice %pad into %fill[0, 1, 2] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> return %0: tensor<8x384x384xf32> } @@ -629,7 +629,7 @@ tensor.yield %f0 : f32 } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> %0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> @@ -648,7 +648,7 @@ tensor.yield %f0 : f32 } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> %0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %1 = tensor.insert_slice %a into %0 [0, 0, 129] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> // Range overlap with %1 at dim#3 @@ -668,7 +668,7 @@ tensor.yield %f0 : f32 } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> %0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %1 = tensor.insert_slice %a into %0 [0, 128, 255] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> // Range overlap with %0 at dim#3 @@ -688,7 +688,7 @@ tensor.yield %f0 : f32 } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : 
tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> // Overlap between %0 and %1 is fine, but overlap with %2 is not. // CHECK-COUNT-3: tensor.insert_slice %0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> @@ -711,7 +711,7 @@ } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> // Different filling value than padding value. - %fill = linalg.fill ins(%f1 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f1 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> %0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> @@ -723,7 +723,7 @@ func.func @fold_linalgop_with_cast_consumer(%arg0 : tensor, %arg1 : tensor, %arg2 : tensor) -> (tensor<4x8xf32>, tensor) { %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) -> tensor + inits(%arg2 : tensor) -> tensor %1 = tensor.cast %0 : tensor to tensor<4x8xf32> return %1, %0 : tensor<4x8xf32>, tensor } @@ -736,7 +736,7 @@ // CHECK-DAG: %[[OUT_CAST:.+]] = tensor.cast %[[ARG2]] : tensor to tensor<4x8xf32> // CHECK: %[[MATMUL:.+]] = linalg.matmul // CHECK-SAME: ins(%[[LHS_CAST]], %[[RHS_CAST]] : -// CHECK-SAME: outs(%[[OUT_CAST]] : +// CHECK-SAME: inits(%[[OUT_CAST]] : // CHECK: %[[RESULT_CAST:.+]] = tensor.cast %[[MATMUL]] // CHECK: return %[[MATMUL]], %[[RESULT_CAST]] @@ -747,7 +747,7 @@ func.func @linalgop_with_cond_cast_consumer(%arg0 : tensor, %arg1 : tensor, %arg2 : tensor, %arg3 : i1) -> tensor { %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) -> tensor + inits(%arg2 : tensor) -> tensor scf.if %arg3 { %1 = tensor.cast %0 : tensor to tensor<4x8xf32> func.call @some_use(%1) : (tensor<4x8xf32>) -> () @@ -759,7 +759,7 @@ // CHECK-LABEL: func @linalgop_with_cond_cast_consumer // CHECK-SAME: (%[[ARG0:.*]]: tensor, %[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor, %[[ARG3:.*]]: i1) // CHECK: %[[RES:.*]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]] : tensor, tensor) -// CHECK-SAME: outs(%[[ARG2]] : tensor) -> tensor +// CHECK-SAME: inits(%[[ARG2]] : tensor) -> tensor // CHECK: scf.if %[[ARG3]] { // CHECK: %[[CAST:.*]] = tensor.cast %[[RES]] : tensor to tensor<4x8xf32> // CHECK: func.call @some_use(%[[CAST]]) : (tensor<4x8xf32>) -> () @@ -773,7 +773,7 @@ %arg1 : tensor, %arg2 : tensor) -> (tensor<4x8x12x16xf32>, tensor) { %0 = linalg.conv_2d_nchw_fchw ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) -> tensor + inits(%arg2 : tensor) -> tensor %1 = tensor.cast %0 : tensor to tensor<4x8x12x16xf32> return %1, %0 : tensor<4x8x12x16xf32>, tensor } @@ -784,7 +784,7 @@ // CHECK: %[[OUT_CAST:.+]] = tensor.cast %[[ARG2]] : tensor to tensor<4x8x12x16xf32> // CHECK: %[[CONV:.+]] = linalg.conv_2d_nchw_fchw // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[OUT_CAST]] : +// CHECK-SAME: inits(%[[OUT_CAST]] : // CHECK: %[[RESULT_CAST:.+]] = tensor.cast %[[CONV]] // CHECK: return %[[CONV]], %[[RESULT_CAST]] @@ -804,7 +804,7 @@ indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d1,
d2, d0)>, affine_map<(d0, d1, d2) -> (d2, d1, d0)>]} - ins(%arg0 : tensor) outs(%empty1, %empty2 : tensor, tensor) { + ins(%arg0 : tensor) inits(%empty1, %empty2 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32) : linalg.yield %b0, %b0 : f32, f32 } -> (tensor, tensor) @@ -818,7 +818,7 @@ // CHECK-DAG: %[[INIT2:.+]] = tensor.empty() : tensor<3x2x4xf32> // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: ins(%[[CAST]] : -// CHECK-SAME: outs(%[[INIT2]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT2]], %[[INIT1]] : // CHECK: %[[RETURN_CAST:.+]] = tensor.cast %[[GENERIC]]#0 : tensor<3x2x4xf32> to tensor // CHECK: return %[[RETURN_CAST]], %[[GENERIC]]#1 @@ -830,7 +830,7 @@ indexing_maps = [#map, #map], iterator_types = ["parallel"] } ins(%arg0 : tensor) - outs(%arg1 : memref) { + inits(%arg1 : memref) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } @@ -845,7 +845,7 @@ // CHECK-SAME: indexing_maps = [#map, #map], // CHECK-SAME: iterator_types = ["parallel"] // CHECK-SAME: } ins(%[[ARG1]] : tensor) -// CHECK-SAME: outs(%[[ARG2]] : memref) { +// CHECK-SAME: inits(%[[ARG2]] : memref) { // ----- @@ -858,13 +858,13 @@ affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%1, %1 : memref<4xf32>, memref<4xf32>) - outs(%0 : tensor<4xf32>) { + inits(%0 : tensor<4xf32>) { ^bb0(%in: f32, %in_24: f32, %out: f32): linalg.yield %in : f32 } -> tensor<4xf32> %53 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%36 : tensor<4xf32>) { + inits(%36 : tensor<4xf32>) { ^bb0(%out: f32): linalg.yield %out : f32 } -> tensor<4xf32> diff --git a/mlir/test/Dialect/Linalg/collapse-dim.mlir b/mlir/test/Dialect/Linalg/collapse-dim.mlir --- a/mlir/test/Dialect/Linalg/collapse-dim.mlir +++ b/mlir/test/Dialect/Linalg/collapse-dim.mlir @@ -7,7 +7,7 @@ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} - ins(%arg0 : tensor<2x32x10x4096xf32>) outs(%arg1 : tensor<2x32xf32>) { + ins(%arg0 : tensor<2x32x10x4096xf32>) inits(%arg1 : tensor<2x32xf32>) { ^bb0(%arg3: f32, %arg4: f32): %1 = arith.addf %arg3, %arg4 : f32 linalg.yield %1 : f32 @@ -22,7 +22,7 @@ // CHECK: %[[T:.*]] = tensor.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : tensor<2x32x10x4096xf32> into tensor<2x32x40960xf32> // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} -// CHECK-SAME: ins(%[[T]] : tensor<2x32x40960xf32>) outs(%{{.*}} : tensor<2x32xf32>) { +// CHECK-SAME: ins(%[[T]] : tensor<2x32x40960xf32>) inits(%{{.*}} : tensor<2x32xf32>) { // CHECK: } -> tensor<2x32xf32> // ----- @@ -34,7 +34,7 @@ affine_map<(d0, d1, d2, d3) -> (d1, d0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%arg0 : tensor<32x2x10x4096xf32>) outs(%arg1 : tensor<2x32x10x4096xf32>) { + ins(%arg0 : tensor<32x2x10x4096xf32>) inits(%arg1 : tensor<2x32x10x4096xf32>) { ^bb0(%arg3: f32, %arg4: f32): %1 = arith.addf %arg3, %arg4 : f32 linalg.yield %1 : f32 @@ -50,6 +50,6 @@ // CHECK-DAG: %[[D:.*]] = tensor.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : tensor<2x32x10x4096xf32> into tensor<2x32x40960xf32> // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]} -// CHECK-SAME: ins(%[[S]] : tensor<32x2x40960xf32>) 
outs(%[[D]] : tensor<2x32x40960xf32>) { +// CHECK-SAME: ins(%[[S]] : tensor<32x2x40960xf32>) inits(%[[D]] : tensor<2x32x40960xf32>) { // CHECK: } -> tensor<2x32x40960xf32> // CHECK: tensor.expand_shape %[[R]] {{\[}}[0], [1], [2, 3]] : tensor<2x32x40960xf32> into tensor<2x32x10x4096xf32> diff --git a/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir b/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir --- a/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir +++ b/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir @@ -18,7 +18,7 @@ %0 = test.linalg_conv_op { indexing_maps = [#map, #map], iterator_types = ["parallel"]} - ins(%arg0 : tensor) outs(%arg1 : tensor) { + ins(%arg0 : tensor) inits(%arg1 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg3 : f32 } -> tensor @@ -36,7 +36,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -54,7 +54,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -72,7 +72,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -90,7 +90,7 @@ affine_map<(d0, d1) -> (d0 + d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -110,7 +110,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -129,7 +129,7 @@ affine_map<(d0, d1, d2) -> (d0, d2)>], iterator_types = ["parallel", "reduction", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -148,7 +148,7 @@ affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -167,7 +167,7 @@ affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -186,7 +186,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor diff --git a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir --- a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir +++ b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir @@ -10,7 +10,7 @@ // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]], 
#[[$MAP]]] // CHECK-SAME: iterator_types = [] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[ARG0]] + // CHECK-SAME: inits(%[[ARG0]] // CHECK: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32, %{{.*}}: f32): // CHECK: %[[YIELD:.*]] = arith.addf %[[LHS]], %[[RHS]] : f32 // CHECK: linalg.yield %[[YIELD]] : f32 @@ -29,7 +29,7 @@ // CHECK: linalg.generic // CHECK-SAME: iterator_types = ["parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[ARG0]] + // CHECK-SAME: inits(%[[ARG0]] %0 = arith.addf %arg0, %arg1 : tensor return %0 : tensor } @@ -42,7 +42,7 @@ func.func @exp(%arg0: tensor) -> tensor { // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]] - // CHECK-SAME: outs(%[[ARG0]] + // CHECK-SAME: inits(%[[ARG0]] // CHECK: ^bb0(%[[SCALAR:.*]]: f32, %{{.*}}: f32): // CHECK: %[[YIELD:.*]] = math.exp %[[SCALAR]] : f32 // CHECK: linalg.yield %[[YIELD]] : f32 @@ -60,7 +60,7 @@ func.func @select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] - // CHECK-SAME: outs(%[[ARG1]] + // CHECK-SAME: inits(%[[ARG1]] // CHECK: ^bb0(%[[PRED:.*]]: i1, %[[TRUE_VAL:.*]]: i32, %[[FALSE_VAL:.*]]: i32, %{{.*}}: i32): // CHECK: arith.select %[[PRED]], %[[TRUE_VAL]], %[[FALSE_VAL]] : i32 %0 = arith.select %arg0, %arg1, %arg2 : tensor, tensor @@ -78,7 +78,7 @@ // CHECK: %[[INIT:.*]] = tensor.empty() : tensor // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[INIT]] + // CHECK-SAME: inits(%[[INIT]] // CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: i1): // CHECK: arith.cmpf olt, %{{.*}}, %{{.*}} : f32 %0 = arith.cmpf olt, %arg0, %arg1 : tensor @@ -101,7 +101,7 @@ // CHECK: %[[INIT:.*]] = tensor.empty(%[[D1]], %[[D2]], %[[D5]]) : tensor<4x?x?x8x2x?xi1> // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[INIT]] + // CHECK-SAME: inits(%[[INIT]] // CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: i1): // CHECK: arith.cmpf olt, %{{.*}}, %{{.*}} : f32 %0 = arith.cmpf olt, %arg0, %arg1 : tensor<4x?x?x8x2x?xf32> diff --git a/mlir/test/Dialect/Linalg/decompose-ops.mlir b/mlir/test/Dialect/Linalg/decompose-ops.mlir --- a/mlir/test/Dialect/Linalg/decompose-ops.mlir +++ b/mlir/test/Dialect/Linalg/decompose-ops.mlir @@ -15,7 +15,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor, tensor, tensor) - outs(%init1, %init2 : tensor, tensor) { + inits(%init1, %init2 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : %0 = arith.addf %b0, %b1 : f32 %1 = arith.mulf %0, %b2 : f32 @@ -41,7 +41,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP3]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -55,7 +55,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP3]], #[[MAP0]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[GENERIC1]]#2 : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B6:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B7:[a-zA-Z0-9_]+]]: f32 @@ -87,7 +87,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP0]], #[[MAP1]], 
#[[MAP2]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -98,7 +98,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP3]], #[[MAP2]], #[[MAP0]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG2]], %[[GENERIC1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT2]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B3:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B4:[a-zA-Z0-9_]+]]: f32 @@ -124,7 +124,7 @@ affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor, tensor, tensor) - outs(%init1, %init2, %init2 : tensor, tensor, tensor) { + inits(%init1, %init2, %init2 : tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32, %b5 : f32) : %0 = arith.addf %b0, %b1 : f32 %1 = arith.mulf %0, %b2 : f32 @@ -150,7 +150,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP0]], #[[MAP3]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]], %[[INIT2]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]], %[[INIT2]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -165,7 +165,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP3]], #[[MAP0]], #[[MAP0]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[GENERIC1]]#3 : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]], %[[INIT2]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]], %[[INIT2]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B7:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B8:[a-zA-Z0-9_]+]]: f32 @@ -195,7 +195,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]], %[[INIT2]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]], %[[INIT2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -206,7 +206,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP3]], #[[MAP2]], #[[MAP0]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG2]], %[[GENERIC1]]#0 : -// CANONICALIZECHECK-SAME: outs(%[[INIT2]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B4:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B5:[a-zA-Z0-9_]+]]: f32 @@ -226,7 +226,7 @@ indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor<10x20xf32>, tensor<10xi32>) - outs(%init : tensor<20x10xf64>) { + inits(%init : tensor<20x10xf64>) { ^bb0(%b0 : f32, %b1 : i32, %b2 : f64): %1 = arith.sitofp %b1 : i32 to f64 %2 = arith.extf %b0 : f32 to f64 @@ -248,7 +248,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[INIT0]], 
%[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:.+]]: f32 // CHECK-SAME: %[[B1:.+]]: i32 @@ -260,7 +260,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP0]], #[[MAP2]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[GENERIC0]]#1 : -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B4:.+]]: f32 // CHECK-SAME: %[[B5:.+]]: i32 @@ -273,7 +273,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP0]], #[[MAP0]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[GENERIC0]]#1, %[[GENERIC1]]#1 : -// CHECK-SAME: outs(%[[INIT0]] : +// CHECK-SAME: inits(%[[INIT0]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B9:.+]]: f32 // CHECK-SAME: %[[B10:.+]]: i32 @@ -296,7 +296,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B0:.+]]: i32 // CANONICALIZECHECK-SAME: %[[B1:.+]]: f64 @@ -306,7 +306,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP1]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B2:.+]]: f32 // CANONICALIZECHECK-SAME: %[[B3:.+]]: f64 @@ -316,7 +316,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP1]], #[[MAP2]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[GENERIC0]], %[[GENERIC1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT0]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT0]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B4:[a-zA-Z0-9_]+]]: f64 // CANONICALIZECHECK-SAME: %[[B5:[a-zA-Z0-9_]+]]: f64 @@ -339,7 +339,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2, %arg3 : tensor, tensor) { + inits(%arg2, %arg3 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) : %1 = arith.addf %b0, %b2 : f32 %2 = arith.mulf %b1, %b3 : f32 @@ -360,7 +360,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[ARG2]], %[[ARG3]], %[[ARG2]] : +// CHECK-SAME: inits(%[[ARG2]], %[[ARG3]], %[[ARG2]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: f32 @@ -373,7 +373,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[GENERIC1]]#2 : -// CHECK-SAME: outs(%[[ARG2]], %[[ARG3]] : +// CHECK-SAME: inits(%[[ARG2]], %[[ARG3]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[ARG9:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[ARG10:[a-zA-Z0-9_]+]]: f32 @@ -397,7 +397,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", 
"parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]] : -// CANONICALIZECHECK-SAME: outs(%[[ARG2]] : +// CANONICALIZECHECK-SAME: inits(%[[ARG2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: f32 @@ -407,7 +407,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP1]], #[[MAP1]], #[[MAP3]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG1]], %[[GENERIC1]] : -// CANONICALIZECHECK-SAME: outs(%[[ARG2]], %[[ARG3]] : +// CANONICALIZECHECK-SAME: inits(%[[ARG2]], %[[ARG3]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: f32 diff --git a/mlir/test/Dialect/Linalg/detensorize_0d.mlir b/mlir/test/Dialect/Linalg/detensorize_0d.mlir --- a/mlir/test/Dialect/Linalg/detensorize_0d.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_0d.mlir @@ -6,7 +6,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -25,7 +25,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -34,7 +34,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %5 = arith.mulf %arg3, %arg4 : f32 linalg.yield %5 : f32 @@ -43,7 +43,7 @@ %6 = tensor.empty() : tensor %7 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%1, %4 : tensor, tensor) - outs(%6 : tensor) { + inits(%6 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %5 = arith.divf %arg3, %arg4 : f32 linalg.yield %5 : f32 @@ -65,7 +65,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 %3 = arith.mulf %2, %arg4 : f32 @@ -86,7 +86,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = "foreign.do_something"(%arg3, %arg4) {} : (f32, f32) -> f32 linalg.yield %2 : f32 diff --git a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir --- a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir @@ -10,7 +10,7 @@ %3 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} ins(%arg0_t : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i1, %arg3: i8): %10 = arith.extui %arg2 : i1 to i8 linalg.yield %10 : i8 @@ -23,7 +23,7 @@ %7 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} ins(%arg1_t, %cst : tensor, tensor) - outs(%6 : 
tensor) { + inits(%6 : tensor) { ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): %10 = arith.addi %arg2, %arg3 : i32 linalg.yield %10 : i32 diff --git a/mlir/test/Dialect/Linalg/detensorize_if.mlir b/mlir/test/Dialect/Linalg/detensorize_if.mlir --- a/mlir/test/Dialect/Linalg/detensorize_if.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_if.mlir @@ -18,7 +18,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic #attrs ins(%2, %1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 @@ -30,7 +30,7 @@ %7 = tensor.empty() : tensor %8 = linalg.generic #attrs ins(%6, %6 : tensor, tensor) - outs(%7 : tensor) { + inits(%7 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): %9 = arith.addi %arg0, %arg1 : i32 linalg.yield %9 : i32 @@ -79,7 +79,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic #attrs ins(%2, %1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 @@ -91,7 +91,7 @@ %7 = tensor.empty() : tensor %8 = linalg.generic #attrs ins(%6, %6 : tensor, tensor) - outs(%7 : tensor) { + inits(%7 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): %9 = arith.addi %arg0, %arg1 : i32 linalg.yield %9 : i32 @@ -142,7 +142,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic #attrs ins(%2, %1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 @@ -159,7 +159,7 @@ %7 = tensor.empty() : tensor %8 = linalg.generic #attrs ins(%6, %12 : tensor, tensor) - outs(%7 : tensor) { + inits(%7 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): %9 = arith.addi %arg0, %arg1 : i32 linalg.yield %9 : i32 diff --git a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir --- a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir @@ -15,7 +15,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic #attrs ins(%farg0, %1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 diff --git a/mlir/test/Dialect/Linalg/detensorize_while.mlir b/mlir/test/Dialect/Linalg/detensorize_while.mlir --- a/mlir/test/Dialect/Linalg/detensorize_while.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while.mlir @@ -15,7 +15,7 @@ %1 = tensor.empty() : tensor %2 = linalg.generic #attrs ins(%0, %farg1 : tensor, tensor) - outs(%1 : tensor) { + inits(%1 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 @@ -27,7 +27,7 @@ %5 = tensor.empty() : tensor %6 = linalg.generic #attrs ins(%4, %4 : tensor, tensor) - outs(%5 : tensor) { + inits(%5 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): %8 = arith.addi %arg0, %arg1 : i32 linalg.yield %8 : i32 diff --git a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir --- a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir @@ -28,7 +28,7 @@ %1 = tensor.empty() : tensor %2 = linalg.generic #sum_reduction_attrs ins(%0: tensor<10xi32>) - outs(%1: tensor) { + inits(%1: tensor) { ^bb(%a: i32, %x: i32): %b = arith.addi %x, %a : i32 linalg.yield %b : i32 @@ -37,7 +37,7 @@ %3 = 
tensor.empty() : tensor %4 = linalg.generic #attrs ins(%2, %farg1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 @@ -49,7 +49,7 @@ %7 = tensor.empty() : tensor<10xi32> %9 = linalg.generic #broadcast_attrs ins(%6: tensor) - outs(%7: tensor<10xi32>) { + inits(%7: tensor<10xi32>) { ^bb(%a: i32, %b: i32) : linalg.yield %a : i32 } -> tensor<10xi32> @@ -67,7 +67,7 @@ // DET-ALL: cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>) // DET-ALL: ^[[bb1]](%{{.*}}: tensor<10xi32>) // DET-ALL: tensor.empty() : tensor -// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor) { +// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) inits(%{{.*}} : tensor) { // DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32): // DET-ALL: %{{.*}} = arith.addi %{{.*}}, %{{.*}} // DET-ALL: linalg.yield %{{.*}} : i32 @@ -78,7 +78,7 @@ // DET-ALL: ^[[bb2]](%{{.*}}: i32) // DET-ALL: tensor.from_elements %{{.*}} : tensor // DET-ALL: tensor.empty() : tensor<10xi32> -// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor) outs(%{{.*}} : tensor<10xi32>) { +// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor) inits(%{{.*}} : tensor<10xi32>) { // DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32): // DET-ALL: linalg.yield %{{.*}} : i32 // DET-ALL: } -> tensor<10xi32> @@ -92,12 +92,12 @@ // DET-CF-SAME: (%{{.*}}: tensor<10xi32>, %{{.*}}: tensor) // DET-CF: cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>) // DET-CF: ^bb1(%{{.*}}: tensor<10xi32>) -// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor) { +// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) inits(%{{.*}} : tensor) { // DET-CF: tensor.extract %{{.*}}[] : tensor // DET-CF: cmpi slt, %{{.*}}, %{{.*}} : i32 // DET-CF: cf.cond_br %{{.*}}, ^bb2(%{{.*}} : tensor), ^bb3(%{{.*}} : tensor) // DET-CF: ^bb2(%{{.*}}: tensor) -// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor) outs(%{{.*}} : tensor<10xi32>) { +// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor) inits(%{{.*}} : tensor<10xi32>) { // DET-CF: cf.br ^bb1(%{{.*}} : tensor<10xi32>) // DET-CF: ^bb3(%{{.*}}: tensor) // DET-CF: return %{{.*}} : tensor diff --git a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir --- a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir @@ -20,7 +20,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic #attrs ins(%2, %reshaped1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 @@ -32,7 +32,7 @@ %7 = tensor.empty() : tensor %8 = linalg.generic #attrs ins(%6, %6 : tensor, tensor) - outs(%7 : tensor) { + inits(%7 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): %9 = arith.addi %arg0, %arg1 : i32 linalg.yield %9 : i32 diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -15,7 +15,7 @@ func.func @drop_one_trip_loops(%arg0 : tensor, %arg1 : f32, %shape: tensor) -> tensor { %0 = linalg.generic #trait ins(%arg0, %arg1 : tensor, f32) - outs(%shape : tensor) { + inits(%shape : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield 
%arg3 : f32 } -> tensor @@ -49,7 +49,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor) - outs(%shape: tensor) { + inits(%shape: tensor) { ^bb0(%arg6 : i32, %arg7 : i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -95,7 +95,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xf32>) - outs(%arg0 : tensor<1x1xf32>) { + inits(%arg0 : tensor<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } -> tensor<1x1xf32> @@ -122,7 +122,7 @@ (%arg0 : tensor<1x1xi32>) -> tensor<1x1xi32>{ %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xi32>) - outs(%arg0 : tensor<1x1xi32>) { + inits(%arg0 : tensor<1x1xi32>) { ^bb0(%arg3: i32, %arg4: i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -155,7 +155,7 @@ func.func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> { %0 = linalg.generic #trait ins(%arg0 : tensor<1x5xf32>) - outs(%shape : tensor<5xf32>) { + inits(%shape : tensor<5xf32>) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } -> tensor<5xf32> @@ -189,7 +189,7 @@ %1 = tensor.expand_shape %arg1 [[0, 1]] : tensor<5xf32> into tensor<5x1xf32> %2 = linalg.generic #trait ins(%0, %1 : tensor<1x5xf32>, tensor<5x1xf32>) - outs(%shape : tensor<5x5xf32>) { + inits(%shape : tensor<5x5xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.addf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -223,7 +223,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xf32>) - outs(%shape : tensor) { + inits(%shape : tensor) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } -> tensor @@ -249,7 +249,7 @@ %1 = tensor.empty() : tensor<1x2x5xf32> %2 = linalg.generic {i64, indexing_maps = [#map1, #map0], iterator_types = ["parallel", "parallel", "parallel"]} - ins(%arg0 : tensor<5xf32>) outs(%1 : tensor<1x2x5xf32>) { + ins(%arg0 : tensor<5xf32>) inits(%1 : tensor<1x2x5xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<1x2x5xf32> @@ -266,11 +266,11 @@ func.func @fold_unit_dim_for_empty_tensor(%input: tensor<1x1000xf32>) -> tensor<1xf32> { %cst = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<1xf32> - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1xf32>) -> tensor<1xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1xf32>) -> tensor<1xf32> %add = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%input : tensor<1x1000xf32>)outs(%fill : tensor<1xf32>) { + ins(%input : tensor<1x1000xf32>)inits(%fill : tensor<1xf32>) { ^bb0(%arg1: f32, %arg2: f32): %1823 = arith.addf %arg1, %arg2 : f32 linalg.yield %1823 : f32 @@ -287,12 +287,12 @@ // CHECK: %[[INPUT_RESHAPE:.+]] = tensor.collapse_shape %{{.+}} {{\[}}[0, 1]] : tensor<1x1000xf32> into tensor<1000xf32> // CHECK: %[[INIT:.+]] = tensor.empty() : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor) -> tensor +// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor) -> tensor // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["reduction"] // CHECK-SAME: ins(%[[INPUT_RESHAPE]] : tensor<1000xf32>) -// CHECK-SAME: outs(%[[FILL]] : tensor) +// CHECK-SAME: inits(%[[FILL]] : tensor) // CHECK: %[[GENERIC_RESHAPE:.+]] = tensor.expand_shape %[[GENERIC]] [] : tensor into tensor<1xf32> // CHECK: return %[[GENERIC_RESHAPE:.+]] : tensor<1xf32> @@ -331,13 +331,13 @@ %c3 = 
arith.constant 3 : index %0 = tensor.dim %arg0, %c3 : tensor<1x?x1x?xf32> %1 = tensor.empty(%0) : tensor<1x?xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x?xf32>) -> tensor<1x?xf32> + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<1x?xf32>) -> tensor<1x?xf32> %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} ins(%arg0 : tensor<1x?x1x?xf32>) - outs(%2 : tensor<1x?xf32>) { + inits(%2 : tensor<1x?xf32>) { ^bb0(%arg1: f32, %arg2: f32): %4 = arith.addf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -350,12 +350,12 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x?xf32> // CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2], [3]] // CHECK: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "reduction"] // CHECK-SAME: ins(%[[RESHAPE]] : tensor) -// CHECK-SAME: outs(%[[FILL]] : tensor) +// CHECK-SAME: inits(%[[FILL]] : tensor) // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[RESULT]] {{\[}}[0, 1]] // CHECK: return %[[RESULT_RESHAPE]] @@ -365,13 +365,13 @@ %cst = arith.constant 1.000000e+00 : f32 %c3 = arith.constant 3 : index %1 = tensor.empty() : tensor<1x1xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} ins(%arg0 : tensor<1x?x1x1xf32>) - outs(%2 : tensor<1x1xf32>) { + inits(%2 : tensor<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32): %4 = arith.addf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -383,12 +383,12 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x1xf32> // CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2, 3] // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1xf32> -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel"] // CHECK-SAME: ins(%[[RESHAPE]] : tensor) -// CHECK-SAME: outs(%[[FILL]] : tensor<1xf32>) +// CHECK-SAME: inits(%[[FILL]] : tensor<1xf32>) // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[RESULT]] {{\[}}[0, 1]] // CHECK: return %[[RESULT_RESHAPE]] @@ -399,13 +399,13 @@ %c2 = arith.constant 2 : index %0 = tensor.dim %arg0, %c2 : tensor %1 = tensor.empty(%0) : tensor - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg1: f32, %arg2: f32): %4 = arith.addf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -418,12 +418,12 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape 
%[[ARG0]] {{\[}}[0, 1], [2, 3]] // CHECK: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "reduction"] // CHECK-SAME: ins(%[[RESHAPE]] : tensor) -// CHECK-SAME: outs(%[[FILL]] : tensor) +// CHECK-SAME: inits(%[[FILL]] : tensor) // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[RESULT]] {{\[}}[0, 1]] // CHECK: return %[[RESULT_RESHAPE]] @@ -468,7 +468,7 @@ func.func @drop_one_trip_loops(%arg0 : memref, %arg1 : f32, %shape: memref) -> memref { linalg.generic #trait ins(%arg0, %arg1 : memref, f32) - outs(%shape : memref) { + inits(%shape : memref) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield %arg3 : f32 } @@ -501,7 +501,7 @@ { linalg.generic #trait ins(%arg0 : memref) - outs(%shape: memref) { + inits(%shape: memref) { ^bb0(%arg6 : i32, %arg7 : i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -547,7 +547,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32>) - outs(%arg0 : memref<1x1xf32>) { + inits(%arg0 : memref<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } @@ -574,7 +574,7 @@ (%arg0 : memref<1x1xi32>) -> memref<1x1xi32>{ linalg.generic #trait ins(%arg0 : memref<1x1xi32>) - outs(%arg0 : memref<1x1xi32>) { + inits(%arg0 : memref<1x1xi32>) { ^bb0(%arg3: i32, %arg4: i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -607,7 +607,7 @@ func.func @leading_dim_1_canonicalization(%arg0: memref<1x5xf32>, %shape: memref<5xf32>) -> memref<5xf32> { linalg.generic #trait ins(%arg0 : memref<1x5xf32>) - outs(%shape : memref<5xf32>) { + inits(%shape : memref<5xf32>) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } @@ -641,7 +641,7 @@ %1 = memref.expand_shape %arg1 [[0, 1]] : memref<5xf32> into memref<5x1xf32> linalg.generic #trait ins(%0, %1 : memref<1x5xf32>, memref<5x1xf32>) - outs(%shape : memref<5x5xf32>) { + inits(%shape : memref<5x5xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.addf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -675,7 +675,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32>) - outs(%shape : memref) { + inits(%shape : memref) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } @@ -701,7 +701,7 @@ %1 = memref.alloc() : memref<1x2x5xf32> linalg.generic {i64, indexing_maps = [#map1, #map0], iterator_types = ["parallel", "parallel", "parallel"]} - ins(%arg0 : memref<5xf32>) outs(%1 : memref<1x2x5xf32>) { + ins(%arg0 : memref<5xf32>) inits(%1 : memref<1x2x5xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } @@ -713,7 +713,7 @@ // CHECK: %[[ALLOC:.*]] = memref.alloc() : memref<1x2x5xf32> // CHECK: %[[OUT:.*]] = memref.collapse_shape %[[ALLOC]] // CHECK: linalg.generic -// CHECK-SAME: outs(%[[OUT:.*]] : +// CHECK-SAME: inits(%[[OUT:.*]] : // CHECK: %[[RESULT:.*]] = memref.collapse_shape %[[ALLOC]] // CHECK: return %[[RESULT]] @@ -725,7 +725,7 @@ linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%input : memref<1x1000xf32>)outs(%init : memref<1xf32>) { + ins(%input : memref<1x1000xf32>)inits(%init : memref<1xf32>) { ^bb0(%arg1: f32, %arg2: f32): %1823 = arith.addf %arg1, %arg2 : f32 linalg.yield %1823 : f32 @@ -745,7 +745,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] 
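For reference, a minimal sketch of the renamed syntax these unit-dimension tests now check, with illustrative types and names that are not taken from the patch:

```mlir
func.func @sum_rows(%in : tensor<1x8xf32>, %acc : tensor<1xf32>) -> tensor<1xf32> {
  // `inits` is the renamed `outs`: it carries the accumulator that the
  // reduction reads and updates, and it determines the result type.
  %0 = linalg.generic {
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                       affine_map<(d0, d1) -> (d0)>],
      iterator_types = ["parallel", "reduction"]}
      ins(%in : tensor<1x8xf32>) inits(%acc : tensor<1xf32>) {
    ^bb0(%a : f32, %b : f32):
      %s = arith.addf %a, %b : f32
      linalg.yield %s : f32
  } -> tensor<1xf32>
  return %0 : tensor<1xf32>
}
```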
// CHECK-SAME: iterator_types = ["reduction"] // CHECK-SAME: ins(%[[INPUT_RESHAPE]] : memref<1000xf32>) -// CHECK-SAME: outs(%[[INIT_RESHAPE]] : memref) +// CHECK-SAME: inits(%[[INIT_RESHAPE]] : memref) // CHECK: return %[[INIT:.+]] : memref<1xf32> @@ -768,7 +768,7 @@ func.func @input_stays_same(%arg0 : memref>, %arg1 : f32, %shape: memref) -> memref { linalg.generic #trait ins(%arg0, %arg1 : memref>, f32) - outs(%shape : memref) { + inits(%shape : memref) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield %arg3 : f32 } @@ -788,7 +788,7 @@ // CHECK-SAME: {indexing_maps = [#[[MAP1]], #[[MAP2]], #[[MAP3]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : memref>, f32) -// CHECK-SAME: outs(%[[OUT]] : memref) { +// CHECK-SAME: inits(%[[OUT]] : memref) { // CHECK: ^bb0(%{{.*}}: f32, %[[ARG:.*]]: f32, %{{.*}}: f32): // CHECK: linalg.yield %[[ARG]] : f32 // CHECK: } @@ -812,7 +812,7 @@ %0 = tensor.empty() : tensor<8xf32> %1 = linalg.generic #matvec ins(%arg0, %arg1: tensor<8x8xf32, #CSR>, tensor<8xf32>) - outs(%0: tensor<8xf32>) { + inits(%0: tensor<8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %m = arith.mulf %a, %b : f32 %add = arith.addf %x, %m : f32 @@ -834,7 +834,7 @@ %0 = tensor.empty() : tensor<4x2xf32> %res = scf.foreach_thread (%arg0, %arg1) in (%c4, %c2) shared_outs(%o = %0) -> (tensor<4x2xf32>) { %1 = tensor.empty() : tensor<1x1xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> scf.foreach_thread.perform_concurrently { // CHECK: tensor.parallel_insert_slice %{{[0-9a-z]*}} into %{{[0-9a-z]*}} // CHECK-SAME: [%{{.*}}, %{{.*}}] [1, 1] [1, 1] : tensor into tensor<4x2xf32> @@ -859,7 +859,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32, 3>) - outs(%arg0 : memref<1x1xf32, 3>) { + inits(%arg0 : memref<1x1xf32, 3>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } @@ -867,6 +867,6 @@ } // CHECK-LABEL: func @drop_all_loops -// CHECK: memref.collapse_shape +// CHECK: memref.collapse_shape // CHECK-SAME: [] : memref<1x1xf32, 3> into memref // CHECK: linalg.generic{{.*}}memref diff --git a/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir b/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir --- a/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir +++ b/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir @@ -18,7 +18,7 @@ %0 = test.linalg_fill_op { indexing_maps = [#map0, #map0, #map1], iterator_types = ["parallel"]} - ins(%arg0, %arg0 : f32, f32) outs(%arg1 : tensor) { + ins(%arg0, %arg0 : f32, f32) inits(%arg1 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32): linalg.yield %arg2 : f32 } -> tensor @@ -34,7 +34,7 @@ %0 = test.linalg_fill_op { indexing_maps = [#map1, #map1], iterator_types = ["parallel"]} - ins(%arg0 : tensor) outs(%arg1 : tensor) { + ins(%arg0 : tensor) inits(%arg1 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } -> tensor diff --git a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir --- a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir +++ b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir @@ -15,7 +15,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor) - outs(%shape : tensor) { + inits(%shape : tensor) { ^bb0(%arg1 : f32, %arg2 : f32) : linalg.yield %arg1 : f32 } -> tensor @@ -42,7 +42,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xf32>) - outs(%arg0 : tensor<1x1xf32>) { + 
inits(%arg0 : tensor<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } -> tensor<1x1xf32> @@ -68,7 +68,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32>) - outs(%arg1 : memref<1x1xf32>) { + inits(%arg1 : memref<1x1xf32>) { ^bb0(%arg2: f32, %arg3 : f32) : linalg.yield %arg2 : f32 } @@ -96,7 +96,7 @@ func.func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> { %0 = linalg.generic #trait ins(%arg0 : tensor<1x5xf32>) - outs(%shape : tensor<5xf32>) { + inits(%shape : tensor<5xf32>) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } -> tensor<5xf32> diff --git a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir --- a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir +++ b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir @@ -29,7 +29,7 @@ %c0 = arith.constant 0: index %f0 = arith.constant 0.0: f32 %alloc = memref.alloc() : memref<32 x f32> - linalg.fill ins(%f0 : f32) outs(%alloc : memref<32 x f32>) + linalg.fill ins(%f0 : f32) inits(%alloc : memref<32 x f32>) %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> memref.copy %in, %subview : memref to memref<16 x f32> %0 = vector.transfer_read %alloc[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> @@ -69,7 +69,7 @@ %alloc = memref.alloc() : memref<128 x i8> %view = memref.view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> %subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> - linalg.fill ins(%f0 : f32) outs(%view : memref<32 x f32>) + linalg.fill ins(%f0 : f32) inits(%view : memref<32 x f32>) memref.copy %in, %subview : memref to memref<16 x f32> %0 = vector.transfer_read %view[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> memref.dealloc %alloc : memref<128 x i8> @@ -129,7 +129,7 @@ %f0 = arith.constant 0.0: f32 %f1 = arith.constant 1.0: f32 %alloc = memref.alloc() : memref<32 x f32> - linalg.fill ins(%f0 : f32) outs(%alloc : memref<32 x f32>) + linalg.fill ins(%f0 : f32) inits(%alloc : memref<32 x f32>) %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> memref.copy %in, %subview : memref to memref<16 x f32> "some_interleaved_use"(%subview) : (memref<16 x f32>) -> () diff --git a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir --- a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir +++ b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir @@ -16,7 +16,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor<2x3x4x5x6x7x8x9xi32>, tensor<2x3x4xi32>, tensor<5x6x7x8xi32>) - outs(%init : tensor<2x3x4x5x6x7x8x9xi32>) { + inits(%init : tensor<2x3x4x5x6x7x8x9xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %t0 = arith.addi %b0, %b1 : i32 %t1 = arith.addi %t0, %b2 : i32 @@ -39,7 +39,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1_RESHAPE]], %[[ARG2_RESHAPE]] : -// CHECK-SAME: outs(%[[INIT_RESHAPE]] : +// CHECK-SAME: inits(%[[INIT_RESHAPE]] : // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[COLLAPSED_OP]] {{\[}}[0], [1, 2], [3], [4, 5, 6], [7]{{\]}} // 
CHECK: return %[[RESULT_RESHAPE]] @@ -67,7 +67,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor<2x3x4x5x6x7x8x9xi32>, tensor<2x3x4xi32>, tensor<5x6x7x8xi32>) - outs(%init : tensor<2x3x4x5x6x7x8x9xi32>) { + inits(%init : tensor<2x3x4x5x6x7x8x9xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %iv0 = linalg.index 0: index %iv1 = linalg.index 1: index @@ -129,7 +129,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor<9x7x8x2x3x4x5x6xi32>, tensor<7x8x2xi32>, tensor<6x3x4x5xi32>) - outs(%init : tensor<2x3x4x5x6x7x8x9xi32>) { + inits(%init : tensor<2x3x4x5x6x7x8x9xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %t0 = arith.addi %b0, %b1 : i32 %t1 = arith.addi %t0, %b2 : i32 @@ -153,7 +153,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1_RESHAPE]], %[[ARG2_RESHAPE]] : -// CHECK-SAME: outs(%[[INIT_RESHAPE]] : +// CHECK-SAME: inits(%[[INIT_RESHAPE]] : // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[COLLAPSED_OP]] {{\[}}[0], [1, 2, 3], [4], [5, 6], [7]{{\]}} // CHECK: return %[[RESULT_RESHAPE]] @@ -181,7 +181,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor, tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %iv0 = linalg.index 0: index %iv1 = linalg.index 1: index @@ -229,7 +229,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "reduction", "reduction", "parallel"]} - ins(%0 : tensor<2x6x?x5xf32>) outs(%arg1 : tensor<2x5xf32>) { + ins(%0 : tensor<2x6x?x5xf32>) inits(%arg1 : tensor<2x5xf32>) { ^bb0(%b0 : f32, %b1 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -245,7 +245,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "reduction", "parallel"] // CHECK-SAME: ins(%[[ARG0]] : tensor<2x?x5xf32>) -// CHECK-SAME: outs(%[[ARG1]] : tensor<2x5xf32>) +// CHECK-SAME: inits(%[[ARG1]] : tensor<2x5xf32>) // ----- @@ -258,7 +258,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) outs(%init : tensor<2x3x4x5xf32>) { + ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) inits(%init : tensor<2x3x4x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -285,7 +285,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2xf32>) outs(%init : tensor<2x4x3x5xf32>) { + ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2xf32>) inits(%init : tensor<2x4x3x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -312,7 +312,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "reduction", "parallel", "parallel"]} - ins(%0, %arg1 : 
tensor<2x3x4x5xf32>, tensor<2x3xf32>) outs(%init : tensor<2x5xf32>) { + ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) inits(%init : tensor<2x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -341,7 +341,7 @@ %2 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0, %1 : tensor<2x3xf32>, tensor<4x5xf32>) outs(%init : tensor<2x3x4x5xf32>) { + ins(%0, %1 : tensor<2x3xf32>, tensor<4x5xf32>) inits(%init : tensor<2x3x4x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %3 = arith.addf %b0, %b1 : f32 linalg.yield %3 : f32 @@ -358,7 +358,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%{{.+}}: tensor<6x20xf32>) +// CHECK-SAME: inits(%{{.+}}: tensor<6x20xf32>) // CHECK: %[[RESHAPE1:.+]] = tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1, 2]{{\]}} // CHECK: %[[RESHAPE2:.+]] = tensor.expand_shape %[[RESHAPE1]] {{\[}}[0, 1], [2], [3]{{\]}} // CHECK: return %[[RESHAPE2]] @@ -374,7 +374,7 @@ // CONTROL: %[[INIT_RESHAPE:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1], [2, 3]{{\]}} // CONTROL: %[[GENERIC:.+]] = linalg.generic // CONTROL-SAME: ins(%[[EXPAND]], %[[ARG1]] : -// CONTROL-SAME: outs(%[[INIT_RESHAPE]] : +// CONTROL-SAME: inits(%[[INIT_RESHAPE]] : // CONTROL: %[[RESULT:.+]] = tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1], [2, 3]{{\]}} // ----- @@ -387,7 +387,7 @@ %1 = linalg.generic { indexing_maps = [#map, #map], iterator_types = ["parallel"]} - ins(%0: tensor<1xf32>) outs(%init : tensor<1xf32>) { + ins(%0: tensor<1xf32>) inits(%init : tensor<1xf32>) { ^bb0(%b0 : f32, %b1 : f32): linalg.yield %b0: f32 } -> tensor<1xf32> @@ -410,7 +410,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0, %arg1 : tensor, tensor<4x?x?x8xf32>) - outs(%arg1 : tensor<4x?x?x8xf32>) { + inits(%arg1 : tensor<4x?x?x8xf32>) { ^bb0(%b0: f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -430,7 +430,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[COLLAPSE_ARG0]], %[[COLLAPSE_ARG1_0]] : -// CHECK-SAME: outs(%[[COLLAPSE_ARG1_1]] : +// CHECK-SAME: inits(%[[COLLAPSE_ARG1_1]] : // CHECK: %[[EXPAND_GENERIC:.+]] = tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1], [2, 3]{{\]}} // CHECK: return %[[EXPAND_GENERIC]] @@ -448,7 +448,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0 : tensor) outs(%init : tensor) { + ins(%0 : tensor) inits(%init : tensor) { ^bb0(%b0 : i32, %b1 : i32): %2 = linalg.index 0 : index %3 = linalg.index 1 : index @@ -474,7 +474,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]] : -// CHECK-SAME: outs(%[[COLLAPSE_INIT]] : +// CHECK-SAME: inits(%[[COLLAPSE_INIT]] : // CHECK-NEXT: ^bb{{[0-9]}} // CHECK: %[[ID0:.+]] = linalg.index 0 // CHECK-DAG: %[[T0:.+]] = arith.remui %[[ID0]], %[[C4]] @@ -504,7 +504,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} - ins(%0 : tensor) outs(%init : tensor) { + ins(%0 : tensor) inits(%init : tensor) { ^bb0(%b0 : i32, %b1 : i32): %2 = 
linalg.index 0 : index %3 = linalg.index 1 : index diff --git a/mlir/test/Dialect/Linalg/fusion-2-level.mlir b/mlir/test/Dialect/Linalg/fusion-2-level.mlir --- a/mlir/test/Dialect/Linalg/fusion-2-level.mlir +++ b/mlir/test/Dialect/Linalg/fusion-2-level.mlir @@ -13,7 +13,7 @@ %1 = memref.dim %C, %c1 : memref> %2 = memref.dim %D, %c1 : memref> linalg.matmul ins(%A, %B: memref>, memref>) - outs(%C: memref>) + inits(%C: memref>) scf.for %arg5 = %c0 to %0 step %c20 { scf.for %arg6 = %c0 to %2 step %c30 { scf.for %arg7 = %c0 to %1 step %c40 { @@ -30,7 +30,7 @@ %16 = memref.subview %7[%arg10, %arg9][%c4, %c3][%c1, %c1]: memref> to memref> %17 = memref.subview %8[%arg8, %arg9][%c2, %c3][%c1, %c1] : memref> to memref> linalg.matmul ins(%14, %16: memref>, memref>) - outs(%17: memref>) + inits(%17: memref>) } } } diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir --- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir @@ -13,7 +13,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -22,7 +22,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { // CHECK: ^{{[a-zA-Z0-9_]*}} // CHECK-SAME: [[ARG0:%[a-zA-Z0-9_]*]] // CHECK-SAME: [[ARG1:%[a-zA-Z0-9_]*]] @@ -55,7 +55,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, f32) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -64,7 +64,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP1]], [[$MAP1]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, f32) - outs(%2 : tensor) { + inits(%2 : tensor) { // CHECK: ^{{[a-zA-Z0-9_]*}} // CHECK-SAME: [[ARG3:%[a-zA-Z0-9_]*]] // CHECK-SAME: [[ARG4:%[a-zA-Z0-9_]*]] @@ -97,7 +97,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -106,7 +106,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP1]], [[$MAP0]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %5 = arith.mulf %arg5, %arg6 : f32 linalg.yield %5 : f32 @@ -131,7 +131,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : 
f32 @@ -140,7 +140,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP1]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%2 : tensor){ + inits(%2 : tensor){ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %5 = arith.mulf %arg5, %arg6 : f32 linalg.yield %5 : f32 @@ -165,7 +165,7 @@ %1 = tensor.empty(%0) : tensor %2 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%1 : tensor) { + inits(%1 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.addf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -176,7 +176,7 @@ %4 = tensor.empty(%0, %3) : tensor %5 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%2, %arg2 : tensor, tensor) - outs(%4 : tensor){ + inits(%4 : tensor){ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %6 = arith.mulf %arg5, %arg6 : f32 linalg.yield %6 : f32 @@ -195,7 +195,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []} ins(%arg0, %arg1 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -205,7 +205,7 @@ // CHECK: arith.mulf %2 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []} ins(%1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.mulf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -231,7 +231,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cst, %arg0 : tensor<5xf32>, tensor<5x?x?xf32>) - outs(%2 : tensor<5x?x?xf32>) { + inits(%2 : tensor<5x?x?xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %4 = arith.mulf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -263,7 +263,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cst, %arg0 : tensor, tensor<5x?x?xf32>) - outs(%2 : tensor<5x?x?xf32>) { + inits(%2 : tensor<5x?x?xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %4 = arith.mulf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -291,7 +291,7 @@ indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): %10 = arith.addi %arg2, %arg3 : i32 linalg.yield %10 : i32 @@ -300,7 +300,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%3 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -342,7 +342,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg4: i32, %arg5: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -356,7 +356,7 @@ indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%3, %arg0 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): %10 = arith.addi %arg2, %arg3 : i32 linalg.yield %10 : i32 @@ -396,7 +396,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, 
%arg3: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -410,7 +410,7 @@ indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel"] } ins(%3 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -457,7 +457,7 @@ %1 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel"]} - ins(%arg0 : tensor) outs(%0 : tensor) { + ins(%arg0 : tensor) inits(%0 : tensor) { ^bb0(%arg2 : i32, %arg3 : i32): %2 = linalg.index 0 : index %3 = arith.index_cast %2 : index to i32 @@ -471,7 +471,7 @@ {indexing_maps = [#map2, #map3, #map2], iterator_types = ["parallel", "parallel"]} ins(%arg1, %1 : tensor, tensor) - outs(%4 : tensor) { + inits(%4 : tensor) { ^bb0(%arg2 : i32, %arg3 : i32, %arg4: i32): %6 = arith.addi %arg2, %arg3 : i32 linalg.yield %6 : i32 @@ -503,7 +503,7 @@ %1 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} - ins(%arg1 : tensor) outs(%0 : tensor) { + ins(%arg1 : tensor) inits(%0 : tensor) { ^bb0(%arg2: i32, %arg3: f32): %3 = arith.index_cast %arg2 : i32 to index %4 = tensor.extract %arg0[%3, %c0, %c0] : tensor<5x1x1xf32> @@ -514,7 +514,7 @@ {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - ins(%1, %cst : tensor, tensor<10xf32>) outs(%2 : tensor<10xf32>) { + ins(%1, %cst : tensor, tensor<10xf32>) inits(%2 : tensor<10xf32>) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): %4 = arith.mulf %arg2, %arg3 : f32 linalg.yield %4 : f32 @@ -544,7 +544,7 @@ affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins (%arg0, %cst : tensor<4xf32>, tensor<4xf32>) - outs (%1 : tensor<4xf32>) { + inits (%1 : tensor<4xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %3 = arith.addf %arg1, %arg2 : f32 linalg.yield %3 : f32 @@ -559,7 +559,7 @@ // CHECK: %[[T1:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: ins(%[[ARG0]] : tensor<4xf32>) -// CHECK-SAME: outs(%[[T0]] : tensor<4xf32>) +// CHECK-SAME: inits(%[[T0]] : tensor<4xf32>) // CHECK: ^{{.+}}( // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: f32, %[[ARG2:[a-zA-Z0-9_]+]]: f32) // CHECK: %[[T2:.+]] = arith.addf %[[ARG1]], %[[CST]] @@ -579,7 +579,7 @@ {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor<1x10xf32>, tensor<1x10xf32>) - outs(%init : tensor<1x10xf32>) { + inits(%init : tensor<1x10xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -588,7 +588,7 @@ {indexing_maps = [#map1, #map2], iterator_types = ["reduction"]} ins(%0 : tensor<1x10xf32>) - outs(%arg2 : tensor<1xf32>) { + inits(%arg2 : tensor<1xf32>) { ^bb0(%arg3: f32, %arg4: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -625,7 +625,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] } - outs(%init0 : tensor) { + inits(%init0 : tensor) { ^bb0(%a: f32): linalg.yield %cp5 : f32 } -> tensor @@ -638,7 +638,7 @@ iterator_types = ["parallel", "parallel"] } ins(%0, %1 : tensor, tensor) - outs(%init1 : tensor) { + inits(%init1 : tensor) { ^bb0(%a: f32, %b: f32, %c: f32): %m = arith.mulf %a, %b : f32 linalg.yield %m : f32 @@ -656,7 +656,7 @@ %0 = linalg.generic { indexing_maps = [affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} - outs(%arg0 : tensor<1x8xf64>) { + inits(%arg0 : tensor<1x8xf64>) { ^bb0(%a: f64): %r = 
func.call @compute1(%a) : (f64) -> f64 linalg.yield %r : f64 @@ -672,7 +672,7 @@ indexing_maps = [affine_map<(i, j) -> (i, j)>, affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} ins(%0 : tensor<1x8xf64>) - outs(%arg1 : tensor<1x8xi32>) { + inits(%arg1 : tensor<1x8xi32>) { ^bb0(%a: f64, %b: i32): %r = func.call @compute2(%a, %b) : (f64, i32) -> i32 linalg.yield %r : i32 @@ -697,7 +697,7 @@ indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%three : tensor<3x2xf32>) outs(%init : tensor<3xf32>) { + ins(%three : tensor<3x2xf32>) inits(%init : tensor<3xf32>) { ^bb0(%arg0 : f32, %arg1 : f32): %0 = arith.addf %arg0, %arg1 : f32 linalg.yield %0 : f32 @@ -714,12 +714,12 @@ } func.func @break_outs_dependency(%arg0 : tensor) -> tensor { - %0 = linalg.generic #trait ins(%arg0 : tensor) outs(%arg0 : tensor) { + %0 = linalg.generic #trait ins(%arg0 : tensor) inits(%arg0 : tensor) { ^bb0(%arg1 : f32, %arg2 : f32) : %1 = arith.addf %arg1, %arg1 : f32 linalg.yield %1 : f32 } -> tensor - %2 = linalg.generic #trait ins(%0 : tensor) outs(%0 : tensor) { + %2 = linalg.generic #trait ins(%0 : tensor) inits(%0 : tensor) { ^bb0(%arg1 : f32, %arg2 : f32) : %3 = arith.mulf %arg1, %arg1 : f32 linalg.yield %3 : f32 @@ -734,12 +734,12 @@ // CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty(%[[D0]], %[[D1]]) // CHECK: %[[GENERIC1:.+]] = linalg.generic -// CHECK-SAME: outs(%[[INIT]] : tensor) +// CHECK-SAME: inits(%[[INIT]] : tensor) // CHECK-DAG: %[[D0:.+]] = tensor.dim %[[GENERIC1]], %[[C0]] // CHECK-DAG: %[[D1:.+]] = tensor.dim %[[GENERIC1]], %[[C1]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty(%[[D0]], %[[D1]]) // CHECK: %[[RESULT:.+]] = linalg.generic -// CHECK-SAME: outs(%[[INIT]] : tensor) +// CHECK-SAME: inits(%[[INIT]] : tensor) // ----- @@ -760,7 +760,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %cst, %c42 : tensor, f32, i32) - outs(%0, %1 : tensor, tensor) { + inits(%0, %1 : tensor, tensor) { ^bb0(%arg1 : f32, %arg2 : f32, %arg3 : i32, %arg4 : f32, %arg5 : i32) : %3 = arith.addf %arg1, %arg2 : f32 linalg.yield %3, %arg3 : f32, i32 @@ -785,7 +785,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<3x2xf32> @@ -803,7 +803,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf64>) outs(%init : tensor<3x2xf64>) { + } ins(%input : tensor<2x3xf64>) inits(%init : tensor<3x2xf64>) { ^bb0(%arg1: f64, %arg2: f64): linalg.yield %arg1 : f64 } -> tensor<3x2xf64> @@ -827,7 +827,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%input : tensor<1x2x3x4xi32>) outs(%init : tensor<3x1x4x2xi32>) { + } ins(%input : tensor<1x2x3x4xi32>) inits(%init : tensor<3x1x4x2xi32>) { ^bb0(%arg1: i32, %arg2: i32): linalg.yield %arg1 : i32 } -> tensor<3x1x4x2xi32> @@ -851,7 +851,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, 
d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%input : tensor<1x2x3x4xi16>) outs(%init : tensor<3x1x4x2xi16>) { + } ins(%input : tensor<1x2x3x4xi16>) inits(%init : tensor<3x1x4x2xi16>) { ^bb0(%arg1: i16, %arg2: i16): linalg.yield %arg1 : i16 } -> tensor<3x1x4x2xi16> @@ -867,7 +867,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<3x2xf32> @@ -884,7 +884,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %cst : f32 } -> tensor<3x2xf32> @@ -900,7 +900,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): %add = arith.addf %arg1, %arg1 : f32 linalg.yield %add : f32 @@ -929,16 +929,16 @@ %5 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"] - } ins(%arg0 : tensor) outs(%4 : tensor) { + } ins(%arg0 : tensor) inits(%4 : tensor) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } -> tensor %6 = tensor.empty(%arg1) : tensor - %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor) -> tensor + %7 = linalg.fill ins(%cst : f32) inits(%6 : tensor) -> tensor %8 = linalg.generic { indexing_maps = [#map2, #map3], iterator_types = ["parallel", "reduction"] - } ins(%5 : tensor) outs(%7 : tensor) { + } ins(%5 : tensor) inits(%7 : tensor) { ^bb0(%arg2: f32, %arg3: f32): %9 = arith.maxf %arg2, %arg3 : f32 linalg.yield %9 : f32 @@ -953,7 +953,7 @@ %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%arg0 : tensor<5000xi64>) { + inits(%arg0 : tensor<5000xi64>) { ^bb0(%arg3: i64): // no predecessors %22 = linalg.index 0 : index %23 = arith.index_cast %22 : index to i64 @@ -963,7 +963,7 @@ %2 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1)>], iterator_types = ["parallel", "parallel"]} - ins(%0 : tensor<5000xi64>) outs(%1 : tensor<5000xi32>) { + ins(%0 : tensor<5000xi64>) inits(%1 : tensor<5000xi32>) { ^bb0(%arg3: i64, %arg5: i32): // no predecessors %22 = arith.index_cast %arg3 : i64 to index %23 = tensor.extract %arg1[%22] : tensor<5000xi32> @@ -980,7 +980,7 @@ // CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor<5000xi32> // CHECK: %[[RESULT:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:.+]]: i64 // CHECK-SAME: %[[B1:.+]]: i32 @@ -998,16 +998,16 @@ // CHECK-NOT: linalg.fill // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]] : tensor) -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0) -> (d0)> func.func 
@fold_fill_generic_basic(%arg0: tensor) -> (tensor) { %c0 = arith.constant 0 : index %cst = arith.constant 7.0 : f32 %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.empty(%0) : tensor - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor %3 = tensor.empty(%0) : tensor - %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) outs (%3:tensor) { + %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) inits (%3:tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %5 = arith.addf %arg1, %arg2 : f32 linalg.yield %5 : f32 @@ -1021,7 +1021,7 @@ // CHECK-NOT: linalg.fill // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-NOT: ins -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0, d1) -> (d0, d1)> #map1 = affine_map<(d0, d1) -> (d1, d0)> func.func @fold_fill_generic_mixedaccess(%arg0: tensor) -> (tensor) { @@ -1032,11 +1032,11 @@ %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.dim %arg0, %c1 : tensor %2 = tensor.empty(%0, %1) : tensor - %3 = linalg.fill ins(%cst1 : f32) outs(%2 : tensor) -> tensor + %3 = linalg.fill ins(%cst1 : f32) inits(%2 : tensor) -> tensor %4 = tensor.empty(%1, %0) : tensor - %5 = linalg.fill ins(%cst2 : f32) outs(%4 : tensor) -> tensor + %5 = linalg.fill ins(%cst2 : f32) inits(%4 : tensor) -> tensor %6 = tensor.empty(%0, %1) : tensor - %7 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%3, %5 : tensor, tensor) outs (%6:tensor) { + %7 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%3, %5 : tensor, tensor) inits (%6:tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %8 = arith.divf %arg1, %arg2 : f32 linalg.yield %8 : f32 @@ -1053,7 +1053,7 @@ %1 = tensor.empty() : tensor %2:2 = linalg.generic { indexing_maps = [#map, #map, #map, #map, #map], iterator_types = []} - ins(%arg0, %arg1, %arg1 : tensor, tensor, tensor) outs(%0, %1 : tensor, tensor) { + ins(%arg0, %arg1, %arg1 : tensor, tensor, tensor) inits(%0, %1 : tensor, tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32, %arg8: f32, %arg9: f32): %4 = arith.addf %arg5, %arg6 : f32 %5 = arith.addf %4, %arg7 : f32 @@ -1061,7 +1061,7 @@ } -> (tensor, tensor) %3 = linalg.generic { indexing_maps = [#map, #map, #map], iterator_types = []} - ins(%2#1, %arg1 : tensor, tensor) outs(%arg4 : tensor) { + ins(%2#1, %arg1 : tensor, tensor) inits(%arg4 : tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %4 = arith.addf %arg5, %arg6 : f32 %5 = arith.addf %4, %arg6 : f32 @@ -1076,7 +1076,7 @@ // CHECK: %[[INIT:.+]] = tensor.empty // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[INIT]] : +// CHECK-SAME: inits(%[[INIT]] : // CHECK-NEXT: ^bb0 // CHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir --- a/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir @@ -21,28 +21,28 @@ %init = tensor.empty(%d0, %d1) : tensor %0 = linalg.generic #binary2Dpointwise ins(%arg0, %arg1 : tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32): %1 = arith.mulf %arg6, 
%arg7 : f32 linalg.yield %1 : f32 } -> tensor %2 = linalg.generic #binary2Dpointwise ins(%arg2, %arg3 : tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32): %3 = arith.mulf %arg6, %arg7 : f32 linalg.yield %3 : f32 } -> tensor %4 = linalg.generic #binary2Dpointwise ins(%arg4, %arg5 : tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32): %5 = arith.mulf %arg6, %arg7 : f32 linalg.yield %5 : f32 } -> tensor %6 = linalg.generic #ternary2Dpointwise ins(%0, %2, %4 : tensor, tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32, %arg9 : f32): %7 = arith.addf %arg6, %arg7 : f32 %8 = arith.addf %7, %arg8 : f32 diff --git a/mlir/test/Dialect/Linalg/fusion-indexed.mlir b/mlir/test/Dialect/Linalg/fusion-indexed.mlir --- a/mlir/test/Dialect/Linalg/fusion-indexed.mlir +++ b/mlir/test/Dialect/Linalg/fusion-indexed.mlir @@ -11,7 +11,7 @@ %D: memref) { linalg.generic #pointwise_2d_trait ins(%A, %B: memref, memref) - outs(%C : memref) { + inits(%C : memref) { ^bb0(%e: f32, %arg5: f32, %arg6: f32): %2 = arith.addf %e, %arg5 : f32 linalg.yield %2 : f32 @@ -34,7 +34,7 @@ indexing_maps = [#id_2d, #id_2d], iterator_types = ["parallel", "parallel"]} ins(%4 : memref>) - outs(%5 : memref>) { + inits(%5 : memref>) { ^bb0(%arg4: f32, %arg5: f32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -73,7 +73,7 @@ linalg.generic { indexing_maps = [affine_map<(i, j) -> (j, i)>], iterator_types = ["parallel", "parallel"]} - outs(%A : memref) { + inits(%A : memref) { ^bb0(%a: index): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -92,7 +92,7 @@ affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} ins(%A_view : memref>) - outs(%B_view : memref>) { + inits(%B_view : memref>) { ^bb0(%a: index, %b: index): linalg.yield %a : index } @@ -121,7 +121,7 @@ linalg.generic { indexing_maps = [affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} - outs(%A : memref) { + inits(%A : memref) { ^bb0(%a: index): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -140,7 +140,7 @@ affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} ins(%A_view : memref>) - outs(%B_view : memref>) { + inits(%B_view : memref>) { ^bb0(%a: index, %b: index): linalg.yield %a : index } diff --git a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir --- a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir +++ b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir @@ -8,7 +8,7 @@ // CHECK: %[[RI:.*]] = tensor.collapse_shape %[[INIT]] {{\[}}[0, 1], [2]] : tensor into tensor // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP3]], #[[$MAP2]]], // CHECK-SAME: iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[A]], %[[B]] : tensor, tensor<16xf32>) outs(%[[RI]] : tensor) +// CHECK-SAME: ins(%[[A]], %[[B]] : tensor, tensor<16xf32>) inits(%[[RI]] : tensor) // CHECK: %[[RR:.*]] = tensor.expand_shape %[[R]] {{\[}}[0, 1], [2]] : tensor into tensor // CHECK: return %[[RR]] : tensor func.func @reshape(%A: tensor, %B: tensor<16xf32>, %init: tensor) -> tensor { @@ -19,7 +19,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0, %B : tensor, tensor<16xf32>) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %s = arith.subf 
%arg1, %arg2 : f32 linalg.yield %s : f32 @@ -38,7 +38,7 @@ // CHECK: %[[RI:.*]] = tensor.collapse_shape %[[I]] {{\[}}[0, 1], [2]] : tensor<112x112x16xf32> into tensor<12544x16xf32> // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP2]], #[[$MAP3]], #[[$MAP2]]], // CHECK-SAME: iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[A]], %[[B]], %[[C]] : tensor<12544x16xf32>, tensor<12544x16xf32>, tensor<16xf32>) outs(%[[RI]] : tensor<12544x16xf32>) +// CHECK-SAME: ins(%[[A]], %[[B]], %[[C]] : tensor<12544x16xf32>, tensor<12544x16xf32>, tensor<16xf32>) inits(%[[RI]] : tensor<12544x16xf32>) // CHECK: %[[RR:.*]] = tensor.expand_shape %[[R]] {{\[}}[0, 1], [2]] : tensor<12544x16xf32> into tensor<112x112x16xf32> // CHECK: return %[[RR]] : tensor<112x112x16xf32> func.func @reshape_multiple(%A: tensor<12544x16xf32>, %B: tensor<12544x16xf32>, @@ -55,7 +55,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0, %1, %C : tensor<112x112x16xf32>, tensor<112x112x16xf32>, tensor<16xf32>) - outs(%2 : tensor<112x112x16xf32>) { + inits(%2 : tensor<112x112x16xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %s = arith.subf %arg1, %arg2 : f32 %m = arith.mulf %s, %arg3 : f32 @@ -81,7 +81,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%20, %B : tensor<112x112x16xf32>, tensor<112xf32>) - outs(%21 : tensor<112x112x16xf32>) { + inits(%21 : tensor<112x112x16xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %s = arith.subf %arg1, %arg2 : f32 linalg.yield %s : f32 @@ -106,7 +106,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%25, %arg1, %arg2 : tensor<2x3x5xi32>, tensor<5xf32>, tensor<5xf32>) - outs(%26 : tensor<2x3x5xf32>) { + inits(%26 : tensor<2x3x5xf32>) { ^bb0(%arg6: i32, %arg7: f32, %arg8: f32, %arg9: f32): %29 = arith.sitofp %arg6 : i32 to f32 %30 = arith.addf %arg7, %cst_8 : f32 @@ -121,6 +121,6 @@ // CHECK-LABEL: func @type_correctness // CHECK: %[[OP:.+]] = linalg.generic // CHECK-SAME: ins(%{{.+}}, %{{.+}}, %{{.+}} : tensor<6x5xi32>, tensor<5xf32>, tensor<5xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<6x5xf32>) +// CHECK-SAME: inits(%{{.+}} : tensor<6x5xf32>) // CHECK: tensor.expand_shape %[[OP]] // CHECK-SAME: tensor<6x5xf32> into tensor<2x3x5xf32> diff --git a/mlir/test/Dialect/Linalg/fusion.mlir b/mlir/test/Dialect/Linalg/fusion.mlir --- a/mlir/test/Dialect/Linalg/fusion.mlir +++ b/mlir/test/Dialect/Linalg/fusion.mlir @@ -16,7 +16,7 @@ %2 = memref.dim %B, %c1 : memref> linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) scf.for %arg5 = %c0 to %0 step %c2 { scf.for %arg6 = %c0 to %2 step %c3 { scf.for %arg7 = %c0 to %1 step %c4 { @@ -31,7 +31,7 @@ memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8: memref>) + inits(%8: memref>) } } } @@ -60,7 +60,7 @@ %c2 = arith.constant 2 : index linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C: memref>) + inits(%C: memref>) %0 = memref.dim %C, %c0 : memref> %1 = memref.dim %C, %c1 : memref> %2 = memref.dim %D, %c1 : memref> @@ -78,7 +78,7 @@ memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8 : memref>) + inits(%8 : memref>) } } } @@ -110,7 +110,7 @@ %c2 = arith.constant 2 : index linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) %0 = memref.dim %D, %c0 : memref> %1 = memref.dim %D, %c1 : memref> %2 = memref.dim %C, %c1 : memref> @@ -128,7 +128,7 @@ 
memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8 : memref>) + inits(%8 : memref>) } } } @@ -162,10 +162,10 @@ %c2 = arith.constant 2 : index linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) linalg.matmul ins(%A, %B : memref>, memref>) - outs(%D : memref>) + inits(%D : memref>) %0 = memref.dim %C, %c0 : memref> %1 = memref.dim %C, %c1 : memref> %2 = memref.dim %D, %c1 : memref> @@ -183,7 +183,7 @@ memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8 : memref>) + inits(%8 : memref>) } } } @@ -222,10 +222,10 @@ %2 = memref.dim %D, %c1 : memref> linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) linalg.matmul ins(%C, %B : memref>, memref>) - outs(%D : memref>) + inits(%D : memref>) scf.for %arg5 = %c0 to %1 step %c2 { scf.for %arg6 = %c0 to %0 step %c3 { scf.for %arg7 = %c0 to %2 step %c4 { @@ -240,7 +240,7 @@ memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8 : memref>) + inits(%8 : memref>) } } } @@ -274,9 +274,9 @@ // CHECK: %[[BOUND_4_B1:.*]] = affine.min #[[BOUND_4_MAP]](%[[K]])[%[[B_1]]] // CHECK: %[[B_0K:.*]] = memref.subview %[[B]][0, %[[K]]] // CHECK: %[[D_IK_OUT:.+]] = memref.subview %[[D]][%[[I]], %[[K]]] [%[[BOUND_2_C0]], %[[BOUND_4_B1]]] -// CHECK: linalg.matmul ins(%[[A_I0]], %[[B_00]]{{.*}} outs(%[[C_I0_OUT]] -// CHECK: linalg.matmul ins(%[[C_I0]], %[[B_0K]]{{.*}} outs(%[[D_IK_OUT]] -// CHECK: linalg.matmul ins(%[[D_IK]], %[[B_KJ]]{{.*}} outs(%[[E_IJ]] +// CHECK: linalg.matmul ins(%[[A_I0]], %[[B_00]]{{.*}} inits(%[[C_I0_OUT]] +// CHECK: linalg.matmul ins(%[[C_I0]], %[[B_0K]]{{.*}} inits(%[[D_IK_OUT]] +// CHECK: linalg.matmul ins(%[[D_IK]], %[[B_KJ]]{{.*}} inits(%[[E_IJ]] // ----- @@ -298,10 +298,10 @@ %0 = memref.dim %C, %c1 : memref> linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) linalg.matmul ins(%A, %C : memref>, memref>) - outs(%E : memref>) + inits(%E : memref>) %1 = memref.dim %C, %c0 : memref> %2 = memref.dim %D, %c1 : memref> scf.for %arg5 = %c0 to %1 step %c2 { @@ -321,7 +321,7 @@ memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8 : memref>) + inits(%8 : memref>) } } } @@ -357,10 +357,10 @@ %4 = memref.dim %D, %c1 : memref> linalg.matmul ins(%A, %C : memref>, memref>) - outs(%E : memref>) + inits(%E : memref>) linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) scf.for %arg5 = %c0 to %0 step %c2 { scf.for %arg6 = %c0 to %2 step %c3 { scf.for %arg7 = %c0 to %1 step %c4 { @@ -375,7 +375,7 @@ memref> linalg.matmul ins(%7, %9 : memref>, memref>) - outs(%10 : memref>) + inits(%10 : memref>) } } } @@ -393,7 +393,7 @@ memref> linalg.matmul ins(%7, %9 : memref>, memref>) - outs(%10 : memref>) + inits(%10 : memref>) } } } @@ -408,7 +408,7 @@ // CHECK: %[[C_1:.*]] = memref.dim %[[C]], %[[C1:.*]] : memref> // CHECK: %[[C_0:.*]] = memref.dim %[[C]], %[[C0:.*]] : memref> // CHECK: %[[D_1:.*]] = memref.dim %[[D]], %[[C1:.*]] : memref> -// CHECK: linalg.matmul ins(%[[A]], %[[C]]{{.*}} outs(%[[E]] +// CHECK: linalg.matmul ins(%[[A]], %[[C]]{{.*}} inits(%[[E]] // CHECK: scf.for %{{.*}} = %{{.*}} to %[[A_0]] step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %[[A_1]] step %{{.*}} { @@ -441,10 +441,10 @@ %1 = memref.dim %A, %c1 : memref> linalg.matmul ins(%A, %C : memref>, memref>) - outs(%D : memref>) + inits(%D : memref>) linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) %2 = memref.dim 
%D, %c1 : memref> scf.for %arg5 = %c0 to %0 step %c2 { scf.for %arg6 = %c0 to %2 step %c3 { @@ -463,7 +463,7 @@ memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8 : memref>) + inits(%8 : memref>) } } } @@ -497,7 +497,7 @@ linalg.generic #pointwise_2d_trait ins(%A, %A: memref>, memref>) - outs(%B : memref>) { + inits(%B : memref>) { ^bb0(%E: f32, %arg5: f32, %arg6: f32): %2 = arith.addf %E, %arg5 : f32 linalg.yield %2 : f32 @@ -518,7 +518,7 @@ linalg.generic #pointwise_2d_trait ins(%4, %5: memref>, memref>) - outs(%6 : memref>) { + inits(%6 : memref>) { ^bb0(%arg6: f32, %arg7: f32, %arg8: f32): %7 = arith.mulf %arg6, %arg7 : f32 linalg.yield %7 : f32 @@ -555,7 +555,7 @@ %E = memref.alloc (%M, %N): memref linalg.generic #pointwise_2d_trait ins(%A, %A : memref, memref) - outs(%B : memref) { + inits(%B : memref) { ^bb0(%e: f32, %arg5: f32, %arg6: f32): %2 = arith.addf %e, %arg5 : f32 linalg.yield %2 : f32 @@ -576,7 +576,7 @@ linalg.generic #pointwise_2d_trait ins(%4, %5: memref>, memref>) - outs(%6 : memref>) { + inits(%6 : memref>) { ^bb0(%arg6: f32, %arg7: f32, %arg8: f32): %7 = arith.mulf %arg6, %arg7 : f32 linalg.yield %7 : f32 @@ -610,7 +610,7 @@ indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"]} ins(%arg1 : memref<100xf32>) - outs(%0 : memref<100x10xf32>) { + inits(%0 : memref<100x10xf32>) { ^bb0(%arg3: f32, %arg4: f32): linalg.yield %arg3 : f32 } @@ -619,7 +619,7 @@ indexing_maps = [#map1, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%arg0, %0: memref<100x10xf32>, memref<100x10xf32>) - outs(%1 : memref<100x10xf32>) { + inits(%1 : memref<100x10xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.subf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -639,7 +639,7 @@ indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6 : memref>) - outs(%7 : memref>) { + inits(%7 : memref>) { ^bb0(%arg3: f32, %arg4: f32): %8 = math.exp %arg3 : f32 linalg.yield %8 : f32 @@ -677,7 +677,7 @@ %c3 = arith.constant 3 : index %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index - linalg.fill ins(%cst : f32) outs(%arg0 : memref) + linalg.fill ins(%cst : f32) inits(%arg0 : memref) %2 = memref.dim %arg1, %c0 : memref %3 = memref.dim %arg1, %c1 : memref %4 = memref.dim %arg2, %c0 : memref @@ -690,7 +690,7 @@ %9 = affine.min #map0(%arg3)[%4] %10 = affine.min #map1(%arg4)[%5] %11 = memref.subview %arg2[%arg3, %arg4] [%9, %10] [1, 1] : memref to memref> - linalg.conv_2d ins(%8, %arg1 : memref>, memref) outs(%11 : memref>) + linalg.conv_2d ins(%8, %arg1 : memref>, memref) inits(%11 : memref>) } } return @@ -717,7 +717,7 @@ linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) scf.for %i = %c0 to %dim step %c2 { scf.for %j = %c0 to %dim step %c3 { @@ -733,7 +733,7 @@ memref> linalg.matmul ins(%0, %1 : memref>, memref>) - outs(%2 : memref>) + inits(%2 : memref>) } } } diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -2,7 +2,7 @@ func.func @generalize_matmul_buffer(%A : memref<16x8xf32>, %B: memref<8x32xf32>, %C: memref<16x32xf32>) { linalg.matmul ins(%A, %B: memref<16x8xf32>, memref<8x32xf32>) - outs(%C: memref<16x32xf32>) + inits(%C: memref<16x32xf32>) return } @@ -20,7 +20,7 @@ // CHECK-SAME: indexing_maps = [#[[A_MAP]], #[[B_MAP]], #[[C_MAP]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"] 
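For comparison, the generalized form that these matmul CHECK lines describe looks roughly as follows; a sketch with hypothetical map and function names, where the affine maps and region body mirror the matmul semantics:

```mlir
#mA = affine_map<(m, n, k) -> (m, k)>
#mB = affine_map<(m, n, k) -> (k, n)>
#mC = affine_map<(m, n, k) -> (m, n)>
func.func @generalized_matmul(%A : memref<16x8xf32>, %B : memref<8x32xf32>,
                              %C : memref<16x32xf32>) {
  // Generalization turns linalg.matmul into a linalg.generic over the same
  // operands; the init operand %C is read-modify-written by the reduction.
  linalg.generic {indexing_maps = [#mA, #mB, #mC],
                  iterator_types = ["parallel", "parallel", "reduction"]}
      ins(%A, %B : memref<16x8xf32>, memref<8x32xf32>)
      inits(%C : memref<16x32xf32>) {
    ^bb0(%a : f32, %b : f32, %c : f32):
      %mul = arith.mulf %a, %b : f32
      %add = arith.addf %c, %mul : f32
      linalg.yield %add : f32
  }
  return
}
```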
// CHECK-SAME: ins(%[[A]], %[[B]] -// CHECK-SAME: outs(%[[C]] +// CHECK-SAME: inits(%[[C]] // CHECK: ^{{.*}}(%[[A_ARG:.+]]: f32, %[[B_ARG:.+]]: f32, %[[C_ARG:.+]]: f32) // CHECK: %[[MUL:.+]] = arith.mulf %[[A_ARG]], %[[B_ARG]] : f32 @@ -31,7 +31,7 @@ func.func @generalize_matmul_tensor(%A : tensor<16x8xf32>, %B: tensor<8x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xf32>, tensor<8x32xf32>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -39,7 +39,7 @@ // CHECK: linalg.generic // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<16x8xf32>, tensor<8x32xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<16x32xf32>) +// CHECK-SAME: inits(%{{.+}} : tensor<16x32xf32>) // CHECK: ^{{.*}}(%[[A_ARG:.+]]: f32, %[[B_ARG:.+]]: f32, %[[C_ARG:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[A_ARG]], %[[B_ARG]] : f32 @@ -54,7 +54,7 @@ %C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xcomplex>, tensor<8x32xcomplex>) - outs(%C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> + inits(%C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> return %0: tensor<16x32xcomplex> } @@ -62,7 +62,7 @@ // CHECK: linalg.generic // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<16x8xcomplex>, tensor<8x32xcomplex>) -// CHECK-SAME: outs(%{{.+}} : tensor<16x32xcomplex>) +// CHECK-SAME: inits(%{{.+}} : tensor<16x32xcomplex>) // CHECK: ^{{.*}}(%[[A_ARG:.+]]: complex, %[[B_ARG:.+]]: complex, %[[C_ARG:.+]]: complex) // CHECK-NEXT: %[[MUL:.+]] = complex.mul %[[A_ARG]], %[[B_ARG]] : complex @@ -76,7 +76,7 @@ linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) - outs(%output : memref<2x3x4x2x3xf32>) + inits(%output : memref<2x3x4x2x3xf32>) return } @@ -90,7 +90,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<2x3x4x2x3xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<2x3x4x2x3xf32>) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -103,7 +103,7 @@ linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) - outs(%output : memref<2x2x3x2x3xf32>) + inits(%output : memref<2x2x3x2x3xf32>) return } @@ -117,7 +117,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<2x2x3x2x3xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<2x2x3x2x3xf32>) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -129,7 +129,7 @@ func.func @depthwise_conv_2d_nhwc_hwc(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : 
vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) - outs(%output: memref<1x56x56x96xf32>) + inits(%output: memref<1x56x56x96xf32>) return } @@ -143,7 +143,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x113x113x96xf32>, memref<3x3x96xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<1x56x56x96xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<1x56x56x96xf32>) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -156,7 +156,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%input, %filter: memref, memref) - outs (%output: memref) + inits (%output: memref) return } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1 + d3, d4)> @@ -169,7 +169,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -182,7 +182,7 @@ linalg.conv_1d_ncw_fcw {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%input, %filter: memref, memref) - outs (%output: memref) + inits (%output: memref) return } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2 + d4)> @@ -195,7 +195,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -205,7 +205,7 @@ // ----- func.func @generalize_fill(%output: memref, %value : f32) { - linalg.fill ins(%value : f32) outs(%output : memref) + linalg.fill ins(%value : f32) inits(%output : memref) return } @@ -219,7 +219,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"]} // CHECK-SAME: ins(%[[VAL]] : f32) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32) // CHECK-NEXT: linalg.yield %[[BBARG0]] : f32 @@ -228,7 +228,7 @@ func.func @generalize_batch_matm_vec(%lhs : memref, %rhs: memref, %out: memref) { linalg.batch_matvec ins(%lhs, %rhs: memref, memref) - outs(%out: memref) + inits(%out: memref) return } // CHECK: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> @@ -241,7 +241,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: i8, %[[BBARG1:.+]]: i8, %[[BBARG2:.+]]: f32) // CHECK: %[[BBARG0_F32:.+]] = arith.sitofp %[[BBARG0]] : i8 to f32 // CHECK: %[[BBARG1_F32:.+]] = arith.sitofp 
%[[BBARG1]] : i8 to f32 @@ -253,7 +253,7 @@ func.func @batch_reduce_gemm(%lhs: memref<7x8x9xf32>, %rhs: memref<7x9x8xf32>, %out: memref<8x8xf32>) { linalg.batch_reduce_matmul ins(%lhs, %rhs: memref<7x8x9xf32>, memref<7x9x8xf32>) - outs(%out: memref<8x8xf32>) + inits(%out: memref<8x8xf32>) return } @@ -267,7 +267,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["reduction", "parallel", "parallel", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<7x8x9xf32>, memref<7x9x8xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<8x8xf32> +// CHECK-SAME: inits(%{{.+}} : memref<8x8xf32> // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 // CHECK: %[[ADD:.+]] = arith.addf %[[BBARG2]], %[[MUL]] : f32 diff --git a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir @@ -3,7 +3,7 @@ // Verifies that different argument types are legal. func.func @generalize_matmul_tensor_f16f64f32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -22,7 +22,7 @@ // Verifies that different argument types are legal. func.func @generalize_matmul_tensor_i16i64i32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -43,7 +43,7 @@ func.func @generalize_matmul_tensor_i16i64i32_unsigned(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul {cast = #linalg.type_fn<cast_unsigned>} ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -54,7 +54,7 @@ func.func @generalize_matmul_tensor_i16i64f32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -67,7 +67,7 @@ func.func @generalize_matmul_tensor_f16f64i32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -80,7 +80,7 @@ func.func @generalize_matmul_unsigned_tensor_i16i64i32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -93,7 +93,7 @@ func.func @generalize_matmul_unsigned_tensor_i16i64f32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C:
tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -106,7 +106,7 @@ func.func @generalize_matmul_unsigned_tensor_f16f64i32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -119,7 +119,7 @@ func.func @generalize_pooling_nhwc_max_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) inits(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> return %0: tensor<1x2x4x1xf32> } @@ -133,7 +133,7 @@ func.func @generalize_pooling_nhwc_max_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -145,7 +145,7 @@ func.func @generalize_pooling_nhwc_max_unsigned_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_max_unsigned {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -157,7 +157,7 @@ func.func @generalize_pooling_nhwc_min_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) inits(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> return %0: tensor<1x2x4x1xf32> } @@ -171,7 +171,7 @@ func.func @generalize_pooling_nhwc_min_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -183,7 +183,7 @@ func.func @generalize_pooling_nhwc_min_unsigned_i32(%input : 
tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_min_unsigned {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -195,7 +195,7 @@ func.func @generalize_pooling_nhwc_sum_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) inits(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> return %0: tensor<1x2x4x1xf32> } @@ -209,7 +209,7 @@ func.func @generalize_pooling_nhwc_sum_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -222,7 +222,7 @@ // ----- func.func @generalize_fill_0d(%value: f64, %O: tensor) -> tensor { - %0 = linalg.fill ins(%value: f64) outs(%O : tensor) -> tensor + %0 = linalg.fill ins(%value: f64) inits(%O : tensor) -> tensor return %0: tensor } @@ -236,7 +236,7 @@ // ----- func.func @generalize_fill_2d(%value: f64, %O: memref<16x32xf32>) { - linalg.fill ins(%value: f64) outs(%O : memref<16x32xf32>) + linalg.fill ins(%value: f64) inits(%O : memref<16x32xf32>) return } @@ -251,7 +251,7 @@ // ----- func.func @generalize_index(%min: f64, %max: f64, %seed: i32, %O: tensor<16x32xf32>) -> tensor<16x32xf32> { - %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) outs(%O : tensor<16x32xf32>) -> tensor<16x32xf32> + %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) inits(%O : tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -264,7 +264,7 @@ // ----- func.func @generalize_const(%min: f64, %max: f64, %seed: i32, %O: tensor<16x32xf32>) -> tensor<16x32xf32> { - %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) outs(%O : tensor<16x32xf32>) -> tensor<16x32xf32> + %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) inits(%O : tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -277,7 +277,7 @@ // Verifies the default value of the fun attribute is an exp op. func.func @generalize_elemwise_exp(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { - %0 = linalg.elemwise_unary ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + %0 = linalg.elemwise_unary ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -289,7 +289,7 @@ // Verifies the fun attribute controls the unary function used. 
func.func @generalize_elemwise_log(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<log>} - ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -301,7 +301,7 @@ // Verifies the fun attribute controls the unary function used. func.func @generalize_elemwise_abs(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<abs>} - ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -313,7 +313,7 @@ // Verifies the fun attribute controls the unary function used. func.func @generalize_elemwise_ceil(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<ceil>} - ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -325,7 +325,7 @@ // Verifies the fun attribute controls the unary function used. func.func @generalize_elemwise_floor(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<floor>} - ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -337,7 +337,7 @@ // Verifies the fun attribute controls the unary function used. func.func @generalize_elemwise_negf(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<negf>} - ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -349,7 +349,7 @@ // Verifies the default value of the fun attribute is an add op. func.func @generalize_elemwise_add(%lhs : tensor<4x8xf32>, %rhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_binary ins(%lhs, %rhs: tensor<4x8xf32>, tensor<4x8xf32>) - outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -362,7 +362,7 @@ func.func @generalize_elemwise_mul(%lhs : tensor<4x8xf32>, %rhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_binary {fun = #linalg.binary_fn<mul>} ins(%lhs, %rhs: tensor<4x8xf32>, tensor<4x8xf32>) - outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -375,7 +375,7 @@ func.func @generalize_elemwise_rank_zero(%lhs : tensor<f32>, %rhs : tensor<f32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_binary {fun = #linalg.binary_fn} ins(%lhs, %rhs: tensor<f32>, tensor<f32>) - outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -388,7 +388,7 @@ // Verifies the fun attribute controls the binary function used.
func.func @generalize_copy(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { - %0 = linalg.copy ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + %0 = linalg.copy ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } diff --git a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir --- a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir +++ b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir @@ -4,7 +4,7 @@ // CHECK-SAME: %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { // CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32> // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32> // CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32> func.func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { @@ -29,7 +29,7 @@ // CHECK: %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32> // CHECK: %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index // CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) inits(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32> // CHECK: %[[DIM1_1:.*]] = tensor.dim %[[IN]], %[[C1]] : tensor<4x?x2x?xf32> // CHECK: %[[DIM3_1:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32> // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[OFFSET]], %[[C0]]] [4, %[[DIM1_1]], 2, %[[DIM3_1]]] [1, 1, 1, 1] : tensor<4x?x2x?xf32> into tensor<4x?x?x?xf32> diff --git a/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir b/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir --- a/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir +++ b/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir @@ -12,7 +12,7 @@ %1 = linalg.generic {indexing_maps = [#map2, #map3, #map2], iterator_types = ["parallel"]} ins(%arg0, %scalar : tensor<4xf32>, tensor) - outs(%0 : tensor<4xf32>) { + inits(%0 : tensor<4xf32>) { // CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32) ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // CHECK: tensor.extract %[[SCALAR]][] @@ -37,7 +37,7 @@ %1 = linalg.generic {indexing_maps = [#map2, #map3, #map2], iterator_types = ["parallel"]} ins(%arg0, %scalar : tensor<4xf32>, tensor<1xf32>) - outs(%0 : tensor<4xf32>) { + inits(%0 : tensor<4xf32>) { // CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32) ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // CHECK: tensor.extract %[[SCALAR]][%[[ZERO]]] diff --git a/mlir/test/Dialect/Linalg/inlining.mlir b/mlir/test/Dialect/Linalg/inlining.mlir --- a/mlir/test/Dialect/Linalg/inlining.mlir +++ b/mlir/test/Dialect/Linalg/inlining.mlir @@ -23,7 +23,7 @@ // CHECK: linalg.generic linalg.generic #trait ins(%arg0 : memref) - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb(%0 : f32, %1 : f32) : %2 = arith.addf %0, %0: f32 linalg.yield %2 : f32 diff --git 
a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -36,7 +36,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> ()> ], iterator_types = []} - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb(%0: f32): linalg.index 2 : index linalg.yield %0 : f32 @@ -50,7 +50,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> ()> ], iterator_types = []} - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb(%0: f32): linalg.index -1 : index linalg.yield %0 : f32 @@ -74,7 +74,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> ()> ], iterator_types = []} - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb(%0: f32): linalg.yield } @@ -87,7 +87,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> (0)> ], iterator_types = ["parallel"]} - outs(%arg0 : memref<1xi32>) { + inits(%arg0 : memref<1xi32>) { ^bb(%i : i32): linalg.yield %i : i32 } @@ -100,7 +100,7 @@ linalg.generic { indexing_maps = [ affine_map<(i) -> (i)> ], iterator_types = ["random"]} - outs(%arg0 : memref<1xi32>) { + inits(%arg0 : memref<1xi32>) { ^bb(%i : i32): linalg.yield %i : i32 } @@ -113,7 +113,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> (0, 0)> ], iterator_types = []} - outs(%arg0 : memref(off + i)>>) { + inits(%arg0 : memref(off + i)>>) { ^bb(%f : f32): linalg.yield %f: f32 } @@ -128,7 +128,7 @@ indexing_maps = [ affine_map<() -> (0)>, affine_map<() -> (0, 0)> ], iterator_types = []} ins(%cst : f32) - outs(%arg0 : memref(off + i)>>) { + inits(%arg0 : memref(off + i)>>) { ^bb(%0 : f32, %1 : f32): linalg.yield %0: f32 } @@ -141,7 +141,7 @@ linalg.generic { indexing_maps = [ affine_map<(i) -> (i)> ], iterator_types = ["parallel"]} - outs(%arg0 : memref(off + i)>>) { + inits(%arg0 : memref(off + i)>>) { ^bb(%0: f32): %1 = arith.constant 1: i4 linalg.yield %1: i4 @@ -159,7 +159,7 @@ ], iterator_types = ["parallel","parallel"]} ins(%arg0 : memref(off + i)>>) - outs(%arg1 : memref(off + i)>>) { + inits(%arg1 : memref(off + i)>>) { ^bb(%0: f32, %1: f32): linalg.yield %1: f32 } @@ -178,7 +178,7 @@ indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ], iterator_types = []} ins(%arg0 : memref) - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb1: linalg.yield %f0: f32 ^bb2: @@ -195,7 +195,7 @@ indexing_maps = [ affine_map<() -> ()> , affine_map<() -> ()> ], iterator_types = []} ins(%arg0 : memref) - outs(%arg0 : memref) { + inits(%arg0 : memref) { } } @@ -206,7 +206,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ], iterator_types = []} - outs(%arg0, %arg0 : memref, memref) { + inits(%arg0, %arg0 : memref, memref) { ^bb(%f: f32): linalg.yield %f: f32 } @@ -219,7 +219,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> ()> ], iterator_types = []} - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb(%i: i1): linalg.yield %i : i1 } @@ -232,7 +232,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> ()> ], iterator_types = []} - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb(%i: i1): linalg.yield %i : i1 } -> tensor @@ -245,7 +245,7 @@ linalg.generic { indexing_maps = [ affine_map<(i) -> (i)> ], iterator_types = ["parallel"]} - outs(%arg0 : memref(off + i)>>) { + inits(%arg0 : memref(off + i)>>) { ^bb(%i: f32): %0 = arith.constant 0: i1 linalg.yield %0: i1 @@ -261,7 +261,7 @@ indexing_maps = [ affine_map<(i) -> (i)> , affine_map<(i) -> (i)> ], iterator_types = ["parallel"]} ins(%arg0 : memref(off + i)>>) - outs(%arg1 : tensor) { + 
inits(%arg1 : tensor) { ^bb(%i: f32, %j: f32): linalg.yield %i: f32 } -> tensor @@ -274,7 +274,7 @@ linalg.generic { indexing_maps = [ affine_map<(i, j) -> (i, j)> ], iterator_types = ["parallel", "parallel"]} - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb(%0: f32) : %1 = arith.addf %0, %0: f32 } @@ -298,7 +298,7 @@ func.func @named_ops(%a3: memref, %b3: memref, %c3: memref) { // expected-error @+1 {{expected operand rank (2) to match the result rank of indexing_map #1 (3)}} linalg.batch_matmul ins(%a3, %b3: memref, memref) - outs(%c3 : memref) + inits(%c3 : memref) return } @@ -316,7 +316,7 @@ func.func @matching_inits(%m: memref, %t: tensor) { // expected-error @+1 {{expected type of operand #2 ('tensor') to match type of corresponding result ('tensor')}} %res = linalg.matmul ins(%m, %m : memref, memref) - outs(%t : tensor) + inits(%t : tensor) -> tensor return } @@ -327,7 +327,7 @@ { %0 = tensor.empty(%arg0, %arg1) : tensor // expected-error @+1 {{expected the number of results (0) to be equal to the number of output tensors (1)}} - linalg.fill ins(%arg2 : f32) outs(%0 : tensor) + linalg.fill ins(%arg2 : f32) inits(%0 : tensor) } // ----- @@ -336,7 +336,7 @@ (%arg0 : memref, %arg1 : f32) -> tensor { // expected-error @+1 {{expected the number of results (1) to be equal to the number of output tensors (0)}} - %0 = linalg.fill ins(%arg1 : f32) outs(%arg0 : memref) -> tensor + %0 = linalg.fill ins(%arg1 : f32) inits(%arg0 : memref) -> tensor return %0 : tensor } @@ -346,7 +346,7 @@ (%arg0 : tensor, %arg1 : f32) -> memref { // expected-error @+1 {{result #0 must be ranked tensor of any type values, but got 'memref'}} - %0 = linalg.fill ins(%arg1 : f32) outs(%arg0 : tensor) -> memref + %0 = linalg.fill ins(%arg1 : f32) inits(%arg0 : tensor) -> memref return %0 : memref } @@ -355,7 +355,7 @@ func.func @invalid_static_matmul(%arg0: memref<2x4xf32>, %arg1: memref<3x4xf32>, %arg2: memref<2x4xf32>) { // expected-error @+1 {{inferred input/output operand #1 has shape's dimension #0 to be 4, but found 3}} linalg.matmul ins(%arg0, %arg1 : memref<2x4xf32>, memref<3x4xf32>) - outs(%arg2 :memref<2x4xf32>) + inits(%arg2 :memref<2x4xf32>) return } @@ -366,7 +366,7 @@ linalg.conv_2d_nhwc_hwcf { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, %filter : memref<1x3x4x2xf32>, memref<3x2x2x1xf32>) - outs(%output : memref<1x2x3x1xf32>) + inits(%output : memref<1x2x3x1xf32>) return } @@ -382,7 +382,7 @@ func.func @invalid_reverse(%A: memref<5xf32>, %B: memref<5xf32>) { // expected-error @+1 {{unexpected result less than 0 at expression #0 in}} - linalg.generic #attrs ins(%A: memref<5xf32>) outs(%B: memref<5xf32>) { + linalg.generic #attrs ins(%A: memref<5xf32>) inits(%B: memref<5xf32>) { ^bb0(%a: f32, %b: f32): linalg.yield %a : f32 } @@ -396,7 +396,7 @@ -> tensor<64xf32> { %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) - outs(%init:tensor<64xf32>) + inits(%init:tensor<64xf32>) (%lhs_elem: f32, %rhs_elem: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 // expected-error @+1{{'linalg.yield' op expected number of yield values (1) to match the number of operands of the enclosing LinalgOp (2)}} @@ -413,7 +413,7 @@ // expected-error@+1{{'linalg.map' op expects number of operands to match the arity of mapper, but got: 2 and 3}} %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) - outs(%init:tensor<64xf32>) + inits(%init:tensor<64xf32>) (%lhs_elem: f32, %rhs_elem: f32, %extra_elem: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 
linalg.yield %0: f32 @@ -429,7 +429,7 @@ // expected-error@+1{{'linalg.map' op expected element type of input 'f32' to match bbArg type 'f64'}} %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) - outs(%init:tensor<64xf32>) + inits(%init:tensor<64xf32>) (%lhs_elem: f64, %rhs_elem: f64) { %0 = arith.addf %lhs_elem, %rhs_elem: f64 linalg.yield %0: f64 @@ -445,7 +445,7 @@ // expected-error@+1{{'linalg.map' op expected shape of input (64, 64) to match shape of output (32)}} %add = linalg.map ins(%lhs, %rhs : tensor<64x64xf32>, tensor<64x64xf32>) - outs(%init:tensor<32xf32>) + inits(%init:tensor<32xf32>) (%lhs_elem: f32, %rhs_elem: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 @@ -461,7 +461,7 @@ // expected-error @+1 {{'linalg.reduce' op init dimensions [16, 64] doesn't match input dimensions after reduction [16, 32]}} %reduce = linalg.reduce ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<16x64xf32>) + inits(%init:tensor<16x64xf32>) dimensions = [2] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -477,7 +477,7 @@ // expected-error @+1 {{'linalg.reduce' op dimensions for reduction should be in the range [0, 2].}} %reduce = linalg.reduce ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<16x64xf32>) + inits(%init:tensor<16x64xf32>) dimensions = [3] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -493,7 +493,7 @@ // expected-error @+1 {{'linalg.reduce' op attribute 'dimensions' failed to satisfy constraint: i64 dense array attribute should be in increasing order}} %reduce = linalg.reduce ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<16xf32>) + inits(%init:tensor<16xf32>) dimensions = [1, 1] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -509,7 +509,7 @@ // expected-error @+1 {{'linalg.reduce' op attribute 'dimensions' failed to satisfy constraint: i64 dense array attribute should be in increasing order}} %reduce = linalg.reduce ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<16xf32>) + inits(%init:tensor<16xf32>) dimensions = [2, 1] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -525,7 +525,7 @@ // expected-error @+1 {{'linalg.reduce' op number of dimensions after reduction 1 doesn't match the init rank 2}} %reduce = linalg.reduce ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<16x64xf32>) + inits(%init:tensor<16x64xf32>) dimensions = [1, 2] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -543,7 +543,7 @@ // expected-error @+1{{'linalg.reduce' op mismatching number of operands and block arguments}} %reduce, %reduce2 = linalg.reduce ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>) - outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>) + inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>) dimensions = [1] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -561,7 +561,7 @@ // expected-error @+1{{'linalg.reduce' op input element type 'f32' does not match corresponding block argument type 'f64'}} %reduce, %reduce2 = linalg.reduce ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>) - outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>) + inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>) dimensions = [1] (%in1: f32, %in2: f64, %out1: f32, %out2: f64) { %0 = arith.addf %in1, %out1: f32 @@ -580,7 +580,7 @@ // expected-error @+1{{'linalg.reduce' op output element type 'f64' does not match corresponding block argument type 'f32'}} %reduce, %reduce2 = linalg.reduce ins(%input1, %input2 : tensor<16x32x64xf32>, 
tensor<16x32x64xf32>) - outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf64>) + inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf64>) dimensions = [1] (%in1: f32, %in2: f32, %out1: f32, %out2: f32) { %0 = arith.addf %in1, %out1: f32 @@ -597,7 +597,7 @@ // expected-error @+1{{'linalg.reduce' op expects all inputs to have the same shapes. Shape at input-index 1 is not equal to the shape at input-index 0.}} %reduce, %reduce2 = linalg.reduce ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<17x32x64xf32>) - outs(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>) + inits(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>) dimensions = [1] (%in1: f32, %in2: f32, %out1: f32, %out2: f32) { %0 = arith.addf %in1, %out1: f32 @@ -615,7 +615,7 @@ // expected-error @+1{{'linalg.reduce' op expects all outputs to have the same shapes. Shape at output-index 1 is not equal to the shape at output-index 0.}} %reduce, %reduce2 = linalg.reduce ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>) - outs(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>) + inits(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>) dimensions = [1] (%in1: f32, %in2: f32, %out1: f32, %out2: f32) { %0 = arith.addf %in1, %out1: f32 @@ -632,7 +632,7 @@ // expected-error @+1 {{'linalg.transpose' op permutation is not valid}} %transpose = linalg.transpose ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<32x64x16xf32>) + inits(%init:tensor<32x64x16xf32>) permutation = [1, 1, 2] func.return %transpose : tensor<32x64x16xf32> } @@ -644,7 +644,7 @@ // expected-error @+1 {{'linalg.transpose' op dim(result, 0) = 32 doesn't match dim(input, permutation[0]) = 16}} %transpose = linalg.transpose ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<32x64x16xf32>) + inits(%init:tensor<32x64x16xf32>) permutation = [0, 1, 2] func.return %transpose : tensor<32x64x16xf32> } @@ -657,7 +657,7 @@ // expected-error @+1 {{'linalg.transpose' op size of permutation 2 does not match the argument rank 3}} %transpose = linalg.transpose ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<32x64x16xf32>) + inits(%init:tensor<32x64x16xf32>) permutation = [1, 0] func.return %transpose : tensor<32x64x16xf32> } @@ -669,7 +669,7 @@ // expected-error @+1 {{'linalg.transpose' op input rank 2 does not match init rank 3}} %transpose = linalg.transpose ins(%input:tensor<16x32xf32>) - outs(%init:tensor<32x64x16xf32>) + inits(%init:tensor<32x64x16xf32>) permutation = [1, 0, 2] func.return %transpose : tensor<32x64x16xf32> } @@ -682,7 +682,7 @@ // expected-error @+1 {{'linalg.broadcast' op dimensions should be in sorted order}} %bcast = linalg.broadcast ins(%input:tensor<4x16xf32>) - outs(%init:tensor<4x8x16xf32>) + inits(%init:tensor<4x8x16xf32>) dimensions = [1, 0] func.return %bcast : tensor<4x8x16xf32> } @@ -695,7 +695,7 @@ // expected-error @+1 {{'linalg.broadcast' op input rank does match the number of dimensions. expected: 2, got: 1}} %bcast = linalg.broadcast ins(%input:tensor<4x16xf32>) - outs(%init:tensor<4x8x16xf32>) + inits(%init:tensor<4x8x16xf32>) dimensions = [0] func.return %bcast : tensor<4x8x16xf32> } @@ -708,7 +708,7 @@ // expected-error @+1 {{'linalg.broadcast' op dimension 1 is out of range. expected range: [0, 2], got: 5}} %bcast = linalg.broadcast ins(%input:tensor<4x16xf32>) - outs(%init:tensor<4x8x16xf32>) + inits(%init:tensor<4x8x16xf32>) dimensions = [0, 5] func.return %bcast : tensor<4x8x16xf32> } @@ -721,7 +721,7 @@ // expected-error @+1 {{'linalg.broadcast' op input dim 0 should match init dim 0. 
input: 4, init: 5}} %bcast = linalg.broadcast ins(%input:tensor<4x16xf32>) - outs(%init:tensor<5x8x16xf32>) + inits(%init:tensor<5x8x16xf32>) dimensions = [0, 2] func.return %bcast : tensor<5x8x16xf32> } @@ -734,7 +734,7 @@ // expected-error @+1 {{'linalg.broadcast' op init dim 1 can't be dynamic, because it's not matched to input}} %bcast = linalg.broadcast ins(%input:tensor<4x16xf32>) - outs(%init:tensor<4x?x16xf32>) + inits(%init:tensor<4x?x16xf32>) dimensions = [0, 2] func.return %bcast : tensor<4x?x16xf32> } @@ -747,7 +747,7 @@ // expected-error @+1 {{'linalg.broadcast' op input dim 0 should match init dim 0. input: 1, init: 4}} %bcast = linalg.broadcast ins(%input:tensor<1x16xf32>) - outs(%init:tensor<4x?x16xf32>) + inits(%init:tensor<4x?x16xf32>) dimensions = [0, 2] func.return %bcast : tensor<4x?x16xf32> } diff --git a/mlir/test/Dialect/Linalg/library-calls.mlir b/mlir/test/Dialect/Linalg/library-calls.mlir --- a/mlir/test/Dialect/Linalg/library-calls.mlir +++ b/mlir/test/Dialect/Linalg/library-calls.mlir @@ -14,11 +14,11 @@ %C = memref.alloc(%x, %y) : memref // CHECK: call @linalg_fill_f32_viewsxsxf32({{.*}}) : (f32, memref) - linalg.fill ins(%f0 : f32) outs(%C : memref) + linalg.fill ins(%f0 : f32) inits(%C : memref) // CHECK: call @linalg_matmul_viewsxsxf32_viewsxsxf32_viewsxsxf32({{.*}}) : (memref, memref, memref) -> () linalg.matmul ins(%A, %B: memref, memref) - outs(%C: memref) + inits(%C: memref) return %C : memref } diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -15,7 +15,7 @@ %B = memref.view %arg0[%c0][%K, %N] : memref to memref %C = memref.view %arg0[%c0][%M, %N] : memref to memref linalg.matmul ins(%A, %B: memref, memref) - outs(%C: memref) + inits(%C: memref) return } // CHECK-LABEL: func @matmul(%{{.*}}: memref, @@ -60,7 +60,7 @@ %3 = memref.view %arg0[%c0][%M] : memref to memref %4 = memref.view %arg0[%c0][%N] : memref to memref linalg.matvec ins(%2, %3: memref, memref) - outs(%4 : memref) + inits(%4 : memref) return } // CHECK-LABEL: func @matvec(%{{.*}}: memref, @@ -101,7 +101,7 @@ %2 = memref.view %arg0[%c0][%M] : memref to memref %3 = memref.view %arg0[%c0][] : memref to memref linalg.dot ins(%1, %2 : memref, memref) - outs(%3 : memref) + inits(%3 : memref) return } // CHECK-LABEL: func @dot(%{{.*}}: memref, @@ -135,7 +135,7 @@ %arg3: memref) { // Verifies that we use the correct arith operations for integers. linalg.dot ins(%arg0, %arg1 : memref, memref) - outs(%arg3 : memref) + inits(%arg3 : memref) return } // CHECK-LABEL: func @dot_int( @@ -148,7 +148,7 @@ %arg3: memref) { // Verifies that we use the correct (saturating) arith operations for booleans. 
linalg.dot ins(%arg0, %arg1 : memref, memref) - outs(%arg3 : memref) + inits(%arg3 : memref) return } // CHECK-LABEL: func @dot_bool( @@ -160,7 +160,7 @@ func.func @dot_view(%arg0: memref>, %arg1: memref>, %arg2: memref) { linalg.dot ins(%arg0, %arg1 : memref>, memref>) - outs(%arg2: memref) + inits(%arg2: memref) return } // CHECK-LABEL: func @dot_view( @@ -186,7 +186,7 @@ // CHECKPARALLEL: store %[[res]], %{{.*}}[] : memref func.func @fill_view(%arg0: memref>, %arg1: f32) { - linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>) + linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>) return } // CHECK-LABEL: func @fill_view( @@ -200,7 +200,7 @@ // CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}] : memref> func.func @fill_view0(%arg0: memref, %arg1: f32) { - linalg.fill ins(%arg1 : f32) outs(%arg0 : memref) + linalg.fill ins(%arg1 : f32) inits(%arg0 : memref) return } // CHECK-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) { @@ -210,7 +210,7 @@ // CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref func.func @fill_view3(%arg0: memref>, %arg1: f32) { - linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>) + linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>) return } // CHECK-LABEL: func @fill_view3( @@ -230,7 +230,7 @@ iterator_types = ["parallel"], indexing_maps = [ affine_map<(i) -> (i)>, affine_map<(i) -> (i)>] } ins(%arg0: memref>) - outs(%arg1: memref>) { + inits(%arg1: memref>) { ^bb0(%a: f32, %b: f32): linalg.yield %a : f32 } @@ -264,7 +264,7 @@ func.func @generic_region(%arg0: memref>, %arg1: memref>, %arg2: memref>) { linalg.generic #trait2 ins(%arg0: memref>) - outs(%arg1, %arg2 : memref>, + inits(%arg1, %arg2 : memref>, memref>) { ^bb0(%a: f32, %b: f32, %c: f32): %d = arith.mulf %a, %b : f32 @@ -309,7 +309,7 @@ %arg2: memref>) { linalg.generic #trait4 ins(%arg0 : memref>) - outs(%arg1, %arg2 : memref>, + inits(%arg1, %arg2 : memref>, memref>) { ^bb0(%a: f32, %b: f32, %c: f32): %i = linalg.index 0 : index @@ -377,7 +377,7 @@ { linalg.generic #trait_broadcast ins(%arg0 : memref) - outs(%arg1 : memref<3x4xf32>) { + inits(%arg1 : memref<3x4xf32>) { ^bb(%a: f32, %b: f32) : linalg.yield %a : f32 } @@ -403,7 +403,7 @@ { linalg.generic #trait_broadcast ins(%arg0 : f32) - outs(%arg1 : memref<3x4xf32>) { + inits(%arg1 : memref<3x4xf32>) { ^bb(%a: f32, %b: f32) : linalg.yield %a : f32 } @@ -427,7 +427,7 @@ { linalg.generic #trait_broadcast ins(%arg0 : memref) - outs(%arg1 : memref<3x4xi32>) { + inits(%arg1 : memref<3x4xi32>) { ^bb(%a: i32, %b: i32) : %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -477,7 +477,7 @@ { linalg.generic #trait_reduce_1D ins(%arg0 : memref) - outs(%arg1 : memref) { + inits(%arg1 : memref) { ^bb(%a: f32, %b: f32) : %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -523,7 +523,7 @@ { linalg.generic #trait_reduce_init_1D ins(%arg0, %arg1 : memref, memref) - outs(%arg2 : memref) { + inits(%arg2 : memref) { ^bb(%a: f32, %b: f32, %c: f32) : %i = linalg.index 0 : index %0 = arith.constant 0 : index @@ -567,7 +567,7 @@ } func.func @generic_const_init(%arg0: memref) { %cst = arith.constant 1.0 : f32 - linalg.generic #trait_const_fill outs(%arg0 : memref) { + linalg.generic #trait_const_fill inits(%arg0 : memref) { ^bb0(%arg1: f32): linalg.yield %cst : f32 } @@ -601,7 +601,7 @@ { linalg.generic #scalar_trait ins(%arg0, %arg1 : memref, memref) - outs(%arg2 : memref) { + inits(%arg2 : memref) { ^bb(%a : f32, %b : f32, %c : f32) : %result = scf.if %arg3 -> (f32) { scf.yield %a : f32 @@ -643,7 +643,7 @@ 
//----------------------------------------------------------------------------// func.func @named_batch_matmul(%A: memref, %B: memref, %C: memref) { linalg.batch_matmul ins(%A, %B : memref, memref) - outs(%C : memref) + inits(%C : memref) return } // CHECK-LABEL: @named_batch_matmul @@ -685,7 +685,7 @@ func.func @conv1d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () { linalg.conv_1d ins(%in, %filter : memref, memref) - outs(%out : memref) + inits(%out : memref) return } @@ -728,7 +728,7 @@ func.func @conv2d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () { linalg.conv_2d ins(%in, %filter : memref, memref) - outs(%out: memref) + inits(%out: memref) return } // CHECK-LABEL: @conv2d_no_symbols @@ -781,7 +781,7 @@ func.func @conv3d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () { linalg.conv_3d ins(%in, %filter : memref, memref) - outs(%out : memref) + inits(%out : memref) return } @@ -856,7 +856,7 @@ iterator_types = ["parallel"], indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>]} ins(%0: memref>) - outs(%1: memref>) { + inits(%1: memref>) { ^bb0(%a: i32, %b: i32): linalg.yield %a : i32 } diff --git a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir --- a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir +++ b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir @@ -57,7 +57,7 @@ // CHECK: %[[R2c:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP4]], #[[$MAP5]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK: ins(%{{.*}} : tensor<1x28x28x1xf32>) outs(%{{.*}} : tensor<1x32x32x1xf32>) +// CHECK: ins(%{{.*}} : tensor<1x28x28x1xf32>) inits(%{{.*}} : tensor<1x32x32x1xf32>) // CHECK: ^bb0(%[[VAL:.+]]: f32, %{{.*}}: f32) // CHECK: linalg.yield %[[VAL]] : f32 // CHECK: return %[[R2c:.+]] diff --git a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir --- a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir +++ b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir @@ -28,7 +28,7 @@ iterator_types = ["parallel", "parallel"] } ins(%arg0: tensor<10x34xf32>) - outs(%arg1: tensor<10x34xf32>) { + inits(%arg1: tensor<10x34xf32>) { ^bb0(%0: f32, %1: f32): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -55,7 +55,7 @@ // CHECK: %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]]) // CHECK: %[[INSLICE_2:.+]] = tensor.extract_slice %[[SLICE_2_IN]][0, %[[I2]]] [2, 8] [1, 1] // CHECK: %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1] - // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>) + // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) inits(%[[OUTSLICE_2]] : tensor<2x8xf32>) // CHECK: %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]] // CHECK: scf.yield %[[RESPARTIAL]] diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir --- a/mlir/test/Dialect/Linalg/named-ops.mlir +++ b/mlir/test/Dialect/Linalg/named-ops.mlir @@ -4,11 +4,11 @@ func.func @depthwise_conv_1d_nwc_wcm(%input: tensor<1x12x8xf32>, %filter: tensor<3x8x8xf32>) -> tensor<1x10x8x8xf32> { %zero = arith.constant 0.000000e+00 : f32 %init = tensor.empty() : tensor<1x10x8x8xf32> - %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8x8xf32>) -> 
tensor<1x10x8x8xf32> + %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32> // CHECK: depthwise_conv_1d_nwc_wcm %0 = linalg.depthwise_conv_1d_nwc_wcm {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %filter : tensor<1x12x8xf32>, tensor<3x8x8xf32>) - outs(%fill : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32> + inits(%fill : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32> return %0 : tensor<1x10x8x8xf32> } @@ -18,11 +18,11 @@ func.func @depthwise_conv_1d_nwc_wc(%input: tensor<1x12x8xf32>, %filter: tensor<3x8xf32>) -> tensor<1x10x8xf32> { %zero = arith.constant 0.000000e+00 : f32 %init = tensor.empty() : tensor<1x10x8xf32> - %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8xf32>) -> tensor<1x10x8xf32> + %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<1x10x8xf32>) -> tensor<1x10x8xf32> // CHECK: depthwise_conv_1d_nwc_wc %0 = linalg.depthwise_conv_1d_nwc_wc {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %filter : tensor<1x12x8xf32>, tensor<3x8xf32>) - outs(%fill : tensor<1x10x8xf32>) -> tensor<1x10x8xf32> + inits(%fill : tensor<1x10x8xf32>) -> tensor<1x10x8xf32> return %0 : tensor<1x10x8xf32> } @@ -32,15 +32,15 @@ func.func @depthwise_conv_2d_nhwc_hwcm_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> { %zero = arith.constant 0.000000e+00 : f32 %init = tensor.empty() : tensor<2x3x4x2x3xf32> - %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32> + %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32> // CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm // CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>) - // CHECK-SAME: outs(%{{.+}} : tensor<2x3x4x2x3xf32>) + // CHECK-SAME: inits(%{{.+}} : tensor<2x3x4x2x3xf32>) %0 = linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>) - outs(%fill : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32> + inits(%fill : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32> return %0 : tensor<2x3x4x2x3xf32> } @@ -49,11 +49,11 @@ // CHECK: linalg.depthwise_conv_2d_nhwc_hwcm // CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) - // CHECK-SAME: outs(%{{.+}} : memref<2x3x4x2x3xf32>) + // CHECK-SAME: inits(%{{.+}} : memref<2x3x4x2x3xf32>) linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) - outs(%output : memref<2x3x4x2x3xf32>) + inits(%output : memref<2x3x4x2x3xf32>) return } @@ -63,10 +63,10 @@ // CHECK: %{{.+}} = linalg.depthwise_conv_1d_nw // CHECK-SAME: {dilations = dense<1> : vector<1xi64>, strides = dense<2> : vector<1xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x96xf32>, tensor<3x96xf32>) - // CHECK-SAME: outs(%{{.+}} : tensor<1x56x96xf32>) -> tensor<1x56x96xf32> + // CHECK-SAME: inits(%{{.+}} : tensor<1x56x96xf32>) -> tensor<1x56x96xf32> %0 = linalg.depthwise_conv_1d_nwc_wc {dilations = dense<1> : vector<1xi64>, strides = dense<2> : vector<1xi64>} ins(%input, %filter: tensor<1x113x96xf32>, tensor<3x96xf32>) - 
outs(%init: tensor<1x56x96xf32>) -> tensor<1x56x96xf32> + inits(%init: tensor<1x56x96xf32>) -> tensor<1x56x96xf32> return %0: tensor<1x56x96xf32> } @@ -76,10 +76,10 @@ // CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwc // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x113x96xf32>, tensor<3x3x96xf32>) - // CHECK-SAME: outs(%{{.+}} : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32> + // CHECK-SAME: inits(%{{.+}} : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32> %0 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: tensor<1x113x113x96xf32>, tensor<3x3x96xf32>) - outs(%init: tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32> + inits(%init: tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32> return %0: tensor<1x56x56x96xf32> } @@ -88,10 +88,10 @@ // CHECK: linalg.depthwise_conv_2d_nhwc_hwc // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x113x113x96xf32>, memref<3x3x96xf32>) - // CHECK-SAME: outs(%{{.+}} : memref<1x56x56x96xf32>) + // CHECK-SAME: inits(%{{.+}} : memref<1x56x56x96xf32>) linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) - outs(%output: memref<1x56x56x96xf32>) + inits(%output: memref<1x56x56x96xf32>) return } @@ -101,10 +101,10 @@ // CHECK: %{{.+}} = linalg.depthwise_conv_2d_nchw_chw // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x96x113x113xf32>, tensor<96x3x3xf32>) - // CHECK-SAME: outs(%{{.+}} : tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32> + // CHECK-SAME: inits(%{{.+}} : tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32> %0 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: tensor<1x96x113x113xf32>, tensor<96x3x3xf32>) - outs(%init: tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32> + inits(%init: tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32> return %0: tensor<1x96x56x56xf32> } @@ -113,25 +113,25 @@ // CHECK: linalg.depthwise_conv_2d_nchw_chw // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x96x113x113xf32>, memref<96x3x3xf32>) - // CHECK-SAME: outs(%{{.+}} : memref<1x96x56x56xf32>) + // CHECK-SAME: inits(%{{.+}} : memref<1x96x56x56xf32>) linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: memref<1x96x113x113xf32>, memref<96x3x3xf32>) - outs(%output: memref<1x96x56x56xf32>) + inits(%output: memref<1x96x56x56xf32>) return } func.func @depthwise_conv_2d_nhwc_hwcm_tensor_dilated(%input: tensor<2x8x9x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x6x7x2x3xf32> { %zero = arith.constant 0.000000e+00 : f32 %init = tensor.empty() : tensor<2x6x7x2x3xf32> - %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32> + %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32> // CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm // CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x8x9x2xf32>, 
tensor<2x2x2x3xf32>) - // CHECK-SAME: outs(%{{.+}} : tensor<2x6x7x2x3xf32>) + // CHECK-SAME: inits(%{{.+}} : tensor<2x6x7x2x3xf32>) %0 = linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : tensor<2x8x9x2xf32>, tensor<2x2x2x3xf32>) - outs(%fill : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32> + inits(%fill : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32> return %0 : tensor<2x6x7x2x3xf32> } @@ -140,11 +140,11 @@ // CHECK: linalg.depthwise_conv_2d_nhwc_hwcm // CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x8x9x2xf32>, memref<2x2x2x3xf32>) - // CHECK-SAME: outs(%{{.+}} : memref<2x6x7x2x3xf32>) + // CHECK-SAME: inits(%{{.+}} : memref<2x6x7x2x3xf32>) linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x8x9x2xf32>, memref<2x2x2x3xf32>) - outs(%output : memref<2x6x7x2x3xf32>) + inits(%output : memref<2x6x7x2x3xf32>) return } @@ -157,7 +157,7 @@ // CHECK-NOT: dilations = linalg.depthwise_conv_2d_nhwc_hwc ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) - outs(%output: memref<1x56x56x96xf32>) + inits(%output: memref<1x56x56x96xf32>) return } @@ -167,7 +167,7 @@ // expected-error @+1 {{op attribute 'strides' failed to satisfy constraint: 64-bit signless int elements attribute of shape [2]}} linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2.0> : vector<2xf32>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) - outs(%output: memref<1x56x56x96xf32>) + inits(%output: memref<1x56x56x96xf32>) return } @@ -177,7 +177,7 @@ // expected-error @+1 {{op attribute 'strides' failed to satisfy constraint: 64-bit signless int elements attribute of shape [2]}} linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<3xi64> } ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) - outs(%output: memref<1x56x56x96xf32>) + inits(%output: memref<1x56x56x96xf32>) return } @@ -187,11 +187,11 @@ func.func @depthwise_conv_3d_ndhwc_dhwcm(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> { %zero = arith.constant 0.000000e+00 : f32 %init = tensor.empty() : tensor<2x3x13x4x6x6xf32> - %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> + %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> // CHECK: depthwise_conv_3d_ndhwc_dhwcm %0 = linalg.depthwise_conv_3d_ndhwc_dhwcm {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>} ins(%input, %filter : tensor<2x6x13x12x6xf32>, tensor<2x1x3x6x6xf32>) - outs(%fill : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> + inits(%fill : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> return %0 : tensor<2x3x13x4x6x6xf32> } @@ -201,11 +201,11 @@ func.func @depthwise_conv_3d_ndhwc_dhwc(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6xf32>) -> tensor<2x3x13x4x6xf32> { %zero = arith.constant 0.000000e+00 : f32 %init = tensor.empty() : tensor<2x3x13x4x6xf32> - %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32> + %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32> // CHECK: depthwise_conv_3d_ndhwc_dhwc %0 = 
     linalg.depthwise_conv_3d_ndhwc_dhwc {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
     ins(%input, %filter : tensor<2x6x13x12x6xf32>, tensor<2x1x3x6xf32>)
-    outs(%fill : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
+    inits(%fill : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
   return %0 : tensor<2x3x13x4x6xf32>
 }
@@ -217,11 +217,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<1xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<1xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -233,11 +233,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<1xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<1xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME:   outs(%{{.+}} : memref)
+  // CHECK-SAME:   inits(%{{.+}} : memref)
   linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins (%input, %filter: memref, memref)
-    outs (%output: memref)
+    inits (%output: memref)
   return
 }
@@ -249,11 +249,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<1xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<1xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_1d_ncw_fcw {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -265,11 +265,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<1xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<1xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME:   outs(%{{.+}} : memref)
+  // CHECK-SAME:   inits(%{{.+}} : memref)
   linalg.conv_1d_ncw_fcw {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins (%input, %filter: memref, memref)
-    outs (%output: memref)
+    inits (%output: memref)
   return
 }
@@ -281,11 +281,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -297,11 +297,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_2d_ngchw_fgchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -313,11 +313,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -329,11 +329,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor<64x3x3x32xf32>)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor, tensor<64x3x3x32xf32>)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -345,11 +345,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME:   outs(%{{.+}} : memref)
+  // CHECK-SAME:   inits(%{{.+}} : memref)
   linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: memref, memref)
-    outs (%output: memref)
+    inits (%output: memref)
   return
 }
@@ -361,11 +361,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME:   outs(%{{.+}} : memref)
+  // CHECK-SAME:   inits(%{{.+}} : memref)
   linalg.conv_2d_ngchw_fgchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: memref, memref)
-    outs (%output: memref)
+    inits (%output: memref)
   return
 }
@@ -377,11 +377,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -393,11 +393,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME:   outs(%{{.+}} : memref)
+  // CHECK-SAME:   inits(%{{.+}} : memref)
   linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins (%input, %filter: memref, memref)
-    outs (%output: memref)
+    inits (%output: memref)
   return
 }
@@ -408,15 +408,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
 func.func @pooling_nhwc_sum_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   return %res : tensor<1x2x2x1xf32>
 }
@@ -427,11 +427,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xf32>, memref<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x1xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x1xf32>)
 func.func @pooling_nhwc_sum(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %output: memref<1x2x2x1xf32>) {
   linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xf32>, memref<3x3xf32>)
-    outs(%output: memref<1x2x2x1xf32>)
+    inits(%output: memref<1x2x2x1xf32>)
   return
 }
@@ -442,15 +442,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
 func.func @pooling_nchw_sum_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x1x2x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   %res = linalg.pooling_nchw_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+    inits(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   return %res : tensor<1x1x2x2xf32>
 }
@@ -461,11 +461,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x1x4x4xf32>, memref<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x1x2x2xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x1x2x2xf32>)
 func.func @pooling_nchw_sum(%input: memref<1x1x4x4xf32>, %fake: memref<3x3xf32>, %output: memref<1x1x2x2xf32>) {
   linalg.pooling_nchw_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x1x4x4xf32>, memref<3x3xf32>)
-    outs(%output: memref<1x1x2x2xf32>)
+    inits(%output: memref<1x1x2x2xf32>)
   return
 }
@@ -476,15 +476,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
 func.func @pooling_nhwc_max_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   return %res : tensor<1x2x2x1xf32>
 }
@@ -494,16 +494,16 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
 func.func @pooling_nchw_max_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x1x2x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   %res = linalg.pooling_nchw_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+    inits(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   return %res : tensor<1x1x2x2xf32>
 }
@@ -514,11 +514,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xf32>, memref<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x1xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x1xf32>)
 func.func @pooling_nhwc_max(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %output: memref<1x2x2x1xf32>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xf32>, memref<3x3xf32>)
-    outs(%output: memref<1x2x2x1xf32>)
+    inits(%output: memref<1x2x2x1xf32>)
   return
 }
@@ -529,15 +529,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi8>, tensor<3x3xi8>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
 func.func @pooling_nhwc_i8_max_tensor(%input: tensor<1x4x4x1xi8>) -> tensor<1x2x2x1xi8> {
   %fake = tensor.empty() : tensor<3x3xi8>
   %init = tensor.empty() : tensor<1x2x2x1xi8>
   %cst = arith.constant 0 : i8
-  %fill = linalg.fill ins(%cst : i8) outs(%init : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+  %fill = linalg.fill ins(%cst : i8) inits(%init : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi8>, tensor<3x3xi8>)
-    outs(%fill: tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+    inits(%fill: tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
   return %res : tensor<1x2x2x1xi8>
 }
@@ -548,11 +548,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi8>, memref<3x3xi8>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x1xi8>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x1xi8>)
 func.func @pooling_nhwc_i8_max(%input: memref<1x4x4x1xi8>, %fake: memref<3x3xi8>, %output: memref<1x2x2x1xi8>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xi8>, memref<3x3xi8>)
-    outs(%output: memref<1x2x2x1xi8>)
+    inits(%output: memref<1x2x2x1xi8>)
   return
 }
@@ -563,15 +563,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi16>, tensor<3x3xi16>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
 func.func @pooling_nhwc_i16_max_tensor(%input: tensor<1x4x4x1xi16>) -> tensor<1x2x2x1xi16> {
   %fake = tensor.empty() : tensor<3x3xi16>
   %init = tensor.empty() : tensor<1x2x2x1xi16>
   %cst = arith.constant 0 : i16
-  %fill = linalg.fill ins(%cst : i16) outs(%init : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+  %fill = linalg.fill ins(%cst : i16) inits(%init : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi16>, tensor<3x3xi16>)
-    outs(%fill: tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+    inits(%fill: tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
   return %res : tensor<1x2x2x1xi16>
 }
@@ -582,11 +582,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi16>, memref<3x3xi16>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x1xi16>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x1xi16>)
 func.func @pooling_nhwc_i16_max(%input: memref<1x4x4x1xi16>, %fake: memref<3x3xi16>, %output: memref<1x2x2x1xi16>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xi16>, memref<3x3xi16>)
-    outs(%output: memref<1x2x2x1xi16>)
+    inits(%output: memref<1x2x2x1xi16>)
   return
 }
@@ -597,15 +597,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi32>, tensor<3x3xi32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
 func.func @pooling_nhwc_i32_max_tensor(%input: tensor<1x4x4x1xi32>) -> tensor<1x2x2x1xi32> {
   %fake = tensor.empty() : tensor<3x3xi32>
   %init = tensor.empty() : tensor<1x2x2x1xi32>
   %cst = arith.constant 0 : i32
-  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+  %fill = linalg.fill ins(%cst : i32) inits(%init : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi32>, tensor<3x3xi32>)
-    outs(%fill: tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+    inits(%fill: tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
   return %res : tensor<1x2x2x1xi32>
 }
@@ -616,11 +616,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi32>, memref<3x3xi32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x1xi32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x1xi32>)
 func.func @pooling_nhwc_i32_max(%input: memref<1x4x4x1xi32>, %fake: memref<3x3xi32>, %output: memref<1x2x2x1xi32>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xi32>, memref<3x3xi32>)
-    outs(%output: memref<1x2x2x1xi32>)
+    inits(%output: memref<1x2x2x1xi32>)
   return
 }
@@ -632,15 +632,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
 func.func @pooling_nhwc_min_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   return %res : tensor<1x2x2x1xf32>
 }
@@ -651,11 +651,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xf32>, memref<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x1xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x1xf32>)
 func.func @pooling_nhwc_min(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %output: memref<1x2x2x1xf32>) {
   linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xf32>, memref<3x3xf32>)
-    outs(%output: memref<1x2x2x1xf32>)
+    inits(%output: memref<1x2x2x1xf32>)
   return
 }
@@ -666,15 +666,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_sum_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -685,11 +685,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x2x1xf32>)
 func.func @pooling_ndhwc_sum(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
   linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-    outs(%output: memref<1x2x2x2x1xf32>)
+    inits(%output: memref<1x2x2x2x1xf32>)
   return
 }
@@ -700,15 +700,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_max_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -719,11 +719,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x2x1xf32>)
 func.func @pooling_ndhwc_max(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
   linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-    outs(%output: memref<1x2x2x2x1xf32>)
+    inits(%output: memref<1x2x2x2x1xf32>)
   return
 }
@@ -734,15 +734,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_min_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -753,11 +753,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x2x1xf32>)
func.func @pooling_ndhwc_min(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
   linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-    outs(%output: memref<1x2x2x2x1xf32>)
+    inits(%output: memref<1x2x2x2x1xf32>)
   return
 }
@@ -800,8 +800,8 @@
 func.func @batch_reduce_matmul(%arg0: tensor<8x128x256xf32>, %arg1: tensor<8x256x512xf32>, %arg2: tensor<128x512xf32>) -> tensor<128x512xf32> {
   // CHECK: %{{.+}} = linalg.batch_reduce_matmul
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<8x128x256xf32>, tensor<8x256x512xf32>)
-  // CHECK-SAME: outs(%{{.+}} : tensor<128x512xf32>) -> tensor<128x512xf32>
-  %0 = linalg.batch_reduce_matmul ins(%arg0, %arg1 : tensor<8x128x256xf32>, tensor<8x256x512xf32>) outs(%arg2: tensor<128x512xf32>) -> tensor<128x512xf32>
+  // CHECK-SAME: inits(%{{.+}} : tensor<128x512xf32>) -> tensor<128x512xf32>
+  %0 = linalg.batch_reduce_matmul ins(%arg0, %arg1 : tensor<8x128x256xf32>, tensor<8x256x512xf32>) inits(%arg2: tensor<128x512xf32>) -> tensor<128x512xf32>
   return %0: tensor<128x512xf32>
 }
@@ -810,7 +810,7 @@
 func.func @batch_reduce_matmul(%arg0: memref, %arg1: memref, %arg2: memref) {
   // CHECK: linalg.batch_reduce_matmul
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME: outs(%{{.+}} : memref)
-  linalg.batch_reduce_matmul ins(%arg0, %arg1 : memref, memref) outs(%arg2: memref)
+  // CHECK-SAME: inits(%{{.+}} : memref)
+  linalg.batch_reduce_matmul ins(%arg0, %arg1 : memref, memref) inits(%arg2: memref)
   return
 }
diff --git a/mlir/test/Dialect/Linalg/namedop_conversion.mlir b/mlir/test/Dialect/Linalg/namedop_conversion.mlir
--- a/mlir/test/Dialect/Linalg/namedop_conversion.mlir
+++ b/mlir/test/Dialect/Linalg/namedop_conversion.mlir
@@ -4,9 +4,9 @@
 func.func @depthwise_conv(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
   // CHECK-DAG: %[[KERNEL:.+]] = tensor.collapse_shape %arg1 {{\[\[}}0], [1], [2, 3]]
   // CHECK-DAG: %[[INIT:.+]] = tensor.collapse_shape %arg2 {{\[\[}}0], [1], [2], [3, 4]]
-  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor) outs(%[[INIT]] : tensor)
+  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor) inits(%[[INIT]] : tensor)
   // CHECK: %[[OUT:.+]] = tensor.expand_shape %[[CONV]] {{\[\[}}0], [1], [2], [3, 4]]
-  %0 = linalg.depthwise_conv_2d_nhwc_hwcm {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.depthwise_conv_2d_nhwc_hwcm {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor) inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
@@ -17,8 +17,8 @@
 func.func @depthwise_conv_q(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3 : i32, %arg4 : i32) -> tensor {
   // CHECK-DAG: %[[KERNEL:.+]] = tensor.collapse_shape %arg1 {{\[\[}}0], [1], [2, 3]]
   // CHECK-DAG: %[[INIT:.+]] = tensor.collapse_shape %arg2 {{\[\[}}0], [1], [2], [3, 4]]
-  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]], %arg3, %arg4 : tensor, tensor, i32, i32) outs(%[[INIT]] : tensor)
+  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]], %arg3, %arg4 : tensor, tensor, i32, i32) inits(%[[INIT]] : tensor)
   // CHECK: %[[OUT:.+]] = tensor.expand_shape %[[CONV]] {{\[\[}}0], [1], [2], [3, 4]]
-  %0 = linalg.depthwise_conv_2d_nhwc_hwcm_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1, %arg3, %arg4 : tensor, tensor, i32, i32) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.depthwise_conv_2d_nhwc_hwcm_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1, %arg3, %arg4 : tensor, tensor, i32, i32) inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
@@ -18,15 +18,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -45,15 +45,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -72,15 +72,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -99,15 +99,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -126,15 +126,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -153,15 +153,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -180,15 +180,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -207,15 +207,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -234,15 +234,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -261,15 +261,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -288,15 +288,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -315,15 +315,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -344,13 +344,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -371,13 +371,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -397,13 +397,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -424,13 +424,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -453,11 +453,11 @@
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -480,11 +480,11 @@
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -505,13 +505,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -532,13 +532,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -559,13 +559,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -586,13 +586,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -615,11 +615,11 @@
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -642,10 +642,10 @@
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir
@@ -14,7 +14,7 @@
   // CHECK: linalg.fill
   // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor
   // CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]
@@ -41,7 +41,7 @@
   // CHECK: linalg.fill
   // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor
   // CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
@@ -22,8 +22,8 @@
   /// Inplaceable, no alloc
   // CHECK-NOT: alloc
-  // CHECK: linalg.fill ins(%[[F0]] : f32) outs(%[[A]] : memref>)
-  %r = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+  // CHECK: linalg.fill ins(%[[F0]] : f32) inits(%[[A]] : memref>)
+  %r = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
   // CHECK: return
   // CHECK-NOT: tensor
@@ -45,8 +45,8 @@
   // CHECK: %[[D0:.*]] = memref.dim %[[A]], {{.*}} : memref>
   // CHECK: %[[ALLOC:.*]] = memref.alloc(%[[D0]]) {alignment = 128 : i64} : memref
-  // CHECK: linalg.fill ins(%[[F0]] : f32) outs(%[[ALLOC]] : memref)
-  %r = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+  // CHECK: linalg.fill ins(%[[F0]] : f32) inits(%[[ALLOC]] : memref)
+  %r = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
   // CHECK-NOT: dealloc
   // CHECK: return %[[ALLOC]] : memref
@@ -67,14 +67,14 @@
   /// Cross-op multiple uses of %A, the first op which has interfering reads must alloc.
   // CHECK: %[[ALLOC:.*]] = memref.alloc
-  // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[ALLOC]]
-  %f = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+  // CHECK: linalg.fill ins({{.*}}{{.*}}inits(%[[ALLOC]]
+  %f = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
   /// The second op has no interfering reads and can reuse.
   // CHECK-NOT: alloc
-  // CHECK: linalg.matmul ins(%[[ALLOC]], %[[ALLOC]]{{.*}}) outs(%[[A]]
+  // CHECK: linalg.matmul ins(%[[ALLOC]], %[[ALLOC]]{{.*}}) inits(%[[A]]
   %r = linalg.matmul ins(%f, %f: tensor, tensor)
-    outs(%A: tensor)
+    inits(%A: tensor)
     -> tensor
   // CHECK: memref.dealloc %[[ALLOC]]
@@ -91,7 +91,7 @@
   /// Within op multiple uses of %A, must alloc.
   // CHECK: alloc
   %r = linalg.matmul ins(%A, %A: tensor, tensor)
-    outs(%A: tensor)
+    inits(%A: tensor)
     -> tensor
   // CHECK-NOT: dealloc
   return %r: tensor
@@ -181,8 +181,8 @@
     tensor<128x192xf32> to tensor<8x16xf32>
   // linalg.fill is inplace.
-  // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[C_SLICE]]
-  %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<8x16xf32>) -> tensor<8x16xf32>
+  // CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[C_SLICE]]
+  %5 = linalg.fill ins(%cst : f32) inits(%4 : tensor<8x16xf32>) -> tensor<8x16xf32>
   // CHECK: scf.for %[[K:.*]] =
   %6 = scf.for %arg7 = %c0 to %c256 step %c32 iter_args(%arg8 = %5) -> (tensor<8x16xf32>) {
@@ -192,9 +192,9 @@
       tensor<256x16xf32> to tensor<32x16xf32>
     // linalg.matmul is inplace as well as the enclosing scf.for.
-    // CHECK: linalg.matmul ins({{.*}} outs(%[[C_SLICE]]
+    // CHECK: linalg.matmul ins({{.*}} inits(%[[C_SLICE]]
    %10 = linalg.matmul ins(%8, %9 : tensor<8x32xf32>, tensor<32x16xf32>)
-      outs(%arg8 : tensor<8x16xf32>)
+      inits(%arg8 : tensor<8x16xf32>)
      -> tensor<8x16xf32>
    scf.yield %10 : tensor<8x16xf32>
   }
@@ -231,7 +231,7 @@
   %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor
   %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32>
-  %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
+  %FA = linalg.fill ins(%f0 : f32) inits(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
   %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor
   %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor
@@ -250,7 +250,7 @@
     indexing_maps = [affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]}
-    ins(%arg1 : tensor) outs(%arg2 : tensor) {
+    ins(%arg1 : tensor) inits(%arg2 : tensor) {
   ^bb0(%arg3: i32, %arg4 : f32):
     %iv1 = linalg.index 1 : index
     %1 = arith.index_cast %arg3: i32 to index
@@ -266,7 +266,7 @@
 // CHECK-SAME: ) {
 // CHECK: linalg.generic
 // CHECK-SAME: ins(%[[ARG1]] :
-// CHECK-SAME: outs(%[[ARG2]] :
+// CHECK-SAME: inits(%[[ARG2]] :
 // CHECK: %[[YIELD:.+]] = memref.load %[[ARG0]]
 // CHECK: linalg.yield %[[YIELD]]
@@ -281,14 +281,14 @@
     %s1: index, %s2: index, %cst: f32) -> tensor {
-  // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t3]] : {{.*}})
+  // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) inits(%[[t3]] : {{.*}})
   %r = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1)-> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]}
    ins(%t1, %t2 : tensor, tensor)
-   outs(%t3 : tensor) {
+   inits(%t3 : tensor) {
   ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
     %add = arith.addf %arg0, %arg1 : f32
     linalg.yield %add : f32
@@ -316,15 +316,15 @@
   // Make sure that a copy is inserted here.
   // CHECK: %[[ALLOC:.*]] = memref.alloc
   // CHECK: memref.copy %[[t0]], %[[ALLOC]]
-  // CHECK: linalg.generic {{.*}} outs(%[[ALLOC]] : memref
-  %r0 =linalg.generic #trait outs (%t0 : tensor) {
+  // CHECK: linalg.generic {{.*}} inits(%[[ALLOC]] : memref
+  %r0 =linalg.generic #trait inits (%t0 : tensor) {
     ^bb(%0: f32) :
       %a = arith.addf %cst, %0 : f32
      linalg.yield %a : f32
   } -> (tensor)
-  // CHECK: linalg.generic {{.*}} outs(%[[ALLOC]] : memref
-  %r1 = linalg.generic #trait outs (%r0 : tensor) {
+  // CHECK: linalg.generic {{.*}} inits(%[[ALLOC]] : memref
+  %r1 = linalg.generic #trait inits (%r0 : tensor) {
    ^bb(%0: f32) :
     linalg.yield %cst : f32
   } -> (tensor)
@@ -344,7 +344,7 @@
   // CHECK-NEXT: ins(%[[LHS]], %[[RHS]] : memref<64xf32
   %add = linalg.map
     ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
-    outs(%init:tensor<64xf32>)
+    inits(%init:tensor<64xf32>)
    (%lhs_elem: f32, %rhs_elem: f32) {
     %0 = arith.addf %lhs_elem, %rhs_elem: f32
     linalg.yield %0: f32
@@ -362,7 +362,7 @@
   // CHECK-NEXT: ins(%[[INPUT]] : memref<16x32x64xf32
   %reduce = linalg.reduce
    ins(%input:tensor<16x32x64xf32>)
-    outs(%init:tensor<16x64xf32>)
+    inits(%init:tensor<16x64xf32>)
    dimensions = [1]
    (%in: f32, %out: f32) {
     %0 = arith.addf %in, %out: f32
@@ -381,7 +381,7 @@
   // CHECK-NEXT: ins(%[[ARG0]] : memref<16x32x64xf32
   %transpose = linalg.transpose
    ins(%input:tensor<16x32x64xf32>)
-    outs(%init:tensor<32x64x16xf32>)
+    inits(%init:tensor<32x64x16xf32>)
    permutation = [1, 2, 0]
   func.return %transpose : tensor<32x64x16xf32>
 }
@@ -394,7 +394,7 @@
     %init: tensor<8x16x32xf32>) -> tensor<8x16x32xf32> {
   %bcast = linalg.broadcast
    ins(%input:tensor<8x32xf32>)
-    outs(%init:tensor<8x16x32xf32>)
+    inits(%init:tensor<8x16x32xf32>)
    dimensions = [0, 2]
   func.return %bcast : tensor<8x16x32xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/pad_fusion.mlir b/mlir/test/Dialect/Linalg/pad_fusion.mlir
--- a/mlir/test/Dialect/Linalg/pad_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/pad_fusion.mlir
@@ -10,7 +10,7 @@
   %0 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]}
-    ins(%arg0 : tensor) outs(%init : tensor) {
+    ins(%arg0 : tensor) inits(%init : tensor) {
  ^bb0(%arg6 : f32, %arg7 : f32):
    %1 = arith.mulf %arg6, %arg6 : f32
    linalg.yield %1 : f32
@@ -38,13 +38,13 @@
 // CHECK-DAG:  %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 // CHECK-DAG:  %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG2]], %[[ARG4]], %[[SOURCE_D1]]]
 // CHECK:  %[[INIT:.+]] = tensor.empty(%[[TARGET_D0]], %[[TARGET_D1]])
-// CHECK:  %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}outs(%[[INIT]]
+// CHECK:  %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}inits(%[[INIT]]
 // CHECK-DAG:  %[[SIZE_D0:.+]] = tensor.dim %[[SOURCE]], %[[C0]]
 // CHECK-DAG:  %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 // CHECK:  %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
 // CHECK-SAME:  [%[[ARG1]], %[[ARG2]]] [%[[SIZE_D0]], %[[SIZE_D1]]] [1, 1]
 // CHECK:  %[[SOURCE:.+]] = linalg.generic
-// CHECK-SAME:  outs(%[[SLICE]] : tensor)
+// CHECK-SAME:  inits(%[[SLICE]] : tensor)
 // CHECK:  %[[RESULT:.+]] = tensor.insert_slice %[[SOURCE]] into %[[FILL]]
 // CHECK-SAME:  [%[[ARG1]], %[[ARG2]]] [%[[SIZE_D0]], %[[SIZE_D1]]] [1, 1]
 // CHECK:  return %[[RESULT]]
@@ -59,7 +59,7 @@
   %0 = linalg.generic {
    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
    iterator_types = ["parallel", "parallel"]}
-    ins(%arg0 : tensor) outs(%init : tensor<42x?xf32>) {
+    ins(%arg0 : tensor) inits(%init : tensor<42x?xf32>) {
diff --git a/mlir/test/Dialect/Linalg/pad_fusion.mlir b/mlir/test/Dialect/Linalg/pad_fusion.mlir
--- a/mlir/test/Dialect/Linalg/pad_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/pad_fusion.mlir
@@ -10,7 +10,7 @@
   %0 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
       iterator_types = ["parallel", "parallel"]}
-      ins(%arg0 : tensor) outs(%init : tensor) {
+      ins(%arg0 : tensor) inits(%init : tensor) {
     ^bb0(%arg6 : f32, %arg7 : f32):
       %1 = arith.mulf %arg6, %arg6 : f32
       linalg.yield %1 : f32
@@ -38,13 +38,13 @@
 // CHECK-DAG:   %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 // CHECK-DAG:   %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG2]], %[[ARG4]], %[[SOURCE_D1]]]
 // CHECK:   %[[INIT:.+]] = tensor.empty(%[[TARGET_D0]], %[[TARGET_D1]])
-// CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}outs(%[[INIT]]
+// CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}inits(%[[INIT]]
 // CHECK-DAG:   %[[SIZE_D0:.+]] = tensor.dim %[[SOURCE]], %[[C0]]
 // CHECK-DAG:   %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 // CHECK:   %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
 // CHECK-SAME:   [%[[ARG1]], %[[ARG2]]] [%[[SIZE_D0]], %[[SIZE_D1]]] [1, 1]
 // CHECK:   %[[SOURCE:.+]] = linalg.generic
-// CHECK-SAME:   outs(%[[SLICE]] : tensor)
+// CHECK-SAME:   inits(%[[SLICE]] : tensor)
 // CHECK:   %[[RESULT:.+]] = tensor.insert_slice %[[SOURCE]] into %[[FILL]]
 // CHECK-SAME:   [%[[ARG1]], %[[ARG2]]] [%[[SIZE_D0]], %[[SIZE_D1]]] [1, 1]
 // CHECK:   return %[[RESULT]]
@@ -59,7 +59,7 @@
   %0 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
       iterator_types = ["parallel", "parallel"]}
-      ins(%arg0 : tensor) outs(%init : tensor<42x?xf32>) {
+      ins(%arg0 : tensor) inits(%init : tensor<42x?xf32>) {
     ^bb0(%arg4 : f32, %arg5 : f32):
       %1 = arith.mulf %arg4, %arg4 : f32
       linalg.yield %1 : f32
@@ -82,12 +82,12 @@
 // CHECK-DAG:   %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 // CHECK-DAG:   %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]], %[[SOURCE_D1]]]
 // CHECK:   %[[INIT:.+]] = tensor.empty(%[[TARGET_D1]])
-// CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[ARG3]]{{.*}}outs(%[[INIT]]
+// CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[ARG3]]{{.*}}inits(%[[INIT]]
 // CHECK-DAG:   %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 // CHECK:   %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
 // CHECK-SAME:   [3, %[[ARG1]]] [42, %[[SIZE_D1]]] [1, 1]
 // CHECK:   %[[SOURCE:.+]] = linalg.generic
-// CHECK-SAME:   outs(%[[SLICE]] : tensor<42x?xf32>)
+// CHECK-SAME:   inits(%[[SLICE]] : tensor<42x?xf32>)
 // CHECK:   %[[RESULT:.+]] = tensor.insert_slice %[[SOURCE]] into %[[FILL]]
 // CHECK-SAME:   [3, %[[ARG1]]] [42, %[[SIZE_D1]]] [1, 1]
 // CHECK:   return %[[RESULT]]
diff --git a/mlir/test/Dialect/Linalg/parallel-loops.mlir b/mlir/test/Dialect/Linalg/parallel-loops.mlir
--- a/mlir/test/Dialect/Linalg/parallel-loops.mlir
+++ b/mlir/test/Dialect/Linalg/parallel-loops.mlir
@@ -8,7 +8,7 @@
      indexing_maps = [#map0, #map0, #map0],
      iterator_types = ["parallel", "parallel"]}
      ins(%lhs, %rhs : memref<2x2xf32>, memref<2x2xf32>)
-     outs(%sum : memref<2x2xf32>) {
+     inits(%sum : memref<2x2xf32>) {
     ^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32):
       %0 = arith.addf %lhs_in, %rhs_in : f32
       linalg.yield %0 : f32
@@ -41,7 +41,7 @@
 func.func @lower_outer_parallel(%A: memref, %B: memref) {
   linalg.generic #trait
       ins(%A : memref)
-     outs(%B : memref) {
+     inits(%B : memref) {
     ^bb0(%a: f32, %b: f32):
       linalg.yield %a: f32
   }
@@ -74,7 +74,7 @@
 func.func @lower_mixed_parallel(%A: memref, %B: memref) {
   linalg.generic #trait
       ins(%A : memref)
-     outs(%B : memref) {
+     inits(%B : memref) {
     ^bb0(%a: f32, %b: f32):
       linalg.yield %a: f32
   }
diff --git a/mlir/test/Dialect/Linalg/promote.mlir b/mlir/test/Dialect/Linalg/promote.mlir
--- a/mlir/test/Dialect/Linalg/promote.mlir
+++ b/mlir/test/Dialect/Linalg/promote.mlir
@@ -25,7 +25,7 @@
         linalg.matmul
           ins(%11, %14: memref>, memref>)
-         outs(%17: memref>)
+         inits(%17: memref>)
       }
     }
   }
@@ -56,7 +56,7 @@
 //       CHECK:         memref.copy %[[vB]], %[[partialB]] : memref> to memref>
 //       CHECK:         memref.copy %[[vC]], %[[partialC]] : memref> to memref>
 //
-//       CHECK:         linalg.matmul ins(%[[partialA]], %[[partialB]]{{.*}} outs(%[[partialC]]
+//       CHECK:         linalg.matmul ins(%[[partialA]], %[[partialB]]{{.*}} inits(%[[partialC]]
 //
 //       CHECK:         memref.copy %[[partialC]], %[[vC]] :
 //       CHECK:           memref> to
@@ -95,7 +95,7 @@
         linalg.matmul
           ins(%11, %14: memref>, memref>)
-         outs(%17: memref>)
+         inits(%17: memref>)
       }
     }
   }
@@ -126,7 +126,7 @@
 //       CHECK:         memref.copy %[[vB_f64]], %[[partialB_f64]] : memref> to memref>
 //       CHECK:         memref.copy %[[vC_f64]], %[[partialC_f64]] : memref> to memref>
 //
-//       CHECK:         linalg.matmul ins(%[[partialA_f64]], %[[partialB_f64]]{{.*}} outs(%[[partialC_f64]]
+//       CHECK:         linalg.matmul ins(%[[partialA_f64]], %[[partialB_f64]]{{.*}} inits(%[[partialC_f64]]
 //
 //       CHECK:         memref.copy %[[partialC_f64]], %[[vC_f64]] :
 //       CHECK:           memref> to
@@ -170,9 +170,9 @@
   // CHECK-COUNT-3: memref.copy
   //      CHECK: linalg.generic
   // CHECK-SAME: ins(%[[a_pro_subview]], %[[b_pro_subview]]
-  // CHECK-SAME: outs(%[[c_pro_subview]]
+  // CHECK-SAME: inits(%[[c_pro_subview]]

-  linalg.generic {indexing_maps = [#map6, #map7, #map8], iterator_types = ["parallel", "parallel", "reduction"]} ins(%13, %14 : memref>, memref<128x32xf32, strided<[?, ?], offset: ?>>) outs(%9 : memref>) {
+  linalg.generic {indexing_maps = [#map6, #map7, #map8], iterator_types = ["parallel", "parallel", "reduction"]} ins(%13, %14 : memref>, memref<128x32xf32, strided<[?, ?], offset: ?>>) inits(%9 : memref>) {
   ^bb0(%arg9: f32, %arg10: f32, %arg11: f32):
     %15 = arith.mulf %arg9, %arg10 : f32
     %16 = arith.addf %arg11, %15 : f32
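The promotion tests above exercise the buffer form of the ops, and the rename applies there uniformly. A minimal sketch on memrefs (hypothetical shapes): on buffers the op returns no value, and the `inits` operand is the buffer that is read and updated in place.

```mlir
func.func @gemm(%A: memref<?x?xf32>, %B: memref<?x?xf32>,
                %C: memref<?x?xf32>) {
  // No result value in the buffer form; %C is the in-place destination,
  // now spelled `inits` rather than `outs`.
  linalg.matmul ins(%A, %B : memref<?x?xf32>, memref<?x?xf32>)
                inits(%C : memref<?x?xf32>)
  return
}
```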
diff --git a/mlir/test/Dialect/Linalg/promotion_options.mlir b/mlir/test/Dialect/Linalg/promotion_options.mlir
--- a/mlir/test/Dialect/Linalg/promotion_options.mlir
+++ b/mlir/test/Dialect/Linalg/promotion_options.mlir
@@ -3,7 +3,7 @@
 func.func @gemm(%a : memref, %b : memref, %c : memref) {
   linalg.matmul ins(%a, %b: memref, memref)
-               outs(%c: memref)
+               inits(%c: memref)
   return
 }
@@ -29,7 +29,7 @@
 //      CHECK:       memref.copy %[[svA]], %[[svAA]]
 //      CHECK:       memref.copy %[[svC]], %[[svCC]]

-//      CHECK:       linalg.matmul ins(%[[VA]], %[[svB]]{{.*}} outs(%[[VC]]
+//      CHECK:       linalg.matmul ins(%[[VA]], %[[svB]]{{.*}} inits(%[[VC]]
 //      CHECK:       memref.copy %[[svCC]], %[[svC]]
 //      CHECK:       memref.dealloc %[[tmpA]]
 //      CHECK:       memref.dealloc %[[tmpC]]
diff --git a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
--- a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
@@ -11,7 +11,7 @@
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],
       iterator_types = ["parallel", "parallel"]}
       ins(%0, %arg1 : tensor, tensor)
-     outs(%init : tensor) {
+     inits(%init : tensor) {
     ^bb0(%arg2 : f32, %arg3:f32, %arg4 : f32):
       %2 = arith.addf %arg2, %arg3 : f32
       linalg.yield %2 : f32
@@ -44,19 +44,19 @@
   %fill = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
       iterator_types = ["parallel", "parallel"]}
-     outs(%init : tensor) {
+     inits(%init : tensor) {
     ^bb0(%arg2: f32):
       linalg.yield %cst : f32
   } -> tensor
   %0 = tensor.expand_shape %fill [[0, 1], [2]] : tensor into tensor<1x?x?xf32>
   %1 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x?x?xf32>, tensor<1x?x?xf32>)
-      outs(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
+      inits(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
   return %1 : tensor<1x?x?xf32>
 }
 //  CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)
 //      CHECK: func @control_consumer_reshape_fusion
 //      CHECK:   %[[FILL:.+]] = linalg.generic
 // CHECK-SAME:       indexing_maps = [#[[MAP]]]
-// CHECK-SAME:       outs(%{{.+}} : tensor<1x?x?xf32>)
+// CHECK-SAME:       inits(%{{.+}} : tensor<1x?x?xf32>)
 //      CHECK:   linalg.batch_matmul
-// CHECK-SAME:       outs(%[[FILL]] : tensor<1x?x?xf32>)
+// CHECK-SAME:       inits(%[[FILL]] : tensor<1x?x?xf32>)
diff --git a/mlir/test/Dialect/Linalg/reshape_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_fusion.mlir
--- a/mlir/test/Dialect/Linalg/reshape_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_fusion.mlir
@@ -14,7 +14,7 @@
      indexing_maps = [#map0, #map1, #map2, #map1],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%0, %arg1, %arg2 : tensor, tensor, f32)
-      outs(%arg1 : tensor) {
+      inits(%arg1 : tensor) {
     ^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
       %1 = arith.mulf %arg3, %arg4 : f32
       %2 = arith.addf %1, %arg5 : f32
@@ -38,7 +38,7 @@
 // CHECK-SAME:   indexing_maps = [#[[MAP5]], #[[MAP6]], #[[MAP7]], #[[MAP6]]]
 // CHECK-SAME:   ["parallel", "parallel", "parallel", "parallel"]
 // CHECK-SAME:   ins(%[[ARG0]], %[[T1]], %[[ARG2]] : tensor, tensor, f32)
-// CHECK-SAME:   outs(%[[T2]] : tensor)
+// CHECK-SAME:   inits(%[[T2]] : tensor)
 //      CHECK:   %[[T4:.+]] = tensor.collapse_shape %[[T3]]
 // CHECK-SAME:     [0], [1], [2, 3]
 // CHECK-SAME:     tensor into tensor
@@ -57,7 +57,7 @@
      indexing_maps = [#map0, #map0, #map1, #map0],
      iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %arg1, %arg2 : tensor, tensor, f32)
-      outs(%arg0 : tensor) {
+      inits(%arg0 : tensor) {
     ^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
       %1 = arith.mulf %arg3, %arg4 : f32
       %2 = arith.addf %1, %arg5 : f32
@@ -87,7 +87,7 @@
 // CHECK-SAME:   indexing_maps = [#[[MAP2]], #[[MAP2]], #[[MAP3]], #[[MAP2]]]
 // CHECK-SAME:   ["parallel", "parallel", "parallel", "parallel"]
 // CHECK-SAME:   ins(%[[T0]], %[[T1]], %[[ARG2]] : tensor, tensor, f32)
-// CHECK-SAME:   outs(%[[T2]] : tensor)
+// CHECK-SAME:   inits(%[[T2]] : tensor)
 //      CHECK:   return %[[T3]] : tensor
@@ -102,7 +102,7 @@
                        affine_map<(d0, d1, d2) -> (d0, d2, d1)>],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%a, %b : tensor, tensor)
-      outs(%a : tensor) {
+      inits(%a : tensor) {
     ^bb0(%arg0 : f32, %arg1: f32, %s: f32):
       %1 = arith.addf %arg0, %arg1 : f32
       linalg.yield %1 : f32
@@ -130,7 +130,7 @@
 // CHECK-SAME:   indexing_maps = [#[[MAP8]], #[[MAP9]], #[[MAP10]]]
 // CHECK-SAME:   ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]
 // CHECK-SAME:   ins(%[[T0]], %[[T1]] : tensor<3x4x?x?x2x?xf32>, tensor<3x4x?x?xf32>)
-// CHECK-SAME:   outs(%[[T2]] : tensor)
+// CHECK-SAME:   inits(%[[T2]] : tensor)
 //      CHECK:   return %[[T3]] : tensor

// -----
@@ -147,7 +147,7 @@
      indexing_maps = [#map0, #map0, #map0],
      iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %cst : tensor<264x4xf32>, tensor<264x4xf32>)
-      outs(%0 : tensor<264x4xf32>) {
+      inits(%0 : tensor<264x4xf32>) {
     ^bb0(%arg1: f32, %arg2: f32, %s: f32):
       %2 = arith.mulf %arg1, %arg2 : f32
       linalg.yield %2 : f32
@@ -173,7 +173,7 @@
 // CHECK-SAME:   indexing_maps = [#[[MAP2]], #[[MAP2]], #[[MAP2]]]
 // CHECK-SAME:   ["parallel", "parallel", "parallel"]
 // CHECK-SAME:   ins(%[[T0]], %[[CST]] :
-// CHECK-SAME:   outs(%[[T1]] : tensor<8x33x4xf32>)
+// CHECK-SAME:   inits(%[[T1]] : tensor<8x33x4xf32>)
 //      CHECK:   return %[[T2]] : tensor<8x33x4xf32>

// -----
@@ -190,7 +190,7 @@
      indexing_maps = [#map0, #map1, #map1],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%0, %arg1 : tensor, tensor)
-      outs(%0 : tensor) {
+      inits(%0 : tensor) {
     ^bb0(%arg3: i32, %arg4: i32, %s: i32):
       %idx0 = linalg.index 0 : index
       %idx1 = linalg.index 1 : index
@@ -239,7 +239,7 @@
      indexing_maps = [#map0, #map0, #map0],
      iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg0 : tensor) {
+      inits(%arg0 : tensor) {
     ^bb0(%arg3: i32, %arg4: i32, %s: i32):
       %idx0 = linalg.index 0 : index
       %idx1 = linalg.index 1 : index
@@ -288,7 +288,7 @@
                        affine_map<(d0, d1, d2) -> (d0, d2, d1)>],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%a, %b : tensor<210x6x4xi32>, tensor<210x4xi32>)
-      outs(%shape : tensor<6x4x210xi32>) {
+      inits(%shape : tensor<6x4x210xi32>) {
     ^bb0(%arg3 : i32, %arg4: i32, %s: i32):
       %idx0 = linalg.index 0 : index
       %idx1 = linalg.index 1 : index
@@ -329,7 +329,7 @@
 //      CHECK:   %[[T4:.+]] = linalg.generic
 // CHECK-SAME:     indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
 // CHECK-SAME:     ins(%[[T1]], %[[T2]] : tensor<5x6x7x2x3x4xi32>, tensor<5x6x7x4xi32>)
-// CHECK-SAME:     outs(%[[T3]] : tensor<2x3x4x5x6x7xi32>)
+// CHECK-SAME:     inits(%[[T3]] : tensor<2x3x4x5x6x7xi32>)
 //      CHECK:   ^{{.+}}(
 // CHECK-SAME:     %[[ARG8:[a-zA-Z0-9_]+]]: i32, %[[ARG9:[a-zA-Z0-9_]+]]: i32,
 // CHECK-SAME:     %[[ARG10:[a-zA-Z0-9_]+]]: i32)
@@ -362,7 +362,7 @@
                        affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%0 : tensor<264x?xi32>)
-      outs(%shape : tensor<264x?x4xi32>) {
+      inits(%shape : tensor<264x?x4xi32>) {
     ^bb0(%arg1: i32, %s: i32):
       %idx0 = linalg.index 0 : index
       %idx1 = linalg.index 1 : index
@@ -418,7 +418,7 @@
      indexing_maps = [#map0, #map0, #map1],
      iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg0 : tensor) {
+      inits(%arg0 : tensor) {
     ^bb0(%arg3: f32, %arg4: f32, %s: f32):
       %1 = arith.mulf %arg3, %arg4 : f32
       linalg.yield %1 : f32
@@ -446,7 +446,7 @@
 // CHECK-SAME:   indexing_maps = [#[[MAP4]], #[[MAP4]], #[[MAP5]]]
 // CHECK-SAME:   ["parallel", "parallel", "parallel", "parallel"]
 // CHECK-SAME:   ins(%[[T0]], %[[T1]] : tensor, tensor)
-// CHECK-SAME:   outs(%[[T2]] : tensor)
+// CHECK-SAME:   inits(%[[T2]] : tensor)
 //      CHECK:   return %[[T3]] : tensor

// -----
@@ -459,7 +459,7 @@
   %3 = linalg.generic {
      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
      iterator_types = ["parallel"]}
-      ins(%0 : tensor) outs(%2 : tensor) {
+      ins(%0 : tensor) inits(%2 : tensor) {
     ^bb0(%arg1 : f32, %arg2: f32):
       %4 = arith.addf %arg1, %arg1 : f32
       linalg.yield %4 : f32
@@ -484,7 +484,7 @@
                      affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]}
     ins(%0, %arg1 : tensor<2xi64>, tensor)
-    outs(%1 : tensor<2xi64>) {
+    inits(%1 : tensor<2xi64>) {
   ^bb0(%arg4: i64, %arg5: i64, %arg6: i64):
     %3 = arith.addi %arg4, %arg5 : i64
     linalg.yield %3 : i64
@@ -512,7 +512,7 @@
                        affine_map<(d0, d1, d2) -> (d2, d0, d1)>],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%a, %b : tensor, tensor)
-      outs(%a, %a : tensor, tensor) {
+      inits(%a, %a : tensor, tensor) {
     ^bb0(%arg0 : f32, %arg1: f32, %s: f32, %t : f32):
       %1 = arith.addf %arg0, %arg1 : f32
       linalg.yield %1, %1 : f32, f32
@@ -537,7 +537,7 @@
 //      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
 // CHECK-SAME:     indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]]
 // CHECK-SAME:     ins(%[[RESHAPE0]], %[[RESHAPE1]] :
-// CHECK-SAME:     outs(%[[RESHAPE2]], %[[RESHAPE3]] :
+// CHECK-SAME:     inits(%[[RESHAPE2]], %[[RESHAPE3]] :
 //      CHECK:   return %[[GENERIC]]#0, %[[GENERIC]]#1

// -----
@@ -551,7 +551,7 @@
      indexing_maps = [#map0, #map0, #map0, #map1],
      iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %arg1 : tensor<512xf32>, tensor<512xf32>)
-      outs(%arg2, %arg3 : tensor<512xf32>, tensor<200x512xf32>) {
+      inits(%arg2, %arg3 : tensor<512xf32>, tensor<200x512xf32>) {
     ^bb0(%arg4: f32, %arg5: f32, %arg6: f32, %arg7: f32):
       %2 = arith.addf %arg4, %arg5 : f32
       linalg.yield %2, %2 : f32, f32
@@ -571,5 +571,5 @@
 //      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
 // CHECK-SAME:     indexing_maps = [#[[MAP0]], #[[MAP0]], #[[MAP0]], #[[MAP1]]]
 // CHECK-SAME:     ins(%[[ARG0]], %[[ARG1]] :
-// CHECK-SAME:     outs(%[[ARG2]], %[[OUTS]] :
+// CHECK-SAME:     inits(%[[ARG2]], %[[OUTS]] :
 //      CHECK:   return %[[GENERIC]]#1
diff --git a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
--- a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
+++ b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
@@ -54,7 +54,7 @@
                        affine_map<(d0, d1, d2) -> (d0 + d1, d1 - d0)>],
      iterator_types = ["parallel", "parallel", "reduction"]}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg2 : tensor) {
+      inits(%arg2 : tensor) {
     ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
       %1 = arith.mulf %arg3, %arg4 : f32
       %2 = arith.addf %1, %arg5 : f32
@@ -92,7 +92,7 @@
     {indexing_maps = [affine_map<(d0, d1) -> (d0)>,
                       affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel"]}
-    ins(%arg0 : tensor) outs(%0 : tensor) {
+    ins(%arg0 : tensor) inits(%0 : tensor) {
   ^bb0(%arg2: f32, %arg3: f32) :
     linalg.yield %arg2 : f32
   } -> tensor
@@ -111,7 +111,7 @@
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
   %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
-    outs(%arg2 : tensor) -> tensor
+    inits(%arg2 : tensor) -> tensor
   %1 = tensor.dim %0, %c0 : tensor
   %2 = tensor.dim %0, %c1 : tensor
   %3 = linalg.generic
@@ -120,7 +120,7 @@
                        affine_map<(d0, d1, d2) -> (d0, d2)>],
      iterator_types = ["parallel", "reduction", "parallel"]}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%0 : tensor) {
+      inits(%0 : tensor) {
     ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
       %4 = arith.mulf %arg3, %arg4 : f32
       %5 = arith.addf %4, %arg5 : f32
@@ -154,7 +154,7 @@
     {indexing_maps = [affine_map<(d0, d1) -> (d0)>,
                       affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel"]}
-    ins(%arg0 : tensor) outs(%0 : tensor) {
+    ins(%arg0 : tensor) inits(%0 : tensor) {
   ^bb0(%arg2: f32, %arg3 : f32):
     linalg.yield %arg2 : f32
   } -> tensor
@@ -179,7 +179,7 @@
     indexing_maps = [#map, #map, #map],
     iterator_types = ["parallel"]
   } ins(%arg_0 : tensor)
-    outs(%arg_0, %arg_1 : tensor, tensor) {
+    inits(%arg_0, %arg_1 : tensor, tensor) {
   ^bb0(%in: f32, %out_0: f32, %out_1: f32):
     linalg.yield %in, %in : f32, f32
   } -> (tensor, tensor)
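The tests just above depend on a property the rename makes explicit: a structured op's result shape always matches the shape of the corresponding init operand, so a `tensor.dim` of the result can be resolved to a `tensor.dim` of the init without inspecting the op. A minimal sketch of that folding opportunity, with hypothetical names and shapes:

```mlir
func.func @dim_of_result(%init: tensor<?x?xf32>, %v: f32) -> index {
  %c0 = arith.constant 0 : index
  %0 = linalg.fill ins(%v : f32) inits(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
  // Resolvable to `tensor.dim %init, %c0`: the result aliases the
  // shape of the init operand by construction.
  %d = tensor.dim %0, %c0 : tensor<?x?xf32>
  return %d : index
}
```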
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -32,38 +32,38 @@
                %arg3: memref) {
   linalg.matmul ins(%arg0, %arg0 : memref>, memref>)
-               outs(%arg0 : memref>)
+               inits(%arg0 : memref>)
   linalg.matvec ins(%arg0, %arg1: memref>, memref>)
-               outs(%arg2: memref>)
+               inits(%arg2: memref>)
   linalg.dot ins(%arg1, %arg2: memref>, memref>)
-            outs(%arg3: memref)
+            inits(%arg3: memref)
   return
 }
 // CHECK-LABEL: func @ops(%
 //       CHECK: linalg.matmul
 //  CHECK-SAME:   ins(%{{.*}}, %{{.*}} : memref>,
 //  CHECK-SAME:       memref>)
-//  CHECK-SAME:   outs(%{{.*}} : memref>)
+//  CHECK-SAME:   inits(%{{.*}} : memref>)
 //       CHECK: linalg.matvec
 //  CHECK-SAME:   ins(%{{.*}}, %{{.*}}: memref>,
 //  CHECK-SAME:       memref>)
-//  CHECK-SAME:   outs(%{{.*}}: memref>)
+//  CHECK-SAME:   inits(%{{.*}}: memref>)
 //       CHECK: linalg.dot
 //  CHECK-SAME:   ins(%{{.*}}, %{{.*}}: memref>,
 //  CHECK-SAME:       memref>)
-//  CHECK-SAME:   outs(%{{.*}}: memref)
+//  CHECK-SAME:   inits(%{{.*}}: memref)

// -----

 func.func @fill_view(%arg0: memref>, %arg1: f32) {
-  linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>)
+  linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>)
   return
 }
 // CHECK-LABEL: func @fill_view(
 //       CHECK:  %{{.*}}: memref>, %{{.*}}: f32) {
-//       CHECK:   linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : memref>)
+//       CHECK:   linalg.fill ins(%{{.*}} : f32) inits(%{{.*}} : memref>)

// -----
@@ -79,12 +79,12 @@
 func.func @fill_view3(%arg0: memref>, %arg1: f32) {
-  linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>)
+  linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>)
   return
 }
 // CHECK-LABEL: func @fill_view3(
 //       CHECK:  %{{.*}}: memref>, %{{.*}}: f32) {
-//       CHECK:   linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : memref>)
+//       CHECK:   linalg.fill ins(%{{.*}} : f32) inits(%{{.*}} : memref>)

// -----
@@ -105,7 +105,7 @@
   %cst = arith.constant 0.0 : f32
   linalg.generic #trait_0
       ins(%arg0, %cst : memref, strided<[?, 1], offset: ?>>, f32)
-     outs(%arg1 : memref>)
+     inits(%arg1 : memref>)
       attrs = {foo = 1} {
     ^bb(%0: vector<3x4xi4>, %1: f32, %2: f32) :
       linalg.yield %1 : f32
@@ -118,7 +118,7 @@
 //  CHECK-SAME:     iterator_types = ["parallel", "parallel", "parallel"],
 //  CHECK-SAME:     library_call = "some_external_function_name_1"}
 //  CHECK-SAME:     ins({{.*}}, {{.*}} : memref, strided<[?, 1], offset: ?>>, f32)
-//  CHECK-SAME:     outs({{.*}} : memref>)
+//  CHECK-SAME:     inits({{.*}} : memref>)
 //  CHECK-SAME:     {foo = 1 : i64}

// -----
@@ -127,7 +127,7 @@
 func.func @generic_without_inputs(%arg0 : memref) {
   linalg.generic  {indexing_maps = [#map0],
                    iterator_types = ["parallel", "parallel", "parallel"]}
-                  outs(%arg0 : memref) {
+                  inits(%arg0 : memref) {
     ^bb0(%arg3: f32):
       %cst = arith.constant 0.000000e+00 : f32
       linalg.yield %cst : f32
@@ -158,7 +158,7 @@
     -> (tensor) {
   %0 = linalg.generic #trait_1
       ins(%arg0, %arg1 : tensor>, tensor)
-     outs(%arg1 : tensor)
+     inits(%arg1 : tensor)
       attrs = {foo = 1} {
     ^bb(%0: vector<3x4xi4>, %1: f32, %2: f32) :
       %f0 = arith.constant 0.0 : f32
@@ -171,7 +171,7 @@
 //  CHECK-SAME:     indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"],
 //  CHECK-SAME:     library_call = "some_external_function_name_1"}
 //  CHECK-SAME:     ins({{.*}} : tensor>, tensor)
-//  CHECK-SAME:     outs({{.*}} : tensor)
+//  CHECK-SAME:     inits({{.*}} : tensor)
 //  CHECK-SAME:     {foo = 1 : i64}
 //       CHECK:     -> tensor
 //       CHECK:   return {{.*}} : tensor
@@ -183,14 +183,14 @@
     -> (tensor, tensor) {
   %c0 = arith.constant 0 : index
   %0 = tensor.empty() : tensor
-  %1 = linalg.fill ins(%arg2 : i32) outs(%0 : tensor) -> tensor
+  %1 = linalg.fill ins(%arg2 : i32) inits(%0 : tensor) -> tensor
   %2 = tensor.empty() : tensor
-  %3 = linalg.fill ins(%arg2 : i32) outs(%2 : tensor) -> tensor
+  %3 = linalg.fill ins(%arg2 : i32) inits(%2 : tensor) -> tensor
   %4:2 = linalg.generic {
     indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>,
                      affine_map<(d0) -> ()>, affine_map<(d0) -> ()>],
     iterator_types = ["reduction"]}
     ins(%arg0, %arg1 : tensor, tensor)
-    outs(%1, %3 : tensor, tensor) {
+    inits(%1, %3 : tensor, tensor) {
   ^bb0(%arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32):
     %5 = arith.cmpi sge, %arg3, %arg5 : i32
     %6 = arith.select %5, %arg3, %arg5 : i32
@@ -206,7 +206,7 @@
 // CHECK-LABEL: func @generic_with_multiple_tensor_outputs
 //       CHECK:   %{{.*}} = linalg.generic {
 //  CHECK-SAME:     ins({{.*}} : tensor, tensor)
-//  CHECK-SAME:     outs({{.*}} : tensor, tensor)
+//  CHECK-SAME:     inits({{.*}} : tensor, tensor)
 //       CHECK:   } -> (tensor, tensor)

// -----
@@ -226,7 +226,7 @@
 {
   %0 = linalg.generic #trait_broadcast
       ins(%arg0 : tensor)
-     outs(%arg1 : tensor<3x4xf32>) {
+     inits(%arg1 : tensor<3x4xf32>) {
     ^bb(%a: f32, %b: f32) :
       linalg.yield %a : f32
   } -> tensor<3x4xf32>
@@ -251,7 +251,7 @@
                          %arg1: memref>) {
   linalg.generic #trait_3
       ins(%arg0 : memref, strided<[?, 1], offset: ?>>)
-     outs(%arg1 : memref>)
+     inits(%arg1 : memref>)
       attrs = {foo = 1} {
     ^bb(%a: vector<3x4xi4>, %b: f32) :
       %0 = linalg.index 0 : index
@@ -267,7 +267,7 @@
 //  CHECK-SAME:     iterator_types = ["parallel", "parallel", "parallel"],
 //  CHECK-SAME:     library_call = "some_external_function_name_2"
 //  CHECK-SAME:     ins({{.*}} : memref, strided<[?, 1], offset: ?>>)
-//  CHECK-SAME:     outs({{.*}} : memref>)
+//  CHECK-SAME:     inits({{.*}} : memref>)
 //  CHECK-SAME:     attrs = {foo = 1 : i64} {
 //       CHECK:  ^{{.*}}(%{{.*}}: vector<3x4xi4>, %{{.*}}: f32):
 //       CHECK:    %{{.*}} = linalg.index 0 : index
@@ -283,10 +283,10 @@
     -> (tensor) {
   linalg.batch_matmul ins(%a3, %b3: memref, memref)
-                     outs(%c3: memref)
+                     inits(%c3: memref)
   %res1 = linalg.batch_matmul
       ins(%ta3, %tb3: tensor, tensor)
-     outs(%tc3: tensor)
+     inits(%tc3: tensor)
       -> tensor
   return %res1 : tensor
 }
@@ -298,10 +298,10 @@
 func.func @fill_tensor(%arg0 : index, %arg1 : index, %arg2 : f32) -> tensor {
   %0 = tensor.empty(%arg0, %arg1) : tensor
-  %1 = linalg.fill ins(%arg2 : f32) outs(%0 : tensor) -> tensor
+  %1 = linalg.fill ins(%arg2 : f32) inits(%0 : tensor) -> tensor
   return %1 : tensor
 }
-// CHECK: %{{.+}} = linalg.fill ins(%{{.+}} : f32) outs(%{{.+}} : tensor) -> tensor
+// CHECK: %{{.+}} = linalg.fill ins(%{{.+}} : f32) inits(%{{.+}} : tensor) -> tensor

// -----
@@ -313,7 +313,7 @@
                        affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                        affine_map<(d0, d1, d2) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel", "reduction"]}
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%arg2, %arg3 : tensor, tensor) {
+     inits(%arg2, %arg3 : tensor, tensor) {
     ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32):
       %1 = arith.mulf %b0, %b1 : f32
       %2 = arith.addf %1, %b3 : f32
@@ -328,7 +328,7 @@
 func.func @map_no_inputs(%init: tensor<64xf32>) -> tensor<64xf32> {
   %add = linalg.map
-     outs(%init:tensor<64xf32>)
+     inits(%init:tensor<64xf32>)
       () {
         %0 = arith.constant 0.0: f32
         linalg.yield %0: f32
@@ -337,7 +337,7 @@
 }
 // CHECK-LABEL: func @map_no_inputs
 //       CHECK:   linalg.map
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   () {
 //  CHECK-NEXT:     arith.constant
 //  CHECK-NEXT:     linalg.yield
@@ -349,7 +349,7 @@
                       %init: tensor<64xf32>) -> tensor<64xf32> {
   %add = linalg.map
       ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
-     outs(%init:tensor<64xf32>)
+     inits(%init:tensor<64xf32>)
       (%lhs_elem: f32, %rhs_elem: f32) {
         %0 = arith.addf %lhs_elem, %rhs_elem: f32
         linalg.yield %0: f32
@@ -359,7 +359,7 @@
 // CHECK-LABEL: func @map_binary
 //       CHECK:   linalg.map
 //  CHECK-NEXT:   ins
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   (%{{.*}}: f32, %{{.*}}: f32) {
 //  CHECK-NEXT:     arith.addf
 //  CHECK-NEXT:     linalg.yield
@@ -371,7 +371,7 @@
                              %init: memref<64xf32>) {
   linalg.map
       ins(%lhs, %rhs: memref<64xf32>, memref<64xf32>)
-     outs(%init:memref<64xf32>)
+     inits(%init:memref<64xf32>)
       (%lhs_elem: f32, %rhs_elem: f32) {
         %0 = arith.addf %lhs_elem, %rhs_elem: f32
         linalg.yield %0: f32
@@ -386,7 +386,7 @@
 func.func @map_unary(%input: tensor<64xf32>, %init: tensor<64xf32>) -> tensor<64xf32> {
   %abs = linalg.map
       ins(%input:tensor<64xf32>)
-     outs(%init:tensor<64xf32>)
+     inits(%init:tensor<64xf32>)
       (%input_elem: f32) {
         %0 = math.absf %input_elem: f32
         linalg.yield %0: f32
@@ -401,7 +401,7 @@
 func.func @map_unary_memref(%input: memref<64xf32>, %init: memref<64xf32>) {
   linalg.map
       ins(%input:memref<64xf32>)
-     outs(%init:memref<64xf32>)
+     inits(%init:memref<64xf32>)
       (%input_elem: f32) {
        %0 = math.absf %input_elem: f32
        linalg.yield %0: f32
@@ -417,7 +417,7 @@
                   %init: tensor<16x64xf32>) -> tensor<16x64xf32> {
   %reduce = linalg.reduce
       ins(%input:tensor<16x32x64xf32>)
-     outs(%init:tensor<16x64xf32>)
+     inits(%init:tensor<16x64xf32>)
       dimensions = [1]
       (%in: f32, %out: f32) {
         %0 = arith.addf %in, %out: f32
@@ -428,7 +428,7 @@
 // CHECK-LABEL: func @reduce
 //       CHECK:   linalg.reduce
 //  CHECK-NEXT:   ins
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   dimensions = [1]
 //  CHECK-NEXT:   (%{{.*}}: f32, %{{.*}}: f32) {
 //  CHECK-NEXT:     arith.addf
@@ -441,7 +441,7 @@
                          %init: memref<16x64xf32>) {
   linalg.reduce
       ins(%input:memref<16x32x64xf32>)
-     outs(%init:memref<16x64xf32>)
+     inits(%init:memref<16x64xf32>)
       dimensions = [1]
       (%in: f32, %out: f32) {
         %0 = arith.addf %in, %out: f32
@@ -459,7 +459,7 @@
     %init2: tensor<16x64xi64>) -> (tensor<16x64xf32>, tensor<16x64xi64>) {
   %reduce, %reduce2 = linalg.reduce
       ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xi64>)
-     outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xi64>)
+     inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xi64>)
       dimensions = [1]
       (%in1: f32, %in2: i64, %out1: f32, %out2: i64) {
         %0 = arith.addf %in1, %out1: f32
@@ -478,7 +478,7 @@
     %init2: memref<16x64xi64>) {
   linalg.reduce
       ins(%input1, %input2 : memref<16x32x64xf32>, memref<16x32x64xi64>)
-     outs(%init1, %init2 : memref<16x64xf32>, memref<16x64xi64>)
+     inits(%init1, %init2 : memref<16x64xf32>, memref<16x64xi64>)
       dimensions = [1]
       (%in1: f32, %in2: i64, %out1: f32, %out2: i64) {
         %0 = arith.addf %in1, %out1: f32
@@ -496,14 +496,14 @@
                      %init: tensor<32x64x16xf32>) -> tensor<32x64x16xf32> {
   %transpose = linalg.transpose
       ins(%input:tensor<16x32x64xf32>)
-     outs(%init:tensor<32x64x16xf32>)
+     inits(%init:tensor<32x64x16xf32>)
       permutation = [1, 2, 0]
   func.return %transpose : tensor<32x64x16xf32>
 }
 // CHECK-LABEL: func @transpose
 //       CHECK:   linalg.transpose
 //  CHECK-NEXT:   ins
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   permutation

// -----
@@ -512,7 +512,7 @@
                             %init: memref<32x64x16xf32>) {
   linalg.transpose
       ins(%input:memref<16x32x64xf32>)
-     outs(%init:memref<32x64x16xf32>)
+     inits(%init:memref<32x64x16xf32>)
       permutation = [1, 2, 0]
   func.return
 }
@@ -524,14 +524,14 @@
     %init: tensor<8x16x32xf32>) -> tensor<8x16x32xf32> {
   %bcast = linalg.broadcast
       ins(%input:tensor<8x32xf32>)
-     outs(%init:tensor<8x16x32xf32>)
+     inits(%init:tensor<8x16x32xf32>)
       dimensions = [0, 2]
   func.return %bcast : tensor<8x16x32xf32>
 }
 // CHECK-LABEL: func @broadcast_static_sizes
 //       CHECK:   linalg.broadcast
 //  CHECK-NEXT:   ins
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   dimensions

// -----
@@ -541,14 +541,14 @@
     -> tensor<8x16x?xf32> {
   %bcast = linalg.broadcast
       ins(%input:tensor<8x?xf32>)
-     outs(%init:tensor<8x16x?xf32>)
+     inits(%init:tensor<8x16x?xf32>)
       dimensions = [0, 2]
   func.return %bcast : tensor<8x16x?xf32>
 }
 // CHECK-LABEL: func @broadcast_with_dynamic_sizes
 //       CHECK:   linalg.broadcast
 //  CHECK-NEXT:   ins
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   dimensions

// -----
@@ -557,7 +557,7 @@
                             %init: memref<8x16x32xf32>) {
   linalg.broadcast
       ins(%input:memref<8x32xf32>)
-     outs(%init:memref<8x16x32xf32>)
+     inits(%init:memref<8x16x32xf32>)
       dimensions = [0, 2]
   func.return
 }
@@ -565,5 +565,5 @@
 // CHECK-LABEL: func @broadcast_memref
 //       CHECK:   linalg.broadcast
 //  CHECK-NEXT:   ins
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   dimensions
diff --git a/mlir/test/Dialect/Linalg/standard.mlir b/mlir/test/Dialect/Linalg/standard.mlir
--- a/mlir/test/Dialect/Linalg/standard.mlir
+++ b/mlir/test/Dialect/Linalg/standard.mlir
@@ -5,7 +5,7 @@
                %arg2: memref) {
   linalg.dot ins(%arg0, %arg1: memref>, memref>)
-            outs(%arg2: memref)
+            inits(%arg2: memref)
   return
 }
 // CHECK-LABEL: func @dot(
@@ -44,7 +44,7 @@
 func.func @matmul_vec_impl(%A: !matrix_type_A, %B: !matrix_type_B, %C: !matrix_type_C) {
   linalg.generic #matmul_trait
       ins(%A, %B : !matrix_type_A, !matrix_type_B)
-     outs(%C : !matrix_type_C) {
+     inits(%C : !matrix_type_C) {
     ^bb0(%a: !vector_type_A, %b: !vector_type_B, %c: !vector_type_C):
       %d = vector.outerproduct %a, %b, %c: !vector_type_A, !vector_type_B
       linalg.yield %d: !vector_type_C
diff --git a/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir b/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
--- a/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
+++ b/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
@@ -4,11 +4,11 @@
 // CHECK-SAME: (%[[INIT:.+]]: tensor, %[[OFFSET0:.+]]: index, %[[SIZE1:.+]]: index)
 //      CHECK:   %[[F0:.+]] = arith.constant 0.000000e+00 : f32
 //      CHECK:   %[[EXT:.+]] = tensor.extract_slice %[[INIT]][%[[OFFSET0]], 8, 4] [1, %[[SIZE1]], 6] [1, 3, 1]
-//      CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[EXT]] : tensor) -> tensor
+//      CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) inits(%[[EXT]] : tensor) -> tensor
 //      CHECK:   return %[[FILL]]
 func.func @swap_fill_insert_slice(%init : tensor, %offset0: index, %size1: index) -> tensor {
   %f0 = arith.constant 0.000000e+00 : f32
-  %0 = linalg.fill ins(%f0 : f32) outs(%init : tensor) -> tensor
+  %0 = linalg.fill ins(%f0 : f32) inits(%init : tensor) -> tensor
   %1 = tensor.extract_slice %0[%offset0, 8, 4] [1, %size1, 6] [1, 3, 1]
        : tensor to tensor
   return %1: tensor
@@ -21,7 +21,7 @@
 //      CHECK:   tensor.extract_slice
 func.func @dont_swap_fill_insert_slice_multi_user(%init : tensor, %offset0: index, %size1: index) -> (tensor, tensor<2x?x6xf32>) {
   %f0 = arith.constant 0.000000e+00 : f32
-  %0 = linalg.fill ins(%f0 : f32) outs(%init : tensor) -> tensor
+  %0 = linalg.fill ins(%f0 : f32) inits(%init : tensor) -> tensor
   %1 = tensor.extract_slice %0[%offset0, 8, 4] [2, %size1, 6] [1, 3, 1]
        : tensor to tensor<2x?x6xf32>
   return %0, %1: tensor, tensor<2x?x6xf32>
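The swap tested above is legal because `linalg.fill` overwrites every element of its init operand, so slicing before or after the fill yields the same values; the multi-user case is left alone because the unsliced fill result still has another consumer. A minimal sketch of the pattern's input, with hypothetical shapes:

```mlir
func.func @fill_then_slice(%init: tensor<?x?xf32>, %sz: index) -> tensor<?x?xf32> {
  %f0 = arith.constant 0.0 : f32
  // Before the swap: fill the whole init, then slice the result.
  %fill = linalg.fill ins(%f0 : f32) inits(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
  // After the swap, the slice is taken from %init first and only the
  // slice is filled, avoiding work on elements that are discarded.
  %slice = tensor.extract_slice %fill[0, 0] [%sz, %sz] [1, 1]
      : tensor<?x?xf32> to tensor<?x?xf32>
  return %slice : tensor<?x?xf32>
}
```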
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -2,7 +2,7 @@
 func.func @matmul_tensors(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
   %t0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor)
-                     outs(%arg2: tensor)
+                     inits(%arg2: tensor)
     -> tensor

   %c4 = arith.constant 4 : index
@@ -19,7 +19,7 @@
         %6 = tensor.extract_slice %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor to tensor
         %7 = tensor.extract_slice %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor to tensor<4x?xf32>
         %8 = tensor.extract_slice %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor to tensor
-        %9 = linalg.matmul ins(%6, %7 : tensor, tensor<4x?xf32>) outs(%8 : tensor) -> tensor
+        %9 = linalg.matmul ins(%6, %7 : tensor, tensor<4x?xf32>) inits(%8 : tensor) -> tensor
         %10 = tensor.insert_slice %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1] : tensor into tensor
         scf.yield %10 : tensor
       }
@@ -50,8 +50,8 @@
 //       slices of the producing matmul.
 //  CHECK-DAG:    %[[stB2:.*]] = tensor.extract_slice %[[B]][0, %[[K]]] [%[[dB0]], 4] [1, 1] : tensor to tensor
 //  CHECK-DAG:    %[[stC:.*]] = tensor.extract_slice %[[C]][%[[I]], %[[K]]] [2, 4] [1, 1] : tensor to tensor<2x4xf32>
-//      CHECK:    %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor) outs(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32>
-// CHECK-NEXT:    %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32>
+//      CHECK:    %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor) inits(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32>
+// CHECK-NEXT:    %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) inits(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32>
 // CHECK-NEXT:    tensor.insert_slice %[[stG]] into %[[RES]][%[[I]], %[[J]]]

// -----
@@ -66,12 +66,12 @@
   %cst = arith.constant 0.0 : f32
   %init = tensor.empty() : tensor<1x112x112x32xf32>
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>

   %conv = linalg.conv_2d_nhwc_hwcf
     {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
     ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>)
-    outs(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+    inits(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>

   %for0 = scf.for %iv0 = %c0 to %c112 step %c8 iter_args(%arg0 = %fill) -> tensor<1x112x112x32xf32> {
     %for1 = scf.for %iv1 = %c0 to %c112 step %c16 iter_args(%arg1 = %arg0) -> tensor<1x112x112x32xf32> {
@@ -87,7 +87,7 @@
             affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
           iterator_types = ["parallel", "parallel", "parallel", "parallel"]
         }
-        ins(%0, %1 : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>) outs(%2 : tensor<1x8x16x4xf32>) {
+        ins(%0, %1 : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>) inits(%2 : tensor<1x8x16x4xf32>) {
         ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
           %result = arith.addf %arg3, %arg4 : f32
           linalg.yield %result : f32
@@ -110,7 +110,7 @@
 // CHECK-SAME: (%[[INPUT:.+]]: tensor<1x225x225x3xf32>, %[[FILTER:.+]]: tensor<3x3x3x32xf32>, %[[ELEM:.+]]: tensor<1x112x112x32xf32>)

 //      CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x112x112x32xf32>
-// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>

 // CHECK-NEXT: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG0:.+]] = %[[FILL]])
 // CHECK-NEXT:   %[[OFFSET_H:.+]] = affine.apply #[[MAP0]](%[[IV0]])
@@ -124,10 +124,10 @@
 // CHECK-NEXT:       %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
 // CHECK-NEXT:       %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf
 // CHECK-SAME:         ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<1x17x33x3xf32>, tensor<3x3x3x4xf32>)
-// CHECK-SAME:         outs(%[[ST_FILL]] : tensor<1x8x16x4xf32>)
+// CHECK-SAME:         inits(%[[ST_FILL]] : tensor<1x8x16x4xf32>)
 // CHECK-NEXT:       %[[ADD:.+]] = linalg.generic
 // CHECK-SAME:         ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>)
-// CHECK-SAME:         outs(%[[ST_ARG2]] : tensor<1x8x16x4xf32>)
+// CHECK-SAME:         inits(%[[ST_ARG2]] : tensor<1x8x16x4xf32>)
 //      CHECK:       tensor.insert_slice %[[ADD]] into %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4]

// -----
@@ -148,12 +148,12 @@
   %oc = tensor.dim %elementwise, %c3 : tensor

   %init = tensor.empty(%n, %oh, %ow, %oc) : tensor
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor) -> tensor

   %conv = linalg.conv_2d_nhwc_hwcf
     {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
     ins(%input, %filter : tensor, tensor)
-    outs(%fill : tensor) -> tensor
+    inits(%fill : tensor) -> tensor

   %for0 = scf.for %iv0 = %c0 to %n step %c8 iter_args(%arg0 = %fill) -> tensor {
     %for1 = scf.for %iv1 = %c0 to %oh step %c16 iter_args(%arg1 = %arg0) -> tensor {
@@ -174,7 +174,7 @@
             affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
           iterator_types = ["parallel", "parallel", "parallel", "parallel"]
         }
-        ins(%0, %1 : tensor, tensor) outs(%2 : tensor) {
+        ins(%0, %1 : tensor, tensor) inits(%2 : tensor) {
         ^bb0(%arg4: f32, %arg5: f32, %arg6: f32):
           %result = arith.addf %arg4, %arg5 : f32
           linalg.yield %result : f32
@@ -217,7 +217,7 @@
 //  CHECK-DAG: %[[ELEM_OC:.+]] = tensor.dim %[[ELEM]], %[[C3]] : tensor

 //      CHECK: %[[INIT:.+]] = tensor.empty(%[[ELEM_N]], %[[ELEM_OH]], %[[ELEM_OW]], %[[ELEM_OC]]) : tensor
-//      CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor) -> tensor
+//      CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor) -> tensor

 //  CHECK-DAG: %[[FILTER_H:.+]] = tensor.dim %[[FILTER]], %[[C0]] : tensor
 //  CHECK-DAG: %[[FILTER_W:.+]] = tensor.dim %[[FILTER]], %[[C1]] : tensor
@@ -256,10 +256,10 @@
 // CHECK-SAME:         [%[[SIZE_INPUT_N]], %[[SIZE_ELEM_OH_2]], %[[SIZE_ELEM_OW_2]], %[[SIZE_ELEM_OC_2]]]
 // CHECK-NEXT:       %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf
 // CHECK-SAME:         ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor, tensor)
-// CHECK-SAME:         outs(%[[ST_FILL]] : tensor) -> tensor
+// CHECK-SAME:         inits(%[[ST_FILL]] : tensor) -> tensor
 // CHECK-NEXT:       %[[ST_ADD:.+]] = linalg.generic
 // CHECK-SAME:         ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor, tensor)
-// CHECK-SAME:         outs(%[[ST_ARG]] : tensor)
+// CHECK-SAME:         inits(%[[ST_ARG]] : tensor)
 //      CHECK:       tensor.insert_slice %[[ST_ADD]] into %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
 // CHECK-SAME:         [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]]
@@ -301,7 +301,7 @@
     tensor.yield %zero : f32
   } : tensor<58x1xf32> to tensor<64x128xf32>

-  %fill = linalg.fill ins(%zero : f32) outs(%large_input : tensor<64x128xf32>) -> tensor<64x128xf32>
+  %fill = linalg.fill ins(%zero : f32) inits(%large_input : tensor<64x128xf32>) -> tensor<64x128xf32>

   %for0 = scf.for %iv0 = %c0 to %d0 step %c16 iter_args(%arg0 = %fill) -> tensor<64x128xf32> {
     %for1 = scf.for %iv1 = %c0 to %d1 step %c32 iter_args(%arg1 = %arg0) -> tensor<64x128xf32> {
@@ -311,7 +311,7 @@
       %add = linalg.generic
         {indexing_maps = [#map, #map, #map],
          iterator_types = ["parallel", "parallel"]}
-        ins(%0, %1 : tensor<16x32xf32>, tensor<16x32xf32>) outs(%2 : tensor<16x32xf32>) {
+        ins(%0, %1 : tensor<16x32xf32>, tensor<16x32xf32>) inits(%2 : tensor<16x32xf32>) {
         ^bb0(%arg4: f32, %arg5: f32, %arg6: f32):
           %result = arith.addf %arg4, %arg5 : f32
           linalg.yield %result : f32
diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir
--- a/mlir/test/Dialect/Linalg/tile-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-conv.mlir
@@ -5,7 +5,7 @@
 //  CHECK-DAG:  #[[MAP2:.*]] = affine_map<(d0)[s0] -> (d0 + s0 - 1)>

 func.func @conv(%arg0 : memref, %arg1 : memref, %arg2 : memref) {
-  linalg.conv_2d ins(%arg0, %arg1 : memref, memref) outs(%arg2 : memref)
+  linalg.conv_2d ins(%arg0, %arg1 : memref, memref) inits(%arg2 : memref)
   return
 }
@@ -38,4 +38,4 @@
 //   CHECK-DAG:       %[[SVOUT:.*]] = memref.subview %[[ARG2]][%[[I]], %[[J]]] [%[[T4]], %[[T5]]]
 //       CHECK:       linalg.conv_2d
 //  CHECK-SAME:         ins(%[[SVIN]], %[[SVKER]]
-//  CHECK-SAME:         outs(%[[SVOUT]]
+//  CHECK-SAME:         inits(%[[SVOUT]]
diff --git a/mlir/test/Dialect/Linalg/tile-indexed.mlir b/mlir/test/Dialect/Linalg/tile-indexed.mlir
--- a/mlir/test/Dialect/Linalg/tile-indexed.mlir
+++ b/mlir/test/Dialect/Linalg/tile-indexed.mlir
@@ -3,7 +3,7 @@
 func.func @indexed_vector(%arg0: memref<50xindex>) {
   linalg.generic {indexing_maps = [affine_map<(i) -> (i)>],
                   iterator_types = ["parallel"]}
-    outs(%arg0 : memref<50xindex>) {
+    inits(%arg0 : memref<50xindex>) {
   ^bb0(%a: index):
     %i = linalg.index 0 : index
     linalg.yield %i : index
@@ -31,7 +31,7 @@
 func.func @indexed_matrix(%arg0: memref<50x50xindex>) {
   linalg.generic {indexing_maps = [affine_map<(i, j) -> (i, j)>],
                   iterator_types = ["parallel", "parallel"]}
-    outs(%arg0 : memref<50x50xindex>) {
+    inits(%arg0 : memref<50x50xindex>) {
   ^bb0(%a: index):
     %i = linalg.index 0 : index
     %j = linalg.index 1 : index
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -14,13 +14,13 @@
 //      CHECK:         %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor to tensor
 //      CHECK:         %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor to tensor
 //      CHECK:         %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor)
-// CHECK-SAME:                                    outs(%[[sTC]] : tensor)  -> tensor
+// CHECK-SAME:                                    inits(%[[sTC]] : tensor)  -> tensor
 //      CHECK:         %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}]  : tensor into tensor
 //      CHECK:         scf.yield %[[TD]] : tensor
 //      CHECK:       scf.yield %[[TD2]] : tensor
 //      CHECK:     scf.yield %[[TD1]] : tensor
   %0 = linalg.matmul  ins(%arg0, %arg1: tensor, tensor)
-                     outs(%arg2: tensor)
+                     inits(%arg2: tensor)
     -> tensor

 //      CHECK: return %[[TD0]] : tensor
@@ -50,7 +50,7 @@
                        affine_map<(d0, d1, d2) -> (d2, d1, d0)>],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%3 : tensor) {
+      inits(%3 : tensor) {
     ^bb0(%arg2 : f32, %arg3: f32, %arg4: f32):
       %5 = arith.addf %arg2, %arg3 : f32
       linalg.yield %5 : f32
@@ -76,7 +76,7 @@
 //      CHECK:       %[[STARG2:.+]] = tensor.extract_slice %[[TC2]][{{.+}}] : tensor to tensor
 //      CHECK:       %[[STRETURN:.+]] = linalg.generic
 // CHECK-SAME:         ins(%[[STARG0]], %[[STARG1]] : tensor, tensor)
-// CHECK-SAME:         outs(%[[STARG2]] : tensor)
+// CHECK-SAME:         inits(%[[STARG2]] : tensor)
 //      CHECK:       %[[TD:.+]] = tensor.insert_slice %[[STRETURN]] into %[[TC2]]
 //      CHECK:       scf.yield %[[TD]]
 //      CHECK:     }
@@ -121,7 +121,7 @@
                        affine_map<(d0, d1, d2) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%1, %arg2 : tensor, tensor)
-      outs(%arg1 : tensor) {
+      inits(%arg1 : tensor) {
     ^bb0(%arg3 : f32, %arg4: f32, %arg5: f32):
       %5 = arith.addf %arg3, %arg5 : f32
       linalg.yield %5 : f32
diff --git a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
--- a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
+++ b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
@@ -21,14 +21,14 @@
   //      CHECK:   %[[tC:.*]] = tensor.extract_slice %[[C_BLK]]{{.*}} : tensor to tensor
   //      CHECK:   %[[RES:.*]] = linalg.matmul
   // CHECK-SAME:      ins(%[[tA]], %[[tB]] : tensor, tensor)
-  // CHECK-SAME:     outs(%[[tC]] : tensor) -> tensor
+  // CHECK-SAME:     inits(%[[tC]] : tensor) -> tensor
   //      CHECK:   scf.foreach_thread.perform_concurrently {
   // CHECK-NEXT:     tensor.parallel_insert_slice %[[RES]] into %[[C_BLK]]{{.*}} :
   // CHECK-SAME:       tensor into tensor
   // CHECK-NEXT:   }
   // CHECK-NEXT: } {thread_dim_mapping = [1, 0]}
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                    outs(%C : tensor) -> (tensor)
+                    inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -69,7 +69,7 @@
   //      CHECK:   scf.foreach_thread.perform_concurrently
   // CHECK-NEXT:     tensor.parallel_insert_slice
   %0 = linalg.matmul ins(%A, %B : tensor<100x200xf32>, tensor<200x300xf32>)
-                    outs(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
+                    inits(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
   return %0 : tensor<100x300xf32>
 }
@@ -110,7 +110,7 @@
   //      CHECK:   scf.foreach_thread.perform_concurrently
   // CHECK-NEXT:     tensor.parallel_insert_slice
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                    outs(%C : tensor) -> (tensor)
+                    inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -148,7 +148,7 @@
   //      CHECK:   scf.foreach_thread.perform_concurrently
   // CHECK-NEXT:     tensor.parallel_insert_slice
   %0 = linalg.matmul ins(%A, %B : tensor<100x200xf32>, tensor<200x300xf32>)
-                    outs(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
+                    inits(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
   return %0 : tensor<100x300xf32>
 }
@@ -166,7 +166,7 @@
   %result = linalg.generic {indexing_maps = [
     affine_map<(d0) -> (d0)>,affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]}
-   ins(%A : tensor<4xf32>) outs(%B1 : tensor<4xf32>) {
+   ins(%A : tensor<4xf32>) inits(%B1 : tensor<4xf32>) {
   ^bb0(%arg3: f32, %arg4: f32):  // no predecessors
     %2 = arith.addf %arg3, %arg3 : f32
     linalg.yield %2 : f32
@@ -221,7 +221,7 @@
   // CHECK-NEXT:     tensor.parallel_insert_slice
   %tile_size = "test.dummy"() : () -> (index)
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                    outs(%C : tensor) -> (tensor)
+                    inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -270,7 +270,7 @@
                      affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]
   } ins(%IN1, %IN2 : tensor<100xf32>, tensor<100xf32>)
-    outs(%OUT1, %OUT2 : tensor<100xf32>, tensor<100xf32>)
+    inits(%OUT1, %OUT2 : tensor<100xf32>, tensor<100xf32>)
   {
   ^bb0(%a1: f32, %a2: f32, %a3: f32, %a4: f32):
     %1 = arith.addf %a1, %a3 : f32
@@ -323,7 +323,7 @@
     ], iterator_types = ["parallel", "parallel"]
   } ins(%IN1, %IN2, %IN3 : tensor<100xf32>, tensor<100x300xf32>, tensor<300xf32>)
-    outs(%OUT1, %OUT2: tensor<300x100xf32>, tensor<300xf32>) {
+    inits(%OUT1, %OUT2: tensor<300x100xf32>, tensor<300xf32>) {
   ^bb0(%i1: f32, %i2: f32, %i3: f32, %o1: f32, %o2: f32):
     %1 = arith.addf %i1, %o1 : f32
     %2 = arith.addf %i2, %1 : f32
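All of the tiling tests above rely on destination-passing style: each tile of the result is carved out of the init operand, computed into, and inserted back into the destination carried by the loop. A minimal hand-written sketch of one such tiled loop nest, with hypothetical static shapes and `scf.for` in place of `scf.foreach_thread` for brevity:

```mlir
func.func @tile_once(%A: tensor<8x16xf32>, %B: tensor<16x8xf32>,
                     %C: tensor<8x8xf32>) -> tensor<8x8xf32> {
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c8 = arith.constant 8 : index
  %r = scf.for %iv = %c0 to %c8 step %c4 iter_args(%dest = %C) -> (tensor<8x8xf32>) {
    // The tile of the destination comes from the init operand.
    %sA = tensor.extract_slice %A[%iv, 0] [4, 16] [1, 1] : tensor<8x16xf32> to tensor<4x16xf32>
    %sC = tensor.extract_slice %dest[%iv, 0] [4, 8] [1, 1] : tensor<8x8xf32> to tensor<4x8xf32>
    %sD = linalg.matmul ins(%sA, %B : tensor<4x16xf32>, tensor<16x8xf32>)
                        inits(%sC : tensor<4x8xf32>) -> tensor<4x8xf32>
    // The computed tile is inserted back into the destination.
    %ins = tensor.insert_slice %sD into %dest[%iv, 0] [4, 8] [1, 1] : tensor<4x8xf32> into tensor<8x8xf32>
    scf.yield %ins : tensor<8x8xf32>
  }
  return %r : tensor<8x8xf32>
}
```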
diff --git a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
@@ -13,7 +13,7 @@
   %0 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>,
                                  strides = dense<1> : tensor<2xi64>}
      ins (%input, %filter: tensor, tensor<1x?x?x?xf32>)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   // CHECK: return %[[RES]]
   return %0 : tensor
 }
@@ -31,7 +31,7 @@
   %0 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : tensor<2xi64>,
                                  strides = dense<1> : tensor<2xi64>}
      ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   // CHECK: return %[[RES]]
   return %0 : tensor
 }
@@ -47,11 +47,11 @@
   // CHECK: %[[SLICERES:.+]] = tensor.extract_slice %[[RES]]
   // CHECK: %[[OPRES:.+]] = linalg.depthwise_conv_1d_nwc_wc
   // CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]]
-  // CHECK-SAME: outs(%[[SLICERES]]
+  // CHECK-SAME: inits(%[[SLICERES]]
   // CHECK: %[[INSERTED:.+]] = tensor.insert_slice %[[OPRES]] into %[[RES]]
   %0 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
     ins(%input, %filter: tensor<1x1x113x96xf32>, tensor<1x3x96xf32>)
-    outs(%init: tensor<1x1x56x96xf32>) -> tensor<1x1x56x96xf32>
+    inits(%init: tensor<1x1x56x96xf32>) -> tensor<1x1x56x96xf32>
   // CHECK: %[[INSERTED]]
   return %0: tensor<1x1x56x96xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
@@ -12,7 +12,7 @@
   func.func @fuse_tileable_op(%arg0: index, %arg1: tensor, %arg2: tensor) -> tensor {
     %cst = arith.constant 4.200000e+01 : f32
     %c0 = arith.constant 0 : index
-    %0 = linalg.fill ins(%cst : f32) outs(%arg1 : tensor) -> tensor
+    %0 = linalg.fill ins(%cst : f32) inits(%arg1 : tensor) -> tensor
     %d0 = tensor.dim %arg1, %c0 : tensor

     %1 = affine.apply #map0()[%d0, %arg0]
@@ -23,11 +23,11 @@
       %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor to tensor

       // CHECK: %[[T0:.*]] = tensor.extract_slice %[[IN]][%{{.*}}] [%{{.*}}] [{{.*}}]
-      // CHECK: %[[T1:.*]] = linalg.fill {{.*}} outs(%[[T0]]
+      // CHECK: %[[T1:.*]] = linalg.fill {{.*}} inits(%[[T0]]
       %6 = tensor.extract_slice %0[%3] [%4] [1] : tensor to tensor

       // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[T1]]
-      %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+      %7 = linalg.elemwise_unary ins(%6 : tensor) inits(%5 : tensor) -> tensor
       scf.foreach_thread.perform_concurrently {
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
       }
@@ -74,7 +74,7 @@
       %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor<64xf32> to tensor

       // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[INIT_TENSOR]]
-      %7 = linalg.elemwise_unary ins(%0 : tensor) outs(%5 : tensor) -> tensor
+      %7 = linalg.elemwise_unary ins(%0 : tensor) inits(%5 : tensor) -> tensor
       scf.foreach_thread.perform_concurrently {
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor<64xf32>
       }
@@ -108,7 +108,7 @@
   func.func @fuse_tileable_op_through_bbarg(%arg0: index, %arg1: tensor, %arg2: tensor) -> tensor {
     %cst = arith.constant 4.200000e+01 : f32
     %c0 = arith.constant 0 : index
-    %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor
+    %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor) -> tensor
     %d0 = tensor.dim %arg1, %c0 : tensor

     %1 = affine.apply #map0()[%d0, %arg0]
@@ -119,11 +119,11 @@
       %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor to tensor

       // CHECK: %[[T0:.*]] = tensor.extract_slice %[[BBARGOUT]][%{{.*}}] [%{{.*}}] [{{.*}}]
-      // CHECK: %[[T1:.*]] = linalg.fill {{.*}} outs(%[[T0]]
+      // CHECK: %[[T1:.*]] = linalg.fill {{.*}} inits(%[[T0]]
       %6 = tensor.extract_slice %arg1[%3] [%4] [1] : tensor to tensor

-      // CHECK: %[[T2:.*]] = linalg.elemwise_unary {{.*}} outs(%[[T1]]
-      %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+      // CHECK: %[[T2:.*]] = linalg.elemwise_unary {{.*}} inits(%[[T1]]
+      %7 = linalg.elemwise_unary ins(%6 : tensor) inits(%5 : tensor) -> tensor
       scf.foreach_thread.perform_concurrently {
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
       }
@@ -162,7 +162,7 @@
     %0:2 = linalg.generic {
       indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
       iterator_types = ["parallel"]
-    } ins(%in : tensor) outs(%out_1, %out_3 : tensor, tensor) {
+    } ins(%in : tensor) inits(%out_1, %out_3 : tensor, tensor) {
     ^bb0(%a: f32, %b: f32, %c: f32):
       %d = arith.addf %a, %b : f32
       %e = arith.addf %d, %c : f32
@@ -183,7 +183,7 @@
       %6 = tensor.extract_slice %0#0[%3] [%4] [1] : tensor to tensor

       // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[T1]]#0
-      %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+      %7 = linalg.elemwise_unary ins(%6 : tensor) inits(%5 : tensor) -> tensor
       scf.foreach_thread.perform_concurrently {
        tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
       }
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -9,9 +9,9 @@
   // CHECK: linalg.elemwise_binary
   // CHECK: return %[[RES]]
   %0 = linalg.elemwise_unary ins(%arg0 : tensor)
-                             outs(%arg1: tensor) -> tensor
+                             inits(%arg1: tensor) -> tensor
   %1 = linalg.elemwise_binary ins(%0, %arg0 : tensor, tensor)
-                              outs(%arg1: tensor) -> tensor
+                              inits(%arg1: tensor) -> tensor
   return %1 : tensor
 }
@@ -36,9 +36,9 @@
   // CHECK: linalg.elemwise_binary
   // CHECK: return %[[RES]]
   %0 = linalg.elemwise_unary ins(%arg0 : tensor)
-                             outs(%arg1: tensor) -> tensor
+                             inits(%arg1: tensor) -> tensor
   %1 = linalg.elemwise_binary ins(%0, %arg0 : tensor, tensor)
-                              outs(%arg1: tensor) -> tensor
+                              inits(%arg1: tensor) -> tensor
   return %1 : tensor
 }
@@ -66,18 +66,18 @@
 // CHECK:      scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %[[C7]] iter_args(%[[FOR_ARG1:.+]] = %[[FOR_ARG0]])
 // CHECK:        %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], 0, %[[IV1]]]
 // CHECK:        %[[OUT_SLICE1:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
-// CHECK:        %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE1]] : tensor)
+// CHECK:        %[[FILL:.+]] = linalg.fill {{.+}} inits(%[[OUT_SLICE1]] : tensor)
 // CHECK:        scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %[[C4]] iter_args(%[[FOR_ARG2:.+]] = %[[FILL]])
 // CHECK:          %[[IN_SLICE:.+]] = tensor.extract_slice %[[OUT_SLICE0]]
 // CHECK:          %[[OUT_SLICE2:.+]] = tensor.extract_slice %[[FOR_ARG2]][0, 0]
-// CHECK:          linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor) outs(%[[OUT_SLICE2]] : tensor)
+// CHECK:          linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor) inits(%[[OUT_SLICE2]] : tensor)
 // CHECK:      return %[[RES]]
-  %fill = linalg.fill ins(%five : f32) outs(%init : tensor<12x25xf32>) -> tensor<12x25xf32>
+  %fill = linalg.fill ins(%five : f32) inits(%init : tensor<12x25xf32>) -> tensor<12x25xf32>
   %0 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                        affine_map<(d0, d1, d2) -> (d0, d2)>],
       iterator_types = ["parallel", "reduction", "parallel"]
-    } ins(%input : tensor<12x7x25xf32>) outs(%fill : tensor<12x25xf32>) {
+    } ins(%input : tensor<12x7x25xf32>) inits(%fill : tensor<12x25xf32>) {
     ^bb0(%arg0: f32, %arg1: f32):
       %2 = arith.addf %arg0, %arg1 : f32
       linalg.yield %2 : f32
diff --git a/mlir/test/Dialect/Linalg/transform-op-generalize.mlir b/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
@@ -6,7 +6,7 @@
   // CHECK-NOT: linalg.elemwise_unary
   // CHECK: linalg.generic
   %0 = linalg.elemwise_unary ins(%arg0 : tensor)
-                             outs(%arg1: tensor) -> tensor
+                             inits(%arg1: tensor) -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-interchange.mlir b/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
@@ -10,7 +10,7 @@
   %0 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } ins(%arg0 : tensor) outs(%arg1 : tensor) {
+  } ins(%arg0 : tensor) inits(%arg1 : tensor) {
   ^bb0(%arg2: f32, %arg3: f32):
     %1 = math.exp %arg2 : f32
     linalg.yield %1 : f32
@@ -28,7 +28,7 @@
 func.func @interchange_matmul(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-match.mlir b/mlir/test/Dialect/Linalg/transform-op-match.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-match.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-match.mlir
@@ -48,7 +48,7 @@
   %1 = linalg.generic {indexing_maps = [#map0, #map1],
                        iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0 : tensor<12x128x32xf32>)
-    outs(%0 : tensor<128x12x32xf32>) {
+    inits(%0 : tensor<128x12x32xf32>) {
   ^bb0(%arg1: f32, %arg2: f32):
     linalg.yield %arg1 : f32
   } -> tensor<128x12x32xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir b/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
@@ -13,7 +13,7 @@
     %arg0: tensor<13x34xf32>, %arg1: tensor<34x42xf32>, %arg2: tensor<13x42xf32>)
     -> tensor<13x42xf32> {
   %0 = linalg.matmul  ins(%arg0, %arg1: tensor<13x34xf32>, tensor<34x42xf32>)
-                     outs(%arg2: tensor<13x42xf32>)
+                     inits(%arg2: tensor<13x42xf32>)
     -> tensor<13x42xf32>

   // The first application computes the total size.
   // CHECK: %{{.*}} = affine.apply #[[$MAP13]]()
@@ -59,7 +59,7 @@
     %arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
   %0 = linalg.matmul  ins(%arg0, %arg1: tensor, tensor)
-                     outs(%arg2: tensor)
+                     inits(%arg2: tensor)
     -> tensor

   return %0 : tensor
diff --git a/mlir/test/Dialect/Linalg/transform-op-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-pad.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
@@ -25,8 +25,8 @@

   //      CHECK:   %[[T5:.*]] = linalg.matmul
   // CHECK-SAME:              ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
-  // CHECK-SAME:              outs(%[[T2]] : tensor<4x5xf32>)
-  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
+  // CHECK-SAME:              inits(%[[T2]] : tensor<4x5xf32>)
+  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
   %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
   func.return %5 : tensor<24x25xf32>
 }
@@ -43,7 +43,7 @@
                   %arg1: tensor<12x25xf32>,
                   %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -60,7 +60,7 @@
                   %arg1: tensor<12x25xf32>,
                   %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -78,7 +78,7 @@
                   %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   // This is attached to an error that is silenceable and is not reported by this transform
   //   {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
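The padding transform above rewrites each matmul so that its input slices, and the matching slice of its init, are padded to static sizes before the computation runs. A minimal sketch of the `tensor.pad` building block involved, with hypothetical sizes and padding value:

```mlir
func.func @pad_operand(%slice: tensor<?x?xf32>, %h0: index, %h1: index) -> tensor<4x7xf32> {
  %cst = arith.constant 0.0 : f32
  // Pads the dynamic slice up to the static 4x7 shape that the padded
  // matmul consumes; %h0/%h1 are the high padding amounts.
  %padded = tensor.pad %slice low[0, 0] high[%h0, %h1] {
  ^bb0(%i: index, %j: index):
    tensor.yield %cst : f32
  } : tensor<?x?xf32> to tensor<4x7xf32>
  return %padded : tensor<4x7xf32>
}
```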
  // CHECK: linalg.matmul ins(%{{.*}}, %{{.*}} : tensor<1x12
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
@@ -7,14 +7,14 @@
   // CHECK: linalg.generic
   // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]
   // CHECK-SAME: ins(%{{[a-zA-Z0-9]*}}, %{{[a-zA-Z0-9]*}}, %{{[a-zA-Z0-9]*}} : tensor<?x256xf32>, tensor<256x32xf32>, tensor<64x4xi1>)
-  // CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor<?x32x4xf32>) {
+  // CHECK-SAME: inits(%{{[a-zA-Z0-9]*}} : tensor<?x32x4xf32>) {
   // CHECK: linalg.generic
   // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
   // CHECK-SAME: ins(%{{[a-zA-Z0-9]*}} : tensor<?x32x4xf32>)
-  // CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor<?x32xf32>) {
+  // CHECK-SAME: inits(%{{[a-zA-Z0-9]*}} : tensor<?x32xf32>) {
   %0 = linalg.matmul ins(%A, %B: tensor<?x256xf32>, tensor<256x32xf32>)
-                     outs(%C: tensor<?x32xf32>) -> tensor<?x32xf32>
+                     inits(%C: tensor<?x32xf32>) -> tensor<?x32xf32>
   return %0: tensor<?x32xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
@@ -2,7 +2,7 @@
 func.func @matmul_split(%A : tensor<16x256xf32>, %B: tensor<256x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
   %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)
-                     outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+                     inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
   return %0: tensor<16x32xf32>
 }
@@ -16,16 +16,16 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x4x64xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<4x64x32xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
 // CHECK-SAME: , iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x4x64xf32>, tensor<4x64x32xf32>) outs(%[[F]] : tensor<16x32x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x4x64xf32>, tensor<4x64x32xf32>) inits(%[[F]] : tensor<16x32x4xf32>) {
 // CHECK: arith.mulf
 // CHECK: arith.addf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<16x32x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]],
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) outs(%{{.*}} : tensor<16x32xf32>) {
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) inits(%{{.*}} : tensor<16x32xf32>) {
 // CHECK: arith.addf
 // CHECK: linalg.yield %{{.*}} : f32
 // CHECK: } -> tensor<16x32xf32>
@@ -45,7 +45,7 @@
                                         affine_map<(d0) -> 
()>],
   iterator_types = ["reduction"]}
   ins(%arg0, %arg1 : tensor<32xf32>, tensor<f32>)
-  outs(%out : tensor<f32>) {
+  inits(%out : tensor<f32>) {
   ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
     %40 = arith.subf %arg7, %arg8 : f32
     %41 = math.exp %40 : f32
@@ -64,16 +64,16 @@
 // CHECK-DAG: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
 // CHECK: %[[G:.*]] = linalg.generic
 // CHECK: {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
-// CHECK: iterator_types = ["parallel", "reduction"]} ins(%[[I1]], %{{.*}} : tensor<4x8xf32>, tensor<f32>) outs(%[[F]] : tensor<4xf32>) {
+// CHECK: iterator_types = ["parallel", "reduction"]} ins(%[[I1]], %{{.*}} : tensor<4x8xf32>, tensor<f32>) inits(%[[F]] : tensor<4xf32>) {
 // CHECK: arith.subf
 // CHECK: math.exp
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<4xf32>
-// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) outs(%{{.*}} : tensor<f32>) {
+// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) inits(%{{.*}} : tensor<f32>) {
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<f32>
@@ -97,7 +97,7 @@
       affine_map<(d0, d1, d2) -> (d2, d0)>
     ],
     iterator_types = ["parallel", "reduction", "parallel"]
-  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) {
+  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) inits(%output : tensor<5x2xf32>) {
   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
     %3 = arith.addf %arg0, %arg1 : f32
     %4 = arith.maxf %3, %arg2 : f32
@@ -116,15 +116,15 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<4x8x2xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x4x8xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<4x8x2xf32>, tensor<5x4x8xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<4x8x2xf32>, tensor<5x4x8xf32>) inits(%[[F]] : tensor<5x2x4xf32>) {
 // CHECK: arith.addf
 // CHECK: arith.maxf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<5x2x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]}
-// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) {
+// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) inits(%{{.*}} : tensor<5x2xf32>) {
 // CHECK: arith.maxf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<5x2xf32>
@@ -140,7 +140,7 @@
 func.func @matmul_split(%A : tensor<16x256xf32>, %B: tensor<256x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
   %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, 
tensor<256x32xf32>)
-                     outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+                     inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
   return %0: tensor<16x32xf32>
 }
@@ -154,16 +154,16 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x64x4xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<64x4x32xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
 // CHECK-SAME: , iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x64x4xf32>, tensor<64x4x32xf32>) outs(%[[F]] : tensor<16x32x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x64x4xf32>, tensor<64x4x32xf32>) inits(%[[F]] : tensor<16x32x4xf32>) {
 // CHECK: arith.mulf
 // CHECK: arith.addf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<16x32x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]],
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) outs(%{{.*}} : tensor<16x32xf32>) {
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) inits(%{{.*}} : tensor<16x32xf32>) {
 // CHECK: arith.addf
 // CHECK: linalg.yield %{{.*}} : f32
 // CHECK: } -> tensor<16x32xf32>
@@ -183,7 +183,7 @@
                                         affine_map<(d0) -> ()>],
   iterator_types = ["reduction"]}
   ins(%arg0, %arg1 : tensor<32xf32>, tensor<f32>)
-  outs(%out : tensor<f32>) {
+  inits(%out : tensor<f32>) {
   ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
     %40 = arith.subf %arg7, %arg8 : f32
     %41 = math.exp %40 : f32
@@ -202,16 +202,16 @@
 // CHECK-DAG: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<8x4xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
 // CHECK: %[[G:.*]] = linalg.generic
 // CHECK: {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
-// CHECK: iterator_types = ["reduction", "parallel"]} ins(%[[I1]], %{{.*}} : tensor<8x4xf32>, tensor<f32>) outs(%[[F]] : tensor<4xf32>) {
+// CHECK: iterator_types = ["reduction", "parallel"]} ins(%[[I1]], %{{.*}} : tensor<8x4xf32>, tensor<f32>) inits(%[[F]] : tensor<4xf32>) {
 // CHECK: arith.subf
 // CHECK: math.exp
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<4xf32>
-// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) outs(%{{.*}} : tensor<f32>) {
+// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) inits(%{{.*}} : tensor<f32>) {
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<f32>
@@ -235,7 +235,7 @@
       affine_map<(d0, d1, d2) -> (d2, d0)>
     ],
     iterator_types = ["parallel", "reduction", "parallel"]
-  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) {
+  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) 
inits(%output : tensor<5x2xf32>) { ^bb0(%arg0: f32, %arg1: f32, %arg2: f32): %3 = arith.addf %arg0, %arg1 : f32 %4 = arith.minf %3, %arg2 : f32 @@ -254,15 +254,15 @@ // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<8x4x2xf32> // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x8x4xf32> // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32> -// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32> +// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32> // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]} -// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) outs(%[[F]] : tensor<5x2x4xf32>) { +// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) inits(%[[F]] : tensor<5x2x4xf32>) { // CHECK: arith.addf // CHECK: arith.minf // CHECK: linalg.yield // CHECK: } -> tensor<5x2x4xf32> // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]} -// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) { +// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) inits(%{{.*}} : tensor<5x2xf32>) { // CHECK: arith.minf // CHECK: linalg.yield // CHECK: } -> tensor<5x2xf32> diff --git a/mlir/test/Dialect/Linalg/transform-op-split.mlir b/mlir/test/Dialect/Linalg/transform-op-split.mlir --- a/mlir/test/Dialect/Linalg/transform-op-split.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-split.mlir @@ -17,7 +17,7 @@ // CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT]][0] [42] [1] : tensor<100xf32> to tensor<42xf32> // CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic // CHECK: ins(%[[IN_SLICE_LOW]] - // CHECK: outs(%[[OUT_SLICE_LOW]] + // CHECK: inits(%[[OUT_SLICE_LOW]] // CHECK: linalg.index 0 // CHECK: func.call @elem // CHECK: %[[RES_PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [42] [1] @@ -26,7 +26,7 @@ // CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[RES_PARTIAL]][42] [58] [1] : tensor<100xf32> to tensor<58xf32> // CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic // CHECK: ins(%[[IN_SLICE_HIGH]] - // CHECK: outs(%[[OUT_SLICE_HIGH]] + // CHECK: inits(%[[OUT_SLICE_HIGH]] // CHECK: %[[IDX:.+]] = linalg.index 0 // CHECK: affine.apply #[[$ADD_42_MAP]](%[[IDX]]) // CHECK: func.call @elem @@ -35,7 +35,7 @@ indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], iterator_types = ["parallel"] } - ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) { + ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) { ^bb0(%0: f32, %1: f32): %i = linalg.index 0 : index %call_res = func.call @elem(%0, %i, %i) : (f32, index, index) -> f32 @@ -53,14 +53,14 @@ // the splitting altogether. 
// CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic // CHECK: ins(%[[IN]] - // CHECK: outs(%[[OUT]] + // CHECK: inits(%[[OUT]] // CHECK: linalg.index 0 // CHECK: func.call @elem %0 = linalg.generic { indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], iterator_types = ["parallel"] } - ins(%arg0: tensor<10xf32>) outs(%arg1: tensor<10xf32>) { + ins(%arg0: tensor<10xf32>) inits(%arg1: tensor<10xf32>) { ^bb0(%0: f32, %1: f32): %i = linalg.index 0 : index %call_res = func.call @elem(%0, %i, %i) : (f32, index, index) -> f32 @@ -92,7 +92,7 @@ // CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT:.+]][0] [%[[SPLIT_LOW]]] [1] : tensor<100xf32> to tensor // CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic // CHECK: ins(%[[IN_SLICE_LOW]] - // CHECK: outs(%[[OUT_SLICE_LOW]] + // CHECK: inits(%[[OUT_SLICE_LOW]] // CHECK: %[[PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [%[[SPLIT_LOW]]] [1] // // CHECK: %[[SPLIT_HIGH_2:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]] @@ -101,7 +101,7 @@ // CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[PARTIAL:.+]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_3]]] [1] : tensor<100xf32> to tensor // CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic // CHECK: ins(%[[IN_SLICE_HIGH]] - // CHECK: outs(%[[OUT_SLICE_HIGH]] + // CHECK: inits(%[[OUT_SLICE_HIGH]] // CHECK: %[[SPLIT_HIGH_4:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]] // CHECK: tensor.insert_slice %[[RES_SLICE_HIGH]] into %[[PARTIAL]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_4]]] [1] %0 = func.call @get_size() : () -> index @@ -109,7 +109,7 @@ indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], iterator_types = ["parallel"] } - ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) { + ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) { ^bb0(%3: f32, %4: f32): %5 = arith.addf %3, %4 : f32 linalg.yield %5 : f32 @@ -137,7 +137,7 @@ // CHECK: %[[OUT_1:.+]] = tensor.extract_slice %[[OUT:.+]][0, 0] // CHECK: %[[RES_1:.+]] = linalg.generic // CHECK-SAME: ins(%[[IN_1]] : tensor<4x34xf32>) - // CHECK-SAME: outs(%[[OUT_1]] : tensor<4x34xf32>) + // CHECK-SAME: inits(%[[OUT_1]] : tensor<4x34xf32>) // CHECK: %[[PARTIAL_1:.+]] = tensor.insert_slice %[[RES_1]] into %[[OUT]] // // CHECK: %[[IN_2:.+]] = tensor.extract_slice %[[IN]] @@ -148,14 +148,14 @@ // CHECK: %[[OUT_21:.+]] = tensor.extract_slice %[[OUT_2]] // CHECK: %[[RES_21:.+]] = linalg.generic // CHECK-SAME: ins(%[[IN_21]] : tensor<6x16xf32>) - // CHECK-SAME: outs(%[[OUT_21]] : tensor<6x16xf32>) + // CHECK-SAME: inits(%[[OUT_21]] : tensor<6x16xf32>) // CHECK: %[[PARTIAL_21:.+]] = tensor.insert_slice %[[RES_21]] into %[[OUT_2]] // // CHECK: %[[IN_22:.+]] = tensor.extract_slice %[[IN_2]] // CHECK: %[[OUT_22:.+]] = tensor.extract_slice %[[PARTIAL_21]] // CHECK: %[[RES_22:.+]] = linalg.generic // CHECK-SAME: ins(%[[IN_22]] : tensor<6x18xf32>) - // CHECK-SAME: outs(%[[OUT_22]] : tensor<6x18xf32>) + // CHECK-SAME: inits(%[[OUT_22]] : tensor<6x18xf32>) // CHECK: %[[PARTIAL_22:.+]] = tensor.insert_slice %[[RES_22]] into %[[PARTIAL_21]] // CHECK: %[[PARTIAL_2:.+]] = tensor.insert_slice %[[PARTIAL_22]] into %[[PARTIAL_1]] %0 = linalg.generic { @@ -164,7 +164,7 @@ iterator_types = ["parallel", "parallel"] } ins(%arg0: tensor<10x34xf32>) - outs(%arg1: tensor<10x34xf32>) { + inits(%arg1: tensor<10x34xf32>) { ^bb0(%0: f32, %1: f32): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -201,7 +201,7 @@ indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], iterator_types = ["parallel"] } - 
ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) { + ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) { ^bb0(%3: f32, %4: f32): linalg.yield %3 : f32 } -> tensor<100xf32> @@ -225,7 +225,7 @@ indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], iterator_types = ["parallel"] } - ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) { + ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) { ^bb0(%3: f32, %4: f32): linalg.yield %3 : f32 } -> tensor<100xf32> @@ -261,7 +261,7 @@ indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], iterator_types = ["parallel"] } - ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) { + ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) { ^bb0(%0: f32, %1: f32): linalg.yield %0 : f32 } -> tensor<100xf32> diff --git a/mlir/test/Dialect/Linalg/transform-op-tile.mlir b/mlir/test/Dialect/Linalg/transform-op-tile.mlir --- a/mlir/test/Dialect/Linalg/transform-op-tile.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-tile.mlir @@ -21,13 +21,13 @@ // CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32> // CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32> // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<4x4xf32>, tensor<4x4xf32>) -// CHECK-SAME: outs(%[[sTC]] : tensor<4x4xf32>) -> tensor<4x4xf32> +// CHECK-SAME: inits(%[[sTC]] : tensor<4x4xf32>) -> tensor<4x4xf32> // CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor<4x4xf32> into tensor<128x128xf32> // CHECK: scf.yield %[[TD]] : tensor<128x128xf32> // CHECK: scf.yield %[[TD2]] : tensor<128x128xf32> // CHECK: scf.yield %[[TD1]] : tensor<128x128xf32> %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> // CHECK: return %[[TD0]] : tensor<128x128xf32> @@ -60,14 +60,14 @@ // CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<128x128xf32> to tensor<4x?xf32> // CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<128x128xf32> to tensor // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor<4x?xf32>) -// CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor +// CHECK-SAME: inits(%[[sTC]] : tensor) -> tensor // CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor<128x128xf32> // CHECK: scf.yield %[[TD]] : tensor<128x128xf32> // CHECK: scf.yield %[[TD2]] : tensor<128x128xf32> // CHECK: scf.yield %[[TD1]] : tensor<128x128xf32> %sz = func.call @get_dynamic_tile_size() : () -> index %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> // CHECK: return %[[TD0]] : tensor<128x128xf32> diff --git a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir --- a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir @@ -12,7 +12,7 @@ // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]] // CHECK: vector.transfer_write %[[vR]], %[[C]] - %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) 
inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> func.return %0 : tensor<24x25xf32> } @@ -57,7 +57,7 @@ // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]] // CHECK: vector.transfer_write %[[vR]], %[[C]] - %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> %9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> return %9 : tensor<24x25xf32> } @@ -105,7 +105,7 @@ // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]] // CHECK: vector.transfer_write %[[vR]], %[[C]] - %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> %9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> return %9 : tensor<24x25xf32> } @@ -123,7 +123,7 @@ %arg1: tensor<12x25xf32>, %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> { // expected-note @below {{non-isolated target}} - %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> func.return %0 : tensor<24x25xf32> } diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir --- a/mlir/test/Dialect/Linalg/transform-patterns.mlir +++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir @@ -7,7 +7,7 @@ %v: memref) { linalg.dot ins(%x, %y: memref>, memref>) - outs(%v: memref) + inits(%v: memref) return } @@ -31,7 +31,7 @@ linalg.matvec ins(%A, %x: memref>, memref>) - outs(%y: memref>) + inits(%y: memref>) return } @@ -49,7 +49,7 @@ // CHECK: scf.for {{.*}} step %[[c6]] // CHECK: linalg.matvec // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) // ----- @@ -58,7 +58,7 @@ %C: memref>) { linalg.matmul ins(%A, %B: memref>, memref>) - outs(%C: memref>) + inits(%C: memref>) return } @@ -99,7 +99,7 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4]] { // CHECK: linalg.matmul // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) // ----- @@ -126,7 +126,7 @@ linalg.generic #generic_matmul_trait ins(%A, %B : memref>, memref>) - outs(%C : memref>) { + inits(%C : memref>) { ^bb(%a: f32, %b: f32, %c: f32): %d = arith.mulf %a, %b: f32 %e = arith.addf %c, %d: f32 @@ -157,7 +157,7 @@ %y: memref>) { linalg.matvec ins(%A, %x: memref>, memref>) - outs(%y: memref>) + inits(%y: memref>) return } @@ -175,7 +175,7 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c5]] // CHECK: linalg.matvec // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) // ----- @@ -184,7 +184,7 @@ %C: memref>) { linalg.matmul ins(%A, %B: memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) return } @@ -218,4 +218,4 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c40]] { // CHECK: linalg.matmul // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: 
memref>) diff --git a/mlir/test/Dialect/Linalg/transform-promotion.mlir b/mlir/test/Dialect/Linalg/transform-promotion.mlir --- a/mlir/test/Dialect/Linalg/transform-promotion.mlir +++ b/mlir/test/Dialect/Linalg/transform-promotion.mlir @@ -22,7 +22,7 @@ memref> to memref> linalg.matmul ins(%3, %4: memref>, memref>) - outs(%5: memref>) + inits(%5: memref>) } } } @@ -56,7 +56,7 @@ // CHECK: memref.copy %[[s2]], %[[l2]] : memref to memref // CHECK: linalg.matmul // CHECK-SAME: ins(%[[v0]], %[[v1]] : memref, memref) -// CHECK-SAME: outs(%[[v2]] : memref) +// CHECK-SAME: inits(%[[v2]] : memref) transform.sequence failures(propagate) { ^bb0(%arg1: !pdl.operation): @@ -89,7 +89,7 @@ linalg.matmul {__internal_linalg_transform__ = "_promote_first_view_"} ins(%3, %4: memref>, memref>) - outs(%5: memref>) + inits(%5: memref>) } } } @@ -116,7 +116,7 @@ // CHECK-NOT: memref.copy // CHECK: linalg.matmul // CHECK-SAME: ins(%[[v0]], %[[s1]] : memref, memref>) -// CHECK-SAME: outs(%[[s2]] : memref>) +// CHECK-SAME: inits(%[[s2]] : memref>) transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): @@ -138,7 +138,7 @@ %3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1] : memref> to memref> linalg.fill - ins(%cf : f32) outs(%3 : memref>) + ins(%cf : f32) inits(%3 : memref>) return } // CHECK-LABEL: func @aligned_promote_fill @@ -147,9 +147,9 @@ // CHECK: %[[a0:.*]] = memref.alloc() {alignment = 32 : i64} : memref<32000000xi8> // CHECK: %[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<32000000xi8> to memref // CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref> -// CHECK: linalg.fill ins({{.*}} : f32) outs(%[[v0]] : memref) +// CHECK: linalg.fill ins({{.*}} : f32) inits(%[[v0]] : memref) // CHECK: memref.copy %[[s0]], %[[l0]] : memref to memref -// CHECK: linalg.fill ins(%[[cf]] : f32) outs(%[[v0]] : memref) +// CHECK: linalg.fill ins(%[[cf]] : f32) inits(%[[v0]] : memref) transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): @@ -172,7 +172,7 @@ %3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1] : memref, strided<[?, 1], offset: ?>> to memref, strided<[?, ?], offset: ?>> linalg.fill ins(%cc : complex) - outs(%3 : memref, strided<[?, ?], offset: ?>>) + inits(%3 : memref, strided<[?, ?], offset: ?>>) return } // CHECK-LABEL: func @aligned_promote_fill_complex @@ -181,9 +181,9 @@ // CHECK: %[[a0:.*]] = memref.alloc() {alignment = 32 : i64} : memref<64000000xi8> // CHECK: %[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<64000000xi8> to memref> // CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref> to memref, strided<[?, 1], offset: ?>> -// CHECK: linalg.fill ins({{.*}} : complex) outs(%[[v0]] : memref>) +// CHECK: linalg.fill ins({{.*}} : complex) inits(%[[v0]] : memref>) // CHECK: memref.copy %[[s0]], %[[l0]] : memref, strided{{.*}}> to memref, strided{{.*}}> -// CHECK: linalg.fill ins(%[[cc]] : complex) outs(%[[v0]] : memref>) +// CHECK: linalg.fill ins(%[[cc]] : complex) inits(%[[v0]] : memref>) transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): diff --git a/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir b/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir --- a/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir +++ b/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir @@ -17,11 +17,11 @@ %5 = linalg.fill {__producer__} ins(%cst : f32) - outs(%D : tensor) -> tensor + inits(%D : tensor) -> tensor %6 = linalg.matmul {__producer__} ins(%A, %B : tensor, tensor) - outs(%5 : tensor) -> tensor + 
inits(%5 : tensor) -> tensor %7 = linalg.generic {__root__, indexing_maps = [affine_map<(d0, d1) -> (d0)>, @@ -30,7 +30,7 @@ iterator_types = ["parallel", "parallel"] } ins(%C, %6 : tensor, tensor) - outs(%D : tensor) { + inits(%D : tensor) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): %16 = arith.maxf %arg3, %cst : f32 %17 = arith.cmpf ogt, %arg2, %cst : f32 @@ -74,11 +74,11 @@ %5 = linalg.fill {__producer__} ins(%cst : f32) - outs(%D : tensor) -> tensor + inits(%D : tensor) -> tensor %6 = linalg.matmul {__producer__} ins(%A, %B : tensor, tensor) - outs(%5 : tensor) -> tensor + inits(%5 : tensor) -> tensor %7 = linalg.generic {__root__, indexing_maps = [affine_map<(d0, d1) -> (d0)>, @@ -87,7 +87,7 @@ iterator_types = ["parallel", "parallel"] } ins(%C, %6 : tensor, tensor) - outs(%D : tensor) { + inits(%D : tensor) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): %16 = arith.maxf %arg3, %cst : f32 %17 = arith.cmpf ogt, %arg2, %cst : f32 diff --git a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir --- a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir +++ b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir @@ -5,7 +5,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.mulf %arg7, %arg7 : f32 %2 = arith.addf %1, %arg9 : f32 @@ -32,12 +32,12 @@ // CHECK-DAG: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor // CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor // CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor -// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor +// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) inits(%[[E]] : tensor) -> tensor // CHECK: %[[L:.*]] = scf.for %[[K:.*]] = %[[C0]] to %[[D1]] step %[[C5]] iter_args(%[[ARG3:.*]] = %[[F]]) -> (tensor) { // CHECK: %[[PS:.*]] = affine.min #[[MAP2]](%[[K]])[%[[D1]]] // CHECK: %[[EXT2:.*]] = tensor.extract_slice %[[ARG0]][0, %[[K:.*]]] [%[[D0]], %[[PS]]] [1, 1] : tensor to tensor // CHECK: %[[EXT:.*]] = tensor.extract_slice %[[ARG3]][0, 0] [%[[D0]], %[[PS]]] [1, 1] : tensor to tensor -// CHECK: %[[PR:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[EXT2]] : tensor) outs(%[[EXT]] : tensor) { +// CHECK: %[[PR:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[EXT2]] : tensor) inits(%[[EXT]] : tensor) { // CHECK: arith.mulf // CHECK: arith.addf // CHECK: linalg.yield @@ -47,7 +47,7 @@ // CHECK: %[[INS:.*]] = tensor.insert_slice %[[PR]] into %[[ARG3]][0, 0] [%[[D3]], %[[D4]]] [1, 1] : tensor into tensor // CHECK: scf.yield %[[INS]] : tensor // CHECK: } -// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) outs(%[[ARG1]] : tensor) { +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) inits(%[[ARG1]] : tensor) { // CHECK: arith.addf // CHECK: linalg.yield // CHECK: } -> tensor @@ -60,7 +60,7 @@ affine_map<(d0, d1) -> (d1)>], iterator_types = ["reduction", "parallel"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %42 = arith.addf %arg7, %arg9 : f32 linalg.yield %42 : f32 diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir 
b/mlir/test/Dialect/Linalg/vectorization.mlir --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -8,7 +8,7 @@ // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584xf32> // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [0] : vector<1584xf32> to f32 linalg.dot ins(%A, %B: memref<1584xf32>, memref<1584xf32>) - outs(%C: memref) + inits(%C: memref) return } @@ -27,7 +27,7 @@ // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584x1584xf32> // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [1] : vector<1584x1584xf32> to vector<1584xf32> linalg.matvec ins(%A, %B: memref<1584x1584xf32>, memref<1584xf32>) - outs(%C: memref<1584xf32>) + inits(%C: memref<1584xf32>) return } @@ -45,7 +45,7 @@ // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584x1584x1584xf32> // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [2] : vector<1584x1584x1584xf32> to vector<1584x1584xf32> linalg.matmul ins(%A, %B: memref<1584x1584xf32>, memref<1584x1584xf32>) - outs(%C: memref<1584x1584xf32>) + inits(%C: memref<1584x1584xf32>) return } @@ -64,7 +64,7 @@ // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [3] : vector<1584x1584x1584x1584xf32> to vector<1584x1584x1584xf32> linalg.batch_matmul ins(%A, %B: memref<1584x1584x1584xf32>, memref<1584x1584x1584xf32>) - outs(%C: memref<1584x1584x1584xf32>) + inits(%C: memref<1584x1584x1584xf32>) return } @@ -99,7 +99,7 @@ // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xf32>, memref<8x32xf32> linalg.generic #matmul_trait ins(%A, %B : memref<8x16xf32>, memref<16x32xf32>) - outs(%C : memref<8x32xf32>) { + inits(%C : memref<8x32xf32>) { ^bb(%a: f32, %b: f32, %c: f32) : %d = arith.mulf %a, %b: f32 %e = arith.addf %c, %d: f32 @@ -139,7 +139,7 @@ // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xf32>, memref<32x8xf32> linalg.generic #matmul_transpose_out_trait ins(%A, %B : memref<8x16xf32>, memref<16x32xf32>) - outs(%C : memref<32x8xf32>) { + inits(%C : memref<32x8xf32>) { ^bb(%a: f32, %b: f32, %c: f32) : %d = arith.mulf %a, %b: f32 %e = arith.addf %c, %d: f32 @@ -168,7 +168,7 @@ %1 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor<12x128x32xf32>) - outs(%0 : tensor<128x12x32xf32>) { + inits(%0 : tensor<128x12x32xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<128x12x32xf32> @@ -206,7 +206,7 @@ // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xi32>, memref<8x32xi32> linalg.generic #matmul_trait ins(%A, %B : memref<8x16xi32>, memref<16x32xi32>) - outs(%C : memref<8x32xi32>) { + inits(%C : memref<8x32xi32>) { ^bb(%a: i32, %b: i32, %c: i32) : %d = arith.muli %a, %b: i32 %e = arith.addi %c, %d: i32 @@ -231,7 +231,7 @@ // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [2] : vector<8x32x16xf32> to vector<8x32xf32> linalg.matmul ins(%A, %B: memref<8x16xf32>, memref<16x32xf32>) - outs(%C: memref<8x32xf32>) + inits(%C: memref<8x32xf32>) return } @@ -252,7 +252,7 @@ indexing_maps = [affine_map<(m, n) -> ()>, affine_map<(m, n) -> (m, n)>], iterator_types = ["parallel", "parallel"]} ins(%arg0 : f32) - outs(%A: memref<8x16xf32>) { + inits(%A: memref<8x16xf32>) { ^bb(%0: f32, %1: f32) : linalg.yield %0 : f32 } @@ -276,7 +276,7 @@ indexing_maps = [affine_map<(m, n) -> ()>, affine_map<(m, n) -> (m, n)>], iterator_types = ["parallel", "parallel"]} ins(%arg0 : complex) - outs(%A: memref<8x16xcomplex>) { + inits(%A: memref<8x16xcomplex>) { ^bb(%0: complex, %1: complex) : linalg.yield %0 : complex } @@ -296,7 +296,7 @@ 
func.func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) { // CHECK: %[[V:.*]] = vector.broadcast {{.*}} : f32 to vector<8x16xf32> // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32> - linalg.fill ins(%arg0 : f32) outs(%A : memref<8x16xf32>) + linalg.fill ins(%arg0 : f32) inits(%A : memref<8x16xf32>) return } @@ -314,7 +314,7 @@ // CHECK-SAME: (%[[M:.*]]: memref, %[[val:.*]]: f32) // CHECK: %[[VEC:.*]] = vector.broadcast %[[val]] : f32 to vector // CHECK: vector.transfer_write %[[VEC]], %[[M]][] : vector, memref - linalg.fill ins(%arg0 : f32) outs(%A : memref) + linalg.fill ins(%arg0 : f32) inits(%A : memref) return } @@ -372,7 +372,7 @@ indexing_maps = [ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - outs(%arg0: memref<1x2x4x8xindex>) { + inits(%arg0: memref<1x2x4x8xindex>) { ^bb0(%arg1: index): // CHECK: %[[BCST:.*]] = vector.broadcast %[[CST0]] : vector<8xindex> to vector<1x2x4x8xindex> // CHECK: vector.transfer_write %[[BCST]], %[[ARG0]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {{.*}} : vector<1x2x4x8xindex>, memref<1x2x4x8xindex> @@ -400,7 +400,7 @@ indexing_maps = [ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - outs(%arg0: memref<1x2x4x8xindex>) { + inits(%arg0: memref<1x2x4x8xindex>) { ^bb0(%arg1: index): // CHECK: %[[BCST:.*]] = vector.broadcast %[[CST0]] : vector<2xindex> to vector<1x8x4x2xindex> // CHECK: %[[TRAN:.*]] = vector.transpose %[[BCST]], [0, 3, 2, 1] : vector<1x8x4x2xindex> to vector<1x2x4x8xindex> @@ -448,7 +448,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg1, %arg2: memref<4x256xf32>, memref<256xf32>) - outs( + inits( %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0 : memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, @@ -530,7 +530,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg1, %arg2: tensor<4x256xf32>, tensor<256xf32>) - outs( + inits( %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0 : tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, @@ -621,7 +621,7 @@ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%B, %A, %A, %B: memref<4x4xf32>, memref<4xf32>, memref<4xf32>, memref<4x4xf32>) - outs(%C : memref<4x4x4x4xf32>) { + inits(%C : memref<4x4x4x4xf32>) { ^bb0(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %s = arith.subf %arg0, %arg1 : f32 %a = arith.addf %arg2, %s : f32 @@ -665,7 +665,7 @@ %C: memref<16x14x7x8xf32>, %D: memref<7x14x8x16xf32>) { linalg.generic #matmul_trait ins(%A, %B, %C : memref<14x7xf32>, memref<16x14xf32>, memref<16x14x7x8xf32>) - outs(%D : memref<7x14x8x16xf32>) { + inits(%D : memref<7x14x8x16xf32>) { ^bb(%a: f32, %b: f32, %c: f32, %d: f32) : %e = arith.addf %a, %b: f32 %f = arith.addf %e, %c: f32 @@ -700,7 +700,7 @@ // CHECK: %[[R:.*]] = vector.multi_reduction , %[[MUL]], %[[V2]] [2] : vector<8x12x4xf32> to vector<8x12xf32> // CHECK: %[[W:.*]] = vector.transfer_write %[[R]], %[[ARG2]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x12xf32>, tensor<8x12xf32> %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x4xf32>, tensor<4x12xf32>) - outs(%arg2: 
tensor<8x12xf32>) + inits(%arg2: tensor<8x12xf32>) -> tensor<8x12xf32> // CHECK: return %[[W]] : tensor<8x12xf32> return %0 : tensor<8x12xf32> @@ -787,7 +787,7 @@ // CHECK: %[[V4:.*]] = arith.addi %[[DIM3]], %[[C3]] : index // CHECK: %[[V5:.*]] = arith.addi %[[V4]], %[[C2]] : index // CHECK: %[[INIT:.*]] = tensor.empty(%[[V1]], %[[V2]], %[[V5]]) : tensor<6x?x?x?xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) outs(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) inits(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32> // CHECK: %[[SRCDIM:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32> // CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32> // CHECK: return %[[RESULT]] @@ -1026,7 +1026,7 @@ affine_map<(d0, d1, d2) -> (d0, d1)> ], iterator_types = ["parallel", "parallel", "reduction"] - } ins(%input : tensor<4x16x8xf32>) outs(%output : tensor<4x16xf32>) { + } ins(%input : tensor<4x16x8xf32>) inits(%output : tensor<4x16xf32>) { ^bb0(%arg0: f32, %arg1: f32): %1 = math.exp %arg0 : f32 %2 = arith.addf %1, %arg1 : f32 @@ -1069,7 +1069,7 @@ affine_map<(d0, d1, d2, d3) -> (d3, d0)> ], iterator_types = ["parallel", "reduction", "reduction", "parallel"] - } ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) outs(%output : tensor<5x2xf32>) { + } ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) inits(%output : tensor<5x2xf32>) { ^bb0(%arg0: f32, %arg1: f32, %arg2: f32): %1 = math.exp %arg0 : f32 %2 = math.exp %arg1 : f32 @@ -1098,11 +1098,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32> %ident = arith.constant -3.40282e+38 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%ident : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { + ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %max = arith.maxf %in0, %out0 : f32 linalg.yield %max : f32 @@ -1129,11 +1129,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32> %maxf32 = arith.constant 3.40282e+38 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%maxf32 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%maxf32 : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { + ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %min = arith.minf %out0, %in0 : f32 linalg.yield %min : f32 @@ -1159,11 +1159,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32> %ident = arith.constant 1.0 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%ident : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", 
"reduction"]} - ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { + ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %mul = arith.mulf %in0, %out0 : f32 linalg.yield %mul : f32 @@ -1189,11 +1189,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1> %ident = arith.constant false %init = tensor.empty() : tensor<4xi1> - %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1> + %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { + ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) { ^bb0(%in0: i1, %out0: i1): %or = arith.ori %in0, %out0 : i1 linalg.yield %or : i1 @@ -1219,11 +1219,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1> %ident = arith.constant true %init = tensor.empty() : tensor<4xi1> - %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1> + %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { + ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) { ^bb0(%in0: i1, %out0: i1): %and = arith.andi %in0, %out0 : i1 linalg.yield %and : i1 @@ -1249,11 +1249,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1> %ident = arith.constant false %init = tensor.empty() : tensor<4xi1> - %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1> + %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { + ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) { ^bb0(%in0: i1, %out0: i1): %xor = arith.xori %in0, %out0 : i1 linalg.yield %xor : i1 @@ -1281,13 +1281,13 @@ // CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true]} : vector<4x4xf32>, tensor<4x4xf32> %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<4x4xf32> - %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4x4xf32>) -> tensor<4x4xf32> + %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<4x4xf32>) -> tensor<4x4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, 0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>) - outs(%fill : tensor<4x4xf32>) { + inits(%fill : tensor<4x4xf32>) { ^bb0(%arg7: f32, %arg8: f32, %arg9: f32): %40 = arith.subf %arg7, %arg8 : f32 linalg.yield %40 : f32 @@ -1317,13 +1317,13 @@ // CHECK: vector.transfer_write {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<4xf32> %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, 0)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", 
"reduction"]} ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>) - outs(%fill : tensor<4xf32>) { + inits(%fill : tensor<4xf32>) { ^bb0(%arg7: f32, %arg8: f32, %arg9: f32): %40 = arith.subf %arg7, %arg8 : f32 %41 = math.exp %40 : f32 @@ -1358,7 +1358,7 @@ // CHECK: %[[init:.*]] = tensor.empty() : tensor %0 = tensor.empty() : tensor - %1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%f0 : f32) inits(%0 : tensor) -> tensor // CHECK: %[[r:.*]] = vector.transfer_read %[[A]][%[[C0]]] // CHECK-SAME: : tensor<32xf32>, vector<32xf32> // CHECK: %[[f0:.*]] = vector.extractelement %[[vF0]][] : vector @@ -1372,7 +1372,7 @@ affine_map<(d0) -> ()>], iterator_types = ["reduction"]} ins(%arg0 : tensor<32xf32>) - outs(%1 : tensor) { + inits(%1 : tensor) { ^bb0(%a: f32, %b: f32): %3 = arith.addf %a, %b : f32 linalg.yield %3 : f32 @@ -1399,13 +1399,13 @@ func.func @not_projected_permutation(%arg0: tensor<8x8xf32>) -> tensor<6x6x3x3xf32> { %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<6x6x3x3xf32> - %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32> + %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32> // CHECK: linalg.generic %result = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0 + d2, d1 + d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<8x8xf32>) - outs(%fill : tensor<6x6x3x3xf32>) { + inits(%fill : tensor<6x6x3x3xf32>) { ^bb0(%arg7: f32, %arg9: f32): linalg.yield %arg7 : f32 } -> tensor<6x6x3x3xf32> @@ -1430,7 +1430,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<2x4x8xf32>, tensor<2x4xf32>) - outs(%arg2, %arg3 : tensor<2x4x8xf32>, tensor<2x4xf32>) { + inits(%arg2, %arg3 : tensor<2x4x8xf32>, tensor<2x4xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32): %1 = arith.mulf %b0, %b1 : f32 %2 = arith.addf %1, %b3 : f32 @@ -1467,7 +1467,7 @@ %2 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%arg1, %arg2 : tensor<4x3xi32>, tensor<4x7x2xf32>) outs(%arg3 : tensor<4x7x3x2xf32>) { + } ins(%arg1, %arg2 : tensor<4x3xi32>, tensor<4x7x2xf32>) inits(%arg3 : tensor<4x7x3x2xf32>) { ^bb0(%arg4: i32, %arg5: f32, %arg6: f32): %3 = arith.index_cast %arg4 : i32 to index %7 = tensor.extract %arg0[%3] : tensor<3xf32> @@ -1504,7 +1504,7 @@ %2 = linalg.generic { indexing_maps = [#map0, #map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%arg1, %arg2, %arg3 : tensor<4x3xi32>, tensor<4x3xi32>, tensor<4x7x2xf32>) outs(%arg4 : tensor<4x7x3x2xf32>) { + } ins(%arg1, %arg2, %arg3 : tensor<4x3xi32>, tensor<4x3xi32>, tensor<4x7x2xf32>) inits(%arg4 : tensor<4x7x3x2xf32>) { ^bb0(%arg5: i32, %arg6: i32, %arg7: f32, %arg8: f32): %3 = arith.index_cast %arg5 : i32 to index %4 = arith.index_cast %arg6 : i32 to index @@ -1528,7 +1528,7 @@ func.func @vectorize_map(%arg0: memref<64xf32>, %arg1: memref<64xf32>, %arg2: memref<64xf32>) { linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>) - outs(%arg2 : memref<64xf32>) + inits(%arg2 : memref<64xf32>) (%in: f32, %in_0: f32) { %0 = arith.addf %in, %in_0 : f32 linalg.yield %0 : f32 @@ -1552,7 +1552,7 @@ func.func @vectorize_transpose(%arg0: memref<16x32x64xf32>, %arg1: memref<32x64x16xf32>) { 
linalg.transpose ins(%arg0 : memref<16x32x64xf32>) - outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0] + inits(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0] return } // CHECK-LABEL: func @vectorize_transpose @@ -1571,7 +1571,7 @@ func.func @vectorize_reduce(%arg0: memref<16x32x64xf32>, %arg1: memref<16x64xf32>) { linalg.reduce ins(%arg0 : memref<16x32x64xf32>) - outs(%arg1 : memref<16x64xf32>) dimensions = [1] + inits(%arg1 : memref<16x64xf32>) dimensions = [1] (%in: f32, %init: f32) { %0 = arith.addf %in, %init : f32 linalg.yield %0 : f32 diff --git a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir --- a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir +++ b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir @@ -4,7 +4,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} ins(%input, %filter : memref<4x6x3xf32>, memref<1x3x8xf32>) - outs(%output : memref<4x2x8xf32>) + inits(%output : memref<4x2x8xf32>) return } @@ -67,7 +67,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} ins(%input, %filter : memref<4x6x3xi8>, memref<1x3x8xi8>) - outs(%output : memref<4x2x8xi32>) + inits(%output : memref<4x2x8xi32>) return } @@ -129,7 +129,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} ins(%input, %filter : memref<4x6x3xf32>, memref<2x3x8xf32>) - outs(%output : memref<4x2x8xf32>) + inits(%output : memref<4x2x8xf32>) return } @@ -206,7 +206,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %filter : memref<4x6x3xf32>, memref<2x3x8xf32>) - outs(%output : memref<4x2x8xf32>) + inits(%output : memref<4x2x8xf32>) return } @@ -255,7 +255,7 @@ linalg.conv_1d_ncw_fcw {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} ins(%input, %filter : memref<4x3x6xf32>, memref<8x3x1xf32>) - outs(%output : memref<4x8x2xf32>) + inits(%output : memref<4x8x2xf32>) return } @@ -324,7 +324,7 @@ linalg.conv_1d_ncw_fcw {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} ins(%input, %filter : memref<4x3x6xf32>, memref<8x3x2xf32>) - outs(%output : memref<4x8x2xf32>) + inits(%output : memref<4x8x2xf32>) return } @@ -409,7 +409,7 @@ linalg.conv_1d_ncw_fcw {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %filter : memref<4x3x6xf32>, memref<8x3x2xf32>) - outs(%output : memref<4x8x2xf32>) + inits(%output : memref<4x8x2xf32>) return } @@ -467,7 +467,7 @@ linalg.depthwise_conv_1d_nwc_wc {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %filter : memref<3x5x4xf32>, memref<2x4xf32>) - outs(%output : memref<3x2x4xf32>) + inits(%output : memref<3x2x4xf32>) return } @@ -508,7 +508,7 @@ linalg.depthwise_conv_1d_nwc_wc {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %filter : memref<3x5x4xi8>, memref<2x4xi8>) - outs(%output : memref<3x2x4xi32>) + inits(%output : memref<3x2x4xi32>) return } @@ -553,7 +553,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<1> : vector<1xi64>, strides = dense<1> : vector<1xi64>} ins(%input, %filter : memref<1x2x3xf16>, memref<1x3x2xf16>) - outs(%output : memref<1x2x2xf32>) + inits(%output : memref<1x2x2xf32>) return } diff --git a/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir b/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir --- 
a/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir +++ b/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir @@ -5,9 +5,9 @@ %c2 = arith.constant 2 : index %cst_0 = arith.constant -0.000000e+00 : f32 %0 = memref.alloc() : memref<128x384xf32> - linalg.fill ins(%cst_0 : f32) outs(%0 : memref<128x384xf32>) + linalg.fill ins(%cst_0 : f32) inits(%0 : memref<128x384xf32>) %2 = memref.alloc() : memref<128xf32> - linalg.fill ins(%cst_0 : f32) outs(%2 : memref<128xf32>) + linalg.fill ins(%cst_0 : f32) inits(%2 : memref<128xf32>) scf.foreach_thread (%arg0) in (%c2) { %7 = affine.min affine_map<(d0) -> (d0 * -64 + 128, 64)>(%arg0) %8 = affine.max affine_map<(d0) -> (0, d0)>(%7) @@ -21,12 +21,12 @@ %12 = memref.subview %2[%9] [%10] [1] : memref<128xf32> to memref (d0 + s0)>> - // CHECK: linalg.generic {{.*}} ins(%{{.*}} : memref) outs(%{{.*}} : memref) + // CHECK: linalg.generic {{.*}} ins(%{{.*}} : memref) inits(%{{.*}} : memref) linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%11 : memref (d0 * 384 + s0 + d1)>>) - outs(%12 : memref (d0 + s0)>>) { + inits(%12 : memref (d0 + s0)>>) { ^bb0(%arg1: f32, %arg2: f32): %14 = arith.addf %arg1, %arg2 : f32 linalg.yield %14 : f32 diff --git a/mlir/test/Dialect/SCF/loop-pipelining.mlir b/mlir/test/Dialect/SCF/loop-pipelining.mlir --- a/mlir/test/Dialect/SCF/loop-pipelining.mlir +++ b/mlir/test/Dialect/SCF/loop-pipelining.mlir @@ -558,7 +558,7 @@ // CHECK-SAME: iter_args(%[[IA:.+]] = %[[PAV0]], %[[IB:.+]] = %[[PBV0:.+]]) // CHECK: %[[CV:.+]] = memref.subview %[[ARG2]] // CHECK: linalg.generic -// CHECK-SAME: ins(%[[IA]], %[[IB]], %{{.*}} : {{.*}}) outs(%[[CV]] : +// CHECK-SAME: ins(%[[IA]], %[[IB]], %{{.*}} : {{.*}}) inits(%[[CV]] : // CHECK: %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]] // CHECK: %[[ASV:.+]] = memref.subview %[[ARG0]][%[[NEXT]]] [8] [1] : // CHECK: %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]] : @@ -573,7 +573,7 @@ // CHECK: } // CHECK: %[[CV:.+]] = memref.subview %[[ARG2]][%[[C3]]] [8] [1] : // CHECK: linalg.generic -// CHECK-SAME: ins(%[[R]]#0, %[[R]]#1, %{{.*}} : {{.*}}) outs(%[[CV]] : +// CHECK-SAME: ins(%[[R]]#0, %[[R]]#1, %{{.*}} : {{.*}}) inits(%[[CV]] : #map = affine_map<(d0)[s0]->(d0 + s0)> @@ -608,7 +608,7 @@ %C_view = memref.subview %result[%i0][8][1] { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : memref to memref<8xf32, #map> %scalar = arith.addf %cf, %cf {__test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1} : f32 linalg.generic #linalg_attrs ins(%a_buf_view, %b_buf_view, %scalar : memref<8xf32, #map>, memref<8xf32, #map>, f32) - outs(%C_view: memref<8xf32, #map>) { + inits(%C_view: memref<8xf32, #map>) { ^bb0(%a: f32, %b: f32, %s: f32, %c: f32): %add = arith.addf %a, %b : f32 %accum = arith.addf %add, %c : f32 diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir @@ -153,7 +153,7 @@ // Use %t3 in some way without reading it, so that it does not get DCE'd. // CHECK: linalg.generic // CHECK-SAME: __inplace_operands_attr__ = ["true"] - %o = linalg.generic #trait outs (%t3 : tensor) { + %o = linalg.generic #trait inits (%t3 : tensor) { ^bb(%0: f32) : linalg.yield %cst : f32 } -> (tensor) @@ -191,7 +191,7 @@ // Write to %t1 via %t2. (Overwrite %t3.) 
// CHECK: linalg.generic // CHECK-SAME: __inplace_operands_attr__ = ["true"] - %o2 = linalg.generic #trait outs (%t2 : tensor) { + %o2 = linalg.generic #trait inits (%t2 : tensor) { ^bb(%0: f32) : linalg.yield %cst : f32 } -> (tensor) @@ -204,7 +204,7 @@ // Use %t3 in some way without reading it, so that it does not get DCE'd. // CHECK: linalg.generic // CHECK-SAME: __inplace_operands_attr__ = ["true"] - %o = linalg.generic #trait outs (%t3 : tensor) { + %o = linalg.generic #trait inits (%t3 : tensor) { ^bb(%0: f32) : linalg.yield %cst : f32 } -> (tensor) @@ -620,9 +620,9 @@ // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]} %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor<1xf32>) -> tensor<1xf32> + %7 = linalg.fill ins(%cst : f32) inits(%6 : tensor<1xf32>) -> tensor<1xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<1xf32>) -> tensor<1xf32> + %8 = linalg.fill ins(%cst : f32) inits(%7 : tensor<1xf32>) -> tensor<1xf32> scf.foreach_thread.perform_concurrently { // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]} @@ -647,14 +647,14 @@ %0 = bufferization.alloc_tensor() : tensor<4xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<4xf32>) -> tensor<4xf32> %2 = scf.for %arg5 = %arg2 to %arg3 step %arg4 iter_args(%arg6 = %arg1) -> (tensor<4xf32>) { // CHECK: tensor.extract {{.*}} {__inplace_operands_attr__ = ["true", "none"]} %4 = tensor.extract %1[%arg4] : tensor<4xf32> vector.print %4 : f32 // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %5 = linalg.fill ins(%cst2 : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32> + %5 = linalg.fill ins(%cst2 : f32) inits(%0 : tensor<4xf32>) -> tensor<4xf32> scf.yield %5 : tensor<4xf32> } @@ -677,14 +677,14 @@ %0 = bufferization.alloc_tensor() : tensor<4xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<4xf32>) -> tensor<4xf32> %2 = scf.for %arg5 = %arg2 to %arg3 step %arg4 iter_args(%arg6 = %arg1) -> (tensor<4xf32>) { // CHECK: tensor.extract {{.*}} {__inplace_operands_attr__ = ["true", "none"]} %4 = tensor.extract %1[%arg4] : tensor<4xf32> vector.print %4 : f32 // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]} - %5 = linalg.fill ins(%cst2 : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> + %5 = linalg.fill ins(%cst2 : f32) inits(%1 : tensor<4xf32>) -> tensor<4xf32> scf.yield %5 : tensor<4xf32> } @@ -693,7 +693,7 @@ %6 = tensor.extract %1[%arg4] : tensor<4xf32> vector.print %6 : f32 // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %7 = linalg.fill ins(%cst3 : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> + %7 = linalg.fill ins(%cst3 : f32) inits(%1 : tensor<4xf32>) -> tensor<4xf32> return %2, %7 : tensor<4xf32>, tensor<4xf32> } diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -54,7 +54,7 @@ // CHECK: %[[clone:.*]] = 
bufferization.clone %[[alloc]] // CHECK: scf.for {{.*}} iter_args(%{{.*}} = %[[clone]]) %0 = scf.for %iv = %lb to %ub step %c1 iter_args(%1 = %A) -> tensor { - %r = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %r = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor scf.yield %B : tensor } %1 = tensor.extract %0[%c1] : tensor @@ -546,8 +546,8 @@ %2 = scf.foreach_thread (%arg3) in (%idx2) shared_outs(%o = %arg2) -> (tensor) { // CHECK: %[[subview:.*]] = memref.subview %[[arg2]][5] [%[[idx]]] [1] %6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor to tensor - // CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview]] : memref) -> tensor + // CHECK: linalg.fill ins(%{{.*}}) inits(%[[subview]] : memref) -> tensor // Self-copy will DCE away later. // CHECK: memref.copy %[[subview]], %[[subview]] @@ -574,8 +574,8 @@ // CHECK-SAME: %[[arg1:.*]]: memref, // CHECK-SAME: %[[arg2:.*]]: memref func.func @parallel_insert_slice_with_conflict( - %idx: index, - %idx2: index, + %idx: index, + %idx2: index, %arg1: tensor {bufferization.writable = true}, %arg2: tensor {bufferization.writable = true}) -> (f32, f32) { @@ -593,8 +593,8 @@ // CHECK: %[[subview1:.*]] = memref.subview %[[alloc1]][5] [%[[idx]]] [1] %6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor to tensor - // CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview1]] : memref) -> tensor + // CHECK: linalg.fill ins(%{{.*}}) inits(%[[subview1]] : memref) -> tensor // Now the copy of the actual insert_slice. (It will fold away.) // CHECK: memref.copy %[[subview1]], %[[subview1]] @@ -636,8 +636,8 @@ %6 = tensor.extract_slice %arg1[0, %4] [8, 4] [1, 1] : tensor<8x8xf32> to tensor<8x4xf32> %7 = tensor.extract_slice %o[%1, %4] [4, 4] [1, 1] : tensor<8x8xf32> to tensor<4x4xf32> - // CHECK: linalg.matmul ins({{.*}}memref<4x8xf32, strided<[?, ?], offset: ?>>, memref<8x4xf32, strided<[?, ?], offset: ?>>) outs({{.*}} : memref<4x4xf32, strided<[?, ?], offset: ?>>) - %8 = linalg.matmul ins(%3, %6 : tensor<4x8xf32>, tensor<8x4xf32>) outs(%7 : tensor<4x4xf32>) -> tensor<4x4xf32> + // CHECK: linalg.matmul ins({{.*}}memref<4x8xf32, strided<[?, ?], offset: ?>>, memref<8x4xf32, strided<[?, ?], offset: ?>>) inits({{.*}} : memref<4x4xf32, strided<[?, ?], offset: ?>>) + %8 = linalg.matmul ins(%3, %6 : tensor<4x8xf32>, tensor<8x4xf32>) inits(%7 : tensor<4x4xf32>) -> tensor<4x4xf32> scf.foreach_thread.perform_concurrently { tensor.parallel_insert_slice %8 into %o[%1, %4] [4, 4] [1, 1] : tensor<4x4xf32> into tensor<8x8xf32> } @@ -872,7 +872,7 @@ // ----- // CHECK-LABEL: func @non_tensor_for_arg -func.func @non_tensor_for_arg(%A : tensor {bufferization.writable = true}) +func.func @non_tensor_for_arg(%A : tensor {bufferization.writable = true}) -> tensor { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -888,7 +888,7 @@ // ----- // This is a regression test. Just check that the IR bufferizes. - + // CHECK-LABEL: func @buffer_type_of_collapse_shape func.func @buffer_type_of_collapse_shape(%arg0: tensor) { %true = arith.constant true @@ -906,10 +906,10 @@ // ----- // This is a regression test. Just check that the IR bufferizes. 
- + // CHECK-LABEL: func @non_block_argument_yield func.func @non_block_argument_yield() { - %true = arith.constant true + %true = arith.constant true %0 = bufferization.alloc_tensor() : tensor %1 = scf.while (%arg0 = %0) : (tensor) -> (tensor) { scf.condition(%true) %arg0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir b/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir --- a/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir +++ b/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir @@ -47,14 +47,14 @@ // CHECK: } do { // CHECK: ^bb0(%[[I2:.*]]: index): // CHECK: scf.yield %[[I2]] : index -// CHECK: } +// CHECK: } // CHECK: %[[M2:.*]] = memref.realloc %[[B]](%[[P2]]) // CHECK: scf.yield %[[M2]] : memref // CHECK: } else { // CHECK: scf.yield %[[B]] : memref // CHECK: } // CHECK: %[[S:.*]] = memref.subview %[[M]]{{\[}}%[[S1]]] {{\[}}%[[D]]] [1] -// CHECK: linalg.fill ins(%[[C]] : f64) outs(%[[S]] +// CHECK: linalg.fill ins(%[[C]] : f64) inits(%[[S]] // CHECK: memref.store %[[S2]], %[[A]]{{\[}}%[[C2]]] // CHECK: return %[[M]] : memref func.func @sparse_push_back_n(%arg0: memref, %arg1: memref, %arg2: f64, %arg3: index) -> memref { diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -291,7 +291,7 @@ // CHECK: %[[T5:.*]] = memref.cast %[[T4]] : memref<16xindex> to memref // CHECK: %[[T6:.*]] = memref.alloc() : memref<16xf64> // CHECK: %[[T7:.*]] = memref.cast %[[T6]] : memref<16xf64> to memref -// CHECK: linalg.fill ins(%[[C0]] : index) outs(%[[T1]] : memref<3xindex>) +// CHECK: linalg.fill ins(%[[C0]] : index) inits(%[[T1]] : memref<3xindex>) // CHECK: memref.store %[[A]], %[[T0]][%[[C0]]] : memref<2xindex> // CHECK: memref.store %[[C10]], %[[T0]][%[[C1]]] : memref<2xindex> // CHECK: %[[P0:.*]] = sparse_tensor.push_back %[[T1]], %[[T3]] @@ -318,7 +318,7 @@ // CHECK: %[[A1:.*]] = memref.alloc() : memref<1xindex> // CHECK: %[[AV:.*]] = memref.alloc() : memref<16xf64> // CHECK: %[[A2:.*]] = memref.cast %[[AV]] : memref<16xf64> to memref -// CHECK: linalg.fill ins(%[[C0]] : index) outs(%[[A1]] : memref<1xindex>) +// CHECK: linalg.fill ins(%[[C0]] : index) inits(%[[A1]] : memref<1xindex>) // CHECK: memref.store %[[C30]], %[[A0]][%[[C0]]] : memref<3xindex> // CHECK: memref.store %[[C10]], %[[A0]][%[[C1]]] : memref<3xindex> // CHECK: memref.store %[[C20]], %[[A0]][%[[C2]]] : memref<3xindex> @@ -336,8 +336,8 @@ // CHECK: %[[B:.*]] = memref.alloc() : memref<8xi1> // CHECK: %[[C:.*]] = memref.alloc() : memref<8xindex> // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<8xindex> to memref -// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<8xf64>) -// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<8xi1>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<8xf64>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<8xi1>) // CHECK: return %[[D]] : memref func.func @sparse_expansion1() -> memref { %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSR> @@ -351,8 +351,8 @@ // CHECK: %[[B:.*]] = memref.alloc() : memref<4xi1> // CHECK: %[[C:.*]] = memref.alloc() : memref<4xindex> // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<4xindex> to memref -// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<4xf64>) -// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<4xi1>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : 
memref<4xf64>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<4xi1>) // CHECK: return %[[D]] : memref func.func @sparse_expansion2() -> memref { %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSC> @@ -371,8 +371,8 @@ // CHECK: %[[V:.*]] = memref.alloc(%[[D1]]) : memref // CHECK: %[[B:.*]] = memref.alloc(%[[D1]]) : memref // CHECK: %[[D:.*]] = memref.alloc(%[[D1]]) : memref -// CHECK: linalg.fill ins(%{{.*}} : f64) outs(%[[V]] : memref) -// CHECK: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK: linalg.fill ins(%{{.*}} : f64) inits(%[[V]] : memref) +// CHECK: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK: return %[[D]] : memref func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { %0 = bufferization.alloc_tensor(%arg0, %arg1) : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -321,8 +321,8 @@ // CHECK: %[[B:.*]] = memref.alloc() : memref<8xi1> // CHECK: %[[C:.*]] = memref.alloc() : memref<8xindex> // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<8xindex> to memref -// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<8xf64>) -// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<8xi1>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<8xf64>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<8xi1>) // CHECK: return %[[D]] : memref func.func @sparse_expansion1() -> memref { %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSR> @@ -337,8 +337,8 @@ // CHECK: %[[B:.*]] = memref.alloc() : memref<4xi1> // CHECK: %[[C:.*]] = memref.alloc() : memref<4xindex> // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<4xindex> to memref -// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<4xf64>) -// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<4xi1>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<4xf64>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<4xi1>) // CHECK: return %[[D]] : memref func.func @sparse_expansion2() -> memref { %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSC> @@ -354,8 +354,8 @@ // CHECK: %[[A:.*]] = memref.alloc(%[[S]]) : memref // CHECK: %[[B:.*]] = memref.alloc(%[[S]]) : memref // CHECK: %[[C:.*]] = memref.alloc(%[[S]]) : memref -// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK: return %[[C]] : memref func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { %0 = bufferization.alloc_tensor(%arg0, %arg1) : tensor diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir --- a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir @@ -37,7 +37,7 @@ // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<13xi32> -// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) outs(%[[M]] : memref<13xi32>) +// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) inits(%[[M]] : memref<13xi32>) // CHECK: scf.while : () -> () 
{ // CHECK: %[[Cond:.*]] = func.call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -75,7 +75,7 @@ // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref -// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) outs(%[[M]] : memref) +// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) inits(%[[M]] : memref) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -118,7 +118,7 @@ // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x4xf64> // CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x4xf64>) +// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref<2x4xf64>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -136,7 +136,7 @@ // CHECK-RWT-SAME: %[[A:.*]]: tensor<2x4xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>>) -> tensor<2x4xf64> { // CHECK-RWT: %[[F0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-RWT: %[[B:.*]] = memref.alloc() : memref<2x4xf64> -// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) outs(%[[B]] +// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) inits(%[[B]] // CHECK-RWT: sparse_tensor.foreach in %[[A]] // CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64): // CHECK-RWT: memref.store %[[FV]], %[[B]]{{\[}}%[[FI0]], %[[FI1]]] @@ -174,7 +174,7 @@ // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref // CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref) +// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -218,7 +218,7 @@ // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI1]]) : memref<2x?xf64> // CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x?xf64>) +// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref<2x?xf64>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -262,7 +262,7 @@ // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]], %[[SizeI1]]) : memref // CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref) +// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -284,7 +284,7 @@ // CHECK-RWT: %[[D0:.*]] = tensor.dim %[[A]], %[[C0]] // 
CHECK-RWT: %[[D1:.*]] = tensor.dim %[[A]], %[[C1]] // CHECK-RWT: %[[B:.*]] = memref.alloc(%[[D0]], %[[D1]]) -// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) outs(%[[B]] +// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) inits(%[[B]] // CHECK-RWT: sparse_tensor.foreach in %[[A]] // CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64): // CHECK-RWT: memref.store %[[FV]], %[[B]]{{\[}}%[[FI0]], %[[FI1]]] @@ -326,7 +326,7 @@ // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x3x4xf64> // CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x3x4xf64>) +// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref<2x3x4xf64>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) diff --git a/mlir/test/Dialect/SparseTensor/dense.mlir b/mlir/test/Dialect/SparseTensor/dense.mlir --- a/mlir/test/Dialect/SparseTensor/dense.mlir +++ b/mlir/test/Dialect/SparseTensor/dense.mlir @@ -59,7 +59,7 @@ %c = arith.constant 1.0 : f32 %0 = linalg.generic #trait_2d ins(%arga: tensor<32x16xf32, #DenseMatrix>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %x: f32): %1 = arith.addf %a, %c : f32 linalg.yield %1 : f32 @@ -99,7 +99,7 @@ %c = arith.constant 1.0 : f32 %0 = linalg.generic #trait_2d ins(%arga: tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32, #DenseMatrix>) { + inits(%argx: tensor<32x16xf32, #DenseMatrix>) { ^bb(%a: f32, %x: f32): %1 = arith.addf %a, %c : f32 linalg.yield %1 : f32 @@ -145,7 +145,7 @@ -> tensor<32x16xf32, #DenseMatrix> { %0 = linalg.generic #trait_3d ins(%arga: tensor<32x16x8xf32>) - outs(%argx: tensor<32x16xf32, #DenseMatrix>) { + inits(%argx: tensor<32x16xf32, #DenseMatrix>) { ^bb(%a: f32, %x: f32): %1 = arith.addf %x, %a : f32 linalg.yield %1 : f32 diff --git a/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir b/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir --- a/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir +++ b/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir @@ -59,12 +59,12 @@ -> (tensor<10xf32>, tensor<10xf32>) { // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[argb]]) {bufferization.escape = [false]} : tensor<10xf32> - // CHECK: linalg.generic {{.*}} outs(%[[alloc]] + // CHECK: linalg.generic {{.*}} inits(%[[alloc]] // CHECK-FUNC: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[argb]]) {bufferization.escape = [true]} : tensor<10xf32> - // CHECK-FUNC: linalg.generic {{.*}} outs(%[[alloc]] + // CHECK-FUNC: linalg.generic {{.*}} inits(%[[alloc]] %0 = linalg.generic #trait ins(%arga: tensor<10xf32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: f32, %x : f32): %up = arith.addf %a, %x : f32 linalg.yield %up : f32 diff --git a/mlir/test/Dialect/SparseTensor/one_trip.mlir b/mlir/test/Dialect/SparseTensor/one_trip.mlir --- a/mlir/test/Dialect/SparseTensor/one_trip.mlir +++ b/mlir/test/Dialect/SparseTensor/one_trip.mlir @@ -25,7 +25,7 @@ func.func @sparse_scale(%argx: tensor<1x1xf32, #Dense>) -> tensor<1x1xf32, #Dense> { %c = arith.constant 2.0 : f32 %0 = linalg.generic #trait_scale - outs(%argx: tensor<1x1xf32, #Dense>) { + inits(%argx: tensor<1x1xf32, #Dense>) { ^bb(%x: f32): %1 = arith.mulf %x, %c : f32 
linalg.yield %1 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir --- a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir +++ b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir @@ -56,7 +56,7 @@ func.func @sparse_scale(%argx: tensor) -> tensor { %c = arith.constant 2.0 : f32 %0 = linalg.generic #trait_scale - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%x: f32): %1 = arith.mulf %x, %c : f32 linalg.yield %1 : f32 @@ -96,7 +96,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #SortedCOO>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 @@ -121,7 +121,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x64xf64> -// CHECK-DAG: linalg.fill ins(%[[VAL_3]] : f64) outs(%[[VAL_14]] : memref<32x64xf64>) +// CHECK-DAG: linalg.fill ins(%[[VAL_3]] : f64) inits(%[[VAL_14]] : memref<32x64xf64>) // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref @@ -190,7 +190,7 @@ %argz: tensor<32x64xf64>) -> tensor<32x64xf64> { %0 = linalg.generic #trait_mul ins(%argx, %argy : tensor<32x64xf64, #SortedCOO>, tensor<32x64xf64, #SortedCOO>) - outs(%argz: tensor<32x64xf64>) { + inits(%argz: tensor<32x64xf64>) { ^bb(%x: f64, %y: f64, %z: f64): %1 = arith.mulf %x, %y : f64 linalg.yield %1 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir @@ -22,7 +22,7 @@ // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_8]] : memref<32xf32>) // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_11:.*]] = arith.addf %[[VAL_10]], %[[VAL_1]] : f32 @@ -34,7 +34,7 @@ func.func @add_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf32, #DV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.addf %a, %argb : f32 linalg.yield %0 : f32 @@ -52,7 +52,7 @@ // CHECK: %[[VAL_INITTENSOR:.*]] = tensor.empty() : tensor<32xf32> // CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref // CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_INITTENSOR]] : memref<32xf32> -// CHECK: linalg.fill 
ins(%[[VAL_3]] : f32) outs(%[[VAL_7]] : memref<32xf32>) +// CHECK: linalg.fill ins(%[[VAL_3]] : f32) inits(%[[VAL_7]] : memref<32xf32>) // CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] { // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_10:.*]] = arith.addf %[[VAL_9]], %[[VAL_1]] : f32 @@ -65,7 +65,7 @@ %u = tensor.empty() : tensor<32xf32> %0 = linalg.generic #trait1 ins(%arga: tensor<32xf32, #DV>) - outs(%u: tensor<32xf32>) { + inits(%u: tensor<32xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.addf %a, %argb : f32 linalg.yield %0 : f32 @@ -82,7 +82,7 @@ // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_8]] : memref<32xf32>) // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_11:.*]] = arith.mulf %[[VAL_10]], %[[VAL_1]] : f32 @@ -94,7 +94,7 @@ func.func @mul_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf32, #DV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %argb : f32 linalg.yield %0 : f32 @@ -116,7 +116,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_11]] : memref<32xf32>) // CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_17:.*]] = arith.cmpi ult, %[[VAL_15]], %[[VAL_13]] : index // CHECK: scf.condition(%[[VAL_17]]) %[[VAL_15]], %[[VAL_16]] : index, index @@ -149,7 +149,7 @@ func.func @add_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.addf %a, %argb : f32 linalg.yield %0 : f32 @@ -168,7 +168,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] // CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_8]] : memref<32xf32>) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_3]] { // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_11]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref @@ -186,7 +186,7 @@ func.func @repeated_add_s(%arga: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.addf %a, %a : f32 // same tensor %1 
= arith.addf %a, %a : f32 // should yield @@ -206,7 +206,7 @@ // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_9]] : memref<32xf32>) // CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] { @@ -221,7 +221,7 @@ func.func @mul_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %argb : f32 linalg.yield %0 : f32 @@ -249,7 +249,7 @@ // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref // CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_9]] : memref<32xf32>) // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32> @@ -262,7 +262,7 @@ func.func @add_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32, #DV>, tensor<32xf32>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -280,7 +280,7 @@ // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref // CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_9]] : memref<32xf32>) // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32> @@ -293,7 +293,7 @@ func.func @mul_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32, #DV>, tensor<32xf32>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -314,7 +314,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = 
sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>) // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { @@ -352,7 +352,7 @@ func.func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32>, tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -371,7 +371,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32xf32>) // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref // CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] { @@ -387,7 +387,7 @@ func.func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32>, tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -408,7 +408,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>) // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { @@ -446,7 +446,7 @@ func.func @add_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -465,7 +465,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref 
%[[VAL_1]] : memref<32xf32> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32xf32>) // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] { @@ -481,7 +481,7 @@ func.func @mul_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -502,7 +502,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>) // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref @@ -564,7 +564,7 @@ func.func @add_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -585,7 +585,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>) // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref @@ -625,7 +625,7 @@ func.func @mul_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -647,7 +647,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ 
dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<16xf32>) // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref @@ -716,7 +716,7 @@ // Kernel "x(i) = a(i) * c + b(i) * c". %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>) - outs(%argx: tensor<16xf32>) { + inits(%argx: tensor<16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %argc : f32 %1 = arith.mulf %b, %argc : f32 @@ -740,7 +740,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<16xf32>) // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref @@ -809,7 +809,7 @@ // Same kernel, but now expressed as "x(i) = (a(i) + b(i)) * c". %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>) - outs(%argx: tensor<16xf32>) { + inits(%argx: tensor<16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 %1 = arith.mulf %0, %argc : f32 @@ -850,7 +850,7 @@ func.func @sum_reduction(%arga: tensor, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 @@ -954,7 +954,7 @@ // as two separate reductions kernels. %0 = linalg.generic #trait_sum_reduction2 ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -1067,7 +1067,7 @@ // as two separate reductions kernels. 
%0 = linalg.generic #trait_sum_reduction_inv ins(%arga, %argb, %argc : tensor<16xf32, #SV>, tensor, tensor<16xf32, #SV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %b: f32, %c: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %0, %c : f32 @@ -1108,7 +1108,7 @@ // CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_3]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_16:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor // CHECK-DAG: %[[VAL_18:.*]] = bufferization.to_memref %[[VAL_4]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_18]] : memref) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[VAL_18]] : memref) // CHECK-DAG: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref // CHECK-DAG: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref // CHECK-DAG: %[[VAL_21:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref @@ -1246,7 +1246,7 @@ %argx: tensor) -> tensor { %r = linalg.generic #trait_four_tensors ins(%arga, %argb, %argc, %argd: tensor, tensor, tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f64, %b: f64, %c: f64, %d: f64, %x: f64): %0 = arith.addf %a, %b : f64 %1 = arith.addf %c, %d : f64 @@ -1559,7 +1559,7 @@ %argc: tensor, %argx: tensor) ->tensor{ %0 = linalg.generic #trait_red3s ins(%arga, %argb, %argc: tensor, tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f64,%b: f64,%c: f64,%x: f64): %0 = arith.addf %x, %a : f64 %1 = arith.addf %0, %b : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir @@ -27,7 +27,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32x16xf32>) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]], %[[VAL_4]] : index @@ -44,7 +44,7 @@ func.func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tdd>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -63,7 +63,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32x16xf32>) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]],
%[[VAL_4]] : index @@ -80,7 +80,7 @@ func.func @mul_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tdd>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -102,7 +102,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<32x16xf32>) // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref // CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_14]], %[[VAL_7]] : index @@ -143,7 +143,7 @@ func.func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tds>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -163,7 +163,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_11]] : memref<32x16xf32>) // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref // CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index @@ -182,7 +182,7 @@ func.func @mul_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tds>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -204,7 +204,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<32x16xf32>) // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_16:.*]]:2 = scf.while (%[[VAL_17:.*]] = %[[VAL_14]], %[[VAL_18:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { @@ -250,7 +250,7 @@ func.func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: 
tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -270,7 +270,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_11]] : memref<32x16xf32>) // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_5]] { @@ -290,7 +290,7 @@ func.func @mul_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -314,7 +314,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_15]] : memref<32x16xf32>) // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_16]], %[[VAL_20:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { @@ -384,7 +384,7 @@ func.func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -405,7 +405,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32x16xf32>) // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_4]] { @@ -427,7 +427,7 @@ func.func @mul_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = 
linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -452,7 +452,7 @@ // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_16]] : memref<32x16xf32>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref @@ -591,7 +591,7 @@ func.func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32, #Tss>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -616,7 +616,7 @@ // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_16]] : memref<32x16xf32>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref @@ -687,7 +687,7 @@ func.func @mul_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32, #Tss>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -711,7 +711,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_15]] : memref<32x16xf32>) // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref // CHECK: 
%[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_16]], %[[VAL_20:.*]] = %[[VAL_5]]) : (index, index) -> (index, index) { @@ -793,7 +793,7 @@ func.func @add_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32, #Tds>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -815,7 +815,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<32x16xf32>) // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_14]] to %[[VAL_15]] step %[[VAL_5]] { @@ -839,7 +839,7 @@ func.func @mul_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32, #Tds>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -890,7 +890,7 @@ func.func @matvec(%argA: tensor<16x32xf32, #Tds>, %argb: tensor<32xf32>, %argx: tensor<16xf32>) -> tensor<16xf32> { %0 = linalg.generic #trait_matvec ins(%argA, %argb: tensor<16x32xf32, #Tds>, tensor<32xf32>) - outs(%argx: tensor<16xf32>) { + inits(%argx: tensor<16xf32>) { ^bb(%A: f32, %b: f32, %x: f32): %0 = arith.mulf %A, %b : f32 %1 = arith.addf %0, %x : f32 @@ -936,7 +936,7 @@ func.func @sum_reduction(%arga: tensor<10x20xf32, #Tds>, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction ins(%arga: tensor<10x20xf32, #Tds>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 @@ -964,7 +964,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref -// CHECK: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_11]] : memref) +// CHECK: linalg.fill ins(%{{.*}} : f64) inits(%[[VAL_11]] : memref) // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_4]] { // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_12]]] : memref // CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_4]] : index @@ -983,7 +983,7 @@ %0 = arith.constant 2.0 : f64 %1 = linalg.generic #trait_scale ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f64, %x: f64): %2 = arith.mulf %a, %0 : f64 linalg.yield %2 : f64 @@ -1049,7 +1049,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_sampled_dense_dense ins(%args, %arga, %argb: tensor, tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%s: f32, %a: f32, %b: f32, %x: f32): %0 =
arith.mulf %a, %b : f32 %1 = arith.mulf %s, %0 : f32 @@ -1273,7 +1273,7 @@ tensor, tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %b: f32, %c: f32, %d: f32, %e: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.mulf %0, %d : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir @@ -35,7 +35,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_11]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { // CHECK: %[[VAL_14:.*]] = arith.muli %[[VAL_12]], %[[VAL_4]] : index @@ -56,7 +56,7 @@ func.func @add_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tddd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -77,7 +77,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_11]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { // CHECK: %[[VAL_14:.*]] = arith.muli %[[VAL_12]], %[[VAL_4]] : index @@ -98,7 +98,7 @@ func.func @mul_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tddd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -122,7 +122,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_15]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_9]] { // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_9]] { // CHECK: %[[VAL_18:.*]] = arith.muli %[[VAL_16]], 
%[[VAL_5]] : index @@ -167,7 +167,7 @@ func.func @add_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdds>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -189,7 +189,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_13]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_6]] to %[[VAL_5]] step %[[VAL_7]] { // CHECK: %[[VAL_16:.*]] = arith.muli %[[VAL_14]], %[[VAL_5]] : index @@ -212,7 +212,7 @@ func.func @mul_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdds>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -236,7 +236,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_8]] { // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_15]]] : memref // CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_8]] : index @@ -285,7 +285,7 @@ func.func @add_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdsd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -307,7 +307,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_12]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref // CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_6]] : index @@ -330,7 +330,7 
@@ func.func @mul_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdsd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -356,7 +356,7 @@ // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_17]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_8]] to %[[VAL_4]] step %[[VAL_9]] { // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_9]] : index @@ -429,7 +429,7 @@ func.func @add_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -452,7 +452,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref // CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_6]] : index @@ -477,7 +477,7 @@ func.func @mul_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -501,7 +501,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_15]], %[[VAL_19:.*]] = %[[VAL_7]]) : (index, 
index) -> (index, index) { @@ -555,7 +555,7 @@ func.func @add_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsdd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -577,7 +577,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_12]] : memref<32x16x8xf32>) // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_6]] { @@ -601,7 +601,7 @@ func.func @mul_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsdd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -627,7 +627,7 @@ // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_17]] : memref<32x16x8xf32>) // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_20:.*]]:2 = scf.while (%[[VAL_21:.*]] = %[[VAL_18]], %[[VAL_22:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { @@ -705,7 +705,7 @@ func.func @add_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsds>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -728,7 +728,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref 
// CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_15]] to %[[VAL_16]] step %[[VAL_6]] { @@ -754,7 +754,7 @@ func.func @mul_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsds>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -780,7 +780,7 @@ // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_16]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_16]] : memref<32x16x8xf32>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_17]], %[[VAL_21:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) { @@ -862,7 +862,7 @@ func.func @add_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tssd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -885,7 +885,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_13]] : memref<32x16x8xf32>) // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_14]] to %[[VAL_15]] step %[[VAL_5]] { @@ -911,7 +911,7 @@ func.func @mul_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tssd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -939,7 +939,7 @@ // CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_19:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_19]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_19]] : memref<32x16x8xf32>) // CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref // CHECK: 
%[[VAL_21:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_22:.*]]:2 = scf.while (%[[VAL_23:.*]] = %[[VAL_20]], %[[VAL_24:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { @@ -1045,7 +1045,7 @@ func.func @add_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -1069,7 +1069,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_15]] : memref<32x16x8xf32>) // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_16]] to %[[VAL_17]] step %[[VAL_5]] { @@ -1097,7 +1097,7 @@ func.func @mul_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -1163,7 +1163,7 @@ %argd: tensor) -> tensor { %0 = linalg.generic #trait_kernel_3d ins(%argb, %argc, %argd: tensor, tensor, tensor) - outs(%arga: tensor) { + inits(%arga: tensor) { ^bb(%b: f32, %c: f32, %d: f32, %a: f32): %0 = arith.mulf %b, %c : f32 %1 = arith.mulf %0, %d : f32 @@ -1219,7 +1219,7 @@ func.func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction ins(%arga: tensor<10x20x30xf32, #Tsss>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 @@ -1273,7 +1273,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction_inv ins(%arga, %argb: tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -1308,7 +1308,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<30xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : memref<10x20x30xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<10x20x30xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_13]] : memref<10x20x30xf32>) // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_14]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { @@ -1330,7 +1330,7 @@ %argx: tensor<10x20x30xf32>) -> tensor<10x20x30xf32> { %0 = linalg.generic #trait_invariants ins(%arga, %argb, %argc : tensor<10xf32, #Td>, tensor<20xf32>, tensor<30xf32>) - 
outs(%argx: tensor<10x20x30xf32>) { + inits(%argx: tensor<10x20x30xf32>) { ^bb(%a: f32, %b: f32, %c: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.mulf %0, %c : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir @@ -45,7 +45,7 @@ %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga, %argb: tensor<32xf32, #SpVec>, tensor<4xf32>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -77,7 +77,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<34xi32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi32> -// CHECK: linalg.fill ins(%[[ZERO]] : i32) outs(%[[VAL_11]] : memref<32xi32>) +// CHECK: linalg.fill ins(%[[ZERO]] : i32) inits(%[[VAL_11]] : memref<32xi32>) // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_4]] { @@ -96,7 +96,7 @@ %argx: tensor<32xi32>) -> tensor<32xi32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi32, #SpVec>, tensor<34xi32>) - outs(%argx: tensor<32xi32>) { + inits(%argx: tensor<32xi32>) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.andi %a, %b : i32 linalg.yield %0 : i32 @@ -152,7 +152,7 @@ %argx: tensor<32x16xf64>) -> tensor<32x16xf64> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16xf64, #CSR>, tensor<34x19xf64>) - outs(%argx: tensor<32x16xf64>) { + inits(%argx: tensor<32x16xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir b/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir @@ -46,7 +46,7 @@ func.func public @main(%arg0: tensor<4x5xi32, #DCSR>) -> tensor<4x3x5xi32, #SparseTensor> { %0 = bufferization.alloc_tensor() : tensor<4x3x5xi32, #SparseTensor> %1 = linalg.generic #trait - ins(%arg0 : tensor<4x5xi32, #DCSR>) outs(%0 : tensor<4x3x5xi32, #SparseTensor>) { + ins(%arg0 : tensor<4x5xi32, #DCSR>) inits(%0 : tensor<4x3x5xi32, #SparseTensor>) { ^bb0(%in: i32, %out: i32): linalg.yield %in : i32 } -> tensor<4x3x5xi32, #SparseTensor> diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -21,7 +21,7 @@ // CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<5x4xf64> -// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<5x4xf64>) +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) inits(%[[TMP_0]] : memref<5x4xf64>) // CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { // CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { // CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : 
tensor<2x4xf64> @@ -250,7 +250,7 @@ // CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<4x5xf64> -// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<4x5xf64>) +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) inits(%[[TMP_0]] : memref<4x5xf64>) // CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { // CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { // CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<4x2xf64> @@ -311,7 +311,7 @@ // CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index // CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<3x5xf64> // CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<3x5xf64> to memref -// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<3x5xf64>) +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) inits(%[[TMP_0]] : memref<3x5xf64>) // CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c3]] step %[[TMP_c1]] { // CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { // CHECK: %[[TMP_13:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<3x2xf64> diff --git a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir @@ -53,8 +53,8 @@ // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: } @@ -71,7 +71,7 @@ %v = bufferization.alloc_tensor(%n) : tensor %0 = linalg.generic #rowsum ins(%arga: tensor) - outs(%v: tensor) { + inits(%v: tensor) { ^bb(%a: f64, %x: f64): %1 = arith.addf %x, %a : f64 linalg.yield %1 : f64 @@ -104,8 +104,8 @@ // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[C4]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[C4]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[C4]]) : memref -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C8]] step %[[C1]] {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { @@ -123,7 +123,7 @@ %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSR> %D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSR>, tensor<2x4xf64, #CSR>) - outs(%C: tensor<8x4xf64, #CSR>) -> tensor<8x4xf64, #CSR> + inits(%C: tensor<8x4xf64, #CSR>) -> tensor<8x4xf64, #CSR> return %D: tensor<8x4xf64, #CSR> } @@ -152,8 +152,8 @@ // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[C8]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[C8]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[C8]]) : memref -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// 
CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C1]] {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { @@ -171,6 +171,6 @@ %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC> %D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>) - outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> + inits(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> return %D: tensor<8x4xf64, #CSC> } diff --git a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir @@ -35,8 +35,8 @@ // CHECK: %[[VAL_23:.*]] = memref.cast %[[VAL_22]] : memref<300xi1> to memref // CHECK: %[[VAL_24:.*]] = memref.alloc() : memref<300xindex> // CHECK: %[[VAL_25:.*]] = memref.cast %[[VAL_24]] : memref<300xindex> to memref -// CHECK: linalg.fill ins(%[[VAL_2]] : f64) outs(%[[VAL_20]] : memref<300xf64>) -// CHECK: linalg.fill ins(%[[VAL_7]] : i1) outs(%[[VAL_22]] : memref<300xi1>) +// CHECK: linalg.fill ins(%[[VAL_2]] : f64) inits(%[[VAL_20]] : memref<300xf64>) +// CHECK: linalg.fill ins(%[[VAL_7]] : i1) inits(%[[VAL_22]] : memref<300xi1>) // CHECK: %[[VAL_26:.*]] = call @sparsePointers0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK: %[[VAL_27:.*]] = call @sparseIndices0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK: %[[VAL_28:.*]] = call @sparsePointers0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref @@ -120,8 +120,8 @@ %0 = bufferization.alloc_tensor() : tensor<100x300xf64, #DCSR> %cst = arith.constant 0.000000e+00 : f64 %1 = linalg.fill ins(%cst : f64) - outs(%0 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> + inits(%0 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<100x200xf64, #DCSR>, tensor<200x300xf64, #DCSR>) - outs(%1 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> + inits(%1 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> return %2 : tensor<100x300xf64, #DCSR> } diff --git a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir @@ -54,7 +54,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = math.absf %a : f64 linalg.yield %0 : f64 @@ -86,7 +86,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = math.ceil %a : f64 linalg.yield %0 : f64 @@ -118,7 +118,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = math.floor %a : f64 linalg.yield %0 : f64 @@ -150,7 +150,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: 
f64): %0 = arith.negf %a : f64 linalg.yield %0 : f64 @@ -210,7 +210,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.addf %a, %b : f64 linalg.yield %0 : f64 @@ -272,7 +272,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.subf %a, %b : f64 linalg.yield %0 : f64 @@ -308,7 +308,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 linalg.yield %0 : f64 @@ -342,7 +342,7 @@ %c = arith.constant 2.0 : f64 %0 = linalg.generic #traitc ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = arith.divf %a, %c : f64 linalg.yield %0 : f64 @@ -382,7 +382,7 @@ %xinp = bufferization.alloc_tensor() : tensor<32xf64, #SV> %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%xinp: tensor<32xf64, #SV>) { + inits(%xinp: tensor<32xf64, #SV>) { ^bb(%a: f64, %x: f64): %0 = math.absf %a : f64 %1 = math.ceil %0 : f64 @@ -423,7 +423,7 @@ %init = bufferization.alloc_tensor() : tensor<32xcomplex, #SV> %0 = linalg.generic #traitc ins(%arg0: tensor<32xcomplex, #SV>) - outs(%init: tensor<32xcomplex, #SV>) { + inits(%init: tensor<32xcomplex, #SV>) { ^bb(%a: complex, %x: complex): %0 = complex.div %a, %c : complex linalg.yield %0 : complex diff --git a/mlir/test/Dialect/SparseTensor/sparse_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_index.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_index.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_index.mlir @@ -55,7 +55,7 @@ %init = bufferization.alloc_tensor(%0, %1) : tensor %r = linalg.generic #trait ins(%arga: tensor) - outs(%init: tensor) { + inits(%init: tensor) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -112,7 +112,7 @@ %init = bufferization.alloc_tensor(%0, %1) : tensor %r = linalg.generic #trait ins(%arga: tensor) - outs(%init: tensor) { + inits(%init: tensor) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir @@ -74,7 +74,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.addi %a, %b : i64 linalg.yield %0 : i64 @@ -137,7 +137,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.subi %a, %b : i64 linalg.yield %0 : i64 @@ -173,7 +173,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, 
%x: i64): %0 = arith.muli %a, %b : i64 linalg.yield %0 : i64 @@ -207,7 +207,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.divsi %a, %c : i64 linalg.yield %0 : i64 @@ -241,7 +241,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.divui %a, %c : i64 linalg.yield %0 : i64 @@ -277,7 +277,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.andi %a, %b : i64 linalg.yield %0 : i64 @@ -337,7 +337,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.ori %a, %b : i64 linalg.yield %0 : i64 @@ -397,7 +397,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.xori %a, %b : i64 linalg.yield %0 : i64 @@ -431,7 +431,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.shrsi %a, %c : i64 linalg.yield %0 : i64 @@ -465,7 +465,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.shrui %a, %c : i64 linalg.yield %0 : i64 @@ -499,7 +499,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.shli %a, %c : i64 linalg.yield %0 : i64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir @@ -47,7 +47,7 @@ %c: tensor<10x30xf32>) -> tensor<10x30xf32> { %0 = linalg.matmul ins(%a, %b: tensor<10x20xf32, #DCSR>, tensor<20x30xf32>) - outs(%c: tensor<10x30xf32>) -> tensor<10x30xf32> + inits(%c: tensor<10x30xf32>) -> tensor<10x30xf32> return %0 : tensor<10x30xf32> } @@ -144,7 +144,7 @@ %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR> %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #DCSR>, tensor<8x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> + inits(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> return %D: tensor<4x4xf64, #DCSR> } @@ -193,7 +193,7 @@ %output: tensor<6x6xi32>) -> tensor<6x6xi32> { %0 = linalg.conv_2d ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>) - outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32> + inits (%output: tensor<6x6xi32>) -> tensor<6x6xi32> return %0 : tensor<6x6xi32> } @@ -244,7 +244,7 @@ %c2 = arith.constant 2 : i32 %0 = linalg.quantized_matmul ins(%input1, %input2, %c2, %c0 : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32) - outs(%output : tensor<5x6xi64>) -> tensor<5x6xi64> + inits(%output : tensor<5x6xi64>) -> 
tensor<5x6xi64> return %0: tensor<5x6xi64> } @@ -306,6 +306,6 @@ %x: tensor) -> tensor { %dot = linalg.dot ins(%a, %b: tensor<1024xf32, #SparseVector>, tensor<1024xf32, #SparseVector>) - outs(%x: tensor) -> tensor + inits(%x: tensor) -> tensor return %dot : tensor } diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir @@ -115,7 +115,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #CSR>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir @@ -115,7 +115,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #CSC>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir @@ -115,7 +115,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #CSR>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir b/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir @@ -38,7 +38,7 @@ // CHECK-DAG: %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<16xindex> to memref // CHECK-DAG: %[[VAL_22:.*]] = memref.alloc() : memref<16xf64> // CHECK-DAG: %[[VAL_23:.*]] = memref.cast %[[VAL_22]] : memref<16xf64> to memref -// CHECK: linalg.fill ins(%[[VAL_12]] : index) outs(%[[VAL_17]] : memref<3xindex>) +// CHECK: linalg.fill ins(%[[VAL_12]] : index) inits(%[[VAL_17]] : memref<3xindex>) // CHECK: memref.store %[[VAL_10]], %[[VAL_16]]{{\[}}%[[VAL_12]]] : memref<2xindex> // CHECK: memref.store %[[VAL_10]], %[[VAL_16]]{{\[}}%[[VAL_13]]] : memref<2xindex> // CHECK: %[[VAL_24:.*]] = sparse_tensor.push_back %[[VAL_17]], %[[VAL_19]], %[[VAL_12]] {idx = 0 : index} : memref<3xindex>, memref, index @@ -47,8 +47,8 @@ // CHECK: %[[VAL_27:.*]] = memref.alloc() : memref<4xi1> // CHECK: %[[VAL_28:.*]] = memref.alloc() : memref<4xindex> // CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<4xindex> to memref -// CHECK: linalg.fill ins(%[[VAL_11]] : f64) outs(%[[VAL_26]] : memref<4xf64>) -// CHECK: linalg.fill ins(%[[VAL_14]] : i1) outs(%[[VAL_27]] : memref<4xi1>) +// CHECK: linalg.fill ins(%[[VAL_11]] : f64) inits(%[[VAL_26]] : memref<4xf64>) +// CHECK: linalg.fill ins(%[[VAL_14]] : i1) inits(%[[VAL_27]] : memref<4xi1>) // CHECK: %[[VAL_30:.*]]:2 = scf.for %[[VAL_31:.*]] = %[[VAL_12]] to %[[VAL_10]] step %[[VAL_13]] 
iter_args(%[[VAL_32:.*]] = %[[VAL_21]], %[[VAL_33:.*]] = %[[VAL_23]]) -> (memref, memref) { // CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_2]]{{\[}}%[[VAL_31]]] : memref // CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_31]], %[[VAL_13]] : index @@ -133,6 +133,6 @@ %C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR> %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #CSR>, tensor<8x4xf64, #CSR>) - outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> + inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> return %D: tensor<4x4xf64, #CSR> } diff --git a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir @@ -40,7 +40,7 @@ // CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense", "compressed", "compressed", "dense", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense", "compressed", "compressed", "dense", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_2]] : memref<10x20x30x40x50x60x70x80xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32> +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32> // CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_11]] to %[[VAL_10]] step %[[VAL_12]] { // CHECK: scf.for %[[VAL_22:.*]] = %[[VAL_11]] to %[[VAL_9]] step %[[VAL_12]] { // CHECK: %[[VAL_23:.*]] = arith.muli %[[VAL_21]], %[[VAL_9]] : index @@ -89,7 +89,7 @@ %0 = linalg.generic #trait_mul ins(%arga, %argb: tensor<10x20x30x40x50x60x70x80xf32>, tensor<80x70x60x50x40x30x20x10xf32, #SparseTensor>) - outs(%argx: tensor<10x20x30x40x50x60x70x80xf32>) { + inits(%argx: tensor<10x20x30x40x50x60x70x80xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_out.mlir b/mlir/test/Dialect/SparseTensor/sparse_out.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_out.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_out.mlir @@ -48,7 +48,7 @@ func.func @sparse_simply_dynamic1(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR> { %c = arith.constant 2.0 : f32 %0 = linalg.generic #trait_scale_inpl - outs(%argx: tensor<32x16xf32, #DCSR>) { + inits(%argx: tensor<32x16xf32, #DCSR>) { ^bb(%x: f32): %1 = arith.mulf %x, %c : f32 linalg.yield %1 : f32 @@ -81,7 +81,7 @@ // CHECK: } func.func @sparse_simply_dynamic2(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR> { %0 = linalg.generic #trait_scale_inpl - outs(%argx: tensor<32x16xf32, #DCSR>) { + inits(%argx: tensor<32x16xf32, #DCSR>) { ^bb(%x: f32): %1 = arith.addf %x, %x : f32 linalg.yield %1 : f32 @@ -129,7 +129,7 @@ %xm = bufferization.alloc_tensor() : tensor<10x20xf32, #DCSR> %0 = linalg.generic #trait_scale ins(%arga: tensor<10x20xf32, #CSR>) - outs(%xm: tensor<10x20xf32, #DCSR>) { + inits(%xm: tensor<10x20xf32, #DCSR>) { ^bb(%a: f32, %x: f32): %1 = arith.mulf %a, %s : f32 linalg.yield %1 : f32 @@ -285,7 +285,7 @@ %0 = linalg.generic #trait_sumred ins(%arga, %argb: tensor, tensor) - outs(%xinit: tensor) { + inits(%xinit: tensor) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.muli %a, %b : i32 %1 = arith.addi %x, 
%0 : i32 @@ -400,7 +400,7 @@ %0 = linalg.generic #trait_matmat ins(%arga, %argb: tensor, tensor) - outs(%cinit: tensor) { + inits(%cinit: tensor) { ^bb(%a: f32, %b: f32, %c: f32): %1 = arith.mulf %a, %b : f32 %2 = arith.addf %c, %1 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir b/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir @@ -20,7 +20,7 @@ // CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<10xf32> -// CHECK: linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_8]] : memref<10xf32>) +// CHECK: linalg.fill ins(%[[VAL_3]] : f32) inits(%[[VAL_8]] : memref<10xf32>) // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] { @@ -36,7 +36,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = linalg.generic #trait ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: i32, %x : f32): %cst = arith.sitofp %a : i32 to f32 linalg.yield %cst : f32 @@ -54,7 +54,7 @@ // CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_4]] : memref<10xf32> -// CHECK: linalg.fill ins(%[[VAL_2]] : f32) outs(%[[VAL_8]] : memref<10xf32>) +// CHECK: linalg.fill ins(%[[VAL_2]] : f32) inits(%[[VAL_8]] : memref<10xf32>) // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_1]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_3]] { @@ -70,7 +70,7 @@ %m = bufferization.alloc_tensor() : tensor<10xf32> %0 = linalg.generic #trait ins(%arga: tensor<10xi32, #SV>) - outs(%m: tensor<10xf32>) { + inits(%m: tensor<10xf32>) { ^bb(%a: i32, %x : f32): %cst = arith.sitofp %a : i32 to f32 linalg.yield %cst : f32 @@ -103,7 +103,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = linalg.generic #trait ins(%arga: tensor<10xf32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: f32, %x : f32): %up = arith.addf %a, %x : f32 linalg.yield %up : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir @@ -61,7 +61,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_dd ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %scale : f32 linalg.yield %0 : f32 @@ -109,7 +109,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_ss ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %scale : f32 linalg.yield %0 : f32 @@ -159,7 +159,7 @@ %argx: tensor<16xf32>) -> tensor<16xf32> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>) - outs(%argx: tensor<16xf32>) { + inits(%argx: 
tensor<16xf32>) { ^bb(%A: f32, %b: f32, %x: f32): %0 = arith.mulf %A, %b : f32 %1 = arith.addf %0, %x : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir @@ -53,7 +53,7 @@ %argx: tensor<16xf32>) -> tensor<16xf32> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>) - outs(%argx: tensor<16xf32>) { + inits(%argx: tensor<16xf32>) { ^bb(%A: f32, %b: f32, %x: f32): %0 = arith.mulf %A, %b : f32 %1 = arith.addf %0, %x : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir @@ -25,7 +25,7 @@ // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20x30x10xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_9]] : memref<20x30x10xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_9]] : memref<20x30x10xf32>) // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_10]], %[[VAL_4]] : index @@ -45,7 +45,7 @@ %argx: tensor<20x30x10xf32>) -> tensor<20x30x10xf32> { %0 = linalg.generic #trait ins(%arga: tensor<10x20x30xf32, #X>) - outs(%argx: tensor<20x30x10xf32>) { + inits(%argx: tensor<20x30x10xf32>) { ^bb(%a : f32, %x: f32): linalg.yield %a : f32 } -> tensor<20x30x10xf32> @@ -64,7 +64,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor> // CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_10]] : memref) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_10]] : memref) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_4]] { // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_4]] { // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_7]], %[[VAL_11]] : index @@ -84,7 +84,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a : f32, %x: f32): linalg.yield %a : f32 } -> tensor diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir @@ -83,7 +83,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a : f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir @@ -68,7 +68,7 @@ %c = arith.constant 2.2 : f32 %0 = linalg.generic #trait ins(%arga, %argp, %argq: tensor<32x16xf32, #SparseMatrix>, tensor, f32) - outs(%argx: tensor<32x16xf32>) { + 
inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %p: f32, %q: f32, %x: f32): %0 = arith.mulf %a, %p : f32 // scalar tensor argument %1 = arith.mulf %0, %q : f32 // scalar argument diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir @@ -32,7 +32,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst : f64) - outs(%0 : tensor<1024x1024xf64>) { + inits(%0 : tensor<1024x1024xf64>) { ^bb0(%a: f64, %x: f64): linalg.yield %a : f64 } -> tensor<1024x1024xf64> @@ -49,7 +49,7 @@ %0 = tensor.empty() : tensor<32xf64> %1 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%0 : tensor<32xf64>) { + inits(%0 : tensor<32xf64>) { ^bb0(%x: f64): linalg.yield %cst : f64 } -> tensor<32xf64> @@ -106,7 +106,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -115,7 +115,7 @@ // Sample the result with elements-wise multiplication with sparse matrix. %3 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 @@ -188,7 +188,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -198,7 +198,7 @@ %3 = bufferization.alloc_tensor() : tensor<8x8xf64, #SM> %4 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%3 : tensor<8x8xf64, #SM>) { + inits(%3 : tensor<8x8xf64, #SM>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_storage.mlir b/mlir/test/Dialect/SparseTensor/sparse_storage.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_storage.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_storage.mlir @@ -40,7 +40,7 @@ func.func @mul64(%arga: tensor<32xf64, #SparseVector64>, %argb: tensor<32xf64>, %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_mul ins(%arga, %argb: tensor<32xf64, #SparseVector64>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 linalg.yield %0 : f64 @@ -69,7 +69,7 @@ func.func @mul32(%arga: tensor<32xf64, #SparseVector32>, %argb: tensor<32xf64>, %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_mul ins(%arga, %argb: tensor<32xf64, #SparseVector32>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 linalg.yield %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir @@ -50,7 +50,7 @@ %i = bufferization.alloc_tensor() : tensor<4x3xf64, #DCSR> %0 = linalg.generic 
#transpose_trait ins(%arga: tensor<3x4xf64, #DCSR>) - outs(%i: tensor<4x3xf64, #DCSR>) { + inits(%i: tensor<4x3xf64, #DCSR>) { ^bb(%a: f64, %x: f64): linalg.yield %a : f64 } -> tensor<4x3xf64, #DCSR> diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir @@ -28,7 +28,7 @@ func.func @scale_d(%arga: tensor<1024xf32, #DenseVector>, %b: f32, %argx: tensor<1024xf32>) -> tensor<1024xf32> { %0 = linalg.generic #trait_scale_d ins(%arga: tensor<1024xf32, #DenseVector>) - outs(%argx: tensor<1024xf32>) { + inits(%argx: tensor<1024xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -78,7 +78,7 @@ func.func @mul_s(%arga: tensor<1024xf32, #SparseVector>, %argb: tensor<1024xf32>, %argx: tensor<1024xf32>) -> tensor<1024xf32> { %0 = linalg.generic #trait_mul_s ins(%arga, %argb: tensor<1024xf32, #SparseVector>, tensor<1024xf32>) - outs(%argx: tensor<1024xf32>) { + inits(%argx: tensor<1024xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -117,7 +117,7 @@ func.func @reduction_d(%arga: tensor<1024xf32, #DenseVector>, %argb: tensor<1024xf32>, %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction_d ins(%arga, %argb: tensor<1024xf32, #DenseVector>, tensor<1024xf32>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -172,7 +172,7 @@ func.func @mul_ds(%arga: tensor<512x1024xf32, #SparseMatrix>, %argb: tensor<512x1024xf32>, %argx: tensor<512x1024xf32>) -> tensor<512x1024xf32> { %0 = linalg.generic #trait_mul_ds ins(%arga, %argb: tensor<512x1024xf32, #SparseMatrix>, tensor<512x1024xf32>) - outs(%argx: tensor<512x1024xf32>) { + inits(%argx: tensor<512x1024xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -216,7 +216,7 @@ %argx: tensor<33x64xf64>) -> tensor<33x64xf64> { %0 = linalg.generic #trait_affine ins(%arga: tensor<32x64xf64, #SparseMatrix>) - outs(%argx: tensor<33x64xf64>) { + inits(%argx: tensor<33x64xf64>) { ^bb(%a: f64, %x: f64): %0 = arith.addf %x, %a : f64 linalg.yield %0 : f64 diff --git a/mlir/test/Dialect/Tensor/extract-slice-from-collapse-shape.mlir b/mlir/test/Dialect/Tensor/extract-slice-from-collapse-shape.mlir --- a/mlir/test/Dialect/Tensor/extract-slice-from-collapse-shape.mlir +++ b/mlir/test/Dialect/Tensor/extract-slice-from-collapse-shape.mlir @@ -17,9 +17,9 @@ // CHECK-DAG: %[[init:.+]] = tensor.empty() : tensor<20x11xf32> // CHECK-DAG: %[[tile:.+]] = scf.for %[[iv:.+]] = %[[c0]] to %[[c20]] step %[[c1]] iter_args(%[[iterArg:.+]] = %[[init]]) // CHECK: %[[multiIndex:.+]]:3 = affine.delinearize_index %[[iv]] into (%[[c3]], %[[c5]], %[[c7]] -// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] : -// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} : -// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg]][%[[iv]], 0] [1, 11] [1, 1] : +// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] : +// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} : +// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into 
%[[iterArg]][%[[iv]], 0] [1, 11] [1, 1] : // CHECK: scf.yield %[[update]] : // CHECK: return %[[tile]] @@ -31,8 +31,8 @@ // FOREACH-DAG: %[[init:.+]] = tensor.empty() : tensor<20x11xf32> // FOREACH: %[[tile:.+]] = scf.foreach_thread (%[[iv:.+]]) in (%[[c20]]) shared_outs(%[[dest:.+]] = %[[init]]) // FOREACH: %[[multiIndex:.+]]:3 = affine.delinearize_index %[[iv]] into (%[[c3]], %[[c5]], %[[c7]] -// FOREACH: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] : -// FOREACH: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} : +// FOREACH: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] : +// FOREACH: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} : // FOREACH: perform_concurrently // FOREACH-NEXT: tensor.parallel_insert_slice %[[sliceFlat]] into %[[dest]][%[[iv]], 0] [1, 11] [1, 1] : // FOREACH: return %[[tile]] @@ -58,9 +58,9 @@ // CHECK: %[[tile:.+]] = scf.for %[[iv:.+]] = %[[c0]] to %[[c10]] step %[[c1]] iter_args(%[[iterArg:.+]] = %[[init]]) // CHECK: %[[inputIv:.+]] = affine.apply #[[$map0]](%[[iv]]) // CHECK: %[[multiIndex:.+]]:3 = affine.delinearize_index %[[inputIv]] into (%[[c3]], %[[c5]], %[[c7]] -// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 5] [1, 1, 1, 2] : -// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} : -// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg]][%[[iv]], 0] [1, 5] [1, 1] : +// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 5] [1, 1, 1, 2] : +// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} : +// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg]][%[[iv]], 0] [1, 5] [1, 1] : // CHECK: scf.yield %[[update]] : // CHECK: return %[[tile]] @@ -110,8 +110,8 @@ // CHECK-DAG: %[[c4:.+]] = arith.constant 4 : index // CHECK-DAG: %[[c11:.+]] = arith.constant 11 : index // CHECK: %[[init:.+]] = tensor.empty(%[[sz1]], %[[sz2]]) : tensor -// CHECK-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] : -// CHECK-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] : +// CHECK-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] : +// CHECK-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] : // CHECK-DAG: %[[d4:.+]] = tensor.dim %[[arg0]], %[[c4]] : // CHECK: %[[tile1:.+]] = scf.for %[[iv1:.+]] = %[[c0]] to %[[sz1]] step %[[c1]] iter_args(%[[iterArg1:.+]] = %[[init]]) // CHECK: %[[tile2:.+]] = scf.for %[[iv2:.+]] = %[[c0]] to %[[sz2]] step %[[c1]] iter_args(%[[iterArg2:.+]] = %[[iterArg1]]) @@ -119,12 +119,12 @@ // CHECK: %[[multiIndex1:.+]]:3 = affine.delinearize_index %[[inputIv1]] into (%[[c3]], %[[d1]], %[[d2]]) : // CHECK: %[[inputIv2:.+]] = affine.apply #[[map0:.+]](%[[iv2]])[%[[lb2]]] // CHECK: %[[multiIndex2:.+]]:2 = affine.delinearize_index %[[inputIv2]] into (%[[c11]], %[[d4]]) : -// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex1]]#0, %[[multiIndex1]]#1, %[[multiIndex1]]#2, %[[multiIndex2]]#0, %[[multiIndex2]]#1] [1, 1, 1, 1, 1] [1, 1, 1, 1, 1] : -// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3, 4]{{\]}} : -// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg2]][%[[iv1]], %[[iv2]]] [1, 1] [1, 
1] : +// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex1]]#0, %[[multiIndex1]]#1, %[[multiIndex1]]#2, %[[multiIndex2]]#0, %[[multiIndex2]]#1] [1, 1, 1, 1, 1] [1, 1, 1, 1, 1] : +// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3, 4]{{\]}} : +// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg2]][%[[iv1]], %[[iv2]]] [1, 1] [1, 1] : // CHECK: scf.yield %[[update]] : // CHECK: scf.yield %[[tile2]] : -// CHECK: return %[[tile1]] : +// CHECK: return %[[tile1]] : // FOREACH: #[[map1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)> // FOREACH: func.func @extract_slice_dynamic_multidim(%[[arg0:.+]]: tensor<3x?x?x11x?xf32>, %[[lb1:.+]]: index, %[[sz1:.+]]: index, %[[lb2:.+]]: index, %[[sz2:.+]]: index) @@ -134,16 +134,16 @@ // FOREACH-DAG: %[[c4:.+]] = arith.constant 4 : index // FOREACH-DAG: %[[c11:.+]] = arith.constant 11 : index // FOREACH: %[[init:.+]] = tensor.empty(%[[sz1]], %[[sz2]]) : tensor -// FOREACH-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] : -// FOREACH-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] : +// FOREACH-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] : +// FOREACH-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] : // FOREACH-DAG: %[[d4:.+]] = tensor.dim %[[arg0]], %[[c4]] : // FOREACH: %[[tile1:.+]] = scf.foreach_thread (%[[tid1:.+]], %[[tid2:.+]]) in (%[[sz1]], %[[sz2]]) shared_outs(%[[dest:.+]] = %[[init]]) // FOREACH-DAG: %[[iv1:.+]] = affine.apply #[[map1]](%[[tid1]])[%[[lb1]]] // FOREACH: %[[multiIndex1:.+]]:3 = affine.delinearize_index %[[iv1]] into (%[[c3]], %[[d1]], %[[d2]]) : // FOREACH-DAG: %[[iv2:.+]] = affine.apply #[[map1]](%[[tid2]])[%[[lb2]]] // FOREACH: %[[multiIndex2:.+]]:2 = affine.delinearize_index %[[iv2]] into (%[[c11]], %[[d4]]) : -// FOREACH: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex1]]#0, %[[multiIndex1]]#1, %[[multiIndex1]]#2, %[[multiIndex2]]#0, %[[multiIndex2]]#1] [1, 1, 1, 1, 1] [1, 1, 1, 1, 1] : -// FOREACH: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3, 4]{{\]}} : +// FOREACH: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex1]]#0, %[[multiIndex1]]#1, %[[multiIndex1]]#2, %[[multiIndex2]]#0, %[[multiIndex2]]#1] [1, 1, 1, 1, 1] [1, 1, 1, 1, 1] : +// FOREACH: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3, 4]{{\]}} : // FOREACH: perform_concurrently // FOREACH-NEXT: tensor.parallel_insert_slice %[[sliceFlat]] into %[[dest]][%[[tid1]], %[[tid2]]] [1, 1] [1, 1] : @@ -154,7 +154,7 @@ // CHECK: @extract_slice_non_sliced_linearized_dim(%[[arg0:.+]]: tensor<{{.*}}>, func.func @extract_slice_non_sliced_linearized_dim(%input: tensor<3x?x?x11x2xf32>, %offt: index, %size: index) -> tensor<?x22xf32> { %collapsed = tensor.collapse_shape %input [[0, 1, 2], [3, 4]] : tensor<3x?x?x11x2xf32> into tensor<?x22xf32> %slice = tensor.extract_slice %collapsed [%offt, 0] [%size, 22] [1, 1] : tensor<?x22xf32> to tensor<?x22xf32> // CHECK: scf.for // CHECK-NOT: scf.for @@ -169,7 +169,7 @@ func.func @no_sliced_linearized_dims(%input: tensor<30x11x100xf32>, %offt: index, %size: index) -> tensor<330x?xf32> { %collapsed = tensor.collapse_shape %input [[0, 1], [2]] : tensor<30x11x100xf32> into tensor<330x100xf32> %slice = tensor.extract_slice %collapsed [0, %offt] [330, %size] [1, 1] : tensor<330x100xf32> to tensor<330x?xf32> - // CHECK-NOT: scf.for + // CHECK-NOT: scf.for // CHECK: %[[init:.+]] = tensor.empty(%[[arg2]]) // CHECK: %[[e:.+]] =
tensor.extract_slice %[[arg0]][0, 0, %[[arg1]]] [30, 11, %[[arg2]]] [1, 1, 1] // CHECK: %[[c:.+]] = tensor.collapse_shape %[[e]] {{\[}}[0, 1], [2]] @@ -191,7 +191,7 @@ // CHECK: %[[e:.+]] = tensor.extract_slice %[[arg0]][0, 0, 0] [1, 11, 100] [1, 1, 1] // CHECK-SAME: tensor<1x11x100xf32> to tensor<11x100xf32> // CHECK: %[[e1:.+]] = tensor.extract_slice %[[e]][%[[arg1]], 0] [%[[arg2]], 100] [1, 1] - // CHECK-SAME: tensor<11x100xf32> to tensor + // CHECK-SAME: tensor<11x100xf32> to tensor return %slice : tensor<?x100xf32> } @@ -201,11 +201,11 @@ %slice = tensor.extract_slice %collapsed [%offt, 0] [%size, 100] [1, 1] : tensor<?x100xf32> to tensor<?x100xf32> // CHECK-NOT: scf.for // CHECK: %[[c1:.+]] = arith.constant 1 : index - // CHECK: %[[dim:.+]] = tensor.dim %[[arg0]], %[[c1]] : + // CHECK: %[[dim:.+]] = tensor.dim %[[arg0]], %[[c1]] : // CHECK: %[[e:.+]] = tensor.extract_slice %[[arg0]][0, 0, 0, 0] [1, %[[dim]], 1, 100] [1, 1, 1, 1] // CHECK-SAME: tensor<1x?x1x100xf32> to tensor // CHECK: %[[e1:.+]] = tensor.extract_slice %[[e]][%[[arg1]], 0] [%[[arg2]], 100] [1, 1] - // CHECK-SAME: tensor to tensor + // CHECK-SAME: tensor to tensor return %slice : tensor<?x100xf32> } @@ -228,14 +228,14 @@ return %slice : tensor } -// Edge case where all collapsed dims are unit dims. This pattern can't eliminate the collapse shape, +// Edge case where all collapsed dims are unit dims. This pattern can't eliminate the collapse shape, // that should be handled by `linalg-fold-unit-extent-dims`. // CHECK: @collapse_and_slice_multiple_all_unit_dim(%[[arg0:.+]]: tensor<{{.*}}>) func.func @collapse_and_slice_multiple_all_unit_dim(%input: tensor<1x1x1x100xf32>) -> tensor<1x100xf32> { %collapsed = tensor.collapse_shape %input [[0, 1, 2], [3]] : tensor<1x1x1x100xf32> into tensor<1x100xf32> - %slice = tensor.extract_slice %collapsed [0, 0] [1, 100] [1, 1] : tensor<1x100xf32> to tensor<1x100xf32> - return %slice : tensor<1x100xf32> + %slice = tensor.extract_slice %collapsed [0, 0] [1, 100] [1, 1] : tensor<1x100xf32> to tensor<1x100xf32> + return %slice : tensor<1x100xf32> // CHECK: %[[collapse:.+]] = tensor.collapse_shape %[[arg0]] {{\[}}[0, 1, 2], [3]] : tensor<1x1x1x100xf32> into tensor<1x100xf32> - // CHECK: return %[[collapse]] + // CHECK: return %[[collapse]] } diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir @@ -70,8 +70,8 @@ %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32> /// Overwrite A inplace.
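// Illustrative sketch only (hypothetical function, not part of this patch):
// with the renamed keyword, destination-passing style on tensors reads as
// below. The operand passed through inits(...) is the destination whose
// buffer one-shot bufferization may update in place, which is exactly what
// the fill-after-insert_slice test here checks for.
func.func @fill_inplace_example(%A: tensor<?xf32>) -> tensor<?xf32> {
  %f0 = arith.constant 0.0 : f32
  // %A is the init operand: the fill overwrites it rather than allocating.
  %0 = linalg.fill ins(%f0 : f32) inits(%A : tensor<?xf32>) -> tensor<?xf32>
  return %0 : tensor<?xf32>
}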
- // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]] - %r1 = linalg.fill ins(%f0 : f32) outs(%r0 : tensor) -> tensor + // CHECK: linalg.fill ins({{.*}}{{.*}}inits(%[[A]] + %r1 = linalg.fill ins(%f0 : f32) inits(%r0 : tensor) -> tensor // CHECK: return // CHECK-NOT: tensor @@ -90,8 +90,8 @@ { %f0 = arith.constant 0.0 : f32 - // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]] - %r0 = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor + // CHECK: linalg.fill ins({{.*}}{{.*}}inits(%[[A]] + %r0 = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor // CHECK-NOT: alloc // CHECK: %[[SV_A:.*]] = memref.subview %[[A]] @@ -246,7 +246,7 @@ // CHECK-NOT: memref.alloc %cst = arith.constant 4.200000e+01 : f32 // CHECK: linalg.fill - %0 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>) -> tensor<10xf32> + %0 = linalg.fill ins(%cst : f32) inits(%t : tensor<10xf32>) -> tensor<10xf32> // CHECK-NOT: memref.copy %1 = tensor.insert_slice %0 into %t[0][10][1] : tensor<10xf32> into tensor<10xf32> return %1 : tensor<10xf32> diff --git a/mlir/test/Dialect/Transform/selective-targeting.mlir b/mlir/test/Dialect/Transform/selective-targeting.mlir --- a/mlir/test/Dialect/Transform/selective-targeting.mlir +++ b/mlir/test/Dialect/Transform/selective-targeting.mlir @@ -12,7 +12,7 @@ // CHECK-SAME: -> tensor<4x4xf32> %0 = linalg.matmul { test.attrA } ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> func.return %0 : tensor<128x128xf32> } @@ -30,7 +30,7 @@ // CHECK: vector.transfer_write %0 = linalg.matmul { test.attrA, test.attrC } ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> func.return %0 : tensor<128x128xf32> } @@ -47,7 +47,7 @@ // CHECK: vector.transfer_write %0 = linalg.matmul { test.attrC } ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> func.return %0 : tensor<128x128xf32> } @@ -94,7 +94,7 @@ // CHECK: vector.contract %0 = linalg.matmul {test.attrA} ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> func.return %0 : tensor<128x128xf32> } @@ -105,7 +105,7 @@ -> tensor<128x128xf32> { // CHECK: linalg.matmul %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> func.return %0 : tensor<128x128xf32> } @@ -139,11 +139,11 @@ // CHECK: vector.contract %0 = linalg.matmul {test.attrA} ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> // CHECK: vector.contract %1 = linalg.matmul ins(%arg0, %0: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg3: tensor<128x128xf32>) + inits(%arg3: tensor<128x128xf32>) -> tensor<128x128xf32> return %1 : tensor<128x128xf32> } diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir --- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir @@ -72,7 +72,7 @@ // LINALG: scf.yield %[[A]], %[[i]], %[[j]] : memref, index, index // LINALG: } else { // slow path, fill tmp alloc and 
yield a memref_casted version of it - // LINALG: linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>) + // LINALG: linalg.fill ins(%cst : f32) inits(%[[alloc]] : memref<4x8xf32>) // LINALG: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref // LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[d0]], %[[i]], %[[c4]]) // LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]]) @@ -165,7 +165,7 @@ // LINALG-SAME: memref>, index, index // LINALG: } else { // slow path, fill tmp alloc and yield a memref_casted version of it - // LINALG: linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>) + // LINALG: linalg.fill ins(%cst : f32) inits(%[[alloc]] : memref<4x8xf32>) // LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[c7]], %[[i]], %[[c4]]) // LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]]) // LINALG: %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1] diff --git a/mlir/test/IR/slice.mlir b/mlir/test/IR/slice.mlir --- a/mlir/test/IR/slice.mlir +++ b/mlir/test/IR/slice.mlir @@ -6,9 +6,9 @@ %c = memref.alloc(%arg0, %arg1) : memref %d = memref.alloc(%arg0, %arg1) : memref linalg.matmul ins(%a, %b : memref, memref) - outs(%c : memref) + inits(%c : memref) linalg.matmul ins(%a, %b : memref, memref) - outs(%d : memref) + inits(%d : memref) memref.dealloc %c : memref memref.dealloc %b : memref memref.dealloc %a : memref diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir --- a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir +++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir @@ -43,7 +43,7 @@ iterator_types = ["parallel", "parallel"] } ins(%lhs, %rhs : memref, memref) - outs(%sum : memref) + inits(%sum : memref) { ^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32): %0 = arith.addf %lhs_in, %rhs_in : f32 @@ -68,8 +68,8 @@ %RHS10 = memref.alloc() {alignment = 64} : memref<1x10xf32> %DST10 = memref.alloc() {alignment = 64} : memref<1x10xf32> - linalg.fill ins(%f1 : f32) outs(%LHS10 : memref<1x10xf32>) - linalg.fill ins(%f1 : f32) outs(%RHS10 : memref<1x10xf32>) + linalg.fill ins(%f1 : f32) inits(%LHS10 : memref<1x10xf32>) + linalg.fill ins(%f1 : f32) inits(%RHS10 : memref<1x10xf32>) %LHS = memref.cast %LHS10 : memref<1x10xf32> to memref %RHS = memref.cast %RHS10 : memref<1x10xf32> to memref diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir --- a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir +++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir @@ -90,8 +90,8 @@ %RHS10 = memref.alloc() {alignment = 64} : memref<1x10xf32> %DST10 = memref.alloc() {alignment = 64} : memref<1x10xf32> - linalg.fill ins(%f1 : f32) outs(%LHS10 : memref<1x10xf32>) - linalg.fill ins(%f1 : f32) outs(%RHS10 : memref<1x10xf32>) + linalg.fill ins(%f1 : f32) inits(%LHS10 : memref<1x10xf32>) + linalg.fill ins(%f1 : f32) inits(%RHS10 : memref<1x10xf32>) %LHS = memref.cast %LHS10 : memref<1x10xf32> to memref %RHS = memref.cast %RHS10 : memref<1x10xf32> to memref diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir --- 
a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir @@ -12,9 +12,9 @@ %x = memref.dim %A, %c0 : memref %y = memref.dim %B, %c1 : memref %C = memref.alloc(%x, %y) : memref - linalg.fill ins(%f0 : f32) outs(%C : memref) + linalg.fill ins(%f0 : f32) inits(%C : memref) linalg.matmul ins(%A, %B: memref, memref) - outs(%C: memref) + inits(%C: memref) return %C : memref } @@ -26,12 +26,12 @@ %x = memref.dim %A, %c1 : memref %n = memref.dim %B, %c1 : memref %C = memref.alloc(%m, %n) : memref - linalg.fill ins(%f0 : f32) outs(%C : memref) + linalg.fill ins(%f0 : f32) inits(%C : memref) scf.for %i = %c0 to %n step %c1 { %b = memref.subview %B[0, %i][%x, 1][1, 1] : memref to memref> %c = memref.subview %C[0, %i][%m, 1][1, 1] : memref to memref> linalg.matvec ins(%A, %b: memref, memref>) - outs(%c: memref>) + inits(%c: memref>) } return %C : memref } @@ -46,8 +46,8 @@ %val2 = arith.constant 17.0 : f32 %A = memref.alloc(%m, %x) : memref %B = memref.alloc(%x, %n) : memref - linalg.fill ins(%val1 : f32) outs(%A : memref) - linalg.fill ins(%val2 : f32) outs(%B : memref) + linalg.fill ins(%val1 : f32) inits(%A : memref) + linalg.fill ins(%val2 : f32) inits(%B : memref) memref.store %val1, %B[%c0, %c0] : memref %C1 = call @matmul(%A, %B) : (memref, memref) -> memref %C2 = call @matvec(%A, %B) : (memref, memref) -> memref diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir @@ -14,13 +14,13 @@ // Creates and returns a 1-D buffer of size %s1 filled with the value %f func.func @alloc_1d_filled_f32(%s1 : index, %f : f32) -> memref { %buf = memref.alloc(%s1) : memref - linalg.fill ins(%f : f32) outs(%buf : memref) + linalg.fill ins(%f : f32) inits(%buf : memref) return %buf : memref } func.func @conv_1d(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv_1d ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) + inits (%arg2: memref) return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir @@ -14,7 +14,7 @@ // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3) : memref - linalg.fill ins(%f : f32) outs(%buf : memref) + linalg.fill ins(%f : f32) inits(%buf : memref) return %buf : memref } @@ -22,7 +22,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) + inits (%arg2: memref) return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir @@ -14,13 +14,13 @@ // Creates and returns a 2-D buffer of size (%s1, %s2) filled with the value %f func.func @alloc_2d_filled_f32(%s1 : index, %s2 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2) : memref 
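// Illustrative sketch only (hypothetical names, not part of this patch): the
// memref form of the renamed fill produces no SSA result; the buffer passed
// through inits(...) is mutated in place, which is the pattern the
// alloc-and-fill helpers in these conv tests rely on.
func.func @alloc_and_fill_example(%s1: index, %s2: index, %f: f32) -> memref<?x?xf32> {
  %buf = memref.alloc(%s1, %s2) : memref<?x?xf32>
  // In-place on memrefs: the fill writes %buf directly and yields nothing.
  linalg.fill ins(%f : f32) inits(%buf : memref<?x?xf32>)
  return %buf : memref<?x?xf32>
}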
- linalg.fill ins(%f : f32) outs(%buf : memref) + linalg.fill ins(%f : f32) inits(%buf : memref) return %buf : memref } func.func @conv_2d(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv_2d ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) + inits (%arg2: memref) return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir @@ -14,7 +14,7 @@ // Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3, %s4) : memref - linalg.fill ins(%f : f32) outs(%buf : memref) + linalg.fill ins(%f : f32) inits(%buf : memref) return %buf : memref } @@ -22,7 +22,7 @@ linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) + inits (%arg2: memref) return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir @@ -14,13 +14,13 @@ // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3) : memref - linalg.fill ins(%f : f32) outs(%buf : memref) + linalg.fill ins(%f : f32) inits(%buf : memref) return %buf : memref } func.func @conv_3d(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv_3d ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) + inits (%arg2: memref) return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir @@ -14,7 +14,7 @@ // Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f func.func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref - linalg.fill ins(%f : f32) outs(%buf : memref) + linalg.fill ins(%f : f32) inits(%buf : memref) return %buf : memref } @@ -22,7 +22,7 @@ linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) + inits (%arg2: memref) return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir @@ -14,7 +14,7 @@ %cst = arith.constant 0.000000e+00 : f32 %c2 = arith.constant 2 : index %c0 = arith.constant 0 : index - %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor + %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor) -> tensor %1 = affine.apply #map0(%c0, 
%c64)[%c2] %2 = bufferization.alloc_tensor(%1) : tensor %3 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %2) -> (tensor) { @@ -61,7 +61,7 @@ %13 = tensor.extract_slice %6[%12, 0] [1, 2] [1, 1] : tensor to tensor<2xf32> %14 = affine.apply #map1(%arg3, %c0)[%c2] %15 = tensor.extract_slice %3[%14, 0] [1, 2] [1, 1] : tensor to tensor<2xf32> - %16 = linalg.dot ins(%13, %15 : tensor<2xf32>, tensor<2xf32>) outs(%arg4 : tensor) -> tensor + %16 = linalg.dot ins(%13, %15 : tensor<2xf32>, tensor<2xf32>) inits(%arg4 : tensor) -> tensor // %AA = tensor.cast %13 : tensor<2xf32> to tensor<*xf32> // call @printMemrefF32(%AA) : (tensor<*xf32>) -> () @@ -83,9 +83,9 @@ %A = bufferization.alloc_tensor() : tensor<64xf32> %B = bufferization.alloc_tensor() : tensor<64xf32> %C = bufferization.alloc_tensor() : tensor - %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32> - %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32> - %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor + %AA = linalg.fill ins(%v1 : f32) inits(%A : tensor<64xf32>) -> tensor<64xf32> + %BB = linalg.fill ins(%v2 : f32) inits(%B : tensor<64xf32>) -> tensor<64xf32> + %CC = linalg.fill ins(%v0 : f32) inits(%C : tensor) -> tensor %res = call @init_and_dot(%AA, %BB, %CC) : (tensor<64xf32>, tensor<64xf32>, tensor) -> tensor diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir @@ -23,7 +23,7 @@ %C = arith.constant dense<1000.0> : tensor<2x4xf32> %D = linalg.matmul ins(%A, %B: tensor<2x3xf32>, tensor<3x4xf32>) - outs(%C: tensor<2x4xf32>) -> tensor<2x4xf32> + inits(%C: tensor<2x4xf32>) -> tensor<2x4xf32> %unranked = tensor.cast %D : tensor<2x4xf32> to tensor<*xf32> call @printMemrefF32(%unranked) : (tensor<*xf32>) -> () diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir @@ -50,7 +50,7 @@ %init = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_assign ins(%arga: tensor) - outs(%init: tensor) { + inits(%init: tensor) { ^bb(%a: f64, %x: f64): %0 = arith.mulf %a, %c2 : f64 linalg.yield %0 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir @@ -28,7 +28,7 @@ %xv = bufferization.alloc_tensor (%d) : tensor %0 = linalg.generic #trait_vec_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: bf16, %b: bf16, %x: bf16): %1 = sparse_tensor.binary %a, %b : bf16, bf16 to bf16 overlap={ diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir @@ -26,7 +26,7 @@ %xv = bufferization.alloc_tensor (%d) : tensor %0 = linalg.generic #trait_vec_op ins(%arga, %argb: tensor, tensor) - 
outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f16, %b: f16, %x: f16): %1 = sparse_tensor.binary %a, %b : f16, f16 to f16 overlap={ diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir @@ -23,7 +23,7 @@ %xin = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op ins(%arg0: tensor) - outs(%xin: tensor) { + inits(%xin: tensor) { ^bb0(%a: f64, %x: f64) : %result = math.absf %a : f64 linalg.yield %result : f64 @@ -38,7 +38,7 @@ %xin = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op ins(%arg0: tensor) - outs(%xin: tensor) { + inits(%xin: tensor) { ^bb0(%a: i32, %x: i32) : %result = math.absi %a : i32 linalg.yield %result : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir @@ -50,7 +50,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_vec_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: i32, %b: i32, %x: i32): %1 = sparse_tensor.binary %a, %b : i32, i32 to i32 overlap={ @@ -74,7 +74,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_vec_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %b: f64, %x: f64): %1 = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap={ @@ -98,7 +98,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_vec_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %b: f64, %x: f64): %1 = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap={} @@ -116,7 +116,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_vec_scale ins(%arga: tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %x: i32): %idx = linalg.index 0 : index %1 = sparse_tensor.binary %a, %idx : f64, index to i32 @@ -143,7 +143,7 @@ %xv = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_mat_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %b: f64, %x: f64): %1 = sparse_tensor.binary %a, %b: f64, f64 to f64 overlap={ @@ -169,7 +169,7 @@ %0 = linalg.generic #trait_mat_op ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) { + inits(%C: tensor<4x4xf64, #DCSR>) { ^bb0(%a: f64, %b: f64, %c: f64) : %result = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap={ @@ -191,7 +191,7 @@ %0 = linalg.generic #trait_mat_op ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) { + inits(%C: tensor<4x4xf64, #DCSR>) { ^bb0(%a: f64, %b: f64, %c: f64) : %result = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap={ @@ -219,7 +219,7 @@ %0 = linalg.generic #trait_mat_op ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) { + inits(%C: tensor<4x4xf64, #DCSR>) { ^bb0(%a: f64, %b: f64, %c: f64) : %row = linalg.index 0 : index %col = linalg.index 1 : index @@ -256,7 +256,7 @@ %0 = linalg.generic #trait_mat_op ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, 
#DCSR>) { + inits(%C: tensor<4x4xf64, #DCSR>) { ^bb0(%a: f64, %b: f64, %c: f64) : %result = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap={ @@ -301,7 +301,7 @@ %0 = linalg.generic #trait_mat_op ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) - outs(%C: tensor<4x4xi8, #DCSR>) { + inits(%C: tensor<4x4xi8, #DCSR>) { ^bb0(%a: f64, %b: f64, %c: i8) : %result = sparse_tensor.binary %a, %b : f64, f64 to i8 overlap={ @@ -324,7 +324,7 @@ %0 = linalg.generic #trait_mat_op ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) { + inits(%C: tensor<4x4xf64, #DCSR>) { ^bb0(%a: f64, %b: f64, %c: f64) : %result = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap={} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir @@ -41,7 +41,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: i32, %x : f32): %cst = arith.sitofp %a : i32 to f32 linalg.yield %cst : f32 @@ -52,7 +52,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: i32, %x : f32): %cst = arith.uitofp %a : i32 to f32 linalg.yield %cst : f32 @@ -63,7 +63,7 @@ %argb: tensor<10xi32>) -> tensor<10xi32> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xf32, #SV>) - outs(%argb: tensor<10xi32>) { + inits(%argb: tensor<10xi32>) { ^bb(%a: f32, %x : i32): %cst = arith.fptosi %a : f32 to i32 linalg.yield %cst : i32 @@ -74,7 +74,7 @@ %argb: tensor<10xi32>) -> tensor<10xi32> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xf64, #SV>) - outs(%argb: tensor<10xi32>) { + inits(%argb: tensor<10xi32>) { ^bb(%a: f64, %x : i32): %cst = arith.fptoui %a : f64 to i32 linalg.yield %cst : i32 @@ -85,7 +85,7 @@ %argb: tensor<10xf64>) -> tensor<10xf64> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xf32, #SV>) - outs(%argb: tensor<10xf64>) { + inits(%argb: tensor<10xf64>) { ^bb(%a: f32, %x : f64): %cst = arith.extf %a : f32 to f64 linalg.yield %cst : f64 @@ -96,7 +96,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xf64, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: f64, %x : f32): %cst = arith.truncf %a : f64 to f32 linalg.yield %cst : f32 @@ -107,7 +107,7 @@ %argb: tensor<10xi64>) -> tensor<10xi64> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xi64>) { + inits(%argb: tensor<10xi64>) { ^bb(%a: i32, %x : i64): %cst = arith.extsi %a : i32 to i64 linalg.yield %cst : i64 @@ -118,7 +118,7 @@ %argb: tensor<10xi64>) -> tensor<10xi64> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xi64>) { + inits(%argb: tensor<10xi64>) { ^bb(%a: i32, %x : i64): %cst = arith.extui %a : i32 to i64 linalg.yield %cst : i64 @@ -129,7 +129,7 @@ %argb: tensor<10xi8>) -> tensor<10xi8> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xi8>) { + inits(%argb: tensor<10xi8>) { ^bb(%a: i32, %x : i8): %cst = arith.trunci %a : i32 to i8 linalg.yield %cst : i8 @@ -140,7 +140,7 @@ %argb: tensor<10xi32>) -> tensor<10xi32> { %0 = linalg.generic 
#trait_cast ins(%arga: tensor<10xf32, #SV>) - outs(%argb: tensor<10xi32>) { + inits(%argb: tensor<10xi32>) { ^bb(%a: f32, %x : i32): %cst = arith.bitcast %a : f32 to i32 linalg.yield %cst : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir @@ -26,7 +26,7 @@ %0 = linalg.generic #trait_op ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %b: complex, %x: complex): %1 = complex.add %a, %b : complex linalg.yield %1 : complex @@ -43,7 +43,7 @@ %0 = linalg.generic #trait_op ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %b: complex, %x: complex): %1 = complex.mul %a, %b : complex linalg.yield %1 : complex diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir @@ -26,7 +26,7 @@ %0 = linalg.generic #trait_op ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %b: complex, %x: complex): %1 = complex.add %a, %b : complex linalg.yield %1 : complex @@ -43,7 +43,7 @@ %0 = linalg.generic #trait_op ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %b: complex, %x: complex): %1 = complex.mul %a, %b : complex linalg.yield %1 : complex diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir @@ -35,7 +35,7 @@ %0 = linalg.generic #trait_op2 ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %b: complex, %x: complex): %1 = complex.neg %b : complex %2 = complex.sub %a, %1 : complex @@ -51,7 +51,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector> %0 = linalg.generic #trait_op1 ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %x: complex): %1 = complex.sin %a : complex linalg.yield %1 : complex @@ -66,7 +66,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector> %0 = linalg.generic #trait_op1 ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %x: complex): %1 = complex.sqrt %a : complex linalg.yield %1 : complex @@ -81,7 +81,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector> %0 = linalg.generic #trait_op1 ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %x: complex): %1 = complex.tanh %a : complex linalg.yield %1 : complex @@ -96,7 +96,7 @@ %xv = 
bufferization.alloc_tensor(%d) : tensor, #SparseVector> %0 = linalg.generic #trait_op1 ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %x: complex): %1 = complex.log1p %a : complex %2 = complex.expm1 %1 : complex @@ -113,7 +113,7 @@ %c = complex.constant [2.0 : f64, 0.0 : f64] : complex %0 = linalg.generic #trait_op1 ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %x: complex): %1 = complex.div %a, %c : complex linalg.yield %1 : complex @@ -128,7 +128,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op1 ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: complex, %x: f64): %1 = complex.abs %a : complex linalg.yield %1 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir @@ -15,7 +15,7 @@ %x: tensor) -> tensor { %dot = linalg.dot ins(%a, %b: tensor<1024xf32, #SparseVector>, tensor<1024xf32, #SparseVector>) - outs(%x: tensor) -> tensor + inits(%x: tensor) -> tensor return %dot : tensor } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir @@ -19,7 +19,7 @@ %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC> %D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>) - outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> + inits(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> return %D: tensor<8x4xf64, #CSC> } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir @@ -13,7 +13,7 @@ %output: tensor<6x6xi32>) -> tensor<6x6xi32> { %0 = linalg.conv_2d ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>) - outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32> + inits (%output: tensor<6x6xi32>) -> tensor<6x6xi32> return %0 : tensor<6x6xi32> } @@ -22,7 +22,7 @@ %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR> %0 = linalg.conv_2d ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>) - outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> + inits (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> return %0 : tensor<6x6xi32, #DCSR> } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir @@ -40,7 +40,7 @@ -> tensor<7x3xf64> { %0 = linalg.generic #trait_flatten ins(%arga: tensor<7x3x3x3x3x3x5x3xf64, #SparseTensor>) - outs(%argx: tensor<7x3xf64>) { + inits(%argx: tensor<7x3xf64>) { ^bb(%a: f64, %x: f64): %0 = arith.addf %x, %a : f64 linalg.yield %0 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir 
b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir @@ -43,7 +43,7 @@ %init = bufferization.alloc_tensor() : tensor<8xi64, #SparseVector> %r = linalg.generic #trait_1d ins(%arga: tensor<8xi64, #SparseVector>) - outs(%init: tensor<8xi64, #SparseVector>) { + inits(%init: tensor<8xi64, #SparseVector>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %ii = arith.index_cast %i : index to i64 @@ -61,7 +61,7 @@ %init = bufferization.alloc_tensor() : tensor<8xi64, #SparseVector> %r = linalg.generic #trait_1d ins(%arga: tensor<8xi64, #SparseVector>) - outs(%init: tensor<8xi64, #SparseVector>) { + inits(%init: tensor<8xi64, #SparseVector>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %ii = arith.index_cast %i : index to i64 @@ -79,7 +79,7 @@ %init = bufferization.alloc_tensor() : tensor<3x4xi64, #SparseMatrix> %r = linalg.generic #trait_2d ins(%arga: tensor<3x4xi64, #SparseMatrix>) - outs(%init: tensor<3x4xi64, #SparseMatrix>) { + inits(%init: tensor<3x4xi64, #SparseMatrix>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -100,7 +100,7 @@ %init = bufferization.alloc_tensor() : tensor<3x4xi64, #SparseMatrix> %r = linalg.generic #trait_2d ins(%arga: tensor<3x4xi64, #SparseMatrix>) - outs(%init: tensor<3x4xi64, #SparseMatrix>) { + inits(%init: tensor<3x4xi64, #SparseMatrix>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -118,7 +118,7 @@ %0 = bufferization.alloc_tensor() : tensor<2x3xf32, #SparseMatrix> %1 = linalg.generic #trait_2d ins(%arg0 : tensor<2x3xf32, #SparseMatrix>) - outs(%0 : tensor<2x3xf32, #SparseMatrix>) { + inits(%0 : tensor<2x3xf32, #SparseMatrix>) { ^bb0(%arg1: f32, %arg2: f32): %2 = linalg.index 0 : index %3 = arith.index_cast %2 : index to i64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir @@ -41,7 +41,7 @@ %out: tensor<8xi64>) -> tensor<8xi64> { %r = linalg.generic #trait_1d ins(%arga: tensor<8xi64, #SparseVector>) - outs(%out: tensor<8xi64>) { + inits(%out: tensor<8xi64>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %ii = arith.index_cast %i : index to i64 @@ -58,7 +58,7 @@ %out: tensor<8xi64>) -> tensor<8xi64> { %r = linalg.generic #trait_1d ins(%arga: tensor<8xi64, #SparseVector>) - outs(%out: tensor<8xi64>) { + inits(%out: tensor<8xi64>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %ii = arith.index_cast %i : index to i64 @@ -75,7 +75,7 @@ %out: tensor<3x4xi64>) -> tensor<3x4xi64> { %r = linalg.generic #trait_2d ins(%arga: tensor<3x4xi64, #SparseMatrix>) - outs(%out: tensor<3x4xi64>) { + inits(%out: tensor<3x4xi64>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -95,7 +95,7 @@ %out: tensor<3x4xi64>) -> tensor<3x4xi64> { %r = linalg.generic #trait_2d ins(%arga: tensor<3x4xi64, #SparseMatrix>) - outs(%out: tensor<3x4xi64>) { + inits(%out: tensor<3x4xi64>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir --- 
a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir @@ -29,7 +29,7 @@ %C: tensor<4x4xf64>) -> tensor<4x4xf64> { %D = linalg.matmul ins(%A, %B: tensor<4x8xf64>, tensor<8x4xf64>) - outs(%C: tensor<4x4xf64>) -> tensor<4x4xf64> + inits(%C: tensor<4x4xf64>) -> tensor<4x4xf64> return %D: tensor<4x4xf64> } @@ -41,7 +41,7 @@ %C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR> %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #CSR>, tensor<8x4xf64, #CSR>) - outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> + inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> return %D: tensor<4x4xf64, #CSR> } @@ -53,7 +53,7 @@ %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR> %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #DCSR>, tensor<8x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> + inits(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> return %D: tensor<4x4xf64, #DCSR> } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir @@ -45,7 +45,7 @@ %xm = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_scale ins(%arga: tensor) - outs(%xm: tensor) { + inits(%xm: tensor) { ^bb(%a: f64, %x: f64): %1 = arith.mulf %a, %s : f64 linalg.yield %1 : f64 @@ -57,7 +57,7 @@ func.func @matrix_scale_inplace(%argx: tensor) -> tensor { %s = arith.constant 2.0 : f64 %0 = linalg.generic #trait_scale_inpl - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%x: f64): %1 = arith.mulf %x, %s : f64 linalg.yield %1 : f64 @@ -75,7 +75,7 @@ %xv = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %b: f64, %x: f64): %1 = arith.addf %a, %b : f64 linalg.yield %1 : f64 @@ -93,7 +93,7 @@ %xv = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %b: f64, %x: f64): %1 = arith.mulf %a, %b : f64 linalg.yield %1 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir @@ -49,7 +49,7 @@ -> tensor { %0 = linalg.generic #matvec ins(%arga, %argb: tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.muli %a, %b : i32 %1 = arith.addi %x, %0 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir @@ -40,7 +40,7 @@ %0 = linalg.generic #mttkrp ins(%argb, %argc, %argd: tensor, tensor, tensor) - outs(%arga: tensor) { + inits(%arga: tensor) { ^bb(%b: f64, %c: f64, %d: f64, %a: f64): %0 = arith.mulf %b, %c : f64 %1 = arith.mulf %d, %0 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir 
b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir @@ -25,7 +25,7 @@ %argx = bufferization.alloc_tensor() : tensor<32x16xf32, #DCSR> %0 = linalg.generic #trait_mult_elt ins(%arga, %argb: tensor<32x16xf32, #DCSR>, tensor<32x16xf32, #DCSR>) - outs(%argx: tensor<32x16xf32, #DCSR>) { + inits(%argx: tensor<32x16xf32, #DCSR>) { ^bb(%a: f32, %b: f32, %x: f32): %1 = arith.mulf %a, %b : f32 linalg.yield %1 : f32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir @@ -34,7 +34,7 @@ %0 = linalg.generic #redsum ins(%arga, %argb: tensor, tensor) - outs(%xinit: tensor) { + inits(%xinit: tensor) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.muli %a, %b : i32 %1 = arith.addi %x, %0 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir @@ -35,7 +35,7 @@ func.func @kernel_eltwise_mult(%argx: tensor) -> tensor { %0 = linalg.generic #eltwise_mult - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%x: f64): %0 = arith.mulf %x, %x : f64 linalg.yield %0 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir @@ -18,7 +18,7 @@ %c2 = arith.constant 2 : i32 %0 = linalg.quantized_matmul ins(%input1, %input2, %c2, %c0 : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32) - outs(%output : tensor<5x6xi32>) -> tensor<5x6xi32> + inits(%output : tensor<5x6xi32>) -> tensor<5x6xi32> return %0: tensor<5x6xi32> } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir @@ -23,7 +23,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: complex, %x: f32): %1 = complex.re %a : complex linalg.yield %1 : f32 @@ -38,7 +38,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: complex, %x: f32): %1 = complex.im %a : complex linalg.yield %1 : f32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir @@ -50,7 +50,7 @@ %xv = bufferization.alloc_tensor(%d0): tensor %0 = linalg.generic #trait_mat_reduce_rowwise ins(%arga: tensor) - outs(%xv: tensor) { + 
inits(%xv: tensor) { ^bb(%a: f64, %b: f64): %1 = sparse_tensor.reduce %a, %b, %cf1 : f64 { ^bb0(%x: f64, %y: f64): @@ -69,7 +69,7 @@ %xv = bufferization.alloc_tensor(%d0): tensor %0 = linalg.generic #trait_mat_reduce_rowwise ins(%arga: tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %b: f64): %1 = sparse_tensor.reduce %a, %b, %cf1 : f64 { ^bb0(%x: f64, %y: f64): @@ -91,7 +91,7 @@ %xm = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_matmul ins(%arga, %argb: tensor, tensor) - outs(%xm: tensor) { + inits(%xm: tensor) { ^bb(%a: f64, %b: f64, %output: f64): %1 = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap = { @@ -122,7 +122,7 @@ %xm = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_matmul ins(%arga, %argb: tensor, tensor) - outs(%xm: tensor) { + inits(%xm: tensor) { ^bb(%a: f64, %b: f64, %output: f64): %1 = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap = { diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir @@ -22,7 +22,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xi32, #SV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: i32, %x: i32): %0 = arith.addi %x, %a : i32 linalg.yield %0 : i32 @@ -34,7 +34,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xf32, #SV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 @@ -46,7 +46,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xi32, #DV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: i32, %x: i32): %0 = arith.muli %x, %a : i32 linalg.yield %0 : i32 @@ -58,7 +58,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xf32, #DV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %x, %a : f32 linalg.yield %0 : f32 @@ -70,7 +70,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xi32, #DV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: i32, %x: i32): %0 = arith.andi %x, %a : i32 linalg.yield %0 : i32 @@ -82,7 +82,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xi32, #SV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: i32, %x: i32): %0 = arith.ori %x, %a : i32 linalg.yield %0 : i32 @@ -94,7 +94,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xi32, #SV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: i32, %x: i32): %0 = arith.xori %x, %a : i32 linalg.yield %0 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir @@ -39,7 +39,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_sampled_dense_dense ins(%args, %arga, %argb: tensor, tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%s: f32, %a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.mulf %s, %0 : f32 diff --git 
a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir @@ -49,7 +49,7 @@ %2 = linalg.generic #trait_sampled_dense_dense ins(%args, %arga, %argb: tensor<8x8xf64, #SM>, tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1: tensor<8x8xf64>) { + inits(%1: tensor<8x8xf64>) { ^bb(%s: f64, %a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.mulf %s, %p : f64 @@ -70,7 +70,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -79,7 +79,7 @@ // Sample the result with element-wise multiplication with the sparse matrix. %3 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 @@ -98,7 +98,7 @@ %2 = linalg.generic #trait_sampled_dense_dense ins(%args, %arga, %argb: tensor<8x8xf64, #SM>, tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1: tensor<8x8xf64, #SM>) { + inits(%1: tensor<8x8xf64, #SM>) { ^bb(%s: f64, %a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.mulf %s, %p : f64 @@ -120,7 +120,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -130,7 +130,7 @@ %3 = bufferization.alloc_tensor() : tensor<8x8xf64, #SM> %4 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%3 : tensor<8x8xf64, #SM>) { + inits(%3 : tensor<8x8xf64, #SM>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir @@ -26,7 +26,7 @@ func.func @sparse_scale(%argx: tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR> { %c = arith.constant 2.0 : f32 %0 = linalg.generic #trait_scale - outs(%argx: tensor<8x8xf32, #CSR>) { + inits(%argx: tensor<8x8xf32, #CSR>) { ^bb(%x: f32): %1 = arith.mulf %x, %c : f32 linalg.yield %1 : f32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir @@ -18,7 +18,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0, %cst_3 : tensor<2x3x4xf64, #SparseMatrix>, tensor<2x3x4xf64>) - outs(%2 : tensor<2x3x4xf64, #SparseMatrix>) { + inits(%2 : tensor<2x3x4xf64, #SparseMatrix>) { ^bb0(%arg1: f64, %arg2: f64, %arg3: f64): %4 = arith.subf %arg1, %arg2 : f64 linalg.yield %4 : f64 @@ -32,7 +32,7 @@ indexing_maps = [#map, #map, #map],
 iterator_types = ["parallel", "parallel", "parallel"]}
 ins(%arg0, %cst_3 : tensor<2x3x4xf64, #SparseMatrix>, tensor<2x3x4xf64>)
- outs(%2 : tensor<2x3x4xf64, #SparseMatrix>) {
+ inits(%2 : tensor<2x3x4xf64, #SparseMatrix>) {
 ^bb0(%arg1: f64, %arg2: f64, %arg3: f64):
 %4 = arith.addf %arg1, %arg2 : f64
 linalg.yield %4 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir
@@ -38,7 +38,7 @@
 %xv = bufferization.alloc_tensor(%d0): tensor
 %0 = linalg.generic #trait_vec_select
 ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %b: f64):
 %1 = sparse_tensor.select %a : f64 {
 ^bb0(%x: f64):
@@ -58,7 +58,7 @@
 %xv = bufferization.alloc_tensor(%d0, %d1): tensor
 %0 = linalg.generic #trait_mat_select
 ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %b: f64):
 %row = linalg.index 0 : index
 %col = linalg.index 1 : index
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir
@@ -33,7 +33,7 @@
 %xin = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_op
 ins(%arg0: tensor)
- outs(%xin: tensor) {
+ inits(%xin: tensor) {
 ^bb0(%a: f64, %x: f64) :
 %result = sparse_tensor.unary %a : f64 to f64
 present={
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
@@ -49,7 +49,7 @@
 -> tensor {
 %c = arith.constant 2.0 : f64
 %0 = linalg.generic #trait_scale
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%x: f64):
 %1 = arith.mulf %x, %c : f64
 linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
@@ -36,7 +36,7 @@
 %argx: tensor) -> tensor {
 %0 = linalg.generic #spmm
 ins(%arga, %argb: tensor, tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %0 = arith.mulf %a, %b : f64
 %1 = arith.addf %x, %0 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
@@ -33,7 +33,7 @@
 %argx: tensor) -> tensor {
 %0 = linalg.generic #trait_sum_reduce
 ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%a: f64, %x: f64):
 %0 = arith.addf %x, %a : f64
 linalg.yield %0 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir
@@ -29,7 +29,7 @@
 %argx: tensor) -> tensor {
 %0 = linalg.generic #trait_sum_reduce
 ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%a: bf16, %x: bf16):
 %0 = arith.addf %x, %a : bf16
 linalg.yield %0 : bf16
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
@@ -33,7 +33,7 @@
 %argx: tensor>) -> tensor> {
 %0 = linalg.generic #trait_sum_reduce
 ins(%arga: tensor, #SparseMatrix>)
- outs(%argx: tensor>) {
+ inits(%argx: tensor>) {
 ^bb(%a: complex, %x: complex):
 %0 = complex.add %x, %a : complex
 linalg.yield %0 : complex
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir
@@ -27,7 +27,7 @@
 %argx: tensor) -> tensor {
 %0 = linalg.generic #trait_sum_reduce
 ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%a: f16, %x: f16):
 %0 = arith.addf %x, %a : f16
 linalg.yield %0 : f16
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir
@@ -19,7 +19,7 @@
 func.func @sparse_tanh(%vec: tensor) -> tensor {
 %0 = linalg.generic #trait_op
- outs(%vec: tensor) {
+ inits(%vec: tensor) {
 ^bb(%x: f64):
 %1 = math.tanh %x : f64
 linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir
@@ -32,7 +32,7 @@
 %xt = bufferization.alloc_tensor(%d0, %d1, %d2) : tensor
 %0 = linalg.generic #trait_mul
 ins(%arga, %argb: tensor, tensor)
- outs(%xt: tensor) {
+ inits(%xt: tensor) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %1 = arith.mulf %a, %b : f64
 linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir
@@ -32,7 +32,7 @@
 %xm = bufferization.alloc_tensor(%d0, %d1, %d2) : tensor
 %0 = linalg.generic #trait_scale
 ins(%arga: tensor)
- outs(%xm: tensor) {
+ inits(%xm: tensor) {
 ^bb(%a: f64, %x: f64):
 %1 = arith.mulf %a, %s : f64
 linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir
@@ -37,7 +37,7 @@
 %i = bufferization.alloc_tensor() : tensor<4x3xf64, #DCSR>
 %0 = linalg.generic #transpose_trait
 ins(%t: tensor<3x4xf64, #DCSC>)
- outs(%i: tensor<4x3xf64, #DCSR>) {
+ inits(%i: tensor<4x3xf64, #DCSR>) {
 ^bb(%a: f64, %x: f64):
 linalg.yield %a : f64
 } -> tensor<4x3xf64, #DCSR>
@@ -56,7 +56,7 @@
 %i = bufferization.alloc_tensor() : tensor<4x3xf64, #DCSR>
 %0 = linalg.generic #transpose_trait
 ins(%arga: tensor<3x4xf64, #DCSR>)
- outs(%i: tensor<4x3xf64, #DCSR>) {
+ inits(%i: tensor<4x3xf64, #DCSR>) {
 ^bb(%a: f64, %x: f64):
 linalg.yield %a : f64
 } -> tensor<4x3xf64, #DCSR>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
@@ -35,7 +35,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_vec_scale
 ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %x: i32):
 %1 = sparse_tensor.unary %a : f64 to i32
 present={}
@@ -55,7 +55,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_vec_scale
 ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %x: f64):
 %1 = sparse_tensor.unary %a : f64 to f64
 present={
@@ -78,7 +78,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_vec_scale
 ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %x: f64):
 %idx = linalg.index 0 : index
 %1 = sparse_tensor.unary %a : f64 to f64
@@ -106,7 +106,7 @@
 %xv = bufferization.alloc_tensor(%d0, %d1) : tensor
 %0 = linalg.generic #trait_mat_scale
 ins(%argx: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %x: f64):
 %1 = sparse_tensor.unary %a: f64 to f64
 present={
@@ -133,7 +133,7 @@
 %xv = bufferization.alloc_tensor(%d0, %d1) : tensor
 %0 = linalg.generic #trait_mat_scale
 ins(%argx: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %x: f64):
 %row = linalg.index 0 : index
 %col = linalg.index 1 : index
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir
@@ -53,7 +53,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_scale
 ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %x: f64):
 %1 = arith.mulf %a, %s : f64
 linalg.yield %1 : f64
@@ -65,7 +65,7 @@
 func.func @vector_scale_inplace(%argx: tensor) -> tensor {
 %s = arith.constant 2.0 : f64
 %0 = linalg.generic #trait_scale_inpl
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%x: f64):
 %1 = arith.mulf %x, %s : f64
 linalg.yield %1 : f64
@@ -81,7 +81,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_op
 ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %1 = arith.addf %a, %b : f64
 linalg.yield %1 : f64
@@ -97,7 +97,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_op
 ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %1 = arith.mulf %a, %b : f64
 linalg.yield %1 : f64
@@ -113,7 +113,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_op
 ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %1 = arith.mulf %a, %b : f64
 linalg.yield %1 : f64
@@ -127,7 +127,7 @@
 %argx: tensor) -> tensor {
 %0 = linalg.generic #trait_dot
 ins(%arga, %argb: tensor, tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %1 = arith.mulf %a, %b : f64
 %2 = arith.addf %x, %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py
--- a/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py
@@ -38,7 +38,7 @@
 %argx = bufferization.alloc_tensor() : tensor<3x4xf64, #DCSR>
 %0 = linalg.generic #trait_add_elt
 ins(%arga, %argb: tensor<3x4xf64, #DCSR>, tensor<3x4xf64, #DCSR>)
- outs(%argx: tensor<3x4xf64, #DCSR>) {
+ inits(%argx: tensor<3x4xf64, #DCSR>) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %1 = arith.addf %a, %b : f64
 linalg.yield %1 : f64
diff --git a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
@@ -3,7 +3,7 @@
 func.func @gemm(%arg0 : memref, %arg1 : memref, %arg2 : memref) {
 linalg.matmul ins(%arg0, %arg1 : memref, memref)
- outs(%arg2 : memref)
+ inits(%arg2 : memref)
 return
 }
 // CHECK-LABEL: func @gemm
@@ -34,7 +34,7 @@
 affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1, d0)>],
 iterator_types = ["parallel", "parallel"]}
 ins(%arg0, %arg1, %arg2 : memref<200x300xi32>, memref<300xi16>, memref<200xi8>)
- outs(%arg3 : memref<300x200xi64>) {
+ inits(%arg3 : memref<300x200xi64>) {
 ^bb0(%b0 : i32, %b1 : i16, %b2 : i8, %b3 : i64):
 %0 = linalg.index 0 : index
 %1 = arith.index_cast %0 : index to i16
@@ -84,7 +84,7 @@
 strides = dense<[1, 2]> : tensor<2xi64>,
 dilations = dense<[3, 4]> : tensor<2xi64>}
 ins(%arg0, %arg1 : memref, memref)
- outs(%arg2 : memref)
+ inits(%arg2 : memref)
 return
 }
 // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d1 + d4 * 3)>
@@ -128,7 +128,7 @@
 strides = dense<[1, 2]> : tensor<2xi64>,
 dilations = dense<[3, 4]> : tensor<2xi64>}
 ins(%arg0, %arg1 : memref, memref)
- outs(%arg2 : memref)
+ inits(%arg2 : memref)
 return
 }
 // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1 + d4 * 3)>
@@ -165,7 +165,7 @@
 func.func @map(%lhs: memref<64xf32>, %rhs: memref<64xf32>, %out: memref<64xf32>) {
 linalg.map ins(%lhs, %rhs : memref<64xf32>, memref<64xf32>)
- outs(%out : memref<64xf32>)
+ inits(%out : memref<64xf32>)
 (%in: f32, %in_0: f32) {
 %0 = arith.addf %in, %in_0 : f32
 linalg.yield %0 : f32
@@ -192,7 +192,7 @@
 func.func @transpose(%arg0: memref<16x32x64xf32>, %arg1: memref<32x64x16xf32>) {
 linalg.transpose ins(%arg0 : memref<16x32x64xf32>)
- outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0]
+ inits(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0]
 return
 }
 // CHECK-LABEL: func.func @transpose(
@@ -216,7 +216,7 @@
 func.func @reduce(%arg0: memref<16x32x64xf32>, %arg1: memref<16x64xf32>) {
 linalg.reduce ins(%arg0 : memref<16x32x64xf32>)
- outs(%arg1 : memref<16x64xf32>) dimensions = [1]
+ inits(%arg1 : memref<16x64xf32>) dimensions = [1]
 (%in: f32, %init: f32) {
 %0 = arith.addf %in, %init : f32
 linalg.yield %0 : f32
@@ -247,7 +247,7 @@
 %init: memref<8x16x32xf32>) {
 linalg.broadcast ins(%input:memref<8x32xf32>)
- outs(%init:memref<8x16x32xf32>)
+ inits(%init:memref<8x16x32xf32>)
 dimensions = [0, 2]
 func.return
 }
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -7,10 +7,10 @@
 %d0 = tensor.dim %arg0, %c0 : tensor
 %d1 = tensor.dim %arg1, %c1 : tensor
 %init = tensor.empty(%d0, %d1) : tensor
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+ %fill = linalg.fill ins(%cst : f32) inits(%init : tensor) -> tensor
 %gemm = linalg.matmul {__internal_linalg_transform__ = "fusion"}
 ins(%arg0, %arg1 : tensor, tensor)
- outs(%fill : tensor) -> tensor
+ inits(%fill : tensor) -> tensor
 return %gemm : tensor
 }
 // CHECK: func.func @gemm_fill_fusion(
@@ -25,10 +25,10 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -42,15 +42,15 @@
 %d0 = tensor.dim %arg0, %c0 : tensor
 %d1 = tensor.dim %arg1, %c1 : tensor
 %init = tensor.empty(%d0, %d1) : tensor
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+ %fill = linalg.fill ins(%cst : f32) inits(%init : tensor) -> tensor
 %gemm = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%fill : tensor) -> tensor
+ inits(%fill : tensor) -> tensor
 %generic = linalg.generic {
 __internal_linalg_transform__ = "fusion",
 indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],
 iterator_types = ["parallel", "parallel"]}
- ins(%gemm, %arg2 : tensor, tensor) outs(%init : tensor) {
+ ins(%gemm, %arg2 : tensor, tensor) inits(%init : tensor) {
 ^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
 %add = arith.addf %b0, %b1 : f32
 linalg.yield %add : f32
@@ -70,15 +70,15 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[INIT]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK-DAG: %[[BIAS_TILE:.+]] = tensor.extract_slice %[[ARG2]][%[[IV1]]]
 // CHECK-DAG: %[[OUTS_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GEMM_TILE]], %[[BIAS_TILE]] :
-// CHECK-SAME: outs(%[[OUTS_TILE]] :
+// CHECK-SAME: inits(%[[OUTS_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -91,14 +91,14 @@
 %d0 = tensor.dim %lhs0, %c0 : tensor
 %d1 = tensor.dim %rhs0, %c1 : tensor
 %init0 = tensor.empty(%d0, %d1) : tensor
- %fill0 = linalg.fill ins(%cst : f32) outs(%init0 : tensor) -> tensor
+ %fill0 = linalg.fill ins(%cst : f32) inits(%init0 : tensor) -> tensor
 %gemm0 = linalg.matmul
- ins(%lhs0, %rhs0 : tensor, tensor) outs(%fill0 : tensor) -> tensor
+ ins(%lhs0, %rhs0 : tensor, tensor) inits(%fill0 : tensor) -> tensor
 %d2 = tensor.dim %rhs1, %c1 : tensor
 %init1 = tensor.empty(%d0, %d2) : tensor
- %fill1 = linalg.fill ins(%cst : f32) outs(%init1 : tensor) -> tensor
+ %fill1 = linalg.fill ins(%cst : f32) inits(%init1 : tensor) -> tensor
 %gemm1 = linalg.matmul {__internal_linalg_transform__ = "gemm_fusion"}
- ins(%gemm0, %rhs1 : tensor, tensor) outs(%fill1 : tensor) -> tensor
+ ins(%gemm0, %rhs1 : tensor, tensor) inits(%fill1 : tensor) -> tensor
 return %gemm1 : tensor
 }
 // CHECK: func.func @gemm_gemm_fusion(
@@ -118,17 +118,17 @@
 // CHECK-DAG: %[[RHS0_TILE:.+]] = tensor.extract_slice %[[RHS0]][0, 0]
 // CHECK-DAG: %[[INIT0_TILE:.+]] = tensor.extract_slice %[[INIT0]][%[[IV]], 0]
 // CHECK: %[[FILL0_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]] :
 // CHECK: %[[GEMM0_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS0_TILE]], %[[RHS0_TILE]] :
-// CHECK-SAME: outs(%[[FILL0_TILE]] :
+// CHECK-SAME: inits(%[[FILL0_TILE]] :
 // CHECK-DAG: %[[RHS1_TILE:.+]] = tensor.extract_slice %[[RHS1]][0, 0]
 // CHECK-DAG: %[[INIT1_TILE:.+]] = tensor.extract_slice %[[ITERARG]][%[[IV]], 0]
 // CHECK: %[[FILL1_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT1_TILE]] :
+// CHECK-SAME: inits(%[[INIT1_TILE]] :
 // CHECK: %[[GEMM1_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[GEMM0_TILE]], %[[RHS1_TILE]] :
-// CHECK-SAME: outs(%[[FILL1_TILE]] :
+// CHECK-SAME: inits(%[[FILL1_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GEMM1_TILE]] into %[[ITERARG]][%[[IV]], 0]
 // CHECK: scf.yield %[[INSERT]]
@@ -141,16 +141,16 @@
 %d0 = tensor.dim %arg0, %c0 : tensor
 %d1 = tensor.dim %arg1, %c1 : tensor
 %init0 = tensor.empty(%d0, %d1) : tensor
- %fill = linalg.fill ins(%cst : f32) outs(%init0 : tensor) -> tensor
+ %fill = linalg.fill ins(%cst : f32) inits(%init0 : tensor) -> tensor
 %gemm = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%fill : tensor) -> tensor
+ inits(%fill : tensor) -> tensor
 %init1 = tensor.empty(%d1, %d0) : tensor
 %transpose = linalg.generic {
 __internal_linalg_transform__ = "fusion",
 indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
 iterator_types = ["parallel", "parallel"]}
- ins(%gemm : tensor) outs(%init1 : tensor) {
+ ins(%gemm : tensor) inits(%init1 : tensor) {
 ^bb0(%b0 : f32, %b1 : f32):
 linalg.yield %b0 : f32
 } -> tensor
@@ -173,14 +173,14 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[INIT0_TILE:.+]] = tensor.extract_slice %[[INIT0]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK-DAG: %[[OUTS_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GEMM_TILE]] :
-// CHECK-SAME: outs(%[[OUTS_TILE]] :
+// CHECK-SAME: inits(%[[OUTS_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -193,15 +193,15 @@
 %d1 = tensor.dim %arg1, %c1 : tensor
 %cst = arith.constant 0.0 : f32
 %0 = tensor.empty(%d0, %d1) : tensor
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor
 %2 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%1 : tensor) -> tensor
+ inits(%1 : tensor) -> tensor
 %3 = linalg.generic {
 __internal_linalg_transform__ = "gemm_interchange_fusion",
 indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
 iterator_types = ["parallel", "parallel"]}
- ins(%2 : tensor) outs(%0 : tensor) {
+ ins(%2 : tensor) inits(%0 : tensor) {
 ^bb0(%b0 : f32, %b1 : f32):
 %4 = arith.addf %b0, %b0 : f32
 linalg.yield %4 : f32
@@ -220,14 +220,14 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV0]]]
 // CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[INIT]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK: %[[INIT_TILE_2:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GEMM_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE_2]] :
+// CHECK-SAME: inits(%[[INIT_TILE_2]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -240,7 +240,7 @@
 %0 = tensor.dim %arg2, %c0 : tensor
 %1 = tensor.dim %arg2, %c1 : tensor
 %2 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
 %3 = tensor.dim %2, %c0 : tensor
 %4 = tensor.dim %2, %c1 : tensor
 %5 = tensor.empty(%3, %4) : tensor
@@ -251,7 +251,7 @@
 iterator_types = ["parallel", "parallel"],
 __internal_linalg_transform__ = "gemm_plus_gemm_fusion"}
 ins(%2, %2 : tensor, tensor)
- outs(%5 : tensor) {
+ inits(%5 : tensor) {
 ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32) :
 %7 = arith.addf %arg3, %arg4 : f32
 linalg.yield %7 : f32
@@ -273,17 +273,17 @@
 // CHECK-DAG: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[LHS:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[ST_ARG0]], %[[ST_ARG1]] :
-// CHECK-SAME: outs(%[[ST_ARG2]] :
+// CHECK-SAME: inits(%[[ST_ARG2]] :
 // CHECK-DAG: %[[ST_ARG0_1:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
 // CHECK-DAG: %[[ST_ARG1_1:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[ST_ARG2_1:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[RHS:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[ST_ARG0_1]], %[[ST_ARG1_1]] :
-// CHECK-SAME: outs(%[[ST_ARG2_1]] :
+// CHECK-SAME: inits(%[[ST_ARG2_1]] :
 // CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[ST_RESULT:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[LHS]], %[[RHS]] :
-// CHECK-SAME: outs(%[[ST_ARG6]] :
+// CHECK-SAME: inits(%[[ST_ARG6]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[ST_RESULT]]
 // CHECK-SAME: into %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[UPDATE]]
@@ -299,7 +299,7 @@
 %0 = tensor.dim %arg2, %c0 : tensor
 %1 = tensor.dim %arg2, %c1 : tensor
 %2 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
 %3 = tensor.dim %2, %c0 : tensor
 %4 = tensor.dim %2, %c1 : tensor
 %5 = tensor.empty(%3, %4) : tensor
@@ -310,7 +310,7 @@
 iterator_types = ["parallel", "parallel"],
 __internal_linalg_transform__ = "gemm_plus_gemm_fusion"}
 ins(%2, %2 : tensor, tensor)
- outs(%5 : tensor) {
+ inits(%5 : tensor) {
 ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32) :
 %7 = arith.addf %arg3, %arg4 : f32
 linalg.yield %7 : f32
@@ -331,17 +331,17 @@
 // CHECK: %[[LHS:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[ST_ARG0]], %[[ST_ARG1]]
 // CHECK-SAME: : tensor, tensor)
-// CHECK-SAME: outs(%[[ST_ARG2]] : tensor)
+// CHECK-SAME: inits(%[[ST_ARG2]] : tensor)
 // CHECK-DAG: %[[STR_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV1]], 0]
 // CHECK-DAG: %[[STR_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV0]]]
 // CHECK-DAG: %[[STR_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[RHS:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[STR_ARG0]], %[[STR_ARG1]] :
-// CHECK-SAME: outs(%[[STR_ARG2]] :
+// CHECK-SAME: inits(%[[STR_ARG2]] :
 // CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[ST_RESULT:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[LHS]], %[[RHS]] :
-// CHECK-SAME: outs(%[[ST_ARG6]] :
+// CHECK-SAME: inits(%[[ST_ARG6]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[ST_RESULT]]
 // CHECK-SAME: into %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[UPDATE]]
@@ -354,13 +354,13 @@
 %arg2: tensor, %arg3: tensor, %arg4: tensor, %arg5: tensor, %arg6: tensor) -> tensor {
 %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor // [M, N0] * [N0, N1]
+ inits(%arg2 : tensor) -> tensor // [M, N0] * [N0, N1]
 %1 = linalg.matmul ins(%0, %arg3 : tensor, tensor)
- outs(%arg4 : tensor) -> tensor // [M, N1] * [N1, N2]
+ inits(%arg4 : tensor) -> tensor // [M, N1] * [N1, N2]
 %2 = linalg.matmul {__internal_linalg_transform__ = "gemm_sequence_fusion"}
 ins(%1, %arg5 : tensor, tensor)
- outs(%arg6 : tensor) -> tensor // [M, N2] * [N2, N3]
+ inits(%arg6 : tensor) -> tensor // [M, N2] * [N2, N3]
 return %2 : tensor
 }
@@ -389,15 +389,15 @@
 // CHECK-DAG: %[[SLICE_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, 0] [%[[N0]], %[[N1]]]
 // CHECK-DAG: %[[SLICE_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV]], 0] [%[[TILE_M]], %[[N1]]]
 // CHECK-DAG: %[[TILE_GEMM1:.+]] = linalg.matmul ins(%[[SLICE_ARG0]], %[[SLICE_ARG1]] :
-// CHECK-SAME: outs(%[[SLICE_ARG2]] :
+// CHECK-SAME: inits(%[[SLICE_ARG2]] :
 // CHECK-DAG: %[[SLICE_ARG3:.+]] = tensor.extract_slice %[[ARG3]][0, 0] [%[[N1]], %[[N2]]]
 // CHECK-DAG: %[[SLICE_ARG4:.+]] = tensor.extract_slice %[[ARG4]][%[[IV]], 0] [%[[TILE_M]], %[[N2]]]
 // CHECK-DAG: %[[TILE_GEMM2:.+]] = linalg.matmul ins(%[[TILE_GEMM1]], %[[SLICE_ARG3]] :
-// CHECK-SAME: outs(%[[SLICE_ARG4]] :
+// CHECK-SAME: inits(%[[SLICE_ARG4]] :
 // CHECK-DAG: %[[SLICE_ARG5:.+]] = tensor.extract_slice %[[ARG5]][0, 0] [%[[N2]], %[[N3]]]
 // CHECK-DAG: %[[SLICE_ARG6:.+]] = tensor.extract_slice %[[ARG8]][%[[IV]], 0] [%[[TILE_M]], %[[N3]]]
 // CHECK-DAG: %[[TILE_GEMM3:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[TILE_GEMM2]], %[[SLICE_ARG5]] :
-// CHECK-SAME: outs(%[[SLICE_ARG6]] :
+// CHECK-SAME: inits(%[[SLICE_ARG6]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[TILE_GEMM3]] into %[[ARG8]][%[[IV]], 0] [%[[TILE_M]], %[[N3]]]
 // CHECK: scf.yield %[[UPDATE]]
diff --git a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
@@ -4,7 +4,7 @@
 %arg2 : tensor) -> tensor {
 %0 = linalg.matmul {__internal_linalg_transform__ = "simple_gemm"}
 ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
 return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -34,7 +34,7 @@
 // CHECK-SAME: [%[[IV0]], %[[IV1]]] [%[[TS_Y]], %[[TS_X]]] [1, 1]
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[INIT1]]
 // CHECK-SAME: [%[[IV0]], %[[IV1]]] [%[[TS_Y]], %[[TS_X]]] [1, 1]
 // CHECK: scf.yield %[[UPDATE]]
@@ -47,7 +47,7 @@
 %arg2 : memref) {
 linalg.matmul {__internal_linalg_transform__ = "simple_gemm_memref"}
 ins(%arg0, %arg1 : memref, memref)
- outs(%arg2 : memref)
+ inits(%arg2 : memref)
 return
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -79,7 +79,7 @@
 // CHECK-SAME: [%[[IV0]], %[[IV1]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
 // CHECK: linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[OUT_TILE]] :
+// CHECK-SAME: inits(%[[OUT_TILE]] :
 // -----
@@ -94,7 +94,7 @@
 iterator_types = ["parallel", "parallel", "parallel"]}
 {__internal_linalg_transform__ = "parallel_generic_transpose"}
 ins(%arg0 : tensor<128x200x300xf32>)
- outs(%init0, %init1 : tensor<128x300x200xf32>, tensor<300x128x200xf32>) {
+ inits(%init0, %init1 : tensor<128x300x200xf32>, tensor<300x128x200xf32>) {
 ^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
 linalg.yield %b0, %b0 : f32, f32
 } -> (tensor<128x300x200xf32>, tensor<300x128x200xf32>)
@@ -123,7 +123,7 @@
 // CHECK-SAME: [%[[IV1]], %[[IV0]], 0] [20, %[[TS_Y]], 200] [1, 1, 1]
 // CHECK: %[[RESULT_TILE:.+]]:2 = linalg.generic
 // CHECK-SAME: ins(%[[ARG_TILE]] :
-// CHECK-SAME: outs(%[[INIT0_TILE]], %[[INIT1_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]], %[[INIT1_TILE]] :
 // CHECK: %[[UPDATE0:.+]] = tensor.insert_slice %[[RESULT_TILE]]#0 into %[[ARG3]]
 // CHECK-SAME: [%[[IV0]], %[[IV1]], 0] [%[[TS_Y]], 20, 200] [1, 1, 1]
 // CHECK: %[[UPDATE1:.+]] = tensor.insert_slice %[[RESULT_TILE]]#1 into %[[ARG4]]
@@ -141,7 +141,7 @@
 dilation = dense<[4, 5]> : tensor<2xi64>,
 __internal_linalg_transform__ = "simple_conv"}
 ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
 return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -187,7 +187,7 @@
 // CHECK: %[[CONV_TILE:.+]] = linalg.conv_2d_nhwc_hwcf
 // CHECK-SAME: dilation = dense<[4, 5]> : tensor<2xi64>, strides = dense<[2, 3]> : tensor<2xi64>
 // CHECK-SAME: ins(%[[INPUT_TILE]], %[[FILTER_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: tensor.insert_slice %[[CONV_TILE]] into %[[INIT2]]
 // CHECK-SAME: [0, 0, 0, 0] [%[[N]], %[[R]], %[[S]], %[[F]]]
@@ -207,7 +207,7 @@
 iterator_types = ["parallel", "parallel"]}
 {__internal_linalg_transform__ = "indexed_semantics"}
 ins(%arg0: tensor)
- outs(%arg1: tensor) {
+ inits(%arg1: tensor) {
 ^bb0(%arg2: f32, %arg3: f32):
 // CHECK: %[[INDEX0:.+]] = linalg.index 0
 // CHECK: %[[INDEX0_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX0]], %[[I0]])
@@ -231,7 +231,7 @@
 %arg2 : tensor) -> tensor {
 %0 = linalg.matmul {__internal_linalg_transform__ = "gemm_interchange"}
 ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
 return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)>
@@ -266,7 +266,7 @@
 // CHECK-SAME: [%[[IV2]], %[[IV0]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[INIT2]]
 // CHECK-SAME: [%[[IV2]], %[[IV0]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
 // CHECK: scf.yield %[[UPDATE]]
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -2806,7 +2806,7 @@
 let assemblyFormat = [{
 attr-dict (`ins` `(` $inputs^ `:` type($inputs) `)`)?
- `outs` `(` $outputs `:` type($outputs) `)`
+ `inits` `(` $outputs `:` type($outputs) `)`
 $region (`->` type($results)^)?
 }];
@@ -2866,7 +2866,7 @@
 let assemblyFormat = [{
 attr-dict (`ins` `(` $inputs^ `:` type($inputs) `)`)?
- `outs` `(` $outputs `:` type($outputs) `)`
+ `inits` `(` $outputs `:` type($outputs) `)`
 $region (`->` type($results)^)?
 }];
diff --git a/mlir/test/mlir-cpu-runner/async.mlir b/mlir/test/mlir-cpu-runner/async.mlir
--- a/mlir/test/mlir-cpu-runner/async.mlir
+++ b/mlir/test/mlir-cpu-runner/async.mlir
@@ -22,7 +22,7 @@
 %c4 = arith.constant 4.0 : f32
 %A = memref.alloc() : memref<4xf32>
- linalg.fill ins(%c0 : f32) outs(%A : memref<4xf32>)
+ linalg.fill ins(%c0 : f32) inits(%A : memref<4xf32>)
 // CHECK: [0, 0, 0, 0]
 %U = memref.cast %A : memref<4xf32> to memref<*xf32>
diff --git a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
--- a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
+++ b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
@@ -7,14 +7,14 @@
 %cf1 = arith.constant 1.00000e+00 : f32
- linalg.fill ins(%cf1 : f32) outs(%A : memref<16x16xf32>)
- linalg.fill ins(%cf1 : f32) outs(%B : memref<16x16xf32>)
+ linalg.fill ins(%cf1 : f32) inits(%A : memref<16x16xf32>)
+ linalg.fill ins(%cf1 : f32) inits(%B : memref<16x16xf32>)
 %reps = arith.constant 1 : index
 %t_start = call @rtclock() : () -> f64
 affine.for %arg0 = 0 to 5 {
- linalg.fill ins(%cf1 : f32) outs(%C : memref<16x16xf32>)
+ linalg.fill ins(%cf1 : f32) inits(%C : memref<16x16xf32>)
 func.call @sgemm_naive(%A, %B, %C) : (memref<16x16xf32>, memref<16x16xf32>, memref<16x16xf32>) -> ()
 }
 %t_end = call @rtclock() : () -> f64
diff --git a/mlir/test/mlir-cpu-runner/unranked-memref.mlir b/mlir/test/mlir-cpu-runner/unranked-memref.mlir
--- a/mlir/test/mlir-cpu-runner/unranked-memref.mlir
+++ b/mlir/test/mlir-cpu-runner/unranked-memref.mlir
@@ -42,18 +42,18 @@
 %f10 = arith.constant 10.00000e+00 : f32
 %V = memref.cast %A : memref<10x3xf32, 0> to memref
- linalg.fill ins(%f10 : f32) outs(%V : memref)
+ linalg.fill ins(%f10 : f32) inits(%V : memref)
 %U = memref.cast %A : memref<10x3xf32, 0> to memref<*xf32>
 call @printMemrefF32(%U) : (memref<*xf32>) -> ()
 %V2 = memref.cast %U : memref<*xf32> to memref
- linalg.fill ins(%f5 : f32) outs(%V2 : memref)
+ linalg.fill ins(%f5 : f32) inits(%V2 : memref)
 %U2 = memref.cast %V2 : memref to memref<*xf32>
 call @printMemrefF32(%U2) : (memref<*xf32>) -> ()
 %V3 = memref.cast %V2 : memref to memref<*xf32>
 %V4 = memref.cast %V3 : memref<*xf32> to memref
- linalg.fill ins(%f2 : f32) outs(%V4 : memref)
+ linalg.fill ins(%f2 : f32) inits(%V4 : memref)
 %U3 = memref.cast %V2 : memref to memref<*xf32>
 call @printMemrefF32(%U3) : (memref<*xf32>) -> ()
@@ -79,7 +79,7 @@
 func.func @return_two_var_memref_caller() {
 %0 = memref.alloca() : memref<4x3xf32>
 %c0f32 = arith.constant 1.0 : f32
- linalg.fill ins(%c0f32 : f32) outs(%0 : memref<4x3xf32>)
+ linalg.fill ins(%c0f32 : f32) inits(%0 : memref<4x3xf32>)
 %1:2 = call @return_two_var_memref(%0) : (memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>)
 call @printMemrefF32(%1#0) : (memref<*xf32>) -> ()
 call @printMemrefF32(%1#1) : (memref<*xf32>) -> ()
@@ -94,7 +94,7 @@
 func.func @return_var_memref_caller() {
 %0 = memref.alloca() : memref<4x3xf32>
 %c0f32 = arith.constant 1.0 : f32
- linalg.fill ins(%c0f32 : f32) outs(%0 : memref<4x3xf32>)
+ linalg.fill ins(%c0f32 : f32) inits(%0 : memref<4x3xf32>)
 %1 = call @return_var_memref(%0) : (memref<4x3xf32>) -> memref<*xf32>
 call @printMemrefF32(%1) : (memref<*xf32>) -> ()
 return
diff --git a/mlir/test/mlir-cpu-runner/utils.mlir b/mlir/test/mlir-cpu-runner/utils.mlir
--- a/mlir/test/mlir-cpu-runner/utils.mlir
+++ b/mlir/test/mlir-cpu-runner/utils.mlir
@@ -19,7 +19,7 @@
 %f = arith.constant 2.00000e+00 : f32
 %A = memref.alloc() : memref<16xf32>
 %B = memref.cast %A: memref<16xf32> to memref
- linalg.fill ins(%f : f32) outs(%B : memref)
+ linalg.fill ins(%f : f32) inits(%B : memref)
 %U = memref.cast %B : memref to memref<*xf32>
 call @printMemrefF32(%U): (memref<*xf32>) -> ()
 memref.dealloc %A : memref<16xf32>
@@ -33,7 +33,7 @@
 %f4 = arith.constant 4.00000e+00 : f32
 %A = memref.alloc() : memref<3x4x5xf32>
 %B = memref.cast %A: memref<3x4x5xf32> to memref
- linalg.fill ins(%f : f32) outs(%B : memref)
+ linalg.fill ins(%f : f32) inits(%B : memref)
 %c2 = arith.constant 2 : index
 memref.store %f4, %B[%c2, %c2, %c2]: memref
diff --git a/mlir/test/mlir-opt/async.mlir b/mlir/test/mlir-opt/async.mlir
--- a/mlir/test/mlir-opt/async.mlir
+++ b/mlir/test/mlir-opt/async.mlir
@@ -20,7 +20,7 @@
 %c4 = arith.constant 4.0 : f32
 %A = memref.alloc() : memref<4xf32>
- linalg.fill ins(%c0 : f32) outs(%A : memref<4xf32>)
+ linalg.fill ins(%c0 : f32) inits(%A : memref<4xf32>)
 %U = memref.cast %A : memref<4xf32> to memref<*xf32>
 call @printMemrefF32(%U): (memref<*xf32>) -> ()
diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
--- a/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
+++ b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
@@ -56,7 +56,7 @@
 # CHECK-SAME: indexing_maps = [#[[$MUL_MAP_A]], #[[$MUL_MAP_B]], #[[$MUL_MAP_C]]]
 # CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
 # CHECK-SAME: ins(%[[A]], %[[B]]
- # CHECK-SAME: outs(%[[INITC]]
+ # CHECK-SAME: inits(%[[INITC]]
 @func.FuncOp.from_py_func(
 RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8), f32))
 def test_matmul_mono(lhs, rhs):
diff --git a/mlir/test/python/dialects/linalg/ops.py b/mlir/test/python/dialects/linalg/ops.py
--- a/mlir/test/python/dialects/linalg/ops.py
+++ b/mlir/test/python/dialects/linalg/ops.py
@@ -21,7 +21,7 @@
 # CHECK-LABEL: func @fill_tensor
 # CHECK-SAME: %[[OUT:[0-9a-z]+]]: tensor<12x?xf32>
 # CHECK-NEXT: %[[CST:.*]] = arith.constant 0.0{{.*}} : f32
- # CHECK-NEXT: %[[RES:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : tensor<12x?xf32>) -> tensor<12x?xf32>
+ # CHECK-NEXT: %[[RES:.*]] = linalg.fill ins(%[[CST]] : f32) inits(%[[OUT]] : tensor<12x?xf32>) -> tensor<12x?xf32>
 # CHECK-NEXT: return %[[RES]] : tensor<12x?xf32>
 @func.FuncOp.from_py_func(
 RankedTensorType.get((12, ShapedType.get_dynamic_size()), f32))
@@ -32,7 +32,7 @@
 # CHECK-LABEL: func @fill_buffer
 # CHECK-SAME: %[[OUT:[0-9a-z]+]]: memref<12x?xf32>
 # CHECK-NEXT: %[[CST:.*]] = arith.constant 0.0{{.*}} : f32
- # CHECK-NEXT: linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : memref<12x?xf32>)
+ # CHECK-NEXT: linalg.fill ins(%[[CST]] : f32) inits(%[[OUT]] : memref<12x?xf32>)
 # CHECK-NEXT: return
 @func.FuncOp.from_py_func(
 MemRefType.get((12, ShapedType.get_dynamic_size()), f32))
@@ -59,12 +59,12 @@
 # CHECK: linalg.elemwise_unary
 # CHECK-SAME: cast = #linalg.type_fn
 # CHECK-SAME: fun = #linalg.unary_fn
- # CHECK-SAME: ins(%{{.*}} : tensor<4x8xf32>) outs(%{{.*}} : tensor<4x8xf32>)
+ # CHECK-SAME: ins(%{{.*}} : tensor<4x8xf32>) inits(%{{.*}} : tensor<4x8xf32>)
 unary_result = linalg.elemwise_unary(lhs, outs=[init_result.result])
 # CHECK: linalg.elemwise_binary
 # CHECK-SAME: cast = #linalg.type_fn
 # CHECK-SAME: fun = #linalg.binary_fn
- # CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<4x8xf32>, tensor<4x8xf32>) outs(%{{.*}} : tensor<4x8xf32>)
+ # CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<4x8xf32>, tensor<4x8xf32>) inits(%{{.*}} : tensor<4x8xf32>)
 # CHECK: return
 binary_result = linalg.elemwise_binary(
 lhs,
@@ -144,7 +144,7 @@
 init = tensor.EmptyOp([4, 8], f32)
 # CHECK: linalg.matmul
 # CHECK: ins(%[[LHS]], %[[RHS]]
- # CHECK: outs(%[[INIT]]
+ # CHECK: inits(%[[INIT]]
 return linalg.matmul(lhs, rhs, outs=init)
 print(module)
diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py
--- a/mlir/test/python/integration/dialects/linalg/opsrun.py
+++ b/mlir/test/python/integration/dialects/linalg/opsrun.py
@@ -29,10 +29,10 @@
 %rhs = memref.alloc() : memref<4x8xf32>
 %O0 = memref.alloc() : memref<4x8xf32>
 %O1 = memref.alloc() : memref<4x8xf32>
- linalg.fill ins(%v1 : f32) outs(%lhs : memref)
- linalg.fill ins(%v2 : f32) outs(%rhs : memref<4x8xf32>)
- linalg.fill ins(%v0 : f32) outs(%O0 : memref<4x8xf32>)
- linalg.fill ins(%v0 : f32) outs(%O1 : memref<4x8xf32>)
+ linalg.fill ins(%v1 : f32) inits(%lhs : memref)
+ linalg.fill ins(%v2 : f32) inits(%rhs : memref<4x8xf32>)
+ linalg.fill ins(%v0 : f32) inits(%O0 : memref<4x8xf32>)
+ linalg.fill ins(%v0 : f32) inits(%O1 : memref<4x8xf32>)
 call @elemwise_exp_add_on_buffers(%lhs, %rhs, %O0) : (memref, memref<4x8xf32>, memref<4x8xf32>) -> ()
@@ -60,10 +60,10 @@
 %B = memref.alloc() : memref<16x8xf32>
 %C0 = memref.alloc() : memref<4x8xf32>
 %C1 = memref.alloc() : memref<4x8xf32>
- linalg.fill ins(%v1 : i8) outs(%A : memref<4x16xi8>)
- linalg.fill ins(%v2 : f32) outs(%B : memref<16x8xf32>)
- linalg.fill ins(%v0 : f32) outs(%C0 : memref<4x8xf32>)
- linalg.fill ins(%v0 : f32) outs(%C1 : memref<4x8xf32>)
+ linalg.fill ins(%v1 : i8) inits(%A : memref<4x16xi8>)
+ linalg.fill ins(%v2 : f32) inits(%B : memref<16x8xf32>)
+ linalg.fill ins(%v0 : f32) inits(%C0 : memref<4x8xf32>)
+ linalg.fill ins(%v0 : f32) inits(%C1 : memref<4x8xf32>)
 call @matmul_signed_on_buffers(%A, %B, %C0) : (memref<4x16xi8>, memref<16x8xf32>, memref<4x8xf32>) -> ()
@@ -137,9 +137,9 @@
 %input = memref.alloc() : memref<1x4x16x1xf64>
 %filter = memref.alloc() : memref<2x2x1xf64>
 %output = memref.alloc() : memref<1x2x4x1xi32>
- linalg.fill ins(%v1 : f64) outs(%input : memref<1x4x16x1xf64>)
- linalg.fill ins(%v2 : f64) outs(%filter : memref<2x2x1xf64>)
- linalg.fill ins(%v0 : i32) outs(%output : memref<1x2x4x1xi32>)
+ linalg.fill ins(%v1 : f64) inits(%input : memref<1x4x16x1xf64>)
+ linalg.fill ins(%v2 : f64) inits(%filter : memref<2x2x1xf64>)
+ linalg.fill ins(%v0 : i32) inits(%output : memref<1x2x4x1xi32>)
 call @conv_on_buffers(%input, %filter, %output) : (memref<1x4x16x1xf64>, memref<2x2x1xf64>, memref<1x2x4x1xi32>) -> ()
@@ -163,9 +163,9 @@
 %input = memref.alloc() : memref<1x4x16x1xf64>
 %shape = memref.alloc() : memref<2x2xf64>
 %output = memref.alloc() : memref<1x2x4x1xi32>
- linalg.fill ins(%v1 : f64) outs(%input : memref<1x4x16x1xf64>)
- linalg.fill ins(%v1 : f64) outs(%shape : memref<2x2xf64>)
- linalg.fill ins(%v0 : i32) outs(%output : memref<1x2x4x1xi32>)
+ linalg.fill ins(%v1 : f64) inits(%input : memref<1x4x16x1xf64>)
+ linalg.fill ins(%v1 : f64) inits(%shape : memref<2x2xf64>)
+ linalg.fill ins(%v0 : i32) inits(%output : memref<1x2x4x1xi32>)
 %c0 = arith.constant 0 : index
 %c1 = arith.constant 1 : index