diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -71,7 +71,7 @@ linalg.generic #trait_attribute ins(%A, %B : memref<?x?xf32, stride_specification>, memref<?x?xf32, stride_specification>) - outs(%C : memref<?x?xf32, stride_specification>) + inits(%C : memref<?x?xf32, stride_specification>) attrs = {other-optional-attributes} {region} ``` @@ -112,7 +112,7 @@ linalg.generic #matmul_trait ins(%A, %B : memref<?x?xf32, stride_specification>, memref<?x?xf32, stride_specification>) - outs(%C : memref<?x?xf32, stride_specification>) + inits(%C : memref<?x?xf32, stride_specification>) {other-optional-attributes} { ^bb0(%a: f32, %b: f32, %c: f32) : %d = arith.mulf %a, %b: f32 @@ -153,7 +153,7 @@ ```mlir %C = linalg.generic #trait_attribute ins(%A, %B : tensor<?x?xf32>, memref<?x?xf32, stride_specification>) - outs(%C : tensor<?x?xf32>) + inits(%C : tensor<?x?xf32>) {other-optional-attributes} {region} -> (tensor<?x?xf32>) @@ -249,7 +249,7 @@ ``` %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) - outs(%init: tensor<64xf32>) + inits(%init: tensor<64xf32>) (%lhs_elem: f32, %rhs_elem: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 @@ -326,7 +326,7 @@ ``` %reduce = linalg.reduce ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<16x64xf32>) + inits(%init:tensor<16x64xf32>) dimensions = [1] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -398,7 +398,7 @@ ``` %transpose = linalg.transpose ins(%input:tensor<16x64xf32>) - outs(%init:tensor<64x16xf32>) + inits(%init:tensor<64x16xf32>) permutation = [1, 0] ``` }]; diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -143,7 +143,7 @@ return failure(); } - if (succeeded(parser.parseOptionalKeyword("outs"))) { + if (succeeded(parser.parseOptionalKeyword("inits"))) { outputsOperandsLoc = parser.getCurrentLocation(); if (parser.parseLParen() || parser.parseOperandList(outputsOperands) || parser.parseColonTypeList(outputTypes) || parser.parseRParen()) @@ -170,7 +170,7 @@ if (!inputs.empty()) p << " ins(" << inputs << " : " << inputs.getTypes() << ")"; if (!outputs.empty()) - p << " outs(" << outputs << " : " << outputs.getTypes() << ")"; + p << " inits(" << outputs << " : " << outputs.getTypes() << ")"; } static void printCommonStructuredOpPartsWithNewLine(OpAsmPrinter &p, @@ -182,7 +182,7 @@ } if (!outputs.empty()) { p.printNewline(); - p << "outs(" << outputs << " : " << outputs.getTypes() << ")"; + p << "inits(" << outputs << " : " << outputs.getTypes() << ")"; } } //===----------------------------------------------------------------------===// @@ -685,7 +685,7 @@ for (Value v : getRegionInputArgs()) setNameFn(v, "in"); for (Value v : getRegionOutputArgs()) - setNameFn(v, "out"); + setNameFn(v, "init"); } void GenericOp::build( @@ -2177,7 +2177,7 @@ OpResult resultValue = castOp.getSource().cast<OpResult>(); unsigned resultNumber = resultValue.getResultNumber(); auto resultType = castOp->getResult(0).getType().cast<RankedTensorType>(); - // Replace the `outs` for the result with a `tensor.cast`. This cast is now + // Replace the `inits` for the result with a `tensor.cast`. This cast is now // going from a more dynamic shape to a less dynamic shape. If the producer // for this cast, i.e.
producer of the out operand, is also an operation // that folds with tensor.cast consumer (like this pattern), the cast will diff --git a/mlir/test/Analysis/test-match-reduction.mlir b/mlir/test/Analysis/test-match-reduction.mlir --- a/mlir/test/Analysis/test-match-reduction.mlir +++ b/mlir/test/Analysis/test-match-reduction.mlir @@ -12,7 +12,7 @@ affine_map<(d0) -> (0)>], iterator_types = ["reduction"]} ins(%in0t : tensor<?xf32>) - outs(%out0t : tensor<1xf32>) { + inits(%out0t : tensor<1xf32>) { ^bb0(%in0: f32, %out0: f32): %add = arith.addf %in0, %out0 : f32 linalg.yield %add : f32 @@ -49,7 +49,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%in0t : tensor<4x4xf32>) - outs(%out0t : tensor<4xf32>) { + inits(%out0t : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %cmp = arith.cmpf ogt, %in0, %out0 : f32 %sel = arith.select %cmp, %in0, %out0 : f32 @@ -69,7 +69,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%in0t : tensor<4x4xf32>) - outs(%out0t : tensor<4xf32>) { + inits(%out0t : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %mul = arith.mulf %in0, %in0 : f32 %sub = arith.subf %mul, %in0 : f32 diff --git a/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir b/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir --- a/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir +++ b/mlir/test/Conversion/LinalgToSPIRV/linalg-to-spirv.mlir @@ -49,7 +49,7 @@ } { linalg.generic #single_workgroup_reduction_trait ins(%input : memref<16xi32, #spirv.storage_class<StorageBuffer>>) - outs(%output : memref<1xi32, #spirv.storage_class<StorageBuffer>>) { + inits(%output : memref<1xi32, #spirv.storage_class<StorageBuffer>>) { ^bb(%in: i32, %out: i32): %sum = arith.addi %in, %out : i32 linalg.yield %sum : i32 @@ -78,7 +78,7 @@ // expected-error @+1 {{failed to legalize operation 'linalg.generic'}} linalg.generic #single_workgroup_reduction_trait ins(%input : memref<16xi32, #spirv.storage_class<StorageBuffer>>) - outs(%output : memref<1xi32, #spirv.storage_class<StorageBuffer>>) { + inits(%output : memref<1xi32, #spirv.storage_class<StorageBuffer>>) { ^bb(%in: i32, %out: i32): %sum = arith.addi %in, %out : i32 linalg.yield %sum : i32 @@ -109,7 +109,7 @@ // expected-error @+1 {{failed to legalize operation 'linalg.generic'}} linalg.generic #single_workgroup_reduction_trait ins(%input : memref<16xi32, #spirv.storage_class<StorageBuffer>>) - outs(%output : memref<1xi32, #spirv.storage_class<StorageBuffer>>) { + inits(%output : memref<1xi32, #spirv.storage_class<StorageBuffer>>) { ^bb(%in: i32, %out: i32): %sum = arith.addi %in, %out : i32 linalg.yield %sum : i32 @@ -140,7 +140,7 @@ // expected-error @+1 {{failed to legalize operation 'linalg.generic'}} linalg.generic #single_workgroup_reduction_trait ins(%input : memref<16x8xi32, #spirv.storage_class<StorageBuffer>>) - outs(%output : memref<16xi32, #spirv.storage_class<StorageBuffer>>) { + inits(%output : memref<16xi32, #spirv.storage_class<StorageBuffer>>) { ^bb(%in: i32, %out: i32): %sum = arith.addi %in, %out : i32 linalg.yield %sum : i32 diff --git a/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir b/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir --- a/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir +++ b/mlir/test/Conversion/TensorToLinalg/tensor-ops-to-linalg.mlir @@ -7,7 +7,7 @@ // CHECK-SAME: %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { // CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32> +// CHECK: %[[FILL:.*]] =
linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32> // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32> // CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32> func.func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { @@ -32,7 +32,7 @@ // CHECK: %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32> // CHECK: %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index // CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) inits(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32> // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[OFFSET]], %[[C0]]] [4, %[[DIM1]], 2, %[[DIM3]]] [1, 1, 1, 1] : tensor<4x?x2x?xf32> into tensor<4x?x?x?xf32> // CHECK: return %[[PADDED]] : tensor<4x?x?x?xf32> // CHECK: } diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir @@ -4,8 +4,8 @@ func.func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) { // CHECK: [[C0:%.+]] = arith.constant 0 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) outs([[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) inits([[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) inits([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x6xf32>) -> (tensor<1x5x6xf32>) return %0 : tensor<1x5x6xf32> } @@ -17,10 +17,10 @@ func.func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) { // CHECK: [[C0:%.+]] = arith.constant 0 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : i32) outs([[INIT]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> + // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : i32) inits([[INIT]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> // CHECK: [[ONE:%.+]] = arith.constant 1 // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> + // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) inits([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32> %0 = "tosa.matmul"(%arg0, %arg1) {quantization_info = #tosa.matmul_quant} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> (tensor<1x5x6xi32>) return %0 : tensor<1x5x6xi32> } @@ -33,8 +33,8 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] // CHECK: %[[C0_0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[FILLED:.+]] = linalg.fill 
ins(%[[C0_0]] : f32) outs(%[[INIT]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x5x3xf32>, tensor<?x3x6xf32>) outs(%[[FILLED]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32> + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0_0]] : f32) inits(%[[INIT]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x5x3xf32>, tensor<?x3x6xf32>) inits(%[[FILLED]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<?x5x3xf32>, tensor<?x3x6xf32>) -> (tensor<?x5x6xf32>) return %0 : tensor<?x5x6xf32> } @@ -47,8 +47,8 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg1, %[[C2]] // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) inits(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> (tensor<1x5x?xf32>) return %0 : tensor<1x5x?xf32> } @@ -59,8 +59,8 @@ func.func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) { // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> - // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> + // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) inits(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32> %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> (tensor<1x5x6xf32>) return %0 : tensor<1x5x6xf32> } @@ -74,12 +74,12 @@ func.func @fully_connected(%arg0: tensor<5x3xf32>, %arg1: tensor<6x3xf32>, %arg2: tensor<6xf32>) -> (tensor<5x6xf32>) { // CHECK: [[INITT:%.+]] = tensor.empty() // CHECK: [[ZERO:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}outs([[INITT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}inits([[INITT]] // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) // CHECK: [[INITB:%.+]] = tensor.empty() - // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) outs([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) outs([[INITB]] : tensor<5x6xf32>) { + // CHECK: [[MATMUL:%.+]] = linalg.matmul ins(%arg0, [[TRANSPOSE]] : tensor<5x3xf32>, tensor<3x6xf32>) inits([[FILL]] : tensor<5x6xf32>) -> tensor<5x6xf32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xf32>, tensor<5x6xf32>) inits([[INITB]] : tensor<5x6xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): //
CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield [[ADD]] : f32 @@ -97,14 +97,14 @@ func.func @quantized_fully_connected(%arg0: tensor<5x3xi8>, %arg1: tensor<6x3xi8>, %arg2: tensor<6xi32>) -> (tensor<5x6xi32>) { // CHECK: [[INITT:%.+]] = tensor.empty() // CHECK: [[ZERO:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}outs([[INITT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[ZERO]]{{.*}}inits([[INITT]] // CHECK: [[PERM:%.+]] = arith.constant dense<[1, 0]> // CHECK: [[TRANSPOSE:%.+]] = "tosa.transpose"(%arg1, [[PERM]]) // CHECK: [[INITB:%.+]] = tensor.empty() // CHECK: [[ONE:%.+]] = arith.constant 1 // CHECK: [[TWO:%.+]] = arith.constant 2 - // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) outs([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> - // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) outs([[INITB]] + // CHECK: [[MATMUL:%.+]] = linalg.quantized_matmul ins(%arg0, [[TRANSPOSE]], [[ONE]], [[TWO]] : tensor<5x3xi8>, tensor<3x6xi8>, i32, i32) inits([[FILL]] : tensor<5x6xi32>) -> tensor<5x6xi32> + // CHECK: [[ADDED:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, [[MATMUL]] : tensor<6xi32>, tensor<5x6xi32>) inits([[INITB]] // CHECK: ^bb0([[IN1:%.+]]: i32, [[IN2:%.+]]: i32, [[UNUSED:%.+]]: i32): // CHECK: [[ADD:%.+]] = arith.addi // CHECK: linalg.yield [[ADD]] : i32 @@ -123,12 +123,12 @@ // CHECK: %[[DIM:.+]] = tensor.dim %arg0, %[[C0]] // CHECK: %[[INITT:.+]] = tensor.empty(%[[DIM]]) // CHECK: %[[ZERO:.+]] = arith.constant 0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]]{{.*}}outs(%[[INITT]] + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[ZERO]]{{.*}}inits(%[[INITT]] // CHECK: %[[PERM:.+]] = arith.constant dense<[1, 0]> // CHECK: %[[TRANSPOSE:.+]] = "tosa.transpose"(%arg1, %[[PERM]]) // CHECK: %[[INITB:.+]] = tensor.empty(%[[DIM]]) - // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor<?x3xf32>, tensor<3x6xf32>) outs(%[[FILL]] : tensor<?x6xf32>) -> tensor<?x6xf32> - // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor<?x6xf32>) outs(%[[INITB]] : tensor<?x6xf32>) { + // CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%arg0, %[[TRANSPOSE]] : tensor<?x3xf32>, tensor<3x6xf32>) inits(%[[FILL]] : tensor<?x6xf32>) -> tensor<?x6xf32> + // CHECK: %[[ADDED:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel"]} ins(%arg2, %[[MATMUL]] : tensor<6xf32>, tensor<?x6xf32>) inits(%[[INITB]] : tensor<?x6xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: %[[ADD:.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield %[[ADD]] : f32 @@ -143,9 +143,9 @@ func.func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () { // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 // CHECK-DAG: [[INIT:%.+]] = tensor.empty() - // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}outs([[INIT]] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}inits([[INIT]] // CHECK-DAG: [[KERNEL:%.+]] = tensor.empty() - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>}
ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x32x62xf32>) + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, [[KERNEL]] : tensor<1x6x34x62xf32>, tensor<3x3xf32>) inits([[FILL]] : tensor<1x4x32x62xf32>) %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x32x62xf32>) return } @@ -157,9 +157,9 @@ // CHECK-DAG: tensor.yield [[CONST]] // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32 // CHECK-DAG: [[INIT:%.+]] = tensor.empty() - // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[INITVAL]]{{.*}}outs([[INIT]] + // CHECK-DAG: [[FILL:%.+]] = linalg.fill ins([[INITVAL]]{{.*}}inits([[INIT]] // CHECK-DAG: [[KERNEL:%.+]] = tensor.empty() - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) outs([[FILL]] : tensor<1x4x33x62xf32>) + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x6x35x62xf32>, tensor<3x3xf32>) inits([[FILL]] : tensor<1x4x33x62xf32>) %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 1], kernel = [3, 3], stride = [1, 1]} : (tensor<1x6x34x62xf32>) -> (tensor<1x4x33x62xf32>) return } @@ -170,9 +170,9 @@ // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]] // CHECK: %[[CONST:.+]] = arith.constant -3.40282347E+38 // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CONST]]{{.*}}outs(%[[INIT]] + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CONST]]{{.*}}inits(%[[INIT]] // CHECK: %[[KERNEL:.+]] = tensor.empty() - // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %[[KERNEL]] : tensor<?x6x34x62xf32>, tensor<3x3xf32>) outs(%[[FILL]] : tensor<?x4x32x62xf32>) + // CHECK: linalg.pooling_nhwc_max {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%arg0, %[[KERNEL]] : tensor<?x6x34x62xf32>, tensor<3x3xf32>) inits(%[[FILL]] : tensor<?x4x32x62xf32>) %0 = "tosa.max_pool2d"(%arg0) {pad = [0, 0, 0, 0], kernel = [3, 3], stride = [1, 1]} : (tensor<?x6x34x62xf32>) -> (tensor<?x4x32x62xf32>) return } @@ -209,11 +209,11 @@ // CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0] // CHECK: [[CONST:%.+]] = arith.constant 0 // CHECK: [[POOLINIT:%.+]] = tensor.empty() - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}outs([[POOLINIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CONST]]{{.*}}inits([[POOLINIT]] // CHECK: [[KERNEL:%.+]] = tensor.empty() - // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) outs([[FILL]] : tensor<1x5x33x62xf32>) + // CHECK: [[POOL:%.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins([[PAD]], [[KERNEL]] : tensor<1x8x36x62xf32>, tensor<4x4xf32>) inits([[FILL]] : tensor<1x5x33x62xf32>) // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins([[POOL]] :
tensor<1x5x33x62xf32>) inits([[INIT]] : tensor<1x5x33x62xf32>) // CHECK: ^bb0(%[[BBARG1:[a-zA-Z0-9_]+]]: f32, // CHECK: [[ZERO:%.0]] = arith.constant 0 // CHECK: [[ONE:%.+]] = arith.constant 1 @@ -273,9 +273,9 @@ // CHECK: %[[POOLINIT:.+]] = tensor.empty(%[[BATCH]]) // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[KERNEL:.+]] = tensor.empty() - // CHECK: %[[POOL:.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%[[PAD]], %[[KERNEL]] : tensor, tensor<4x4xf32>) outs(%[[FILL]] : tensor) + // CHECK: %[[POOL:.+]] = linalg.pooling_nhwc_sum {dilations = dense<1> : vector<2xi64>, strides = dense<1> : vector<2xi64>} ins(%[[PAD]], %[[KERNEL]] : tensor, tensor<4x4xf32>) inits(%[[FILL]] : tensor) // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[POOL]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[POOL]] : tensor) inits(%[[INIT]] : tensor) %0 = "tosa.avg_pool2d"(%arg0) {pad = [1, 1, 1, 1], kernel = [4, 4], stride = [1, 1]} : (tensor) -> (tensor) return %0 : tensor } @@ -353,8 +353,8 @@ // CHECK: %[[CST:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[B_IN:.+]] = tensor.empty() - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x45x40x28xf32>) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) outs(%[[B_IN]] : tensor<1x45x40x28xf32>) + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor<1x49x42x27xf32>, tensor<3x3x27x28xf32>) inits(%[[FILL]] : tensor<1x45x40x28xf32>) + // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x45x40x28xf32>) inits(%[[B_IN]] : tensor<1x45x40x28xf32>) // CHECK: arith.addf // CHECK: linalg.yield %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor<1x49x42x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x45x40x28xf32>) @@ -376,8 +376,8 @@ // CHECK: %[[CST:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[B_IN:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor) outs(%[[B_IN]] : tensor) + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[W]] : tensor, tensor<3x3x27x28xf32>) inits(%[[FILL]] : tensor) + // CHECK: %[[B:.+]] = linalg.generic 
{indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor) inits(%[[B_IN]] : tensor) // CHECK: %[[ADD:.+]] = arith.addf // CHECK: linalg.yield %[[ADD]] : f32 %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor) @@ -436,8 +436,8 @@ // CHECK: %[[CST:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[B_IN:.+]] = tensor.empty(%[[H_OUT]], %[[W_OUT]]) - // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[WEIGHT]] : tensor<1x?x?x27xf32>, tensor<3x3x27x28xf32>) outs(%[[FILL]] : tensor<1x?x?x28xf32>) - // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x?x?x28xf32>) outs(%[[B_IN]] : tensor<1x?x?x28xf32>) + // CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %[[WEIGHT]] : tensor<1x?x?x27xf32>, tensor<3x3x27x28xf32>) inits(%[[FILL]] : tensor<1x?x?x28xf32>) + // CHECK: %[[B:.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[CONV]] : tensor<28xf32>, tensor<1x?x?x28xf32>) inits(%[[B_IN]] : tensor<1x?x?x28xf32>) // CHECK: %[[ADD:.+]] = arith.addf // CHECK: linalg.yield %[[ADD]] : f32 %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [0, 0, 0, 0], stride = [1, 1], dilation = [2, 1]} : (tensor<1x?x?x27xf32>, tensor<28x3x3x27xf32>, tensor<28xf32>) -> (tensor<1x?x?x28xf32>) @@ -477,11 +477,11 @@ func.func @depthwise_conv(%arg0 : tensor<1x7x5x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xf32>, tensor<3x1x3x11xf32>) inits([[FILL]] : tensor<1x5x5x3x11xf32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) inits([[OUT]] : tensor<1x5x5x33xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: 
linalg.yield [[ADD]] : f32 @@ -503,9 +503,9 @@ // CHECK: %[[CST0:.+]] = arith.constant 0 // CHECK: %[[FILL:.+]] = linalg.fill // CHECK: %[[OUT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[DEPTH:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor<3x1x3x11xf32>) outs(%[[FILL]] : tensor) + // CHECK: %[[DEPTH:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor<3x1x3x11xf32>) inits(%[[FILL]] : tensor) // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: %[[BIAS:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[COLLAPSED]] : tensor<33xf32>, tensor) outs(%[[OUT]] : tensor) { + // CHECK: %[[BIAS:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, %[[COLLAPSED]] : tensor<33xf32>, tensor) inits(%[[OUT]] : tensor) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: %[[ADD:.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield %[[ADD]] : f32 @@ -523,11 +523,11 @@ func.func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x11xf32>, %arg2 : tensor<33xf32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) outs([[FILL]] : tensor<1x5x5x3x11xf32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x11x9x3xf32>, tensor<3x1x3x11xf32>) inits([[FILL]] : tensor<1x5x5x3x11xf32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) outs([[OUT]] : tensor<1x5x5x33xf32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xf32>, tensor<1x5x5x33xf32>) inits([[OUT]] : tensor<1x5x5x33xf32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: f32, %[[ARG4:[0-9a-zA-Z_]+]]: f32, %[[ARG5:[0-9a-zA-Z_]+]]: f32): // CHECK: [[ADD:%.+]] = arith.addf %[[ARG3]], %[[ARG4]] : f32 // CHECK: linalg.yield [[ADD]] : f32 @@ -549,13 +549,13 @@ // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() // CHECK: [[C128:%.+]] = arith.constant -128 // CHECK: [[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q 
{dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x12x12x4x128xi32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins([[PAD]], %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) inits([[FILL]] : tensor<1x12x12x4x128xi32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) outs([[OUT]] : tensor<1x12x12x512xi32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x12x12x512xi32>) inits([[OUT]] : tensor<1x12x12x512xi32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32): // CHECK: [[ADD:%.+]] = arith.addi %[[ARG3]], %[[ARG4]] : i32 // CHECK: linalg.yield [[ADD]] : i32 @@ -573,13 +573,13 @@ func.func @depthwise_conv_quant_dilations(%arg0 : tensor<1x14x14x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] // CHECK: [[OUT:%.+]] = tensor.empty() // CHECK: [[C128:%.+]] = arith.constant -128 // CHECK: [[C42:%.+]] = arith.constant 42 - // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) outs([[FILL]] : tensor<1x10x10x4x128xi32>) + // CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm_q {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1, [[C128]], [[C42]] : tensor<1x14x14x4xi8>, tensor<3x3x4x128xi8>, i32, i32) inits([[FILL]] : tensor<1x10x10x4x128xi32>) // CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]] - // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) outs([[OUT]] : tensor<1x10x10x512xi32>) { + // CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<512xi32>, tensor<1x10x10x512xi32>) inits([[OUT]] : tensor<1x10x10x512xi32>) { // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32): // CHECK: [[ADD:%.+]] = arith.addi %[[ARG3]], %[[ARG4]] : i32 // CHECK: linalg.yield [[ADD]] : i32 @@ -598,7 +598,7 @@ // CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: index, %[[ARG4:[0-9a-zA-Z_]+]]: index, %[[ARG5:[0-9a-zA-Z_]+]]: index, %[[ARG6:[0-9a-zA-Z_]+]]: index): // CHECK: tensor.yield %cst : f32 // CHECK: } : tensor<2x?x?x3xf32> to tensor<2x?x?x3xf32> - // CHECK: %[[CONV:.+]] = 
linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} ins(%[[PADDED]], %arg1 : tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>) outs(%{{.*}} : tensor<2x?x?x3x5xf32>) -> tensor<2x?x?x3x5xf32> + // CHECK: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} ins(%[[PADDED]], %arg1 : tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>) inits(%{{.*}} : tensor<2x?x?x3x5xf32>) -> tensor<2x?x?x3x5xf32> // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[CONV]] {{\[}}[0], [1], [2], [3, 4]] %0 = "tosa.depthwise_conv2d"(%arg0, %arg1, %arg2) {pad = [1, 2, 3, 4], dilation = [2, 1], stride = [1, 2]} : (tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>, tensor<15xf32>) -> tensor<2x?x?x15xf32> return diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-resize.mlir @@ -11,7 +11,7 @@ // CHECK-SAME: indexing_maps = [#map, #map1] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xf32>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<3x15x13x7xf32>) + // CHECK-SAME: inits(%[[EMPTY]] : tensor<3x15x13x7xf32>) // CHECK-NEXT: ^bb0(%[[IN:.+]]: f32, %[[OUT:.+]]: f32): // CHECK: linalg.yield %[[IN]] %resize = "tosa.resize"(%arg0) {mode = "NEAREST_NEIGHBOR", scale = [2, 2, 1, 1], offset = [0, 0], border = [0, 0]} : (tensor<3x1x1x7xf32>) -> tensor<3x15x13x7xf32> @@ -33,7 +33,7 @@ // CHECK-SAME: indexing_maps = [#map, #map1] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xf32>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<3x15x13x7xf32>) + // CHECK-SAME: inits(%[[EMPTY]] : tensor<3x15x13x7xf32>) // CHECK-NEXT: ^bb0(%[[IN:.+]]: f32, %[[OUT:.+]]: f32): // CHECK: linalg.yield %[[IN]] %resize = "tosa.resize"(%arg0) {mode = "BILINEAR", scale = [2, 2, 1, 1], offset = [0, 0], border = [0, 0]} : (tensor<3x1x1x7xf32>) -> tensor<3x15x13x7xf32> @@ -55,7 +55,7 @@ // CHECK-SAME: indexing_maps = [#map, #map1] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<3x15x13x7xi8>) + // CHECK-SAME: inits(%[[EMPTY]] : tensor<3x15x13x7xi8>) // CHECK-NEXT: ^bb0(%[[IN:.+]]: i8, %[[OUT:.+]]: i8): // CHECK: linalg.yield %[[IN]] %resize = "tosa.resize"(%arg0) {mode = "NEAREST_NEIGHBOR", scale = [2, 2, 1, 1], offset = [0, 0], border = [0, 0]} : (tensor<3x1x1x7xi8>) -> tensor<3x15x13x7xi8> @@ -77,7 +77,7 @@ // CHECK-SAME: indexing_maps = [#map, #map1] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<3x15x13x7xi32>) + // CHECK-SAME: inits(%[[EMPTY]] : tensor<3x15x13x7xi32>) // CHECK-NEXT: ^bb0(%[[IN:.+]]: i8, %[[OUT:.+]]: i32): // CHECK: %[[EXT:.+]] = arith.extsi %[[IN]] : i8 to i32 // CHECK: linalg.yield %[[EXT]] @@ -100,7 +100,7 @@ // CHECK-SAME: indexing_maps = [#map, #map1] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[COLLAPSE]] : tensor<3x7xi8>) - // CHECK-SAME: outs(%[[EMPTY]] : tensor<3x15x13x7xi32>) + // CHECK-SAME: inits(%[[EMPTY]] : tensor<3x15x13x7xi32>) // CHECK-NEXT: 
^bb0(%[[IN:.+]]: i8, %[[OUT:.+]]: i32): // CHECK: %[[EXT:.+]] = arith.extsi %[[IN]] : i8 to i32 // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : i32 diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir @@ -6,7 +6,7 @@ // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]] func.func @test_abs(%arg0: tensor<f32>) -> tensor<f32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<f32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins(%[[ARG0]] : tensor<f32>) outs([[INIT]] : tensor<f32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = []} ins(%[[ARG0]] : tensor<f32>) inits([[INIT]] : tensor<f32>) { // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = math.absf %[[ARG1]] // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -26,7 +26,7 @@ // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]] func.func @test_abs(%arg0: tensor<2xf32>) -> tensor<2xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xf32>) inits([[INIT]] : tensor<2xf32>) { // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = math.absf %[[ARG1]] // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -45,7 +45,7 @@ // CHECK-SAME: (%[[ARG0:[0-9a-zA-Z_]*]] func.func @test_abs(%arg0: tensor<2x3xf32>) -> tensor<2x3xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xf32>) outs([[INIT]] : tensor<2x3xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xf32>) inits([[INIT]] : tensor<2x3xf32>) { // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = math.absf %[[ARG1]] // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -97,7 +97,7 @@ func.func @test_broadcast(%arg0: tensor<1xf32>, %arg1: tensor<2xf32>) -> tensor<2xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32> // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %[[ARG0]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins([[RESHAPE]], %[[ARG1]] : tensor<f32>, tensor<2xf32>) outs([[INIT]] : tensor<2xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel"]} ins([[RESHAPE]], %[[ARG1]] : tensor<f32>, tensor<2xf32>) inits([[INIT]] : tensor<2xf32>) { // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32 // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -117,7 +117,7 @@ func.func @test_broadcast_swapped_args(%arg0: tensor<2xf32>, %arg1: tensor<1xf32>) -> tensor<2xf32> { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2xf32> // CHECK: [[RESHAPE:%.+]] = tensor.collapse_shape %[[ARG1]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps =
[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[RESHAPE]] : tensor<2xf32>, tensor) outs([[INIT]] : tensor<2xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[RESHAPE]] : tensor<2xf32>, tensor) inits([[INIT]] : tensor<2xf32>) { // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32 // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -139,7 +139,7 @@ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3xf32> // CHECK: [[RESHAPE1:%.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1]] // CHECK: [[RESHAPE2:%.+]] = tensor.collapse_shape %[[ARG1]] {{\[}}[0, 1]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins([[RESHAPE1]], [[RESHAPE2]] : tensor<3xf32>, tensor<2xf32>) outs([[INIT]] : tensor<2x3xf32>) { + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP2]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins([[RESHAPE1]], [[RESHAPE2]] : tensor<3xf32>, tensor<2xf32>) inits([[INIT]] : tensor<2x3xf32>) { // CHECK: ^bb0(%[[ARG2:.*]]: f32, %[[ARG3:.*]]: f32, %[[ARG4:.*]]: f32): // CHECK: [[ELEMENT:%.+]] = arith.addf %[[ARG2]], %[[ARG3]] : f32 // CHECK: linalg.yield [[ELEMENT]] : f32 @@ -654,7 +654,7 @@ func.func @test_transpose(%arg0: tensor<1x2x3xi32>) -> () { %0 = arith.constant dense<[1, 2, 0]> : tensor<3xi32> // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<2x3x1xi32> - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel"]} ins([[ARG0]] : tensor<1x2x3xi32>) outs([[OUT:%.+]] : tensor<2x3x1xi32>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel"]} ins([[ARG0]] : tensor<1x2x3xi32>) inits([[OUT:%.+]] : tensor<2x3x1xi32>) // CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32) // CHECK: linalg.yield [[ARG1]] // CHECK: } @@ -674,7 +674,7 @@ // CHECK: %[[C1:.+]] = arith.constant 1 // CHECK: %[[DIM:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]]) : tensor - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x3x4xi32>) outs([[OUT:%.+]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x3x4xi32>) inits([[OUT:%.+]] : tensor) // CHECK: ^bb0([[ARG1:%.+]]: i32, [[ARG2:%.+]]: i32) // CHECK: linalg.yield [[ARG1]] // CHECK: } @@ -696,7 +696,7 @@ // CHECK: %[[C1:.+]] = arith.constant 1 // CHECK: %[[DIM1:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM0]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs([[OUT:%.+]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits([[OUT:%.+]] : tensor) // CHECK: ^bb0([[ARG1:%.+]]: f32, [[ARG2:%.+]]: f32) // CHECK: linalg.yield [[ARG1]] // CHECK: } @@ -715,8 +715,8 @@ func.func 
@reduce_float(%arg0: tensor<5x4xf32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<4xf32> // CHECK: [[CST0:%.+]] = arith.constant 0.0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<4xf32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xf32>) inits([[FILL]] : tensor<4xf32>) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield [[RES]] : f32 @@ -725,8 +725,8 @@ // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<5xf32> // CHECK: [[CST0:%.+]] = arith.constant 0.0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xf32>) outs([[FILL]] : tensor<5xf32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xf32>) inits([[FILL]] : tensor<5xf32>) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: [[RES:%.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield [[RES]] : f32 @@ -765,8 +765,8 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor // CHECK: %[[CST0:.+]] = arith.constant 0.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[FILL]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[FILL]] : tensor) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[RES:.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[RES]] : f32 @@ -785,8 +785,8 @@ func.func @reduce_float_dyn_rank_1(%arg0: tensor) -> () { // CHECK-DAG: %[[INIT:.+]] = tensor.empty() : tensor // CHECK-DAG: %[[CST0:.+]] = arith.constant 0.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction"]} ins(%[[ARG0]] : tensor) outs(%[[FILL]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST0]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction"]} ins(%[[ARG0]] : tensor) inits(%[[FILL]] : tensor) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[RES:.+]] = arith.addf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[RES]] : f32 @@ -807,8 +807,8 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<5x?xf32> // CHECK: %[[CST1:.+]] = arith.constant 1.0 - // CHECK: %[[FILL:.+]] = linalg.fill 
ins(%[[CST1]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[ARG0]] : tensor<5x?x4xf32>) outs(%[[FILL]] : tensor<5x?xf32>) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST1]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[ARG0]] : tensor<5x?x4xf32>) inits(%[[FILL]] : tensor<5x?xf32>) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[RES:.+]] = arith.mulf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[RES]] : f32 @@ -829,8 +829,8 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) // CHECK: %[[CMIN:.+]] = arith.constant -3.40282347E+38 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CMIN]]{{.*}}outs(%[[INIT]] - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor) outs(%[[FILL]] : tensor) + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CMIN]]{{.*}}inits(%[[INIT]] + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor) inits(%[[FILL]] : tensor) // CHECK: ^bb0(%[[ARG1:.*]]: f32, %[[ARG2:.*]]: f32) // CHECK: %[[MAX:.+]] = arith.maxf %[[ARG1]], %[[ARG2]] : f32 // CHECK: linalg.yield %[[MAX]] : f32 @@ -850,8 +850,8 @@ func.func @reduce_int(%arg0: tensor<5x4xi32>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<4xi32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi32>) inits([[FILL]] : tensor<4xi32>) // CHECK: ^bb0(%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) // CHECK: [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32 // CHECK: linalg.yield [[RES]] : i32 @@ -860,8 +860,8 @@ // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant 0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xi32>) outs([[FILL]] : tensor<5xi32>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP2]]], iterator_types = ["parallel", "reduction"]} ins([[ARG0]] : tensor<5x4xi32>) inits([[FILL]] : tensor<5xi32>) // CHECK: ^bb0(%[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32) // CHECK: [[RES:%.+]] = arith.addi %[[ARG1]], %[[ARG2]] : i32 // CHECK: linalg.yield [[RES]] : i32 @@ -900,8 +900,8 @@ func.func @reduce_bool(%arg0: tensor<5x4xi1>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() // CHECK: [[CST0:%.+]] = arith.constant true - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]] - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi1>) 
outs([[FILL]] : tensor<4xi1>) + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}inits([[INIT]] + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins([[ARG0]] : tensor<5x4xi1>) inits([[FILL]] : tensor<4xi1>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i1, %[[ARG2:[0-9a-zA-Z_]+]]: i1) // CHECK: [[RES:%.+]] = arith.andi %[[ARG1]], %[[ARG2]] : i1 // CHECK: linalg.yield [[RES]] : i1 @@ -930,7 +930,7 @@ // CHECK: [[IDX1:%.+]] = arith.constant 1 : index // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<11x1xf32> // CHECK: [[CST:%.+]] = arith.constant 0.0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}inits([[INIT]] // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %[[ARG0]] into [[FILL]][0, 0] [5, 1] [1, 1] // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %[[ARG1]] into [[INSERT0]][5, 0] [6, 1] [1, 1] %0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<5x1xf32>, tensor<6x1xf32>) -> (tensor<11x1xf32>) @@ -942,7 +942,7 @@ // CHECK: [[IDX1:%.+]] = arith.constant 1 : index // CHECK: [[INIT:%.+]] = tensor.empty() : tensor<5x2xf32> // CHECK: [[CST:%.+]] = arith.constant 0.0 - // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}outs([[INIT]] + // CHECK: [[FILL:%.+]] = linalg.fill ins([[CST]]{{.*}}inits([[INIT]] // CHECK: [[INSERT0:%.+]] = tensor.insert_slice %[[ARG0]] into [[FILL]][0, 0] [5, 1] [1, 1] // CHECK: [[INSERT1:%.+]] = tensor.insert_slice %[[ARG0]] into [[INSERT0]][0, 1] [5, 1] [1, 1] %1 = "tosa.concat"(%arg0, %arg0) { axis = 1 : i64} : (tensor<5x1xf32>, tensor<5x1xf32>) -> (tensor<5x2xf32>) @@ -965,7 +965,7 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[IDX1_2]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor<11x?xf32> // CHECK: %[[CST:.+]] = arith.constant 0.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}outs(%[[INIT]] + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}inits(%[[INIT]] // CHECK: %[[INSERT0:.+]] = tensor.insert_slice %[[ARG0]] into %[[FILL]][0, 0] [5, %[[SIZE]]] [1, 1] // CHECK: %[[INSERT1:.+]] = tensor.insert_slice %[[ARG1]] into %[[INSERT0]][5, 0] [6, %[[SIZE]]] [1, 1] %0 = "tosa.concat"(%arg0, %arg1) { axis = 0 : i64} : (tensor<5x?xf32>, tensor<6x?xf32>) -> (tensor<11x?xf32>) @@ -988,7 +988,7 @@ // CHECK: %[[IDX1:.+]] = arith.constant 1 : index // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) : tensor // CHECK: %[[CST:.+]] = arith.constant 0.0 - // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}outs(%[[INIT]] + // CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST]]{{.*}}inits(%[[INIT]] // CHECK: %[[DYN1:.+]] = tensor.dim %[[ARG0]], %[[AXIS]] // CHECK: %[[INSERT0:.+]] = tensor.insert_slice %[[ARG0]] into %[[FILL]][0, 0] [%[[DYN1]], 3] [1, 1] // CHECK: %[[SUM:.+]] = arith.addi %[[OFFSET]], %[[DYN1]] @@ -1007,7 +1007,7 @@ // CHECK: [[C0:%.+]] = arith.constant 19689 // CHECK: [[C1:%.+]] = arith.constant 15 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) inits([[INIT]] : tensor<2xi8>) // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: i8): // CHECK: [[C17:%.+]] = arith.constant 17 // CHECK: [[C22:%.+]] = arith.constant 22 @@ -1028,7 +1028,7 @@ // CHECK: [[C0:%.+]] = 
arith.constant 19689 // CHECK: [[C1:%.+]] = arith.constant 15 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) outs([[INIT]] : tensor<2xui8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xi8>) inits([[INIT]] : tensor<2xui8>) // CHECK: ^bb0([[IN:%.+]]: i8, [[UNUSED:%.+]]: ui8): // CHECK: [[C17:%.+]] = arith.constant 17 // CHECK: [[C22:%.+]] = arith.constant 22 @@ -1061,13 +1061,13 @@ // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor) %0 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor) -> (tensor) // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) : tensor - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor) %1 = "tosa.rescale"(%arg0) {input_zp = 17 : i32, output_zp = 22 : i32, multiplier = [19689 : i32], shift = [15 : i32], scale32 = false, double_round = false, per_channel = false} : (tensor) -> (tensor) return @@ -1085,7 +1085,7 @@ // CHECK: %[[C2:.+]] = arith.constant 2 // CHECK: %[[DIM2:.+]] = tensor.dim %[[ARG0]], %[[C2]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM1]], %[[DIM2]]) - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x?x32xi32>) outs(%[[INIT]] : tensor<1x?x?x32xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<1x?x?x32xi32>) inits(%[[INIT]] : tensor<1x?x?x32xi8>) %0 = "tosa.rescale"(%arg0) {double_round = true, input_zp = 0 : i32, multiplier = [1376784203 : i32], output_zp = 0 : i32, per_channel = false, scale32 = true, shift = [38 : i32]} : (tensor<1x?x?x32xi32>) -> tensor<1x?x?x32xi8> return } @@ -1100,7 +1100,7 @@ // CHECK: [[C0:%.+]] = arith.constant 19689 // CHECK: [[C1:%.+]] = arith.constant 15 // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xui8>) outs([[INIT]] : tensor<2xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<2xui8>) inits([[INIT]] : tensor<2xi8>) // CHECK: ^bb0([[IN:%.+]]: ui8, [[UNUSED:%.+]]: i8): // CHECK: [[C17:%.+]] = arith.constant 17 // 
CHECK: [[C22:%.+]] = arith.constant 22 @@ -1132,7 +1132,7 @@ // CHECK: [[MULTIPLIERS:%.+]] = arith.constant dense<[42, 43, 0]> // CHECK: [[SHIFTS:%.+]] = arith.constant dense<[14, 15, 0]> // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) outs([[INIT]] : tensor<3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP0]], #[[$MAP0]], #[[$MAP0]]], iterator_types = ["parallel"]} ins(%[[ARG0]], [[MULTIPLIERS]], [[SHIFTS]] : tensor<3xi8>, tensor<3xi32>, tensor<3xi8>) inits([[INIT]] : tensor<3xi8>) // CHECK: ^bb0([[IN:%.+]]: i8, [[MULTIPLIER:%.+]]: i32, [[SHIFT:%.+]]: i8, [[UNUSED:%.+]]: i8): // CHECK: [[C243:%.+]] = arith.constant 243 // CHECK: [[C252:%.+]] = arith.constant 252 @@ -1185,7 +1185,7 @@ // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} inits(%[[INIT]] : tensor<5x4xi32>) // CHECK-DAG: %[[I0:.+]] = linalg.index 0 // CHECK-DAG: %[[I1:.+]] = linalg.index 1 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1 @@ -1198,7 +1198,7 @@ // CHECK: %[[C1:.+]] = arith.constant 1 // CHECK: %[[RDIM:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} outs(%[[INIT]] : tensor<5x4xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel", "parallel"]} inits(%[[INIT]] : tensor<5x4xi32>) // CHECK-DAG: %[[I0:.+]] = linalg.index 0 // CHECK-DAG: %[[I1:.+]] = linalg.index 1 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1 @@ -1222,7 +1222,7 @@ // CHECK: %[[C0_2:.+]] = arith.constant 0 // CHECK: %[[D0_2:.+]] = tensor.dim %[[ARG0]], %[[C0_2]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[D0_1]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]]], iterator_types = ["parallel"]} inits(%[[INIT]] : tensor) // CHECK-DAG: %[[I0:.+]] = linalg.index 0 // CHECK-DAG: %[[SUB1:.+]] = arith.constant 1 // CHECK-DAG: %[[RDIM_MINUS_C1:.+]] = arith.subi %[[D0_2]], %[[SUB1]] @@ -1242,21 +1242,21 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor<2x3xi8> func.func @tile(%arg0 : tensor<2x3xi8>) -> () { // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<2x2x1x3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits([[INIT]] : tensor<2x2x1x3xi8>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8 // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1, 2], [3]] %0 = "tosa.tile"(%arg0) {multiples = [2, 1]} : (tensor<2x3xi8>) -> (tensor<4x3xi8>) // 
CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<1x2x2x3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits([[INIT]] : tensor<1x2x2x3xi8>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8 // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1], [2, 3]] %1 = "tosa.tile"(%arg0) {multiples = [1, 2]} : (tensor<2x3xi8>) -> (tensor<2x6xi8>) // CHECK: [[INIT:%.+]] = tensor.empty() - // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs([[INIT]] : tensor<5x2x7x3xi8>) + // CHECK: [[GENERIC:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits([[INIT]] : tensor<5x2x7x3xi8>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i8 // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: tensor.collapse_shape [[GENERIC]] {{\[}}[0, 1], [2, 3]] @@ -1276,7 +1276,7 @@ // CHECK: %[[CST0:.+]] = arith.constant 0 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]] : tensor // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor<2x?x1x3xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor<2x?x1x3xi8>) // CHECK: ^bb0(%[[ARG1:.+]]: i8, // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[GENERIC]] {{\[}}[0, 1, 2, 3]] @@ -1297,7 +1297,7 @@ // CHECK: %[[CST1:.+]] = arith.constant 1 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]] : tensor<2x3xi8> // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) outs(%[[INIT]] : tensor<2x2x?x3xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%[[ARG0]] : tensor<2x3xi8>) inits(%[[INIT]] : tensor<2x2x?x3xi8>) // CHECK: ^bb0(%[[ARG1:.+]]: i8, // CHECK: linalg.yield %[[ARG1]] : i8 // CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[GENERIC]] {{\[}}[0, 1, 2, 3]] @@ -1405,11 +1405,11 @@ func.func @argmax(%arg0 : tensor<3x2xi32>, %arg1 : tensor<6xf32>) -> () { // CHECK: [[IDX_INIT:%.+]] = tensor.empty() // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32 - // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]] + // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}inits([[IDX_INIT]] // CHECK: [[VAL_INIT:%.+]] = tensor.empty() // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648 - // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = 
["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>) + // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}inits([[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x2xi32>) inits([[IDX_FILL]], [[VAL_FILL]] : tensor<2xi32>, tensor<2xi32>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32 // CHECK: [[IDX:%.+]] = linalg.index 0 // CHECK: [[CAST:%.+]] = arith.index_cast [[IDX]] @@ -1421,11 +1421,11 @@ // CHECK: [[IDX_INIT:%.+]] = tensor.empty() // CHECK: [[IDX_MIN:%.+]] = arith.constant 0 : i32 - // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}outs([[IDX_INIT]] + // CHECK: [[IDX_FILL:%.+]] = linalg.fill ins([[IDX_MIN]]{{.*}}inits([[IDX_INIT]] // CHECK: [[VAL_INIT:%.+]] = tensor.empty() // CHECK: [[VAL_MIN:%.+]] = arith.constant -2147483648 - // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}outs([[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#map, #map2, #map2], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x2xi32>) outs([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) + // CHECK: [[VAL_FILL:%.+]] = linalg.fill ins([[VAL_MIN]]{{.*}}inits([[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#map, #map2, #map2], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x2xi32>) inits([[IDX_FILL]], [[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32 // CHECK: [[IDX:%.+]] = linalg.index 1 // CHECK: [[CAST:%.+]] = arith.index_cast [[IDX]] @@ -1457,11 +1457,11 @@ // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST1]] // CHECK: %[[IDX_INIT:.+]] = tensor.empty(%[[DYN]]) // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32 - // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]] + // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}inits(%[[IDX_INIT]] // CHECK: %[[VAL_INIT:.+]] = tensor.empty(%[[DYN]]) // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648 - // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor, tensor) + // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}inits(%[[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["reduction", "parallel"]} ins(%[[ARG0]] : tensor<3x?xi32>) inits(%[[IDX_FILL]], %[[VAL_FILL]] : tensor, tensor) // CHECK: ^bb0(%[[ARG1:[0-9a-zA-Z_]+]]: i32, %[[ARG2:[0-9a-zA-Z_]+]]: i32, %[[ARG3:[0-9a-zA-Z_]+]]: i32 // CHECK: %[[IDX:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[IDX]] @@ -1481,11 +1481,11 @@ func.func @argmax_dyn_axis(%arg0 : tensor<3x?xi32>) -> () { // CHECK: %[[IDX_INIT:.+]] = tensor.empty() // CHECK: %[[IDX_MIN:.+]] = arith.constant 0 : i32 - // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}outs(%[[IDX_INIT]] + // CHECK: %[[IDX_FILL:.+]] = linalg.fill ins(%[[IDX_MIN]]{{.*}}inits(%[[IDX_INIT]] // CHECK: %[[VAL_INIT:.+]] = tensor.empty() // CHECK: %[[VAL_MIN:.+]] = arith.constant -2147483648 - // CHECK: %[[VAL_FILL:.+]] = linalg.fill 
ins(%[[VAL_MIN]]{{.*}}outs(%[[VAL_INIT]] - // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x?xi32>) outs(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) + // CHECK: %[[VAL_FILL:.+]] = linalg.fill ins(%[[VAL_MIN]]{{.*}}inits(%[[VAL_INIT]] + // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[ARG0]] : tensor<3x?xi32>) inits(%[[IDX_FILL]], %[[VAL_FILL]] : tensor<3xi32>, tensor<3xi32>) // CHECK: %[[IDX:.+]] = linalg.index 1 // CHECK: %[[CAST:.+]] = arith.index_cast %[[IDX]] // CHECK: %[[CMP:.+]] = arith.cmpi sgt, %[[ARG1]], %[[ARG3]] @@ -1503,7 +1503,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]] func.func @gather_float(%arg0: tensor<2x3x2xf32>, %arg1: tensor<2x3xi32>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xf32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) inits(%[[INIT]] : tensor<2x3x2xf32>) // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32) // CHECK: %[[IDX0:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]] @@ -1523,7 +1523,7 @@ // CHECK: %[[C0:.+]] = arith.constant 0 // CHECK: %[[BATCH:.+]] = tensor.dim %[[ARG0]], %[[C0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[BATCH]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor) inits(%[[INIT]] : tensor) // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: f32) // CHECK: %[[IDX0:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]] @@ -1541,7 +1541,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]] func.func @gather_int(%arg0: tensor<2x3x2xi32>, %arg1: tensor<2x3xi32>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) outs(%[[INIT]] : tensor<2x3x2xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%[[ARG1]] : tensor<2x3xi32>) inits(%[[INIT]] : tensor<2x3x2xi32>) // CHECK: ^bb0(%[[BBARG0:.+]]: i32, %[[BBARG1:.+]]: i32) // CHECK: %[[IDX0:.+]] = linalg.index 0 // CHECK: %[[CAST:.+]] = arith.index_cast %[[BBARG0]] @@ -1559,7 +1559,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: func.func @table8(%arg0: tensor<6xi8>, %arg1: tensor<512xi8>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) inits(%[[INIT]] : tensor<6xi8>) // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8) // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]] // CHECK: %[[OFFSET:.+]] = arith.constant 128 @@ 
-1577,7 +1577,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: func.func @table16(%arg0: tensor<6xi16>, %arg1: tensor<513xi16>) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi16>) outs(%[[INIT]] : tensor<6xi32>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi16>) inits(%[[INIT]] : tensor<6xi32>) // CHECK: ^bb0(%[[ARG2:.*]]: i16, %[[ARG3:.*]]: i32) // CHECK: %[[EXT_IN:.+]] = arith.extsi %[[ARG2]] // CHECK: %[[C32768:.+]] = arith.constant 32768 @@ -1612,7 +1612,7 @@ // CHECK: %[[CST0:.+]] = arith.constant 0 // CHECK: %[[DYN:.+]] = tensor.dim %[[ARG0]], %[[CST0]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[DYN]]) - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor) outs(%[[INIT]] : tensor) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor) inits(%[[INIT]] : tensor) // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8) // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]] // CHECK: %[[OFFSET:.+]] = arith.constant 128 @@ -1630,7 +1630,7 @@ // CHECK-SAME: %[[ARG1:[0-9a-zA-Z_]*]]: func.func @table8_dyn_table(%arg0: tensor<6xi8>, %arg1: tensor) -> () { // CHECK: %[[INIT:.+]] = tensor.empty() - // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) outs(%[[INIT]] : tensor<6xi8>) + // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} ins(%[[ARG0]] : tensor<6xi8>) inits(%[[INIT]] : tensor<6xi8>) // CHECK: ^bb0(%[[ARG_IN:.+]]: i8, %[[ARG_INIT:.+]]: i8) // CHECK: %[[CAST:.+]] = arith.index_cast %[[ARG_IN]] // CHECK: %[[OFFSET:.+]] = arith.constant 128 diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-alloc-tensor-elimination.mlir @@ -11,7 +11,7 @@ %f0 = arith.constant 0.0: f32 // CHECK: %[[EXTRACT_SLICE_ALLOC:.*]] = memref.alloc(%[[sz]]) - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[EXTRACT_SLICE_ALLOC]] : memref) + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[EXTRACT_SLICE_ALLOC]] : memref) // Alloc is needed for the **first** insert_slice (due to backward traversal during analysis). // CHECK: %[[DIM:.*]] = memref.dim %[[FUNC_ARG]] // This allocs the whole dim to allow for a full clone of t. @@ -20,7 +20,7 @@ // insert_slice. AllocTensorOp replaces the alloc_tensor with an out-of-place // extract_slice. 
%a = bufferization.alloc_tensor(%sz) : tensor - %f = linalg.fill ins(%f0 : f32) outs(%a : tensor) -> tensor + %f = linalg.fill ins(%f0 : f32) inits(%a : tensor) -> tensor // CHECK: memref.copy %[[FUNC_ARG]], %[[ALLOC]] : memref to memref // CHECK: %[[SV0_ALLOC:.*]] = memref.subview %[[ALLOC]][0] [%[[sz]]] [1] : memref to memref> @@ -52,8 +52,8 @@ // CHECK: %[[T_SUBVIEW:.*]] = memref.subview %[[FUNC_ARG]][42] [%[[sz]]] [1] %a = bufferization.alloc_tensor(%sz) : tensor - // CHECK: linalg.fill ins({{.*}} : f32) outs(%[[T_SUBVIEW]] : memref) -> tensor + // CHECK: linalg.fill ins({{.*}} : f32) inits(%[[T_SUBVIEW]] : memref) -> tensor // Self-copy canonicalizes away later. %r1 = tensor.insert_slice %f into %t[42][%sz][1]: tensor into tensor @@ -79,8 +79,8 @@ %iv_i32 = arith.index_cast %iv : index to i32 %f = arith.sitofp %iv_i32 : i32 to f32 - // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[subview]] - %filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) -> tensor<5xf32> + // CHECK: linalg.fill ins(%{{.*}}{{.*}}inits(%[[subview]] + %filled = linalg.fill ins(%f : f32) inits(%blank : tensor<5xf32>) -> tensor<5xf32> // CHECK-NOT: memref.copy %inserted = tensor.insert_slice %filled into %bb[%iv][5][1] : tensor<5xf32> into tensor @@ -109,8 +109,8 @@ %iv_i32 = arith.index_cast %iv : index to i32 %f = arith.sitofp %iv_i32 : i32 to f32 - // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[subview]] - %filled = linalg.fill ins(%f : f32) outs(%blank : tensor<5xf32>) -> tensor<5xf32> + // CHECK: linalg.fill ins(%{{.*}}{{.*}}inits(%[[subview]] + %filled = linalg.fill ins(%f : f32) inits(%blank : tensor<5xf32>) -> tensor<5xf32> // CHECK-NOT: memref.copy %inserted = tensor.insert_slice %filled into %bb[%idx][5][1] : tensor<5xf32> into tensor @@ -130,7 +130,7 @@ func.func @shape_mismatch(%t: tensor<5x6x128xf32>) -> tensor<5x6x128xf32> { %cst = arith.constant 8.0 : f32 %0 = bufferization.alloc_tensor() : tensor<128xf32> - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<128xf32>) -> tensor<128xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<128xf32>) -> tensor<128xf32> %2 = tensor.expand_shape %1 [[0, 1, 2]] : tensor<128xf32> into tensor<1x1x128xf32> %3 = tensor.insert_slice %2 into %t[2, 3, 0][1, 1, 128][1, 1, 1] diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize-partial.mlir @@ -137,10 +137,10 @@ // One alloc for the alloc_tensor, another one because the transfer_write // bufferizes out-of-place. // CHECK: %[[m1:.*]] = memref.alloc() {{.*}} : memref<10xf32> - // CHECK: linalg.fill ins(%{{.*}}{{.*}}outs(%[[m1]] + // CHECK: linalg.fill ins(%{{.*}}{{.*}}inits(%[[m1]] // CHECK: %[[filled_tensor:.*]] = bufferization.to_tensor %[[m1]] %t1 = bufferization.alloc_tensor() : tensor<10xf32> - %filled = linalg.fill ins(%cst : f32) outs(%t1 : tensor<10xf32>) -> tensor<10xf32> + %filled = linalg.fill ins(%cst : f32) inits(%t1 : tensor<10xf32>) -> tensor<10xf32> // The transfer_write is out-of-place because "dummy_op" may read. 
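// For orientation, a hedged sketch of the bufferized form these CHECK lines
// describe (the %buf name is illustrative, not taken from the test): once the
// init operand is a memref, linalg.fill writes in place and returns nothing.
//
//   %buf = memref.alloc() : memref<10xf32>
//   linalg.fill ins(%cst : f32) inits(%buf : memref<10xf32>)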
// CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<10xf32> diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-allow-return-allocs.mlir @@ -51,13 +51,13 @@ // CHECK: %[[call:.*]] = call @return_slice(%[[t]] // CHECK: %[[alloc:.*]] = memref.alloc // CHECK: memref.copy %[[call]], %[[alloc]] -// CHECK: linalg.fill ins({{.*}}) outs(%[[t]] +// CHECK: linalg.fill ins({{.*}}) inits(%[[t]] // CHECK: memref.load %[[alloc]] // CHECK: memref.load %[[t]] func.func @main(%t: tensor, %sz: index, %idx: index) -> (f32, f32) { %cst = arith.constant 1.0 : f32 %0 = call @return_slice(%t, %sz) : (tensor, index) -> (tensor) - %filled = linalg.fill ins(%cst : f32) outs(%t : tensor) -> tensor + %filled = linalg.fill ins(%cst : f32) inits(%t : tensor) -> tensor %r1 = tensor.extract %0[%idx] : tensor %r2 = tensor.extract %filled[%idx] : tensor return %r1, %r2 : f32, f32 diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-analysis.mlir @@ -75,21 +75,21 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%B: tensor<4x4xf32>) + inits(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // matmul output operand interferes with input operand. // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%B: tensor<4x4xf32>) + inits(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // matmul output operand does not interfere with input operand.
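// A hedged reading of the analysis results above (reasoning only; the ops are
// the ones from this test): the init operand is marked "false" whenever the
// init tensor is still live as an ins operand, so bufferization must copy it:
//
//   %D = linalg.matmul ins(%B, %A : tensor<4x4xf32>, tensor<4x4xf32>)
//                      inits(%B : tensor<4x4xf32>) -> tensor<4x4xf32>
//
// An init that no ins operand aliases, as in %E below, is written in place.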
// CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%B: tensor<4x4xf32>) + inits(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // CHECK: return @@ -260,7 +260,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} @@ -292,7 +292,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor // CHECK: tensor.insert_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} @@ -304,7 +304,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor) -> tensor + %5 = linalg.fill ins(%cst : f32) inits(%4 : tensor) -> tensor %3 = vector.transfer_read %1[%idx2], %cst2 : tensor, vector<5xf32> @@ -336,14 +336,14 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%B: tensor<4x4xf32>) + inits(%B: tensor<4x4xf32>) -> tensor<4x4xf32> // matmul output operand is inplaceable at the function boundary. // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%C: tensor<4x4xf32>) + inits(%C: tensor<4x4xf32>) -> tensor<4x4xf32> // CHECK: return @@ -370,7 +370,7 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]} %D = linalg.matmul ins(%B, %C: tensor, tensor) - outs(%sB: tensor<4x4xf32>) + inits(%sB: tensor<4x4xf32>) -> tensor<4x4xf32> // Step 2. %sC forward propagates to an inplace write in %E. @@ -385,7 +385,7 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%sC: tensor<4x4xf32>) + inits(%sC: tensor<4x4xf32>) -> tensor<4x4xf32> return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> @@ -410,7 +410,7 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} - %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) outs(%15 : tensor) -> tensor + %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) inits(%15 : tensor) -> tensor // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]} @@ -450,7 +450,7 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %D = linalg.matmul ins(%B, %C: tensor, tensor) - outs(%sB: tensor<4x4xf32>) + inits(%sB: tensor<4x4xf32>) -> tensor<4x4xf32> // Step 2. %sC forward propagates to an inplace write in %E. 
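// A minimal sketch of the slice-update pattern these hunks exercise (sizes
// assumed 4x4 and 8x8 purely for illustration): a matmul may write its init
// slice in place when nothing later reads the overwritten region of %C.
//
//   %sC = tensor.extract_slice %C[0, 0] [4, 4] [1, 1] : tensor<8x8xf32> to tensor<4x4xf32>
//   %E = linalg.matmul ins(%A, %sB : tensor<4x4xf32>, tensor<4x4xf32>)
//                      inits(%sC : tensor<4x4xf32>) -> tensor<4x4xf32>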
@@ -465,7 +465,7 @@ // CHECK: linalg.matmul // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]} %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>) - outs(%sC: tensor<4x4xf32>) + inits(%sC: tensor<4x4xf32>) -> tensor<4x4xf32> return %D, %E: tensor<4x4xf32>, tensor<4x4xf32> @@ -504,7 +504,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]} %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32> - %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32> + %FA = linalg.fill ins(%f0 : f32) inits(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32> %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor @@ -527,7 +527,7 @@ %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor to tensor<4x?xf32> %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32> - %FB = linalg.fill ins(%f0 : f32) outs(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32> + %FB = linalg.fill ins(%f0 : f32) inits(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32> %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32> %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor into tensor @@ -550,7 +550,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]} %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor to tensor %ssC = tensor.extract_slice %sC[0, 0][%sz1, 4][1, 1] : tensor to tensor - %FC = linalg.fill ins(%f0 : f32) outs(%ssC : tensor) -> tensor + %FC = linalg.fill ins(%f0 : f32) inits(%ssC : tensor) -> tensor %rsC = tensor.insert_slice %FC into %sC[0, 0][%sz2, 4][1, 1] : tensor into tensor %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor into tensor @@ -577,12 +577,12 @@ // cannot bufferize inplace. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} - %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> + %A = linalg.fill ins(%f1 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32> // 1. Bufferizes inplace: no alias to %A is yet possible. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> + %B = linalg.fill ins(%f2 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32> call @foo(%A) : (tensor<64xf32>) -> () call @foo(%B) : (tensor<64xf32>) -> () @@ -613,12 +613,12 @@ // bufferize inplace. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} - %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> + %A = linalg.fill ins(%f1 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32> // 4. Bufferizes inplace: no alias to %A is yet possible. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> + %B = linalg.fill ins(%f2 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32> // 3. Does not read or write, bufferizes inplace. // CHECK: scf.for @@ -638,12 +638,12 @@ // cannot bufferize inplace. 
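// Hedged illustration of the conflict called out here (the ops mirror this
// test; the attribute values are the ones the analysis assigns): two fills
// into the same init interfere, so only one of them can reuse the buffer,
// and the backward traversal hands it to the later fill.
//
//   %A = linalg.fill ins(%f1 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32>  // "false": out of place
//   %B = linalg.fill ins(%f2 : f32) inits(%I : tensor<64xf32>) -> tensor<64xf32>  // "true": in place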
// CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} - %A2 = linalg.fill ins(%f1 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32> + %A2 = linalg.fill ins(%f1 : f32) inits(%I2 : tensor<64xf32>) -> tensor<64xf32> // 1. Bufferizes inplace: no alias to %A2 is yet possible. // CHECK: fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %B2 = linalg.fill ins(%f2 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32> + %B2 = linalg.fill ins(%f2 : f32) inits(%I2 : tensor<64xf32>) -> tensor<64xf32> call @bar(%A2) : (tensor<64xf32>) -> () call @bar(%B2) : (tensor<64xf32>) -> () @@ -688,8 +688,8 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} - %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> - %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> + %8 = linalg.fill ins(%cst_0 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> + %11 = linalg.fill ins(%cst_1 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: tensor.extract_slice // CHECK-SAME: {__inplace_operands_attr__ = ["true"]} @@ -701,7 +701,7 @@ %sB = tensor.extract_slice %11[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32> %r = linalg.matmul ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>) - outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: return // CHECK-SAME: __equivalent_func_args__ = [2] @@ -726,7 +726,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} // CHECK: vector.transfer_write // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"] - %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> + %8 = linalg.fill ins(%cst_0 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> %9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32> %10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32> @@ -734,7 +734,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} // CHECK: vector.transfer_write // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"] - %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> + %11 = linalg.fill ins(%cst_1 : f32) inits(%7 : tensor<256x256xf32>) -> tensor<256x256xf32> %12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32> %13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32> @@ -748,7 +748,7 @@ %sB = tensor.extract_slice %13[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32> %r = linalg.matmul ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>) - outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> + inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32> // CHECK: return // CHECK-SAME: __equivalent_func_args__ = [2] @@ -779,7 +779,7 @@ // CHECK: linalg.fill // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] - %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32> + %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32> // CHECK: tensor.extract_slice // CHECK-SAME: 
{__inplace_operands_attr__ = ["true"] @@ -857,7 +857,7 @@ // CHECK: linalg.generic // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"] %o:2 = linalg.generic #trait ins(%t1 : tensor) - outs (%t2, %t2 : tensor, tensor) { + inits (%t2, %t2 : tensor, tensor) { ^bb(%0: f32, %1: f32, %2 : f32) : linalg.yield %0, %0 : f32, f32 } -> (tensor, tensor) @@ -892,7 +892,7 @@ // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false", "false"] %o:3 = linalg.generic #trait ins(%t1 : tensor) - outs (%t2, %t2, %t2 : tensor, tensor, tensor) { + inits (%t2, %t2, %t2 : tensor, tensor, tensor) { ^bb(%0: f32, %1: f32, %2 : f32, %3 : f32) : linalg.yield %0, %0, %0 : f32, f32, f32 } -> (tensor, tensor, tensor) diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize-invalid.mlir @@ -222,7 +222,7 @@ func.func @mini_test_case1() -> tensor<10x20xf32> { %f0 = arith.constant 0.0 : f32 %t = bufferization.alloc_tensor() : tensor<10x20xf32> - %r = linalg.fill ins(%f0 : f32) outs(%t : tensor<10x20xf32>) -> tensor<10x20xf32> + %r = linalg.fill ins(%f0 : f32) inits(%t : tensor<10x20xf32>) -> tensor<10x20xf32> // expected-error @+1 {{operand #0 of ReturnLike op does not satisfy destination passing style}} return %r : tensor<10x20xf32> } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-module-bufferize.mlir @@ -244,7 +244,7 @@ // CHECK-NOT: copy func.func @does_not_read(%t: tensor) -> tensor { %f0 = arith.constant 0.0 : f32 - %r = linalg.fill ins(%f0 : f32) outs(%t : tensor) -> tensor + %r = linalg.fill ins(%f0 : f32) inits(%t : tensor) -> tensor return %r : tensor } @@ -416,12 +416,12 @@ // CHECK-NEXT: %[[C0:.*]] = arith.constant 0{{.*}} : f32 %v0 = arith.constant 0.0 : f32 - // CHECK-NEXT: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref>) - %d = linalg.fill ins(%v0 : f32) outs(%c : tensor) -> tensor + // CHECK-NEXT: linalg.fill ins(%[[C0]] : f32) inits(%[[C]] : memref>) + %d = linalg.fill ins(%v0 : f32) inits(%c : tensor) -> tensor - // CHECK-NEXT: linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, strided<[?], offset: ?>>, memref<64xf32, strided<[?], offset: ?>>) outs(%[[C]] : memref>) + // CHECK-NEXT: linalg.dot ins(%[[A]], %[[B]] : memref<64xf32, strided<[?], offset: ?>>, memref<64xf32, strided<[?], offset: ?>>) inits(%[[C]] : memref>) %e = linalg.dot ins(%a, %b : tensor<64xf32>,tensor<64xf32>) - outs(%d: tensor) -> tensor + inits(%d: tensor) -> tensor // CHECK-NEXT: return return %e : tensor @@ -446,12 +446,12 @@ %B = bufferization.alloc_tensor() : tensor<64xf32> %C = bufferization.alloc_tensor() : tensor - // CHECK-DAG: linalg.fill ins(%[[C1]] : f32) outs(%[[A]] : memref<64xf32>) - // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) outs(%[[B]] : memref<64xf32>) - // CHECK-DAG: linalg.fill ins(%[[C0]] : f32) outs(%[[C]] : memref) - %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32> - %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32> - %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor + // CHECK-DAG: 
linalg.fill ins(%[[C1]] : f32) inits(%[[A]] : memref<64xf32>) + // CHECK-DAG: linalg.fill ins(%[[C2]] : f32) inits(%[[B]] : memref<64xf32>) + // CHECK-DAG: linalg.fill ins(%[[C0]] : f32) inits(%[[C]] : memref) + %AA = linalg.fill ins(%v1 : f32) inits(%A : tensor<64xf32>) -> tensor<64xf32> + %BB = linalg.fill ins(%v2 : f32) inits(%B : tensor<64xf32>) -> tensor<64xf32> + %CC = linalg.fill ins(%v0 : f32) inits(%C : tensor) -> tensor // CHECK-NEXT: call @init_and_dot(%[[cA]], %[[cB]], %[[cC]]) %res = call @init_and_dot(%AA, %BB, %CC) : diff --git a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir --- a/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/tensor-copy-insertion.mlir @@ -56,11 +56,11 @@ -> (tensor<5xf32>, tensor<5xf32>) { // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<5xf32> - // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<5xf32>) + // CHECK: linalg.generic {{.*}} inits(%[[alloc]] : tensor<5xf32>) %r = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%t : tensor<5xf32>) { + inits(%t : tensor<5xf32>) { ^bb0(%arg0 : f32) : linalg.yield %f : f32 } -> tensor<5xf32> @@ -75,11 +75,11 @@ { %0 = tensor.extract_slice %t[0][3][1] : tensor<5xf32> to tensor<3xf32> // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() {bufferization.escape = [false], memory_space = 0 : ui64} : tensor<3xf32> - // CHECK: linalg.generic {{.*}} outs(%[[alloc]] : tensor<3xf32>) + // CHECK: linalg.generic {{.*}} inits(%[[alloc]] : tensor<3xf32>) %r = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%0 : tensor<3xf32>) { + inits(%0 : tensor<3xf32>) { ^bb0(%arg0 : f32) : linalg.yield %f : f32 } -> tensor<3xf32> diff --git a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir --- a/mlir/test/Dialect/GPU/transform-gpu-failing.mlir +++ b/mlir/test/Dialect/GPU/transform-gpu-failing.mlir @@ -151,7 +151,7 @@ %name = gpu.launch async[%stream] blocks(%arg3, %arg4, %arg5) in (%arg9 = %one, %arg10 = %one, %arg11 = %one) threads(%arg6, %arg7, %arg8) in (%arg12 = %one, %arg13 = %one, %arg14 = %one) { - %t = linalg.matmul ins(%x, %y: tensor<32x32xf32>, tensor<32x32xf32>) outs(%z : tensor<32x32xf32>) -> tensor<32x32xf32> + %t = linalg.matmul ins(%x, %y: tensor<32x32xf32>, tensor<32x32xf32>) inits(%z : tensor<32x32xf32>) -> tensor<32x32xf32> gpu.terminator } return diff --git a/mlir/test/Dialect/Linalg/affine.mlir b/mlir/test/Dialect/Linalg/affine.mlir --- a/mlir/test/Dialect/Linalg/affine.mlir +++ b/mlir/test/Dialect/Linalg/affine.mlir @@ -10,7 +10,7 @@ %B = memref.view %arg0[%c0][%K, %N] : memref to memref %C = memref.view %arg0[%c0][%M, %N] : memref to memref linalg.matmul ins(%A, %B: memref, memref) - outs(%C: memref) + inits(%C: memref) return } @@ -19,7 +19,7 @@ //----------------------------------------------------------------------------// func.func @named_batch_matmul(%A: memref, %B: memref, %C: memref) { linalg.batch_matmul ins(%A, %B: memref, memref) - outs(%C : memref) + inits(%C : memref) return } // CHECK-LABEL: @named_batch_matmul diff --git a/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir b/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir --- a/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir +++ 
b/mlir/test/Dialect/Linalg/bubble-up-extract-slice-op.mlir @@ -7,7 +7,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %add = arith.addf %b0, %b1 : f32 linalg.yield %add : f32 @@ -22,7 +22,7 @@ // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[%arg3] [%arg5] [1] : tensor to tensor // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[%arg2, %arg3] [%arg4, %arg5] [1, 1] : tensor to tensor // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor) outs(%[[SLICE2]] : tensor) +// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor) inits(%[[SLICE2]] : tensor) // CHECK: return %[[GENERIC]] : tensor //----- @@ -34,7 +34,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] } ins(%arg0, %arg1 : tensor<16x8xf32>, tensor<8xf32>) - outs(%arg0 : tensor<16x8xf32>) { + inits(%arg0 : tensor<16x8xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %add = arith.addf %b0, %b1 : f32 linalg.yield %add : f32 @@ -49,7 +49,7 @@ // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[4] [2] [1] : tensor<8xf32> to tensor<2xf32> // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[8, 4] [4, 2] [1, 1] : tensor<16x8xf32> to tensor<4x2xf32> // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) outs(%[[SLICE2]] : tensor<4x2xf32>) +// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) inits(%[[SLICE2]] : tensor<4x2xf32>) // CHECK: return %[[GENERIC]] : tensor<4x2xf32> //----- @@ -61,7 +61,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor<8xf32>) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %add = arith.addf %b0, %b1 : f32 linalg.yield %add : f32 @@ -76,7 +76,7 @@ // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[%arg2] [2] [1] : tensor<8xf32> to tensor<2xf32> // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[8, %arg2] [%arg3, 2] [1, 1] : tensor to tensor // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor<2xf32>) outs(%[[SLICE2]] : tensor) +// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor, tensor<2xf32>) inits(%[[SLICE2]] : tensor) // CHECK: return %[[GENERIC]] : tensor //----- @@ -88,7 +88,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %add = arith.addf %b0, %b1 : f32 linalg.yield %add : f32 @@ -103,7 +103,7 @@ // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[4] [2] [1] : tensor to tensor<2xf32> // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %arg0[8, 4] [4, 2] [1, 1] : tensor to tensor<4x2xf32> // CHECK: %[[GENERIC:.+]] = linalg.generic {indexing_maps = [#map, #map1, #map], iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) outs(%[[SLICE2]] : tensor<4x2xf32>) +// CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]] : tensor<4x2xf32>, tensor<2xf32>) 
inits(%[[SLICE2]] : tensor<4x2xf32>) // CHECK: return %[[GENERIC]] : tensor<4x2xf32> //----- @@ -112,7 +112,7 @@ %lhs = arith.constant dense<1.0> : tensor<4x4xf32> %rhs = arith.constant dense<1.0> : tensor<4x4xf32> %dst = arith.constant dense<[[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 11.0], [12.0, 13.0, 14.0, 15.0]]> : tensor<4x4xf32> - %0 = linalg.matmul ins(%lhs, %rhs : tensor<4x4xf32>, tensor<4x4xf32>) outs(%dst : tensor<4x4xf32>) -> tensor<4x4xf32> + %0 = linalg.matmul ins(%lhs, %rhs : tensor<4x4xf32>, tensor<4x4xf32>) inits(%dst : tensor<4x4xf32>) -> tensor<4x4xf32> %1 = tensor.extract_slice %0[1,1][2,2][1,1] : tensor<4x4xf32> to tensor<2x2xf32> return %1 : tensor<2x2xf32> } @@ -121,7 +121,7 @@ // CHECK: %[[SLICE0:.+]] = arith.constant dense<1.000000e+00> : tensor<2x4xf32> // CHECK: %[[SLICE1:.+]] = arith.constant dense<1.000000e+00> : tensor<4x2xf32> // CHECK: %[[SLICE3:.+]] = tensor.extract_slice %[[CST:.+]][1, 1] [2, 2] [1, 1] : tensor<4x4xf32> to tensor<2x2xf32> -// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[SLICE0]], %[[SLICE1]] : tensor<2x4xf32>, tensor<4x2xf32>) outs(%[[SLICE3]] : tensor<2x2xf32>) -> tensor<2x2xf32> +// CHECK: %[[MATMUL:.+]] = linalg.matmul ins(%[[SLICE0]], %[[SLICE1]] : tensor<2x4xf32>, tensor<4x2xf32>) inits(%[[SLICE3]] : tensor<2x2xf32>) -> tensor<2x2xf32> // CHECK: return %[[MATMUL]] : tensor<2x2xf32> //----- @@ -136,12 +136,12 @@ %cst = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<1x112x112x32xf32> - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> %conv = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>) - outs(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> + inits(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32> %slice = tensor.extract_slice %conv [0, 64, 64, 16] [1, 32, 32, 16] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x32x32x16xf32> @@ -153,8 +153,8 @@ // CHECK: %[[SLICE0:.+]] = tensor.extract_slice %arg0[0, 128, 128, 0] [1, 65, 65, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> to tensor<1x65x65x3xf32> // CHECK: %[[SLICE1:.+]] = tensor.extract_slice %arg1[0, 0, 0, 16] [3, 3, 3, 16] [1, 1, 1, 1] : tensor<3x3x3x32xf32> to tensor<3x3x3x16xf32> // CHECK: %[[SLICE2:.+]] = tensor.extract_slice %[[INIT]][0, 64, 64, 16] [1, 32, 32, 16] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x32x32x16xf32> -// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST:.+]] : f32) outs(%[[SLICE2]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32> -// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%[[SLICE0]], %[[SLICE1]] : tensor<1x65x65x3xf32>, tensor<3x3x3x16xf32>) outs(%[[FILL]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32> +// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[CST:.+]] : f32) inits(%[[SLICE2]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32> +// CHECK: %[[CONV:.+]] = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%[[SLICE0]], %[[SLICE1]] : tensor<1x65x65x3xf32>, tensor<3x3x3x16xf32>) inits(%[[FILL]] : tensor<1x32x32x16xf32>) -> tensor<1x32x32x16xf32> // CHECK: return %[[CONV]] : tensor<1x32x32x16xf32> //----- @@ -163,7 +163,7 @@ func.func @rank_reducing_slice(%width : index) -> 
tensor<1x1x1x?xf32> { %cst = arith.constant 1.000000e+00 : f32 %init = tensor.empty(%width) : tensor<1x?xf32> - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x?xf32>) -> tensor<1x?xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x?xf32>) -> tensor<1x?xf32> %slice = tensor.extract_slice %fill[0, 0] [1, %width] [1, 1] : tensor<1x?xf32> to tensor %expand = tensor.expand_shape %slice [[0, 1, 2, 3]] : tensor into tensor<1x1x1x?xf32> return %expand : tensor<1x1x1x?xf32> diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir --- a/mlir/test/Dialect/Linalg/bufferize.mlir +++ b/mlir/test/Dialect/Linalg/bufferize.mlir @@ -16,7 +16,7 @@ // CHECK-DAG: %[[RESULT_MEMREF:.*]] = memref.alloc() {{.*}} : memref<4xf32> // CHECK: linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel"]} // CHECK-SAME: ins(%[[MEMREF]] : memref<4xf32>) -// CHECK-SAME: outs(%[[RESULT_MEMREF]] : memref<4xf32>) { +// CHECK-SAME: inits(%[[RESULT_MEMREF]] : memref<4xf32>) { // CHECK: ^bb0(%[[RESULT1:.*]]: f32, %[[UNUSED:.*]]: f32): // CHECK: %[[DIM1:.*]] = math.exp %[[RESULT1]] : f32 // CHECK: linalg.yield %[[DIM1]] : f32 @@ -28,7 +28,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel"] } ins(%arg0 : tensor<4xf32>) - outs(%arg0 : tensor<4xf32>) { + inits(%arg0 : tensor<4xf32>) { ^bb0(%gen_arg1: f32, %out: f32): %tmp1 = math.exp %gen_arg1 : f32 linalg.yield %tmp1 : f32 @@ -50,14 +50,14 @@ // CHECK-DAG: %[[OUT_BUF:.*]] = memref.alloc(%[[SIZE]]) {{.*}} : memref // CHECK: linalg.generic // CHECK-SAME: ins(%[[MEMREF]] : memref) -// CHECK-SAME: outs(%[[OUT_BUF]] : memref) { +// CHECK-SAME: inits(%[[OUT_BUF]] : memref) { func.func @empty_tensor(%in : tensor, %size: index) -> tensor { %init = tensor.empty(%size) : tensor %0 = linalg.generic { indexing_maps = [#map0, #map0], iterator_types = ["parallel"] } ins(%in : tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%gen_arg1: f32, %out: f32): %tmp1 = math.exp %gen_arg1 : f32 linalg.yield %tmp1 : f32 @@ -75,14 +75,14 @@ // CHECK: %[[RESULT1:.*]] = memref.alloc() {{.*}} : memref<4xf32> // CHECK: linalg.generic // CHECK-SAME: ins(%{{.*}} : memref<4xf32>) -// CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref<4xf32>, memref<4xf32>) +// CHECK-SAME: inits(%[[RESULT0]], %[[RESULT1]] : memref<4xf32>, memref<4xf32>) // CHECK-NEXT: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: f32): func.func @multiple_results(%arg0: tensor<4xf32>) -> (tensor<4xf32>, tensor<4xf32>) { %0, %1 = linalg.generic { indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel"] } ins(%arg0 : tensor<4xf32>) - outs (%arg0, %arg0 : tensor<4xf32>, tensor<4xf32>) { + inits (%arg0, %arg0 : tensor<4xf32>, tensor<4xf32>) { ^bb0(%gen_arg1: f32, %out1: f32, %out2: f32): %tmp1 = math.exp %gen_arg1 : f32 linalg.yield %tmp1, %tmp1 : f32, f32 @@ -108,14 +108,14 @@ // CHECK-DAG: %[[MEMREF_ARG:.*]] = bufferization.to_memref %[[ARG]] : memref // CHECK: linalg.generic // CHECK-SAME: ins(%[[MEMREF_ARG]] : memref) -// CHECK-SAME: outs(%[[RESULT0]], %[[RESULT1]] : memref, memref) +// CHECK-SAME: inits(%[[RESULT0]], %[[RESULT1]] : memref, memref) func.func @dynamic_results(%arg0: tensor) -> (tensor, tensor) { %0, %1 = linalg.generic { indexing_maps = [#map_2d, #map_2d, #map_2d], iterator_types = ["parallel", "parallel"] } ins(%arg0 : tensor) - outs (%arg0, %arg0 : tensor, tensor) { + inits (%arg0, %arg0 : tensor, tensor) { ^bb0(%gen_arg1: f32, %out1: f32, %out2: f32): %tmp1 = math.exp %gen_arg1 : f32 linalg.yield %tmp1, %tmp1 : 
f32, f32 @@ -146,13 +146,13 @@ // CHECK: memref.copy %[[ARG1_MEMREF]], %[[INIT_BUFFER]] : memref<3x2xf32> to memref<3x2xf32> // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0_MEMREF]] : memref<2x3x4xvector<3x4xi4>>) -// CHECK-SAME: outs(%[[INIT_BUFFER]] : memref<3x2xf32>) { +// CHECK-SAME: inits(%[[INIT_BUFFER]] : memref<3x2xf32>) { func.func @generic_with_init_tensor(%arg0: tensor<2x3x4xvector<3x4xi4>>, %arg1: tensor<3x2xf32>) -> (tensor<3x2xf32>) { %0 = linalg.generic #trait ins(%arg0 : tensor<2x3x4xvector<3x4xi4>>) - outs(%arg1 : tensor<3x2xf32>) { + inits(%arg1 : tensor<3x2xf32>) { ^bb(%v0: vector<3x4xi4>, %v1: f32) : linalg.yield %v1 : f32 } -> tensor<3x2xf32> @@ -167,10 +167,10 @@ func.func @bufferize_fill(%arg0: tensor) -> tensor { %c0 = arith.constant 0.0 : f32 // CHECK: %[[ALLOC:.*]] = memref.alloc - // CHECK: linalg.fill ins(%cst : f32) outs(%[[ALLOC]] : memref) + // CHECK: linalg.fill ins(%cst : f32) inits(%[[ALLOC]] : memref) // CHECK: %[[TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref // CHECK: return %[[TENSOR]] - %0 = linalg.fill ins(%c0 : f32) outs(%arg0 : tensor) -> tensor + %0 = linalg.fill ins(%c0 : f32) inits(%arg0 : tensor) -> tensor return %0 : tensor } @@ -179,13 +179,13 @@ // CHECK-LABEL: func @bufferize_dot func.func @bufferize_dot(%in: tensor<4xf32>, %out: tensor) -> tensor { %dot = linalg.dot ins(%in, %in : tensor<4xf32>, tensor<4xf32>) - outs(%out : tensor) -> tensor + inits(%out : tensor) -> tensor return %dot : tensor // CHECK: %[[ALLOC:.*]] = memref.alloc // TODO: The copy is not necessary. // CHECK: memref.copy {{.*}}, %[[ALLOC]] // CHECK: linalg.dot ins(%{{.*}}, %{{.*}} : memref<4xf32>, memref<4xf32>) - // CHECK-SAME: outs(%[[ALLOC:.*]] : memref) + // CHECK-SAME: inits(%[[ALLOC:.*]] : memref) // CHECK: %[[OUT_TENSOR:.*]] = bufferization.to_tensor %[[ALLOC]] : memref // CHECK: return %[[OUT_TENSOR]] } @@ -202,14 +202,14 @@ // CHECK: %[[collapse:.*]] = tensor.collapse_shape %[[arg0]] // CHECK: %[[collapse_m:.*]] = bufferization.to_memref %[[collapse]] // CHECK: %[[alloc:.*]] = memref.alloc() -// CHECK: linalg.generic {{.*}} ins(%[[collapse_m]] : memref<6xi1>) outs(%[[alloc]] : memref<6xi64>) +// CHECK: linalg.generic {{.*}} ins(%[[collapse_m]] : memref<6xi1>) inits(%[[alloc]] : memref<6xi64>) // CHECK: %[[generic_t:.*]] = bufferization.to_tensor %[[alloc]] // CHECK: %[[call:.*]] = call @csum(%[[generic_t]]) // CHECK: return %[[call]] func.func public @main(%arg0: tensor<2x3xi1>) -> tensor<6xi64> { %0 = tensor.collapse_shape %arg0 [[0, 1]] : tensor<2x3xi1> into tensor<6xi1> %1 = tensor.empty() : tensor<6xi64> - %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<6xi1>) outs(%1 : tensor<6xi64>) { + %2 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%0 : tensor<6xi1>) inits(%1 : tensor<6xi64>) { ^bb0(%arg1: i1, %arg2: i64): %4 = arith.extui %arg1 : i1 to i64 linalg.yield %4 : i64 diff --git a/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir b/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir --- a/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize-duplicate-inputs.mlir @@ -13,7 +13,7 @@ // CHECK: arith.addf %[[BBARG]], %[[BBARG]] %0 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg0 : tensor, tensor) - outs(%arg0 : tensor) attrs = {someattr} { + inits(%arg0 : tensor) 
attrs = {someattr} { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %1 = arith.addf %arg1, %arg2 : f32 linalg.yield %1 : f32 @@ -36,7 +36,7 @@ // CHECK: linalg.generic{{.*}}[#[[$MAP0]], #[[$MAP1]], #[[$MAP0]]] %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg0 : tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %1 = arith.addf %arg1, %arg2 : f32 linalg.yield %1 : f32 @@ -61,7 +61,7 @@ // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]]) %0 = linalg.generic {indexing_maps = [#map0, #map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg0, %arg0 : tensor, tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %1 = "test.elementwise_mappable"(%arg1, %arg2, %arg3) : (f32, f32, f32) -> f32 linalg.yield %1 : f32 @@ -83,7 +83,7 @@ // CHECK: "test.elementwise_mappable"(%[[BBARG0]], %[[BBARG1]], %[[BBARG0]], %[[BBARG1]]) %0 = linalg.generic {indexing_maps = [#map, #map, #map, #map, #map], iterator_types = ["parallel"]} ins(%arg0, %arg1, %arg0, %arg1 : tensor, tensor, tensor, tensor) - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32, %arg5: f32, %arg6: f32): %1 = "test.elementwise_mappable"(%arg2, %arg3, %arg4, %arg5) : (f32, f32, f32, f32) -> f32 linalg.yield %1 : f32 @@ -105,7 +105,7 @@ indexing_maps = [#map0, #map1, #map2, #map3, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0, %arg0, %arg0 + inits(%arg0, %arg0, %arg0, %arg0 : tensor, tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : %1 = arith.addf %b0, %b0: f32 @@ -120,7 +120,7 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor) // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]], %[[ARG0]] : // CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1 // ----- @@ -137,7 +137,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["reduction"]} ins(%arg0 : tensor) - outs(%init0, %init1 : tensor, tensor) { + inits(%init0, %init1 : tensor, tensor) { ^bb0(%b0: f32, %b1: f32, %b2: i32): %8 = linalg.index 0 : index %9 = arith.index_cast %8 : index to i32 @@ -156,7 +156,7 @@ // CHECK-DAG: %[[INIT0:.+]] = tensor.empty() : tensor // CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor // CHECK: %[[GENERIC:.+]]:2 = linalg.generic -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK: return %[[GENERIC]]#1 // ----- @@ -165,11 +165,11 @@ func.func @loop_dim_operand(%arg0 : tensor) -> tensor { %cst = arith.constant 0 : i32 %init = tensor.empty() : tensor - %fill = linalg.fill ins(%cst : i32) outs(%init : tensor) -> tensor + %fill = linalg.fill ins(%cst : i32) inits(%init : tensor) -> tensor %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>], iterator_types = ["reduction"]} - ins(%arg0 : tensor) outs(%fill : tensor) { + ins(%arg0 : tensor) inits(%fill : tensor) { ^bb0(%b0: f32, %b1: i32): %1 = linalg.index 0 : index %2 = arith.index_cast %1 : index to i32 @@ -190,11 +190,11 @@ %cst = arith.constant 0 : i32 %init1 = tensor.empty(%arg0) : tensor %init = tensor.empty() : tensor - %fill = linalg.fill ins(%cst : i32) outs(%init : tensor) -> tensor + %fill = linalg.fill ins(%cst : i32) inits(%init : 
tensor) -> tensor %0:2 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> ()>], iterator_types = ["parallel"]} - outs(%init1, %fill : tensor, tensor) { + inits(%init1, %fill : tensor, tensor) { ^bb0(%b0: i32, %b1: i32): %1 = linalg.index 0 : index %2 = arith.index_cast %1 : index to i32 @@ -207,7 +207,7 @@ // CHECK-SAME: %[[ARG0:.+]]: index // CHECK: %[[INIT:.+]] = tensor.empty(%[[ARG0]]) // CHECK: linalg.generic -// CHECK-SAME: outs(%[[INIT]] +// CHECK-SAME: inits(%[[INIT]] // ----- @@ -222,7 +222,7 @@ iterator_types = ["parallel", "reduction"]} ins(%arg4, %arg0, %arg0, %arg1, %arg3, %arg3 : tensor, tensor, tensor, tensor, tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32, %b4 : i32, %b5 : i32, %b6 : i32): %1 = arith.addi %b0, %b1 : i32 %2 = arith.addi %1, %b2 : i32 @@ -248,7 +248,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "reduction"] // CHECK-SAME: ins(%[[ARG4]], %[[ARG0]], %[[ARG1]], %[[ARG3]] : -// CHECK-SAME: outs(%[[ARG2]] : +// CHECK-SAME: inits(%[[ARG2]] : // CHECK: ^{{.+}}(%[[B0:[a-zA-Z0-9]+]]: i32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: i32 // CHECK-SAME: %[[B2:[a-zA-Z0-9_]+]]: i32 @@ -274,7 +274,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0 : tensor, tensor) { + inits(%arg0, %arg0 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %1 = arith.addf %b0, %b0 : f32 linalg.yield %1, %1 : f32, f32 @@ -284,7 +284,7 @@ // CHECK: func @drop_redundant_results // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK: %[[GENERIC:.+]] = linalg.generic -// CHECK-SAME: outs(%[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]] : // CHECK: return %[[GENERIC]] // ----- @@ -308,7 +308,7 @@ indexing_maps = [#map0, #map1, #map2, #map3, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0, %init0, %init0 + inits(%arg0, %arg0, %init0, %init0 : tensor, tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : linalg.yield %b0, %b0, %b3, %b4 : f32, f32, f32, f32 @@ -323,7 +323,7 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor) // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]], %[[ARG0]] : // CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1 // ----- @@ -347,7 +347,7 @@ indexing_maps = [#map0, #map1, #map2, #map3, #map4], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %arg0, %init0, %init0 + inits(%arg0, %arg0, %init0, %init0 : tensor, tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : %1 = arith.addf %b0, %b0: f32 @@ -365,7 +365,7 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor) // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[ARG0]] : +// CHECK-SAME: inits(%[[ARG0]], %[[ARG0]] : // CHECK: return %[[GENERIC]]#0, %[[GENERIC]]#1 // ----- @@ -388,7 +388,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %init0, %init0 + inits(%arg0, %init0, %init0 : tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) : linalg.yield %b2, %b1, %b3 : f32, f32, f32 @@ -403,7 +403,7 @@ // CHECK: %[[INIT:.+]] 
= tensor.empty // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[INIT]] : +// CHECK-SAME: inits(%[[ARG0]], %[[INIT]] : // CHECK: return %[[GENERIC]]#0 // ----- @@ -426,7 +426,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor) - outs(%arg0, %init0, %init0 + inits(%arg0, %init0, %init0 : tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) : %1 = arith.addf %b1, %b2: f32 @@ -443,5 +443,5 @@ // CHECK: %[[INIT:.+]] = tensor.empty // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] -// CHECK-SAME: outs(%[[ARG0]], %[[INIT]] : +// CHECK-SAME: inits(%[[ARG0]], %[[INIT]] : // CHECK: return %[[GENERIC]]#0 diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -10,9 +10,9 @@ %2 = memref.view %1[%c0][] : memref to memref<16x16xf32> %3 = memref.cast %2 : memref<16x16xf32> to memref - // CHECK: linalg.matmul ins({{.*}}memref<16x16xf32>, memref<16x16xf32>) outs({{.*}}memref<16x16xf32>) + // CHECK: linalg.matmul ins({{.*}}memref<16x16xf32>, memref<16x16xf32>) inits({{.*}}memref<16x16xf32>) linalg.matmul ins(%3, %3: memref, memref) - outs(%3: memref) + inits(%3: memref) return %3: memref } @@ -32,7 +32,7 @@ memref.copy %arg0, %arg0 : memref<0xf32> to memref<0xf32> // tensor<0xf32> cannot be dce'ed - %1 = linalg.generic #trait outs(%arg1 : tensor<0xf32>) { + %1 = linalg.generic #trait inits(%arg1 : tensor<0xf32>) { ^bb(%0: f32) : linalg.yield %0 : f32 } -> tensor<0xf32> @@ -57,9 +57,9 @@ %tc = tensor.cast %c : tensor<3x?xf32> to tensor // CHECK: linalg.matmul ins({{.*}}tensor<3x4xf32>, tensor<4x?xf32>) - // CHECK-SAME: outs({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32> + // CHECK-SAME: inits({{.*}}tensor<3x?xf32>) -> tensor<3x?xf32> %0 = linalg.matmul ins(%ta, %tb: tensor, tensor) - outs(%tc: tensor) -> tensor + inits(%tc: tensor) -> tensor %1 = tensor.cast %0 : tensor to tensor<3x?xf32> @@ -75,11 +75,11 @@ func.func @linalg_effects(%a : tensor, %b : memref, %c : tensor) { // CHECK-NOT: %{{.*}} = linalg.matmul %t = linalg.matmul ins(%a, %b : tensor, memref) - outs(%c : tensor) -> tensor + inits(%c : tensor) -> tensor // CHECK: linalg.matmul linalg.matmul ins(%a, %c : tensor, tensor) - outs(%b : memref) + inits(%b : memref) return } @@ -99,7 +99,7 @@ indexing_maps = [#map, #map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%3, %3 : tensor, tensor) { + inits(%3, %3 : tensor, tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg3, %arg2 : f32, f32 } -> (tensor, tensor) @@ -120,7 +120,7 @@ indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%arg0 : tensor) - outs(%out : tensor<1x2x3xf32>) { + inits(%out : tensor<1x2x3xf32>) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } -> (tensor<1x2x3xf32>) @@ -140,7 +140,7 @@ indexing_maps = [#map, #map], iterator_types = [] } ins(%arg0 : f32) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } -> (tensor) @@ -164,7 +164,7 @@ ^bb1(%arg1 : f32): %3 = linalg.generic {indexing_maps = [#map, #map], iterator_types = ["parallel", "parallel"]} - ins(%arg0 : tensor) outs(%2 : tensor) { + ins(%arg0 : tensor) inits(%2 
: tensor) { ^bb0(%arg2: f32, %arg3 : f32): linalg.yield %arg1 : f32 } -> tensor @@ -192,7 +192,7 @@ {indexing_maps = [#map, #map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2, %2 : tensor, tensor) { + inits(%2, %2 : tensor, tensor) { ^bb0(%arg3: f32, %arg4 : f32, %arg5 : f32, %arg6 : f32): linalg.yield %arg2, %arg4 : f32, f32 } -> (tensor, tensor) @@ -224,10 +224,10 @@ %c0_i32 = arith.constant 0 : i32 %c0 = arith.constant 0 : index %cst = arith.constant 0.000000e+00 : f32 - %0 = linalg.fill ins(%c0_i32 : i32) outs(%arg0 : tensor<7x7xi32>) -> tensor<7x7xi32> + %0 = linalg.fill ins(%c0_i32 : i32) inits(%arg0 : tensor<7x7xi32>) -> tensor<7x7xi32> %1 = linalg.matmul ins(%arg1, %arg1: tensor<7x7xf32>, tensor<7x7xf32>) - outs(%arg1: tensor<7x7xf32>) -> tensor<7x7xf32> - %2 = linalg.generic #trait outs(%arg0 : tensor<7x7xi32>) { + inits(%arg1: tensor<7x7xf32>) -> tensor<7x7xf32> + %2 = linalg.generic #trait inits(%arg0 : tensor<7x7xi32>) { ^bb(%3: i32) : linalg.yield %3 : i32 } -> tensor<7x7xi32> @@ -247,7 +247,7 @@ %c21 = arith.constant 21 : index %c42 = arith.constant 42 : index %0 = tensor.empty(%c21, %c42) : tensor - %1 = linalg.fill ins(%arg1 : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%arg1 : f32) inits(%0 : tensor) -> tensor %2 = tensor.dim %arg0, %c0 : tensor %3 = tensor.dim %arg0, %c1 : tensor %4 = tensor.insert_slice %arg0 into %1[%arg2, %arg3] [%2, %3] [1, 1] : tensor into tensor @@ -255,7 +255,7 @@ } // CHECK-LABEL: func @propogate_casts // CHECK: %[[INIT:.+]] = tensor.empty -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[INSERTED:.+]] = tensor.insert_slice %{{.+}} into %[[FILL]] // CHECK: %[[RESULT:.+]] = tensor.cast %[[INSERTED]] // CHECK: return %[[RESULT]] @@ -278,7 +278,7 @@ // CHECK-SAME: (%[[ARG0:.*]]: tensor) -> tensor { // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]] : tensor) -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0) -> (d0)> func.func @remove_deadargs_generic_basic(%arg0: tensor) -> (tensor) { %c0 = arith.constant 0 : index @@ -286,7 +286,7 @@ %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.empty(%0) : tensor %2 = tensor.empty(%0) : tensor - %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %1 : tensor, tensor) outs (%2:tensor) { + %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %1 : tensor, tensor) inits (%2:tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %4 = arith.addf %arg1, %cst : f32 linalg.yield %4 : f32 @@ -299,7 +299,7 @@ // CHECK-LABEL: func @remove_deadargs_generic_mixedaccess // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-NOT: ins -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0, d1) -> (d0, d1)> #map1 = affine_map<(d0, d1) -> (d1, d0)> func.func @remove_deadargs_generic_mixedaccess(%arg0: tensor) -> (tensor) { @@ -312,7 +312,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = tensor.empty(%1, %0) : tensor %4 = tensor.empty(%0, %1) : tensor - %5 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%2, %3 : tensor, tensor) outs (%4:tensor) { + %5 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%2, %3 : tensor, tensor) inits (%4:tensor) { 
^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %6 = arith.divf %cst1, %cst2 : f32 linalg.yield %6 : f32 @@ -326,8 +326,8 @@ %zero = arith.constant 0.0 : f32 // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<6x4xf32> %empty = tensor.empty() : tensor<1x2x3x4xf32> - // CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<6x4xf32>) -> tensor<6x4xf32> - %fill = linalg.fill ins(%zero : f32) outs(%empty : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32> + // CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor<6x4xf32>) -> tensor<6x4xf32> + %fill = linalg.fill ins(%zero : f32) inits(%empty : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32> %reshape = tensor.collapse_shape %fill [[0, 1, 2], [3]] : tensor<1x2x3x4xf32> into tensor<6x4xf32> // CHECK: return %[[FILL]] : tensor<6x4xf32> @@ -341,8 +341,8 @@ func.func @fold_fill_reshape_dynamic(%arg0 : tensor) -> tensor { %zero = arith.constant 0.0 : f32 // CHECK: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] - %0 = linalg.fill ins(%zero : f32) outs(%arg0 : tensor) -> tensor - // CHECK: %[[RESULT:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[RESHAPE]] + %0 = linalg.fill ins(%zero : f32) inits(%arg0 : tensor) -> tensor + // CHECK: %[[RESULT:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[RESHAPE]] %1 = tensor.collapse_shape %0 [[0, 1, 2], [3, 4]] : tensor into tensor // CHECK: return %[[RESULT]] @@ -358,7 +358,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%0 : memref<4x16xf32>) - outs(%0 : memref<4x16xf32>) { + inits(%0 : memref<4x16xf32>) { ^bb0(%arg4: f32, %arg5: f32): linalg.yield %arg4 : f32 } @@ -370,12 +370,12 @@ // CHECK-LABEL: func @fold_static_pad_fill // CHECK: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<412x276xf32> -// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}inits(%[[INIT]] // CHECK: return %[[FILL]] func.func @fold_static_pad_fill() -> tensor<412x276xf32> { %f0 = arith.constant 0.0 : f32 %empty = tensor.empty() : tensor<400x273xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<400x273xf32>) -> tensor<400x273xf32> %pad = tensor.pad %fill low[4, 1] high[8, 2] { ^bb0(%arg1: index, %arg2: index): tensor.yield %f0 : f32 @@ -395,18 +395,18 @@ // CHECK-DAG: %[[I1:.+]] = arith.constant 1 : index // CHECK-DAG: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 -// CHECK: %[[OF:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[SRC]] : tensor<8x?x16x32xf32>) +// CHECK: %[[OF:.+]] = linalg.fill ins(%[[F0]] : f32) inits(%[[SRC]] : tensor<8x?x16x32xf32>) // CHECK: %[[S0:.+]] = affine.apply #[[MAP0]]()[%[[LOW0]]] // CHECK: %[[DIM1:.+]] = tensor.dim %[[OF]], %[[I1]] : tensor<8x?x16x32xf32> // CHECK: %[[S1:.+]] = affine.apply #[[MAP1]]()[%[[DIM1]]] // CHECK: %[[S2:.+]] = affine.apply #[[MAP2]]()[%[[HIGH2]]] // CHECK: %[[S3:.+]] = affine.apply #[[MAP3]]()[%[[LOW3]], %[[HIGH3]]] // CHECK: %[[INIT:.+]] = tensor.empty(%[[S0]], %[[S1]], %[[S2]], %[[S3]]) : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}inits(%[[INIT]] // CHECK: return %[[FILL]] func.func @fold_dynamic_pad_fill(%empty: tensor<8x?x16x32xf32>, %low0: index, %low3: index, %high2: index, %high3: index) -> tensor { %f0 = arith.constant 0.0 : f32 - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x?x16x32xf32>) -> 
tensor<8x?x16x32xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x?x16x32xf32>) -> tensor<8x?x16x32xf32> %pad = tensor.pad %fill low[%low0, 8, 7, %low3] high[1, 2, %high2, %high3] { ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index): tensor.yield %f0 : f32 @@ -421,7 +421,7 @@ %f0 = arith.constant 0.0 : f32 %f1 = arith.constant 1.0 : f32 %empty = tensor.empty() : tensor<400x273xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<400x273xf32>) -> tensor<400x273xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<400x273xf32>) -> tensor<400x273xf32> // CHECK: tensor.pad %pad = tensor.pad %fill low[4, 1] high[8, 2] { ^bb0(%arg1: index, %arg2: index): @@ -451,7 +451,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%arg0, %arg1 : tensor<2x3x4xf32>, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32): %9 = arith.addf %arg2, %arg3 : f32 linalg.yield %9 : f32 @@ -461,7 +461,7 @@ // CHECK: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor to tensor<2x3x4xf32> // CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>) - // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>) + // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>) } // ----- @@ -482,7 +482,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%arg0, %4 : tensor<2x3x4xf32>, tensor<2x?x?xf32>) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32): %9 = arith.addf %arg2, %arg3 : f32 linalg.yield %9 : f32 @@ -492,7 +492,7 @@ // CHECK: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor to tensor<2x3x4xf32> // CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>) - // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>) + // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>) } // ----- @@ -514,7 +514,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%arg0, %5 : tensor, tensor<2x?x?xf32>) - outs(%4 : tensor<2x3x4xf32>) { + inits(%4 : tensor<2x3x4xf32>) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): %9 = arith.addf %arg3, %arg4 : f32 linalg.yield %9 : f32 @@ -524,7 +524,7 @@ // CHECK-NEXT: %[[CAST_ARG1:.*]] = tensor.cast %[[ARG1]] : tensor to tensor<2x3x4xf32> // CHECK-NEXT: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[CAST_ARG0]], %[[CAST_ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>) - // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>) + // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>) } // ----- @@ -548,7 +548,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%4, %5 : tensor<2x?x?xf32>, tensor<2x?x?xf32>) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32): %9 = arith.addf %arg2, %arg3 : f32 linalg.yield %9 : f32 @@ -557,7 +557,7 @@ return %7: tensor<2x3x4xf32> // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : tensor<2x3x4xf32>, tensor<2x3x4xf32>) - // CHECK-SAME: outs({{.*}} : tensor<2x3x4xf32>) + // CHECK-SAME: inits({{.*}} : tensor<2x3x4xf32>) } // ----- @@ -572,7 +572,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor<1x?x?xf32>) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %3 = 
arith.subf %arg5, %arg6 : f32 linalg.yield %3 : f32 @@ -580,7 +580,7 @@ return %2 : tensor // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%{{.*}}, %[[ARG1]] : tensor<1x?x?xf32>, tensor<1x?x?xf32>) -// CHECK-SAME: outs(%{{.*}} : tensor<1x?x?xf32>) +// CHECK-SAME: inits(%{{.*}} : tensor<1x?x?xf32>) // CHECK: tensor.cast %[[GENERIC_OP]] : tensor<1x?x?xf32> to tensor } @@ -594,7 +594,7 @@ // CHECK-DAG: %[[C2:.+]] = arith.constant 2 : index // CHECK-DAG: %[[F0:.+]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[INIT:.+]] = tensor.empty() -// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%[[F0]]{{.*}}inits(%[[INIT]] // CHECK: %[[OFFSET1:.+]] = affine.apply #[[$MAP]]()[%[[LOW1]]] // CHECK: %[[D0:.+]] = tensor.dim %[[INPUT]], %[[C0]] : tensor // CHECK: %[[D1:.+]] = tensor.dim %[[INPUT]], %[[C1]] : tensor @@ -608,7 +608,7 @@ tensor.yield %f0 : f32 } : tensor to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> %0 = tensor.insert_slice %pad into %fill[0, 1, 2] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> return %0: tensor<8x384x384xf32> } @@ -629,7 +629,7 @@ tensor.yield %f0 : f32 } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> %0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> @@ -648,7 +648,7 @@ tensor.yield %f0 : f32 } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> %0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %1 = tensor.insert_slice %a into %0 [0, 0, 129] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> // Range overlap with %1 at dim#3 @@ -668,7 +668,7 @@ tensor.yield %f0 : f32 } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> %0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %1 = tensor.insert_slice %a into %0 [0, 128, 255] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> // Range overlap with %0 at dim#3 @@ -688,7 +688,7 @@ tensor.yield %f0 : f32 } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> - %fill = linalg.fill ins(%f0 : f32) outs(%empty : 
tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f0 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> // Overlap between %0 and %1 is fine, but overlap with %2 is not. // CHECK-COUNT-3: tensor.insert_slice %0 = tensor.insert_slice %a into %fill[0, 0, %offset] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> @@ -711,7 +711,7 @@ } : tensor<7x123x124xf32> to tensor<8x128x128xf32> %empty = tensor.empty() : tensor<8x384x384xf32> // Different filling value than padding value. - %fill = linalg.fill ins(%f1 : f32) outs(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> + %fill = linalg.fill ins(%f1 : f32) inits(%empty : tensor<8x384x384xf32>) -> tensor<8x384x384xf32> %0 = tensor.insert_slice %a into %fill[%offset, 0, 0] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %1 = tensor.insert_slice %a into %0 [0, 128, %offset][8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> %2 = tensor.insert_slice %pad into %1 [0, 0, 256] [8, 128, 128] [1, 1, 1] : tensor<8x128x128xf32> into tensor<8x384x384xf32> @@ -723,7 +723,7 @@ func.func @fold_linalgop_with_cast_consumer(%arg0 : tensor, %arg1 : tensor, %arg2 : tensor) -> (tensor<4x8xf32>, tensor) { %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) -> tensor + inits(%arg2 : tensor) -> tensor %1 = tensor.cast %0 : tensor to tensor<4x8xf32> return %1, %0 : tensor<4x8xf32>, tensor } @@ -736,7 +736,7 @@ // CHECK-DAG: %[[OUT_CAST:.+]] = tensor.cast %[[ARG2]] : tensor to tensor<4x8xf32> // CHECK: %[[MATMUL:.+]] = linalg.matmul // CHECK-SAME: ins(%[[LHS_CAST]], %[[RHS_CAST]] : -// CHECK-SAME: outs(%[[OUT_CAST]] : +// CHECK-SAME: inits(%[[OUT_CAST]] : // CHECK: %[[RESULT_CAST:.+]] = tensor.cast %[[MATMUL]] // CHECK: return %[[MATMUL]], %[[RESULT_CAST]] @@ -747,7 +747,7 @@ func.func @linalgop_with_cond_cast_consumer(%arg0 : tensor, %arg1 : tensor, %arg2 : tensor, %arg3 : i1) -> tensor { %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) -> tensor + inits(%arg2 : tensor) -> tensor scf.if %arg3 { %1 = tensor.cast %0 : tensor to tensor<4x8xf32> func.call @some_use(%1) : (tensor<4x8xf32>) -> () @@ -759,7 +759,7 @@ // CHECK-LABEL: func @linalgop_with_cond_cast_consumer // CHECK-SAME: (%[[ARG0:.*]]: tensor, %[[ARG1:.*]]: tensor, %[[ARG2:.*]]: tensor, %[[ARG3:.*]]: i1) // CHECK: %[[RES:.*]] = linalg.matmul ins(%[[ARG0]], %[[ARG1]] : tensor, tensor) -// CHECK-SAME: outs(%[[ARG2]] : tensor) -> tensor +// CHECK-SAME: inits(%[[ARG2]] : tensor) -> tensor // CHECK: scf.if %[[ARG3]] { // CHECK: %[[CAST:.*]] = tensor.cast %[[RES]] : tensor to tensor<4x8xf32> // CHECK: func.call @some_use(%[[CAST]]) : (tensor<4x8xf32>) -> () @@ -773,7 +773,7 @@ %arg1 : tensor, %arg2 : tensor) -> (tensor<4x8x12x16xf32>, tensor) { %0 = linalg.conv_2d_nchw_fchw ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) -> tensor + inits(%arg2 : tensor) -> tensor %1 = tensor.cast %0 : tensor to tensor<4x8x12x16xf32> return %1, %0 : tensor<4x8x12x16xf32>, tensor } @@ -784,7 +784,7 @@ // CHECK: %[[OUT_CAST:.+]] = tensor.cast %[[ARG2]] : tensor to tensor<4x8x12x16xf32> // CHECK: %[[CONV:.+]] = linalg.conv_2d_nchw_fchw // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[OUT_CAST]] : +// CHECK-SAME: inits(%[[OUT_CAST]] : // CHECK: %[[RESULT_CAST:.+]] = tensor.cast %[[CONV]] // CHECK: return %[[CONV]], %[[RESULT_CAST]] @@ -804,7 +804,7 @@ indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d1,
d2, d0)>, affine_map<(d0, d1, d2) -> (d2, d1, d0)>]} - ins(%arg0 : tensor) outs(%empty1, %empty2 : tensor, tensor) { + ins(%arg0 : tensor) inits(%empty1, %empty2 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32) : linalg.yield %b0, %b0 : f32, f32 } -> (tensor, tensor) @@ -818,7 +818,7 @@ // CHECK-DAG: %[[INIT2:.+]] = tensor.empty() : tensor<3x2x4xf32> // CHECK: %[[GENERIC:.+]]:2 = linalg.generic // CHECK-SAME: ins(%[[CAST]] : -// CHECK-SAME: outs(%[[INIT2]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT2]], %[[INIT1]] : // CHECK: %[[RETURN_CAST:.+]] = tensor.cast %[[GENERIC]]#0 : tensor<3x2x4xf32> to tensor // CHECK: return %[[RETURN_CAST]], %[[GENERIC]]#1 @@ -830,7 +830,7 @@ indexing_maps = [#map, #map], iterator_types = ["parallel"] } ins(%arg0 : tensor) - outs(%arg1 : memref) { + inits(%arg1 : memref) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } @@ -845,7 +845,7 @@ // CHECK-SAME: indexing_maps = [#map, #map], // CHECK-SAME: iterator_types = ["parallel"] // CHECK-SAME: } ins(%[[ARG1]] : tensor) -// CHECK-SAME: outs(%[[ARG2]] : memref) { +// CHECK-SAME: inits(%[[ARG2]] : memref) { // ----- @@ -858,13 +858,13 @@ affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins(%1, %1 : memref<4xf32>, memref<4xf32>) - outs(%0 : tensor<4xf32>) { + inits(%0 : tensor<4xf32>) { ^bb0(%in: f32, %in_24: f32, %out: f32): linalg.yield %in : f32 } -> tensor<4xf32> %53 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%36 : tensor<4xf32>) { + inits(%36 : tensor<4xf32>) { ^bb0(%out: f32): linalg.yield %out : f32 } -> tensor<4xf32> diff --git a/mlir/test/Dialect/Linalg/collapse-dim.mlir b/mlir/test/Dialect/Linalg/collapse-dim.mlir --- a/mlir/test/Dialect/Linalg/collapse-dim.mlir +++ b/mlir/test/Dialect/Linalg/collapse-dim.mlir @@ -7,7 +7,7 @@ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} - ins(%arg0 : tensor<2x32x10x4096xf32>) outs(%arg1 : tensor<2x32xf32>) { + ins(%arg0 : tensor<2x32x10x4096xf32>) inits(%arg1 : tensor<2x32xf32>) { ^bb0(%arg3: f32, %arg4: f32): %1 = arith.addf %arg3, %arg4 : f32 linalg.yield %1 : f32 @@ -22,7 +22,7 @@ // CHECK: %[[T:.*]] = tensor.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : tensor<2x32x10x4096xf32> into tensor<2x32x40960xf32> // CHECK: linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} -// CHECK-SAME: ins(%[[T]] : tensor<2x32x40960xf32>) outs(%{{.*}} : tensor<2x32xf32>) { +// CHECK-SAME: ins(%[[T]] : tensor<2x32x40960xf32>) inits(%{{.*}} : tensor<2x32xf32>) { // CHECK: } -> tensor<2x32xf32> // ----- @@ -34,7 +34,7 @@ affine_map<(d0, d1, d2, d3) -> (d1, d0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%arg0 : tensor<32x2x10x4096xf32>) outs(%arg1 : tensor<2x32x10x4096xf32>) { + ins(%arg0 : tensor<32x2x10x4096xf32>) inits(%arg1 : tensor<2x32x10x4096xf32>) { ^bb0(%arg3: f32, %arg4: f32): %1 = arith.addf %arg3, %arg4 : f32 linalg.yield %1 : f32 @@ -50,6 +50,6 @@ // CHECK-DAG: %[[D:.*]] = tensor.collapse_shape %{{.*}} {{\[}}[0], [1], [2, 3]] : tensor<2x32x10x4096xf32> into tensor<2x32x40960xf32> // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]} -// CHECK-SAME: ins(%[[S]] : tensor<32x2x40960xf32>) 
outs(%[[D]] : tensor<2x32x40960xf32>) { +// CHECK-SAME: ins(%[[S]] : tensor<32x2x40960xf32>) inits(%[[D]] : tensor<2x32x40960xf32>) { // CHECK: } -> tensor<2x32x40960xf32> // CHECK: tensor.expand_shape %[[R]] {{\[}}[0], [1], [2, 3]] : tensor<2x32x40960xf32> into tensor<2x32x10x4096xf32> diff --git a/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir b/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir --- a/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir +++ b/mlir/test/Dialect/Linalg/conv-interface-invalid.mlir @@ -18,7 +18,7 @@ %0 = test.linalg_conv_op { indexing_maps = [#map, #map], iterator_types = ["parallel"]} - ins(%arg0 : tensor) outs(%arg1 : tensor) { + ins(%arg0 : tensor) inits(%arg1 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg3 : f32 } -> tensor @@ -36,7 +36,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -54,7 +54,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -72,7 +72,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -90,7 +90,7 @@ affine_map<(d0, d1) -> (d0 + d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -110,7 +110,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -129,7 +129,7 @@ affine_map<(d0, d1, d2) -> (d0, d2)>], iterator_types = ["parallel", "reduction", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -148,7 +148,7 @@ affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -167,7 +167,7 @@ affine_map<(d0, d1, d2) -> (d0)>], iterator_types = ["parallel", "reduction", "reduction"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor @@ -186,7 +186,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2 : tensor) { + inits(%arg2 : tensor) { ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32): linalg.yield %arg5 : f32 } -> tensor diff --git a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir --- a/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir +++ b/mlir/test/Dialect/Linalg/convert-elementwise-to-linalg.mlir @@ -10,7 +10,7 @@ // CHECK-SAME: indexing_maps = [#[[$MAP]], #[[$MAP]], 
#[[$MAP]]] // CHECK-SAME: iterator_types = [] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[ARG0]] + // CHECK-SAME: inits(%[[ARG0]] // CHECK: ^bb0(%[[LHS:.*]]: f32, %[[RHS:.*]]: f32, %{{.*}}: f32): // CHECK: %[[YIELD:.*]] = arith.addf %[[LHS]], %[[RHS]] : f32 // CHECK: linalg.yield %[[YIELD]] : f32 @@ -29,7 +29,7 @@ // CHECK: linalg.generic // CHECK-SAME: iterator_types = ["parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[ARG0]] + // CHECK-SAME: inits(%[[ARG0]] %0 = arith.addf %arg0, %arg1 : tensor return %0 : tensor } @@ -42,7 +42,7 @@ func.func @exp(%arg0: tensor) -> tensor { // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]] - // CHECK-SAME: outs(%[[ARG0]] + // CHECK-SAME: inits(%[[ARG0]] // CHECK: ^bb0(%[[SCALAR:.*]]: f32, %{{.*}}: f32): // CHECK: %[[YIELD:.*]] = math.exp %[[SCALAR]] : f32 // CHECK: linalg.yield %[[YIELD]] : f32 @@ -60,7 +60,7 @@ func.func @select(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor { // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] - // CHECK-SAME: outs(%[[ARG1]] + // CHECK-SAME: inits(%[[ARG1]] // CHECK: ^bb0(%[[PRED:.*]]: i1, %[[TRUE_VAL:.*]]: i32, %[[FALSE_VAL:.*]]: i32, %{{.*}}: i32): // CHECK: arith.select %[[PRED]], %[[TRUE_VAL]], %[[FALSE_VAL]] : i32 %0 = arith.select %arg0, %arg1, %arg2 : tensor, tensor @@ -78,7 +78,7 @@ // CHECK: %[[INIT:.*]] = tensor.empty() : tensor // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[INIT]] + // CHECK-SAME: inits(%[[INIT]] // CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: i1): // CHECK: arith.cmpf olt, %{{.*}}, %{{.*}} : f32 %0 = arith.cmpf olt, %arg0, %arg1 : tensor @@ -101,7 +101,7 @@ // CHECK: %[[INIT:.*]] = tensor.empty(%[[D1]], %[[D2]], %[[D5]]) : tensor<4x?x?x8x2x?xi1> // CHECK: linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] - // CHECK-SAME: outs(%[[INIT]] + // CHECK-SAME: inits(%[[INIT]] // CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32, %{{.*}}: i1): // CHECK: arith.cmpf olt, %{{.*}}, %{{.*}} : f32 %0 = arith.cmpf olt, %arg0, %arg1 : tensor<4x?x?x8x2x?xf32> diff --git a/mlir/test/Dialect/Linalg/decompose-ops.mlir b/mlir/test/Dialect/Linalg/decompose-ops.mlir --- a/mlir/test/Dialect/Linalg/decompose-ops.mlir +++ b/mlir/test/Dialect/Linalg/decompose-ops.mlir @@ -15,7 +15,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor, tensor, tensor) - outs(%init1, %init2 : tensor, tensor) { + inits(%init1, %init2 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32) : %0 = arith.addf %b0, %b1 : f32 %1 = arith.mulf %0, %b2 : f32 @@ -41,7 +41,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP3]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -55,7 +55,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP3]], #[[MAP0]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[GENERIC1]]#2 : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B6:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B7:[a-zA-Z0-9_]+]]: f32 @@ -87,7 +87,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP0]], #[[MAP1]], 
#[[MAP2]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -98,7 +98,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP3]], #[[MAP2]], #[[MAP0]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG2]], %[[GENERIC1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT2]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B3:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B4:[a-zA-Z0-9_]+]]: f32 @@ -124,7 +124,7 @@ affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1, %arg2 : tensor, tensor, tensor) - outs(%init1, %init2, %init2 : tensor, tensor, tensor) { + inits(%init1, %init2, %init2 : tensor, tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32, %b4 : f32, %b5 : f32) : %0 = arith.addf %b0, %b1 : f32 %1 = arith.mulf %0, %b2 : f32 @@ -150,7 +150,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP0]], #[[MAP0]], #[[MAP3]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]] : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]], %[[INIT2]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]], %[[INIT2]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -165,7 +165,7 @@ // CHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP3]], #[[MAP0]], #[[MAP0]]] // CHECK-SAME: ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[ARG2]], %[[GENERIC1]]#3 : -// CHECK-SAME: outs(%[[INIT1]], %[[INIT2]], %[[INIT2]] : +// CHECK-SAME: inits(%[[INIT1]], %[[INIT2]], %[[INIT2]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B7:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B8:[a-zA-Z0-9_]+]]: f32 @@ -195,7 +195,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]], %[[INIT2]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]], %[[INIT2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 @@ -206,7 +206,7 @@ // CANONICALIZECHECK-SAME: [#[[MAP3]], #[[MAP2]], #[[MAP0]]] // CANONICALIZECHECK-SAME: ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG2]], %[[GENERIC1]]#0 : -// CANONICALIZECHECK-SAME: outs(%[[INIT2]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B4:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[B5:[a-zA-Z0-9_]+]]: f32 @@ -226,7 +226,7 @@ indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor<10x20xf32>, tensor<10xi32>) - outs(%init : tensor<20x10xf64>) { + inits(%init : tensor<20x10xf64>) { ^bb0(%b0 : f32, %b1 : i32, %b2 : f64): %1 = arith.sitofp %b1 : i32 to f64 %2 = arith.extf %b0 : f32 to f64 @@ -248,7 +248,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[INIT0]], 
%[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:.+]]: f32 // CHECK-SAME: %[[B1:.+]]: i32 @@ -260,7 +260,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP0]], #[[MAP2]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[GENERIC0]]#1 : -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B4:.+]]: f32 // CHECK-SAME: %[[B5:.+]]: i32 @@ -273,7 +273,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP0]], #[[MAP0]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[GENERIC0]]#1, %[[GENERIC1]]#1 : -// CHECK-SAME: outs(%[[INIT0]] : +// CHECK-SAME: inits(%[[INIT0]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B9:.+]]: f32 // CHECK-SAME: %[[B10:.+]]: i32 @@ -296,7 +296,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B0:.+]]: i32 // CANONICALIZECHECK-SAME: %[[B1:.+]]: f64 @@ -306,7 +306,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP1]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT1]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT1]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B2:.+]]: f32 // CANONICALIZECHECK-SAME: %[[B3:.+]]: f64 @@ -316,7 +316,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP1]], #[[MAP2]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[GENERIC0]], %[[GENERIC1]] : -// CANONICALIZECHECK-SAME: outs(%[[INIT0]] : +// CANONICALIZECHECK-SAME: inits(%[[INIT0]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[B4:[a-zA-Z0-9_]+]]: f64 // CANONICALIZECHECK-SAME: %[[B5:[a-zA-Z0-9_]+]]: f64 @@ -339,7 +339,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%arg2, %arg3 : tensor, tensor) { + inits(%arg2, %arg3 : tensor, tensor) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32) : %1 = arith.addf %b0, %b2 : f32 %2 = arith.mulf %b1, %b3 : f32 @@ -360,7 +360,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[ARG2]], %[[ARG3]], %[[ARG2]] : +// CHECK-SAME: inits(%[[ARG2]], %[[ARG3]], %[[ARG2]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: f32 @@ -373,7 +373,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]], %[[GENERIC1]]#2 : -// CHECK-SAME: outs(%[[ARG2]], %[[ARG3]] : +// CHECK-SAME: inits(%[[ARG2]], %[[ARG3]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[ARG9:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[ARG10:[a-zA-Z0-9_]+]]: f32 @@ -397,7 +397,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", 
"parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG0]] : -// CANONICALIZECHECK-SAME: outs(%[[ARG2]] : +// CANONICALIZECHECK-SAME: inits(%[[ARG2]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: f32 @@ -407,7 +407,7 @@ // CANONICALIZECHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP1]], #[[MAP1]], #[[MAP3]]] // CANONICALIZECHECK-SAME: iterator_types = ["parallel", "parallel"] // CANONICALIZECHECK-SAME: ins(%[[ARG1]], %[[GENERIC1]] : -// CANONICALIZECHECK-SAME: outs(%[[ARG2]], %[[ARG3]] : +// CANONICALIZECHECK-SAME: inits(%[[ARG2]], %[[ARG3]] : // CANONICALIZECHECK-NEXT: ^bb0( // CANONICALIZECHECK-SAME: %[[ARG4:[a-zA-Z0-9_]+]]: f32 // CANONICALIZECHECK-SAME: %[[ARG5:[a-zA-Z0-9_]+]]: f32 diff --git a/mlir/test/Dialect/Linalg/detensorize_0d.mlir b/mlir/test/Dialect/Linalg/detensorize_0d.mlir --- a/mlir/test/Dialect/Linalg/detensorize_0d.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_0d.mlir @@ -6,7 +6,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -25,7 +25,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -34,7 +34,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %5 = arith.mulf %arg3, %arg4 : f32 linalg.yield %5 : f32 @@ -43,7 +43,7 @@ %6 = tensor.empty() : tensor %7 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%1, %4 : tensor, tensor) - outs(%6 : tensor) { + inits(%6 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %5 = arith.divf %arg3, %arg4 : f32 linalg.yield %5 : f32 @@ -65,7 +65,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 %3 = arith.mulf %2, %arg4 : f32 @@ -86,7 +86,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = []} ins(%arg1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = "foreign.do_something"(%arg3, %arg4) {} : (f32, f32) -> f32 linalg.yield %2 : f32 diff --git a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir --- a/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_br_operands.mlir @@ -10,7 +10,7 @@ %3 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} ins(%arg0_t : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i1, %arg3: i8): %10 = arith.extui %arg2 : i1 to i8 linalg.yield %10 : i8 @@ -23,7 +23,7 @@ %7 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} ins(%arg1_t, %cst : tensor, tensor) - outs(%6 : 
tensor) { + inits(%6 : tensor) { ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): %10 = arith.addi %arg2, %arg3 : i32 linalg.yield %10 : i32 diff --git a/mlir/test/Dialect/Linalg/detensorize_if.mlir b/mlir/test/Dialect/Linalg/detensorize_if.mlir --- a/mlir/test/Dialect/Linalg/detensorize_if.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_if.mlir @@ -18,7 +18,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic #attrs ins(%2, %1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 @@ -30,7 +30,7 @@ %7 = tensor.empty() : tensor %8 = linalg.generic #attrs ins(%6, %6 : tensor, tensor) - outs(%7 : tensor) { + inits(%7 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): %9 = arith.addi %arg0, %arg1 : i32 linalg.yield %9 : i32 @@ -79,7 +79,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic #attrs ins(%2, %1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 @@ -91,7 +91,7 @@ %7 = tensor.empty() : tensor %8 = linalg.generic #attrs ins(%6, %6 : tensor, tensor) - outs(%7 : tensor) { + inits(%7 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): %9 = arith.addi %arg0, %arg1 : i32 linalg.yield %9 : i32 @@ -142,7 +142,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic #attrs ins(%2, %1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 @@ -159,7 +159,7 @@ %7 = tensor.empty() : tensor %8 = linalg.generic #attrs ins(%6, %12 : tensor, tensor) - outs(%7 : tensor) { + inits(%7 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): %9 = arith.addi %arg0, %arg1 : i32 linalg.yield %9 : i32 diff --git a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir --- a/mlir/test/Dialect/Linalg/detensorize_trivial.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_trivial.mlir @@ -15,7 +15,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic #attrs ins(%farg0, %1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 diff --git a/mlir/test/Dialect/Linalg/detensorize_while.mlir b/mlir/test/Dialect/Linalg/detensorize_while.mlir --- a/mlir/test/Dialect/Linalg/detensorize_while.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while.mlir @@ -15,7 +15,7 @@ %1 = tensor.empty() : tensor %2 = linalg.generic #attrs ins(%0, %farg1 : tensor, tensor) - outs(%1 : tensor) { + inits(%1 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 @@ -27,7 +27,7 @@ %5 = tensor.empty() : tensor %6 = linalg.generic #attrs ins(%4, %4 : tensor, tensor) - outs(%5 : tensor) { + inits(%5 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): %8 = arith.addi %arg0, %arg1 : i32 linalg.yield %8 : i32 diff --git a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir --- a/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while_impure_cf.mlir @@ -28,7 +28,7 @@ %1 = tensor.empty() : tensor %2 = linalg.generic #sum_reduction_attrs ins(%0: tensor<10xi32>) - outs(%1: tensor) { + inits(%1: tensor) { ^bb(%a: i32, %x: i32): %b = arith.addi %x, %a : i32 linalg.yield %b : i32 @@ -37,7 +37,7 @@ %3 = 
tensor.empty() : tensor %4 = linalg.generic #attrs ins(%2, %farg1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 @@ -49,7 +49,7 @@ %7 = tensor.empty() : tensor<10xi32> %9 = linalg.generic #broadcast_attrs ins(%6: tensor) - outs(%7: tensor<10xi32>) { + inits(%7: tensor<10xi32>) { ^bb(%a: i32, %b: i32) : linalg.yield %a : i32 } -> tensor<10xi32> @@ -67,7 +67,7 @@ // DET-ALL: cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>) // DET-ALL: ^[[bb1]](%{{.*}}: tensor<10xi32>) // DET-ALL: tensor.empty() : tensor -// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor) { +// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) inits(%{{.*}} : tensor) { // DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32): // DET-ALL: %{{.*}} = arith.addi %{{.*}}, %{{.*}} // DET-ALL: linalg.yield %{{.*}} : i32 @@ -78,7 +78,7 @@ // DET-ALL: ^[[bb2]](%{{.*}}: i32) // DET-ALL: tensor.from_elements %{{.*}} : tensor // DET-ALL: tensor.empty() : tensor<10xi32> -// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor) outs(%{{.*}} : tensor<10xi32>) { +// DET-ALL: linalg.generic {{{.*}}} ins(%{{.*}} : tensor) inits(%{{.*}} : tensor<10xi32>) { // DET-ALL: ^bb0(%{{.*}}: i32, %{{.*}}: i32): // DET-ALL: linalg.yield %{{.*}} : i32 // DET-ALL: } -> tensor<10xi32> @@ -92,12 +92,12 @@ // DET-CF-SAME: (%{{.*}}: tensor<10xi32>, %{{.*}}: tensor) // DET-CF: cf.br ^[[bb1:.*]](%{{.*}} : tensor<10xi32>) // DET-CF: ^bb1(%{{.*}}: tensor<10xi32>) -// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) outs(%{{.*}} : tensor) { +// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor<10xi32>) inits(%{{.*}} : tensor) { // DET-CF: tensor.extract %{{.*}}[] : tensor // DET-CF: cmpi slt, %{{.*}}, %{{.*}} : i32 // DET-CF: cf.cond_br %{{.*}}, ^bb2(%{{.*}} : tensor), ^bb3(%{{.*}} : tensor) // DET-CF: ^bb2(%{{.*}}: tensor) -// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor) outs(%{{.*}} : tensor<10xi32>) { +// DET-CF: %{{.*}} = linalg.generic {{{.*}}} ins(%{{.*}} : tensor) inits(%{{.*}} : tensor<10xi32>) { // DET-CF: cf.br ^bb1(%{{.*}} : tensor<10xi32>) // DET-CF: ^bb3(%{{.*}}: tensor) // DET-CF: return %{{.*}} : tensor diff --git a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir --- a/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir +++ b/mlir/test/Dialect/Linalg/detensorize_while_pure_cf.mlir @@ -20,7 +20,7 @@ %3 = tensor.empty() : tensor %4 = linalg.generic #attrs ins(%2, %reshaped1 : tensor, tensor) - outs(%3 : tensor) { + inits(%3 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i1): %8 = arith.cmpi slt, %arg0, %arg1 : i32 linalg.yield %8 : i1 @@ -32,7 +32,7 @@ %7 = tensor.empty() : tensor %8 = linalg.generic #attrs ins(%6, %6 : tensor, tensor) - outs(%7 : tensor) { + inits(%7 : tensor) { ^bb0(%arg0: i32, %arg1: i32, %arg2: i32): %9 = arith.addi %arg0, %arg1 : i32 linalg.yield %9 : i32 diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -15,7 +15,7 @@ func.func @drop_one_trip_loops(%arg0 : tensor, %arg1 : f32, %shape: tensor) -> tensor { %0 = linalg.generic #trait ins(%arg0, %arg1 : tensor, f32) - outs(%shape : tensor) { + inits(%shape : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield 
%arg3 : f32 } -> tensor @@ -49,7 +49,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor) - outs(%shape: tensor) { + inits(%shape: tensor) { ^bb0(%arg6 : i32, %arg7 : i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -95,7 +95,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xf32>) - outs(%arg0 : tensor<1x1xf32>) { + inits(%arg0 : tensor<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } -> tensor<1x1xf32> @@ -122,7 +122,7 @@ (%arg0 : tensor<1x1xi32>) -> tensor<1x1xi32>{ %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xi32>) - outs(%arg0 : tensor<1x1xi32>) { + inits(%arg0 : tensor<1x1xi32>) { ^bb0(%arg3: i32, %arg4: i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -155,7 +155,7 @@ func.func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> { %0 = linalg.generic #trait ins(%arg0 : tensor<1x5xf32>) - outs(%shape : tensor<5xf32>) { + inits(%shape : tensor<5xf32>) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } -> tensor<5xf32> @@ -189,7 +189,7 @@ %1 = tensor.expand_shape %arg1 [[0, 1]] : tensor<5xf32> into tensor<5x1xf32> %2 = linalg.generic #trait ins(%0, %1 : tensor<1x5xf32>, tensor<5x1xf32>) - outs(%shape : tensor<5x5xf32>) { + inits(%shape : tensor<5x5xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.addf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -223,7 +223,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xf32>) - outs(%shape : tensor) { + inits(%shape : tensor) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } -> tensor @@ -249,7 +249,7 @@ %1 = tensor.empty() : tensor<1x2x5xf32> %2 = linalg.generic {i64, indexing_maps = [#map1, #map0], iterator_types = ["parallel", "parallel", "parallel"]} - ins(%arg0 : tensor<5xf32>) outs(%1 : tensor<1x2x5xf32>) { + ins(%arg0 : tensor<5xf32>) inits(%1 : tensor<1x2x5xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<1x2x5xf32> @@ -266,11 +266,11 @@ func.func @fold_unit_dim_for_empty_tensor(%input: tensor<1x1000xf32>) -> tensor<1xf32> { %cst = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<1xf32> - %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1xf32>) -> tensor<1xf32> + %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1xf32>) -> tensor<1xf32> %add = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%input : tensor<1x1000xf32>)outs(%fill : tensor<1xf32>) { + ins(%input : tensor<1x1000xf32>)inits(%fill : tensor<1xf32>) { ^bb0(%arg1: f32, %arg2: f32): %1823 = arith.addf %arg1, %arg2 : f32 linalg.yield %1823 : f32 @@ -287,12 +287,12 @@ // CHECK: %[[INPUT_RESHAPE:.+]] = tensor.collapse_shape %{{.+}} {{\[}}[0, 1]] : tensor<1x1000xf32> into tensor<1000xf32> // CHECK: %[[INIT:.+]] = tensor.empty() : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor) -> tensor +// CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor) -> tensor // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["reduction"] // CHECK-SAME: ins(%[[INPUT_RESHAPE]] : tensor<1000xf32>) -// CHECK-SAME: outs(%[[FILL]] : tensor) +// CHECK-SAME: inits(%[[FILL]] : tensor) // CHECK: %[[GENERIC_RESHAPE:.+]] = tensor.expand_shape %[[GENERIC]] [] : tensor into tensor<1xf32> // CHECK: return %[[GENERIC_RESHAPE:.+]] : tensor<1xf32> @@ -331,13 +331,13 @@ %c3 = 
arith.constant 3 : index %0 = tensor.dim %arg0, %c3 : tensor<1x?x1x?xf32> %1 = tensor.empty(%0) : tensor<1x?xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x?xf32>) -> tensor<1x?xf32> + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<1x?xf32>) -> tensor<1x?xf32> %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} ins(%arg0 : tensor<1x?x1x?xf32>) - outs(%2 : tensor<1x?xf32>) { + inits(%2 : tensor<1x?xf32>) { ^bb0(%arg1: f32, %arg2: f32): %4 = arith.addf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -350,12 +350,12 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x?xf32> // CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2], [3]] // CHECK: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "reduction"] // CHECK-SAME: ins(%[[RESHAPE]] : tensor) -// CHECK-SAME: outs(%[[FILL]] : tensor) +// CHECK-SAME: inits(%[[FILL]] : tensor) // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[RESULT]] {{\[}}[0, 1]] // CHECK: return %[[RESULT_RESHAPE]] @@ -365,13 +365,13 @@ %cst = arith.constant 1.000000e+00 : f32 %c3 = arith.constant 3 : index %1 = tensor.empty() : tensor<1x1xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} ins(%arg0 : tensor<1x?x1x1xf32>) - outs(%2 : tensor<1x1xf32>) { + inits(%2 : tensor<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32): %4 = arith.addf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -383,12 +383,12 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor<1x?x1x1xf32> // CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape %[[ARG0]] {{\[}}[0, 1, 2, 3] // CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1xf32> -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel"] // CHECK-SAME: ins(%[[RESHAPE]] : tensor) -// CHECK-SAME: outs(%[[FILL]] : tensor<1xf32>) +// CHECK-SAME: inits(%[[FILL]] : tensor<1xf32>) // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[RESULT]] {{\[}}[0, 1]] // CHECK: return %[[RESULT_RESHAPE]] @@ -399,13 +399,13 @@ %c2 = arith.constant 2 : index %0 = tensor.dim %arg0, %c2 : tensor %1 = tensor.empty(%0) : tensor - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor %3 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction", "reduction"]} ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg1: f32, %arg2: f32): %4 = arith.addf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -418,12 +418,12 @@ // CHECK-SAME: %[[ARG0:.+]]: tensor // CHECK-DAG: %[[RESHAPE:.+]] = tensor.collapse_shape 
%[[ARG0]] {{\[}}[0, 1], [2, 3]] // CHECK: %[[INIT:.+]] = tensor.empty(%{{.+}}) : tensor -// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}outs(%[[INIT]] +// CHECK: %[[FILL:.+]] = linalg.fill ins(%{{.+}}{{.*}}inits(%[[INIT]] // CHECK: %[[RESULT:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "reduction"] // CHECK-SAME: ins(%[[RESHAPE]] : tensor) -// CHECK-SAME: outs(%[[FILL]] : tensor) +// CHECK-SAME: inits(%[[FILL]] : tensor) // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[RESULT]] {{\[}}[0, 1]] // CHECK: return %[[RESULT_RESHAPE]] @@ -468,7 +468,7 @@ func.func @drop_one_trip_loops(%arg0 : memref, %arg1 : f32, %shape: memref) -> memref { linalg.generic #trait ins(%arg0, %arg1 : memref, f32) - outs(%shape : memref) { + inits(%shape : memref) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield %arg3 : f32 } @@ -501,7 +501,7 @@ { linalg.generic #trait ins(%arg0 : memref) - outs(%shape: memref) { + inits(%shape: memref) { ^bb0(%arg6 : i32, %arg7 : i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -547,7 +547,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32>) - outs(%arg0 : memref<1x1xf32>) { + inits(%arg0 : memref<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } @@ -574,7 +574,7 @@ (%arg0 : memref<1x1xi32>) -> memref<1x1xi32>{ linalg.generic #trait ins(%arg0 : memref<1x1xi32>) - outs(%arg0 : memref<1x1xi32>) { + inits(%arg0 : memref<1x1xi32>) { ^bb0(%arg3: i32, %arg4: i32) : %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -607,7 +607,7 @@ func.func @leading_dim_1_canonicalization(%arg0: memref<1x5xf32>, %shape: memref<5xf32>) -> memref<5xf32> { linalg.generic #trait ins(%arg0 : memref<1x5xf32>) - outs(%shape : memref<5xf32>) { + inits(%shape : memref<5xf32>) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } @@ -641,7 +641,7 @@ %1 = memref.expand_shape %arg1 [[0, 1]] : memref<5xf32> into memref<5x1xf32> linalg.generic #trait ins(%0, %1 : memref<1x5xf32>, memref<5x1xf32>) - outs(%shape : memref<5x5xf32>) { + inits(%shape : memref<5x5xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.addf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -675,7 +675,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32>) - outs(%shape : memref) { + inits(%shape : memref) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } @@ -701,7 +701,7 @@ %1 = memref.alloc() : memref<1x2x5xf32> linalg.generic {i64, indexing_maps = [#map1, #map0], iterator_types = ["parallel", "parallel", "parallel"]} - ins(%arg0 : memref<5xf32>) outs(%1 : memref<1x2x5xf32>) { + ins(%arg0 : memref<5xf32>) inits(%1 : memref<1x2x5xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } @@ -713,7 +713,7 @@ // CHECK: %[[ALLOC:.*]] = memref.alloc() : memref<1x2x5xf32> // CHECK: %[[OUT:.*]] = memref.collapse_shape %[[ALLOC]] // CHECK: linalg.generic -// CHECK-SAME: outs(%[[OUT:.*]] : +// CHECK-SAME: inits(%[[OUT:.*]] : // CHECK: %[[RESULT:.*]] = memref.collapse_shape %[[ALLOC]] // CHECK: return %[[RESULT]] @@ -725,7 +725,7 @@ linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%input : memref<1x1000xf32>)outs(%init : memref<1xf32>) { + ins(%input : memref<1x1000xf32>)inits(%init : memref<1xf32>) { ^bb0(%arg1: f32, %arg2: f32): %1823 = arith.addf %arg1, %arg2 : f32 linalg.yield %1823 : f32 @@ -745,7 +745,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP1]], #[[MAP2]]] 
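For reference, a minimal sketch of the renamed syntax these unit-dimension tests now check, with illustrative types and names that are not taken from the patch:

```mlir
func.func @sum_rows(%in : tensor<1x8xf32>, %acc : tensor<1xf32>) -> tensor<1xf32> {
  // `inits` is the renamed `outs`: it carries the accumulator that the
  // reduction reads and updates, and it determines the result type.
  %0 = linalg.generic {
      indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                       affine_map<(d0, d1) -> (d0)>],
      iterator_types = ["parallel", "reduction"]}
      ins(%in : tensor<1x8xf32>) inits(%acc : tensor<1xf32>) {
    ^bb0(%a : f32, %b : f32):
      %s = arith.addf %a, %b : f32
      linalg.yield %s : f32
  } -> tensor<1xf32>
  return %0 : tensor<1xf32>
}
```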
// CHECK-SAME: iterator_types = ["reduction"] // CHECK-SAME: ins(%[[INPUT_RESHAPE]] : memref<1000xf32>) -// CHECK-SAME: outs(%[[INIT_RESHAPE]] : memref) +// CHECK-SAME: inits(%[[INIT_RESHAPE]] : memref) // CHECK: return %[[INIT:.+]] : memref<1xf32> @@ -768,7 +768,7 @@ func.func @input_stays_same(%arg0 : memref>, %arg1 : f32, %shape: memref) -> memref { linalg.generic #trait ins(%arg0, %arg1 : memref>, f32) - outs(%shape : memref) { + inits(%shape : memref) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32) : linalg.yield %arg3 : f32 } @@ -788,7 +788,7 @@ // CHECK-SAME: {indexing_maps = [#[[MAP1]], #[[MAP2]], #[[MAP3]]], // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"]} // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : memref>, f32) -// CHECK-SAME: outs(%[[OUT]] : memref) { +// CHECK-SAME: inits(%[[OUT]] : memref) { // CHECK: ^bb0(%{{.*}}: f32, %[[ARG:.*]]: f32, %{{.*}}: f32): // CHECK: linalg.yield %[[ARG]] : f32 // CHECK: } @@ -812,7 +812,7 @@ %0 = tensor.empty() : tensor<8xf32> %1 = linalg.generic #matvec ins(%arg0, %arg1: tensor<8x8xf32, #CSR>, tensor<8xf32>) - outs(%0: tensor<8xf32>) { + inits(%0: tensor<8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %m = arith.mulf %a, %b : f32 %add = arith.addf %x, %m : f32 @@ -834,7 +834,7 @@ %0 = tensor.empty() : tensor<4x2xf32> %res = scf.foreach_thread (%arg0, %arg1) in (%c4, %c2) shared_outs(%o = %0) -> (tensor<4x2xf32>) { %1 = tensor.empty() : tensor<1x1xf32> - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor<1x1xf32>) -> tensor<1x1xf32> scf.foreach_thread.perform_concurrently { // CHECK: tensor.parallel_insert_slice %{{[0-9a-z]*}} into %{{[0-9a-z]*}} // CHECK-SAME: [%{{.*}}, %{{.*}}] [1, 1] [1, 1] : tensor into tensor<4x2xf32> @@ -859,7 +859,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32, 3>) - outs(%arg0 : memref<1x1xf32, 3>) { + inits(%arg0 : memref<1x1xf32, 3>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } @@ -867,6 +867,6 @@ } // CHECK-LABEL: func @drop_all_loops -// CHECK: memref.collapse_shape +// CHECK: memref.collapse_shape // CHECK-SAME: [] : memref<1x1xf32, 3> into memref // CHECK: linalg.generic{{.*}}memref diff --git a/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir b/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir --- a/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir +++ b/mlir/test/Dialect/Linalg/fill-interface-invalid.mlir @@ -18,7 +18,7 @@ %0 = test.linalg_fill_op { indexing_maps = [#map0, #map0, #map1], iterator_types = ["parallel"]} - ins(%arg0, %arg0 : f32, f32) outs(%arg1 : tensor) { + ins(%arg0, %arg0 : f32, f32) inits(%arg1 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32, %arg4 : f32): linalg.yield %arg2 : f32 } -> tensor @@ -34,7 +34,7 @@ %0 = test.linalg_fill_op { indexing_maps = [#map1, #map1], iterator_types = ["parallel"]} - ins(%arg0 : tensor) outs(%arg1 : tensor) { + ins(%arg0 : tensor) inits(%arg1 : tensor) { ^bb0(%arg2 : f32, %arg3 : f32): linalg.yield %arg2 : f32 } -> tensor diff --git a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir --- a/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir +++ b/mlir/test/Dialect/Linalg/fold-unit-trip-loops.mlir @@ -15,7 +15,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor) - outs(%shape : tensor) { + inits(%shape : tensor) { ^bb0(%arg1 : f32, %arg2 : f32) : linalg.yield %arg1 : f32 } -> tensor @@ -42,7 +42,7 @@ { %0 = linalg.generic #trait ins(%arg0 : tensor<1x1xf32>) - outs(%arg0 : tensor<1x1xf32>) { + 
inits(%arg0 : tensor<1x1xf32>) { ^bb0(%arg1: f32, %arg2: f32) : linalg.yield %arg1 : f32 } -> tensor<1x1xf32> @@ -68,7 +68,7 @@ { linalg.generic #trait ins(%arg0 : memref<1x1xf32>) - outs(%arg1 : memref<1x1xf32>) { + inits(%arg1 : memref<1x1xf32>) { ^bb0(%arg2: f32, %arg3 : f32) : linalg.yield %arg2 : f32 } @@ -96,7 +96,7 @@ func.func @leading_dim_1_canonicalization(%arg0: tensor<1x5xf32>, %shape: tensor<5xf32>) -> tensor<5xf32> { %0 = linalg.generic #trait ins(%arg0 : tensor<1x5xf32>) - outs(%shape : tensor<5xf32>) { + inits(%shape : tensor<5xf32>) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } -> tensor<5xf32> diff --git a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir --- a/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir +++ b/mlir/test/Dialect/Linalg/forward-vector-transfers.mlir @@ -29,7 +29,7 @@ %c0 = arith.constant 0: index %f0 = arith.constant 0.0: f32 %alloc = memref.alloc() : memref<32 x f32> - linalg.fill ins(%f0 : f32) outs(%alloc : memref<32 x f32>) + linalg.fill ins(%f0 : f32) inits(%alloc : memref<32 x f32>) %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> memref.copy %in, %subview : memref to memref<16 x f32> %0 = vector.transfer_read %alloc[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> @@ -69,7 +69,7 @@ %alloc = memref.alloc() : memref<128 x i8> %view = memref.view %alloc[%c0][] : memref<128 x i8> to memref<32 x f32> %subview = memref.subview %view[0][16][1] : memref<32 x f32> to memref<16 x f32> - linalg.fill ins(%f0 : f32) outs(%view : memref<32 x f32>) + linalg.fill ins(%f0 : f32) inits(%view : memref<32 x f32>) memref.copy %in, %subview : memref to memref<16 x f32> %0 = vector.transfer_read %view[%c0], %f0 {in_bounds = [true]} : memref<32 x f32>, vector<32 x f32> memref.dealloc %alloc : memref<128 x i8> @@ -129,7 +129,7 @@ %f0 = arith.constant 0.0: f32 %f1 = arith.constant 1.0: f32 %alloc = memref.alloc() : memref<32 x f32> - linalg.fill ins(%f0 : f32) outs(%alloc : memref<32 x f32>) + linalg.fill ins(%f0 : f32) inits(%alloc : memref<32 x f32>) %subview = memref.subview %alloc[0][16][1] : memref<32 x f32> to memref<16 x f32> memref.copy %in, %subview : memref to memref<16 x f32> "some_interleaved_use"(%subview) : (memref<16 x f32>) -> () diff --git a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir --- a/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir +++ b/mlir/test/Dialect/Linalg/fuse-with-reshape-by-collapsing.mlir @@ -16,7 +16,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor<2x3x4x5x6x7x8x9xi32>, tensor<2x3x4xi32>, tensor<5x6x7x8xi32>) - outs(%init : tensor<2x3x4x5x6x7x8x9xi32>) { + inits(%init : tensor<2x3x4x5x6x7x8x9xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %t0 = arith.addi %b0, %b1 : i32 %t1 = arith.addi %t0, %b2 : i32 @@ -39,7 +39,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP0]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1_RESHAPE]], %[[ARG2_RESHAPE]] : -// CHECK-SAME: outs(%[[INIT_RESHAPE]] : +// CHECK-SAME: inits(%[[INIT_RESHAPE]] : // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[COLLAPSED_OP]] {{\[}}[0], [1, 2], [3], [4, 5, 6], [7]{{\]}} // 
CHECK: return %[[RESULT_RESHAPE]] @@ -67,7 +67,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor<2x3x4x5x6x7x8x9xi32>, tensor<2x3x4xi32>, tensor<5x6x7x8xi32>) - outs(%init : tensor<2x3x4x5x6x7x8x9xi32>) { + inits(%init : tensor<2x3x4x5x6x7x8x9xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %iv0 = linalg.index 0: index %iv1 = linalg.index 1: index @@ -129,7 +129,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor<9x7x8x2x3x4x5x6xi32>, tensor<7x8x2xi32>, tensor<6x3x4x5xi32>) - outs(%init : tensor<2x3x4x5x6x7x8x9xi32>) { + inits(%init : tensor<2x3x4x5x6x7x8x9xi32>) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %t0 = arith.addi %b0, %b1 : i32 %t1 = arith.addi %t0, %b2 : i32 @@ -153,7 +153,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1_RESHAPE]], %[[ARG2_RESHAPE]] : -// CHECK-SAME: outs(%[[INIT_RESHAPE]] : +// CHECK-SAME: inits(%[[INIT_RESHAPE]] : // CHECK: %[[RESULT_RESHAPE:.+]] = tensor.expand_shape %[[COLLAPSED_OP]] {{\[}}[0], [1, 2, 3], [4], [5, 6], [7]{{\]}} // CHECK: return %[[RESULT_RESHAPE]] @@ -181,7 +181,7 @@ indexing_maps = [#map0, #map1, #map2, #map3], iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]} ins(%expand, %arg1, %arg2 : tensor, tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%b0 : i32, %b1 : i32, %b2 : i32, %b3 : i32): %iv0 = linalg.index 0: index %iv1 = linalg.index 1: index @@ -229,7 +229,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "reduction", "reduction", "parallel"]} - ins(%0 : tensor<2x6x?x5xf32>) outs(%arg1 : tensor<2x5xf32>) { + ins(%0 : tensor<2x6x?x5xf32>) inits(%arg1 : tensor<2x5xf32>) { ^bb0(%b0 : f32, %b1 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -245,7 +245,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "reduction", "parallel"] // CHECK-SAME: ins(%[[ARG0]] : tensor<2x?x5xf32>) -// CHECK-SAME: outs(%[[ARG1]] : tensor<2x5xf32>) +// CHECK-SAME: inits(%[[ARG1]] : tensor<2x5xf32>) // ----- @@ -258,7 +258,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) outs(%init : tensor<2x3x4x5xf32>) { + ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) inits(%init : tensor<2x3x4x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -285,7 +285,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2xf32>) outs(%init : tensor<2x4x3x5xf32>) { + ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2xf32>) inits(%init : tensor<2x4x3x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -312,7 +312,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "reduction", "parallel", "parallel"]} - ins(%0, %arg1 : 
tensor<2x3x4x5xf32>, tensor<2x3xf32>) outs(%init : tensor<2x5xf32>) { + ins(%0, %arg1 : tensor<2x3x4x5xf32>, tensor<2x3xf32>) inits(%init : tensor<2x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -341,7 +341,7 @@ %2 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0, %1 : tensor<2x3xf32>, tensor<4x5xf32>) outs(%init : tensor<2x3x4x5xf32>) { + ins(%0, %1 : tensor<2x3xf32>, tensor<4x5xf32>) inits(%init : tensor<2x3x4x5xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32): %3 = arith.addf %b0, %b1 : f32 linalg.yield %3 : f32 @@ -358,7 +358,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%{{.+}}: tensor<6x20xf32>) +// CHECK-SAME: inits(%{{.+}}: tensor<6x20xf32>) // CHECK: %[[RESHAPE1:.+]] = tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1, 2]{{\]}} // CHECK: %[[RESHAPE2:.+]] = tensor.expand_shape %[[RESHAPE1]] {{\[}}[0, 1], [2], [3]{{\]}} // CHECK: return %[[RESHAPE2]] @@ -374,7 +374,7 @@ // CONTROL: %[[INIT_RESHAPE:.+]] = tensor.collapse_shape %[[INIT]] {{\[}}[0], [1], [2, 3]{{\]}} // CONTROL: %[[GENERIC:.+]] = linalg.generic // CONTROL-SAME: ins(%[[EXPAND]], %[[ARG1]] : -// CONTROL-SAME: outs(%[[INIT_RESHAPE]] : +// CONTROL-SAME: inits(%[[INIT_RESHAPE]] : // CONTROL: %[[RESULT:.+]] = tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1], [2, 3]{{\]}} // ----- @@ -387,7 +387,7 @@ %1 = linalg.generic { indexing_maps = [#map, #map], iterator_types = ["parallel"]} - ins(%0: tensor<1xf32>) outs(%init : tensor<1xf32>) { + ins(%0: tensor<1xf32>) inits(%init : tensor<1xf32>) { ^bb0(%b0 : f32, %b1 : f32): linalg.yield %b0: f32 } -> tensor<1xf32> @@ -410,7 +410,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%0, %arg1 : tensor, tensor<4x?x?x8xf32>) - outs(%arg1 : tensor<4x?x?x8xf32>) { + inits(%arg1 : tensor<4x?x?x8xf32>) { ^bb0(%b0: f32, %b1 : f32, %b2 : f32): %2 = arith.addf %b0, %b1 : f32 linalg.yield %2 : f32 @@ -430,7 +430,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel"] // CHECK-SAME: ins(%[[COLLAPSE_ARG0]], %[[COLLAPSE_ARG1_0]] : -// CHECK-SAME: outs(%[[COLLAPSE_ARG1_1]] : +// CHECK-SAME: inits(%[[COLLAPSE_ARG1_1]] : // CHECK: %[[EXPAND_GENERIC:.+]] = tensor.expand_shape %[[GENERIC]] {{\[}}[0], [1], [2, 3]{{\]}} // CHECK: return %[[EXPAND_GENERIC]] @@ -448,7 +448,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - ins(%0 : tensor) outs(%init : tensor) { + ins(%0 : tensor) inits(%init : tensor) { ^bb0(%b0 : i32, %b1 : i32): %2 = linalg.index 0 : index %3 = linalg.index 1 : index @@ -474,7 +474,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"] // CHECK-SAME: ins(%[[ARG0]] : -// CHECK-SAME: outs(%[[COLLAPSE_INIT]] : +// CHECK-SAME: inits(%[[COLLAPSE_INIT]] : // CHECK-NEXT: ^bb{{[0-9]}} // CHECK: %[[ID0:.+]] = linalg.index 0 // CHECK-DAG: %[[T0:.+]] = arith.remui %[[ID0]], %[[C4]] @@ -504,7 +504,7 @@ %1 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["reduction", "reduction", "reduction", "reduction"]} - ins(%0 : tensor) outs(%init : tensor) { + ins(%0 : tensor) inits(%init : tensor) { ^bb0(%b0 : i32, %b1 : i32): %2 = 
linalg.index 0 : index %3 = linalg.index 1 : index diff --git a/mlir/test/Dialect/Linalg/fusion-2-level.mlir b/mlir/test/Dialect/Linalg/fusion-2-level.mlir --- a/mlir/test/Dialect/Linalg/fusion-2-level.mlir +++ b/mlir/test/Dialect/Linalg/fusion-2-level.mlir @@ -13,7 +13,7 @@ %1 = memref.dim %C, %c1 : memref> %2 = memref.dim %D, %c1 : memref> linalg.matmul ins(%A, %B: memref>, memref>) - outs(%C: memref>) + inits(%C: memref>) scf.for %arg5 = %c0 to %0 step %c20 { scf.for %arg6 = %c0 to %2 step %c30 { scf.for %arg7 = %c0 to %1 step %c40 { @@ -30,7 +30,7 @@ %16 = memref.subview %7[%arg10, %arg9][%c4, %c3][%c1, %c1]: memref> to memref> %17 = memref.subview %8[%arg8, %arg9][%c2, %c3][%c1, %c1] : memref> to memref> linalg.matmul ins(%14, %16: memref>, memref>) - outs(%17: memref>) + inits(%17: memref>) } } } diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir --- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir @@ -13,7 +13,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -22,7 +22,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { // CHECK: ^{{[a-zA-Z0-9_]*}} // CHECK-SAME: [[ARG0:%[a-zA-Z0-9_]*]] // CHECK-SAME: [[ARG1:%[a-zA-Z0-9_]*]] @@ -55,7 +55,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, f32) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -64,7 +64,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP1]], [[$MAP1]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, f32) - outs(%2 : tensor) { + inits(%2 : tensor) { // CHECK: ^{{[a-zA-Z0-9_]*}} // CHECK-SAME: [[ARG3:%[a-zA-Z0-9_]*]] // CHECK-SAME: [[ARG4:%[a-zA-Z0-9_]*]] @@ -97,7 +97,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : f32 @@ -106,7 +106,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP0]], [[$MAP1]], [[$MAP0]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %5 = arith.mulf %arg5, %arg6 : f32 linalg.yield %5 : f32 @@ -131,7 +131,7 @@ %2 = tensor.empty(%0, %1) : tensor %3 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %4 = arith.addf %arg3, %arg4 : f32 linalg.yield %4 : 
f32 @@ -140,7 +140,7 @@ // CHECK-SAME: indexing_maps = {{\[}}[[$MAP1]], [[$MAP0]], [[$MAP0]], [[$MAP0]]{{\]}} %4 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%3, %arg2 : tensor, tensor) - outs(%2 : tensor){ + inits(%2 : tensor){ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %5 = arith.mulf %arg5, %arg6 : f32 linalg.yield %5 : f32 @@ -165,7 +165,7 @@ %1 = tensor.empty(%0) : tensor %2 = linalg.generic {indexing_maps = [#map2, #map2, #map2], iterator_types = ["parallel"]} ins(%arg0, %arg1 : tensor, tensor) - outs(%1 : tensor) { + inits(%1 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.addf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -176,7 +176,7 @@ %4 = tensor.empty(%0, %3) : tensor %5 = linalg.generic {indexing_maps = [#map1, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%2, %arg2 : tensor, tensor) - outs(%4 : tensor){ + inits(%4 : tensor){ ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %6 = arith.mulf %arg5, %arg6 : f32 linalg.yield %6 : f32 @@ -195,7 +195,7 @@ %0 = tensor.empty() : tensor %1 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []} ins(%arg0, %arg1 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -205,7 +205,7 @@ // CHECK: arith.mulf %2 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types = []} ins(%1, %arg2 : tensor, tensor) - outs(%0 : tensor) { + inits(%0 : tensor) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %3 = arith.mulf %arg3, %arg4 : f32 linalg.yield %3 : f32 @@ -231,7 +231,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cst, %arg0 : tensor<5xf32>, tensor<5x?x?xf32>) - outs(%2 : tensor<5x?x?xf32>) { + inits(%2 : tensor<5x?x?xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %4 = arith.mulf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -263,7 +263,7 @@ indexing_maps = [#map0, #map1, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%cst, %arg0 : tensor, tensor<5x?x?xf32>) - outs(%2 : tensor<5x?x?xf32>) { + inits(%2 : tensor<5x?x?xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %4 = arith.mulf %arg1, %arg2 : f32 linalg.yield %4 : f32 @@ -291,7 +291,7 @@ indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%arg0, %arg1 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): %10 = arith.addi %arg2, %arg3 : i32 linalg.yield %10 : i32 @@ -300,7 +300,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%3 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -342,7 +342,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg4: i32, %arg5: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -356,7 +356,7 @@ indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%3, %arg0 : tensor, tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32, %arg4: i32): %10 = arith.addi %arg2, %arg3 : i32 linalg.yield %10 : i32 @@ -396,7 +396,7 @@ indexing_maps = [#map0, #map0], iterator_types = ["parallel", "parallel"] } ins(%arg0 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, 
%arg3: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -410,7 +410,7 @@ indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel"] } ins(%3 : tensor) - outs(%2 : tensor) { + inits(%2 : tensor) { ^bb0(%arg2: i32, %arg3: i32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -457,7 +457,7 @@ %1 = linalg.generic {indexing_maps = [#map1, #map1], iterator_types = ["parallel"]} - ins(%arg0 : tensor) outs(%0 : tensor) { + ins(%arg0 : tensor) inits(%0 : tensor) { ^bb0(%arg2 : i32, %arg3 : i32): %2 = linalg.index 0 : index %3 = arith.index_cast %2 : index to i32 @@ -471,7 +471,7 @@ {indexing_maps = [#map2, #map3, #map2], iterator_types = ["parallel", "parallel"]} ins(%arg1, %1 : tensor, tensor) - outs(%4 : tensor) { + inits(%4 : tensor) { ^bb0(%arg2 : i32, %arg3 : i32, %arg4: i32): %6 = arith.addi %arg2, %arg3 : i32 linalg.yield %6 : i32 @@ -503,7 +503,7 @@ %1 = linalg.generic {indexing_maps = [affine_map<() -> ()>, affine_map<() -> ()>], iterator_types = []} - ins(%arg1 : tensor) outs(%0 : tensor) { + ins(%arg1 : tensor) inits(%0 : tensor) { ^bb0(%arg2: i32, %arg3: f32): %3 = arith.index_cast %arg2 : i32 to index %4 = tensor.extract %arg0[%3, %c0, %c0] : tensor<5x1x1xf32> @@ -514,7 +514,7 @@ {indexing_maps = [affine_map<(d0) -> ()>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - ins(%1, %cst : tensor, tensor<10xf32>) outs(%2 : tensor<10xf32>) { + ins(%1, %cst : tensor, tensor<10xf32>) inits(%2 : tensor<10xf32>) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): %4 = arith.mulf %arg2, %arg3 : f32 linalg.yield %4 : f32 @@ -544,7 +544,7 @@ affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} ins (%arg0, %cst : tensor<4xf32>, tensor<4xf32>) - outs (%1 : tensor<4xf32>) { + inits (%1 : tensor<4xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %3 = arith.addf %arg1, %arg2 : f32 linalg.yield %3 : f32 @@ -559,7 +559,7 @@ // CHECK: %[[T1:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP]], #[[MAP]]] // CHECK-SAME: ins(%[[ARG0]] : tensor<4xf32>) -// CHECK-SAME: outs(%[[T0]] : tensor<4xf32>) +// CHECK-SAME: inits(%[[T0]] : tensor<4xf32>) // CHECK: ^{{.+}}( // CHECK-SAME: %[[ARG1:[a-zA-Z0-9_]+]]: f32, %[[ARG2:[a-zA-Z0-9_]+]]: f32) // CHECK: %[[T2:.+]] = arith.addf %[[ARG1]], %[[CST]] @@ -579,7 +579,7 @@ {indexing_maps = [#map0, #map0, #map0], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor<1x10xf32>, tensor<1x10xf32>) - outs(%init : tensor<1x10xf32>) { + inits(%init : tensor<1x10xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -588,7 +588,7 @@ {indexing_maps = [#map1, #map2], iterator_types = ["reduction"]} ins(%0 : tensor<1x10xf32>) - outs(%arg2 : tensor<1xf32>) { + inits(%arg2 : tensor<1xf32>) { ^bb0(%arg3: f32, %arg4: f32): %2 = arith.addf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -625,7 +625,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] } - outs(%init0 : tensor) { + inits(%init0 : tensor) { ^bb0(%a: f32): linalg.yield %cp5 : f32 } -> tensor @@ -638,7 +638,7 @@ iterator_types = ["parallel", "parallel"] } ins(%0, %1 : tensor, tensor) - outs(%init1 : tensor) { + inits(%init1 : tensor) { ^bb0(%a: f32, %b: f32, %c: f32): %m = arith.mulf %a, %b : f32 linalg.yield %m : f32 @@ -656,7 +656,7 @@ %0 = linalg.generic { indexing_maps = [affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} - outs(%arg0 : tensor<1x8xf64>) { + inits(%arg0 : tensor<1x8xf64>) { ^bb0(%a: f64): %r = 
func.call @compute1(%a) : (f64) -> f64 linalg.yield %r : f64 @@ -672,7 +672,7 @@ indexing_maps = [affine_map<(i, j) -> (i, j)>, affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} ins(%0 : tensor<1x8xf64>) - outs(%arg1 : tensor<1x8xi32>) { + inits(%arg1 : tensor<1x8xi32>) { ^bb0(%a: f64, %b: i32): %r = func.call @compute2(%a, %b) : (f64, i32) -> i32 linalg.yield %r : i32 @@ -697,7 +697,7 @@ indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%three : tensor<3x2xf32>) outs(%init : tensor<3xf32>) { + ins(%three : tensor<3x2xf32>) inits(%init : tensor<3xf32>) { ^bb0(%arg0 : f32, %arg1 : f32): %0 = arith.addf %arg0, %arg1 : f32 linalg.yield %0 : f32 @@ -714,12 +714,12 @@ } func.func @break_outs_dependency(%arg0 : tensor) -> tensor { - %0 = linalg.generic #trait ins(%arg0 : tensor) outs(%arg0 : tensor) { + %0 = linalg.generic #trait ins(%arg0 : tensor) inits(%arg0 : tensor) { ^bb0(%arg1 : f32, %arg2 : f32) : %1 = arith.addf %arg1, %arg1 : f32 linalg.yield %1 : f32 } -> tensor - %2 = linalg.generic #trait ins(%0 : tensor) outs(%0 : tensor) { + %2 = linalg.generic #trait ins(%0 : tensor) inits(%0 : tensor) { ^bb0(%arg1 : f32, %arg2 : f32) : %3 = arith.mulf %arg1, %arg1 : f32 linalg.yield %3 : f32 @@ -734,12 +734,12 @@ // CHECK-DAG: %[[D1:.+]] = tensor.dim %[[ARG0]], %[[C1]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty(%[[D0]], %[[D1]]) // CHECK: %[[GENERIC1:.+]] = linalg.generic -// CHECK-SAME: outs(%[[INIT]] : tensor) +// CHECK-SAME: inits(%[[INIT]] : tensor) // CHECK-DAG: %[[D0:.+]] = tensor.dim %[[GENERIC1]], %[[C0]] // CHECK-DAG: %[[D1:.+]] = tensor.dim %[[GENERIC1]], %[[C1]] // CHECK-DAG: %[[INIT:.+]] = tensor.empty(%[[D0]], %[[D1]]) // CHECK: %[[RESULT:.+]] = linalg.generic -// CHECK-SAME: outs(%[[INIT]] : tensor) +// CHECK-SAME: inits(%[[INIT]] : tensor) // ----- @@ -760,7 +760,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %cst, %c42 : tensor, f32, i32) - outs(%0, %1 : tensor, tensor) { + inits(%0, %1 : tensor, tensor) { ^bb0(%arg1 : f32, %arg2 : f32, %arg3 : i32, %arg4 : f32, %arg5 : i32) : %3 = arith.addf %arg1, %arg2 : f32 linalg.yield %3, %arg3 : f32, i32 @@ -785,7 +785,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<3x2xf32> @@ -803,7 +803,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf64>) outs(%init : tensor<3x2xf64>) { + } ins(%input : tensor<2x3xf64>) inits(%init : tensor<3x2xf64>) { ^bb0(%arg1: f64, %arg2: f64): linalg.yield %arg1 : f64 } -> tensor<3x2xf64> @@ -827,7 +827,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%input : tensor<1x2x3x4xi32>) outs(%init : tensor<3x1x4x2xi32>) { + } ins(%input : tensor<1x2x3x4xi32>) inits(%init : tensor<3x1x4x2xi32>) { ^bb0(%arg1: i32, %arg2: i32): linalg.yield %arg1 : i32 } -> tensor<3x1x4x2xi32> @@ -851,7 +851,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, 
d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d2, d0, d3, d1)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%input : tensor<1x2x3x4xi16>) outs(%init : tensor<3x1x4x2xi16>) { + } ins(%input : tensor<1x2x3x4xi16>) inits(%init : tensor<3x1x4x2xi16>) { ^bb0(%arg1: i16, %arg2: i16): linalg.yield %arg1 : i16 } -> tensor<3x1x4x2xi16> @@ -867,7 +867,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<3x2xf32> @@ -884,7 +884,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %cst : f32 } -> tensor<3x2xf32> @@ -900,7 +900,7 @@ %1 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d1, d0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"] - } ins(%input : tensor<2x3xf32>) outs(%init : tensor<3x2xf32>) { + } ins(%input : tensor<2x3xf32>) inits(%init : tensor<3x2xf32>) { ^bb0(%arg1: f32, %arg2: f32): %add = arith.addf %arg1, %arg1 : f32 linalg.yield %add : f32 @@ -929,16 +929,16 @@ %5 = linalg.generic { indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"] - } ins(%arg0 : tensor) outs(%4 : tensor) { + } ins(%arg0 : tensor) inits(%4 : tensor) { ^bb0(%arg2: f32, %arg3: f32): linalg.yield %arg2 : f32 } -> tensor %6 = tensor.empty(%arg1) : tensor - %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor) -> tensor + %7 = linalg.fill ins(%cst : f32) inits(%6 : tensor) -> tensor %8 = linalg.generic { indexing_maps = [#map2, #map3], iterator_types = ["parallel", "reduction"] - } ins(%5 : tensor) outs(%7 : tensor) { + } ins(%5 : tensor) inits(%7 : tensor) { ^bb0(%arg2: f32, %arg3: f32): %9 = arith.maxf %arg2, %arg3 : f32 linalg.yield %9 : f32 @@ -953,7 +953,7 @@ %0 = linalg.generic { indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%arg0 : tensor<5000xi64>) { + inits(%arg0 : tensor<5000xi64>) { ^bb0(%arg3: i64): // no predecessors %22 = linalg.index 0 : index %23 = arith.index_cast %22 : index to i64 @@ -963,7 +963,7 @@ %2 = linalg.generic { indexing_maps = [affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1)>], iterator_types = ["parallel", "parallel"]} - ins(%0 : tensor<5000xi64>) outs(%1 : tensor<5000xi32>) { + ins(%0 : tensor<5000xi64>) inits(%1 : tensor<5000xi32>) { ^bb0(%arg3: i64, %arg5: i32): // no predecessors %22 = arith.index_cast %arg3 : i64 to index %23 = tensor.extract %arg1[%22] : tensor<5000xi32> @@ -980,7 +980,7 @@ // CHECK-DAG: %[[INIT1:.+]] = tensor.empty() : tensor<5000xi32> // CHECK: %[[RESULT:.+]]:2 = linalg.generic // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] -// CHECK-SAME: outs(%[[INIT0]], %[[INIT1]] : +// CHECK-SAME: inits(%[[INIT0]], %[[INIT1]] : // CHECK-NEXT: ^bb0( // CHECK-SAME: %[[B0:.+]]: i64 // CHECK-SAME: %[[B1:.+]]: i32 @@ -998,16 +998,16 @@ // CHECK-NOT: linalg.fill // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]] : tensor) -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0) -> (d0)> func.func 
@fold_fill_generic_basic(%arg0: tensor) -> (tensor) { %c0 = arith.constant 0 : index %cst = arith.constant 7.0 : f32 %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.empty(%0) : tensor - %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %2 = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor %3 = tensor.empty(%0) : tensor - %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) outs (%3:tensor) { + %4 = linalg.generic {indexing_maps = [#map0, #map0, #map0], iterator_types=["parallel"]} ins(%arg0, %2 : tensor, tensor) inits (%3:tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %5 = arith.addf %arg1, %arg2 : f32 linalg.yield %5 : f32 @@ -1021,7 +1021,7 @@ // CHECK-NOT: linalg.fill // CHECK: %[[GENERIC_OP:.*]] = linalg.generic // CHECK-NOT: ins -// CHECK-SAME: outs({{.*}} : tensor) { +// CHECK-SAME: inits({{.*}} : tensor) { #map0 = affine_map<(d0, d1) -> (d0, d1)> #map1 = affine_map<(d0, d1) -> (d1, d0)> func.func @fold_fill_generic_mixedaccess(%arg0: tensor) -> (tensor) { @@ -1032,11 +1032,11 @@ %0 = tensor.dim %arg0, %c0 : tensor %1 = tensor.dim %arg0, %c1 : tensor %2 = tensor.empty(%0, %1) : tensor - %3 = linalg.fill ins(%cst1 : f32) outs(%2 : tensor) -> tensor + %3 = linalg.fill ins(%cst1 : f32) inits(%2 : tensor) -> tensor %4 = tensor.empty(%1, %0) : tensor - %5 = linalg.fill ins(%cst2 : f32) outs(%4 : tensor) -> tensor + %5 = linalg.fill ins(%cst2 : f32) inits(%4 : tensor) -> tensor %6 = tensor.empty(%0, %1) : tensor - %7 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%3, %5 : tensor, tensor) outs (%6:tensor) { + %7 = linalg.generic {indexing_maps = [#map0, #map1, #map0], iterator_types=["parallel","parallel"]} ins(%3, %5 : tensor, tensor) inits (%6:tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %8 = arith.divf %arg1, %arg2 : f32 linalg.yield %8 : f32 @@ -1053,7 +1053,7 @@ %1 = tensor.empty() : tensor %2:2 = linalg.generic { indexing_maps = [#map, #map, #map, #map, #map], iterator_types = []} - ins(%arg0, %arg1, %arg1 : tensor, tensor, tensor) outs(%0, %1 : tensor, tensor) { + ins(%arg0, %arg1, %arg1 : tensor, tensor, tensor) inits(%0, %1 : tensor, tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32, %arg8: f32, %arg9: f32): %4 = arith.addf %arg5, %arg6 : f32 %5 = arith.addf %4, %arg7 : f32 @@ -1061,7 +1061,7 @@ } -> (tensor, tensor) %3 = linalg.generic { indexing_maps = [#map, #map, #map], iterator_types = []} - ins(%2#1, %arg1 : tensor, tensor) outs(%arg4 : tensor) { + ins(%2#1, %arg1 : tensor, tensor) inits(%arg4 : tensor) { ^bb0(%arg5: f32, %arg6: f32, %arg7: f32): %4 = arith.addf %arg5, %arg6 : f32 %5 = arith.addf %4, %arg6 : f32 @@ -1076,7 +1076,7 @@ // CHECK: %[[INIT:.+]] = tensor.empty // CHECK: %[[GENERIC:.+]] = linalg.generic // CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : -// CHECK-SAME: outs(%[[INIT]] : +// CHECK-SAME: inits(%[[INIT]] : // CHECK-NEXT: ^bb0 // CHECK-SAME: %[[B0:[a-zA-Z0-9_]+]]: f32 // CHECK-SAME: %[[B1:[a-zA-Z0-9_]+]]: f32 diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir --- a/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise-options.mlir @@ -21,28 +21,28 @@ %init = tensor.empty(%d0, %d1) : tensor %0 = linalg.generic #binary2Dpointwise ins(%arg0, %arg1 : tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32): %1 = arith.mulf %arg6, 
%arg7 : f32 linalg.yield %1 : f32 } -> tensor %2 = linalg.generic #binary2Dpointwise ins(%arg2, %arg3 : tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32): %3 = arith.mulf %arg6, %arg7 : f32 linalg.yield %3 : f32 } -> tensor %4 = linalg.generic #binary2Dpointwise ins(%arg4, %arg5 : tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32): %5 = arith.mulf %arg6, %arg7 : f32 linalg.yield %5 : f32 } -> tensor %6 = linalg.generic #ternary2Dpointwise ins(%0, %2, %4 : tensor, tensor, tensor) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg6 : f32, %arg7 : f32, %arg8 : f32, %arg9 : f32): %7 = arith.addf %arg6, %arg7 : f32 %8 = arith.addf %7, %arg8 : f32 diff --git a/mlir/test/Dialect/Linalg/fusion-indexed.mlir b/mlir/test/Dialect/Linalg/fusion-indexed.mlir --- a/mlir/test/Dialect/Linalg/fusion-indexed.mlir +++ b/mlir/test/Dialect/Linalg/fusion-indexed.mlir @@ -11,7 +11,7 @@ %D: memref) { linalg.generic #pointwise_2d_trait ins(%A, %B: memref, memref) - outs(%C : memref) { + inits(%C : memref) { ^bb0(%e: f32, %arg5: f32, %arg6: f32): %2 = arith.addf %e, %arg5 : f32 linalg.yield %2 : f32 @@ -34,7 +34,7 @@ indexing_maps = [#id_2d, #id_2d], iterator_types = ["parallel", "parallel"]} ins(%4 : memref>) - outs(%5 : memref>) { + inits(%5 : memref>) { ^bb0(%arg4: f32, %arg5: f32): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -73,7 +73,7 @@ linalg.generic { indexing_maps = [affine_map<(i, j) -> (j, i)>], iterator_types = ["parallel", "parallel"]} - outs(%A : memref) { + inits(%A : memref) { ^bb0(%a: index): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -92,7 +92,7 @@ affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} ins(%A_view : memref>) - outs(%B_view : memref>) { + inits(%B_view : memref>) { ^bb0(%a: index, %b: index): linalg.yield %a : index } @@ -121,7 +121,7 @@ linalg.generic { indexing_maps = [affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} - outs(%A : memref) { + inits(%A : memref) { ^bb0(%a: index): %idx0 = linalg.index 0 : index %idx1 = linalg.index 1 : index @@ -140,7 +140,7 @@ affine_map<(i, j) -> (i, j)>], iterator_types = ["parallel", "parallel"]} ins(%A_view : memref>) - outs(%B_view : memref>) { + inits(%B_view : memref>) { ^bb0(%a: index, %b: index): linalg.yield %a : index } diff --git a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir --- a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir +++ b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir @@ -8,7 +8,7 @@ // CHECK: %[[RI:.*]] = tensor.collapse_shape %[[INIT]] {{\[}}[0, 1], [2]] : tensor into tensor // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP3]], #[[$MAP2]]], // CHECK-SAME: iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[A]], %[[B]] : tensor, tensor<16xf32>) outs(%[[RI]] : tensor) +// CHECK-SAME: ins(%[[A]], %[[B]] : tensor, tensor<16xf32>) inits(%[[RI]] : tensor) // CHECK: %[[RR:.*]] = tensor.expand_shape %[[R]] {{\[}}[0, 1], [2]] : tensor into tensor // CHECK: return %[[RR]] : tensor func.func @reshape(%A: tensor, %B: tensor<16xf32>, %init: tensor) -> tensor { @@ -19,7 +19,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0, %B : tensor, tensor<16xf32>) - outs(%init : tensor) { + inits(%init : tensor) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %s = arith.subf 
%arg1, %arg2 : f32 linalg.yield %s : f32 @@ -38,7 +38,7 @@ // CHECK: %[[RI:.*]] = tensor.collapse_shape %[[I]] {{\[}}[0, 1], [2]] : tensor<112x112x16xf32> into tensor<12544x16xf32> // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP2]], #[[$MAP3]], #[[$MAP2]]], // CHECK-SAME: iterator_types = ["parallel", "parallel"]} -// CHECK-SAME: ins(%[[A]], %[[B]], %[[C]] : tensor<12544x16xf32>, tensor<12544x16xf32>, tensor<16xf32>) outs(%[[RI]] : tensor<12544x16xf32>) +// CHECK-SAME: ins(%[[A]], %[[B]], %[[C]] : tensor<12544x16xf32>, tensor<12544x16xf32>, tensor<16xf32>) inits(%[[RI]] : tensor<12544x16xf32>) // CHECK: %[[RR:.*]] = tensor.expand_shape %[[R]] {{\[}}[0, 1], [2]] : tensor<12544x16xf32> into tensor<112x112x16xf32> // CHECK: return %[[RR]] : tensor<112x112x16xf32> func.func @reshape_multiple(%A: tensor<12544x16xf32>, %B: tensor<12544x16xf32>, @@ -55,7 +55,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%0, %1, %C : tensor<112x112x16xf32>, tensor<112x112x16xf32>, tensor<16xf32>) - outs(%2 : tensor<112x112x16xf32>) { + inits(%2 : tensor<112x112x16xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %s = arith.subf %arg1, %arg2 : f32 %m = arith.mulf %s, %arg3 : f32 @@ -81,7 +81,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%20, %B : tensor<112x112x16xf32>, tensor<112xf32>) - outs(%21 : tensor<112x112x16xf32>) { + inits(%21 : tensor<112x112x16xf32>) { ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): %s = arith.subf %arg1, %arg2 : f32 linalg.yield %s : f32 @@ -106,7 +106,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>], iterator_types = ["parallel", "parallel", "parallel"]} ins(%25, %arg1, %arg2 : tensor<2x3x5xi32>, tensor<5xf32>, tensor<5xf32>) - outs(%26 : tensor<2x3x5xf32>) { + inits(%26 : tensor<2x3x5xf32>) { ^bb0(%arg6: i32, %arg7: f32, %arg8: f32, %arg9: f32): %29 = arith.sitofp %arg6 : i32 to f32 %30 = arith.addf %arg7, %cst_8 : f32 @@ -121,6 +121,6 @@ // CHECK-LABEL: func @type_correctness // CHECK: %[[OP:.+]] = linalg.generic // CHECK-SAME: ins(%{{.+}}, %{{.+}}, %{{.+}} : tensor<6x5xi32>, tensor<5xf32>, tensor<5xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<6x5xf32>) +// CHECK-SAME: inits(%{{.+}} : tensor<6x5xf32>) // CHECK: tensor.expand_shape %[[OP]] // CHECK-SAME: tensor<6x5xf32> into tensor<2x3x5xf32> diff --git a/mlir/test/Dialect/Linalg/fusion.mlir b/mlir/test/Dialect/Linalg/fusion.mlir --- a/mlir/test/Dialect/Linalg/fusion.mlir +++ b/mlir/test/Dialect/Linalg/fusion.mlir @@ -16,7 +16,7 @@ %2 = memref.dim %B, %c1 : memref> linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) scf.for %arg5 = %c0 to %0 step %c2 { scf.for %arg6 = %c0 to %2 step %c3 { scf.for %arg7 = %c0 to %1 step %c4 { @@ -31,7 +31,7 @@ memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8: memref>) + inits(%8: memref>) } } } @@ -60,7 +60,7 @@ %c2 = arith.constant 2 : index linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C: memref>) + inits(%C: memref>) %0 = memref.dim %C, %c0 : memref> %1 = memref.dim %C, %c1 : memref> %2 = memref.dim %D, %c1 : memref> @@ -78,7 +78,7 @@ memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8 : memref>) + inits(%8 : memref>) } } } @@ -110,7 +110,7 @@ %c2 = arith.constant 2 : index linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) %0 = memref.dim %D, %c0 : memref> %1 = memref.dim %D, %c1 : memref> %2 = memref.dim %C, %c1 : memref> @@ -128,7 +128,7 @@ 
memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8 : memref>) + inits(%8 : memref>) } } } @@ -162,10 +162,10 @@ %c2 = arith.constant 2 : index linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) linalg.matmul ins(%A, %B : memref>, memref>) - outs(%D : memref>) + inits(%D : memref>) %0 = memref.dim %C, %c0 : memref> %1 = memref.dim %C, %c1 : memref> %2 = memref.dim %D, %c1 : memref> @@ -183,7 +183,7 @@ memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8 : memref>) + inits(%8 : memref>) } } } @@ -222,10 +222,10 @@ %2 = memref.dim %D, %c1 : memref> linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) linalg.matmul ins(%C, %B : memref>, memref>) - outs(%D : memref>) + inits(%D : memref>) scf.for %arg5 = %c0 to %1 step %c2 { scf.for %arg6 = %c0 to %0 step %c3 { scf.for %arg7 = %c0 to %2 step %c4 { @@ -240,7 +240,7 @@ memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8 : memref>) + inits(%8 : memref>) } } } @@ -274,9 +274,9 @@ // CHECK: %[[BOUND_4_B1:.*]] = affine.min #[[BOUND_4_MAP]](%[[K]])[%[[B_1]]] // CHECK: %[[B_0K:.*]] = memref.subview %[[B]][0, %[[K]]] // CHECK: %[[D_IK_OUT:.+]] = memref.subview %[[D]][%[[I]], %[[K]]] [%[[BOUND_2_C0]], %[[BOUND_4_B1]]] -// CHECK: linalg.matmul ins(%[[A_I0]], %[[B_00]]{{.*}} outs(%[[C_I0_OUT]] -// CHECK: linalg.matmul ins(%[[C_I0]], %[[B_0K]]{{.*}} outs(%[[D_IK_OUT]] -// CHECK: linalg.matmul ins(%[[D_IK]], %[[B_KJ]]{{.*}} outs(%[[E_IJ]] +// CHECK: linalg.matmul ins(%[[A_I0]], %[[B_00]]{{.*}} inits(%[[C_I0_OUT]] +// CHECK: linalg.matmul ins(%[[C_I0]], %[[B_0K]]{{.*}} inits(%[[D_IK_OUT]] +// CHECK: linalg.matmul ins(%[[D_IK]], %[[B_KJ]]{{.*}} inits(%[[E_IJ]] // ----- @@ -298,10 +298,10 @@ %0 = memref.dim %C, %c1 : memref> linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) linalg.matmul ins(%A, %C : memref>, memref>) - outs(%E : memref>) + inits(%E : memref>) %1 = memref.dim %C, %c0 : memref> %2 = memref.dim %D, %c1 : memref> scf.for %arg5 = %c0 to %1 step %c2 { @@ -321,7 +321,7 @@ memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8 : memref>) + inits(%8 : memref>) } } } @@ -357,10 +357,10 @@ %4 = memref.dim %D, %c1 : memref> linalg.matmul ins(%A, %C : memref>, memref>) - outs(%E : memref>) + inits(%E : memref>) linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) scf.for %arg5 = %c0 to %0 step %c2 { scf.for %arg6 = %c0 to %2 step %c3 { scf.for %arg7 = %c0 to %1 step %c4 { @@ -375,7 +375,7 @@ memref> linalg.matmul ins(%7, %9 : memref>, memref>) - outs(%10 : memref>) + inits(%10 : memref>) } } } @@ -393,7 +393,7 @@ memref> linalg.matmul ins(%7, %9 : memref>, memref>) - outs(%10 : memref>) + inits(%10 : memref>) } } } @@ -408,7 +408,7 @@ // CHECK: %[[C_1:.*]] = memref.dim %[[C]], %[[C1:.*]] : memref> // CHECK: %[[C_0:.*]] = memref.dim %[[C]], %[[C0:.*]] : memref> // CHECK: %[[D_1:.*]] = memref.dim %[[D]], %[[C1:.*]] : memref> -// CHECK: linalg.matmul ins(%[[A]], %[[C]]{{.*}} outs(%[[E]] +// CHECK: linalg.matmul ins(%[[A]], %[[C]]{{.*}} inits(%[[E]] // CHECK: scf.for %{{.*}} = %{{.*}} to %[[A_0]] step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %[[C_1]] step %{{.*}} { // CHECK: scf.for %{{.*}} = %{{.*}} to %[[A_1]] step %{{.*}} { @@ -441,10 +441,10 @@ %1 = memref.dim %A, %c1 : memref> linalg.matmul ins(%A, %C : memref>, memref>) - outs(%D : memref>) + inits(%D : memref>) linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) %2 = memref.dim 
%D, %c1 : memref> scf.for %arg5 = %c0 to %0 step %c2 { scf.for %arg6 = %c0 to %2 step %c3 { @@ -463,7 +463,7 @@ memref> linalg.matmul ins(%5, %7 : memref>, memref>) - outs(%8 : memref>) + inits(%8 : memref>) } } } @@ -497,7 +497,7 @@ linalg.generic #pointwise_2d_trait ins(%A, %A: memref>, memref>) - outs(%B : memref>) { + inits(%B : memref>) { ^bb0(%E: f32, %arg5: f32, %arg6: f32): %2 = arith.addf %E, %arg5 : f32 linalg.yield %2 : f32 @@ -518,7 +518,7 @@ linalg.generic #pointwise_2d_trait ins(%4, %5: memref>, memref>) - outs(%6 : memref>) { + inits(%6 : memref>) { ^bb0(%arg6: f32, %arg7: f32, %arg8: f32): %7 = arith.mulf %arg6, %arg7 : f32 linalg.yield %7 : f32 @@ -555,7 +555,7 @@ %E = memref.alloc (%M, %N): memref linalg.generic #pointwise_2d_trait ins(%A, %A : memref, memref) - outs(%B : memref) { + inits(%B : memref) { ^bb0(%e: f32, %arg5: f32, %arg6: f32): %2 = arith.addf %e, %arg5 : f32 linalg.yield %2 : f32 @@ -576,7 +576,7 @@ linalg.generic #pointwise_2d_trait ins(%4, %5: memref>, memref>) - outs(%6 : memref>) { + inits(%6 : memref>) { ^bb0(%arg6: f32, %arg7: f32, %arg8: f32): %7 = arith.mulf %arg6, %arg7 : f32 linalg.yield %7 : f32 @@ -610,7 +610,7 @@ indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel"]} ins(%arg1 : memref<100xf32>) - outs(%0 : memref<100x10xf32>) { + inits(%0 : memref<100x10xf32>) { ^bb0(%arg3: f32, %arg4: f32): linalg.yield %arg3 : f32 } @@ -619,7 +619,7 @@ indexing_maps = [#map1, #map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%arg0, %0: memref<100x10xf32>, memref<100x10xf32>) - outs(%1 : memref<100x10xf32>) { + inits(%1 : memref<100x10xf32>) { ^bb0(%arg3: f32, %arg4: f32, %arg5: f32): %2 = arith.subf %arg3, %arg4 : f32 linalg.yield %2 : f32 @@ -639,7 +639,7 @@ indexing_maps = [#map1, #map1], iterator_types = ["parallel", "parallel"]} ins(%6 : memref>) - outs(%7 : memref>) { + inits(%7 : memref>) { ^bb0(%arg3: f32, %arg4: f32): %8 = math.exp %arg3 : f32 linalg.yield %8 : f32 @@ -677,7 +677,7 @@ %c3 = arith.constant 3 : index %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index - linalg.fill ins(%cst : f32) outs(%arg0 : memref) + linalg.fill ins(%cst : f32) inits(%arg0 : memref) %2 = memref.dim %arg1, %c0 : memref %3 = memref.dim %arg1, %c1 : memref %4 = memref.dim %arg2, %c0 : memref @@ -690,7 +690,7 @@ %9 = affine.min #map0(%arg3)[%4] %10 = affine.min #map1(%arg4)[%5] %11 = memref.subview %arg2[%arg3, %arg4] [%9, %10] [1, 1] : memref to memref> - linalg.conv_2d ins(%8, %arg1 : memref>, memref) outs(%11 : memref>) + linalg.conv_2d ins(%8, %arg1 : memref>, memref) inits(%11 : memref>) } } return @@ -717,7 +717,7 @@ linalg.matmul ins(%A, %B : memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) scf.for %i = %c0 to %dim step %c2 { scf.for %j = %c0 to %dim step %c3 { @@ -733,7 +733,7 @@ memref> linalg.matmul ins(%0, %1 : memref>, memref>) - outs(%2 : memref>) + inits(%2 : memref>) } } } diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -2,7 +2,7 @@ func.func @generalize_matmul_buffer(%A : memref<16x8xf32>, %B: memref<8x32xf32>, %C: memref<16x32xf32>) { linalg.matmul ins(%A, %B: memref<16x8xf32>, memref<8x32xf32>) - outs(%C: memref<16x32xf32>) + inits(%C: memref<16x32xf32>) return } @@ -20,7 +20,7 @@ // CHECK-SAME: indexing_maps = [#[[A_MAP]], #[[B_MAP]], #[[C_MAP]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"] 
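For comparison, the generalized form that these matmul CHECK lines describe looks roughly as follows; a sketch with hypothetical map and function names, where the affine maps and region body mirror the matmul semantics:

```mlir
#mA = affine_map<(m, n, k) -> (m, k)>
#mB = affine_map<(m, n, k) -> (k, n)>
#mC = affine_map<(m, n, k) -> (m, n)>
func.func @generalized_matmul(%A : memref<16x8xf32>, %B : memref<8x32xf32>,
                              %C : memref<16x32xf32>) {
  // Generalization turns linalg.matmul into a linalg.generic over the same
  // operands; the init operand %C is read-modify-written by the reduction.
  linalg.generic {indexing_maps = [#mA, #mB, #mC],
                  iterator_types = ["parallel", "parallel", "reduction"]}
      ins(%A, %B : memref<16x8xf32>, memref<8x32xf32>)
      inits(%C : memref<16x32xf32>) {
    ^bb0(%a : f32, %b : f32, %c : f32):
      %mul = arith.mulf %a, %b : f32
      %add = arith.addf %c, %mul : f32
      linalg.yield %add : f32
  }
  return
}
```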
// CHECK-SAME: ins(%[[A]], %[[B]] -// CHECK-SAME: outs(%[[C]] +// CHECK-SAME: inits(%[[C]] // CHECK: ^{{.*}}(%[[A_ARG:.+]]: f32, %[[B_ARG:.+]]: f32, %[[C_ARG:.+]]: f32) // CHECK: %[[MUL:.+]] = arith.mulf %[[A_ARG]], %[[B_ARG]] : f32 @@ -31,7 +31,7 @@ func.func @generalize_matmul_tensor(%A : tensor<16x8xf32>, %B: tensor<8x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xf32>, tensor<8x32xf32>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -39,7 +39,7 @@ // CHECK: linalg.generic // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<16x8xf32>, tensor<8x32xf32>) -// CHECK-SAME: outs(%{{.+}} : tensor<16x32xf32>) +// CHECK-SAME: inits(%{{.+}} : tensor<16x32xf32>) // CHECK: ^{{.*}}(%[[A_ARG:.+]]: f32, %[[B_ARG:.+]]: f32, %[[C_ARG:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[A_ARG]], %[[B_ARG]] : f32 @@ -54,7 +54,7 @@ %C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xcomplex>, tensor<8x32xcomplex>) - outs(%C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> + inits(%C: tensor<16x32xcomplex>) -> tensor<16x32xcomplex> return %0: tensor<16x32xcomplex> } @@ -62,7 +62,7 @@ // CHECK: linalg.generic // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<16x8xcomplex>, tensor<8x32xcomplex>) -// CHECK-SAME: outs(%{{.+}} : tensor<16x32xcomplex>) +// CHECK-SAME: inits(%{{.+}} : tensor<16x32xcomplex>) // CHECK: ^{{.*}}(%[[A_ARG:.+]]: complex, %[[B_ARG:.+]]: complex, %[[C_ARG:.+]]: complex) // CHECK-NEXT: %[[MUL:.+]] = complex.mul %[[A_ARG]], %[[B_ARG]] : complex @@ -76,7 +76,7 @@ linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) - outs(%output : memref<2x3x4x2x3xf32>) + inits(%output : memref<2x3x4x2x3xf32>) return } @@ -90,7 +90,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<2x3x4x2x3xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<2x3x4x2x3xf32>) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -103,7 +103,7 @@ linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) - outs(%output : memref<2x2x3x2x3xf32>) + inits(%output : memref<2x2x3x2x3xf32>) return } @@ -117,7 +117,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<2x2x3x2x3xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<2x2x3x2x3xf32>) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -129,7 +129,7 @@ func.func @depthwise_conv_2d_nhwc_hwc(%input: memref<1x113x113x96xf32>, %filter: memref<3x3x96xf32>, %output: memref<1x56x56x96xf32>) { linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : 
vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) - outs(%output: memref<1x56x56x96xf32>) + inits(%output: memref<1x56x56x96xf32>) return } @@ -143,7 +143,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x113x113x96xf32>, memref<3x3x96xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<1x56x56x96xf32>) +// CHECK-SAME: inits(%{{.+}} : memref<1x56x56x96xf32>) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -156,7 +156,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%input, %filter: memref, memref) - outs (%output: memref) + inits (%output: memref) return } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1 + d3, d4)> @@ -169,7 +169,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -182,7 +182,7 @@ linalg.conv_1d_ncw_fcw {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%input, %filter: memref, memref) - outs (%output: memref) + inits (%output: memref) return } // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d3, d2 + d4)> @@ -195,7 +195,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK-NEXT: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 @@ -205,7 +205,7 @@ // ----- func.func @generalize_fill(%output: memref, %value : f32) { - linalg.fill ins(%value : f32) outs(%output : memref) + linalg.fill ins(%value : f32) inits(%output : memref) return } @@ -219,7 +219,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]]] // CHECK-SAME: iterator_types = ["parallel", "parallel"]} // CHECK-SAME: ins(%[[VAL]] : f32) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32) // CHECK-NEXT: linalg.yield %[[BBARG0]] : f32 @@ -228,7 +228,7 @@ func.func @generalize_batch_matm_vec(%lhs : memref, %rhs: memref, %out: memref) { linalg.batch_matvec ins(%lhs, %rhs: memref, memref) - outs(%out: memref) + inits(%out: memref) return } // CHECK: #[[MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)> @@ -241,7 +241,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref) -// CHECK-SAME: outs(%{{.+}} : memref) +// CHECK-SAME: inits(%{{.+}} : memref) // CHECK: ^{{.+}}(%[[BBARG0:.+]]: i8, %[[BBARG1:.+]]: i8, %[[BBARG2:.+]]: f32) // CHECK: %[[BBARG0_F32:.+]] = arith.sitofp %[[BBARG0]] : i8 to f32 // CHECK: %[[BBARG1_F32:.+]] = arith.sitofp 
%[[BBARG1]] : i8 to f32 @@ -253,7 +253,7 @@ func.func @batch_reduce_gemm(%lhs: memref<7x8x9xf32>, %rhs: memref<7x9x8xf32>, %out: memref<8x8xf32>) { linalg.batch_reduce_matmul ins(%lhs, %rhs: memref<7x8x9xf32>, memref<7x9x8xf32>) - outs(%out: memref<8x8xf32>) + inits(%out: memref<8x8xf32>) return } @@ -267,7 +267,7 @@ // CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]] // CHECK-SAME: iterator_types = ["reduction", "parallel", "parallel", "reduction"]} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<7x8x9xf32>, memref<7x9x8xf32>) -// CHECK-SAME: outs(%{{.+}} : memref<8x8xf32> +// CHECK-SAME: inits(%{{.+}} : memref<8x8xf32> // CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: f32, %[[BBARG2:.+]]: f32) // CHECK: %[[MUL:.+]] = arith.mulf %[[BBARG0]], %[[BBARG1]] : f32 // CHECK: %[[ADD:.+]] = arith.addf %[[BBARG2]], %[[MUL]] : f32 diff --git a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir --- a/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-polymorphic-ops.mlir @@ -3,7 +3,7 @@ // Verifies that different argument types are legal. func.func @generalize_matmul_tensor_f16f64f32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -22,7 +22,7 @@ // Verifies that different argument types are legal. func.func @generalize_matmul_tensor_i16i64i32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -43,7 +43,7 @@ func.func @generalize_matmul_tensor_i16i64i32_unsigned(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul {cast = #linalg.type_fn<cast_unsigned>} ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -54,7 +54,7 @@ func.func @generalize_matmul_tensor_i16i64f32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -67,7 +67,7 @@ func.func @generalize_matmul_tensor_f16f64i32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -80,7 +80,7 @@ func.func @generalize_matmul_unsigned_tensor_i16i64i32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -93,7 +93,7 @@ func.func @generalize_matmul_unsigned_tensor_i16i64f32(%A : tensor<16x8xi16>, %B: tensor<8x32xi64>, %C:
tensor<16x32xf32>) -> tensor<16x32xf32> { %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xi16>, tensor<8x32xi64>) - outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32> + inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -106,7 +106,7 @@ func.func @generalize_matmul_unsigned_tensor_f16f64i32(%A : tensor<16x8xf16>, %B: tensor<8x32xf64>, %C: tensor<16x32xi32>) -> tensor<16x32xi32> { %0 = linalg.matmul_unsigned ins(%A, %B: tensor<16x8xf16>, tensor<8x32xf64>) - outs(%C: tensor<16x32xi32>) -> tensor<16x32xi32> + inits(%C: tensor<16x32xi32>) -> tensor<16x32xi32> return %0: tensor<16x32xi32> } @@ -119,7 +119,7 @@ func.func @generalize_pooling_nhwc_max_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) inits(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> return %0: tensor<1x2x4x1xf32> } @@ -133,7 +133,7 @@ func.func @generalize_pooling_nhwc_max_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_max {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -145,7 +145,7 @@ func.func @generalize_pooling_nhwc_max_unsigned_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_max_unsigned {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -157,7 +157,7 @@ func.func @generalize_pooling_nhwc_min_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) inits(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> return %0: tensor<1x2x4x1xf32> } @@ -171,7 +171,7 @@ func.func @generalize_pooling_nhwc_min_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_min {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -183,7 +183,7 @@ func.func @generalize_pooling_nhwc_min_unsigned_i32(%input : 
tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_min_unsigned {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -195,7 +195,7 @@ func.func @generalize_pooling_nhwc_sum_f32(%input : tensor<1x4x16x1xf32>, %shape: tensor<2x2xf32>, %output: tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> { %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) outs(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> + ins(%input, %shape : tensor<1x4x16x1xf32>, tensor<2x2xf32>) inits(%output : tensor<1x2x4x1xf32>) -> tensor<1x2x4x1xf32> return %0: tensor<1x2x4x1xf32> } @@ -209,7 +209,7 @@ func.func @generalize_pooling_nhwc_sum_i32(%input : tensor<1x4x16x1xi32>, %shape: tensor<2x2xi32>, %output: tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> { %0 = linalg.pooling_nhwc_sum {dilations = dense<[1, 2]> : tensor<2xi64>, strides = dense<[2, 4]> : tensor<2xi64>} - ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) outs(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> + ins(%input, %shape : tensor<1x4x16x1xi32>, tensor<2x2xi32>) inits(%output : tensor<1x2x4x1xi32>) -> tensor<1x2x4x1xi32> return %0: tensor<1x2x4x1xi32> } @@ -222,7 +222,7 @@ // ----- func.func @generalize_fill_0d(%value: f64, %O: tensor) -> tensor { - %0 = linalg.fill ins(%value: f64) outs(%O : tensor) -> tensor + %0 = linalg.fill ins(%value: f64) inits(%O : tensor) -> tensor return %0: tensor } @@ -236,7 +236,7 @@ // ----- func.func @generalize_fill_2d(%value: f64, %O: memref<16x32xf32>) { - linalg.fill ins(%value: f64) outs(%O : memref<16x32xf32>) + linalg.fill ins(%value: f64) inits(%O : memref<16x32xf32>) return } @@ -251,7 +251,7 @@ // ----- func.func @generalize_index(%min: f64, %max: f64, %seed: i32, %O: tensor<16x32xf32>) -> tensor<16x32xf32> { - %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) outs(%O : tensor<16x32xf32>) -> tensor<16x32xf32> + %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) inits(%O : tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -264,7 +264,7 @@ // ----- func.func @generalize_const(%min: f64, %max: f64, %seed: i32, %O: tensor<16x32xf32>) -> tensor<16x32xf32> { - %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) outs(%O : tensor<16x32xf32>) -> tensor<16x32xf32> + %0 = linalg.fill_rng_2d ins(%min, %max, %seed: f64, f64, i32) inits(%O : tensor<16x32xf32>) -> tensor<16x32xf32> return %0: tensor<16x32xf32> } @@ -277,7 +277,7 @@ // Verifies the default value of the fun attribute is an exp op. func.func @generalize_elemwise_exp(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { - %0 = linalg.elemwise_unary ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + %0 = linalg.elemwise_unary ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -289,7 +289,7 @@ // Verifies the fun attribute controls the unary function used. 
func.func @generalize_elemwise_log(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<log>} - ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -301,7 +301,7 @@ // Verifies the fun attribute controls the unary function used. func.func @generalize_elemwise_abs(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<abs>} - ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -313,7 +313,7 @@ // Verifies the fun attribute controls the unary function used. func.func @generalize_elemwise_ceil(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<ceil>} - ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -325,7 +325,7 @@ // Verifies the fun attribute controls the unary function used. func.func @generalize_elemwise_floor(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<floor>} - ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -337,7 +337,7 @@ // Verifies the fun attribute controls the unary function used. func.func @generalize_elemwise_negf(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_unary {fun = #linalg.unary_fn<negf>} - ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -349,7 +349,7 @@ // Verifies the default value of the fun attribute is an add op. func.func @generalize_elemwise_add(%lhs : tensor<4x8xf32>, %rhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_binary ins(%lhs, %rhs: tensor<4x8xf32>, tensor<4x8xf32>) - outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -362,7 +362,7 @@ func.func @generalize_elemwise_mul(%lhs : tensor<4x8xf32>, %rhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_binary {fun = #linalg.binary_fn<mul>} ins(%lhs, %rhs: tensor<4x8xf32>, tensor<4x8xf32>) - outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -375,7 +375,7 @@ func.func @generalize_elemwise_rank_zero(%lhs : tensor<f32>, %rhs : tensor<f32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { %0 = linalg.elemwise_binary {fun = #linalg.binary_fn} ins(%lhs, %rhs: tensor<f32>, tensor<f32>) - outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } @@ -388,7 +388,7 @@ // Verifies the fun attribute controls the binary function used.
func.func @generalize_copy(%lhs : tensor<4x8xf32>, %output : tensor<4x8xf32>) -> tensor<4x8xf32> { - %0 = linalg.copy ins(%lhs: tensor<4x8xf32>) outs(%output: tensor<4x8xf32>) -> tensor<4x8xf32> + %0 = linalg.copy ins(%lhs: tensor<4x8xf32>) inits(%output: tensor<4x8xf32>) -> tensor<4x8xf32> return %0: tensor<4x8xf32> } diff --git a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir --- a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir +++ b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir @@ -4,7 +4,7 @@ // CHECK-SAME: %[[IN:.*]]: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { // CHECK: %[[C0:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[INIT:.*]] = tensor.empty() : tensor<1x32x32x1xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[C0]] : f32) inits(%[[INIT]] : tensor<1x32x32x1xf32>) -> tensor<1x32x32x1xf32> // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]][0, 2, 2, 0] [1, 28, 28, 1] [1, 1, 1, 1] : tensor<1x28x28x1xf32> into tensor<1x32x32x1xf32> // CHECK: return %[[PADDED]] : tensor<1x32x32x1xf32> func.func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> { @@ -29,7 +29,7 @@ // CHECK: %[[DIM3:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32> // CHECK: %[[OUT_DIM3:.*]] = arith.addi %[[DIM3]], %[[OFFSET]] : index // CHECK: %[[INIT:.*]] = tensor.empty(%[[DIM1]], %[[OUT_DIM2]], %[[OUT_DIM3]]) : tensor<4x?x?x?xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%[[CST]] : f32) inits(%[[INIT]] : tensor<4x?x?x?xf32>) -> tensor<4x?x?x?xf32> // CHECK: %[[DIM1_1:.*]] = tensor.dim %[[IN]], %[[C1]] : tensor<4x?x2x?xf32> // CHECK: %[[DIM3_1:.*]] = tensor.dim %[[IN]], %[[C3]] : tensor<4x?x2x?xf32> // CHECK: %[[PADDED:.*]] = tensor.insert_slice %[[IN]] into %[[FILL]]{{\[}}%[[C0]], %[[C0]], %[[OFFSET]], %[[C0]]] [4, %[[DIM1_1]], 2, %[[DIM3_1]]] [1, 1, 1, 1] : tensor<4x?x2x?xf32> into tensor<4x?x?x?xf32> diff --git a/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir b/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir --- a/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir +++ b/mlir/test/Dialect/Linalg/inline-scalar-operands.mlir @@ -12,7 +12,7 @@ %1 = linalg.generic {indexing_maps = [#map2, #map3, #map2], iterator_types = ["parallel"]} ins(%arg0, %scalar : tensor<4xf32>, tensor) - outs(%0 : tensor<4xf32>) { + inits(%0 : tensor<4xf32>) { // CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32) ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // CHECK: tensor.extract %[[SCALAR]][] @@ -37,7 +37,7 @@ %1 = linalg.generic {indexing_maps = [#map2, #map3, #map2], iterator_types = ["parallel"]} ins(%arg0, %scalar : tensor<4xf32>, tensor<1xf32>) - outs(%0 : tensor<4xf32>) { + inits(%0 : tensor<4xf32>) { // CHECK: ^bb0(%{{.*}}: f32, %{{.*}}: f32) ^bb0(%arg1: f32, %arg2: f32, %arg3: f32): // CHECK: tensor.extract %[[SCALAR]][%[[ZERO]]] diff --git a/mlir/test/Dialect/Linalg/inlining.mlir b/mlir/test/Dialect/Linalg/inlining.mlir --- a/mlir/test/Dialect/Linalg/inlining.mlir +++ b/mlir/test/Dialect/Linalg/inlining.mlir @@ -23,7 +23,7 @@ // CHECK: linalg.generic linalg.generic #trait ins(%arg0 : memref) - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb(%0 : f32, %1 : f32) : %2 = arith.addf %0, %0: f32 linalg.yield %2 : f32 diff --git 
a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -36,7 +36,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> ()> ], iterator_types = []} - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb(%0: f32): linalg.index 2 : index linalg.yield %0 : f32 @@ -50,7 +50,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> ()> ], iterator_types = []} - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb(%0: f32): linalg.index -1 : index linalg.yield %0 : f32 @@ -74,7 +74,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> ()> ], iterator_types = []} - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb(%0: f32): linalg.yield } @@ -87,7 +87,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> (0)> ], iterator_types = ["parallel"]} - outs(%arg0 : memref<1xi32>) { + inits(%arg0 : memref<1xi32>) { ^bb(%i : i32): linalg.yield %i : i32 } @@ -100,7 +100,7 @@ linalg.generic { indexing_maps = [ affine_map<(i) -> (i)> ], iterator_types = ["random"]} - outs(%arg0 : memref<1xi32>) { + inits(%arg0 : memref<1xi32>) { ^bb(%i : i32): linalg.yield %i : i32 } @@ -113,7 +113,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> (0, 0)> ], iterator_types = []} - outs(%arg0 : memref(off + i)>>) { + inits(%arg0 : memref(off + i)>>) { ^bb(%f : f32): linalg.yield %f: f32 } @@ -128,7 +128,7 @@ indexing_maps = [ affine_map<() -> (0)>, affine_map<() -> (0, 0)> ], iterator_types = []} ins(%cst : f32) - outs(%arg0 : memref(off + i)>>) { + inits(%arg0 : memref(off + i)>>) { ^bb(%0 : f32, %1 : f32): linalg.yield %0: f32 } @@ -141,7 +141,7 @@ linalg.generic { indexing_maps = [ affine_map<(i) -> (i)> ], iterator_types = ["parallel"]} - outs(%arg0 : memref(off + i)>>) { + inits(%arg0 : memref(off + i)>>) { ^bb(%0: f32): %1 = arith.constant 1: i4 linalg.yield %1: i4 @@ -159,7 +159,7 @@ ], iterator_types = ["parallel","parallel"]} ins(%arg0 : memref(off + i)>>) - outs(%arg1 : memref(off + i)>>) { + inits(%arg1 : memref(off + i)>>) { ^bb(%0: f32, %1: f32): linalg.yield %1: f32 } @@ -178,7 +178,7 @@ indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ], iterator_types = []} ins(%arg0 : memref) - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb1: linalg.yield %f0: f32 ^bb2: @@ -195,7 +195,7 @@ indexing_maps = [ affine_map<() -> ()> , affine_map<() -> ()> ], iterator_types = []} ins(%arg0 : memref) - outs(%arg0 : memref) { + inits(%arg0 : memref) { } } @@ -206,7 +206,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> ()>, affine_map<() -> ()> ], iterator_types = []} - outs(%arg0, %arg0 : memref, memref) { + inits(%arg0, %arg0 : memref, memref) { ^bb(%f: f32): linalg.yield %f: f32 } @@ -219,7 +219,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> ()> ], iterator_types = []} - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb(%i: i1): linalg.yield %i : i1 } @@ -232,7 +232,7 @@ linalg.generic { indexing_maps = [ affine_map<() -> ()> ], iterator_types = []} - outs(%arg0 : tensor) { + inits(%arg0 : tensor) { ^bb(%i: i1): linalg.yield %i : i1 } -> tensor @@ -245,7 +245,7 @@ linalg.generic { indexing_maps = [ affine_map<(i) -> (i)> ], iterator_types = ["parallel"]} - outs(%arg0 : memref(off + i)>>) { + inits(%arg0 : memref(off + i)>>) { ^bb(%i: f32): %0 = arith.constant 0: i1 linalg.yield %0: i1 @@ -261,7 +261,7 @@ indexing_maps = [ affine_map<(i) -> (i)> , affine_map<(i) -> (i)> ], iterator_types = ["parallel"]} ins(%arg0 : memref(off + i)>>) - outs(%arg1 : tensor) { + 
inits(%arg1 : tensor) { ^bb(%i: f32, %j: f32): linalg.yield %i: f32 } -> tensor @@ -274,7 +274,7 @@ linalg.generic { indexing_maps = [ affine_map<(i, j) -> (i, j)> ], iterator_types = ["parallel", "parallel"]} - outs(%arg0 : memref) { + inits(%arg0 : memref) { ^bb(%0: f32) : %1 = arith.addf %0, %0: f32 } @@ -298,7 +298,7 @@ func.func @named_ops(%a3: memref, %b3: memref, %c3: memref) { // expected-error @+1 {{expected operand rank (2) to match the result rank of indexing_map #1 (3)}} linalg.batch_matmul ins(%a3, %b3: memref, memref) - outs(%c3 : memref) + inits(%c3 : memref) return } @@ -316,7 +316,7 @@ func.func @matching_inits(%m: memref, %t: tensor) { // expected-error @+1 {{expected type of operand #2 ('tensor') to match type of corresponding result ('tensor')}} %res = linalg.matmul ins(%m, %m : memref, memref) - outs(%t : tensor) + inits(%t : tensor) -> tensor return } @@ -327,7 +327,7 @@ { %0 = tensor.empty(%arg0, %arg1) : tensor // expected-error @+1 {{expected the number of results (0) to be equal to the number of output tensors (1)}} - linalg.fill ins(%arg2 : f32) outs(%0 : tensor) + linalg.fill ins(%arg2 : f32) inits(%0 : tensor) } // ----- @@ -336,7 +336,7 @@ (%arg0 : memref, %arg1 : f32) -> tensor { // expected-error @+1 {{expected the number of results (1) to be equal to the number of output tensors (0)}} - %0 = linalg.fill ins(%arg1 : f32) outs(%arg0 : memref) -> tensor + %0 = linalg.fill ins(%arg1 : f32) inits(%arg0 : memref) -> tensor return %0 : tensor } @@ -346,7 +346,7 @@ (%arg0 : tensor, %arg1 : f32) -> memref { // expected-error @+1 {{result #0 must be ranked tensor of any type values, but got 'memref'}} - %0 = linalg.fill ins(%arg1 : f32) outs(%arg0 : tensor) -> memref + %0 = linalg.fill ins(%arg1 : f32) inits(%arg0 : tensor) -> memref return %0 : memref } @@ -355,7 +355,7 @@ func.func @invalid_static_matmul(%arg0: memref<2x4xf32>, %arg1: memref<3x4xf32>, %arg2: memref<2x4xf32>) { // expected-error @+1 {{inferred input/output operand #1 has shape's dimension #0 to be 4, but found 3}} linalg.matmul ins(%arg0, %arg1 : memref<2x4xf32>, memref<3x4xf32>) - outs(%arg2 :memref<2x4xf32>) + inits(%arg2 :memref<2x4xf32>) return } @@ -366,7 +366,7 @@ linalg.conv_2d_nhwc_hwcf { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%input, %filter : memref<1x3x4x2xf32>, memref<3x2x2x1xf32>) - outs(%output : memref<1x2x3x1xf32>) + inits(%output : memref<1x2x3x1xf32>) return } @@ -382,7 +382,7 @@ func.func @invalid_reverse(%A: memref<5xf32>, %B: memref<5xf32>) { // expected-error @+1 {{unexpected result less than 0 at expression #0 in}} - linalg.generic #attrs ins(%A: memref<5xf32>) outs(%B: memref<5xf32>) { + linalg.generic #attrs ins(%A: memref<5xf32>) inits(%B: memref<5xf32>) { ^bb0(%a: f32, %b: f32): linalg.yield %a : f32 } @@ -396,7 +396,7 @@ -> tensor<64xf32> { %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) - outs(%init:tensor<64xf32>) + inits(%init:tensor<64xf32>) (%lhs_elem: f32, %rhs_elem: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 // expected-error @+1{{'linalg.yield' op expected number of yield values (1) to match the number of operands of the enclosing LinalgOp (2)}} @@ -413,7 +413,7 @@ // expected-error@+1{{'linalg.map' op expects number of operands to match the arity of mapper, but got: 2 and 3}} %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) - outs(%init:tensor<64xf32>) + inits(%init:tensor<64xf32>) (%lhs_elem: f32, %rhs_elem: f32, %extra_elem: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 
linalg.yield %0: f32 @@ -429,7 +429,7 @@ // expected-error@+1{{'linalg.map' op expected element type of input 'f32' to match bbArg type 'f64'}} %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) - outs(%init:tensor<64xf32>) + inits(%init:tensor<64xf32>) (%lhs_elem: f64, %rhs_elem: f64) { %0 = arith.addf %lhs_elem, %rhs_elem: f64 linalg.yield %0: f64 @@ -445,7 +445,7 @@ // expected-error@+1{{'linalg.map' op expected shape of input (64, 64) to match shape of output (32)}} %add = linalg.map ins(%lhs, %rhs : tensor<64x64xf32>, tensor<64x64xf32>) - outs(%init:tensor<32xf32>) + inits(%init:tensor<32xf32>) (%lhs_elem: f32, %rhs_elem: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 @@ -461,7 +461,7 @@ // expected-error @+1 {{'linalg.reduce' op init dimensions [16, 64] doesn't match input dimensions after reduction [16, 32]}} %reduce = linalg.reduce ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<16x64xf32>) + inits(%init:tensor<16x64xf32>) dimensions = [2] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -477,7 +477,7 @@ // expected-error @+1 {{'linalg.reduce' op dimensions for reduction should be in the range [0, 2].}} %reduce = linalg.reduce ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<16x64xf32>) + inits(%init:tensor<16x64xf32>) dimensions = [3] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -493,7 +493,7 @@ // expected-error @+1 {{'linalg.reduce' op attribute 'dimensions' failed to satisfy constraint: i64 dense array attribute should be in increasing order}} %reduce = linalg.reduce ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<16xf32>) + inits(%init:tensor<16xf32>) dimensions = [1, 1] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -509,7 +509,7 @@ // expected-error @+1 {{'linalg.reduce' op attribute 'dimensions' failed to satisfy constraint: i64 dense array attribute should be in increasing order}} %reduce = linalg.reduce ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<16xf32>) + inits(%init:tensor<16xf32>) dimensions = [2, 1] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -525,7 +525,7 @@ // expected-error @+1 {{'linalg.reduce' op number of dimensions after reduction 1 doesn't match the init rank 2}} %reduce = linalg.reduce ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<16x64xf32>) + inits(%init:tensor<16x64xf32>) dimensions = [1, 2] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -543,7 +543,7 @@ // expected-error @+1{{'linalg.reduce' op mismatching number of operands and block arguments}} %reduce, %reduce2 = linalg.reduce ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>) - outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>) + inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>) dimensions = [1] (%in: f32, %out: f32) { %0 = arith.addf %in, %out: f32 @@ -561,7 +561,7 @@ // expected-error @+1{{'linalg.reduce' op input element type 'f32' does not match corresponding block argument type 'f64'}} %reduce, %reduce2 = linalg.reduce ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>) - outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>) + inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf32>) dimensions = [1] (%in1: f32, %in2: f64, %out1: f32, %out2: f64) { %0 = arith.addf %in1, %out1: f32 @@ -580,7 +580,7 @@ // expected-error @+1{{'linalg.reduce' op output element type 'f64' does not match corresponding block argument type 'f32'}} %reduce, %reduce2 = linalg.reduce ins(%input1, %input2 : tensor<16x32x64xf32>, 
tensor<16x32x64xf32>) - outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf64>) + inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xf64>) dimensions = [1] (%in1: f32, %in2: f32, %out1: f32, %out2: f32) { %0 = arith.addf %in1, %out1: f32 @@ -597,7 +597,7 @@ // expected-error @+1{{'linalg.reduce' op expects all inputs to have the same shapes. Shape at input-index 1 is not equal to the shape at input-index 0.}} %reduce, %reduce2 = linalg.reduce ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<17x32x64xf32>) - outs(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>) + inits(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>) dimensions = [1] (%in1: f32, %in2: f32, %out1: f32, %out2: f32) { %0 = arith.addf %in1, %out1: f32 @@ -615,7 +615,7 @@ // expected-error @+1{{'linalg.reduce' op expects all outputs to have the same shapes. Shape at output-index 1 is not equal to the shape at output-index 0.}} %reduce, %reduce2 = linalg.reduce ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xf32>) - outs(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>) + inits(%init1, %init2 : tensor<16x64xf32>, tensor<17x64xf32>) dimensions = [1] (%in1: f32, %in2: f32, %out1: f32, %out2: f32) { %0 = arith.addf %in1, %out1: f32 @@ -632,7 +632,7 @@ // expected-error @+1 {{'linalg.transpose' op permutation is not valid}} %transpose = linalg.transpose ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<32x64x16xf32>) + inits(%init:tensor<32x64x16xf32>) permutation = [1, 1, 2] func.return %transpose : tensor<32x64x16xf32> } @@ -644,7 +644,7 @@ // expected-error @+1 {{'linalg.transpose' op dim(result, 0) = 32 doesn't match dim(input, permutation[0]) = 16}} %transpose = linalg.transpose ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<32x64x16xf32>) + inits(%init:tensor<32x64x16xf32>) permutation = [0, 1, 2] func.return %transpose : tensor<32x64x16xf32> } @@ -657,7 +657,7 @@ // expected-error @+1 {{'linalg.transpose' op size of permutation 2 does not match the argument rank 3}} %transpose = linalg.transpose ins(%input:tensor<16x32x64xf32>) - outs(%init:tensor<32x64x16xf32>) + inits(%init:tensor<32x64x16xf32>) permutation = [1, 0] func.return %transpose : tensor<32x64x16xf32> } @@ -669,7 +669,7 @@ // expected-error @+1 {{'linalg.transpose' op input rank 2 does not match init rank 3}} %transpose = linalg.transpose ins(%input:tensor<16x32xf32>) - outs(%init:tensor<32x64x16xf32>) + inits(%init:tensor<32x64x16xf32>) permutation = [1, 0, 2] func.return %transpose : tensor<32x64x16xf32> } @@ -682,7 +682,7 @@ // expected-error @+1 {{'linalg.broadcast' op dimensions should be in sorted order}} %bcast = linalg.broadcast ins(%input:tensor<4x16xf32>) - outs(%init:tensor<4x8x16xf32>) + inits(%init:tensor<4x8x16xf32>) dimensions = [1, 0] func.return %bcast : tensor<4x8x16xf32> } @@ -695,7 +695,7 @@ // expected-error @+1 {{'linalg.broadcast' op input rank does match the number of dimensions. expected: 2, got: 1}} %bcast = linalg.broadcast ins(%input:tensor<4x16xf32>) - outs(%init:tensor<4x8x16xf32>) + inits(%init:tensor<4x8x16xf32>) dimensions = [0] func.return %bcast : tensor<4x8x16xf32> } @@ -708,7 +708,7 @@ // expected-error @+1 {{'linalg.broadcast' op dimension 1 is out of range. expected range: [0, 2], got: 5}} %bcast = linalg.broadcast ins(%input:tensor<4x16xf32>) - outs(%init:tensor<4x8x16xf32>) + inits(%init:tensor<4x8x16xf32>) dimensions = [0, 5] func.return %bcast : tensor<4x8x16xf32> } @@ -721,7 +721,7 @@ // expected-error @+1 {{'linalg.broadcast' op input dim 0 should match init dim 0. 
input: 4, init: 5}} %bcast = linalg.broadcast ins(%input:tensor<4x16xf32>) - outs(%init:tensor<5x8x16xf32>) + inits(%init:tensor<5x8x16xf32>) dimensions = [0, 2] func.return %bcast : tensor<5x8x16xf32> } @@ -734,7 +734,7 @@ // expected-error @+1 {{'linalg.broadcast' op init dim 1 can't be dynamic, because it's not matched to input}} %bcast = linalg.broadcast ins(%input:tensor<4x16xf32>) - outs(%init:tensor<4x?x16xf32>) + inits(%init:tensor<4x?x16xf32>) dimensions = [0, 2] func.return %bcast : tensor<4x?x16xf32> } @@ -747,7 +747,7 @@ // expected-error @+1 {{'linalg.broadcast' op input dim 0 should match init dim 0. input: 1, init: 4}} %bcast = linalg.broadcast ins(%input:tensor<1x16xf32>) - outs(%init:tensor<4x?x16xf32>) + inits(%init:tensor<4x?x16xf32>) dimensions = [0, 2] func.return %bcast : tensor<4x?x16xf32> } diff --git a/mlir/test/Dialect/Linalg/library-calls.mlir b/mlir/test/Dialect/Linalg/library-calls.mlir --- a/mlir/test/Dialect/Linalg/library-calls.mlir +++ b/mlir/test/Dialect/Linalg/library-calls.mlir @@ -14,11 +14,11 @@ %C = memref.alloc(%x, %y) : memref // CHECK: call @linalg_fill_f32_viewsxsxf32({{.*}}) : (f32, memref) - linalg.fill ins(%f0 : f32) outs(%C : memref) + linalg.fill ins(%f0 : f32) inits(%C : memref) // CHECK: call @linalg_matmul_viewsxsxf32_viewsxsxf32_viewsxsxf32({{.*}}) : (memref, memref, memref) -> () linalg.matmul ins(%A, %B: memref, memref) - outs(%C: memref) + inits(%C: memref) return %C : memref } diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -15,7 +15,7 @@ %B = memref.view %arg0[%c0][%K, %N] : memref to memref %C = memref.view %arg0[%c0][%M, %N] : memref to memref linalg.matmul ins(%A, %B: memref, memref) - outs(%C: memref) + inits(%C: memref) return } // CHECK-LABEL: func @matmul(%{{.*}}: memref, @@ -60,7 +60,7 @@ %3 = memref.view %arg0[%c0][%M] : memref to memref %4 = memref.view %arg0[%c0][%N] : memref to memref linalg.matvec ins(%2, %3: memref, memref) - outs(%4 : memref) + inits(%4 : memref) return } // CHECK-LABEL: func @matvec(%{{.*}}: memref, @@ -101,7 +101,7 @@ %2 = memref.view %arg0[%c0][%M] : memref to memref %3 = memref.view %arg0[%c0][] : memref to memref linalg.dot ins(%1, %2 : memref, memref) - outs(%3 : memref) + inits(%3 : memref) return } // CHECK-LABEL: func @dot(%{{.*}}: memref, @@ -135,7 +135,7 @@ %arg3: memref) { // Verifies that we use the correct arith operations for integers. linalg.dot ins(%arg0, %arg1 : memref, memref) - outs(%arg3 : memref) + inits(%arg3 : memref) return } // CHECK-LABEL: func @dot_int( @@ -148,7 +148,7 @@ %arg3: memref) { // Verifies that we use the correct (saturating) arith operations for booleans. 
linalg.dot ins(%arg0, %arg1 : memref, memref) - outs(%arg3 : memref) + inits(%arg3 : memref) return } // CHECK-LABEL: func @dot_bool( @@ -160,7 +160,7 @@ func.func @dot_view(%arg0: memref>, %arg1: memref>, %arg2: memref) { linalg.dot ins(%arg0, %arg1 : memref>, memref>) - outs(%arg2: memref) + inits(%arg2: memref) return } // CHECK-LABEL: func @dot_view( @@ -186,7 +186,7 @@ // CHECKPARALLEL: store %[[res]], %{{.*}}[] : memref func.func @fill_view(%arg0: memref>, %arg1: f32) { - linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>) + linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>) return } // CHECK-LABEL: func @fill_view( @@ -200,7 +200,7 @@ // CHECKPARALLEL: store %{{.*}}, %{{.*}}[%{{.*}}] : memref> func.func @fill_view0(%arg0: memref, %arg1: f32) { - linalg.fill ins(%arg1 : f32) outs(%arg0 : memref) + linalg.fill ins(%arg1 : f32) inits(%arg0 : memref) return } // CHECK-LABEL: func @fill_view0(%{{.*}}: memref, %{{.*}}: f32) { @@ -210,7 +210,7 @@ // CHECKPARALLEL: store %{{.*}}, %{{.*}}[] : memref func.func @fill_view3(%arg0: memref>, %arg1: f32) { - linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>) + linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>) return } // CHECK-LABEL: func @fill_view3( @@ -230,7 +230,7 @@ iterator_types = ["parallel"], indexing_maps = [ affine_map<(i) -> (i)>, affine_map<(i) -> (i)>] } ins(%arg0: memref>) - outs(%arg1: memref>) { + inits(%arg1: memref>) { ^bb0(%a: f32, %b: f32): linalg.yield %a : f32 } @@ -264,7 +264,7 @@ func.func @generic_region(%arg0: memref>, %arg1: memref>, %arg2: memref>) { linalg.generic #trait2 ins(%arg0: memref>) - outs(%arg1, %arg2 : memref>, + inits(%arg1, %arg2 : memref>, memref>) { ^bb0(%a: f32, %b: f32, %c: f32): %d = arith.mulf %a, %b : f32 @@ -309,7 +309,7 @@ %arg2: memref>) { linalg.generic #trait4 ins(%arg0 : memref>) - outs(%arg1, %arg2 : memref>, + inits(%arg1, %arg2 : memref>, memref>) { ^bb0(%a: f32, %b: f32, %c: f32): %i = linalg.index 0 : index @@ -377,7 +377,7 @@ { linalg.generic #trait_broadcast ins(%arg0 : memref) - outs(%arg1 : memref<3x4xf32>) { + inits(%arg1 : memref<3x4xf32>) { ^bb(%a: f32, %b: f32) : linalg.yield %a : f32 } @@ -403,7 +403,7 @@ { linalg.generic #trait_broadcast ins(%arg0 : f32) - outs(%arg1 : memref<3x4xf32>) { + inits(%arg1 : memref<3x4xf32>) { ^bb(%a: f32, %b: f32) : linalg.yield %a : f32 } @@ -427,7 +427,7 @@ { linalg.generic #trait_broadcast ins(%arg0 : memref) - outs(%arg1 : memref<3x4xi32>) { + inits(%arg1 : memref<3x4xi32>) { ^bb(%a: i32, %b: i32) : %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -477,7 +477,7 @@ { linalg.generic #trait_reduce_1D ins(%arg0 : memref) - outs(%arg1 : memref) { + inits(%arg1 : memref) { ^bb(%a: f32, %b: f32) : %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -523,7 +523,7 @@ { linalg.generic #trait_reduce_init_1D ins(%arg0, %arg1 : memref, memref) - outs(%arg2 : memref) { + inits(%arg2 : memref) { ^bb(%a: f32, %b: f32, %c: f32) : %i = linalg.index 0 : index %0 = arith.constant 0 : index @@ -567,7 +567,7 @@ } func.func @generic_const_init(%arg0: memref) { %cst = arith.constant 1.0 : f32 - linalg.generic #trait_const_fill outs(%arg0 : memref) { + linalg.generic #trait_const_fill inits(%arg0 : memref) { ^bb0(%arg1: f32): linalg.yield %cst : f32 } @@ -601,7 +601,7 @@ { linalg.generic #scalar_trait ins(%arg0, %arg1 : memref, memref) - outs(%arg2 : memref) { + inits(%arg2 : memref) { ^bb(%a : f32, %b : f32, %c : f32) : %result = scf.if %arg3 -> (f32) { scf.yield %a : f32 @@ -643,7 +643,7 @@ 
//----------------------------------------------------------------------------// func.func @named_batch_matmul(%A: memref, %B: memref, %C: memref) { linalg.batch_matmul ins(%A, %B : memref, memref) - outs(%C : memref) + inits(%C : memref) return } // CHECK-LABEL: @named_batch_matmul @@ -685,7 +685,7 @@ func.func @conv1d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () { linalg.conv_1d ins(%in, %filter : memref, memref) - outs(%out : memref) + inits(%out : memref) return } @@ -728,7 +728,7 @@ func.func @conv2d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () { linalg.conv_2d ins(%in, %filter : memref, memref) - outs(%out: memref) + inits(%out: memref) return } // CHECK-LABEL: @conv2d_no_symbols @@ -781,7 +781,7 @@ func.func @conv3d_no_symbols(%in : memref, %filter : memref, %out : memref) -> () { linalg.conv_3d ins(%in, %filter : memref, memref) - outs(%out : memref) + inits(%out : memref) return } @@ -856,7 +856,7 @@ iterator_types = ["parallel"], indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>]} ins(%0: memref>) - outs(%1: memref>) { + inits(%1: memref>) { ^bb0(%a: i32, %b: i32): linalg.yield %a : i32 } diff --git a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir --- a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir +++ b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir @@ -57,7 +57,7 @@ // CHECK: %[[R2c:.+]] = linalg.generic // CHECK-SAME: indexing_maps = [#[[$MAP4]], #[[$MAP5]]] // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"] -// CHECK: ins(%{{.*}} : tensor<1x28x28x1xf32>) outs(%{{.*}} : tensor<1x32x32x1xf32>) +// CHECK: ins(%{{.*}} : tensor<1x28x28x1xf32>) inits(%{{.*}} : tensor<1x32x32x1xf32>) // CHECK: ^bb0(%[[VAL:.+]]: f32, %{{.*}}: f32) // CHECK: linalg.yield %[[VAL]] : f32 // CHECK: return %[[R2c:.+]] diff --git a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir --- a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir +++ b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir @@ -28,7 +28,7 @@ iterator_types = ["parallel", "parallel"] } ins(%arg0: tensor<10x34xf32>) - outs(%arg1: tensor<10x34xf32>) { + inits(%arg1: tensor<10x34xf32>) { ^bb0(%0: f32, %1: f32): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -55,7 +55,7 @@ // CHECK: %[[LOOPRES:.+]] = scf.for %[[I2:.+]] = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%[[ITERARG_2:.+]] = %[[SLICE_2]]) // CHECK: %[[INSLICE_2:.+]] = tensor.extract_slice %[[SLICE_2_IN]][0, %[[I2]]] [2, 8] [1, 1] // CHECK: %[[OUTSLICE_2:.+]] = tensor.extract_slice %[[ITERARG_2]][0, %[[I2]]] [2, 8] [1, 1] - // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) outs(%[[OUTSLICE_2]] : tensor<2x8xf32>) + // CHECK: %[[RESSLICE_1:.+]] = linalg.generic {{.*}} ins(%[[INSLICE_2]] : tensor<2x8xf32>) inits(%[[OUTSLICE_2]] : tensor<2x8xf32>) // CHECK: %[[RESPARTIAL:.+]] = tensor.insert_slice %[[RESSLICE_1]] into %[[ITERARG_2]] // CHECK: scf.yield %[[RESPARTIAL]] diff --git a/mlir/test/Dialect/Linalg/named-ops.mlir b/mlir/test/Dialect/Linalg/named-ops.mlir --- a/mlir/test/Dialect/Linalg/named-ops.mlir +++ b/mlir/test/Dialect/Linalg/named-ops.mlir @@ -4,11 +4,11 @@ func.func @depthwise_conv_1d_nwc_wcm(%input: tensor<1x12x8xf32>, %filter: tensor<3x8x8xf32>) -> tensor<1x10x8x8xf32> { %zero = arith.constant 0.000000e+00 : f32 %init = tensor.empty() : tensor<1x10x8x8xf32> - %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8x8xf32>) -> 
tensor<1x10x8x8xf32> + %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32> // CHECK: depthwise_conv_1d_nwc_wcm %0 = linalg.depthwise_conv_1d_nwc_wcm {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %filter : tensor<1x12x8xf32>, tensor<3x8x8xf32>) - outs(%fill : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32> + inits(%fill : tensor<1x10x8x8xf32>) -> tensor<1x10x8x8xf32> return %0 : tensor<1x10x8x8xf32> } @@ -18,11 +18,11 @@ func.func @depthwise_conv_1d_nwc_wc(%input: tensor<1x12x8xf32>, %filter: tensor<3x8xf32>) -> tensor<1x10x8xf32> { %zero = arith.constant 0.000000e+00 : f32 %init = tensor.empty() : tensor<1x10x8xf32> - %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<1x10x8xf32>) -> tensor<1x10x8xf32> + %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<1x10x8xf32>) -> tensor<1x10x8xf32> // CHECK: depthwise_conv_1d_nwc_wc %0 = linalg.depthwise_conv_1d_nwc_wc {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %filter : tensor<1x12x8xf32>, tensor<3x8xf32>) - outs(%fill : tensor<1x10x8xf32>) -> tensor<1x10x8xf32> + inits(%fill : tensor<1x10x8xf32>) -> tensor<1x10x8xf32> return %0 : tensor<1x10x8xf32> } @@ -32,15 +32,15 @@ func.func @depthwise_conv_2d_nhwc_hwcm_tensor(%input: tensor<2x4x5x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x3x4x2x3xf32> { %zero = arith.constant 0.000000e+00 : f32 %init = tensor.empty() : tensor<2x3x4x2x3xf32> - %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32> + %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32> // CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm // CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>) - // CHECK-SAME: outs(%{{.+}} : tensor<2x3x4x2x3xf32>) + // CHECK-SAME: inits(%{{.+}} : tensor<2x3x4x2x3xf32>) %0 = linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : tensor<2x4x5x2xf32>, tensor<2x2x2x3xf32>) - outs(%fill : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32> + inits(%fill : tensor<2x3x4x2x3xf32>) -> tensor<2x3x4x2x3xf32> return %0 : tensor<2x3x4x2x3xf32> } @@ -49,11 +49,11 @@ // CHECK: linalg.depthwise_conv_2d_nhwc_hwcm // CHECK-SAME: {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) - // CHECK-SAME: outs(%{{.+}} : memref<2x3x4x2x3xf32>) + // CHECK-SAME: inits(%{{.+}} : memref<2x3x4x2x3xf32>) linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x4x5x2xf32>, memref<2x2x2x3xf32>) - outs(%output : memref<2x3x4x2x3xf32>) + inits(%output : memref<2x3x4x2x3xf32>) return } @@ -63,10 +63,10 @@ // CHECK: %{{.+}} = linalg.depthwise_conv_1d_nw // CHECK-SAME: {dilations = dense<1> : vector<1xi64>, strides = dense<2> : vector<1xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x96xf32>, tensor<3x96xf32>) - // CHECK-SAME: outs(%{{.+}} : tensor<1x56x96xf32>) -> tensor<1x56x96xf32> + // CHECK-SAME: inits(%{{.+}} : tensor<1x56x96xf32>) -> tensor<1x56x96xf32> %0 = linalg.depthwise_conv_1d_nwc_wc {dilations = dense<1> : vector<1xi64>, strides = dense<2> : vector<1xi64>} ins(%input, %filter: tensor<1x113x96xf32>, tensor<3x96xf32>) - 
outs(%init: tensor<1x56x96xf32>) -> tensor<1x56x96xf32> + inits(%init: tensor<1x56x96xf32>) -> tensor<1x56x96xf32> return %0: tensor<1x56x96xf32> } @@ -76,10 +76,10 @@ // CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwc // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x113x113x96xf32>, tensor<3x3x96xf32>) - // CHECK-SAME: outs(%{{.+}} : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32> + // CHECK-SAME: inits(%{{.+}} : tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32> %0 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: tensor<1x113x113x96xf32>, tensor<3x3x96xf32>) - outs(%init: tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32> + inits(%init: tensor<1x56x56x96xf32>) -> tensor<1x56x56x96xf32> return %0: tensor<1x56x56x96xf32> } @@ -88,10 +88,10 @@ // CHECK: linalg.depthwise_conv_2d_nhwc_hwc // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x113x113x96xf32>, memref<3x3x96xf32>) - // CHECK-SAME: outs(%{{.+}} : memref<1x56x56x96xf32>) + // CHECK-SAME: inits(%{{.+}} : memref<1x56x56x96xf32>) linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) - outs(%output: memref<1x56x56x96xf32>) + inits(%output: memref<1x56x56x96xf32>) return } @@ -101,10 +101,10 @@ // CHECK: %{{.+}} = linalg.depthwise_conv_2d_nchw_chw // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<1x96x113x113xf32>, tensor<96x3x3xf32>) - // CHECK-SAME: outs(%{{.+}} : tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32> + // CHECK-SAME: inits(%{{.+}} : tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32> %0 = linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: tensor<1x96x113x113xf32>, tensor<96x3x3xf32>) - outs(%init: tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32> + inits(%init: tensor<1x96x56x56xf32>) -> tensor<1x96x56x56xf32> return %0: tensor<1x96x56x56xf32> } @@ -113,25 +113,25 @@ // CHECK: linalg.depthwise_conv_2d_nchw_chw // CHECK-SAME: {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x96x113x113xf32>, memref<96x3x3xf32>) - // CHECK-SAME: outs(%{{.+}} : memref<1x96x56x56xf32>) + // CHECK-SAME: inits(%{{.+}} : memref<1x96x56x56xf32>) linalg.depthwise_conv_2d_nchw_chw {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>} ins(%input, %filter: memref<1x96x113x113xf32>, memref<96x3x3xf32>) - outs(%output: memref<1x96x56x56xf32>) + inits(%output: memref<1x96x56x56xf32>) return } func.func @depthwise_conv_2d_nhwc_hwcm_tensor_dilated(%input: tensor<2x8x9x2xf32>, %filter: tensor<2x2x2x3xf32>) -> tensor<2x6x7x2x3xf32> { %zero = arith.constant 0.000000e+00 : f32 %init = tensor.empty() : tensor<2x6x7x2x3xf32> - %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32> + %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32> // CHECK: %{{.+}} = linalg.depthwise_conv_2d_nhwc_hwcm // CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<2x8x9x2xf32>, 
tensor<2x2x2x3xf32>) - // CHECK-SAME: outs(%{{.+}} : tensor<2x6x7x2x3xf32>) + // CHECK-SAME: inits(%{{.+}} : tensor<2x6x7x2x3xf32>) %0 = linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : tensor<2x8x9x2xf32>, tensor<2x2x2x3xf32>) - outs(%fill : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32> + inits(%fill : tensor<2x6x7x2x3xf32>) -> tensor<2x6x7x2x3xf32> return %0 : tensor<2x6x7x2x3xf32> } @@ -140,11 +140,11 @@ // CHECK: linalg.depthwise_conv_2d_nhwc_hwcm // CHECK-SAME: {dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<2x8x9x2xf32>, memref<2x2x2x3xf32>) - // CHECK-SAME: outs(%{{.+}} : memref<2x6x7x2x3xf32>) + // CHECK-SAME: inits(%{{.+}} : memref<2x6x7x2x3xf32>) linalg.depthwise_conv_2d_nhwc_hwcm { dilations = dense<2> : tensor<2xi64>, strides = dense<1> : tensor<2xi64> } ins(%input, %filter : memref<2x8x9x2xf32>, memref<2x2x2x3xf32>) - outs(%output : memref<2x6x7x2x3xf32>) + inits(%output : memref<2x6x7x2x3xf32>) return } @@ -157,7 +157,7 @@ // CHECK-NOT: dilations = linalg.depthwise_conv_2d_nhwc_hwc ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) - outs(%output: memref<1x56x56x96xf32>) + inits(%output: memref<1x56x56x96xf32>) return } @@ -167,7 +167,7 @@ // expected-error @+1 {{op attribute 'strides' failed to satisfy constraint: 64-bit signless int elements attribute of shape [2]}} linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2.0> : vector<2xf32>} ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) - outs(%output: memref<1x56x56x96xf32>) + inits(%output: memref<1x56x56x96xf32>) return } @@ -177,7 +177,7 @@ // expected-error @+1 {{op attribute 'strides' failed to satisfy constraint: 64-bit signless int elements attribute of shape [2]}} linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<3xi64> } ins(%input, %filter: memref<1x113x113x96xf32>, memref<3x3x96xf32>) - outs(%output: memref<1x56x56x96xf32>) + inits(%output: memref<1x56x56x96xf32>) return } @@ -187,11 +187,11 @@ func.func @depthwise_conv_3d_ndhwc_dhwcm(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> { %zero = arith.constant 0.000000e+00 : f32 %init = tensor.empty() : tensor<2x3x13x4x6x6xf32> - %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> + %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> // CHECK: depthwise_conv_3d_ndhwc_dhwcm %0 = linalg.depthwise_conv_3d_ndhwc_dhwcm {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>} ins(%input, %filter : tensor<2x6x13x12x6xf32>, tensor<2x1x3x6x6xf32>) - outs(%fill : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> + inits(%fill : tensor<2x3x13x4x6x6xf32>) -> tensor<2x3x13x4x6x6xf32> return %0 : tensor<2x3x13x4x6x6xf32> } @@ -201,11 +201,11 @@ func.func @depthwise_conv_3d_ndhwc_dhwc(%input: tensor<2x6x13x12x6xf32>, %filter: tensor<2x1x3x6xf32>) -> tensor<2x3x13x4x6xf32> { %zero = arith.constant 0.000000e+00 : f32 %init = tensor.empty() : tensor<2x3x13x4x6xf32> - %fill = linalg.fill ins(%zero : f32) outs(%init : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32> + %fill = linalg.fill ins(%zero : f32) inits(%init : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32> // CHECK: depthwise_conv_3d_ndhwc_dhwc %0 = 
     linalg.depthwise_conv_3d_ndhwc_dhwc {dilations = dense<1> : tensor<3xi64>, strides = dense<[2, 1, 3]> : tensor<3xi64>}
     ins(%input, %filter : tensor<2x6x13x12x6xf32>, tensor<2x1x3x6xf32>)
-    outs(%fill : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
+    inits(%fill : tensor<2x3x13x4x6xf32>) -> tensor<2x3x13x4x6xf32>
   return %0 : tensor<2x3x13x4x6xf32>
 }
@@ -217,11 +217,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<1xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<1xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -233,11 +233,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<1xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<1xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME:   outs(%{{.+}} : memref)
+  // CHECK-SAME:   inits(%{{.+}} : memref)
   linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins (%input, %filter: memref, memref)
-    outs (%output: memref)
+    inits (%output: memref)
   return
 }
@@ -249,11 +249,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<1xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<1xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_1d_ncw_fcw {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -265,11 +265,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<1xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<1xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME:   outs(%{{.+}} : memref)
+  // CHECK-SAME:   inits(%{{.+}} : memref)
   linalg.conv_1d_ncw_fcw {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>}
     ins (%input, %filter: memref, memref)
-    outs (%output: memref)
+    inits (%output: memref)
   return
 }
@@ -281,11 +281,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -297,11 +297,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_2d_ngchw_fgchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -313,11 +313,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -329,11 +329,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor<64x3x3x32xf32>)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: tensor, tensor<64x3x3x32xf32>)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -345,11 +345,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME:   outs(%{{.+}} : memref)
+  // CHECK-SAME:   inits(%{{.+}} : memref)
   linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: memref, memref)
-    outs (%output: memref)
+    inits (%output: memref)
   return
 }
@@ -361,11 +361,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME:   outs(%{{.+}} : memref)
+  // CHECK-SAME:   inits(%{{.+}} : memref)
   linalg.conv_2d_ngchw_fgchw {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins (%input, %filter: memref, memref)
-    outs (%output: memref)
+    inits (%output: memref)
   return
 }
@@ -377,11 +377,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor, tensor)
-  // CHECK-SAME:   outs(%{{.+}} : tensor) -> tensor
+  // CHECK-SAME:   inits(%{{.+}} : tensor) -> tensor
   %0 = linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   return %0 : tensor
 }
@@ -393,11 +393,11 @@
   // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
   // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
   // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME:   outs(%{{.+}} : memref)
+  // CHECK-SAME:   inits(%{{.+}} : memref)
   linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins (%input, %filter: memref, memref)
-    outs (%output: memref)
+    inits (%output: memref)
   return
 }
@@ -408,15 +408,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
 func.func @pooling_nhwc_sum_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   return %res : tensor<1x2x2x1xf32>
 }
@@ -427,11 +427,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xf32>, memref<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x1xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x1xf32>)
 func.func @pooling_nhwc_sum(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %output: memref<1x2x2x1xf32>) {
   linalg.pooling_nhwc_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xf32>, memref<3x3xf32>)
-    outs(%output: memref<1x2x2x1xf32>)
+    inits(%output: memref<1x2x2x1xf32>)
   return
 }
@@ -442,15 +442,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
 func.func @pooling_nchw_sum_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x1x2x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   %res = linalg.pooling_nchw_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+    inits(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   return %res : tensor<1x1x2x2xf32>
 }
@@ -461,11 +461,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x1x4x4xf32>, memref<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x1x2x2xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x1x2x2xf32>)
 func.func @pooling_nchw_sum(%input: memref<1x1x4x4xf32>, %fake: memref<3x3xf32>, %output: memref<1x1x2x2xf32>) {
   linalg.pooling_nchw_sum {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x1x4x4xf32>, memref<3x3xf32>)
-    outs(%output: memref<1x1x2x2xf32>)
+    inits(%output: memref<1x1x2x2xf32>)
   return
 }
@@ -476,15 +476,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
 func.func @pooling_nhwc_max_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   return %res : tensor<1x2x2x1xf32>
 }
@@ -494,16 +494,16 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
 func.func @pooling_nchw_max_tensor(%input: tensor<1x1x4x4xf32>) -> tensor<1x1x2x2xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x1x2x2xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   %res = linalg.pooling_nchw_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x1x4x4xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
+    inits(%fill: tensor<1x1x2x2xf32>) -> tensor<1x1x2x2xf32>
   return %res : tensor<1x1x2x2xf32>
 }
@@ -514,11 +514,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xf32>, memref<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x1xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x1xf32>)
 func.func @pooling_nhwc_max(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %output: memref<1x2x2x1xf32>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xf32>, memref<3x3xf32>)
-    outs(%output: memref<1x2x2x1xf32>)
+    inits(%output: memref<1x2x2x1xf32>)
   return
 }
@@ -529,15 +529,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi8>, tensor<3x3xi8>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
 func.func @pooling_nhwc_i8_max_tensor(%input: tensor<1x4x4x1xi8>) -> tensor<1x2x2x1xi8> {
   %fake = tensor.empty() : tensor<3x3xi8>
   %init = tensor.empty() : tensor<1x2x2x1xi8>
   %cst = arith.constant 0 : i8
-  %fill = linalg.fill ins(%cst : i8) outs(%init : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+  %fill = linalg.fill ins(%cst : i8) inits(%init : tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi8>, tensor<3x3xi8>)
-    outs(%fill: tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
+    inits(%fill: tensor<1x2x2x1xi8>) -> tensor<1x2x2x1xi8>
   return %res : tensor<1x2x2x1xi8>
 }
@@ -548,11 +548,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi8>, memref<3x3xi8>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x1xi8>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x1xi8>)
 func.func @pooling_nhwc_i8_max(%input: memref<1x4x4x1xi8>, %fake: memref<3x3xi8>, %output: memref<1x2x2x1xi8>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xi8>, memref<3x3xi8>)
-    outs(%output: memref<1x2x2x1xi8>)
+    inits(%output: memref<1x2x2x1xi8>)
   return
 }
@@ -563,15 +563,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi16>, tensor<3x3xi16>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
 func.func @pooling_nhwc_i16_max_tensor(%input: tensor<1x4x4x1xi16>) -> tensor<1x2x2x1xi16> {
   %fake = tensor.empty() : tensor<3x3xi16>
   %init = tensor.empty() : tensor<1x2x2x1xi16>
   %cst = arith.constant 0 : i16
-  %fill = linalg.fill ins(%cst : i16) outs(%init : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+  %fill = linalg.fill ins(%cst : i16) inits(%init : tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi16>, tensor<3x3xi16>)
-    outs(%fill: tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
+    inits(%fill: tensor<1x2x2x1xi16>) -> tensor<1x2x2x1xi16>
   return %res : tensor<1x2x2x1xi16>
 }
@@ -582,11 +582,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi16>, memref<3x3xi16>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x1xi16>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x1xi16>)
 func.func @pooling_nhwc_i16_max(%input: memref<1x4x4x1xi16>, %fake: memref<3x3xi16>, %output: memref<1x2x2x1xi16>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xi16>, memref<3x3xi16>)
-    outs(%output: memref<1x2x2x1xi16>)
+    inits(%output: memref<1x2x2x1xi16>)
   return
 }
@@ -597,15 +597,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xi32>, tensor<3x3xi32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
 func.func @pooling_nhwc_i32_max_tensor(%input: tensor<1x4x4x1xi32>) -> tensor<1x2x2x1xi32> {
   %fake = tensor.empty() : tensor<3x3xi32>
   %init = tensor.empty() : tensor<1x2x2x1xi32>
   %cst = arith.constant 0 : i32
-  %fill = linalg.fill ins(%cst : i32) outs(%init : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+  %fill = linalg.fill ins(%cst : i32) inits(%init : tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
   %res = linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xi32>, tensor<3x3xi32>)
-    outs(%fill: tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
+    inits(%fill: tensor<1x2x2x1xi32>) -> tensor<1x2x2x1xi32>
   return %res : tensor<1x2x2x1xi32>
 }
@@ -616,11 +616,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xi32>, memref<3x3xi32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x1xi32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x1xi32>)
 func.func @pooling_nhwc_i32_max(%input: memref<1x4x4x1xi32>, %fake: memref<3x3xi32>, %output: memref<1x2x2x1xi32>) {
   linalg.pooling_nhwc_max {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xi32>, memref<3x3xi32>)
-    outs(%output: memref<1x2x2x1xi32>)
+    inits(%output: memref<1x2x2x1xi32>)
   return
 }
@@ -632,15 +632,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
 func.func @pooling_nhwc_min_tensor(%input: tensor<1x4x4x1xf32>) -> tensor<1x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   %res = linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: tensor<1x4x4x1xf32>, tensor<3x3xf32>)
-    outs(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x1xf32>) -> tensor<1x2x2x1xf32>
   return %res : tensor<1x2x2x1xf32>
 }
@@ -651,11 +651,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<2xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<2xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x1xf32>, memref<3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x1xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x1xf32>)
 func.func @pooling_nhwc_min(%input: memref<1x4x4x1xf32>, %fake: memref<3x3xf32>, %output: memref<1x2x2x1xf32>) {
   linalg.pooling_nhwc_min {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>}
     ins(%input, %fake: memref<1x4x4x1xf32>, memref<3x3xf32>)
-    outs(%output: memref<1x2x2x1xf32>)
+    inits(%output: memref<1x2x2x1xf32>)
   return
 }
@@ -666,15 +666,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_sum_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -685,11 +685,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x2x1xf32>)
 func.func @pooling_ndhwc_sum(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
   linalg.pooling_ndhwc_sum {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-    outs(%output: memref<1x2x2x2x1xf32>)
+    inits(%output: memref<1x2x2x2x1xf32>)
   return
 }
@@ -700,15 +700,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_max_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -719,11 +719,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x2x1xf32>)
 func.func @pooling_ndhwc_max(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
   linalg.pooling_ndhwc_max {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-    outs(%output: memref<1x2x2x2x1xf32>)
+    inits(%output: memref<1x2x2x2x1xf32>)
   return
 }
@@ -734,15 +734,15 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+// CHECK-SAME:   inits(%{{.+}} : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
 func.func @pooling_ndhwc_min_tensor(%input: tensor<1x4x4x4x1xf32>) -> tensor<1x2x2x2x1xf32> {
   %fake = tensor.empty() : tensor<3x3x3xf32>
   %init = tensor.empty() : tensor<1x2x2x2x1xf32>
   %cst = arith.constant 0.000000e+00 : f32
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   %res = linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: tensor<1x4x4x4x1xf32>, tensor<3x3x3xf32>)
-    outs(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
+    inits(%fill: tensor<1x2x2x2x1xf32>) -> tensor<1x2x2x2x1xf32>
   return %res : tensor<1x2x2x2x1xf32>
 }
@@ -753,11 +753,11 @@
 // CHECK-SAME:   dilations = dense<1> : tensor<3xi64>
 // CHECK-SAME:   strides = dense<1> : tensor<3xi64>
 // CHECK-SAME:   ins(%{{.+}}, %{{.+}} : memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-// CHECK-SAME:   outs(%{{.+}} : memref<1x2x2x2x1xf32>)
+// CHECK-SAME:   inits(%{{.+}} : memref<1x2x2x2x1xf32>)
func.func @pooling_ndhwc_min(%input: memref<1x4x4x4x1xf32>, %fake: memref<3x3x3xf32>, %output: memref<1x2x2x2x1xf32>) {
   linalg.pooling_ndhwc_min {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>}
     ins(%input, %fake: memref<1x4x4x4x1xf32>, memref<3x3x3xf32>)
-    outs(%output: memref<1x2x2x2x1xf32>)
+    inits(%output: memref<1x2x2x2x1xf32>)
   return
 }
@@ -800,8 +800,8 @@
 func.func @batch_reduce_matmul(%arg0: tensor<8x128x256xf32>, %arg1: tensor<8x256x512xf32>, %arg2: tensor<128x512xf32>) -> tensor<128x512xf32> {
   // CHECK: %{{.+}} = linalg.batch_reduce_matmul
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : tensor<8x128x256xf32>, tensor<8x256x512xf32>)
-  // CHECK-SAME: outs(%{{.+}} : tensor<128x512xf32>) -> tensor<128x512xf32>
-  %0 = linalg.batch_reduce_matmul ins(%arg0, %arg1 : tensor<8x128x256xf32>, tensor<8x256x512xf32>) outs(%arg2: tensor<128x512xf32>) -> tensor<128x512xf32>
+  // CHECK-SAME: inits(%{{.+}} : tensor<128x512xf32>) -> tensor<128x512xf32>
+  %0 = linalg.batch_reduce_matmul ins(%arg0, %arg1 : tensor<8x128x256xf32>, tensor<8x256x512xf32>) inits(%arg2: tensor<128x512xf32>) -> tensor<128x512xf32>
   return %0: tensor<128x512xf32>
 }
@@ -810,7 +810,7 @@
 func.func @batch_reduce_matmul(%arg0: memref, %arg1: memref, %arg2: memref) {
   // CHECK: linalg.batch_reduce_matmul
   // CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref, memref)
-  // CHECK-SAME: outs(%{{.+}} : memref)
-  linalg.batch_reduce_matmul ins(%arg0, %arg1 : memref, memref) outs(%arg2: memref)
+  // CHECK-SAME: inits(%{{.+}} : memref)
+  linalg.batch_reduce_matmul ins(%arg0, %arg1 : memref, memref) inits(%arg2: memref)
   return
 }
diff --git a/mlir/test/Dialect/Linalg/namedop_conversion.mlir b/mlir/test/Dialect/Linalg/namedop_conversion.mlir
--- a/mlir/test/Dialect/Linalg/namedop_conversion.mlir
+++ b/mlir/test/Dialect/Linalg/namedop_conversion.mlir
@@ -4,9 +4,9 @@
 func.func @depthwise_conv(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
   // CHECK-DAG: %[[KERNEL:.+]] = tensor.collapse_shape %arg1 {{\[\[}}0], [1], [2, 3]]
   // CHECK-DAG: %[[INIT:.+]] = tensor.collapse_shape %arg2 {{\[\[}}0], [1], [2], [3, 4]]
-  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor) outs(%[[INIT]] : tensor)
+  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]] : tensor, tensor) inits(%[[INIT]] : tensor)
   // CHECK: %[[OUT:.+]] = tensor.expand_shape %[[CONV]] {{\[\[}}0], [1], [2], [3, 4]]
-  %0 = linalg.depthwise_conv_2d_nhwc_hwcm {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.depthwise_conv_2d_nhwc_hwcm {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1 : tensor, tensor) inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
@@ -17,8 +17,8 @@
 func.func @depthwise_conv_q(%arg0: tensor, %arg1: tensor, %arg2: tensor, %arg3 : i32, %arg4 : i32) -> tensor {
   // CHECK-DAG: %[[KERNEL:.+]] = tensor.collapse_shape %arg1 {{\[\[}}0], [1], [2, 3]]
   // CHECK-DAG: %[[INIT:.+]] = tensor.collapse_shape %arg2 {{\[\[}}0], [1], [2], [3, 4]]
-  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]], %arg3, %arg4 : tensor, tensor, i32, i32) outs(%[[INIT]] : tensor)
+  // CHECK-DAG: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwc_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %[[KERNEL]], %arg3, %arg4 : tensor, tensor, i32, i32) inits(%[[INIT]] : tensor)
   // CHECK: %[[OUT:.+]] = tensor.expand_shape %[[CONV]] {{\[\[}}0], [1], [2], [3, 4]]
-  %0 = linalg.depthwise_conv_2d_nhwc_hwcm_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1, %arg3, %arg4 : tensor, tensor, i32, i32) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.depthwise_conv_2d_nhwc_hwcm_q {_someattr, dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>} ins(%arg0, %arg1, %arg3, %arg4 : tensor, tensor, i32, i32) inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-2fill-extract-matmul-all-perms.mlir
@@ -18,15 +18,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -45,15 +45,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -72,15 +72,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -99,15 +99,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -126,15 +126,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -153,15 +153,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -180,15 +180,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -207,15 +207,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -234,15 +234,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -261,15 +261,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -288,15 +288,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -315,15 +315,15 @@
   %0 = bufferization.alloc_tensor() : tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "false"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -344,13 +344,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -371,13 +371,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -397,13 +397,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -424,13 +424,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %2[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %4 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -453,11 +453,11 @@
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -480,11 +480,11 @@
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -505,13 +505,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -532,13 +532,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -559,13 +559,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<256x256xf32>) -> tensor<256x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %1[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%3, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -586,13 +586,13 @@
   // CHECK: {__inplace_operands_attr__ = ["false"]}
   %4 = tensor.extract_slice %0[0, 0] [16, 256] [1, 1] : tensor<256x256xf32> to tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -615,11 +615,11 @@
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
@@ -642,10 +642,10 @@
   // CHECK: {__inplace_operands_attr__ = ["true"]}
   %3 = tensor.extract_slice %0[0, 0] [256, 16] [1, 1] : tensor<256x256xf32> to tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %2 = linalg.fill ins(%cst_0 : f32) outs(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
+  %2 = linalg.fill ins(%cst_0 : f32) inits(%4 : tensor<16x256xf32>) -> tensor<16x256xf32>
   // CHECK: {__inplace_operands_attr__ = ["none", "true"]}
-  %1 = linalg.fill ins(%cst : f32) outs(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
+  %1 = linalg.fill ins(%cst : f32) inits(%3 : tensor<256x16xf32>) -> tensor<256x16xf32>
   // CHECK: {__inplace_operands_attr__ = ["true", "true", "true"]}
-  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
+  %5 = linalg.matmul ins(%1, %2 : tensor<256x16xf32>, tensor<16x256xf32>) inits(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>
   return %5 : tensor<256x256xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize-analysis-init-tensor-elimination.mlir
@@ -14,7 +14,7 @@
   // CHECK: linalg.fill
   // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor
   // CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]
@@ -41,7 +41,7 @@
   // CHECK: linalg.fill
   // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]
-  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor
+  %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor
   // CHECK: tensor.insert_slice
   // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]
diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
--- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir
@@ -22,8 +22,8 @@
   /// Inplaceable, no alloc
   // CHECK-NOT: alloc
-  // CHECK: linalg.fill ins(%[[F0]] : f32) outs(%[[A]] : memref>)
-  %r = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+  // CHECK: linalg.fill ins(%[[F0]] : f32) inits(%[[A]] : memref>)
+  %r = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
   // CHECK: return
   // CHECK-NOT: tensor
@@ -45,8 +45,8 @@
   // CHECK: %[[D0:.*]] = memref.dim %[[A]], {{.*}} : memref>
   // CHECK: %[[ALLOC:.*]] = memref.alloc(%[[D0]]) {alignment = 128 : i64} : memref
-  // CHECK: linalg.fill ins(%[[F0]] : f32) outs(%[[ALLOC]] : memref)
-  %r = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+  // CHECK: linalg.fill ins(%[[F0]] : f32) inits(%[[ALLOC]] : memref)
+  %r = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
   // CHECK-NOT: dealloc
   // CHECK: return %[[ALLOC]] : memref
@@ -67,14 +67,14 @@
   /// Cross-op multiple uses of %A, the first op which has interfering reads must alloc.
   // CHECK: %[[ALLOC:.*]] = memref.alloc
-  // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[ALLOC]]
-  %f = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor
+  // CHECK: linalg.fill ins({{.*}}{{.*}}inits(%[[ALLOC]]
+  %f = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor
   /// The second op has no interfering reads and can reuse.
   // CHECK-NOT: alloc
-  // CHECK: linalg.matmul ins(%[[ALLOC]], %[[ALLOC]]{{.*}}) outs(%[[A]]
+  // CHECK: linalg.matmul ins(%[[ALLOC]], %[[ALLOC]]{{.*}}) inits(%[[A]]
   %r = linalg.matmul ins(%f, %f: tensor, tensor)
-    outs(%A: tensor)
+    inits(%A: tensor)
     -> tensor
   // CHECK: memref.dealloc %[[ALLOC]]
@@ -91,7 +91,7 @@
   /// Within op multiple uses of %A, must alloc.
   // CHECK: alloc
   %r = linalg.matmul ins(%A, %A: tensor, tensor)
-    outs(%A: tensor)
+    inits(%A: tensor)
     -> tensor
   // CHECK-NOT: dealloc
   return %r: tensor
@@ -181,8 +181,8 @@
     tensor<128x192xf32> to tensor<8x16xf32>
   // linalg.fill is inplace.
-  // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[C_SLICE]]
-  %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<8x16xf32>) -> tensor<8x16xf32>
+  // CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[C_SLICE]]
+  %5 = linalg.fill ins(%cst : f32) inits(%4 : tensor<8x16xf32>) -> tensor<8x16xf32>
   // CHECK: scf.for %[[K:.*]] =
   %6 = scf.for %arg7 = %c0 to %c256 step %c32 iter_args(%arg8 = %5) -> (tensor<8x16xf32>) {
@@ -192,9 +192,9 @@
       tensor<256x16xf32> to tensor<32x16xf32>
     // linalg.matmul is inplace as well as the enclosing scf.for.
-    // CHECK: linalg.matmul ins({{.*}} outs(%[[C_SLICE]]
+    // CHECK: linalg.matmul ins({{.*}} inits(%[[C_SLICE]]
    %10 = linalg.matmul ins(%8, %9 : tensor<8x32xf32>, tensor<32x16xf32>)
-      outs(%arg8 : tensor<8x16xf32>)
+      inits(%arg8 : tensor<8x16xf32>)
      -> tensor<8x16xf32>
    scf.yield %10 : tensor<8x16xf32>
   }
@@ -231,7 +231,7 @@
   %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor to tensor
   %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor to tensor<4x4xf32>
-  %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
+  %FA = linalg.fill ins(%f0 : f32) inits(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
   %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor
   %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor into tensor
@@ -250,7 +250,7 @@
     indexing_maps = [affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]}
-    ins(%arg1 : tensor) outs(%arg2 : tensor) {
+    ins(%arg1 : tensor) inits(%arg2 : tensor) {
   ^bb0(%arg3: i32, %arg4 : f32):
     %iv1 = linalg.index 1 : index
     %1 = arith.index_cast %arg3: i32 to index
@@ -266,7 +266,7 @@
 // CHECK-SAME: ) {
 // CHECK: linalg.generic
 // CHECK-SAME: ins(%[[ARG1]] :
-// CHECK-SAME: outs(%[[ARG2]] :
+// CHECK-SAME: inits(%[[ARG2]] :
 // CHECK: %[[YIELD:.+]] = memref.load %[[ARG0]]
 // CHECK: linalg.yield %[[YIELD]]
@@ -281,14 +281,14 @@
     %s1: index, %s2: index, %cst: f32) -> tensor {
-  // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) outs(%[[t3]] : {{.*}})
+  // CHECK: linalg.generic {{.*}} ins(%[[t1]], %[[t2]] : {{.*}}) inits(%[[t3]] : {{.*}})
   %r = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1)-> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]}
    ins(%t1, %t2 : tensor, tensor)
-   outs(%t3 : tensor) {
+   inits(%t3 : tensor) {
   ^bb0(%arg0 : f32, %arg1 : f32, %arg2 : f32) :
     %add = arith.addf %arg0, %arg1 : f32
     linalg.yield %add : f32
@@ -316,15 +316,15 @@
   // Make sure that a copy is inserted here.
   // CHECK: %[[ALLOC:.*]] = memref.alloc
   // CHECK: memref.copy %[[t0]], %[[ALLOC]]
-  // CHECK: linalg.generic {{.*}} outs(%[[ALLOC]] : memref
-  %r0 =linalg.generic #trait outs (%t0 : tensor) {
+  // CHECK: linalg.generic {{.*}} inits(%[[ALLOC]] : memref
+  %r0 =linalg.generic #trait inits (%t0 : tensor) {
     ^bb(%0: f32) :
       %a = arith.addf %cst, %0 : f32
      linalg.yield %a : f32
   } -> (tensor)
-  // CHECK: linalg.generic {{.*}} outs(%[[ALLOC]] : memref
-  %r1 = linalg.generic #trait outs (%r0 : tensor) {
+  // CHECK: linalg.generic {{.*}} inits(%[[ALLOC]] : memref
+  %r1 = linalg.generic #trait inits (%r0 : tensor) {
    ^bb(%0: f32) :
     linalg.yield %cst : f32
   } -> (tensor)
@@ -344,7 +344,7 @@
   // CHECK-NEXT: ins(%[[LHS]], %[[RHS]] : memref<64xf32
   %add = linalg.map
     ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
-    outs(%init:tensor<64xf32>)
+    inits(%init:tensor<64xf32>)
    (%lhs_elem: f32, %rhs_elem: f32) {
     %0 = arith.addf %lhs_elem, %rhs_elem: f32
     linalg.yield %0: f32
@@ -362,7 +362,7 @@
   // CHECK-NEXT: ins(%[[INPUT]] : memref<16x32x64xf32
   %reduce = linalg.reduce
    ins(%input:tensor<16x32x64xf32>)
-    outs(%init:tensor<16x64xf32>)
+    inits(%init:tensor<16x64xf32>)
    dimensions = [1]
    (%in: f32, %out: f32) {
     %0 = arith.addf %in, %out: f32
@@ -381,7 +381,7 @@
   // CHECK-NEXT: ins(%[[ARG0]] : memref<16x32x64xf32
   %transpose = linalg.transpose
    ins(%input:tensor<16x32x64xf32>)
-    outs(%init:tensor<32x64x16xf32>)
+    inits(%init:tensor<32x64x16xf32>)
    permutation = [1, 2, 0]
   func.return %transpose : tensor<32x64x16xf32>
 }
@@ -394,7 +394,7 @@
     %init: tensor<8x16x32xf32>) -> tensor<8x16x32xf32> {
   %bcast = linalg.broadcast
    ins(%input:tensor<8x32xf32>)
-    outs(%init:tensor<8x16x32xf32>)
+    inits(%init:tensor<8x16x32xf32>)
    dimensions = [0, 2]
   func.return %bcast : tensor<8x16x32xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/pad_fusion.mlir b/mlir/test/Dialect/Linalg/pad_fusion.mlir
--- a/mlir/test/Dialect/Linalg/pad_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/pad_fusion.mlir
@@ -10,7 +10,7 @@
   %0 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]}
-    ins(%arg0 : tensor) outs(%init : tensor) {
+    ins(%arg0 : tensor) inits(%init : tensor) {
  ^bb0(%arg6 : f32, %arg7 : f32):
    %1 = arith.mulf %arg6, %arg6 : f32
    linalg.yield %1 : f32
@@ -38,13 +38,13 @@
 // CHECK-DAG:  %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 // CHECK-DAG:  %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG2]], %[[ARG4]], %[[SOURCE_D1]]]
 // CHECK:  %[[INIT:.+]] = tensor.empty(%[[TARGET_D0]], %[[TARGET_D1]])
-// CHECK:  %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}outs(%[[INIT]]
+// CHECK:  %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}inits(%[[INIT]]
 // CHECK-DAG:  %[[SIZE_D0:.+]] = tensor.dim %[[SOURCE]], %[[C0]]
 // CHECK-DAG:  %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 // CHECK:  %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
 // CHECK-SAME:  [%[[ARG1]], %[[ARG2]]] [%[[SIZE_D0]], %[[SIZE_D1]]] [1, 1]
 // CHECK:  %[[SOURCE:.+]] = linalg.generic
-// CHECK-SAME:  outs(%[[SLICE]] : tensor)
+// CHECK-SAME:  inits(%[[SLICE]] : tensor)
 // CHECK:  %[[RESULT:.+]] = tensor.insert_slice %[[SOURCE]] into %[[FILL]]
 // CHECK-SAME:  [%[[ARG1]], %[[ARG2]]] [%[[SIZE_D0]], %[[SIZE_D1]]] [1, 1]
 // CHECK:  return %[[RESULT]]
@@ -59,7 +59,7 @@
   %0 = linalg.generic {
    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
    iterator_types = ["parallel", "parallel"]}
-    ins(%arg0 : tensor) outs(%init : tensor<42x?xf32>) {
+    ins(%arg0 : tensor) inits(%init : tensor<42x?xf32>) {
diff --git a/mlir/test/Dialect/Linalg/pad_fusion.mlir b/mlir/test/Dialect/Linalg/pad_fusion.mlir
--- a/mlir/test/Dialect/Linalg/pad_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/pad_fusion.mlir
@@ -10,7 +10,7 @@
   %0 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
       iterator_types = ["parallel", "parallel"]}
-      ins(%arg0 : tensor) outs(%init : tensor) {
+      ins(%arg0 : tensor) inits(%init : tensor) {
     ^bb0(%arg6 : f32, %arg7 : f32):
       %1 = arith.mulf %arg6, %arg6 : f32
       linalg.yield %1 : f32
@@ -38,13 +38,13 @@
 // CHECK-DAG:   %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 // CHECK-DAG:   %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG2]], %[[ARG4]], %[[SOURCE_D1]]]
 // CHECK:   %[[INIT:.+]] = tensor.empty(%[[TARGET_D0]], %[[TARGET_D1]])
-// CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}outs(%[[INIT]]
+// CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[ARG5]]{{.*}}inits(%[[INIT]]
 // CHECK-DAG:   %[[SIZE_D0:.+]] = tensor.dim %[[SOURCE]], %[[C0]]
 // CHECK-DAG:   %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 // CHECK:   %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
 // CHECK-SAME:   [%[[ARG1]], %[[ARG2]]] [%[[SIZE_D0]], %[[SIZE_D1]]] [1, 1]
 // CHECK:   %[[SOURCE:.+]] = linalg.generic
-// CHECK-SAME:   outs(%[[SLICE]] : tensor)
+// CHECK-SAME:   inits(%[[SLICE]] : tensor)
 // CHECK:   %[[RESULT:.+]] = tensor.insert_slice %[[SOURCE]] into %[[FILL]]
 // CHECK-SAME:   [%[[ARG1]], %[[ARG2]]] [%[[SIZE_D0]], %[[SIZE_D1]]] [1, 1]
 // CHECK:   return %[[RESULT]]
@@ -59,7 +59,7 @@
   %0 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
       iterator_types = ["parallel", "parallel"]}
-      ins(%arg0 : tensor) outs(%init : tensor<42x?xf32>) {
+      ins(%arg0 : tensor) inits(%init : tensor<42x?xf32>) {
     ^bb0(%arg4 : f32, %arg5 : f32):
       %1 = arith.mulf %arg4, %arg4 : f32
       linalg.yield %1 : f32
@@ -82,12 +82,12 @@
 // CHECK-DAG:   %[[SOURCE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 // CHECK-DAG:   %[[TARGET_D1:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]], %[[SOURCE_D1]]]
 // CHECK:   %[[INIT:.+]] = tensor.empty(%[[TARGET_D1]])
-// CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[ARG3]]{{.*}}outs(%[[INIT]]
+// CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[ARG3]]{{.*}}inits(%[[INIT]]
 // CHECK-DAG:   %[[SIZE_D1:.+]] = tensor.dim %[[SOURCE]], %[[C1]]
 // CHECK:   %[[SLICE:.+]] = tensor.extract_slice %[[FILL]]
 // CHECK-SAME:   [3, %[[ARG1]]] [42, %[[SIZE_D1]]] [1, 1]
 // CHECK:   %[[SOURCE:.+]] = linalg.generic
-// CHECK-SAME:   outs(%[[SLICE]] : tensor<42x?xf32>)
+// CHECK-SAME:   inits(%[[SLICE]] : tensor<42x?xf32>)
 // CHECK:   %[[RESULT:.+]] = tensor.insert_slice %[[SOURCE]] into %[[FILL]]
 // CHECK-SAME:   [3, %[[ARG1]]] [42, %[[SIZE_D1]]] [1, 1]
 // CHECK:   return %[[RESULT]]
diff --git a/mlir/test/Dialect/Linalg/parallel-loops.mlir b/mlir/test/Dialect/Linalg/parallel-loops.mlir
--- a/mlir/test/Dialect/Linalg/parallel-loops.mlir
+++ b/mlir/test/Dialect/Linalg/parallel-loops.mlir
@@ -8,7 +8,7 @@
      indexing_maps = [#map0, #map0, #map0],
      iterator_types = ["parallel", "parallel"]}
      ins(%lhs, %rhs : memref<2x2xf32>, memref<2x2xf32>)
-     outs(%sum : memref<2x2xf32>) {
+     inits(%sum : memref<2x2xf32>) {
     ^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32):
       %0 = arith.addf %lhs_in, %rhs_in : f32
       linalg.yield %0 : f32
@@ -41,7 +41,7 @@
 func.func @lower_outer_parallel(%A: memref, %B: memref) {
   linalg.generic #trait
       ins(%A : memref)
-     outs(%B : memref) {
+     inits(%B : memref) {
     ^bb0(%a: f32, %b: f32):
       linalg.yield %a: f32
   }
@@ -74,7 +74,7 @@
 func.func @lower_mixed_parallel(%A: memref, %B: memref) {
   linalg.generic #trait
       ins(%A : memref)
-     outs(%B : memref) {
+     inits(%B : memref) {
     ^bb0(%a: f32, %b: f32):
       linalg.yield %a: f32
   }
diff --git a/mlir/test/Dialect/Linalg/promote.mlir b/mlir/test/Dialect/Linalg/promote.mlir
--- a/mlir/test/Dialect/Linalg/promote.mlir
+++ b/mlir/test/Dialect/Linalg/promote.mlir
@@ -25,7 +25,7 @@
         linalg.matmul
           ins(%11, %14: memref>, memref>)
-         outs(%17: memref>)
+         inits(%17: memref>)
       }
     }
   }
@@ -56,7 +56,7 @@
 //       CHECK:         memref.copy %[[vB]], %[[partialB]] : memref> to memref>
 //       CHECK:         memref.copy %[[vC]], %[[partialC]] : memref> to memref>
 //
-//       CHECK:         linalg.matmul ins(%[[partialA]], %[[partialB]]{{.*}} outs(%[[partialC]]
+//       CHECK:         linalg.matmul ins(%[[partialA]], %[[partialB]]{{.*}} inits(%[[partialC]]
 //
 //       CHECK:         memref.copy %[[partialC]], %[[vC]] :
 //       CHECK:           memref> to
@@ -95,7 +95,7 @@
         linalg.matmul
           ins(%11, %14: memref>, memref>)
-         outs(%17: memref>)
+         inits(%17: memref>)
       }
     }
   }
@@ -126,7 +126,7 @@
 //       CHECK:         memref.copy %[[vB_f64]], %[[partialB_f64]] : memref> to memref>
 //       CHECK:         memref.copy %[[vC_f64]], %[[partialC_f64]] : memref> to memref>
 //
-//       CHECK:         linalg.matmul ins(%[[partialA_f64]], %[[partialB_f64]]{{.*}} outs(%[[partialC_f64]]
+//       CHECK:         linalg.matmul ins(%[[partialA_f64]], %[[partialB_f64]]{{.*}} inits(%[[partialC_f64]]
 //
 //       CHECK:         memref.copy %[[partialC_f64]], %[[vC_f64]] :
 //       CHECK:           memref> to
@@ -170,9 +170,9 @@
   // CHECK-COUNT-3: memref.copy
   //      CHECK: linalg.generic
   // CHECK-SAME: ins(%[[a_pro_subview]], %[[b_pro_subview]]
-  // CHECK-SAME: outs(%[[c_pro_subview]]
+  // CHECK-SAME: inits(%[[c_pro_subview]]

-  linalg.generic {indexing_maps = [#map6, #map7, #map8], iterator_types = ["parallel", "parallel", "reduction"]} ins(%13, %14 : memref>, memref<128x32xf32, strided<[?, ?], offset: ?>>) outs(%9 : memref>) {
+  linalg.generic {indexing_maps = [#map6, #map7, #map8], iterator_types = ["parallel", "parallel", "reduction"]} ins(%13, %14 : memref>, memref<128x32xf32, strided<[?, ?], offset: ?>>) inits(%9 : memref>) {
   ^bb0(%arg9: f32, %arg10: f32, %arg11: f32):
     %15 = arith.mulf %arg9, %arg10 : f32
     %16 = arith.addf %arg11, %15 : f32
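The promotion tests above exercise the buffer form of the ops, and the rename applies there uniformly. A minimal sketch on memrefs (hypothetical shapes): on buffers the op returns no value, and the `inits` operand is the buffer that is read and updated in place.

```mlir
func.func @gemm(%A: memref<?x?xf32>, %B: memref<?x?xf32>,
                %C: memref<?x?xf32>) {
  // No result value in the buffer form; %C is the in-place destination,
  // now spelled `inits` rather than `outs`.
  linalg.matmul ins(%A, %B : memref<?x?xf32>, memref<?x?xf32>)
                inits(%C : memref<?x?xf32>)
  return
}
```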
diff --git a/mlir/test/Dialect/Linalg/promotion_options.mlir b/mlir/test/Dialect/Linalg/promotion_options.mlir
--- a/mlir/test/Dialect/Linalg/promotion_options.mlir
+++ b/mlir/test/Dialect/Linalg/promotion_options.mlir
@@ -3,7 +3,7 @@
 func.func @gemm(%a : memref, %b : memref, %c : memref) {
   linalg.matmul ins(%a, %b: memref, memref)
-               outs(%c: memref)
+               inits(%c: memref)
   return
 }
@@ -29,7 +29,7 @@
 //      CHECK:       memref.copy %[[svA]], %[[svAA]]
 //      CHECK:       memref.copy %[[svC]], %[[svCC]]

-//      CHECK:       linalg.matmul ins(%[[VA]], %[[svB]]{{.*}} outs(%[[VC]]
+//      CHECK:       linalg.matmul ins(%[[VA]], %[[svB]]{{.*}} inits(%[[VC]]
 //      CHECK:       memref.copy %[[svCC]], %[[svC]]
 //      CHECK:       memref.dealloc %[[tmpA]]
 //      CHECK:       memref.dealloc %[[tmpC]]
diff --git a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
--- a/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_control_fusion.mlir
@@ -11,7 +11,7 @@
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],
       iterator_types = ["parallel", "parallel"]}
       ins(%0, %arg1 : tensor, tensor)
-     outs(%init : tensor) {
+     inits(%init : tensor) {
     ^bb0(%arg2 : f32, %arg3:f32, %arg4 : f32):
       %2 = arith.addf %arg2, %arg3 : f32
       linalg.yield %2 : f32
@@ -44,19 +44,19 @@
   %fill = linalg.generic {
       indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>],
       iterator_types = ["parallel", "parallel"]}
-     outs(%init : tensor) {
+     inits(%init : tensor) {
     ^bb0(%arg2: f32):
       linalg.yield %cst : f32
   } -> tensor
   %0 = tensor.expand_shape %fill [[0, 1], [2]] : tensor into tensor<1x?x?xf32>
   %1 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x?x?xf32>, tensor<1x?x?xf32>)
-      outs(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
+      inits(%0 : tensor<1x?x?xf32>) -> tensor<1x?x?xf32>
   return %1 : tensor<1x?x?xf32>
 }
 //  CHECK-DAG: #[[MAP:.+]] = affine_map<(d0, d1, d2) -> (d0, d1, d2)
 //      CHECK: func @control_consumer_reshape_fusion
 //      CHECK:   %[[FILL:.+]] = linalg.generic
 // CHECK-SAME:       indexing_maps = [#[[MAP]]]
-// CHECK-SAME:       outs(%{{.+}} : tensor<1x?x?xf32>)
+// CHECK-SAME:       inits(%{{.+}} : tensor<1x?x?xf32>)
 //      CHECK:   linalg.batch_matmul
-// CHECK-SAME:       outs(%[[FILL]] : tensor<1x?x?xf32>)
+// CHECK-SAME:       inits(%[[FILL]] : tensor<1x?x?xf32>)
diff --git a/mlir/test/Dialect/Linalg/reshape_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_fusion.mlir
--- a/mlir/test/Dialect/Linalg/reshape_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_fusion.mlir
@@ -14,7 +14,7 @@
      indexing_maps = [#map0, #map1, #map2, #map1],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%0, %arg1, %arg2 : tensor, tensor, f32)
-      outs(%arg1 : tensor) {
+      inits(%arg1 : tensor) {
     ^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
       %1 = arith.mulf %arg3, %arg4 : f32
       %2 = arith.addf %1, %arg5 : f32
@@ -38,7 +38,7 @@
 // CHECK-SAME:   indexing_maps = [#[[MAP5]], #[[MAP6]], #[[MAP7]], #[[MAP6]]]
 // CHECK-SAME:   ["parallel", "parallel", "parallel", "parallel"]
 // CHECK-SAME:   ins(%[[ARG0]], %[[T1]], %[[ARG2]] : tensor, tensor, f32)
-// CHECK-SAME:   outs(%[[T2]] : tensor)
+// CHECK-SAME:   inits(%[[T2]] : tensor)
 //      CHECK:   %[[T4:.+]] = tensor.collapse_shape %[[T3]]
 // CHECK-SAME:     [0], [1], [2, 3]
 // CHECK-SAME:     tensor into tensor
@@ -57,7 +57,7 @@
      indexing_maps = [#map0, #map0, #map1, #map0],
      iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %arg1, %arg2 : tensor, tensor, f32)
-      outs(%arg0 : tensor) {
+      inits(%arg0 : tensor) {
     ^bb0(%arg3: f32, %arg4: f32, %arg5: f32, %s: f32):
       %1 = arith.mulf %arg3, %arg4 : f32
       %2 = arith.addf %1, %arg5 : f32
@@ -87,7 +87,7 @@
 // CHECK-SAME:   indexing_maps = [#[[MAP2]], #[[MAP2]], #[[MAP3]], #[[MAP2]]]
 // CHECK-SAME:   ["parallel", "parallel", "parallel", "parallel"]
 // CHECK-SAME:   ins(%[[T0]], %[[T1]], %[[ARG2]] : tensor, tensor, f32)
-// CHECK-SAME:   outs(%[[T2]] : tensor)
+// CHECK-SAME:   inits(%[[T2]] : tensor)
 //      CHECK:   return %[[T3]] : tensor
@@ -102,7 +102,7 @@
                        affine_map<(d0, d1, d2) -> (d0, d2, d1)>],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%a, %b : tensor, tensor)
-      outs(%a : tensor) {
+      inits(%a : tensor) {
     ^bb0(%arg0 : f32, %arg1: f32, %s: f32):
       %1 = arith.addf %arg0, %arg1 : f32
       linalg.yield %1 : f32
@@ -130,7 +130,7 @@
 // CHECK-SAME:   indexing_maps = [#[[MAP8]], #[[MAP9]], #[[MAP10]]]
 // CHECK-SAME:   ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]
 // CHECK-SAME:   ins(%[[T0]], %[[T1]] : tensor<3x4x?x?x2x?xf32>, tensor<3x4x?x?xf32>)
-// CHECK-SAME:   outs(%[[T2]] : tensor)
+// CHECK-SAME:   inits(%[[T2]] : tensor)
 //      CHECK:   return %[[T3]] : tensor

// -----
@@ -147,7 +147,7 @@
      indexing_maps = [#map0, #map0, #map0],
      iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %cst : tensor<264x4xf32>, tensor<264x4xf32>)
-      outs(%0 : tensor<264x4xf32>) {
+      inits(%0 : tensor<264x4xf32>) {
     ^bb0(%arg1: f32, %arg2: f32, %s: f32):
       %2 = arith.mulf %arg1, %arg2 : f32
       linalg.yield %2 : f32
@@ -173,7 +173,7 @@
 // CHECK-SAME:   indexing_maps = [#[[MAP2]], #[[MAP2]], #[[MAP2]]]
 // CHECK-SAME:   ["parallel", "parallel", "parallel"]
 // CHECK-SAME:   ins(%[[T0]], %[[CST]] :
-// CHECK-SAME:   outs(%[[T1]] : tensor<8x33x4xf32>)
+// CHECK-SAME:   inits(%[[T1]] : tensor<8x33x4xf32>)
 //      CHECK:   return %[[T2]] : tensor<8x33x4xf32>

// -----
@@ -190,7 +190,7 @@
      indexing_maps = [#map0, #map1, #map1],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%0, %arg1 : tensor, tensor)
-      outs(%0 : tensor) {
+      inits(%0 : tensor) {
     ^bb0(%arg3: i32, %arg4: i32, %s: i32):
       %idx0 = linalg.index 0 : index
       %idx1 = linalg.index 1 : index
@@ -239,7 +239,7 @@
      indexing_maps = [#map0, #map0, #map0],
      iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg0 : tensor) {
+      inits(%arg0 : tensor) {
     ^bb0(%arg3: i32, %arg4: i32, %s: i32):
       %idx0 = linalg.index 0 : index
       %idx1 = linalg.index 1 : index
@@ -288,7 +288,7 @@
                        affine_map<(d0, d1, d2) -> (d0, d2, d1)>],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%a, %b : tensor<210x6x4xi32>, tensor<210x4xi32>)
-      outs(%shape : tensor<6x4x210xi32>) {
+      inits(%shape : tensor<6x4x210xi32>) {
     ^bb0(%arg3 : i32, %arg4: i32, %s: i32):
       %idx0 = linalg.index 0 : index
       %idx1 = linalg.index 1 : index
@@ -329,7 +329,7 @@
 //      CHECK:   %[[T4:.+]] = linalg.generic
 // CHECK-SAME:     indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]]]
 // CHECK-SAME:     ins(%[[T1]], %[[T2]] : tensor<5x6x7x2x3x4xi32>, tensor<5x6x7x4xi32>)
-// CHECK-SAME:     outs(%[[T3]] : tensor<2x3x4x5x6x7xi32>)
+// CHECK-SAME:     inits(%[[T3]] : tensor<2x3x4x5x6x7xi32>)
 //      CHECK:   ^{{.+}}(
 // CHECK-SAME:     %[[ARG8:[a-zA-Z0-9_]+]]: i32, %[[ARG9:[a-zA-Z0-9_]+]]: i32,
 // CHECK-SAME:     %[[ARG10:[a-zA-Z0-9_]+]]: i32)
@@ -362,7 +362,7 @@
                        affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%0 : tensor<264x?xi32>)
-      outs(%shape : tensor<264x?x4xi32>) {
+      inits(%shape : tensor<264x?x4xi32>) {
     ^bb0(%arg1: i32, %s: i32):
       %idx0 = linalg.index 0 : index
       %idx1 = linalg.index 1 : index
@@ -418,7 +418,7 @@
      indexing_maps = [#map0, #map0, #map1],
      iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg0 : tensor) {
+      inits(%arg0 : tensor) {
     ^bb0(%arg3: f32, %arg4: f32, %s: f32):
       %1 = arith.mulf %arg3, %arg4 : f32
       linalg.yield %1 : f32
@@ -446,7 +446,7 @@
 // CHECK-SAME:   indexing_maps = [#[[MAP4]], #[[MAP4]], #[[MAP5]]]
 // CHECK-SAME:   ["parallel", "parallel", "parallel", "parallel"]
 // CHECK-SAME:   ins(%[[T0]], %[[T1]] : tensor, tensor)
-// CHECK-SAME:   outs(%[[T2]] : tensor)
+// CHECK-SAME:   inits(%[[T2]] : tensor)
 //      CHECK:   return %[[T3]] : tensor

// -----
@@ -459,7 +459,7 @@
   %3 = linalg.generic {
      indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
      iterator_types = ["parallel"]}
-      ins(%0 : tensor) outs(%2 : tensor) {
+      ins(%0 : tensor) inits(%2 : tensor) {
     ^bb0(%arg1 : f32, %arg2: f32):
       %4 = arith.addf %arg1, %arg1 : f32
       linalg.yield %4 : f32
@@ -484,7 +484,7 @@
                      affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]}
     ins(%0, %arg1 : tensor<2xi64>, tensor)
-    outs(%1 : tensor<2xi64>) {
+    inits(%1 : tensor<2xi64>) {
   ^bb0(%arg4: i64, %arg5: i64, %arg6: i64):
     %3 = arith.addi %arg4, %arg5 : i64
     linalg.yield %3 : i64
@@ -512,7 +512,7 @@
                        affine_map<(d0, d1, d2) -> (d2, d0, d1)>],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%a, %b : tensor, tensor)
-      outs(%a, %a : tensor, tensor) {
+      inits(%a, %a : tensor, tensor) {
     ^bb0(%arg0 : f32, %arg1: f32, %s: f32, %t : f32):
       %1 = arith.addf %arg0, %arg1 : f32
       linalg.yield %1, %1 : f32, f32
@@ -537,7 +537,7 @@
 //      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
 // CHECK-SAME:     indexing_maps = [#[[MAP0]], #[[MAP1]], #[[MAP2]], #[[MAP3]]]
 // CHECK-SAME:     ins(%[[RESHAPE0]], %[[RESHAPE1]] :
-// CHECK-SAME:     outs(%[[RESHAPE2]], %[[RESHAPE3]] :
+// CHECK-SAME:     inits(%[[RESHAPE2]], %[[RESHAPE3]] :
 //      CHECK:   return %[[GENERIC]]#0, %[[GENERIC]]#1

// -----
@@ -551,7 +551,7 @@
      indexing_maps = [#map0, #map0, #map0, #map1],
      iterator_types = ["parallel", "parallel"]}
       ins(%arg0, %arg1 : tensor<512xf32>, tensor<512xf32>)
-      outs(%arg2, %arg3 : tensor<512xf32>, tensor<200x512xf32>) {
+      inits(%arg2, %arg3 : tensor<512xf32>, tensor<200x512xf32>) {
     ^bb0(%arg4: f32, %arg5: f32, %arg6: f32, %arg7: f32):
       %2 = arith.addf %arg4, %arg5 : f32
       linalg.yield %2, %2 : f32, f32
@@ -571,5 +571,5 @@
 //      CHECK:   %[[GENERIC:.+]]:2 = linalg.generic
 // CHECK-SAME:     indexing_maps = [#[[MAP0]], #[[MAP0]], #[[MAP0]], #[[MAP1]]]
 // CHECK-SAME:     ins(%[[ARG0]], %[[ARG1]] :
-// CHECK-SAME:     outs(%[[ARG2]], %[[OUTS]] :
+// CHECK-SAME:     inits(%[[ARG2]], %[[OUTS]] :
 //      CHECK:   return %[[GENERIC]]#1
diff --git a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
--- a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
+++ b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
@@ -54,7 +54,7 @@
                        affine_map<(d0, d1, d2) -> (d0 + d1, d1 - d0)>],
      iterator_types = ["parallel", "parallel", "reduction"]}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%arg2 : tensor) {
+      inits(%arg2 : tensor) {
     ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
       %1 = arith.mulf %arg3, %arg4 : f32
       %2 = arith.addf %1, %arg5 : f32
@@ -92,7 +92,7 @@
     {indexing_maps = [affine_map<(d0, d1) -> (d0)>,
                       affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel"]}
-    ins(%arg0 : tensor) outs(%0 : tensor) {
+    ins(%arg0 : tensor) inits(%0 : tensor) {
   ^bb0(%arg2: f32, %arg3: f32) :
     linalg.yield %arg2 : f32
   } -> tensor
@@ -111,7 +111,7 @@
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
   %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
-    outs(%arg2 : tensor) -> tensor
+    inits(%arg2 : tensor) -> tensor
   %1 = tensor.dim %0, %c0 : tensor
   %2 = tensor.dim %0, %c1 : tensor
   %3 = linalg.generic
@@ -120,7 +120,7 @@
                        affine_map<(d0, d1, d2) -> (d0, d2)>],
      iterator_types = ["parallel", "reduction", "parallel"]}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%0 : tensor) {
+      inits(%0 : tensor) {
     ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32):
       %4 = arith.mulf %arg3, %arg4 : f32
       %5 = arith.addf %4, %arg5 : f32
@@ -154,7 +154,7 @@
     {indexing_maps = [affine_map<(d0, d1) -> (d0)>,
                       affine_map<(d0, d1) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel"]}
-    ins(%arg0 : tensor) outs(%0 : tensor) {
+    ins(%arg0 : tensor) inits(%0 : tensor) {
   ^bb0(%arg2: f32, %arg3 : f32):
     linalg.yield %arg2 : f32
   } -> tensor
@@ -179,7 +179,7 @@
     indexing_maps = [#map, #map, #map],
     iterator_types = ["parallel"]
   } ins(%arg_0 : tensor)
-    outs(%arg_0, %arg_1 : tensor, tensor) {
+    inits(%arg_0, %arg_1 : tensor, tensor) {
   ^bb0(%in: f32, %out_0: f32, %out_1: f32):
     linalg.yield %in, %in : f32, f32
   } -> (tensor, tensor)
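The tests just above depend on a property the rename makes explicit: a structured op's result shape always matches the shape of the corresponding init operand, so a `tensor.dim` of the result can be resolved to a `tensor.dim` of the init without inspecting the op. A minimal sketch of that folding opportunity, with hypothetical names and shapes:

```mlir
func.func @dim_of_result(%init: tensor<?x?xf32>, %v: f32) -> index {
  %c0 = arith.constant 0 : index
  %0 = linalg.fill ins(%v : f32) inits(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
  // Resolvable to `tensor.dim %init, %c0`: the result aliases the
  // shape of the init operand by construction.
  %d = tensor.dim %0, %c0 : tensor<?x?xf32>
  return %d : index
}
```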
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -32,38 +32,38 @@
                %arg3: memref) {
   linalg.matmul ins(%arg0, %arg0 : memref>, memref>)
-               outs(%arg0 : memref>)
+               inits(%arg0 : memref>)
   linalg.matvec ins(%arg0, %arg1: memref>, memref>)
-               outs(%arg2: memref>)
+               inits(%arg2: memref>)
   linalg.dot ins(%arg1, %arg2: memref>, memref>)
-            outs(%arg3: memref)
+            inits(%arg3: memref)
   return
 }
 // CHECK-LABEL: func @ops(%
 //       CHECK: linalg.matmul
 //  CHECK-SAME:   ins(%{{.*}}, %{{.*}} : memref>,
 //  CHECK-SAME:       memref>)
-//  CHECK-SAME:   outs(%{{.*}} : memref>)
+//  CHECK-SAME:   inits(%{{.*}} : memref>)
 //       CHECK: linalg.matvec
 //  CHECK-SAME:   ins(%{{.*}}, %{{.*}}: memref>,
 //  CHECK-SAME:       memref>)
-//  CHECK-SAME:   outs(%{{.*}}: memref>)
+//  CHECK-SAME:   inits(%{{.*}}: memref>)
 //       CHECK: linalg.dot
 //  CHECK-SAME:   ins(%{{.*}}, %{{.*}}: memref>,
 //  CHECK-SAME:       memref>)
-//  CHECK-SAME:   outs(%{{.*}}: memref)
+//  CHECK-SAME:   inits(%{{.*}}: memref)

// -----

 func.func @fill_view(%arg0: memref>, %arg1: f32) {
-  linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>)
+  linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>)
   return
 }
 // CHECK-LABEL: func @fill_view(
 //       CHECK:  %{{.*}}: memref>, %{{.*}}: f32) {
-//       CHECK:   linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : memref>)
+//       CHECK:   linalg.fill ins(%{{.*}} : f32) inits(%{{.*}} : memref>)

// -----
@@ -79,12 +79,12 @@
 func.func @fill_view3(%arg0: memref>, %arg1: f32) {
-  linalg.fill ins(%arg1 : f32) outs(%arg0 : memref>)
+  linalg.fill ins(%arg1 : f32) inits(%arg0 : memref>)
   return
 }
 // CHECK-LABEL: func @fill_view3(
 //       CHECK:  %{{.*}}: memref>, %{{.*}}: f32) {
-//       CHECK:   linalg.fill ins(%{{.*}} : f32) outs(%{{.*}} : memref>)
+//       CHECK:   linalg.fill ins(%{{.*}} : f32) inits(%{{.*}} : memref>)

// -----
@@ -105,7 +105,7 @@
   %cst = arith.constant 0.0 : f32
   linalg.generic #trait_0
       ins(%arg0, %cst : memref, strided<[?, 1], offset: ?>>, f32)
-     outs(%arg1 : memref>)
+     inits(%arg1 : memref>)
       attrs = {foo = 1} {
     ^bb(%0: vector<3x4xi4>, %1: f32, %2: f32) :
       linalg.yield %1 : f32
@@ -118,7 +118,7 @@
 //  CHECK-SAME:     iterator_types = ["parallel", "parallel", "parallel"],
 //  CHECK-SAME:     library_call = "some_external_function_name_1"}
 //  CHECK-SAME:     ins({{.*}}, {{.*}} : memref, strided<[?, 1], offset: ?>>, f32)
-//  CHECK-SAME:     outs({{.*}} : memref>)
+//  CHECK-SAME:     inits({{.*}} : memref>)
 //  CHECK-SAME:     {foo = 1 : i64}

// -----
@@ -127,7 +127,7 @@
 func.func @generic_without_inputs(%arg0 : memref) {
   linalg.generic  {indexing_maps = [#map0],
                    iterator_types = ["parallel", "parallel", "parallel"]}
-                  outs(%arg0 : memref) {
+                  inits(%arg0 : memref) {
     ^bb0(%arg3: f32):
       %cst = arith.constant 0.000000e+00 : f32
       linalg.yield %cst : f32
@@ -158,7 +158,7 @@
     -> (tensor) {
   %0 = linalg.generic #trait_1
       ins(%arg0, %arg1 : tensor>, tensor)
-     outs(%arg1 : tensor)
+     inits(%arg1 : tensor)
       attrs = {foo = 1} {
     ^bb(%0: vector<3x4xi4>, %1: f32, %2: f32) :
       %f0 = arith.constant 0.0 : f32
@@ -171,7 +171,7 @@
 //  CHECK-SAME:     indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"],
 //  CHECK-SAME:     library_call = "some_external_function_name_1"}
 //  CHECK-SAME:     ins({{.*}} : tensor>, tensor)
-//  CHECK-SAME:     outs({{.*}} : tensor)
+//  CHECK-SAME:     inits({{.*}} : tensor)
 //  CHECK-SAME:     {foo = 1 : i64}
 //       CHECK:     -> tensor
 //       CHECK:   return {{.*}} : tensor
@@ -183,14 +183,14 @@
     -> (tensor, tensor) {
   %c0 = arith.constant 0 : index
   %0 = tensor.empty() : tensor
-  %1 = linalg.fill ins(%arg2 : i32) outs(%0 : tensor) -> tensor
+  %1 = linalg.fill ins(%arg2 : i32) inits(%0 : tensor) -> tensor
   %2 = tensor.empty() : tensor
-  %3 = linalg.fill ins(%arg2 : i32) outs(%2 : tensor) -> tensor
+  %3 = linalg.fill ins(%arg2 : i32) inits(%2 : tensor) -> tensor
   %4:2 = linalg.generic {
     indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>,
                      affine_map<(d0) -> ()>, affine_map<(d0) -> ()>],
     iterator_types = ["reduction"]}
     ins(%arg0, %arg1 : tensor, tensor)
-    outs(%1, %3 : tensor, tensor) {
+    inits(%1, %3 : tensor, tensor) {
   ^bb0(%arg3: i32, %arg4: i32, %arg5: i32, %arg6: i32):
     %5 = arith.cmpi sge, %arg3, %arg5 : i32
     %6 = arith.select %5, %arg3, %arg5 : i32
@@ -206,7 +206,7 @@
 // CHECK-LABEL: func @generic_with_multiple_tensor_outputs
 //       CHECK:   %{{.*}} = linalg.generic {
 //  CHECK-SAME:     ins({{.*}} : tensor, tensor)
-//  CHECK-SAME:     outs({{.*}} : tensor, tensor)
+//  CHECK-SAME:     inits({{.*}} : tensor, tensor)
 //       CHECK:   } -> (tensor, tensor)

// -----
@@ -226,7 +226,7 @@
 {
   %0 = linalg.generic #trait_broadcast
       ins(%arg0 : tensor)
-     outs(%arg1 : tensor<3x4xf32>) {
+     inits(%arg1 : tensor<3x4xf32>) {
     ^bb(%a: f32, %b: f32) :
       linalg.yield %a : f32
   } -> tensor<3x4xf32>
@@ -251,7 +251,7 @@
                          %arg1: memref>) {
   linalg.generic #trait_3
       ins(%arg0 : memref, strided<[?, 1], offset: ?>>)
-     outs(%arg1 : memref>)
+     inits(%arg1 : memref>)
       attrs = {foo = 1} {
     ^bb(%a: vector<3x4xi4>, %b: f32) :
       %0 = linalg.index 0 : index
@@ -267,7 +267,7 @@
 //  CHECK-SAME:     iterator_types = ["parallel", "parallel", "parallel"],
 //  CHECK-SAME:     library_call = "some_external_function_name_2"
 //  CHECK-SAME:     ins({{.*}} : memref, strided<[?, 1], offset: ?>>)
-//  CHECK-SAME:     outs({{.*}} : memref>)
+//  CHECK-SAME:     inits({{.*}} : memref>)
 //  CHECK-SAME:     attrs = {foo = 1 : i64} {
 //       CHECK:  ^{{.*}}(%{{.*}}: vector<3x4xi4>, %{{.*}}: f32):
 //       CHECK:    %{{.*}} = linalg.index 0 : index
@@ -283,10 +283,10 @@
     -> (tensor) {
   linalg.batch_matmul ins(%a3, %b3: memref, memref)
-                     outs(%c3: memref)
+                     inits(%c3: memref)
   %res1 = linalg.batch_matmul
       ins(%ta3, %tb3: tensor, tensor)
-     outs(%tc3: tensor)
+     inits(%tc3: tensor)
       -> tensor
   return %res1 : tensor
 }
@@ -298,10 +298,10 @@
 func.func @fill_tensor(%arg0 : index, %arg1 : index, %arg2 : f32) -> tensor {
   %0 = tensor.empty(%arg0, %arg1) : tensor
-  %1 = linalg.fill ins(%arg2 : f32) outs(%0 : tensor) -> tensor
+  %1 = linalg.fill ins(%arg2 : f32) inits(%0 : tensor) -> tensor
   return %1 : tensor
 }
-// CHECK: %{{.+}} = linalg.fill ins(%{{.+}} : f32) outs(%{{.+}} : tensor) -> tensor
+// CHECK: %{{.+}} = linalg.fill ins(%{{.+}} : f32) inits(%{{.+}} : tensor) -> tensor

// -----
@@ -313,7 +313,7 @@
                        affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                        affine_map<(d0, d1, d2) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel", "reduction"]}
      ins(%arg0, %arg1 : tensor, tensor)
-     outs(%arg2, %arg3 : tensor, tensor) {
+     inits(%arg2, %arg3 : tensor, tensor) {
     ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32):
       %1 = arith.mulf %b0, %b1 : f32
       %2 = arith.addf %1, %b3 : f32
@@ -328,7 +328,7 @@
 func.func @map_no_inputs(%init: tensor<64xf32>) -> tensor<64xf32> {
   %add = linalg.map
-     outs(%init:tensor<64xf32>)
+     inits(%init:tensor<64xf32>)
       () {
         %0 = arith.constant 0.0: f32
         linalg.yield %0: f32
@@ -337,7 +337,7 @@
 }
 // CHECK-LABEL: func @map_no_inputs
 //       CHECK:   linalg.map
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   () {
 //  CHECK-NEXT:     arith.constant
 //  CHECK-NEXT:     linalg.yield
@@ -349,7 +349,7 @@
                       %init: tensor<64xf32>) -> tensor<64xf32> {
   %add = linalg.map
       ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>)
-     outs(%init:tensor<64xf32>)
+     inits(%init:tensor<64xf32>)
       (%lhs_elem: f32, %rhs_elem: f32) {
         %0 = arith.addf %lhs_elem, %rhs_elem: f32
         linalg.yield %0: f32
@@ -359,7 +359,7 @@
 // CHECK-LABEL: func @map_binary
 //       CHECK:   linalg.map
 //  CHECK-NEXT:   ins
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   (%{{.*}}: f32, %{{.*}}: f32) {
 //  CHECK-NEXT:     arith.addf
 //  CHECK-NEXT:     linalg.yield
@@ -371,7 +371,7 @@
                              %init: memref<64xf32>) {
   linalg.map
       ins(%lhs, %rhs: memref<64xf32>, memref<64xf32>)
-     outs(%init:memref<64xf32>)
+     inits(%init:memref<64xf32>)
       (%lhs_elem: f32, %rhs_elem: f32) {
         %0 = arith.addf %lhs_elem, %rhs_elem: f32
         linalg.yield %0: f32
@@ -386,7 +386,7 @@
 func.func @map_unary(%input: tensor<64xf32>, %init: tensor<64xf32>) -> tensor<64xf32> {
   %abs = linalg.map
       ins(%input:tensor<64xf32>)
-     outs(%init:tensor<64xf32>)
+     inits(%init:tensor<64xf32>)
       (%input_elem: f32) {
         %0 = math.absf %input_elem: f32
         linalg.yield %0: f32
@@ -401,7 +401,7 @@
 func.func @map_unary_memref(%input: memref<64xf32>, %init: memref<64xf32>) {
   linalg.map
       ins(%input:memref<64xf32>)
-     outs(%init:memref<64xf32>)
+     inits(%init:memref<64xf32>)
       (%input_elem: f32) {
        %0 = math.absf %input_elem: f32
        linalg.yield %0: f32
@@ -417,7 +417,7 @@
                   %init: tensor<16x64xf32>) -> tensor<16x64xf32> {
   %reduce = linalg.reduce
       ins(%input:tensor<16x32x64xf32>)
-     outs(%init:tensor<16x64xf32>)
+     inits(%init:tensor<16x64xf32>)
       dimensions = [1]
       (%in: f32, %out: f32) {
         %0 = arith.addf %in, %out: f32
@@ -428,7 +428,7 @@
 // CHECK-LABEL: func @reduce
 //       CHECK:   linalg.reduce
 //  CHECK-NEXT:   ins
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   dimensions = [1]
 //  CHECK-NEXT:   (%{{.*}}: f32, %{{.*}}: f32) {
 //  CHECK-NEXT:     arith.addf
@@ -441,7 +441,7 @@
                          %init: memref<16x64xf32>) {
   linalg.reduce
       ins(%input:memref<16x32x64xf32>)
-     outs(%init:memref<16x64xf32>)
+     inits(%init:memref<16x64xf32>)
       dimensions = [1]
       (%in: f32, %out: f32) {
         %0 = arith.addf %in, %out: f32
@@ -459,7 +459,7 @@
     %init2: tensor<16x64xi64>) -> (tensor<16x64xf32>, tensor<16x64xi64>) {
   %reduce, %reduce2 = linalg.reduce
       ins(%input1, %input2 : tensor<16x32x64xf32>, tensor<16x32x64xi64>)
-     outs(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xi64>)
+     inits(%init1, %init2 : tensor<16x64xf32>, tensor<16x64xi64>)
       dimensions = [1]
       (%in1: f32, %in2: i64, %out1: f32, %out2: i64) {
         %0 = arith.addf %in1, %out1: f32
@@ -478,7 +478,7 @@
     %init2: memref<16x64xi64>) {
   linalg.reduce
       ins(%input1, %input2 : memref<16x32x64xf32>, memref<16x32x64xi64>)
-     outs(%init1, %init2 : memref<16x64xf32>, memref<16x64xi64>)
+     inits(%init1, %init2 : memref<16x64xf32>, memref<16x64xi64>)
       dimensions = [1]
       (%in1: f32, %in2: i64, %out1: f32, %out2: i64) {
         %0 = arith.addf %in1, %out1: f32
@@ -496,14 +496,14 @@
                      %init: tensor<32x64x16xf32>) -> tensor<32x64x16xf32> {
   %transpose = linalg.transpose
       ins(%input:tensor<16x32x64xf32>)
-     outs(%init:tensor<32x64x16xf32>)
+     inits(%init:tensor<32x64x16xf32>)
       permutation = [1, 2, 0]
   func.return %transpose : tensor<32x64x16xf32>
 }
 // CHECK-LABEL: func @transpose
 //       CHECK:   linalg.transpose
 //  CHECK-NEXT:   ins
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   permutation

// -----
@@ -512,7 +512,7 @@
                             %init: memref<32x64x16xf32>) {
   linalg.transpose
       ins(%input:memref<16x32x64xf32>)
-     outs(%init:memref<32x64x16xf32>)
+     inits(%init:memref<32x64x16xf32>)
       permutation = [1, 2, 0]
   func.return
 }
@@ -524,14 +524,14 @@
     %init: tensor<8x16x32xf32>) -> tensor<8x16x32xf32> {
   %bcast = linalg.broadcast
       ins(%input:tensor<8x32xf32>)
-     outs(%init:tensor<8x16x32xf32>)
+     inits(%init:tensor<8x16x32xf32>)
       dimensions = [0, 2]
   func.return %bcast : tensor<8x16x32xf32>
 }
 // CHECK-LABEL: func @broadcast_static_sizes
 //       CHECK:   linalg.broadcast
 //  CHECK-NEXT:   ins
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   dimensions

// -----
@@ -541,14 +541,14 @@
     -> tensor<8x16x?xf32> {
   %bcast = linalg.broadcast
       ins(%input:tensor<8x?xf32>)
-     outs(%init:tensor<8x16x?xf32>)
+     inits(%init:tensor<8x16x?xf32>)
       dimensions = [0, 2]
   func.return %bcast : tensor<8x16x?xf32>
 }
 // CHECK-LABEL: func @broadcast_with_dynamic_sizes
 //       CHECK:   linalg.broadcast
 //  CHECK-NEXT:   ins
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   dimensions

// -----
@@ -557,7 +557,7 @@
                             %init: memref<8x16x32xf32>) {
   linalg.broadcast
       ins(%input:memref<8x32xf32>)
-     outs(%init:memref<8x16x32xf32>)
+     inits(%init:memref<8x16x32xf32>)
       dimensions = [0, 2]
   func.return
 }
@@ -565,5 +565,5 @@
 // CHECK-LABEL: func @broadcast_memref
 //       CHECK:   linalg.broadcast
 //  CHECK-NEXT:   ins
-//  CHECK-NEXT:   outs
+//  CHECK-NEXT:   inits
 //  CHECK-NEXT:   dimensions
diff --git a/mlir/test/Dialect/Linalg/standard.mlir b/mlir/test/Dialect/Linalg/standard.mlir
--- a/mlir/test/Dialect/Linalg/standard.mlir
+++ b/mlir/test/Dialect/Linalg/standard.mlir
@@ -5,7 +5,7 @@
                %arg2: memref) {
   linalg.dot ins(%arg0, %arg1: memref>, memref>)
-            outs(%arg2: memref)
+            inits(%arg2: memref)
   return
 }
 // CHECK-LABEL: func @dot(
@@ -44,7 +44,7 @@
 func.func @matmul_vec_impl(%A: !matrix_type_A, %B: !matrix_type_B, %C: !matrix_type_C) {
   linalg.generic #matmul_trait
       ins(%A, %B : !matrix_type_A, !matrix_type_B)
-     outs(%C : !matrix_type_C) {
+     inits(%C : !matrix_type_C) {
     ^bb0(%a: !vector_type_A, %b: !vector_type_B, %c: !vector_type_C):
       %d = vector.outerproduct %a, %b, %c: !vector_type_A, !vector_type_B
       linalg.yield %d: !vector_type_C
diff --git a/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir b/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
--- a/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
+++ b/mlir/test/Dialect/Linalg/swap-extract-slice-with-fill.mlir
@@ -4,11 +4,11 @@
 // CHECK-SAME: (%[[INIT:.+]]: tensor, %[[OFFSET0:.+]]: index, %[[SIZE1:.+]]: index)
 //      CHECK:   %[[F0:.+]] = arith.constant 0.000000e+00 : f32
 //      CHECK:   %[[EXT:.+]] = tensor.extract_slice %[[INIT]][%[[OFFSET0]], 8, 4] [1, %[[SIZE1]], 6] [1, 3, 1]
-//      CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) outs(%[[EXT]] : tensor) -> tensor
+//      CHECK:   %[[FILL:.+]] = linalg.fill ins(%[[F0]] : f32) inits(%[[EXT]] : tensor) -> tensor
 //      CHECK:   return %[[FILL]]
 func.func @swap_fill_insert_slice(%init : tensor, %offset0: index, %size1: index) -> tensor {
   %f0 = arith.constant 0.000000e+00 : f32
-  %0 = linalg.fill ins(%f0 : f32) outs(%init : tensor) -> tensor
+  %0 = linalg.fill ins(%f0 : f32) inits(%init : tensor) -> tensor
   %1 = tensor.extract_slice %0[%offset0, 8, 4] [1, %size1, 6] [1, 3, 1]
        : tensor to tensor
   return %1: tensor
@@ -21,7 +21,7 @@
 //      CHECK:   tensor.extract_slice
 func.func @dont_swap_fill_insert_slice_multi_user(%init : tensor, %offset0: index, %size1: index) -> (tensor, tensor<2x?x6xf32>) {
   %f0 = arith.constant 0.000000e+00 : f32
-  %0 = linalg.fill ins(%f0 : f32) outs(%init : tensor) -> tensor
+  %0 = linalg.fill ins(%f0 : f32) inits(%init : tensor) -> tensor
   %1 = tensor.extract_slice %0[%offset0, 8, 4] [2, %size1, 6] [1, 3, 1]
        : tensor to tensor<2x?x6xf32>
   return %0, %1: tensor, tensor<2x?x6xf32>
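The swap tested above is legal because `linalg.fill` overwrites every element of its init operand, so slicing before or after the fill yields the same values; the multi-user case is left alone because the unsliced fill result still has another consumer. A minimal sketch of the pattern's input, with hypothetical shapes:

```mlir
func.func @fill_then_slice(%init: tensor<?x?xf32>, %sz: index) -> tensor<?x?xf32> {
  %f0 = arith.constant 0.0 : f32
  // Before the swap: fill the whole init, then slice the result.
  %fill = linalg.fill ins(%f0 : f32) inits(%init : tensor<?x?xf32>) -> tensor<?x?xf32>
  // After the swap, the slice is taken from %init first and only the
  // slice is filled, avoiding work on elements that are discarded.
  %slice = tensor.extract_slice %fill[0, 0] [%sz, %sz] [1, 1]
      : tensor<?x?xf32> to tensor<?x?xf32>
  return %slice : tensor<?x?xf32>
}
```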
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -2,7 +2,7 @@
 func.func @matmul_tensors(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
   %t0 = linalg.matmul ins(%arg0, %arg1: tensor, tensor)
-                     outs(%arg2: tensor)
+                     inits(%arg2: tensor)
     -> tensor

   %c4 = arith.constant 4 : index
@@ -19,7 +19,7 @@
         %6 = tensor.extract_slice %t0[%arg3, %arg7][%c2, 4][1, 1] : tensor to tensor
         %7 = tensor.extract_slice %arg1[%arg7, %arg5][4, %c3][1, 1] : tensor to tensor<4x?xf32>
         %8 = tensor.extract_slice %arg8[%arg3, %arg5][%c2, %c3][1, 1] : tensor to tensor
-        %9 = linalg.matmul ins(%6, %7 : tensor, tensor<4x?xf32>) outs(%8 : tensor) -> tensor
+        %9 = linalg.matmul ins(%6, %7 : tensor, tensor<4x?xf32>) inits(%8 : tensor) -> tensor
         %10 = tensor.insert_slice %9 into %arg8[%arg3, %arg5] [%c2, %c3] [1, 1] : tensor into tensor
         scf.yield %10 : tensor
       }
@@ -50,8 +50,8 @@
 //       slices of the producing matmul.
 //  CHECK-DAG:    %[[stB2:.*]] = tensor.extract_slice %[[B]][0, %[[K]]] [%[[dB0]], 4] [1, 1] : tensor to tensor
 //  CHECK-DAG:    %[[stC:.*]] = tensor.extract_slice %[[C]][%[[I]], %[[K]]] [2, 4] [1, 1] : tensor to tensor<2x4xf32>
-//      CHECK:    %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor) outs(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32>
-// CHECK-NEXT:    %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) outs(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32>
+//      CHECK:    %[[stD:.*]] = linalg.matmul ins(%[[stA]], %[[stB2]] : tensor<2x?xf32>, tensor) inits(%[[stC]] : tensor<2x4xf32>) -> tensor<2x4xf32>
+// CHECK-NEXT:    %[[stG:.*]] = linalg.matmul ins(%[[stD]], %[[stB1]] : tensor<2x4xf32>, tensor<4x3xf32>) inits(%[[stF]] : tensor<2x3xf32>) -> tensor<2x3xf32>
 // CHECK-NEXT:    tensor.insert_slice %[[stG]] into %[[RES]][%[[I]], %[[J]]]

// -----
@@ -66,12 +66,12 @@
   %cst = arith.constant 0.0 : f32
   %init = tensor.empty() : tensor<1x112x112x32xf32>
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>

   %conv = linalg.conv_2d_nhwc_hwcf
     {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
     ins(%input, %filter : tensor<1x225x225x3xf32>, tensor<3x3x3x32xf32>)
-    outs(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+    inits(%fill : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>

   %for0 = scf.for %iv0 = %c0 to %c112 step %c8 iter_args(%arg0 = %fill) -> tensor<1x112x112x32xf32> {
     %for1 = scf.for %iv1 = %c0 to %c112 step %c16 iter_args(%arg1 = %arg0) -> tensor<1x112x112x32xf32> {
@@ -87,7 +87,7 @@
             affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
           iterator_types = ["parallel", "parallel", "parallel", "parallel"]
         }
-        ins(%0, %1 : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>) outs(%2 : tensor<1x8x16x4xf32>) {
+        ins(%0, %1 : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>) inits(%2 : tensor<1x8x16x4xf32>) {
         ^bb0(%arg3: f32, %arg4: f32, %arg5: f32):
           %result = arith.addf %arg3, %arg4 : f32
           linalg.yield %result : f32
@@ -110,7 +110,7 @@
 // CHECK-SAME: (%[[INPUT:.+]]: tensor<1x225x225x3xf32>, %[[FILTER:.+]]: tensor<3x3x3x32xf32>, %[[ELEM:.+]]: tensor<1x112x112x32xf32>)

 //      CHECK: %[[INIT:.+]] = tensor.empty() : tensor<1x112x112x32xf32>
-// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>
+// CHECK-NEXT: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor<1x112x112x32xf32>) -> tensor<1x112x112x32xf32>

 // CHECK-NEXT: scf.for %[[IV0:.+]] = %{{.+}} to %{{.+}} step %{{.+}} iter_args(%[[ARG0:.+]] = %[[FILL]])
 // CHECK-NEXT:   %[[OFFSET_H:.+]] = affine.apply #[[MAP0]](%[[IV0]])
@@ -124,10 +124,10 @@
 // CHECK-NEXT:       %[[ST_FILL:.+]] = tensor.extract_slice %[[FILL]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4] [1, 1, 1, 1] : tensor<1x112x112x32xf32> to tensor<1x8x16x4xf32>
 // CHECK-NEXT:       %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf
 // CHECK-SAME:         ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor<1x17x33x3xf32>, tensor<3x3x3x4xf32>)
-// CHECK-SAME:         outs(%[[ST_FILL]] : tensor<1x8x16x4xf32>)
+// CHECK-SAME:         inits(%[[ST_FILL]] : tensor<1x8x16x4xf32>)
 // CHECK-NEXT:       %[[ADD:.+]] = linalg.generic
 // CHECK-SAME:         ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor<1x8x16x4xf32>, tensor<1x8x16x4xf32>)
-// CHECK-SAME:         outs(%[[ST_ARG2]] : tensor<1x8x16x4xf32>)
+// CHECK-SAME:         inits(%[[ST_ARG2]] : tensor<1x8x16x4xf32>)
 //      CHECK:       tensor.insert_slice %[[ADD]] into %[[ARG2]][0, %[[IV0]], %[[IV1]], %[[IV2]]] [1, 8, 16, 4]

// -----
@@ -148,12 +148,12 @@
   %oc = tensor.dim %elementwise, %c3 : tensor

   %init = tensor.empty(%n, %oh, %ow, %oc) : tensor
-  %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+  %fill = linalg.fill ins(%cst : f32) inits(%init : tensor) -> tensor

   %conv = linalg.conv_2d_nhwc_hwcf
     {dilations = dense<1> : tensor<2xi64>, strides = dense<2> : tensor<2xi64>}
     ins(%input, %filter : tensor, tensor)
-    outs(%fill : tensor) -> tensor
+    inits(%fill : tensor) -> tensor

   %for0 = scf.for %iv0 = %c0 to %n step %c8 iter_args(%arg0 = %fill) -> tensor {
     %for1 = scf.for %iv1 = %c0 to %oh step %c16 iter_args(%arg1 = %arg0) -> tensor {
@@ -174,7 +174,7 @@
             affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>],
           iterator_types = ["parallel", "parallel", "parallel", "parallel"]
         }
-        ins(%0, %1 : tensor, tensor) outs(%2 : tensor) {
+        ins(%0, %1 : tensor, tensor) inits(%2 : tensor) {
         ^bb0(%arg4: f32, %arg5: f32, %arg6: f32):
           %result = arith.addf %arg4, %arg5 : f32
           linalg.yield %result : f32
@@ -217,7 +217,7 @@
 //  CHECK-DAG: %[[ELEM_OC:.+]] = tensor.dim %[[ELEM]], %[[C3]] : tensor

 //      CHECK: %[[INIT:.+]] = tensor.empty(%[[ELEM_N]], %[[ELEM_OH]], %[[ELEM_OW]], %[[ELEM_OC]]) : tensor
-//      CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) outs(%[[INIT]] : tensor) -> tensor
+//      CHECK: %[[FILL:.+]] = linalg.fill ins(%cst : f32) inits(%[[INIT]] : tensor) -> tensor

 //  CHECK-DAG: %[[FILTER_H:.+]] = tensor.dim %[[FILTER]], %[[C0]] : tensor
 //  CHECK-DAG: %[[FILTER_W:.+]] = tensor.dim %[[FILTER]], %[[C1]] : tensor
@@ -256,10 +256,10 @@
 // CHECK-SAME:         [%[[SIZE_INPUT_N]], %[[SIZE_ELEM_OH_2]], %[[SIZE_ELEM_OW_2]], %[[SIZE_ELEM_OC_2]]]
 // CHECK-NEXT:       %[[ST_CONV:.+]] = linalg.conv_2d_nhwc_hwcf
 // CHECK-SAME:         ins(%[[ST_INPUT]], %[[ST_FILTER]] : tensor, tensor)
-// CHECK-SAME:         outs(%[[ST_FILL]] : tensor) -> tensor
+// CHECK-SAME:         inits(%[[ST_FILL]] : tensor) -> tensor
 // CHECK-NEXT:       %[[ST_ADD:.+]] = linalg.generic
 // CHECK-SAME:         ins(%[[ST_CONV]], %[[ST_ELEM]] : tensor, tensor)
-// CHECK-SAME:         outs(%[[ST_ARG]] : tensor)
+// CHECK-SAME:         inits(%[[ST_ARG]] : tensor)
 //      CHECK:       tensor.insert_slice %[[ST_ADD]] into %[[ARG]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
 // CHECK-SAME:         [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]]
@@ -301,7 +301,7 @@
     tensor.yield %zero : f32
   } : tensor<58x1xf32> to tensor<64x128xf32>

-  %fill = linalg.fill ins(%zero : f32) outs(%large_input : tensor<64x128xf32>) -> tensor<64x128xf32>
+  %fill = linalg.fill ins(%zero : f32) inits(%large_input : tensor<64x128xf32>) -> tensor<64x128xf32>

   %for0 = scf.for %iv0 = %c0 to %d0 step %c16 iter_args(%arg0 = %fill) -> tensor<64x128xf32> {
     %for1 = scf.for %iv1 = %c0 to %d1 step %c32 iter_args(%arg1 = %arg0) -> tensor<64x128xf32> {
@@ -311,7 +311,7 @@
       %add = linalg.generic
         {indexing_maps = [#map, #map, #map],
          iterator_types = ["parallel", "parallel"]}
-        ins(%0, %1 : tensor<16x32xf32>, tensor<16x32xf32>) outs(%2 : tensor<16x32xf32>) {
+        ins(%0, %1 : tensor<16x32xf32>, tensor<16x32xf32>) inits(%2 : tensor<16x32xf32>) {
         ^bb0(%arg4: f32, %arg5: f32, %arg6: f32):
           %result = arith.addf %arg4, %arg5 : f32
           linalg.yield %result : f32
diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir
--- a/mlir/test/Dialect/Linalg/tile-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-conv.mlir
@@ -5,7 +5,7 @@
 //  CHECK-DAG:  #[[MAP2:.*]] = affine_map<(d0)[s0] -> (d0 + s0 - 1)>

 func.func @conv(%arg0 : memref, %arg1 : memref, %arg2 : memref) {
-  linalg.conv_2d ins(%arg0, %arg1 : memref, memref) outs(%arg2 : memref)
+  linalg.conv_2d ins(%arg0, %arg1 : memref, memref) inits(%arg2 : memref)
   return
 }
@@ -38,4 +38,4 @@
 //   CHECK-DAG:       %[[SVOUT:.*]] = memref.subview %[[ARG2]][%[[I]], %[[J]]] [%[[T4]], %[[T5]]]
 //       CHECK:       linalg.conv_2d
 //  CHECK-SAME:         ins(%[[SVIN]], %[[SVKER]]
-//  CHECK-SAME:         outs(%[[SVOUT]]
+//  CHECK-SAME:         inits(%[[SVOUT]]
diff --git a/mlir/test/Dialect/Linalg/tile-indexed.mlir b/mlir/test/Dialect/Linalg/tile-indexed.mlir
--- a/mlir/test/Dialect/Linalg/tile-indexed.mlir
+++ b/mlir/test/Dialect/Linalg/tile-indexed.mlir
@@ -3,7 +3,7 @@
 func.func @indexed_vector(%arg0: memref<50xindex>) {
   linalg.generic {indexing_maps = [affine_map<(i) -> (i)>],
                   iterator_types = ["parallel"]}
-    outs(%arg0 : memref<50xindex>) {
+    inits(%arg0 : memref<50xindex>) {
   ^bb0(%a: index):
     %i = linalg.index 0 : index
     linalg.yield %i : index
@@ -31,7 +31,7 @@
 func.func @indexed_matrix(%arg0: memref<50x50xindex>) {
   linalg.generic {indexing_maps = [affine_map<(i, j) -> (i, j)>],
                   iterator_types = ["parallel", "parallel"]}
-    outs(%arg0 : memref<50x50xindex>) {
+    inits(%arg0 : memref<50x50xindex>) {
   ^bb0(%a: index):
     %i = linalg.index 0 : index
     %j = linalg.index 1 : index
diff --git a/mlir/test/Dialect/Linalg/tile-tensors.mlir b/mlir/test/Dialect/Linalg/tile-tensors.mlir
--- a/mlir/test/Dialect/Linalg/tile-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-tensors.mlir
@@ -14,13 +14,13 @@
 //      CHECK:         %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor to tensor
 //      CHECK:         %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor to tensor
 //      CHECK:         %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor)
-// CHECK-SAME:                                    outs(%[[sTC]] : tensor)  -> tensor
+// CHECK-SAME:                                    inits(%[[sTC]] : tensor)  -> tensor
 //      CHECK:         %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}]  : tensor into tensor
 //      CHECK:         scf.yield %[[TD]] : tensor
 //      CHECK:       scf.yield %[[TD2]] : tensor
 //      CHECK:     scf.yield %[[TD1]] : tensor
   %0 = linalg.matmul  ins(%arg0, %arg1: tensor, tensor)
-                     outs(%arg2: tensor)
+                     inits(%arg2: tensor)
     -> tensor

 //      CHECK: return %[[TD0]] : tensor
@@ -50,7 +50,7 @@
                        affine_map<(d0, d1, d2) -> (d2, d1, d0)>],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%arg0, %arg1 : tensor, tensor)
-      outs(%3 : tensor) {
+      inits(%3 : tensor) {
     ^bb0(%arg2 : f32, %arg3: f32, %arg4: f32):
       %5 = arith.addf %arg2, %arg3 : f32
       linalg.yield %5 : f32
@@ -76,7 +76,7 @@
 //      CHECK:       %[[STARG2:.+]] = tensor.extract_slice %[[TC2]][{{.+}}] : tensor to tensor
 //      CHECK:       %[[STRETURN:.+]] = linalg.generic
 // CHECK-SAME:         ins(%[[STARG0]], %[[STARG1]] : tensor, tensor)
-// CHECK-SAME:         outs(%[[STARG2]] : tensor)
+// CHECK-SAME:         inits(%[[STARG2]] : tensor)
 //      CHECK:       %[[TD:.+]] = tensor.insert_slice %[[STRETURN]] into %[[TC2]]
 //      CHECK:       scf.yield %[[TD]]
 //      CHECK:     }
@@ -121,7 +121,7 @@
                        affine_map<(d0, d1, d2) -> (d0, d1)>],
      iterator_types = ["parallel", "parallel", "parallel"]}
       ins(%1, %arg2 : tensor, tensor)
-      outs(%arg1 : tensor) {
+      inits(%arg1 : tensor) {
     ^bb0(%arg3 : f32, %arg4: f32, %arg5: f32):
       %5 = arith.addf %arg3, %arg5 : f32
       linalg.yield %5 : f32
diff --git a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
--- a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
+++ b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
@@ -21,14 +21,14 @@
   //      CHECK:   %[[tC:.*]] = tensor.extract_slice %[[C_BLK]]{{.*}} : tensor to tensor
   //      CHECK:   %[[RES:.*]] = linalg.matmul
   // CHECK-SAME:      ins(%[[tA]], %[[tB]] : tensor, tensor)
-  // CHECK-SAME:     outs(%[[tC]] : tensor) -> tensor
+  // CHECK-SAME:     inits(%[[tC]] : tensor) -> tensor
   //      CHECK:   scf.foreach_thread.perform_concurrently {
   // CHECK-NEXT:     tensor.parallel_insert_slice %[[RES]] into %[[C_BLK]]{{.*}} :
   // CHECK-SAME:       tensor into tensor
   // CHECK-NEXT:   }
   // CHECK-NEXT: } {thread_dim_mapping = [1, 0]}
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                    outs(%C : tensor) -> (tensor)
+                    inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -69,7 +69,7 @@
   //      CHECK:   scf.foreach_thread.perform_concurrently
   // CHECK-NEXT:     tensor.parallel_insert_slice
   %0 = linalg.matmul ins(%A, %B : tensor<100x200xf32>, tensor<200x300xf32>)
-                    outs(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
+                    inits(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
   return %0 : tensor<100x300xf32>
 }
@@ -110,7 +110,7 @@
   //      CHECK:   scf.foreach_thread.perform_concurrently
   // CHECK-NEXT:     tensor.parallel_insert_slice
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                    outs(%C : tensor) -> (tensor)
+                    inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -148,7 +148,7 @@
   //      CHECK:   scf.foreach_thread.perform_concurrently
   // CHECK-NEXT:     tensor.parallel_insert_slice
   %0 = linalg.matmul ins(%A, %B : tensor<100x200xf32>, tensor<200x300xf32>)
-                    outs(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
+                    inits(%C : tensor<100x300xf32>) -> (tensor<100x300xf32>)
   return %0 : tensor<100x300xf32>
 }
@@ -166,7 +166,7 @@
   %result = linalg.generic {indexing_maps = [
     affine_map<(d0) -> (d0)>,affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]}
-   ins(%A : tensor<4xf32>) outs(%B1 : tensor<4xf32>) {
+   ins(%A : tensor<4xf32>) inits(%B1 : tensor<4xf32>) {
   ^bb0(%arg3: f32, %arg4: f32):  // no predecessors
     %2 = arith.addf %arg3, %arg3 : f32
     linalg.yield %2 : f32
@@ -221,7 +221,7 @@
   // CHECK-NEXT:     tensor.parallel_insert_slice
   %tile_size = "test.dummy"() : () -> (index)
   %0 = linalg.matmul ins(%A, %B : tensor, tensor)
-                    outs(%C : tensor) -> (tensor)
+                    inits(%C : tensor) -> (tensor)
   return %0 : tensor
 }
@@ -270,7 +270,7 @@
                      affine_map<(d0) -> (d0)>],
     iterator_types = ["parallel"]
   } ins(%IN1, %IN2 : tensor<100xf32>, tensor<100xf32>)
-    outs(%OUT1, %OUT2 : tensor<100xf32>, tensor<100xf32>)
+    inits(%OUT1, %OUT2 : tensor<100xf32>, tensor<100xf32>)
   {
   ^bb0(%a1: f32, %a2: f32, %a3: f32, %a4: f32):
     %1 = arith.addf %a1, %a3 : f32
@@ -323,7 +323,7 @@
     ], iterator_types = ["parallel", "parallel"]
   } ins(%IN1, %IN2, %IN3 : tensor<100xf32>, tensor<100x300xf32>, tensor<300xf32>)
-    outs(%OUT1, %OUT2: tensor<300x100xf32>, tensor<300xf32>) {
+    inits(%OUT1, %OUT2: tensor<300x100xf32>, tensor<300xf32>) {
   ^bb0(%i1: f32, %i2: f32, %i3: f32, %o1: f32, %o2: f32):
     %1 = arith.addf %i1, %o1 : f32
     %2 = arith.addf %i2, %1 : f32
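All of the tiling tests above rely on destination-passing style: each tile of the result is carved out of the init operand, computed into, and inserted back into the destination carried by the loop. A minimal hand-written sketch of one such tiled loop nest, with hypothetical static shapes and `scf.for` in place of `scf.foreach_thread` for brevity:

```mlir
func.func @tile_once(%A: tensor<8x16xf32>, %B: tensor<16x8xf32>,
                     %C: tensor<8x8xf32>) -> tensor<8x8xf32> {
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c8 = arith.constant 8 : index
  %r = scf.for %iv = %c0 to %c8 step %c4 iter_args(%dest = %C) -> (tensor<8x8xf32>) {
    // The tile of the destination comes from the init operand.
    %sA = tensor.extract_slice %A[%iv, 0] [4, 16] [1, 1] : tensor<8x16xf32> to tensor<4x16xf32>
    %sC = tensor.extract_slice %dest[%iv, 0] [4, 8] [1, 1] : tensor<8x8xf32> to tensor<4x8xf32>
    %sD = linalg.matmul ins(%sA, %B : tensor<4x16xf32>, tensor<16x8xf32>)
                        inits(%sC : tensor<4x8xf32>) -> tensor<4x8xf32>
    // The computed tile is inserted back into the destination.
    %ins = tensor.insert_slice %sD into %dest[%iv, 0] [4, 8] [1, 1] : tensor<4x8xf32> into tensor<8x8xf32>
    scf.yield %ins : tensor<8x8xf32>
  }
  return %r : tensor<8x8xf32>
}
```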
diff --git a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-decompose.mlir
@@ -13,7 +13,7 @@
   %0 = linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>,
                                  strides = dense<1> : tensor<2xi64>}
      ins (%input, %filter: tensor, tensor<1x?x?x?xf32>)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   // CHECK: return %[[RES]]
   return %0 : tensor
 }
@@ -31,7 +31,7 @@
   %0 = linalg.conv_2d_nchw_fchw {dilations = dense<1> : tensor<2xi64>,
                                  strides = dense<1> : tensor<2xi64>}
      ins (%input, %filter: tensor, tensor)
-    outs (%init: tensor) -> tensor
+    inits (%init: tensor) -> tensor
   // CHECK: return %[[RES]]
   return %0 : tensor
 }
@@ -47,11 +47,11 @@
   // CHECK: %[[SLICERES:.+]] = tensor.extract_slice %[[RES]]
   // CHECK: %[[OPRES:.+]] = linalg.depthwise_conv_1d_nwc_wc
   // CHECK-SAME: ins(%[[SLICE0]], %[[SLICE1]]
-  // CHECK-SAME: outs(%[[SLICERES]]
+  // CHECK-SAME: inits(%[[SLICERES]]
   // CHECK: %[[INSERTED:.+]] = tensor.insert_slice %[[OPRES]] into %[[RES]]
   %0 = linalg.depthwise_conv_2d_nhwc_hwc {dilations = dense<1> : vector<2xi64>, strides = dense<2> : vector<2xi64>}
     ins(%input, %filter: tensor<1x1x113x96xf32>, tensor<1x3x96xf32>)
-    outs(%init: tensor<1x1x56x96xf32>) -> tensor<1x1x56x96xf32>
+    inits(%init: tensor<1x1x56x96xf32>) -> tensor<1x1x56x96xf32>
   // CHECK: %[[INSERTED]]
   return %0: tensor<1x1x56x96xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse-into-containing.mlir
@@ -12,7 +12,7 @@
   func.func @fuse_tileable_op(%arg0: index, %arg1: tensor, %arg2: tensor) -> tensor {
     %cst = arith.constant 4.200000e+01 : f32
     %c0 = arith.constant 0 : index
-    %0 = linalg.fill ins(%cst : f32) outs(%arg1 : tensor) -> tensor
+    %0 = linalg.fill ins(%cst : f32) inits(%arg1 : tensor) -> tensor
     %d0 = tensor.dim %arg1, %c0 : tensor

     %1 = affine.apply #map0()[%d0, %arg0]
@@ -23,11 +23,11 @@
       %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor to tensor

       // CHECK: %[[T0:.*]] = tensor.extract_slice %[[IN]][%{{.*}}] [%{{.*}}] [{{.*}}]
-      // CHECK: %[[T1:.*]] = linalg.fill {{.*}} outs(%[[T0]]
+      // CHECK: %[[T1:.*]] = linalg.fill {{.*}} inits(%[[T0]]
       %6 = tensor.extract_slice %0[%3] [%4] [1] : tensor to tensor

       // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[T1]]
-      %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+      %7 = linalg.elemwise_unary ins(%6 : tensor) inits(%5 : tensor) -> tensor
       scf.foreach_thread.perform_concurrently {
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
       }
@@ -74,7 +74,7 @@
       %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor<64xf32> to tensor

       // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[INIT_TENSOR]]
-      %7 = linalg.elemwise_unary ins(%0 : tensor) outs(%5 : tensor) -> tensor
+      %7 = linalg.elemwise_unary ins(%0 : tensor) inits(%5 : tensor) -> tensor
       scf.foreach_thread.perform_concurrently {
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor<64xf32>
       }
@@ -108,7 +108,7 @@
   func.func @fuse_tileable_op_through_bbarg(%arg0: index, %arg1: tensor, %arg2: tensor) -> tensor {
     %cst = arith.constant 4.200000e+01 : f32
     %c0 = arith.constant 0 : index
-    %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor
+    %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor) -> tensor
     %d0 = tensor.dim %arg1, %c0 : tensor

     %1 = affine.apply #map0()[%d0, %arg0]
@@ -119,11 +119,11 @@
       %5 = tensor.extract_slice %o[%3] [%4] [1] : tensor to tensor

       // CHECK: %[[T0:.*]] = tensor.extract_slice %[[BBARGOUT]][%{{.*}}] [%{{.*}}] [{{.*}}]
-      // CHECK: %[[T1:.*]] = linalg.fill {{.*}} outs(%[[T0]]
+      // CHECK: %[[T1:.*]] = linalg.fill {{.*}} inits(%[[T0]]
       %6 = tensor.extract_slice %arg1[%3] [%4] [1] : tensor to tensor

-      // CHECK: %[[T2:.*]] = linalg.elemwise_unary {{.*}} outs(%[[T1]]
-      %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+      // CHECK: %[[T2:.*]] = linalg.elemwise_unary {{.*}} inits(%[[T1]]
+      %7 = linalg.elemwise_unary ins(%6 : tensor) inits(%5 : tensor) -> tensor
       scf.foreach_thread.perform_concurrently {
         tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
       }
@@ -162,7 +162,7 @@
     %0:2 = linalg.generic {
       indexing_maps = [affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>, affine_map<(d0) -> (d0)>],
       iterator_types = ["parallel"]
-    } ins(%in : tensor) outs(%out_1, %out_3 : tensor, tensor) {
+    } ins(%in : tensor) inits(%out_1, %out_3 : tensor, tensor) {
     ^bb0(%a: f32, %b: f32, %c: f32):
       %d = arith.addf %a, %b : f32
       %e = arith.addf %d, %c : f32
@@ -183,7 +183,7 @@
       %6 = tensor.extract_slice %0#0[%3] [%4] [1] : tensor to tensor

       // CHECK: %[[T2:.*]] = linalg.elemwise_unary ins(%[[T1]]#0
-      %7 = linalg.elemwise_unary ins(%6 : tensor) outs(%5 : tensor) -> tensor
+      %7 = linalg.elemwise_unary ins(%6 : tensor) inits(%5 : tensor) -> tensor
       scf.foreach_thread.perform_concurrently {
        tensor.parallel_insert_slice %7 into %o[%3] [%4] [1] : tensor into tensor
       }
diff --git a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-fuse.mlir
@@ -9,9 +9,9 @@
   // CHECK: linalg.elemwise_binary
   // CHECK: return %[[RES]]
   %0 = linalg.elemwise_unary ins(%arg0 : tensor)
-                             outs(%arg1: tensor) -> tensor
+                             inits(%arg1: tensor) -> tensor
   %1 = linalg.elemwise_binary ins(%0, %arg0 : tensor, tensor)
-                              outs(%arg1: tensor) -> tensor
+                              inits(%arg1: tensor) -> tensor
   return %1 : tensor
 }
@@ -36,9 +36,9 @@
   // CHECK: linalg.elemwise_binary
   // CHECK: return %[[RES]]
   %0 = linalg.elemwise_unary ins(%arg0 : tensor)
-                             outs(%arg1: tensor) -> tensor
+                             inits(%arg1: tensor) -> tensor
   %1 = linalg.elemwise_binary ins(%0, %arg0 : tensor, tensor)
-                              outs(%arg1: tensor) -> tensor
+                              inits(%arg1: tensor) -> tensor
   return %1 : tensor
 }
@@ -66,18 +66,18 @@
 // CHECK:      scf.for %[[IV1:.+]] = %{{.+}} to %{{.+}} step %[[C7]] iter_args(%[[FOR_ARG1:.+]] = %[[FOR_ARG0]])
 // CHECK:        %[[OUT_SLICE0:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], 0, %[[IV1]]]
 // CHECK:        %[[OUT_SLICE1:.+]] = tensor.extract_slice %[[FOR_ARG1]][%[[IV0]], %[[IV1]]]
-// CHECK:        %[[FILL:.+]] = linalg.fill {{.+}} outs(%[[OUT_SLICE1]] : tensor)
+// CHECK:        %[[FILL:.+]] = linalg.fill {{.+}} inits(%[[OUT_SLICE1]] : tensor)
 // CHECK:        scf.for %[[IV2:.+]] = %{{.+}} to %{{.+}} step %[[C4]] iter_args(%[[FOR_ARG2:.+]] = %[[FILL]])
 // CHECK:          %[[IN_SLICE:.+]] = tensor.extract_slice %[[OUT_SLICE0]]
 // CHECK:          %[[OUT_SLICE2:.+]] = tensor.extract_slice %[[FOR_ARG2]][0, 0]
-// CHECK:          linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor) outs(%[[OUT_SLICE2]] : tensor)
+// CHECK:          linalg.generic {{.+}} ins(%[[IN_SLICE]] : tensor) inits(%[[OUT_SLICE2]] : tensor)
 // CHECK:      return %[[RES]]
-  %fill = linalg.fill ins(%five : f32) outs(%init : tensor<12x25xf32>) -> tensor<12x25xf32>
+  %fill = linalg.fill ins(%five : f32) inits(%init : tensor<12x25xf32>) -> tensor<12x25xf32>
   %0 = linalg.generic {
       indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
                        affine_map<(d0, d1, d2) -> (d0, d2)>],
       iterator_types = ["parallel", "reduction", "parallel"]
-    } ins(%input : tensor<12x7x25xf32>) outs(%fill : tensor<12x25xf32>) {
+    } ins(%input : tensor<12x7x25xf32>) inits(%fill : tensor<12x25xf32>) {
     ^bb0(%arg0: f32, %arg1: f32):
       %2 = arith.addf %arg0, %arg1 : f32
       linalg.yield %2 : f32
diff --git a/mlir/test/Dialect/Linalg/transform-op-generalize.mlir b/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-generalize.mlir
@@ -6,7 +6,7 @@
   // CHECK-NOT: linalg.elemwise_unary
   // CHECK: linalg.generic
   %0 = linalg.elemwise_unary ins(%arg0 : tensor)
-                             outs(%arg1: tensor) -> tensor
+                             inits(%arg1: tensor) -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-interchange.mlir b/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-interchange.mlir
@@ -10,7 +10,7 @@
   %0 = linalg.generic {
     indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]
-  } ins(%arg0 : tensor) outs(%arg1 : tensor) {
+  } ins(%arg0 : tensor) inits(%arg1 : tensor) {
   ^bb0(%arg2: f32, %arg3: f32):
     %1 = math.exp %arg2 : f32
     linalg.yield %1 : f32
@@ -28,7 +28,7 @@
 func.func @interchange_matmul(%arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) outs(%arg2 : tensor) -> tensor
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor) inits(%arg2 : tensor) -> tensor
   return %0 : tensor
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-match.mlir b/mlir/test/Dialect/Linalg/transform-op-match.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-match.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-match.mlir
@@ -48,7 +48,7 @@
   %1 = linalg.generic {indexing_maps = [#map0, #map1],
                        iterator_types = ["parallel", "parallel", "parallel"]}
     ins(%arg0 : tensor<12x128x32xf32>)
-    outs(%0 : tensor<128x12x32xf32>) {
+    inits(%0 : tensor<128x12x32xf32>) {
   ^bb0(%arg1: f32, %arg2: f32):
     linalg.yield %arg1 : f32
   } -> tensor<128x12x32xf32>
diff --git a/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir b/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-multitile-sizes.mlir
@@ -13,7 +13,7 @@
     %arg0: tensor<13x34xf32>, %arg1: tensor<34x42xf32>, %arg2: tensor<13x42xf32>)
     -> tensor<13x42xf32> {
   %0 = linalg.matmul  ins(%arg0, %arg1: tensor<13x34xf32>, tensor<34x42xf32>)
-                     outs(%arg2: tensor<13x42xf32>)
+                     inits(%arg2: tensor<13x42xf32>)
     -> tensor<13x42xf32>

   // The first application computes the total size.
   // CHECK: %{{.*}} = affine.apply #[[$MAP13]]()
@@ -59,7 +59,7 @@
     %arg0: tensor, %arg1: tensor, %arg2: tensor) -> tensor {
   %0 = linalg.matmul  ins(%arg0, %arg1: tensor, tensor)
-                     outs(%arg2: tensor)
+                     inits(%arg2: tensor)
     -> tensor

   return %0 : tensor
diff --git a/mlir/test/Dialect/Linalg/transform-op-pad.mlir b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-pad.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-pad.mlir
@@ -25,8 +25,8 @@

   //      CHECK:   %[[T5:.*]] = linalg.matmul
   // CHECK-SAME:              ins(%[[T3]], %[[T4]] : tensor<4x7xf32>, tensor<7x5xf32>)
-  // CHECK-SAME:              outs(%[[T2]] : tensor<4x5xf32>)
-  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
+  // CHECK-SAME:              inits(%[[T2]] : tensor<4x5xf32>)
+  %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
   %5 = tensor.insert_slice %4 into %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32>
   func.return %5 : tensor<24x25xf32>
 }
@@ -43,7 +43,7 @@
                   %arg1: tensor<12x25xf32>,
                   %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -60,7 +60,7 @@
                   %arg1: tensor<12x25xf32>,
                   %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   // expected-note @below {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
@@ -78,7 +78,7 @@
                   %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> {
   // This is attached to an error that is silenceable and is not reported by this transform
   //   {{when applied to this op}}
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
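The padding transform above rewrites each matmul so that its input slices, and the matching slice of its init, are padded to static sizes before the computation runs. A minimal sketch of the `tensor.pad` building block involved, with hypothetical sizes and padding value:

```mlir
func.func @pad_operand(%slice: tensor<?x?xf32>, %h0: index, %h1: index) -> tensor<4x7xf32> {
  %cst = arith.constant 0.0 : f32
  // Pads the dynamic slice up to the static 4x7 shape that the padded
  // matmul consumes; %h0/%h1 are the high padding amounts.
  %padded = tensor.pad %slice low[0, 0] high[%h0, %h1] {
  ^bb0(%i: index, %j: index):
    tensor.yield %cst : f32
  } : tensor<?x?xf32> to tensor<4x7xf32>
  return %padded : tensor<4x7xf32>
}
```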
  // CHECK: linalg.matmul ins(%{{.*}}, %{{.*}} : tensor<1x12
-  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
+  %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32>
   func.return %0 : tensor<24x25xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction-by-scaling.mlir
@@ -7,14 +7,14 @@
   // CHECK: linalg.generic
   // CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "reduction"]
   // CHECK-SAME: ins(%{{[a-zA-Z0-9]*}}, %{{[a-zA-Z0-9]*}}, %{{[a-zA-Z0-9]*}} : tensor<?x256xf32>, tensor<256x32xf32>, tensor<64x4xi1>)
-  // CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor<?x32x4xf32>) {
+  // CHECK-SAME: inits(%{{[a-zA-Z0-9]*}} : tensor<?x32x4xf32>) {
   // CHECK: linalg.generic
   // CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
   // CHECK-SAME: ins(%{{[a-zA-Z0-9]*}} : tensor<?x32x4xf32>)
-  // CHECK-SAME: outs(%{{[a-zA-Z0-9]*}} : tensor<?x32xf32>) {
+  // CHECK-SAME: inits(%{{[a-zA-Z0-9]*}} : tensor<?x32xf32>) {
   %0 = linalg.matmul ins(%A, %B: tensor<?x256xf32>, tensor<256x32xf32>)
-                     outs(%C: tensor<?x32xf32>) -> tensor<?x32xf32>
+                     inits(%C: tensor<?x32xf32>) -> tensor<?x32xf32>
   return %0: tensor<?x32xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
--- a/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split-reduction.mlir
@@ -2,7 +2,7 @@
 func.func @matmul_split(%A : tensor<16x256xf32>, %B: tensor<256x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
   %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, tensor<256x32xf32>)
-                     outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+                     inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
   return %0: tensor<16x32xf32>
 }
@@ -16,16 +16,16 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x4x64xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<4x64x32xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
 // CHECK-SAME: , iterator_types = ["parallel", "parallel", "parallel", "reduction"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x4x64xf32>, tensor<4x64x32xf32>) outs(%[[F]] : tensor<16x32x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x4x64xf32>, tensor<4x64x32xf32>) inits(%[[F]] : tensor<16x32x4xf32>) {
 // CHECK: arith.mulf
 // CHECK: arith.addf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<16x32x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]],
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) outs(%{{.*}} : tensor<16x32xf32>) {
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) inits(%{{.*}} : tensor<16x32xf32>) {
 // CHECK: arith.addf
 // CHECK: linalg.yield %{{.*}} : f32
 // CHECK: } -> tensor<16x32xf32>
@@ -45,7 +45,7 @@
                                         affine_map<(d0) -> 
()>],
   iterator_types = ["reduction"]}
   ins(%arg0, %arg1 : tensor<32xf32>, tensor<f32>)
-  outs(%out : tensor<f32>) {
+  inits(%out : tensor<f32>) {
   ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
     %40 = arith.subf %arg7, %arg8 : f32
     %41 = math.exp %40 : f32
@@ -64,16 +64,16 @@
 // CHECK-DAG: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<4x8xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
 // CHECK: %[[G:.*]] = linalg.generic
 // CHECK: {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
-// CHECK: iterator_types = ["parallel", "reduction"]} ins(%[[I1]], %{{.*}} : tensor<4x8xf32>, tensor<f32>) outs(%[[F]] : tensor<4xf32>) {
+// CHECK: iterator_types = ["parallel", "reduction"]} ins(%[[I1]], %{{.*}} : tensor<4x8xf32>, tensor<f32>) inits(%[[F]] : tensor<4xf32>) {
 // CHECK: arith.subf
 // CHECK: math.exp
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<4xf32>
-// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) outs(%{{.*}} : tensor<f32>) {
+// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) inits(%{{.*}} : tensor<f32>) {
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<f32>
@@ -97,7 +97,7 @@
       affine_map<(d0, d1, d2) -> (d2, d0)>
     ],
     iterator_types = ["parallel", "reduction", "parallel"]
-  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) {
+  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) inits(%output : tensor<5x2xf32>) {
   ^bb0(%arg0: f32, %arg1: f32, %arg2: f32):
     %3 = arith.addf %arg0, %arg1 : f32
     %4 = arith.maxf %3, %arg2 : f32
@@ -116,15 +116,15 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<4x8x2xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x4x8xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<4x8x2xf32>, tensor<5x4x8xf32>) outs(%[[F]] : tensor<5x2x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<4x8x2xf32>, tensor<5x4x8xf32>) inits(%[[F]] : tensor<5x2x4xf32>) {
 // CHECK: arith.addf
 // CHECK: arith.maxf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<5x2x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]}
-// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) {
+// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) inits(%{{.*}} : tensor<5x2xf32>) {
 // CHECK: arith.maxf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<5x2xf32>
@@ -140,7 +140,7 @@
 func.func @matmul_split(%A : tensor<16x256xf32>, %B: tensor<256x32xf32>, %C: tensor<16x32xf32>) -> tensor<16x32xf32> {
   %0 = linalg.matmul ins(%A, %B: tensor<16x256xf32>, 
tensor<256x32xf32>)
-                     outs(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
+                     inits(%C: tensor<16x32xf32>) -> tensor<16x32xf32>
   return %0: tensor<16x32xf32>
 }
@@ -154,16 +154,16 @@
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<16x256xf32> into tensor<16x64x4xf32>
 // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<256x32xf32> into tensor<64x4x32xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<16x32x4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<16x32x4xf32>) -> tensor<16x32x4xf32>
 // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
 // CHECK-SAME: , iterator_types = ["parallel", "parallel", "reduction", "parallel"]}
-// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x64x4xf32>, tensor<64x4x32xf32>) outs(%[[F]] : tensor<16x32x4xf32>) {
+// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<16x64x4xf32>, tensor<64x4x32xf32>) inits(%[[F]] : tensor<16x32x4xf32>) {
 // CHECK: arith.mulf
 // CHECK: arith.addf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<16x32x4xf32>
 // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]],
-// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) outs(%{{.*}} : tensor<16x32xf32>) {
+// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]} ins(%[[G]] : tensor<16x32x4xf32>) inits(%{{.*}} : tensor<16x32xf32>) {
 // CHECK: arith.addf
 // CHECK: linalg.yield %{{.*}} : f32
 // CHECK: } -> tensor<16x32xf32>
@@ -183,7 +183,7 @@
                                         affine_map<(d0) -> ()>],
   iterator_types = ["reduction"]}
   ins(%arg0, %arg1 : tensor<32xf32>, tensor<f32>)
-  outs(%out : tensor<f32>) {
+  inits(%out : tensor<f32>) {
   ^bb0(%arg7: f32, %arg8: f32, %arg9: f32):
     %40 = arith.subf %arg7, %arg8 : f32
     %41 = math.exp %40 : f32
@@ -202,16 +202,16 @@
 // CHECK-DAG: %[[ID:.*]] = arith.constant 1.000000e+00 : f32
 // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1]] : tensor<32xf32> into tensor<8x4xf32>
 // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<4xf32>
-// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
+// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<4xf32>) -> tensor<4xf32>
 // CHECK: %[[G:.*]] = linalg.generic
 // CHECK: {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]],
-// CHECK: iterator_types = ["reduction", "parallel"]} ins(%[[I1]], %{{.*}} : tensor<8x4xf32>, tensor<f32>) outs(%[[F]] : tensor<4xf32>) {
+// CHECK: iterator_types = ["reduction", "parallel"]} ins(%[[I1]], %{{.*}} : tensor<8x4xf32>, tensor<f32>) inits(%[[F]] : tensor<4xf32>) {
 // CHECK: arith.subf
 // CHECK: math.exp
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<4xf32>
-// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) outs(%{{.*}} : tensor<f32>) {
+// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["reduction"]} ins(%[[G]] : tensor<4xf32>) inits(%{{.*}} : tensor<f32>) {
 // CHECK: arith.mulf
 // CHECK: linalg.yield
 // CHECK: } -> tensor<f32>
@@ -235,7 +235,7 @@
       affine_map<(d0, d1, d2) -> (d2, d0)>
     ],
     iterator_types = ["parallel", "reduction", "parallel"]
-  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) outs(%output : tensor<5x2xf32>) {
+  } ins(%input, %input_2 : tensor<32x2xf32>, tensor<5x32xf32>) 
inits(%output : tensor<5x2xf32>) { ^bb0(%arg0: f32, %arg1: f32, %arg2: f32): %3 = arith.addf %arg0, %arg1 : f32 %4 = arith.minf %3, %arg2 : f32 @@ -254,15 +254,15 @@ // CHECK-DAG: %[[I1:.*]] = tensor.expand_shape %{{.*}}[0, 1], [2]] : tensor<32x2xf32> into tensor<8x4x2xf32> // CHECK-DAG: %[[I2:.*]] = tensor.expand_shape %{{.*}}[0], [1, 2]] : tensor<5x32xf32> into tensor<5x8x4xf32> // CHECK-DAG: %[[INI:.*]] = tensor.empty() : tensor<5x2x4xf32> -// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) outs(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32> +// CHECK: %[[F:.*]] = linalg.fill ins(%[[ID]] : f32) inits(%[[INI]] : tensor<5x2x4xf32>) -> tensor<5x2x4xf32> // CHECK: %[[G:.*]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]], iterator_types = ["parallel", "reduction", "parallel", "parallel"]} -// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) outs(%[[F]] : tensor<5x2x4xf32>) { +// CHECK-SAME: ins(%[[I1]], %[[I2]] : tensor<8x4x2xf32>, tensor<5x8x4xf32>) inits(%[[F]] : tensor<5x2x4xf32>) { // CHECK: arith.addf // CHECK: arith.minf // CHECK: linalg.yield // CHECK: } -> tensor<5x2x4xf32> // CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP3]], #[[$MAP4]]], iterator_types = ["parallel", "parallel", "reduction"]} -// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) outs(%{{.*}} : tensor<5x2xf32>) { +// CHECK-SAME: ins(%[[G]] : tensor<5x2x4xf32>) inits(%{{.*}} : tensor<5x2xf32>) { // CHECK: arith.minf // CHECK: linalg.yield // CHECK: } -> tensor<5x2xf32> diff --git a/mlir/test/Dialect/Linalg/transform-op-split.mlir b/mlir/test/Dialect/Linalg/transform-op-split.mlir --- a/mlir/test/Dialect/Linalg/transform-op-split.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-split.mlir @@ -17,7 +17,7 @@ // CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT]][0] [42] [1] : tensor<100xf32> to tensor<42xf32> // CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic // CHECK: ins(%[[IN_SLICE_LOW]] - // CHECK: outs(%[[OUT_SLICE_LOW]] + // CHECK: inits(%[[OUT_SLICE_LOW]] // CHECK: linalg.index 0 // CHECK: func.call @elem // CHECK: %[[RES_PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [42] [1] @@ -26,7 +26,7 @@ // CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[RES_PARTIAL]][42] [58] [1] : tensor<100xf32> to tensor<58xf32> // CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic // CHECK: ins(%[[IN_SLICE_HIGH]] - // CHECK: outs(%[[OUT_SLICE_HIGH]] + // CHECK: inits(%[[OUT_SLICE_HIGH]] // CHECK: %[[IDX:.+]] = linalg.index 0 // CHECK: affine.apply #[[$ADD_42_MAP]](%[[IDX]]) // CHECK: func.call @elem @@ -35,7 +35,7 @@ indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], iterator_types = ["parallel"] } - ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) { + ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) { ^bb0(%0: f32, %1: f32): %i = linalg.index 0 : index %call_res = func.call @elem(%0, %i, %i) : (f32, index, index) -> f32 @@ -53,14 +53,14 @@ // the splitting altogether. 
// CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic // CHECK: ins(%[[IN]] - // CHECK: outs(%[[OUT]] + // CHECK: inits(%[[OUT]] // CHECK: linalg.index 0 // CHECK: func.call @elem %0 = linalg.generic { indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], iterator_types = ["parallel"] } - ins(%arg0: tensor<10xf32>) outs(%arg1: tensor<10xf32>) { + ins(%arg0: tensor<10xf32>) inits(%arg1: tensor<10xf32>) { ^bb0(%0: f32, %1: f32): %i = linalg.index 0 : index %call_res = func.call @elem(%0, %i, %i) : (f32, index, index) -> f32 @@ -92,7 +92,7 @@ // CHECK: %[[OUT_SLICE_LOW:.+]] = tensor.extract_slice %[[OUT:.+]][0] [%[[SPLIT_LOW]]] [1] : tensor<100xf32> to tensor // CHECK: %[[RES_SLICE_LOW:.+]] = linalg.generic // CHECK: ins(%[[IN_SLICE_LOW]] - // CHECK: outs(%[[OUT_SLICE_LOW]] + // CHECK: inits(%[[OUT_SLICE_LOW]] // CHECK: %[[PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [%[[SPLIT_LOW]]] [1] // // CHECK: %[[SPLIT_HIGH_2:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]] @@ -101,7 +101,7 @@ // CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[PARTIAL:.+]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_3]]] [1] : tensor<100xf32> to tensor // CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic // CHECK: ins(%[[IN_SLICE_HIGH]] - // CHECK: outs(%[[OUT_SLICE_HIGH]] + // CHECK: inits(%[[OUT_SLICE_HIGH]] // CHECK: %[[SPLIT_HIGH_4:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]] // CHECK: tensor.insert_slice %[[RES_SLICE_HIGH]] into %[[PARTIAL]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_4]]] [1] %0 = func.call @get_size() : () -> index @@ -109,7 +109,7 @@ indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], iterator_types = ["parallel"] } - ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) { + ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) { ^bb0(%3: f32, %4: f32): %5 = arith.addf %3, %4 : f32 linalg.yield %5 : f32 @@ -137,7 +137,7 @@ // CHECK: %[[OUT_1:.+]] = tensor.extract_slice %[[OUT:.+]][0, 0] // CHECK: %[[RES_1:.+]] = linalg.generic // CHECK-SAME: ins(%[[IN_1]] : tensor<4x34xf32>) - // CHECK-SAME: outs(%[[OUT_1]] : tensor<4x34xf32>) + // CHECK-SAME: inits(%[[OUT_1]] : tensor<4x34xf32>) // CHECK: %[[PARTIAL_1:.+]] = tensor.insert_slice %[[RES_1]] into %[[OUT]] // // CHECK: %[[IN_2:.+]] = tensor.extract_slice %[[IN]] @@ -148,14 +148,14 @@ // CHECK: %[[OUT_21:.+]] = tensor.extract_slice %[[OUT_2]] // CHECK: %[[RES_21:.+]] = linalg.generic // CHECK-SAME: ins(%[[IN_21]] : tensor<6x16xf32>) - // CHECK-SAME: outs(%[[OUT_21]] : tensor<6x16xf32>) + // CHECK-SAME: inits(%[[OUT_21]] : tensor<6x16xf32>) // CHECK: %[[PARTIAL_21:.+]] = tensor.insert_slice %[[RES_21]] into %[[OUT_2]] // // CHECK: %[[IN_22:.+]] = tensor.extract_slice %[[IN_2]] // CHECK: %[[OUT_22:.+]] = tensor.extract_slice %[[PARTIAL_21]] // CHECK: %[[RES_22:.+]] = linalg.generic // CHECK-SAME: ins(%[[IN_22]] : tensor<6x18xf32>) - // CHECK-SAME: outs(%[[OUT_22]] : tensor<6x18xf32>) + // CHECK-SAME: inits(%[[OUT_22]] : tensor<6x18xf32>) // CHECK: %[[PARTIAL_22:.+]] = tensor.insert_slice %[[RES_22]] into %[[PARTIAL_21]] // CHECK: %[[PARTIAL_2:.+]] = tensor.insert_slice %[[PARTIAL_22]] into %[[PARTIAL_1]] %0 = linalg.generic { @@ -164,7 +164,7 @@ iterator_types = ["parallel", "parallel"] } ins(%arg0: tensor<10x34xf32>) - outs(%arg1: tensor<10x34xf32>) { + inits(%arg1: tensor<10x34xf32>) { ^bb0(%0: f32, %1: f32): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -201,7 +201,7 @@ indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], iterator_types = ["parallel"] } - 
ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) { + ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) { ^bb0(%3: f32, %4: f32): linalg.yield %3 : f32 } -> tensor<100xf32> @@ -225,7 +225,7 @@ indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], iterator_types = ["parallel"] } - ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) { + ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) { ^bb0(%3: f32, %4: f32): linalg.yield %3 : f32 } -> tensor<100xf32> @@ -261,7 +261,7 @@ indexing_maps = [affine_map<(i) -> (i)>, affine_map<(i) -> (i)>], iterator_types = ["parallel"] } - ins(%arg0: tensor<100xf32>) outs(%arg1: tensor<100xf32>) { + ins(%arg0: tensor<100xf32>) inits(%arg1: tensor<100xf32>) { ^bb0(%0: f32, %1: f32): linalg.yield %0 : f32 } -> tensor<100xf32> diff --git a/mlir/test/Dialect/Linalg/transform-op-tile.mlir b/mlir/test/Dialect/Linalg/transform-op-tile.mlir --- a/mlir/test/Dialect/Linalg/transform-op-tile.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-tile.mlir @@ -21,13 +21,13 @@ // CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32> // CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<128x128xf32> to tensor<4x4xf32> // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor<4x4xf32>, tensor<4x4xf32>) -// CHECK-SAME: outs(%[[sTC]] : tensor<4x4xf32>) -> tensor<4x4xf32> +// CHECK-SAME: inits(%[[sTC]] : tensor<4x4xf32>) -> tensor<4x4xf32> // CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor<4x4xf32> into tensor<128x128xf32> // CHECK: scf.yield %[[TD]] : tensor<128x128xf32> // CHECK: scf.yield %[[TD2]] : tensor<128x128xf32> // CHECK: scf.yield %[[TD1]] : tensor<128x128xf32> %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> // CHECK: return %[[TD0]] : tensor<128x128xf32> @@ -60,14 +60,14 @@ // CHECK: %[[sTB:.*]] = tensor.extract_slice %[[TB]][{{.*}}] : tensor<128x128xf32> to tensor<4x?xf32> // CHECK: %[[sTC:.*]] = tensor.extract_slice %[[TC2]][{{.*}}] : tensor<128x128xf32> to tensor // CHECK: %[[sTD:.*]] = linalg.matmul ins(%[[sTA]], %[[sTB]] : tensor, tensor<4x?xf32>) -// CHECK-SAME: outs(%[[sTC]] : tensor) -> tensor +// CHECK-SAME: inits(%[[sTC]] : tensor) -> tensor // CHECK: %[[TD:.*]] = tensor.insert_slice %[[sTD]] into %[[TC2]][{{.*}}] : tensor into tensor<128x128xf32> // CHECK: scf.yield %[[TD]] : tensor<128x128xf32> // CHECK: scf.yield %[[TD2]] : tensor<128x128xf32> // CHECK: scf.yield %[[TD1]] : tensor<128x128xf32> %sz = func.call @get_dynamic_tile_size() : () -> index %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> // CHECK: return %[[TD0]] : tensor<128x128xf32> diff --git a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir --- a/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir +++ b/mlir/test/Dialect/Linalg/transform-op-vectorize.mlir @@ -12,7 +12,7 @@ // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]] // CHECK: vector.transfer_write %[[vR]], %[[C]] - %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) 
inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> func.return %0 : tensor<24x25xf32> } @@ -57,7 +57,7 @@ // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]] // CHECK: vector.transfer_write %[[vR]], %[[C]] - %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> %9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> return %9 : tensor<24x25xf32> } @@ -105,7 +105,7 @@ // CHECK: %[[vC:.+]] = vector.transfer_read %[[C]] // CHECK: %[[vR:.+]] = vector.contract {{.*}} %[[vA]], %[[vB]], %[[vC]] // CHECK: vector.transfer_write %[[vR]], %[[C]] - %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> + %8 = linalg.matmul ins(%5, %7 : tensor<4x7xf32>, tensor<7x5xf32>) inits(%3 : tensor<4x5xf32>) -> tensor<4x5xf32> %9 = tensor.insert_slice %8 into %arg2[%arg3, %arg4] [4, 5] [1, 1] : tensor<4x5xf32> into tensor<24x25xf32> return %9 : tensor<24x25xf32> } @@ -123,7 +123,7 @@ %arg1: tensor<12x25xf32>, %arg2: tensor<24x25xf32>) -> tensor<24x25xf32> { // expected-note @below {{non-isolated target}} - %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) outs(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> + %0 = linalg.matmul ins(%arg0, %arg1 : tensor<24x12xf32>, tensor<12x25xf32>) inits(%arg2 : tensor<24x25xf32>) -> tensor<24x25xf32> func.return %0 : tensor<24x25xf32> } diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir --- a/mlir/test/Dialect/Linalg/transform-patterns.mlir +++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir @@ -7,7 +7,7 @@ %v: memref) { linalg.dot ins(%x, %y: memref>, memref>) - outs(%v: memref) + inits(%v: memref) return } @@ -31,7 +31,7 @@ linalg.matvec ins(%A, %x: memref>, memref>) - outs(%y: memref>) + inits(%y: memref>) return } @@ -49,7 +49,7 @@ // CHECK: scf.for {{.*}} step %[[c6]] // CHECK: linalg.matvec // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) // ----- @@ -58,7 +58,7 @@ %C: memref>) { linalg.matmul ins(%A, %B: memref>, memref>) - outs(%C: memref>) + inits(%C: memref>) return } @@ -99,7 +99,7 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c4]] { // CHECK: linalg.matmul // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) // ----- @@ -126,7 +126,7 @@ linalg.generic #generic_matmul_trait ins(%A, %B : memref>, memref>) - outs(%C : memref>) { + inits(%C : memref>) { ^bb(%a: f32, %b: f32, %c: f32): %d = arith.mulf %a, %b: f32 %e = arith.addf %c, %d: f32 @@ -157,7 +157,7 @@ %y: memref>) { linalg.matvec ins(%A, %x: memref>, memref>) - outs(%y: memref>) + inits(%y: memref>) return } @@ -175,7 +175,7 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c5]] // CHECK: linalg.matvec // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: memref>) // ----- @@ -184,7 +184,7 @@ %C: memref>) { linalg.matmul ins(%A, %B: memref>, memref>) - outs(%C : memref>) + inits(%C : memref>) return } @@ -218,4 +218,4 @@ // CHECK: scf.for {{.*}} = %[[c0]] to {{.*}} step %[[c40]] { // CHECK: linalg.matmul // CHECK: ins({{.*}}: memref>, memref>) -// CHECK: outs({{.*}}: memref>) +// CHECK: inits({{.*}}: 
memref>) diff --git a/mlir/test/Dialect/Linalg/transform-promotion.mlir b/mlir/test/Dialect/Linalg/transform-promotion.mlir --- a/mlir/test/Dialect/Linalg/transform-promotion.mlir +++ b/mlir/test/Dialect/Linalg/transform-promotion.mlir @@ -22,7 +22,7 @@ memref> to memref> linalg.matmul ins(%3, %4: memref>, memref>) - outs(%5: memref>) + inits(%5: memref>) } } } @@ -56,7 +56,7 @@ // CHECK: memref.copy %[[s2]], %[[l2]] : memref to memref // CHECK: linalg.matmul // CHECK-SAME: ins(%[[v0]], %[[v1]] : memref, memref) -// CHECK-SAME: outs(%[[v2]] : memref) +// CHECK-SAME: inits(%[[v2]] : memref) transform.sequence failures(propagate) { ^bb0(%arg1: !pdl.operation): @@ -89,7 +89,7 @@ linalg.matmul {__internal_linalg_transform__ = "_promote_first_view_"} ins(%3, %4: memref>, memref>) - outs(%5: memref>) + inits(%5: memref>) } } } @@ -116,7 +116,7 @@ // CHECK-NOT: memref.copy // CHECK: linalg.matmul // CHECK-SAME: ins(%[[v0]], %[[s1]] : memref, memref>) -// CHECK-SAME: outs(%[[s2]] : memref>) +// CHECK-SAME: inits(%[[s2]] : memref>) transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): @@ -138,7 +138,7 @@ %3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1] : memref> to memref> linalg.fill - ins(%cf : f32) outs(%3 : memref>) + ins(%cf : f32) inits(%3 : memref>) return } // CHECK-LABEL: func @aligned_promote_fill @@ -147,9 +147,9 @@ // CHECK: %[[a0:.*]] = memref.alloc() {alignment = 32 : i64} : memref<32000000xi8> // CHECK: %[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<32000000xi8> to memref // CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref to memref> -// CHECK: linalg.fill ins({{.*}} : f32) outs(%[[v0]] : memref) +// CHECK: linalg.fill ins({{.*}} : f32) inits(%[[v0]] : memref) // CHECK: memref.copy %[[s0]], %[[l0]] : memref to memref -// CHECK: linalg.fill ins(%[[cf]] : f32) outs(%[[v0]] : memref) +// CHECK: linalg.fill ins(%[[cf]] : f32) inits(%[[v0]] : memref) transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): @@ -172,7 +172,7 @@ %3 = memref.subview %arg0[%c0, %c0][%c2000, %c4000][%c1, %c1] : memref, strided<[?, 1], offset: ?>> to memref, strided<[?, ?], offset: ?>> linalg.fill ins(%cc : complex) - outs(%3 : memref, strided<[?, ?], offset: ?>>) + inits(%3 : memref, strided<[?, ?], offset: ?>>) return } // CHECK-LABEL: func @aligned_promote_fill_complex @@ -181,9 +181,9 @@ // CHECK: %[[a0:.*]] = memref.alloc() {alignment = 32 : i64} : memref<64000000xi8> // CHECK: %[[v0:.*]] = memref.view %[[a0]]{{.*}} : memref<64000000xi8> to memref> // CHECK: %[[l0:.*]] = memref.subview %[[v0]][0, 0] [%{{.*}}, %{{.*}}] [1, 1] : memref> to memref, strided<[?, 1], offset: ?>> -// CHECK: linalg.fill ins({{.*}} : complex) outs(%[[v0]] : memref>) +// CHECK: linalg.fill ins({{.*}} : complex) inits(%[[v0]] : memref>) // CHECK: memref.copy %[[s0]], %[[l0]] : memref, strided{{.*}}> to memref, strided{{.*}}> -// CHECK: linalg.fill ins(%[[cc]] : complex) outs(%[[v0]] : memref>) +// CHECK: linalg.fill ins(%[[cc]] : complex) inits(%[[v0]] : memref>) transform.with_pdl_patterns { ^bb0(%arg0: !pdl.operation): diff --git a/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir b/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir --- a/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir +++ b/mlir/test/Dialect/Linalg/transform-tile-and-fuse.mlir @@ -17,11 +17,11 @@ %5 = linalg.fill {__producer__} ins(%cst : f32) - outs(%D : tensor) -> tensor + inits(%D : tensor) -> tensor %6 = linalg.matmul {__producer__} ins(%A, %B : tensor, tensor) - outs(%5 : tensor) -> tensor + 
inits(%5 : tensor) -> tensor %7 = linalg.generic {__root__, indexing_maps = [affine_map<(d0, d1) -> (d0)>, @@ -30,7 +30,7 @@ iterator_types = ["parallel", "parallel"] } ins(%C, %6 : tensor, tensor) - outs(%D : tensor) { + inits(%D : tensor) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): %16 = arith.maxf %arg3, %cst : f32 %17 = arith.cmpf ogt, %arg2, %cst : f32 @@ -74,11 +74,11 @@ %5 = linalg.fill {__producer__} ins(%cst : f32) - outs(%D : tensor) -> tensor + inits(%D : tensor) -> tensor %6 = linalg.matmul {__producer__} ins(%A, %B : tensor, tensor) - outs(%5 : tensor) -> tensor + inits(%5 : tensor) -> tensor %7 = linalg.generic {__root__, indexing_maps = [affine_map<(d0, d1) -> (d0)>, @@ -87,7 +87,7 @@ iterator_types = ["parallel", "parallel"] } ins(%C, %6 : tensor, tensor) - outs(%D : tensor) { + inits(%D : tensor) { ^bb0(%arg2: f32, %arg3: f32, %arg4: f32): %16 = arith.maxf %arg3, %cst : f32 %17 = arith.cmpf ogt, %arg2, %cst : f32 diff --git a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir --- a/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir +++ b/mlir/test/Dialect/Linalg/transform-tile-reduction.mlir @@ -5,7 +5,7 @@ affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %1 = arith.mulf %arg7, %arg7 : f32 %2 = arith.addf %1, %arg9 : f32 @@ -32,12 +32,12 @@ // CHECK-DAG: %[[D1:.*]] = tensor.dim %[[ARG0]], %[[C1]] : tensor // CHECK-DAG: %[[D2:.*]] = tensor.dim %[[ARG1]], %[[C0]] : tensor // CHECK: %[[E:.*]] = tensor.empty(%[[D2]]) : tensor -// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) outs(%[[E]] : tensor) -> tensor +// CHECK: %[[F:.*]] = linalg.fill ins(%[[I]] : f32) inits(%[[E]] : tensor) -> tensor // CHECK: %[[L:.*]] = scf.for %[[K:.*]] = %[[C0]] to %[[D1]] step %[[C5]] iter_args(%[[ARG3:.*]] = %[[F]]) -> (tensor) { // CHECK: %[[PS:.*]] = affine.min #[[MAP2]](%[[K]])[%[[D1]]] // CHECK: %[[EXT2:.*]] = tensor.extract_slice %[[ARG0]][0, %[[K:.*]]] [%[[D0]], %[[PS]]] [1, 1] : tensor to tensor // CHECK: %[[EXT:.*]] = tensor.extract_slice %[[ARG3]][0, 0] [%[[D0]], %[[PS]]] [1, 1] : tensor to tensor -// CHECK: %[[PR:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[EXT2]] : tensor) outs(%[[EXT]] : tensor) { +// CHECK: %[[PR:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP0]]], iterator_types = ["parallel", "parallel"]} ins(%[[EXT2]] : tensor) inits(%[[EXT]] : tensor) { // CHECK: arith.mulf // CHECK: arith.addf // CHECK: linalg.yield @@ -47,7 +47,7 @@ // CHECK: %[[INS:.*]] = tensor.insert_slice %[[PR]] into %[[ARG3]][0, 0] [%[[D3]], %[[D4]]] [1, 1] : tensor into tensor // CHECK: scf.yield %[[INS]] : tensor // CHECK: } -// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) outs(%[[ARG1]] : tensor) { +// CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[MAP0]], #[[MAP1]]], iterator_types = ["parallel", "reduction"]} ins(%[[L]] : tensor) inits(%[[ARG1]] : tensor) { // CHECK: arith.addf // CHECK: linalg.yield // CHECK: } -> tensor @@ -60,7 +60,7 @@ affine_map<(d0, d1) -> (d1)>], iterator_types = ["reduction", "parallel"]} ins(%arg0 : tensor) - outs(%out : tensor) { + inits(%out : tensor) { ^bb0(%arg7: f32, %arg9: f32): %42 = arith.addf %arg7, %arg9 : f32 linalg.yield %42 : f32 diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir 
b/mlir/test/Dialect/Linalg/vectorization.mlir --- a/mlir/test/Dialect/Linalg/vectorization.mlir +++ b/mlir/test/Dialect/Linalg/vectorization.mlir @@ -8,7 +8,7 @@ // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584xf32> // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [0] : vector<1584xf32> to f32 linalg.dot ins(%A, %B: memref<1584xf32>, memref<1584xf32>) - outs(%C: memref) + inits(%C: memref) return } @@ -27,7 +27,7 @@ // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584x1584xf32> // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [1] : vector<1584x1584xf32> to vector<1584xf32> linalg.matvec ins(%A, %B: memref<1584x1584xf32>, memref<1584xf32>) - outs(%C: memref<1584xf32>) + inits(%C: memref<1584xf32>) return } @@ -45,7 +45,7 @@ // CHECK: arith.mulf %{{.*}}, %{{.*}} : vector<1584x1584x1584xf32> // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [2] : vector<1584x1584x1584xf32> to vector<1584x1584xf32> linalg.matmul ins(%A, %B: memref<1584x1584xf32>, memref<1584x1584xf32>) - outs(%C: memref<1584x1584xf32>) + inits(%C: memref<1584x1584xf32>) return } @@ -64,7 +64,7 @@ // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [3] : vector<1584x1584x1584x1584xf32> to vector<1584x1584x1584xf32> linalg.batch_matmul ins(%A, %B: memref<1584x1584x1584xf32>, memref<1584x1584x1584xf32>) - outs(%C: memref<1584x1584x1584xf32>) + inits(%C: memref<1584x1584x1584xf32>) return } @@ -99,7 +99,7 @@ // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xf32>, memref<8x32xf32> linalg.generic #matmul_trait ins(%A, %B : memref<8x16xf32>, memref<16x32xf32>) - outs(%C : memref<8x32xf32>) { + inits(%C : memref<8x32xf32>) { ^bb(%a: f32, %b: f32, %c: f32) : %d = arith.mulf %a, %b: f32 %e = arith.addf %c, %d: f32 @@ -139,7 +139,7 @@ // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xf32>, memref<32x8xf32> linalg.generic #matmul_transpose_out_trait ins(%A, %B : memref<8x16xf32>, memref<16x32xf32>) - outs(%C : memref<32x8xf32>) { + inits(%C : memref<32x8xf32>) { ^bb(%a: f32, %b: f32, %c: f32) : %d = arith.mulf %a, %b: f32 %e = arith.addf %c, %d: f32 @@ -168,7 +168,7 @@ %1 = linalg.generic {indexing_maps = [#map0, #map1], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0 : tensor<12x128x32xf32>) - outs(%0 : tensor<128x12x32xf32>) { + inits(%0 : tensor<128x12x32xf32>) { ^bb0(%arg1: f32, %arg2: f32): linalg.yield %arg1 : f32 } -> tensor<128x12x32xf32> @@ -206,7 +206,7 @@ // CHECK: vector.transfer_write %{{.*}}, %{{.*}} : vector<8x32xi32>, memref<8x32xi32> linalg.generic #matmul_trait ins(%A, %B : memref<8x16xi32>, memref<16x32xi32>) - outs(%C : memref<8x32xi32>) { + inits(%C : memref<8x32xi32>) { ^bb(%a: i32, %b: i32, %c: i32) : %d = arith.muli %a, %b: i32 %e = arith.addi %c, %d: i32 @@ -231,7 +231,7 @@ // CHECK: vector.multi_reduction , %{{.*}}, {{.*}} [2] : vector<8x32x16xf32> to vector<8x32xf32> linalg.matmul ins(%A, %B: memref<8x16xf32>, memref<16x32xf32>) - outs(%C: memref<8x32xf32>) + inits(%C: memref<8x32xf32>) return } @@ -252,7 +252,7 @@ indexing_maps = [affine_map<(m, n) -> ()>, affine_map<(m, n) -> (m, n)>], iterator_types = ["parallel", "parallel"]} ins(%arg0 : f32) - outs(%A: memref<8x16xf32>) { + inits(%A: memref<8x16xf32>) { ^bb(%0: f32, %1: f32) : linalg.yield %0 : f32 } @@ -276,7 +276,7 @@ indexing_maps = [affine_map<(m, n) -> ()>, affine_map<(m, n) -> (m, n)>], iterator_types = ["parallel", "parallel"]} ins(%arg0 : complex) - outs(%A: memref<8x16xcomplex>) { + inits(%A: memref<8x16xcomplex>) { ^bb(%0: complex, %1: complex) : linalg.yield %0 : complex } @@ -296,7 +296,7 @@ 
func.func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) { // CHECK: %[[V:.*]] = vector.broadcast {{.*}} : f32 to vector<8x16xf32> // CHECK: vector.transfer_write %[[V]], {{.*}} : vector<8x16xf32>, memref<8x16xf32> - linalg.fill ins(%arg0 : f32) outs(%A : memref<8x16xf32>) + linalg.fill ins(%arg0 : f32) inits(%A : memref<8x16xf32>) return } @@ -314,7 +314,7 @@ // CHECK-SAME: (%[[M:.*]]: memref, %[[val:.*]]: f32) // CHECK: %[[VEC:.*]] = vector.broadcast %[[val]] : f32 to vector // CHECK: vector.transfer_write %[[VEC]], %[[M]][] : vector, memref - linalg.fill ins(%arg0 : f32) outs(%A : memref) + linalg.fill ins(%arg0 : f32) inits(%A : memref) return } @@ -372,7 +372,7 @@ indexing_maps = [ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - outs(%arg0: memref<1x2x4x8xindex>) { + inits(%arg0: memref<1x2x4x8xindex>) { ^bb0(%arg1: index): // CHECK: %[[BCST:.*]] = vector.broadcast %[[CST0]] : vector<8xindex> to vector<1x2x4x8xindex> // CHECK: vector.transfer_write %[[BCST]], %[[ARG0]][%[[C0]], %[[C0]], %[[C0]], %[[C0]]] {{.*}} : vector<1x2x4x8xindex>, memref<1x2x4x8xindex> @@ -400,7 +400,7 @@ indexing_maps = [ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} - outs(%arg0: memref<1x2x4x8xindex>) { + inits(%arg0: memref<1x2x4x8xindex>) { ^bb0(%arg1: index): // CHECK: %[[BCST:.*]] = vector.broadcast %[[CST0]] : vector<2xindex> to vector<1x8x4x2xindex> // CHECK: %[[TRAN:.*]] = vector.transpose %[[BCST]], [0, 3, 2, 1] : vector<1x8x4x2xindex> to vector<1x2x4x8xindex> @@ -448,7 +448,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg1, %arg2: memref<4x256xf32>, memref<256xf32>) - outs( + inits( %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0 : memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, memref<4x256xf32>, @@ -530,7 +530,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg1, %arg2: tensor<4x256xf32>, tensor<256xf32>) - outs( + inits( %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0, %arg0 : tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, tensor<4x256xf32>, @@ -621,7 +621,7 @@ affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%B, %A, %A, %B: memref<4x4xf32>, memref<4xf32>, memref<4xf32>, memref<4x4xf32>) - outs(%C : memref<4x4x4x4xf32>) { + inits(%C : memref<4x4x4x4xf32>) { ^bb0(%arg0: f32, %arg1: f32, %arg2: f32, %arg3: f32, %arg4: f32): %s = arith.subf %arg0, %arg1 : f32 %a = arith.addf %arg2, %s : f32 @@ -665,7 +665,7 @@ %C: memref<16x14x7x8xf32>, %D: memref<7x14x8x16xf32>) { linalg.generic #matmul_trait ins(%A, %B, %C : memref<14x7xf32>, memref<16x14xf32>, memref<16x14x7x8xf32>) - outs(%D : memref<7x14x8x16xf32>) { + inits(%D : memref<7x14x8x16xf32>) { ^bb(%a: f32, %b: f32, %c: f32, %d: f32) : %e = arith.addf %a, %b: f32 %f = arith.addf %e, %c: f32 @@ -700,7 +700,7 @@ // CHECK: %[[R:.*]] = vector.multi_reduction , %[[MUL]], %[[V2]] [2] : vector<8x12x4xf32> to vector<8x12xf32> // CHECK: %[[W:.*]] = vector.transfer_write %[[R]], %[[ARG2]][%[[C0]], %[[C0]]] {in_bounds = [true, true]} : vector<8x12xf32>, tensor<8x12xf32> %0 = linalg.matmul ins(%arg0, %arg1: tensor<8x4xf32>, tensor<4x12xf32>) - outs(%arg2: 
tensor<8x12xf32>) + inits(%arg2: tensor<8x12xf32>) -> tensor<8x12xf32> // CHECK: return %[[W]] : tensor<8x12xf32> return %0 : tensor<8x12xf32> @@ -787,7 +787,7 @@ // CHECK: %[[V4:.*]] = arith.addi %[[DIM3]], %[[C3]] : index // CHECK: %[[V5:.*]] = arith.addi %[[V4]], %[[C2]] : index // CHECK: %[[INIT:.*]] = tensor.empty(%[[V1]], %[[V2]], %[[V5]]) : tensor<6x?x?x?xf32> -// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) outs(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32> +// CHECK: %[[FILL:.*]] = linalg.fill ins(%{{.*}} : f32) inits(%[[INIT]] : tensor<6x?x?x?xf32>) -> tensor<6x?x?x?xf32> // CHECK: %[[SRCDIM:.*]] = tensor.dim %[[SRC]], %[[C3]] : tensor<1x2x2x?xf32> // CHECK: %[[RESULT:.*]] = tensor.insert_slice %[[SRC]] into %[[FILL]][2, %[[LOW]], 3, 3] [1, 2, 2, %[[SRCDIM]]] [1, 1, 1, 1] : tensor<1x2x2x?xf32> into tensor<6x?x?x?xf32> // CHECK: return %[[RESULT]] @@ -1026,7 +1026,7 @@ affine_map<(d0, d1, d2) -> (d0, d1)> ], iterator_types = ["parallel", "parallel", "reduction"] - } ins(%input : tensor<4x16x8xf32>) outs(%output : tensor<4x16xf32>) { + } ins(%input : tensor<4x16x8xf32>) inits(%output : tensor<4x16xf32>) { ^bb0(%arg0: f32, %arg1: f32): %1 = math.exp %arg0 : f32 %2 = arith.addf %1, %arg1 : f32 @@ -1069,7 +1069,7 @@ affine_map<(d0, d1, d2, d3) -> (d3, d0)> ], iterator_types = ["parallel", "reduction", "reduction", "parallel"] - } ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) outs(%output : tensor<5x2xf32>) { + } ins(%input, %input_2 : tensor<3x2xf32>, tensor<5x4xf32>) inits(%output : tensor<5x2xf32>) { ^bb0(%arg0: f32, %arg1: f32, %arg2: f32): %1 = math.exp %arg0 : f32 %2 = math.exp %arg1 : f32 @@ -1098,11 +1098,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32> %ident = arith.constant -3.40282e+38 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%ident : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { + ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %max = arith.maxf %in0, %out0 : f32 linalg.yield %max : f32 @@ -1129,11 +1129,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32> %maxf32 = arith.constant 3.40282e+38 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%maxf32 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%maxf32 : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { + ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %min = arith.minf %out0, %in0 : f32 linalg.yield %min : f32 @@ -1159,11 +1159,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xf32>, tensor<4xf32> %ident = arith.constant 1.0 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%ident : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%ident : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", 
"reduction"]} - ins(%arg0 : tensor<4x4xf32>) outs(%fill : tensor<4xf32>) { + ins(%arg0 : tensor<4x4xf32>) inits(%fill : tensor<4xf32>) { ^bb0(%in0: f32, %out0: f32): %mul = arith.mulf %in0, %out0 : f32 linalg.yield %mul : f32 @@ -1189,11 +1189,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1> %ident = arith.constant false %init = tensor.empty() : tensor<4xi1> - %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1> + %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { + ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) { ^bb0(%in0: i1, %out0: i1): %or = arith.ori %in0, %out0 : i1 linalg.yield %or : i1 @@ -1219,11 +1219,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1> %ident = arith.constant true %init = tensor.empty() : tensor<4xi1> - %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1> + %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { + ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) { ^bb0(%in0: i1, %out0: i1): %and = arith.andi %in0, %out0 : i1 linalg.yield %and : i1 @@ -1249,11 +1249,11 @@ // CHECK: vector.transfer_write {{.*}} : vector<4xi1>, tensor<4xi1> %ident = arith.constant false %init = tensor.empty() : tensor<4xi1> - %fill = linalg.fill ins(%ident : i1) outs(%init : tensor<4xi1>) -> tensor<4xi1> + %fill = linalg.fill ins(%ident : i1) inits(%init : tensor<4xi1>) -> tensor<4xi1> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} - ins(%arg0 : tensor<4x4xi1>) outs(%fill : tensor<4xi1>) { + ins(%arg0 : tensor<4x4xi1>) inits(%fill : tensor<4xi1>) { ^bb0(%in0: i1, %out0: i1): %xor = arith.xori %in0, %out0 : i1 linalg.yield %xor : i1 @@ -1281,13 +1281,13 @@ // CHECK: vector.transfer_write {{.*}} {in_bounds = [true, true]} : vector<4x4xf32>, tensor<4x4xf32> %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<4x4xf32> - %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4x4xf32>) -> tensor<4x4xf32> + %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<4x4xf32>) -> tensor<4x4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, 0)>, affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>) - outs(%fill : tensor<4x4xf32>) { + inits(%fill : tensor<4x4xf32>) { ^bb0(%arg7: f32, %arg8: f32, %arg9: f32): %40 = arith.subf %arg7, %arg8 : f32 linalg.yield %40 : f32 @@ -1317,13 +1317,13 @@ // CHECK: vector.transfer_write {{.*}} {in_bounds = [true]} : vector<4xf32>, tensor<4xf32> %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<4xf32> - %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<4xf32>) -> tensor<4xf32> + %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<4xf32>) -> tensor<4xf32> %red = linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, 0)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", 
"reduction"]} ins(%arg0, %arg1 : tensor<4x4xf32>, tensor<4x1xf32>) - outs(%fill : tensor<4xf32>) { + inits(%fill : tensor<4xf32>) { ^bb0(%arg7: f32, %arg8: f32, %arg9: f32): %40 = arith.subf %arg7, %arg8 : f32 %41 = math.exp %40 : f32 @@ -1358,7 +1358,7 @@ // CHECK: %[[init:.*]] = tensor.empty() : tensor %0 = tensor.empty() : tensor - %1 = linalg.fill ins(%f0 : f32) outs(%0 : tensor) -> tensor + %1 = linalg.fill ins(%f0 : f32) inits(%0 : tensor) -> tensor // CHECK: %[[r:.*]] = vector.transfer_read %[[A]][%[[C0]]] // CHECK-SAME: : tensor<32xf32>, vector<32xf32> // CHECK: %[[f0:.*]] = vector.extractelement %[[vF0]][] : vector @@ -1372,7 +1372,7 @@ affine_map<(d0) -> ()>], iterator_types = ["reduction"]} ins(%arg0 : tensor<32xf32>) - outs(%1 : tensor) { + inits(%1 : tensor) { ^bb0(%a: f32, %b: f32): %3 = arith.addf %a, %b : f32 linalg.yield %3 : f32 @@ -1399,13 +1399,13 @@ func.func @not_projected_permutation(%arg0: tensor<8x8xf32>) -> tensor<6x6x3x3xf32> { %c0 = arith.constant 0.0 : f32 %init = tensor.empty() : tensor<6x6x3x3xf32> - %fill = linalg.fill ins(%c0 : f32) outs(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32> + %fill = linalg.fill ins(%c0 : f32) inits(%init : tensor<6x6x3x3xf32>) -> tensor<6x6x3x3xf32> // CHECK: linalg.generic %result = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0 + d2, d1 + d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg0 : tensor<8x8xf32>) - outs(%fill : tensor<6x6x3x3xf32>) { + inits(%fill : tensor<6x6x3x3xf32>) { ^bb0(%arg7: f32, %arg9: f32): linalg.yield %arg7 : f32 } -> tensor<6x6x3x3xf32> @@ -1430,7 +1430,7 @@ affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<2x4x8xf32>, tensor<2x4xf32>) - outs(%arg2, %arg3 : tensor<2x4x8xf32>, tensor<2x4xf32>) { + inits(%arg2, %arg3 : tensor<2x4x8xf32>, tensor<2x4xf32>) { ^bb0(%b0 : f32, %b1 : f32, %b2 : f32, %b3 : f32): %1 = arith.mulf %b0, %b1 : f32 %2 = arith.addf %1, %b3 : f32 @@ -1467,7 +1467,7 @@ %2 = linalg.generic { indexing_maps = [#map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%arg1, %arg2 : tensor<4x3xi32>, tensor<4x7x2xf32>) outs(%arg3 : tensor<4x7x3x2xf32>) { + } ins(%arg1, %arg2 : tensor<4x3xi32>, tensor<4x7x2xf32>) inits(%arg3 : tensor<4x7x3x2xf32>) { ^bb0(%arg4: i32, %arg5: f32, %arg6: f32): %3 = arith.index_cast %arg4 : i32 to index %7 = tensor.extract %arg0[%3] : tensor<3xf32> @@ -1504,7 +1504,7 @@ %2 = linalg.generic { indexing_maps = [#map0, #map0, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"] - } ins(%arg1, %arg2, %arg3 : tensor<4x3xi32>, tensor<4x3xi32>, tensor<4x7x2xf32>) outs(%arg4 : tensor<4x7x3x2xf32>) { + } ins(%arg1, %arg2, %arg3 : tensor<4x3xi32>, tensor<4x3xi32>, tensor<4x7x2xf32>) inits(%arg4 : tensor<4x7x3x2xf32>) { ^bb0(%arg5: i32, %arg6: i32, %arg7: f32, %arg8: f32): %3 = arith.index_cast %arg5 : i32 to index %4 = arith.index_cast %arg6 : i32 to index @@ -1528,7 +1528,7 @@ func.func @vectorize_map(%arg0: memref<64xf32>, %arg1: memref<64xf32>, %arg2: memref<64xf32>) { linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>) - outs(%arg2 : memref<64xf32>) + inits(%arg2 : memref<64xf32>) (%in: f32, %in_0: f32) { %0 = arith.addf %in, %in_0 : f32 linalg.yield %0 : f32 @@ -1552,7 +1552,7 @@ func.func @vectorize_transpose(%arg0: memref<16x32x64xf32>, %arg1: memref<32x64x16xf32>) { 
linalg.transpose ins(%arg0 : memref<16x32x64xf32>) - outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0] + inits(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0] return } // CHECK-LABEL: func @vectorize_transpose @@ -1571,7 +1571,7 @@ func.func @vectorize_reduce(%arg0: memref<16x32x64xf32>, %arg1: memref<16x64xf32>) { linalg.reduce ins(%arg0 : memref<16x32x64xf32>) - outs(%arg1 : memref<16x64xf32>) dimensions = [1] + inits(%arg1 : memref<16x64xf32>) dimensions = [1] (%in: f32, %init: f32) { %0 = arith.addf %in, %init : f32 linalg.yield %0 : f32 diff --git a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir --- a/mlir/test/Dialect/Linalg/vectorize-convolution.mlir +++ b/mlir/test/Dialect/Linalg/vectorize-convolution.mlir @@ -4,7 +4,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} ins(%input, %filter : memref<4x6x3xf32>, memref<1x3x8xf32>) - outs(%output : memref<4x2x8xf32>) + inits(%output : memref<4x2x8xf32>) return } @@ -67,7 +67,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} ins(%input, %filter : memref<4x6x3xi8>, memref<1x3x8xi8>) - outs(%output : memref<4x2x8xi32>) + inits(%output : memref<4x2x8xi32>) return } @@ -129,7 +129,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} ins(%input, %filter : memref<4x6x3xf32>, memref<2x3x8xf32>) - outs(%output : memref<4x2x8xf32>) + inits(%output : memref<4x2x8xf32>) return } @@ -206,7 +206,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %filter : memref<4x6x3xf32>, memref<2x3x8xf32>) - outs(%output : memref<4x2x8xf32>) + inits(%output : memref<4x2x8xf32>) return } @@ -255,7 +255,7 @@ linalg.conv_1d_ncw_fcw {dilations = dense<1> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} ins(%input, %filter : memref<4x3x6xf32>, memref<8x3x1xf32>) - outs(%output : memref<4x8x2xf32>) + inits(%output : memref<4x8x2xf32>) return } @@ -324,7 +324,7 @@ linalg.conv_1d_ncw_fcw {dilations = dense<2> : tensor<1xi64>, strides = dense<3> : tensor<1xi64>} ins(%input, %filter : memref<4x3x6xf32>, memref<8x3x2xf32>) - outs(%output : memref<4x8x2xf32>) + inits(%output : memref<4x8x2xf32>) return } @@ -409,7 +409,7 @@ linalg.conv_1d_ncw_fcw {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %filter : memref<4x3x6xf32>, memref<8x3x2xf32>) - outs(%output : memref<4x8x2xf32>) + inits(%output : memref<4x8x2xf32>) return } @@ -467,7 +467,7 @@ linalg.depthwise_conv_1d_nwc_wc {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %filter : memref<3x5x4xf32>, memref<2x4xf32>) - outs(%output : memref<3x2x4xf32>) + inits(%output : memref<3x2x4xf32>) return } @@ -508,7 +508,7 @@ linalg.depthwise_conv_1d_nwc_wc {dilations = dense<2> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins(%input, %filter : memref<3x5x4xi8>, memref<2x4xi8>) - outs(%output : memref<3x2x4xi32>) + inits(%output : memref<3x2x4xi32>) return } @@ -553,7 +553,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<1> : vector<1xi64>, strides = dense<1> : vector<1xi64>} ins(%input, %filter : memref<1x2x3xf16>, memref<1x3x2xf16>) - outs(%output : memref<1x2x2xf32>) + inits(%output : memref<1x2x2xf32>) return } diff --git a/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir b/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir --- 
a/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir +++ b/mlir/test/Dialect/SCF/foreach-thread-canonicalization.mlir @@ -5,9 +5,9 @@ %c2 = arith.constant 2 : index %cst_0 = arith.constant -0.000000e+00 : f32 %0 = memref.alloc() : memref<128x384xf32> - linalg.fill ins(%cst_0 : f32) outs(%0 : memref<128x384xf32>) + linalg.fill ins(%cst_0 : f32) inits(%0 : memref<128x384xf32>) %2 = memref.alloc() : memref<128xf32> - linalg.fill ins(%cst_0 : f32) outs(%2 : memref<128xf32>) + linalg.fill ins(%cst_0 : f32) inits(%2 : memref<128xf32>) scf.foreach_thread (%arg0) in (%c2) { %7 = affine.min affine_map<(d0) -> (d0 * -64 + 128, 64)>(%arg0) %8 = affine.max affine_map<(d0) -> (0, d0)>(%7) @@ -21,12 +21,12 @@ %12 = memref.subview %2[%9] [%10] [1] : memref<128xf32> to memref (d0 + s0)>> - // CHECK: linalg.generic {{.*}} ins(%{{.*}} : memref) outs(%{{.*}} : memref) + // CHECK: linalg.generic {{.*}} ins(%{{.*}} : memref) inits(%{{.*}} : memref) linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"]} ins(%11 : memref (d0 * 384 + s0 + d1)>>) - outs(%12 : memref (d0 + s0)>>) { + inits(%12 : memref (d0 + s0)>>) { ^bb0(%arg1: f32, %arg2: f32): %14 = arith.addf %arg1, %arg2 : f32 linalg.yield %14 : f32 diff --git a/mlir/test/Dialect/SCF/loop-pipelining.mlir b/mlir/test/Dialect/SCF/loop-pipelining.mlir --- a/mlir/test/Dialect/SCF/loop-pipelining.mlir +++ b/mlir/test/Dialect/SCF/loop-pipelining.mlir @@ -558,7 +558,7 @@ // CHECK-SAME: iter_args(%[[IA:.+]] = %[[PAV0]], %[[IB:.+]] = %[[PBV0:.+]]) // CHECK: %[[CV:.+]] = memref.subview %[[ARG2]] // CHECK: linalg.generic -// CHECK-SAME: ins(%[[IA]], %[[IB]], %{{.*}} : {{.*}}) outs(%[[CV]] : +// CHECK-SAME: ins(%[[IA]], %[[IB]], %{{.*}} : {{.*}}) inits(%[[CV]] : // CHECK: %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]] // CHECK: %[[ASV:.+]] = memref.subview %[[ARG0]][%[[NEXT]]] [8] [1] : // CHECK: %[[NEXT:.+]] = arith.addi %[[IV]], %[[C1]] : @@ -573,7 +573,7 @@ // CHECK: } // CHECK: %[[CV:.+]] = memref.subview %[[ARG2]][%[[C3]]] [8] [1] : // CHECK: linalg.generic -// CHECK-SAME: ins(%[[R]]#0, %[[R]]#1, %{{.*}} : {{.*}}) outs(%[[CV]] : +// CHECK-SAME: ins(%[[R]]#0, %[[R]]#1, %{{.*}} : {{.*}}) inits(%[[CV]] : #map = affine_map<(d0)[s0]->(d0 + s0)> @@ -608,7 +608,7 @@ %C_view = memref.subview %result[%i0][8][1] { __test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 0 } : memref to memref<8xf32, #map> %scalar = arith.addf %cf, %cf {__test_pipelining_stage__ = 1, __test_pipelining_op_order__ = 1} : f32 linalg.generic #linalg_attrs ins(%a_buf_view, %b_buf_view, %scalar : memref<8xf32, #map>, memref<8xf32, #map>, f32) - outs(%C_view: memref<8xf32, #map>) { + inits(%C_view: memref<8xf32, #map>) { ^bb0(%a: f32, %b: f32, %s: f32, %c: f32): %add = arith.addf %a, %b : f32 %accum = arith.addf %add, %c : f32 diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize-analysis.mlir @@ -153,7 +153,7 @@ // Use %t3 in some way without reading it, so that it does not get DCE'd. // CHECK: linalg.generic // CHECK-SAME: __inplace_operands_attr__ = ["true"] - %o = linalg.generic #trait outs (%t3 : tensor) { + %o = linalg.generic #trait inits (%t3 : tensor) { ^bb(%0: f32) : linalg.yield %cst : f32 } -> (tensor) @@ -191,7 +191,7 @@ // Write to %t1 via %t2. (Overwrite %t3.) 
// CHECK: linalg.generic // CHECK-SAME: __inplace_operands_attr__ = ["true"] - %o2 = linalg.generic #trait outs (%t2 : tensor) { + %o2 = linalg.generic #trait inits (%t2 : tensor) { ^bb(%0: f32) : linalg.yield %cst : f32 } -> (tensor) @@ -204,7 +204,7 @@ // Use %t3 in some way without reading it, so that it does not get DCE'd. // CHECK: linalg.generic // CHECK-SAME: __inplace_operands_attr__ = ["true"] - %o = linalg.generic #trait outs (%t3 : tensor) { + %o = linalg.generic #trait inits (%t3 : tensor) { ^bb(%0: f32) : linalg.yield %cst : f32 } -> (tensor) @@ -620,9 +620,9 @@ // CHECK: tensor.extract_slice {{.*}} {__inplace_operands_attr__ = ["true", "none"]} %6 = tensor.extract_slice %arg1[%arg0] [1] [1] : tensor<320xf32> to tensor<1xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %7 = linalg.fill ins(%cst : f32) outs(%6 : tensor<1xf32>) -> tensor<1xf32> + %7 = linalg.fill ins(%cst : f32) inits(%6 : tensor<1xf32>) -> tensor<1xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %8 = linalg.fill ins(%cst : f32) outs(%7 : tensor<1xf32>) -> tensor<1xf32> + %8 = linalg.fill ins(%cst : f32) inits(%7 : tensor<1xf32>) -> tensor<1xf32> scf.foreach_thread.perform_concurrently { // CHECK: tensor.parallel_insert_slice {{.*}} {__inplace_operands_attr__ = ["true", "true", "none"]} @@ -647,14 +647,14 @@ %0 = bufferization.alloc_tensor() : tensor<4xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<4xf32>) -> tensor<4xf32> %2 = scf.for %arg5 = %arg2 to %arg3 step %arg4 iter_args(%arg6 = %arg1) -> (tensor<4xf32>) { // CHECK: tensor.extract {{.*}} {__inplace_operands_attr__ = ["true", "none"]} %4 = tensor.extract %1[%arg4] : tensor<4xf32> vector.print %4 : f32 // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %5 = linalg.fill ins(%cst2 : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32> + %5 = linalg.fill ins(%cst2 : f32) inits(%0 : tensor<4xf32>) -> tensor<4xf32> scf.yield %5 : tensor<4xf32> } @@ -677,14 +677,14 @@ %0 = bufferization.alloc_tensor() : tensor<4xf32> // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<4xf32>) -> tensor<4xf32> + %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor<4xf32>) -> tensor<4xf32> %2 = scf.for %arg5 = %arg2 to %arg3 step %arg4 iter_args(%arg6 = %arg1) -> (tensor<4xf32>) { // CHECK: tensor.extract {{.*}} {__inplace_operands_attr__ = ["true", "none"]} %4 = tensor.extract %1[%arg4] : tensor<4xf32> vector.print %4 : f32 // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "false"]} - %5 = linalg.fill ins(%cst2 : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> + %5 = linalg.fill ins(%cst2 : f32) inits(%1 : tensor<4xf32>) -> tensor<4xf32> scf.yield %5 : tensor<4xf32> } @@ -693,7 +693,7 @@ %6 = tensor.extract %1[%arg4] : tensor<4xf32> vector.print %6 : f32 // CHECK: linalg.fill {__inplace_operands_attr__ = ["none", "true"]} - %7 = linalg.fill ins(%cst3 : f32) outs(%1 : tensor<4xf32>) -> tensor<4xf32> + %7 = linalg.fill ins(%cst3 : f32) inits(%1 : tensor<4xf32>) -> tensor<4xf32> return %2, %7 : tensor<4xf32>, tensor<4xf32> } diff --git a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir --- a/mlir/test/Dialect/SCF/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/SCF/one-shot-bufferize.mlir @@ -54,7 +54,7 @@ // CHECK: %[[clone:.*]] = 
bufferization.clone %[[alloc]] // CHECK: scf.for {{.*}} iter_args(%{{.*}} = %[[clone]]) %0 = scf.for %iv = %lb to %ub step %c1 iter_args(%1 = %A) -> tensor { - %r = linalg.fill ins(%cst : f32) outs(%1 : tensor) -> tensor + %r = linalg.fill ins(%cst : f32) inits(%1 : tensor) -> tensor scf.yield %B : tensor } %1 = tensor.extract %0[%c1] : tensor @@ -546,8 +546,8 @@ %2 = scf.foreach_thread (%arg3) in (%idx2) shared_outs(%o = %arg2) -> (tensor) { // CHECK: %[[subview:.*]] = memref.subview %[[arg2]][5] [%[[idx]]] [1] %6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor to tensor - // CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview]] : memref) -> tensor + // CHECK: linalg.fill ins(%{{.*}}) inits(%[[subview]] : memref) -> tensor // Self-copy will DCE away later. // CHECK: memref.copy %[[subview]], %[[subview]] @@ -574,8 +574,8 @@ // CHECK-SAME: %[[arg1:.*]]: memref, // CHECK-SAME: %[[arg2:.*]]: memref func.func @parallel_insert_slice_with_conflict( - %idx: index, - %idx2: index, + %idx: index, + %idx2: index, %arg1: tensor {bufferization.writable = true}, %arg2: tensor {bufferization.writable = true}) -> (f32, f32) { @@ -593,8 +593,8 @@ // CHECK: %[[subview1:.*]] = memref.subview %[[alloc1]][5] [%[[idx]]] [1] %6 = tensor.extract_slice %o[5] [%idx] [%c1] : tensor to tensor - // CHECK: linalg.fill ins(%{{.*}}) outs(%[[subview1]] : memref) -> tensor + // CHECK: linalg.fill ins(%{{.*}}) inits(%[[subview1]] : memref) -> tensor // Now the copy of the actual insert_slice. (It will fold away.) // CHECK: memref.copy %[[subview1]], %[[subview1]] @@ -636,8 +636,8 @@ %6 = tensor.extract_slice %arg1[0, %4] [8, 4] [1, 1] : tensor<8x8xf32> to tensor<8x4xf32> %7 = tensor.extract_slice %o[%1, %4] [4, 4] [1, 1] : tensor<8x8xf32> to tensor<4x4xf32> - // CHECK: linalg.matmul ins({{.*}}memref<4x8xf32, strided<[?, ?], offset: ?>>, memref<8x4xf32, strided<[?, ?], offset: ?>>) outs({{.*}} : memref<4x4xf32, strided<[?, ?], offset: ?>>) - %8 = linalg.matmul ins(%3, %6 : tensor<4x8xf32>, tensor<8x4xf32>) outs(%7 : tensor<4x4xf32>) -> tensor<4x4xf32> + // CHECK: linalg.matmul ins({{.*}}memref<4x8xf32, strided<[?, ?], offset: ?>>, memref<8x4xf32, strided<[?, ?], offset: ?>>) inits({{.*}} : memref<4x4xf32, strided<[?, ?], offset: ?>>) + %8 = linalg.matmul ins(%3, %6 : tensor<4x8xf32>, tensor<8x4xf32>) inits(%7 : tensor<4x4xf32>) -> tensor<4x4xf32> scf.foreach_thread.perform_concurrently { tensor.parallel_insert_slice %8 into %o[%1, %4] [4, 4] [1, 1] : tensor<4x4xf32> into tensor<8x8xf32> } @@ -872,7 +872,7 @@ // ----- // CHECK-LABEL: func @non_tensor_for_arg -func.func @non_tensor_for_arg(%A : tensor {bufferization.writable = true}) +func.func @non_tensor_for_arg(%A : tensor {bufferization.writable = true}) -> tensor { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -888,7 +888,7 @@ // ----- // This is a regression test. Just check that the IR bufferizes. - + // CHECK-LABEL: func @buffer_type_of_collapse_shape func.func @buffer_type_of_collapse_shape(%arg0: tensor) { %true = arith.constant true @@ -906,10 +906,10 @@ // ----- // This is a regression test. Just check that the IR bufferizes. 
- + // CHECK-LABEL: func @non_block_argument_yield func.func @non_block_argument_yield() { - %true = arith.constant true + %true = arith.constant true %0 = bufferization.alloc_tensor() : tensor %1 = scf.while (%arg0 = %0) : (tensor) -> (tensor) { scf.condition(%true) %arg0 : tensor diff --git a/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir b/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir --- a/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir +++ b/mlir/test/Dialect/SparseTensor/buffer_rewriting.mlir @@ -47,14 +47,14 @@ // CHECK: } do { // CHECK: ^bb0(%[[I2:.*]]: index): // CHECK: scf.yield %[[I2]] : index -// CHECK: } +// CHECK: } // CHECK: %[[M2:.*]] = memref.realloc %[[B]](%[[P2]]) // CHECK: scf.yield %[[M2]] : memref // CHECK: } else { // CHECK: scf.yield %[[B]] : memref // CHECK: } // CHECK: %[[S:.*]] = memref.subview %[[M]]{{\[}}%[[S1]]] {{\[}}%[[D]]] [1] -// CHECK: linalg.fill ins(%[[C]] : f64) outs(%[[S]] +// CHECK: linalg.fill ins(%[[C]] : f64) inits(%[[S]] // CHECK: memref.store %[[S2]], %[[A]]{{\[}}%[[C2]]] // CHECK: return %[[M]] : memref func.func @sparse_push_back_n(%arg0: memref, %arg1: memref, %arg2: f64, %arg3: index) -> memref { diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -291,7 +291,7 @@ // CHECK: %[[T5:.*]] = memref.cast %[[T4]] : memref<16xindex> to memref // CHECK: %[[T6:.*]] = memref.alloc() : memref<16xf64> // CHECK: %[[T7:.*]] = memref.cast %[[T6]] : memref<16xf64> to memref -// CHECK: linalg.fill ins(%[[C0]] : index) outs(%[[T1]] : memref<3xindex>) +// CHECK: linalg.fill ins(%[[C0]] : index) inits(%[[T1]] : memref<3xindex>) // CHECK: memref.store %[[A]], %[[T0]][%[[C0]]] : memref<2xindex> // CHECK: memref.store %[[C10]], %[[T0]][%[[C1]]] : memref<2xindex> // CHECK: %[[P0:.*]] = sparse_tensor.push_back %[[T1]], %[[T3]] @@ -318,7 +318,7 @@ // CHECK: %[[A1:.*]] = memref.alloc() : memref<1xindex> // CHECK: %[[AV:.*]] = memref.alloc() : memref<16xf64> // CHECK: %[[A2:.*]] = memref.cast %[[AV]] : memref<16xf64> to memref -// CHECK: linalg.fill ins(%[[C0]] : index) outs(%[[A1]] : memref<1xindex>) +// CHECK: linalg.fill ins(%[[C0]] : index) inits(%[[A1]] : memref<1xindex>) // CHECK: memref.store %[[C30]], %[[A0]][%[[C0]]] : memref<3xindex> // CHECK: memref.store %[[C10]], %[[A0]][%[[C1]]] : memref<3xindex> // CHECK: memref.store %[[C20]], %[[A0]][%[[C2]]] : memref<3xindex> @@ -336,8 +336,8 @@ // CHECK: %[[B:.*]] = memref.alloc() : memref<8xi1> // CHECK: %[[C:.*]] = memref.alloc() : memref<8xindex> // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<8xindex> to memref -// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<8xf64>) -// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<8xi1>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<8xf64>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<8xi1>) // CHECK: return %[[D]] : memref func.func @sparse_expansion1() -> memref { %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSR> @@ -351,8 +351,8 @@ // CHECK: %[[B:.*]] = memref.alloc() : memref<4xi1> // CHECK: %[[C:.*]] = memref.alloc() : memref<4xindex> // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<4xindex> to memref -// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<4xf64>) -// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<4xi1>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : 
memref<4xf64>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<4xi1>) // CHECK: return %[[D]] : memref func.func @sparse_expansion2() -> memref { %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSC> @@ -371,8 +371,8 @@ // CHECK: %[[V:.*]] = memref.alloc(%[[D1]]) : memref // CHECK: %[[B:.*]] = memref.alloc(%[[D1]]) : memref // CHECK: %[[D:.*]] = memref.alloc(%[[D1]]) : memref -// CHECK: linalg.fill ins(%{{.*}} : f64) outs(%[[V]] : memref) -// CHECK: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK: linalg.fill ins(%{{.*}} : f64) inits(%[[V]] : memref) +// CHECK: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK: return %[[D]] : memref func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { %0 = bufferization.alloc_tensor(%arg0, %arg1) : tensor diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir --- a/mlir/test/Dialect/SparseTensor/conversion.mlir +++ b/mlir/test/Dialect/SparseTensor/conversion.mlir @@ -321,8 +321,8 @@ // CHECK: %[[B:.*]] = memref.alloc() : memref<8xi1> // CHECK: %[[C:.*]] = memref.alloc() : memref<8xindex> // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<8xindex> to memref -// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<8xf64>) -// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<8xi1>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<8xf64>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<8xi1>) // CHECK: return %[[D]] : memref func.func @sparse_expansion1() -> memref { %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSR> @@ -337,8 +337,8 @@ // CHECK: %[[B:.*]] = memref.alloc() : memref<4xi1> // CHECK: %[[C:.*]] = memref.alloc() : memref<4xindex> // CHECK: %[[D:.*]] = memref.cast %[[C]] : memref<4xindex> to memref -// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref<4xf64>) -// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref<4xi1>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref<4xf64>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref<4xi1>) // CHECK: return %[[D]] : memref func.func @sparse_expansion2() -> memref { %0 = bufferization.alloc_tensor() : tensor<4x8xf64, #CSC> @@ -354,8 +354,8 @@ // CHECK: %[[A:.*]] = memref.alloc(%[[S]]) : memref // CHECK: %[[B:.*]] = memref.alloc(%[[S]]) : memref // CHECK: %[[C:.*]] = memref.alloc(%[[S]]) : memref -// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-DAG: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK: return %[[C]] : memref func.func @sparse_expansion3(%arg0: index, %arg1: index) -> memref { %0 = bufferization.alloc_tensor(%arg0, %arg1) : tensor diff --git a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir --- a/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir +++ b/mlir/test/Dialect/SparseTensor/convert_sparse2dense.mlir @@ -37,7 +37,7 @@ // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<13xi32> -// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) outs(%[[M]] : memref<13xi32>) +// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) inits(%[[M]] : memref<13xi32>) // CHECK: scf.while : () -> () 
{ // CHECK: %[[Cond:.*]] = func.call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -75,7 +75,7 @@ // CHECK-DAG: %[[IndD:.*]] = memref.cast %[[IndS]] : memref<1xindex> to memref // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref -// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) outs(%[[M]] : memref) +// CHECK-DAG: linalg.fill ins(%[[zeroI32]] : i32) inits(%[[M]] : memref) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextI32(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -118,7 +118,7 @@ // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x4xf64> // CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x4xf64>) +// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref<2x4xf64>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -136,7 +136,7 @@ // CHECK-RWT-SAME: %[[A:.*]]: tensor<2x4xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>>) -> tensor<2x4xf64> { // CHECK-RWT: %[[F0:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-RWT: %[[B:.*]] = memref.alloc() : memref<2x4xf64> -// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) outs(%[[B]] +// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) inits(%[[B]] // CHECK-RWT: sparse_tensor.foreach in %[[A]] // CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64): // CHECK-RWT: memref.store %[[FV]], %[[B]]{{\[}}%[[FI0]], %[[FI1]]] @@ -174,7 +174,7 @@ // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]]) : memref // CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref) +// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -218,7 +218,7 @@ // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI1]]) : memref<2x?xf64> // CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x?xf64>) +// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref<2x?xf64>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -262,7 +262,7 @@ // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc(%[[SizeI0]], %[[SizeI1]]) : memref // CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref) +// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) @@ -284,7 +284,7 @@ // CHECK-RWT: %[[D0:.*]] = tensor.dim %[[A]], %[[C0]] // 
CHECK-RWT: %[[D1:.*]] = tensor.dim %[[A]], %[[C1]] // CHECK-RWT: %[[B:.*]] = memref.alloc(%[[D0]], %[[D1]]) -// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) outs(%[[B]] +// CHECK-RWT: linalg.fill ins(%[[F0]] : f64) inits(%[[B]] // CHECK-RWT: sparse_tensor.foreach in %[[A]] // CHECK-RWT: ^bb0(%[[FI0:.*]]: index, %[[FI1:.*]]: index, %[[FV:.*]]: f64): // CHECK-RWT: memref.store %[[FV]], %[[B]]{{\[}}%[[FI0]], %[[FI1]]] @@ -326,7 +326,7 @@ // CHECK-DAG: %[[ElemBuffer:.*]] = memref.alloca() : memref // CHECK-DAG: %[[M:.*]] = memref.alloc() : memref<2x3x4xf64> // CHECK-DAG: %[[E0:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) outs(%[[M]] : memref<2x3x4xf64>) +// CHECK-DAG: linalg.fill ins(%[[E0]] : f64) inits(%[[M]] : memref<2x3x4xf64>) // CHECK: scf.while : () -> () { // CHECK: %[[Cond:.*]] = func.call @getNextF64(%[[Iter]], %[[IndD]], %[[ElemBuffer]]) : (!llvm.ptr, memref, memref) -> i1 // CHECK: scf.condition(%[[Cond]]) diff --git a/mlir/test/Dialect/SparseTensor/dense.mlir b/mlir/test/Dialect/SparseTensor/dense.mlir --- a/mlir/test/Dialect/SparseTensor/dense.mlir +++ b/mlir/test/Dialect/SparseTensor/dense.mlir @@ -59,7 +59,7 @@ %c = arith.constant 1.0 : f32 %0 = linalg.generic #trait_2d ins(%arga: tensor<32x16xf32, #DenseMatrix>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %x: f32): %1 = arith.addf %a, %c : f32 linalg.yield %1 : f32 @@ -99,7 +99,7 @@ %c = arith.constant 1.0 : f32 %0 = linalg.generic #trait_2d ins(%arga: tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32, #DenseMatrix>) { + inits(%argx: tensor<32x16xf32, #DenseMatrix>) { ^bb(%a: f32, %x: f32): %1 = arith.addf %a, %c : f32 linalg.yield %1 : f32 @@ -145,7 +145,7 @@ -> tensor<32x16xf32, #DenseMatrix> { %0 = linalg.generic #trait_3d ins(%arga: tensor<32x16x8xf32>) - outs(%argx: tensor<32x16xf32, #DenseMatrix>) { + inits(%argx: tensor<32x16xf32, #DenseMatrix>) { ^bb(%a: f32, %x: f32): %1 = arith.addf %x, %a : f32 linalg.yield %1 : f32 diff --git a/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir b/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir --- a/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir +++ b/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir @@ -59,12 +59,12 @@ -> (tensor<10xf32>, tensor<10xf32>) { // CHECK: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[argb]]) {bufferization.escape = [false]} : tensor<10xf32> - // CHECK: linalg.generic {{.*}} outs(%[[alloc]] + // CHECK: linalg.generic {{.*}} inits(%[[alloc]] // CHECK-FUNC: %[[alloc:.*]] = bufferization.alloc_tensor() copy(%[[argb]]) {bufferization.escape = [true]} : tensor<10xf32> - // CHECK-FUNC: linalg.generic {{.*}} outs(%[[alloc]] + // CHECK-FUNC: linalg.generic {{.*}} inits(%[[alloc]] %0 = linalg.generic #trait ins(%arga: tensor<10xf32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: f32, %x : f32): %up = arith.addf %a, %x : f32 linalg.yield %up : f32 diff --git a/mlir/test/Dialect/SparseTensor/one_trip.mlir b/mlir/test/Dialect/SparseTensor/one_trip.mlir --- a/mlir/test/Dialect/SparseTensor/one_trip.mlir +++ b/mlir/test/Dialect/SparseTensor/one_trip.mlir @@ -25,7 +25,7 @@ func.func @sparse_scale(%argx: tensor<1x1xf32, #Dense>) -> tensor<1x1xf32, #Dense> { %c = arith.constant 2.0 : f32 %0 = linalg.generic #trait_scale - outs(%argx: tensor<1x1xf32, #Dense>) { + inits(%argx: tensor<1x1xf32, #Dense>) { ^bb(%x: f32): %1 = arith.mulf %x, %c : f32 
linalg.yield %1 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir --- a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir +++ b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir @@ -56,7 +56,7 @@ func.func @sparse_scale(%argx: tensor) -> tensor { %c = arith.constant 2.0 : f32 %0 = linalg.generic #trait_scale - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%x: f32): %1 = arith.mulf %x, %c : f32 linalg.yield %1 : f32 @@ -96,7 +96,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #SortedCOO>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 @@ -121,7 +121,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x64xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed-nu", "singleton" ] }>> to memref // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x64xf64> -// CHECK-DAG: linalg.fill ins(%[[VAL_3]] : f64) outs(%[[VAL_14]] : memref<32x64xf64>) +// CHECK-DAG: linalg.fill ins(%[[VAL_3]] : f64) inits(%[[VAL_14]] : memref<32x64xf64>) // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_4]]] : memref @@ -190,7 +190,7 @@ %argz: tensor<32x64xf64>) -> tensor<32x64xf64> { %0 = linalg.generic #trait_mul ins(%argx, %argy : tensor<32x64xf64, #SortedCOO>, tensor<32x64xf64, #SortedCOO>) - outs(%argz: tensor<32x64xf64>) { + inits(%argz: tensor<32x64xf64>) { ^bb(%x: f64, %y: f64, %z: f64): %1 = arith.mulf %x, %y : f64 linalg.yield %1 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir @@ -22,7 +22,7 @@ // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_8]] : memref<32xf32>) // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_11:.*]] = arith.addf %[[VAL_10]], %[[VAL_1]] : f32 @@ -34,7 +34,7 @@ func.func @add_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf32, #DV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.addf %a, %argb : f32 linalg.yield %0 : f32 @@ -52,7 +52,7 @@ // CHECK: %[[VAL_INITTENSOR:.*]] = tensor.empty() : tensor<32xf32> // CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref // CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_INITTENSOR]] : memref<32xf32> -// CHECK: linalg.fill 
ins(%[[VAL_3]] : f32) outs(%[[VAL_7]] : memref<32xf32>) +// CHECK: linalg.fill ins(%[[VAL_3]] : f32) inits(%[[VAL_7]] : memref<32xf32>) // CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] { // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_10:.*]] = arith.addf %[[VAL_9]], %[[VAL_1]] : f32 @@ -65,7 +65,7 @@ %u = tensor.empty() : tensor<32xf32> %0 = linalg.generic #trait1 ins(%arga: tensor<32xf32, #DV>) - outs(%u: tensor<32xf32>) { + inits(%u: tensor<32xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.addf %a, %argb : f32 linalg.yield %0 : f32 @@ -82,7 +82,7 @@ // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_8]] : memref<32xf32>) // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_11:.*]] = arith.mulf %[[VAL_10]], %[[VAL_1]] : f32 @@ -94,7 +94,7 @@ func.func @mul_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf32, #DV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %argb : f32 linalg.yield %0 : f32 @@ -116,7 +116,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_11]] : memref<32xf32>) // CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { // CHECK: %[[VAL_17:.*]] = arith.cmpi ult, %[[VAL_15]], %[[VAL_13]] : index // CHECK: scf.condition(%[[VAL_17]]) %[[VAL_15]], %[[VAL_16]] : index, index @@ -149,7 +149,7 @@ func.func @add_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.addf %a, %argb : f32 linalg.yield %0 : f32 @@ -168,7 +168,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] // CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_8]] : memref<32xf32>) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_3]] { // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_11]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref @@ -186,7 +186,7 @@ func.func @repeated_add_s(%arga: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.addf %a, %a : f32 // same tensor %1 
= arith.addf %a, %a : f32 // should yield @@ -206,7 +206,7 @@ // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_9]] : memref<32xf32>) // CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] { @@ -221,7 +221,7 @@ func.func @mul_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %argb : f32 linalg.yield %0 : f32 @@ -249,7 +249,7 @@ // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref // CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_9]] : memref<32xf32>) // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32> @@ -262,7 +262,7 @@ func.func @add_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32, #DV>, tensor<32xf32>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -280,7 +280,7 @@ // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense" ] }>> to memref // CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_9]] : memref<32xf32>) // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref<32xf32> @@ -293,7 +293,7 @@ func.func @mul_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32, #DV>, tensor<32xf32>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -314,7 +314,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = 
sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>) // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { @@ -352,7 +352,7 @@ func.func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32>, tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -371,7 +371,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32xf32>) // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref // CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] { @@ -387,7 +387,7 @@ func.func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32>, tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -408,7 +408,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>) // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_15:.*]]:2 = scf.while (%[[VAL_16:.*]] = %[[VAL_13]], %[[VAL_17:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) { @@ -446,7 +446,7 @@ func.func @add_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -465,7 +465,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref 
%[[VAL_1]] : memref<32xf32> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32xf32>) // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_4]] { @@ -481,7 +481,7 @@ func.func @mul_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -502,7 +502,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>) // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref @@ -564,7 +564,7 @@ func.func @add_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -585,7 +585,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32xf32>) // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref @@ -625,7 +625,7 @@ func.func @mul_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf32, #SV>, tensor<32xf32, #SV>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -647,7 +647,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ 
dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<16xf32>) // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref @@ -716,7 +716,7 @@ // Kernel "x(i) = a(i) * c + b(i) * c". %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>) - outs(%argx: tensor<16xf32>) { + inits(%argx: tensor<16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %argc : f32 %1 = arith.mulf %b, %argc : f32 @@ -740,7 +740,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<16xf32>) // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref @@ -809,7 +809,7 @@ // Same kernel, but now expressed as "x(i) = (a(i) + b(i)) * c". %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>) - outs(%argx: tensor<16xf32>) { + inits(%argx: tensor<16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 %1 = arith.mulf %0, %argc : f32 @@ -850,7 +850,7 @@ func.func @sum_reduction(%arga: tensor, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 @@ -954,7 +954,7 @@ // as two separate reductions kernels. %0 = linalg.generic #trait_sum_reduction2 ins(%arga, %argb: tensor<16xf32, #SV>, tensor<16xf32, #SV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -1067,7 +1067,7 @@ // as two separate reductions kernels. 
%0 = linalg.generic #trait_sum_reduction_inv ins(%arga, %argb, %argc : tensor<16xf32, #SV>, tensor, tensor<16xf32, #SV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %b: f32, %c: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %0, %c : f32 @@ -1108,7 +1108,7 @@ // CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_3]] : tensor<?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_16:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor // CHECK-DAG: %[[VAL_18:.*]] = bufferization.to_memref %[[VAL_4]] -// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_18]] : memref) +// CHECK-DAG: linalg.fill ins(%{{.*}} : f64) inits(%[[VAL_18]] : memref) // CHECK-DAG: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_5]]] : memref // CHECK-DAG: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref // CHECK-DAG: %[[VAL_21:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref @@ -1246,7 +1246,7 @@ %argx: tensor) -> tensor { %r = linalg.generic #trait_four_tensors ins(%arga, %argb, %argc, %argd: tensor, tensor, tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f64, %b: f64, %c: f64, %d: f64, %x: f64): %0 = arith.addf %a, %b : f64 %1 = arith.addf %c, %d : f64 @@ -1559,7 +1559,7 @@ %argc: tensor, %argx: tensor) ->tensor{ %0 = linalg.generic #trait_red3s ins(%arga, %argb, %argc: tensor, tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f64,%b: f64,%c: f64,%x: f64): %0 = arith.addf %x, %a : f64 %1 = arith.addf %0, %b : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir @@ -27,7 +27,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32x16xf32>) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]], %[[VAL_4]] : index @@ -44,7 +44,7 @@ func.func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tdd>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -63,7 +63,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_10]] : memref<32x16xf32>) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_11]],
%[[VAL_4]] : index @@ -80,7 +80,7 @@ func.func @mul_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tdd>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -102,7 +102,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<32x16xf32>) // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_14]]] : memref // CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_14]], %[[VAL_7]] : index @@ -143,7 +143,7 @@ func.func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tds>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -163,7 +163,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_11]] : memref<32x16xf32>) // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref // CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_5]] : index @@ -182,7 +182,7 @@ func.func @mul_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tds>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -204,7 +204,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<32x16xf32>) // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_16:.*]]:2 = scf.while (%[[VAL_17:.*]] = %[[VAL_14]], %[[VAL_18:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { @@ -250,7 +250,7 @@ func.func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: 
tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -270,7 +270,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_11]] : memref<32x16xf32>) // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_5]] { @@ -290,7 +290,7 @@ func.func @mul_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -314,7 +314,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_15]] : memref<32x16xf32>) // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_16]], %[[VAL_20:.*]] = %[[VAL_6]]) : (index, index) -> (index, index) { @@ -384,7 +384,7 @@ func.func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -405,7 +405,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_12]] : memref<32x16xf32>) // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_4]] { @@ -427,7 +427,7 @@ func.func @mul_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = 
linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -452,7 +452,7 @@ // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_16]] : memref<32x16xf32>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref @@ -591,7 +591,7 @@ func.func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32, #Tss>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -616,7 +616,7 @@ // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_16]] : memref<32x16xf32>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_3]]] : memref @@ -687,7 +687,7 @@ func.func @mul_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #Tss>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tss>, tensor<32x16xf32, #Tss>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -711,7 +711,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_15]] : memref<32x16xf32>) // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_7]]] : memref // CHECK: 
%[[VAL_18:.*]]:2 = scf.while (%[[VAL_19:.*]] = %[[VAL_16]], %[[VAL_20:.*]] = %[[VAL_5]]) : (index, index) -> (index, index) { @@ -793,7 +793,7 @@ func.func @add_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32, #Tds>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -815,7 +815,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> -// CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>) +// CHECK: linalg.fill ins(%{{.*}} : f32) inits(%[[VAL_13]] : memref<32x16xf32>) // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_14]] to %[[VAL_15]] step %[[VAL_5]] { @@ -839,7 +839,7 @@ func.func @mul_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #Tds>, %argx: tensor<32x16xf32>) -> tensor<32x16xf32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32x16xf32, #Tsd>, tensor<32x16xf32, #Tds>) - outs(%argx: tensor<32x16xf32>) { + inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -890,7 +890,7 @@ func.func @matvec(%argA: tensor<16x32xf32, #Tds>, %argb: tensor<32xf32>, %argx: tensor<16xf32>) -> tensor<16xf32> { %0 = linalg.generic #trait_matvec ins(%argA, %argb: tensor<16x32xf32, #Tds>, tensor<32xf32>) - outs(%argx: tensor<16xf32>) { + inits(%argx: tensor<16xf32>) { ^bb(%A: f32, %b: f32, %x: f32): %0 = arith.mulf %A, %b : f32 %1 = arith.addf %0, %x : f32 @@ -936,7 +936,7 @@ func.func @sum_reduction(%arga: tensor<10x20xf32, #Tds>, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction ins(%arga: tensor<10x20xf32, #Tds>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 @@ -964,7 +964,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<?x?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor<?x?xf64, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed" ] }>> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref -// CHECK: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_11]] : memref) +// CHECK: linalg.fill ins(%{{.*}} : f64) inits(%[[VAL_11]] : memref) // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_4]] { // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_12]]] : memref // CHECK: %[[VAL_14:.*]] = arith.addi %[[VAL_12]], %[[VAL_4]] : index @@ -983,7 +983,7 @@ %0 = arith.constant 2.0 : f64 %1 = linalg.generic #trait_scale ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f64, %x: f64): %2 = arith.mulf %a, %0 : f64 linalg.yield %2 : f64 @@ -1049,7 +1049,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_sampled_dense_dense ins(%args, %arga, %argb: tensor, tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%s: f32, %a: f32, %b: f32, %x: f32): %0 =
arith.mulf %a, %b : f32 %1 = arith.mulf %s, %0 : f32 @@ -1273,7 +1273,7 @@ tensor, tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %b: f32, %c: f32, %d: f32, %e: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.mulf %0, %d : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_3d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_3d.mlir @@ -35,7 +35,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_11]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { // CHECK: %[[VAL_14:.*]] = arith.muli %[[VAL_12]], %[[VAL_4]] : index @@ -56,7 +56,7 @@ func.func @add_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tddd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -77,7 +77,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_11]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_6]] to %[[VAL_3]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { // CHECK: %[[VAL_14:.*]] = arith.muli %[[VAL_12]], %[[VAL_4]] : index @@ -98,7 +98,7 @@ func.func @mul_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tddd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -122,7 +122,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_15]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_9]] { // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_9]] { // CHECK: %[[VAL_18:.*]] = arith.muli %[[VAL_16]], 
%[[VAL_5]] : index @@ -167,7 +167,7 @@ func.func @add_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdds>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -189,7 +189,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_13]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_6]] to %[[VAL_4]] step %[[VAL_7]] { // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_6]] to %[[VAL_5]] step %[[VAL_7]] { // CHECK: %[[VAL_16:.*]] = arith.muli %[[VAL_14]], %[[VAL_5]] : index @@ -212,7 +212,7 @@ func.func @mul_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdds>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -236,7 +236,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_7]] to %[[VAL_3]] step %[[VAL_8]] { // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_15]]] : memref // CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_8]] : index @@ -285,7 +285,7 @@ func.func @add_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdsd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -307,7 +307,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_12]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref // CHECK: %[[VAL_15:.*]] = arith.addi %[[VAL_13]], %[[VAL_6]] : index @@ -330,7 +330,7 
@@ func.func @mul_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdsd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -356,7 +356,7 @@ // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_17]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_8]] to %[[VAL_4]] step %[[VAL_9]] { // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_20:.*]] = arith.addi %[[VAL_18]], %[[VAL_9]] : index @@ -429,7 +429,7 @@ func.func @add_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -452,7 +452,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_15]]] : memref // CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_6]] : index @@ -477,7 +477,7 @@ func.func @mul_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tdss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -501,7 +501,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_17:.*]]:2 = scf.while (%[[VAL_18:.*]] = %[[VAL_15]], %[[VAL_19:.*]] = %[[VAL_7]]) : (index, 
index) -> (index, index) { @@ -555,7 +555,7 @@ func.func @add_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsdd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -577,7 +577,7 @@ // CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_12]] : memref<32x16x8xf32>) // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK: scf.for %[[VAL_15:.*]] = %[[VAL_13]] to %[[VAL_14]] step %[[VAL_6]] { @@ -601,7 +601,7 @@ func.func @mul_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsdd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -627,7 +627,7 @@ // CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_17]] : memref<32x16x8xf32>) // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_20:.*]]:2 = scf.while (%[[VAL_21:.*]] = %[[VAL_18]], %[[VAL_22:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { @@ -705,7 +705,7 @@ func.func @add_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsds>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -728,7 +728,7 @@ // CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "dense", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_14]] : memref<32x16x8xf32>) // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref 
// CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_15]] to %[[VAL_16]] step %[[VAL_6]] { @@ -754,7 +754,7 @@ func.func @mul_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsds>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -780,7 +780,7 @@ // CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_16]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_16]] : memref<32x16x8xf32>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_19:.*]]:2 = scf.while (%[[VAL_20:.*]] = %[[VAL_17]], %[[VAL_21:.*]] = %[[VAL_7]]) : (index, index) -> (index, index) { @@ -862,7 +862,7 @@ func.func @add_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tssd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -885,7 +885,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_13]] : memref<32x16x8xf32>) // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_14]] to %[[VAL_15]] step %[[VAL_5]] { @@ -911,7 +911,7 @@ func.func @mul_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tssd>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -939,7 +939,7 @@ // CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_19:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_19]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_19]] : memref<32x16x8xf32>) // CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_8]]] : memref // CHECK: 
%[[VAL_21:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_9]]] : memref // CHECK: %[[VAL_22:.*]]:2 = scf.while (%[[VAL_23:.*]] = %[[VAL_20]], %[[VAL_24:.*]] = %[[VAL_8]]) : (index, index) -> (index, index) { @@ -1045,7 +1045,7 @@ func.func @add_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.addf %a, %b : f32 linalg.yield %0 : f32 @@ -1069,7 +1069,7 @@ // CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{ dimLevelType = [ "compressed", "compressed", "compressed" ] }>> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_15]] : memref<32x16x8xf32>) // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref // CHECK: scf.for %[[VAL_18:.*]] = %[[VAL_16]] to %[[VAL_17]] step %[[VAL_5]] { @@ -1097,7 +1097,7 @@ func.func @mul_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32>, %argx: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16x8xf32, #Tsss>, tensor<32x16x8xf32>) - outs(%argx: tensor<32x16x8xf32>) { + inits(%argx: tensor<32x16x8xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -1163,7 +1163,7 @@ %argd: tensor) -> tensor { %0 = linalg.generic #trait_kernel_3d ins(%argb, %argc, %argd: tensor, tensor, tensor) - outs(%arga: tensor) { + inits(%arga: tensor) { ^bb(%b: f32, %c: f32, %d: f32, %a: f32): %0 = arith.mulf %b, %c : f32 %1 = arith.mulf %0, %d : f32 @@ -1219,7 +1219,7 @@ func.func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction ins(%arga: tensor<10x20x30xf32, #Tsss>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 @@ -1273,7 +1273,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_sum_reduction_inv ins(%arga, %argb: tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -1308,7 +1308,7 @@ // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<30xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : memref<10x20x30xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<10x20x30xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_13]] : memref<10x20x30xf32>) // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_7]] to %[[VAL_4]] step %[[VAL_8]] { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_14]]] : memref // CHECK: scf.for %[[VAL_16:.*]] = %[[VAL_7]] to %[[VAL_5]] step %[[VAL_8]] { @@ -1330,7 +1330,7 @@ %argx: tensor<10x20x30xf32>) -> tensor<10x20x30xf32> { %0 = linalg.generic #trait_invariants ins(%arga, %argb, %argc : tensor<10xf32, #Td>, tensor<20xf32>, tensor<30xf32>) - 
outs(%argx: tensor<10x20x30xf32>) { + inits(%argx: tensor<10x20x30xf32>) { ^bb(%a: f32, %b: f32, %c: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.mulf %0, %c : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir @@ -45,7 +45,7 @@ %argx: tensor<32xf32>) -> tensor<32xf32> { %0 = linalg.generic #trait1 ins(%arga, %argb: tensor<32xf32, #SpVec>, tensor<4xf32>) - outs(%argx: tensor<32xf32>) { + inits(%argx: tensor<32xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -77,7 +77,7 @@ // CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<34xi32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi32> -// CHECK: linalg.fill ins(%[[ZERO]] : i32) outs(%[[VAL_11]] : memref<32xi32>) +// CHECK: linalg.fill ins(%[[ZERO]] : i32) inits(%[[VAL_11]] : memref<32xi32>) // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_12]] to %[[VAL_13]] step %[[VAL_4]] { @@ -96,7 +96,7 @@ %argx: tensor<32xi32>) -> tensor<32xi32> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi32, #SpVec>, tensor<34xi32>) - outs(%argx: tensor<32xi32>) { + inits(%argx: tensor<32xi32>) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.andi %a, %b : i32 linalg.yield %0 : i32 @@ -152,7 +152,7 @@ %argx: tensor<32x16xf64>) -> tensor<32x16xf64> { %0 = linalg.generic #trait3 ins(%arga, %argb: tensor<32x16xf64, #CSR>, tensor<34x19xf64>) - outs(%argx: tensor<32x16xf64>) { + inits(%argx: tensor<32x16xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir b/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_broadcast.mlir @@ -46,7 +46,7 @@ func.func public @main(%arg0: tensor<4x5xi32, #DCSR>) -> tensor<4x3x5xi32, #SparseTensor> { %0 = bufferization.alloc_tensor() : tensor<4x3x5xi32, #SparseTensor> %1 = linalg.generic #trait - ins(%arg0 : tensor<4x5xi32, #DCSR>) outs(%0 : tensor<4x3x5xi32, #SparseTensor>) { + ins(%arg0 : tensor<4x5xi32, #DCSR>) inits(%0 : tensor<4x3x5xi32, #SparseTensor>) { ^bb0(%in: i32, %out: i32): linalg.yield %in : i32 } -> tensor<4x3x5xi32, #SparseTensor> diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -21,7 +21,7 @@ // CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<5x4xf64> -// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<5x4xf64>) +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) inits(%[[TMP_0]] : memref<5x4xf64>) // CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { // CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { // CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : 
tensor<2x4xf64> @@ -250,7 +250,7 @@ // CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index // CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<4x5xf64> -// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<4x5xf64>) +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) inits(%[[TMP_0]] : memref<4x5xf64>) // CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c4]] step %[[TMP_c1]] { // CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { // CHECK: %[[TMP_12:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<4x2xf64> @@ -311,7 +311,7 @@ // CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index // CHECK: %[[TMP_0:.*]] = memref.alloc() : memref<3x5xf64> // CHECK: %[[TMP_1:.*]] = memref.cast %[[TMP_0]] : memref<3x5xf64> to memref -// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) outs(%[[TMP_0]] : memref<3x5xf64>) +// CHECK: linalg.fill ins(%[[TMP_cst]] : f64) inits(%[[TMP_0]] : memref<3x5xf64>) // CHECK: scf.for %[[TMP_arg2:.*]] = %[[TMP_c0]] to %[[TMP_c3]] step %[[TMP_c1]] { // CHECK: scf.for %[[TMP_arg3:.*]] = %[[TMP_c0]] to %[[TMP_c2]] step %[[TMP_c1]] { // CHECK: %[[TMP_13:.*]] = tensor.extract %[[TMP_arg0]][%[[TMP_arg2]], %[[TMP_arg3]]] : tensor<3x2xf64> diff --git a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_expand.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_expand.mlir @@ -53,8 +53,8 @@ // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[S]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[S]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[S]]) : memref -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: } @@ -71,7 +71,7 @@ %v = bufferization.alloc_tensor(%n) : tensor %0 = linalg.generic #rowsum ins(%arga: tensor) - outs(%v: tensor) { + inits(%v: tensor) { ^bb(%a: f64, %x: f64): %1 = arith.addf %x, %a : f64 linalg.yield %1 : f64 @@ -104,8 +104,8 @@ // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[C4]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[C4]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[C4]]) : memref -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C8]] step %[[C1]] {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { @@ -123,7 +123,7 @@ %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSR> %D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSR>, tensor<2x4xf64, #CSR>) - outs(%C: tensor<8x4xf64, #CSR>) -> tensor<8x4xf64, #CSR> + inits(%C: tensor<8x4xf64, #CSR>) -> tensor<8x4xf64, #CSR> return %D: tensor<8x4xf64, #CSR> } @@ -152,8 +152,8 @@ // CHECK-CONVERT: %[[A:.*]] = memref.alloc(%[[C8]]) : memref // CHECK-CONVERT: %[[B:.*]] = memref.alloc(%[[C8]]) : memref // CHECK-CONVERT: %[[C:.*]] = memref.alloc(%[[C8]]) : memref -// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) outs(%[[A]] : memref) -// 
CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) outs(%[[B]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : f64) inits(%[[A]] : memref) +// CHECK-CONVERT: linalg.fill ins(%{{.*}} : i1) inits(%[[B]] : memref) // CHECK-CONVERT: scf.for %{{.*}} = %[[C0]] to %[[C4]] step %[[C1]] {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { // CHECK-CONVERT: scf.for {{.*}} { @@ -171,6 +171,6 @@ %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC> %D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>) - outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> + inits(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> return %D: tensor<8x4xf64, #CSC> } diff --git a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fill_zero.mlir @@ -35,8 +35,8 @@ // CHECK: %[[VAL_23:.*]] = memref.cast %[[VAL_22]] : memref<300xi1> to memref // CHECK: %[[VAL_24:.*]] = memref.alloc() : memref<300xindex> // CHECK: %[[VAL_25:.*]] = memref.cast %[[VAL_24]] : memref<300xindex> to memref -// CHECK: linalg.fill ins(%[[VAL_2]] : f64) outs(%[[VAL_20]] : memref<300xf64>) -// CHECK: linalg.fill ins(%[[VAL_7]] : i1) outs(%[[VAL_22]] : memref<300xi1>) +// CHECK: linalg.fill ins(%[[VAL_2]] : f64) inits(%[[VAL_20]] : memref<300xf64>) +// CHECK: linalg.fill ins(%[[VAL_7]] : i1) inits(%[[VAL_22]] : memref<300xi1>) // CHECK: %[[VAL_26:.*]] = call @sparsePointers0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK: %[[VAL_27:.*]] = call @sparseIndices0(%[[VAL_0]], %[[VAL_5]]) : (!llvm.ptr, index) -> memref // CHECK: %[[VAL_28:.*]] = call @sparsePointers0(%[[VAL_0]], %[[VAL_6]]) : (!llvm.ptr, index) -> memref @@ -120,8 +120,8 @@ %0 = bufferization.alloc_tensor() : tensor<100x300xf64, #DCSR> %cst = arith.constant 0.000000e+00 : f64 %1 = linalg.fill ins(%cst : f64) - outs(%0 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> + inits(%0 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> %2 = linalg.matmul ins(%arg0, %arg1 : tensor<100x200xf64, #DCSR>, tensor<200x300xf64, #DCSR>) - outs(%1 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> + inits(%1 : tensor<100x300xf64, #DCSR>) -> tensor<100x300xf64, #DCSR> return %2 : tensor<100x300xf64, #DCSR> } diff --git a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir @@ -54,7 +54,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = math.absf %a : f64 linalg.yield %0 : f64 @@ -86,7 +86,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = math.ceil %a : f64 linalg.yield %0 : f64 @@ -118,7 +118,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = math.floor %a : f64 linalg.yield %0 : f64 @@ -150,7 +150,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: 
f64): %0 = arith.negf %a : f64 linalg.yield %0 : f64 @@ -210,7 +210,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.addf %a, %b : f64 linalg.yield %0 : f64 @@ -272,7 +272,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.subf %a, %b : f64 linalg.yield %0 : f64 @@ -308,7 +308,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xf64, #SV>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 linalg.yield %0 : f64 @@ -342,7 +342,7 @@ %c = arith.constant 2.0 : f64 %0 = linalg.generic #traitc ins(%arga: tensor<32xf64, #SV>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %x: f64): %0 = arith.divf %a, %c : f64 linalg.yield %0 : f64 @@ -382,7 +382,7 @@ %xinp = bufferization.alloc_tensor() : tensor<32xf64, #SV> %0 = linalg.generic #trait1 ins(%arga: tensor<32xf64, #SV>) - outs(%xinp: tensor<32xf64, #SV>) { + inits(%xinp: tensor<32xf64, #SV>) { ^bb(%a: f64, %x: f64): %0 = math.absf %a : f64 %1 = math.ceil %0 : f64 @@ -423,7 +423,7 @@ %init = bufferization.alloc_tensor() : tensor<32xcomplex, #SV> %0 = linalg.generic #traitc ins(%arg0: tensor<32xcomplex, #SV>) - outs(%init: tensor<32xcomplex, #SV>) { + inits(%init: tensor<32xcomplex, #SV>) { ^bb(%a: complex, %x: complex): %0 = complex.div %a, %c : complex linalg.yield %0 : complex diff --git a/mlir/test/Dialect/SparseTensor/sparse_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_index.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_index.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_index.mlir @@ -55,7 +55,7 @@ %init = bufferization.alloc_tensor(%0, %1) : tensor %r = linalg.generic #trait ins(%arga: tensor) - outs(%init: tensor) { + inits(%init: tensor) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -112,7 +112,7 @@ %init = bufferization.alloc_tensor(%0, %1) : tensor %r = linalg.generic #trait ins(%arga: tensor) - outs(%init: tensor) { + inits(%init: tensor) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir @@ -74,7 +74,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.addi %a, %b : i64 linalg.yield %0 : i64 @@ -137,7 +137,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.subi %a, %b : i64 linalg.yield %0 : i64 @@ -173,7 +173,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, 
%x: i64): %0 = arith.muli %a, %b : i64 linalg.yield %0 : i64 @@ -207,7 +207,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.divsi %a, %c : i64 linalg.yield %0 : i64 @@ -241,7 +241,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.divui %a, %c : i64 linalg.yield %0 : i64 @@ -277,7 +277,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.andi %a, %b : i64 linalg.yield %0 : i64 @@ -337,7 +337,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.ori %a, %b : i64 linalg.yield %0 : i64 @@ -397,7 +397,7 @@ %argx: tensor<32xi64>) -> tensor<32xi64> { %0 = linalg.generic #trait2 ins(%arga, %argb: tensor<32xi64, #SV>, tensor<32xi64>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %b: i64, %x: i64): %0 = arith.xori %a, %b : i64 linalg.yield %0 : i64 @@ -431,7 +431,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.shrsi %a, %c : i64 linalg.yield %0 : i64 @@ -465,7 +465,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.shrui %a, %c : i64 linalg.yield %0 : i64 @@ -499,7 +499,7 @@ %c = arith.constant 2 : i64 %0 = linalg.generic #traitc ins(%arga: tensor<32xi64, #SV>) - outs(%argx: tensor<32xi64>) { + inits(%argx: tensor<32xi64>) { ^bb(%a: i64, %x: i64): %0 = arith.shli %a, %c : i64 linalg.yield %0 : i64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir @@ -47,7 +47,7 @@ %c: tensor<10x30xf32>) -> tensor<10x30xf32> { %0 = linalg.matmul ins(%a, %b: tensor<10x20xf32, #DCSR>, tensor<20x30xf32>) - outs(%c: tensor<10x30xf32>) -> tensor<10x30xf32> + inits(%c: tensor<10x30xf32>) -> tensor<10x30xf32> return %0 : tensor<10x30xf32> } @@ -144,7 +144,7 @@ %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR> %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #DCSR>, tensor<8x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> + inits(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> return %D: tensor<4x4xf64, #DCSR> } @@ -193,7 +193,7 @@ %output: tensor<6x6xi32>) -> tensor<6x6xi32> { %0 = linalg.conv_2d ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>) - outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32> + inits (%output: tensor<6x6xi32>) -> tensor<6x6xi32> return %0 : tensor<6x6xi32> } @@ -244,7 +244,7 @@ %c2 = arith.constant 2 : i32 %0 = linalg.quantized_matmul ins(%input1, %input2, %c2, %c0 : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32) - outs(%output : tensor<5x6xi64>) -> tensor<5x6xi64> + inits(%output : tensor<5x6xi64>) -> 
tensor<5x6xi64> return %0: tensor<5x6xi64> } @@ -306,6 +306,6 @@ %x: tensor) -> tensor { %dot = linalg.dot ins(%a, %b: tensor<1024xf32, #SparseVector>, tensor<1024xf32, #SparseVector>) - outs(%x: tensor) -> tensor + inits(%x: tensor) -> tensor return %dot : tensor } diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir @@ -115,7 +115,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #CSR>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir @@ -115,7 +115,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #CSC>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir @@ -115,7 +115,7 @@ %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<32x64xf64, #CSR>, tensor<64xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%A: f64, %b: f64, %x: f64): %0 = arith.mulf %A, %b : f64 %1 = arith.addf %x, %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir b/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_matmul_codegen.mlir @@ -38,7 +38,7 @@ // CHECK-DAG: %[[VAL_21:.*]] = memref.cast %[[VAL_20]] : memref<16xindex> to memref // CHECK-DAG: %[[VAL_22:.*]] = memref.alloc() : memref<16xf64> // CHECK-DAG: %[[VAL_23:.*]] = memref.cast %[[VAL_22]] : memref<16xf64> to memref -// CHECK: linalg.fill ins(%[[VAL_12]] : index) outs(%[[VAL_17]] : memref<3xindex>) +// CHECK: linalg.fill ins(%[[VAL_12]] : index) inits(%[[VAL_17]] : memref<3xindex>) // CHECK: memref.store %[[VAL_10]], %[[VAL_16]]{{\[}}%[[VAL_12]]] : memref<2xindex> // CHECK: memref.store %[[VAL_10]], %[[VAL_16]]{{\[}}%[[VAL_13]]] : memref<2xindex> // CHECK: %[[VAL_24:.*]] = sparse_tensor.push_back %[[VAL_17]], %[[VAL_19]], %[[VAL_12]] {idx = 0 : index} : memref<3xindex>, memref, index @@ -47,8 +47,8 @@ // CHECK: %[[VAL_27:.*]] = memref.alloc() : memref<4xi1> // CHECK: %[[VAL_28:.*]] = memref.alloc() : memref<4xindex> // CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<4xindex> to memref -// CHECK: linalg.fill ins(%[[VAL_11]] : f64) outs(%[[VAL_26]] : memref<4xf64>) -// CHECK: linalg.fill ins(%[[VAL_14]] : i1) outs(%[[VAL_27]] : memref<4xi1>) +// CHECK: linalg.fill ins(%[[VAL_11]] : f64) inits(%[[VAL_26]] : memref<4xf64>) +// CHECK: linalg.fill ins(%[[VAL_14]] : i1) inits(%[[VAL_27]] : memref<4xi1>) // CHECK: %[[VAL_30:.*]]:2 = scf.for %[[VAL_31:.*]] = %[[VAL_12]] to %[[VAL_10]] step %[[VAL_13]] 
iter_args(%[[VAL_32:.*]] = %[[VAL_21]], %[[VAL_33:.*]] = %[[VAL_23]]) -> (memref, memref) { // CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_2]]{{\[}}%[[VAL_31]]] : memref // CHECK: %[[VAL_35:.*]] = arith.addi %[[VAL_31]], %[[VAL_13]] : index @@ -133,6 +133,6 @@ %C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR> %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #CSR>, tensor<8x4xf64, #CSR>) - outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> + inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> return %D: tensor<4x4xf64, #CSR> } diff --git a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir @@ -40,7 +40,7 @@ // CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.indices %[[VAL_1]] {dimension = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense", "compressed", "compressed", "dense", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{ dimLevelType = [ "dense", "dense", "dense", "compressed", "compressed", "dense", "dense", "dense" ] }>> to memref // CHECK-DAG: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_2]] : memref<10x20x30x40x50x60x70x80xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32> +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32> // CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_11]] to %[[VAL_10]] step %[[VAL_12]] { // CHECK: scf.for %[[VAL_22:.*]] = %[[VAL_11]] to %[[VAL_9]] step %[[VAL_12]] { // CHECK: %[[VAL_23:.*]] = arith.muli %[[VAL_21]], %[[VAL_9]] : index @@ -89,7 +89,7 @@ %0 = linalg.generic #trait_mul ins(%arga, %argb: tensor<10x20x30x40x50x60x70x80xf32>, tensor<80x70x60x50x40x30x20x10xf32, #SparseTensor>) - outs(%argx: tensor<10x20x30x40x50x60x70x80xf32>) { + inits(%argx: tensor<10x20x30x40x50x60x70x80xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_out.mlir b/mlir/test/Dialect/SparseTensor/sparse_out.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_out.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_out.mlir @@ -48,7 +48,7 @@ func.func @sparse_simply_dynamic1(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR> { %c = arith.constant 2.0 : f32 %0 = linalg.generic #trait_scale_inpl - outs(%argx: tensor<32x16xf32, #DCSR>) { + inits(%argx: tensor<32x16xf32, #DCSR>) { ^bb(%x: f32): %1 = arith.mulf %x, %c : f32 linalg.yield %1 : f32 @@ -81,7 +81,7 @@ // CHECK: } func.func @sparse_simply_dynamic2(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR> { %0 = linalg.generic #trait_scale_inpl - outs(%argx: tensor<32x16xf32, #DCSR>) { + inits(%argx: tensor<32x16xf32, #DCSR>) { ^bb(%x: f32): %1 = arith.addf %x, %x : f32 linalg.yield %1 : f32 @@ -129,7 +129,7 @@ %xm = bufferization.alloc_tensor() : tensor<10x20xf32, #DCSR> %0 = linalg.generic #trait_scale ins(%arga: tensor<10x20xf32, #CSR>) - outs(%xm: tensor<10x20xf32, #DCSR>) { + inits(%xm: tensor<10x20xf32, #DCSR>) { ^bb(%a: f32, %x: f32): %1 = arith.mulf %a, %s : f32 linalg.yield %1 : f32 @@ -285,7 +285,7 @@ %0 = linalg.generic #trait_sumred ins(%arga, %argb: tensor, tensor) - outs(%xinit: tensor) { + inits(%xinit: tensor) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.muli %a, %b : i32 %1 = arith.addi %x, 
%0 : i32 @@ -400,7 +400,7 @@ %0 = linalg.generic #trait_matmat ins(%arga, %argb: tensor, tensor) - outs(%cinit: tensor) { + inits(%cinit: tensor) { ^bb(%a: f32, %b: f32, %c: f32): %1 = arith.mulf %a, %b : f32 %2 = arith.addf %c, %1 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir b/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_outbuf.mlir @@ -20,7 +20,7 @@ // CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<10xf32> -// CHECK: linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_8]] : memref<10xf32>) +// CHECK: linalg.fill ins(%[[VAL_3]] : f32) inits(%[[VAL_8]] : memref<10xf32>) // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] { @@ -36,7 +36,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = linalg.generic #trait ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: i32, %x : f32): %cst = arith.sitofp %a : i32 to f32 linalg.yield %cst : f32 @@ -54,7 +54,7 @@ // CHECK: %[[VAL_6:.*]] = sparse_tensor.indices %[[VAL_0]] {dimension = 0 : index} : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xi32, #{{.*}}> to memref // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_4]] : memref<10xf32> -// CHECK: linalg.fill ins(%[[VAL_2]] : f32) outs(%[[VAL_8]] : memref<10xf32>) +// CHECK: linalg.fill ins(%[[VAL_2]] : f32) inits(%[[VAL_8]] : memref<10xf32>) // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_1]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_3]] { @@ -70,7 +70,7 @@ %m = bufferization.alloc_tensor() : tensor<10xf32> %0 = linalg.generic #trait ins(%arga: tensor<10xi32, #SV>) - outs(%m: tensor<10xf32>) { + inits(%m: tensor<10xf32>) { ^bb(%a: i32, %x : f32): %cst = arith.sitofp %a : i32 to f32 linalg.yield %cst : f32 @@ -103,7 +103,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = linalg.generic #trait ins(%arga: tensor<10xf32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: f32, %x : f32): %up = arith.addf %a, %x : f32 linalg.yield %up : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel.mlir @@ -61,7 +61,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_dd ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %scale : f32 linalg.yield %0 : f32 @@ -109,7 +109,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_ss ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %scale : f32 linalg.yield %0 : f32 @@ -159,7 +159,7 @@ %argx: tensor<16xf32>) -> tensor<16xf32> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>) - outs(%argx: tensor<16xf32>) { + inits(%argx: 
tensor<16xf32>) { ^bb(%A: f32, %b: f32, %x: f32): %0 = arith.mulf %A, %b : f32 %1 = arith.addf %0, %x : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir @@ -53,7 +53,7 @@ %argx: tensor<16xf32>) -> tensor<16xf32> { %0 = linalg.generic #trait_matvec ins(%arga, %argb : tensor<16x32xf32, #CSR>, tensor<32xf32>) - outs(%argx: tensor<16xf32>) { + inits(%argx: tensor<16xf32>) { ^bb(%A: f32, %b: f32, %x: f32): %0 = arith.mulf %A, %b : f32 %1 = arith.addf %0, %x : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir @@ -25,7 +25,7 @@ // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20x30x10xf32> -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_9]] : memref<20x30x10xf32>) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_9]] : memref<20x30x10xf32>) // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_10]], %[[VAL_4]] : index @@ -45,7 +45,7 @@ %argx: tensor<20x30x10xf32>) -> tensor<20x30x10xf32> { %0 = linalg.generic #trait ins(%arga: tensor<10x20x30xf32, #X>) - outs(%argx: tensor<20x30x10xf32>) { + inits(%argx: tensor<20x30x10xf32>) { ^bb(%a : f32, %x: f32): linalg.yield %a : f32 } -> tensor<20x30x10xf32> @@ -64,7 +64,7 @@ // CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor> // CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref -// CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_10]] : memref) +// CHECK: linalg.fill ins(%[[ZERO]] : f32) inits(%[[VAL_10]] : memref) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_6]] step %[[VAL_4]] { // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_4]] { // CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_7]], %[[VAL_11]] : index @@ -84,7 +84,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a : f32, %x: f32): linalg.yield %a : f32 } -> tensor diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir @@ -83,7 +83,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait ins(%arga: tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a : f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 diff --git a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir @@ -68,7 +68,7 @@ %c = arith.constant 2.2 : f32 %0 = linalg.generic #trait ins(%arga, %argp, %argq: tensor<32x16xf32, #SparseMatrix>, tensor, f32) - outs(%argx: tensor<32x16xf32>) { + 
inits(%argx: tensor<32x16xf32>) { ^bb(%a: f32, %p: f32, %q: f32, %x: f32): %0 = arith.mulf %a, %p : f32 // scalar tensor argument %1 = arith.mulf %0, %q : f32 // scalar argument diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir @@ -32,7 +32,7 @@ affine_map<(d0, d1) -> (d0, d1)>], iterator_types = ["parallel", "parallel"]} ins(%cst : f64) - outs(%0 : tensor<1024x1024xf64>) { + inits(%0 : tensor<1024x1024xf64>) { ^bb0(%a: f64, %x: f64): linalg.yield %a : f64 } -> tensor<1024x1024xf64> @@ -49,7 +49,7 @@ %0 = tensor.empty() : tensor<32xf64> %1 = linalg.generic {indexing_maps = [affine_map<(d0) -> (d0)>], iterator_types = ["parallel"]} - outs(%0 : tensor<32xf64>) { + inits(%0 : tensor<32xf64>) { ^bb0(%x: f64): linalg.yield %cst : f64 } -> tensor<32xf64> @@ -106,7 +106,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -115,7 +115,7 @@ // Sample the result with elements-wise multiplication with sparse matrix. %3 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 @@ -188,7 +188,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -198,7 +198,7 @@ %3 = bufferization.alloc_tensor() : tensor<8x8xf64, #SM> %4 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%3 : tensor<8x8xf64, #SM>) { + inits(%3 : tensor<8x8xf64, #SM>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_storage.mlir b/mlir/test/Dialect/SparseTensor/sparse_storage.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_storage.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_storage.mlir @@ -40,7 +40,7 @@ func.func @mul64(%arga: tensor<32xf64, #SparseVector64>, %argb: tensor<32xf64>, %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_mul ins(%arga, %argb: tensor<32xf64, #SparseVector64>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 linalg.yield %0 : f64 @@ -69,7 +69,7 @@ func.func @mul32(%arga: tensor<32xf64, #SparseVector32>, %argb: tensor<32xf64>, %argx: tensor<32xf64>) -> tensor<32xf64> { %0 = linalg.generic #trait_mul ins(%arga, %argb: tensor<32xf64, #SparseVector32>, tensor<32xf64>) - outs(%argx: tensor<32xf64>) { + inits(%argx: tensor<32xf64>) { ^bb(%a: f64, %b: f64, %x: f64): %0 = arith.mulf %a, %b : f64 linalg.yield %0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir @@ -50,7 +50,7 @@ %i = bufferization.alloc_tensor() : tensor<4x3xf64, #DCSR> %0 = linalg.generic 
#transpose_trait ins(%arga: tensor<3x4xf64, #DCSR>) - outs(%i: tensor<4x3xf64, #DCSR>) { + inits(%i: tensor<4x3xf64, #DCSR>) { ^bb(%a: f64, %x: f64): linalg.yield %a : f64 } -> tensor<4x3xf64, #DCSR> diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir --- a/mlir/test/Dialect/SparseTensor/sparse_vector.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_vector.mlir @@ -28,7 +28,7 @@ func.func @scale_d(%arga: tensor<1024xf32, #DenseVector>, %b: f32, %argx: tensor<1024xf32>) -> tensor<1024xf32> { %0 = linalg.generic #trait_scale_d ins(%arga: tensor<1024xf32, #DenseVector>) - outs(%argx: tensor<1024xf32>) { + inits(%argx: tensor<1024xf32>) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -78,7 +78,7 @@ func.func @mul_s(%arga: tensor<1024xf32, #SparseVector>, %argb: tensor<1024xf32>, %argx: tensor<1024xf32>) -> tensor<1024xf32> { %0 = linalg.generic #trait_mul_s ins(%arga, %argb: tensor<1024xf32, #SparseVector>, tensor<1024xf32>) - outs(%argx: tensor<1024xf32>) { + inits(%argx: tensor<1024xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -117,7 +117,7 @@ func.func @reduction_d(%arga: tensor<1024xf32, #DenseVector>, %argb: tensor<1024xf32>, %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction_d ins(%arga, %argb: tensor<1024xf32, #DenseVector>, tensor<1024xf32>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.addf %x, %0 : f32 @@ -172,7 +172,7 @@ func.func @mul_ds(%arga: tensor<512x1024xf32, #SparseMatrix>, %argb: tensor<512x1024xf32>, %argx: tensor<512x1024xf32>) -> tensor<512x1024xf32> { %0 = linalg.generic #trait_mul_ds ins(%arga, %argb: tensor<512x1024xf32, #SparseMatrix>, tensor<512x1024xf32>) - outs(%argx: tensor<512x1024xf32>) { + inits(%argx: tensor<512x1024xf32>) { ^bb(%a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 linalg.yield %0 : f32 @@ -216,7 +216,7 @@ %argx: tensor<33x64xf64>) -> tensor<33x64xf64> { %0 = linalg.generic #trait_affine ins(%arga: tensor<32x64xf64, #SparseMatrix>) - outs(%argx: tensor<33x64xf64>) { + inits(%argx: tensor<33x64xf64>) { ^bb(%a: f64, %x: f64): %0 = arith.addf %x, %a : f64 linalg.yield %0 : f64 diff --git a/mlir/test/Dialect/Tensor/extract-slice-from-collapse-shape.mlir b/mlir/test/Dialect/Tensor/extract-slice-from-collapse-shape.mlir --- a/mlir/test/Dialect/Tensor/extract-slice-from-collapse-shape.mlir +++ b/mlir/test/Dialect/Tensor/extract-slice-from-collapse-shape.mlir @@ -17,9 +17,9 @@ // CHECK-DAG: %[[init:.+]] = tensor.empty() : tensor<20x11xf32> // CHECK-DAG: %[[tile:.+]] = scf.for %[[iv:.+]] = %[[c0]] to %[[c20]] step %[[c1]] iter_args(%[[iterArg:.+]] = %[[init]]) // CHECK: %[[multiIndex:.+]]:3 = affine.delinearize_index %[[iv]] into (%[[c3]], %[[c5]], %[[c7]] -// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] : -// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} : -// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg]][%[[iv]], 0] [1, 11] [1, 1] : +// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] : +// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} : +// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into 
%[[iterArg]][%[[iv]], 0] [1, 11] [1, 1] : // CHECK: scf.yield %[[update]] : // CHECK: return %[[tile]] @@ -31,8 +31,8 @@ // FOREACH-DAG: %[[init:.+]] = tensor.empty() : tensor<20x11xf32> // FOREACH: %[[tile:.+]] = scf.foreach_thread (%[[iv:.+]]) in (%[[c20]]) shared_outs(%[[dest:.+]] = %[[init]]) // FOREACH: %[[multiIndex:.+]]:3 = affine.delinearize_index %[[iv]] into (%[[c3]], %[[c5]], %[[c7]] -// FOREACH: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] : -// FOREACH: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} : +// FOREACH: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 11] [1, 1, 1, 1] : +// FOREACH: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} : // FOREACH: perform_concurrently // FOREACH-NEXT: tensor.parallel_insert_slice %[[sliceFlat]] into %[[dest]][%[[iv]], 0] [1, 11] [1, 1] : // FOREACH: return %[[tile]] @@ -58,9 +58,9 @@ // CHECK: %[[tile:.+]] = scf.for %[[iv:.+]] = %[[c0]] to %[[c10]] step %[[c1]] iter_args(%[[iterArg:.+]] = %[[init]]) // CHECK: %[[inputIv:.+]] = affine.apply #[[$map0]](%[[iv]]) // CHECK: %[[multiIndex:.+]]:3 = affine.delinearize_index %[[inputIv]] into (%[[c3]], %[[c5]], %[[c7]] -// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 5] [1, 1, 1, 2] : -// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} : -// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg]][%[[iv]], 0] [1, 5] [1, 1] : +// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex]]#0, %[[multiIndex]]#1, %[[multiIndex]]#2, 0] [1, 1, 1, 5] [1, 1, 1, 2] : +// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3]{{\]}} : +// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg]][%[[iv]], 0] [1, 5] [1, 1] : // CHECK: scf.yield %[[update]] : // CHECK: return %[[tile]] @@ -110,8 +110,8 @@ // CHECK-DAG: %[[c4:.+]] = arith.constant 4 : index // CHECK-DAG: %[[c11:.+]] = arith.constant 11 : index // CHECK: %[[init:.+]] = tensor.empty(%[[sz1]], %[[sz2]]) : tensor -// CHECK-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] : -// CHECK-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] : +// CHECK-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] : +// CHECK-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] : // CHECK-DAG: %[[d4:.+]] = tensor.dim %[[arg0]], %[[c4]] : // CHECK: %[[tile1:.+]] = scf.for %[[iv1:.+]] = %[[c0]] to %[[sz1]] step %[[c1]] iter_args(%[[iterArg1:.+]] = %[[init]]) // CHECK: %[[tile2:.+]] = scf.for %[[iv2:.+]] = %[[c0]] to %[[sz2]] step %[[c1]] iter_args(%[[iterArg2:.+]] = %[[iterArg1]]) @@ -119,12 +119,12 @@ // CHECK: %[[multiIndex1:.+]]:3 = affine.delinearize_index %[[inputIv1]] into (%[[c3]], %[[d1]], %[[d2]]) : // CHECK: %[[inputIv2:.+]] = affine.apply #[[map0:.+]](%[[iv2]])[%[[lb2]]] // CHECK: %[[multiIndex2:.+]]:2 = affine.delinearize_index %[[inputIv2]] into (%[[c11]], %[[d4]]) : -// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex1]]#0, %[[multiIndex1]]#1, %[[multiIndex1]]#2, %[[multiIndex2]]#0, %[[multiIndex2]]#1] [1, 1, 1, 1, 1] [1, 1, 1, 1, 1] : -// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3, 4]{{\]}} : -// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg2]][%[[iv1]], %[[iv2]]] [1, 1] [1, 
1] : +// CHECK: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex1]]#0, %[[multiIndex1]]#1, %[[multiIndex1]]#2, %[[multiIndex2]]#0, %[[multiIndex2]]#1] [1, 1, 1, 1, 1] [1, 1, 1, 1, 1] : +// CHECK: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3, 4]{{\]}} : +// CHECK: %[[update:.+]] = tensor.insert_slice %[[sliceFlat]] into %[[iterArg2]][%[[iv1]], %[[iv2]]] [1, 1] [1, 1] : // CHECK: scf.yield %[[update]] : // CHECK: scf.yield %[[tile2]] : -// CHECK: return %[[tile1]] : +// CHECK: return %[[tile1]] : // FOREACH: #[[map1:.+]] = affine_map<(d0)[s0] -> (d0 + s0)> // FOREACH: func.func @extract_slice_dynamic_multidim(%[[arg0:.+]]: tensor<3x?x?x11x?xf32>, %[[lb1:.+]]: index, %[[sz1:.+]]: index, %[[lb2:.+]]: index, %[[sz2:.+]]: index) @@ -134,16 +134,16 @@ // FOREACH-DAG: %[[c4:.+]] = arith.constant 4 : index // FOREACH-DAG: %[[c11:.+]] = arith.constant 11 : index // FOREACH: %[[init:.+]] = tensor.empty(%[[sz1]], %[[sz2]]) : tensor -// FOREACH-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] : -// FOREACH-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] : +// FOREACH-DAG: %[[d1:.+]] = tensor.dim %[[arg0]], %[[c1]] : +// FOREACH-DAG: %[[d2:.+]] = tensor.dim %[[arg0]], %[[c2]] : // FOREACH-DAG: %[[d4:.+]] = tensor.dim %[[arg0]], %[[c4]] : // FOREACH: %[[tile1:.+]] = scf.foreach_thread (%[[tid1:.+]], %[[tid2:.+]]) in (%[[sz1]], %[[sz2]]) shared_outs(%[[dest:.+]] = %[[init]]) // FOREACH-DAG: %[[iv1:.+]] = affine.apply #[[map1]](%[[tid1]])[%[[lb1]]] // FOREACH: %[[multiIndex1:.+]]:3 = affine.delinearize_index %[[iv1]] into (%[[c3]], %[[d1]], %[[d2]]) : // FOREACH-DAG: %[[iv2:.+]] = affine.apply #[[map1]](%[[tid2]])[%[[lb2]]] // FOREACH: %[[multiIndex2:.+]]:2 = affine.delinearize_index %[[iv2]] into (%[[c11]], %[[d4]]) : -// FOREACH: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex1]]#0, %[[multiIndex1]]#1, %[[multiIndex1]]#2, %[[multiIndex2]]#0, %[[multiIndex2]]#1] [1, 1, 1, 1, 1] [1, 1, 1, 1, 1] : -// FOREACH: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3, 4]{{\]}} : +// FOREACH: %[[slice:.+]] = tensor.extract_slice %[[arg0]][%[[multiIndex1]]#0, %[[multiIndex1]]#1, %[[multiIndex1]]#2, %[[multiIndex2]]#0, %[[multiIndex2]]#1] [1, 1, 1, 1, 1] [1, 1, 1, 1, 1] : +// FOREACH: %[[sliceFlat:.+]] = tensor.collapse_shape %[[slice]] {{\[}}[0, 1, 2], [3, 4]{{\]}} : // FOREACH: perform_concurrently // FOREACH-NEXT: tensor.parallel_insert_slice %[[sliceFlat]] into %[[dest]][%[[tid1]], %[[tid2]]] [1, 1] [1, 1] : @@ -154,7 +154,7 @@ // CHECK: @extract_slice_non_sliced_linearized_dim(%[[arg0:.+]]: tensor<{{.*}}>, func.func @extract_slice_non_sliced_linearized_dim(%input: tensor<3x?x?x11x2xf32>, %offt: index, %size: index) -> tensor<?x22xf32> { %collapsed = tensor.collapse_shape %input [[0, 1, 2], [3, 4]] : tensor<3x?x?x11x2xf32> into tensor<?x22xf32> %slice = tensor.extract_slice %collapsed [%offt, 0] [%size, 22] [1, 1] : tensor<?x22xf32> to tensor<?x22xf32> // CHECK: scf.for // CHECK-NOT: scf.for @@ -169,7 +169,7 @@ func.func @no_sliced_linearized_dims(%input: tensor<30x11x100xf32>, %offt: index, %size: index) -> tensor<330x?xf32> { %collapsed = tensor.collapse_shape %input [[0, 1], [2]] : tensor<30x11x100xf32> into tensor<330x100xf32> %slice = tensor.extract_slice %collapsed [0, %offt] [330, %size] [1, 1] : tensor<330x100xf32> to tensor<330x?xf32> - // CHECK-NOT: scf.for + // CHECK-NOT: scf.for // CHECK: %[[init:.+]] = tensor.empty(%[[arg2]]) // CHECK: %[[e:.+]] =
tensor.extract_slice %[[arg0]][0, 0, %[[arg1]]] [30, 11, %[[arg2]]] [1, 1, 1] // CHECK: %[[c:.+]] = tensor.collapse_shape %[[e]] {{\[}}[0, 1], [2]] @@ -191,7 +191,7 @@ // CHECK: %[[e:.+]] = tensor.extract_slice %[[arg0]][0, 0, 0] [1, 11, 100] [1, 1, 1] // CHECK-SAME: tensor<1x11x100xf32> to tensor<11x100xf32> // CHECK: %[[e1:.+]] = tensor.extract_slice %[[e]][%[[arg1]], 0] [%[[arg2]], 100] [1, 1] - // CHECK-SAME: tensor<11x100xf32> to tensor + // CHECK-SAME: tensor<11x100xf32> to tensor return %slice : tensor<?x100xf32> } @@ -201,11 +201,11 @@ %slice = tensor.extract_slice %collapsed [%offt, 0] [%size, 100] [1, 1] : tensor<?x100xf32> to tensor<?x100xf32> // CHECK-NOT: scf.for // CHECK: %[[c1:.+]] = arith.constant 1 : index - // CHECK: %[[dim:.+]] = tensor.dim %[[arg0]], %[[c1]] : + // CHECK: %[[dim:.+]] = tensor.dim %[[arg0]], %[[c1]] : // CHECK: %[[e:.+]] = tensor.extract_slice %[[arg0]][0, 0, 0, 0] [1, %[[dim]], 1, 100] [1, 1, 1, 1] // CHECK-SAME: tensor<1x?x1x100xf32> to tensor // CHECK: %[[e1:.+]] = tensor.extract_slice %[[e]][%[[arg1]], 0] [%[[arg2]], 100] [1, 1] - // CHECK-SAME: tensor to tensor + // CHECK-SAME: tensor to tensor return %slice : tensor<?x100xf32> } @@ -228,14 +228,14 @@ return %slice : tensor } -// Edge case where all collapsed dims are unit dims. This pattern can't eliminate the collapse shape, +// Edge case where all collapsed dims are unit dims. This pattern can't eliminate the collapse shape, // that should be handled by `linalg-fold-unit-extent-dims`. // CHECK: @collapse_and_slice_multiple_all_unit_dim(%[[arg0:.+]]: tensor<{{.*}}>) func.func @collapse_and_slice_multiple_all_unit_dim(%input: tensor<1x1x1x100xf32>) -> tensor<1x100xf32> { %collapsed = tensor.collapse_shape %input [[0, 1, 2], [3]] : tensor<1x1x1x100xf32> into tensor<1x100xf32> - %slice = tensor.extract_slice %collapsed [0, 0] [1, 100] [1, 1] : tensor<1x100xf32> to tensor<1x100xf32> - return %slice : tensor<1x100xf32> + %slice = tensor.extract_slice %collapsed [0, 0] [1, 100] [1, 1] : tensor<1x100xf32> to tensor<1x100xf32> + return %slice : tensor<1x100xf32> // CHECK: %[[collapse:.+]] = tensor.collapse_shape %[[arg0]] {{\[}}[0, 1, 2], [3]] : tensor<1x1x1x100xf32> into tensor<1x100xf32> - // CHECK: return %[[collapse]] + // CHECK: return %[[collapse]] } diff --git a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir --- a/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Tensor/one-shot-bufferize.mlir @@ -70,8 +70,8 @@ %r0 = tensor.insert_slice %t into %A[0][4][1] : tensor<4xf32> into tensor<?xf32> /// Overwrite A inplace.
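// Illustrative sketch only (hypothetical function, not part of this patch):
// with the renamed keyword, destination-passing style on tensors reads as
// below. The operand passed through inits(...) is the destination whose
// buffer one-shot bufferization may update in place, which is exactly what
// the fill-after-insert_slice test here checks for.
func.func @fill_inplace_example(%A: tensor<?xf32>) -> tensor<?xf32> {
  %f0 = arith.constant 0.0 : f32
  // %A is the init operand: the fill overwrites it rather than allocating.
  %0 = linalg.fill ins(%f0 : f32) inits(%A : tensor<?xf32>) -> tensor<?xf32>
  return %0 : tensor<?xf32>
}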
- // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]] - %r1 = linalg.fill ins(%f0 : f32) outs(%r0 : tensor) -> tensor + // CHECK: linalg.fill ins({{.*}}{{.*}}inits(%[[A]] + %r1 = linalg.fill ins(%f0 : f32) inits(%r0 : tensor) -> tensor // CHECK: return // CHECK-NOT: tensor @@ -90,8 +90,8 @@ { %f0 = arith.constant 0.0 : f32 - // CHECK: linalg.fill ins({{.*}}{{.*}}outs(%[[A]] - %r0 = linalg.fill ins(%f0 : f32) outs(%A : tensor) -> tensor + // CHECK: linalg.fill ins({{.*}}{{.*}}inits(%[[A]] + %r0 = linalg.fill ins(%f0 : f32) inits(%A : tensor) -> tensor // CHECK-NOT: alloc // CHECK: %[[SV_A:.*]] = memref.subview %[[A]] @@ -246,7 +246,7 @@ // CHECK-NOT: memref.alloc %cst = arith.constant 4.200000e+01 : f32 // CHECK: linalg.fill - %0 = linalg.fill ins(%cst : f32) outs(%t : tensor<10xf32>) -> tensor<10xf32> + %0 = linalg.fill ins(%cst : f32) inits(%t : tensor<10xf32>) -> tensor<10xf32> // CHECK-NOT: memref.copy %1 = tensor.insert_slice %0 into %t[0][10][1] : tensor<10xf32> into tensor<10xf32> return %1 : tensor<10xf32> diff --git a/mlir/test/Dialect/Transform/selective-targeting.mlir b/mlir/test/Dialect/Transform/selective-targeting.mlir --- a/mlir/test/Dialect/Transform/selective-targeting.mlir +++ b/mlir/test/Dialect/Transform/selective-targeting.mlir @@ -12,7 +12,7 @@ // CHECK-SAME: -> tensor<4x4xf32> %0 = linalg.matmul { test.attrA } ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> func.return %0 : tensor<128x128xf32> } @@ -30,7 +30,7 @@ // CHECK: vector.transfer_write %0 = linalg.matmul { test.attrA, test.attrC } ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> func.return %0 : tensor<128x128xf32> } @@ -47,7 +47,7 @@ // CHECK: vector.transfer_write %0 = linalg.matmul { test.attrC } ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> func.return %0 : tensor<128x128xf32> } @@ -94,7 +94,7 @@ // CHECK: vector.contract %0 = linalg.matmul {test.attrA} ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> func.return %0 : tensor<128x128xf32> } @@ -105,7 +105,7 @@ -> tensor<128x128xf32> { // CHECK: linalg.matmul %0 = linalg.matmul ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> func.return %0 : tensor<128x128xf32> } @@ -139,11 +139,11 @@ // CHECK: vector.contract %0 = linalg.matmul {test.attrA} ins(%arg0, %arg1: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg2: tensor<128x128xf32>) + inits(%arg2: tensor<128x128xf32>) -> tensor<128x128xf32> // CHECK: vector.contract %1 = linalg.matmul ins(%arg0, %0: tensor<128x128xf32>, tensor<128x128xf32>) - outs(%arg3: tensor<128x128xf32>) + inits(%arg3: tensor<128x128xf32>) -> tensor<128x128xf32> return %1 : tensor<128x128xf32> } diff --git a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir --- a/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir +++ b/mlir/test/Dialect/Vector/vector-transfer-full-partial-split.mlir @@ -72,7 +72,7 @@ // LINALG: scf.yield %[[A]], %[[i]], %[[j]] : memref, index, index // LINALG: } else { // slow path, fill tmp alloc and 
yield a memref_casted version of it - // LINALG: linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>) + // LINALG: linalg.fill ins(%cst : f32) inits(%[[alloc]] : memref<4x8xf32>) // LINALG: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref // LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[d0]], %[[i]], %[[c4]]) // LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]]) @@ -165,7 +165,7 @@ // LINALG-SAME: memref>, index, index // LINALG: } else { // slow path, fill tmp alloc and yield a memref_casted version of it - // LINALG: linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>) + // LINALG: linalg.fill ins(%cst : f32) inits(%[[alloc]] : memref<4x8xf32>) // LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[c7]], %[[i]], %[[c4]]) // LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]]) // LINALG: %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1] diff --git a/mlir/test/IR/slice.mlir b/mlir/test/IR/slice.mlir --- a/mlir/test/IR/slice.mlir +++ b/mlir/test/IR/slice.mlir @@ -6,9 +6,9 @@ %c = memref.alloc(%arg0, %arg1) : memref %d = memref.alloc(%arg0, %arg1) : memref linalg.matmul ins(%a, %b : memref, memref) - outs(%c : memref) + inits(%c : memref) linalg.matmul ins(%a, %b : memref, memref) - outs(%d : memref) + inits(%d : memref) memref.dealloc %c : memref memref.dealloc %b : memref memref.dealloc %a : memref diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir --- a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir +++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir @@ -43,7 +43,7 @@ iterator_types = ["parallel", "parallel"] } ins(%lhs, %rhs : memref, memref) - outs(%sum : memref) + inits(%sum : memref) { ^bb0(%lhs_in: f32, %rhs_in: f32, %sum_out: f32): %0 = arith.addf %lhs_in, %rhs_in : f32 @@ -68,8 +68,8 @@ %RHS10 = memref.alloc() {alignment = 64} : memref<1x10xf32> %DST10 = memref.alloc() {alignment = 64} : memref<1x10xf32> - linalg.fill ins(%f1 : f32) outs(%LHS10 : memref<1x10xf32>) - linalg.fill ins(%f1 : f32) outs(%RHS10 : memref<1x10xf32>) + linalg.fill ins(%f1 : f32) inits(%LHS10 : memref<1x10xf32>) + linalg.fill ins(%f1 : f32) inits(%RHS10 : memref<1x10xf32>) %LHS = memref.cast %LHS10 : memref<1x10xf32> to memref %RHS = memref.cast %RHS10 : memref<1x10xf32> to memref diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir --- a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir +++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir @@ -90,8 +90,8 @@ %RHS10 = memref.alloc() {alignment = 64} : memref<1x10xf32> %DST10 = memref.alloc() {alignment = 64} : memref<1x10xf32> - linalg.fill ins(%f1 : f32) outs(%LHS10 : memref<1x10xf32>) - linalg.fill ins(%f1 : f32) outs(%RHS10 : memref<1x10xf32>) + linalg.fill ins(%f1 : f32) inits(%LHS10 : memref<1x10xf32>) + linalg.fill ins(%f1 : f32) inits(%RHS10 : memref<1x10xf32>) %LHS = memref.cast %LHS10 : memref<1x10xf32> to memref %RHS = memref.cast %RHS10 : memref<1x10xf32> to memref diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir --- 
a/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/matmul-vs-matvec.mlir @@ -12,9 +12,9 @@ %x = memref.dim %A, %c0 : memref %y = memref.dim %B, %c1 : memref %C = memref.alloc(%x, %y) : memref - linalg.fill ins(%f0 : f32) outs(%C : memref) + linalg.fill ins(%f0 : f32) inits(%C : memref) linalg.matmul ins(%A, %B: memref, memref) - outs(%C: memref) + inits(%C: memref) return %C : memref } @@ -26,12 +26,12 @@ %x = memref.dim %A, %c1 : memref %n = memref.dim %B, %c1 : memref %C = memref.alloc(%m, %n) : memref - linalg.fill ins(%f0 : f32) outs(%C : memref) + linalg.fill ins(%f0 : f32) inits(%C : memref) scf.for %i = %c0 to %n step %c1 { %b = memref.subview %B[0, %i][%x, 1][1, 1] : memref to memref> %c = memref.subview %C[0, %i][%m, 1][1, 1] : memref to memref> linalg.matvec ins(%A, %b: memref, memref>) - outs(%c: memref>) + inits(%c: memref>) } return %C : memref } @@ -46,8 +46,8 @@ %val2 = arith.constant 17.0 : f32 %A = memref.alloc(%m, %x) : memref %B = memref.alloc(%x, %n) : memref - linalg.fill ins(%val1 : f32) outs(%A : memref) - linalg.fill ins(%val2 : f32) outs(%B : memref) + linalg.fill ins(%val1 : f32) inits(%A : memref) + linalg.fill ins(%val2 : f32) inits(%B : memref) memref.store %val1, %B[%c0, %c0] : memref %C1 = call @matmul(%A, %B) : (memref, memref) -> memref %C2 = call @matvec(%A, %B) : (memref, memref) -> memref diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-call.mlir @@ -14,13 +14,13 @@ // Creates and returns a 1-D buffer of size %s1 filled with the value %f func.func @alloc_1d_filled_f32(%s1 : index, %f : f32) -> memref { %buf = memref.alloc(%s1) : memref - linalg.fill ins(%f : f32) outs(%buf : memref) + linalg.fill ins(%f : f32) inits(%buf : memref) return %buf : memref } func.func @conv_1d(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv_1d ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) + inits (%arg2: memref) return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-1d-nwc-wcf-call.mlir @@ -14,7 +14,7 @@ // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3) : memref - linalg.fill ins(%f : f32) outs(%buf : memref) + linalg.fill ins(%f : f32) inits(%buf : memref) return %buf : memref } @@ -22,7 +22,7 @@ linalg.conv_1d_nwc_wcf {dilations = dense<1> : tensor<1xi64>, strides = dense<1> : tensor<1xi64>} ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) + inits (%arg2: memref) return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-call.mlir @@ -14,13 +14,13 @@ // Creates and returns a 2-D buffer of size (%s1, %s2) filled with the value %f func.func @alloc_2d_filled_f32(%s1 : index, %s2 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2) : memref 
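// Illustrative sketch only (hypothetical names, not part of this patch): the
// memref form of the renamed fill produces no SSA result; the buffer passed
// through inits(...) is mutated in place, which is the pattern the
// alloc-and-fill helpers in these conv tests rely on.
func.func @alloc_and_fill_example(%s1: index, %s2: index, %f: f32) -> memref<?x?xf32> {
  %buf = memref.alloc(%s1, %s2) : memref<?x?xf32>
  // In-place on memrefs: the fill writes %buf directly and yields nothing.
  linalg.fill ins(%f : f32) inits(%buf : memref<?x?xf32>)
  return %buf : memref<?x?xf32>
}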
- linalg.fill ins(%f : f32) outs(%buf : memref) + linalg.fill ins(%f : f32) inits(%buf : memref) return %buf : memref } func.func @conv_2d(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv_2d ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) + inits (%arg2: memref) return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-2d-nhwc-hwcf-call.mlir @@ -14,7 +14,7 @@ // Creates and returns 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f func.func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3, %s4) : memref - linalg.fill ins(%f : f32) outs(%buf : memref) + linalg.fill ins(%f : f32) inits(%buf : memref) return %buf : memref } @@ -22,7 +22,7 @@ linalg.conv_2d_nhwc_hwcf {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) + inits (%arg2: memref) return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-call.mlir @@ -14,13 +14,13 @@ // Creates and returns 3-D buffer of size (%s1, %s2, %s3) filled with the value %f func.func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3) : memref - linalg.fill ins(%f : f32) outs(%buf : memref) + linalg.fill ins(%f : f32) inits(%buf : memref) return %buf : memref } func.func @conv_3d(%arg0: memref, %arg1: memref, %arg2: memref) { linalg.conv_3d ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) + inits (%arg2: memref) return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-conv-3d-ndhwc-dhwcf-call.mlir @@ -14,7 +14,7 @@ // Creates and returns 5-D buffer of size (%s1, %s2, %s3, %s4, %s5) filled with the value %f func.func @alloc_5d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %s5 : index, %f : f32) -> memref { %buf = memref.alloc(%s1, %s2, %s3, %s4, %s5) : memref - linalg.fill ins(%f : f32) outs(%buf : memref) + linalg.fill ins(%f : f32) inits(%buf : memref) return %buf : memref } @@ -22,7 +22,7 @@ linalg.conv_3d_ndhwc_dhwcf {dilations = dense<1> : tensor<3xi64>, strides = dense<1> : tensor<3xi64>} ins (%arg0, %arg1: memref, memref) - outs (%arg2: memref) + inits (%arg2: memref) return } diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-one-shot-bufferize.mlir @@ -14,7 +14,7 @@ %cst = arith.constant 0.000000e+00 : f32 %c2 = arith.constant 2 : index %c0 = arith.constant 0 : index - %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor) -> tensor + %0 = linalg.fill ins(%cst : f32) inits(%arg2 : tensor) -> tensor %1 = affine.apply #map0(%c0, 
%c64)[%c2] %2 = bufferization.alloc_tensor(%1) : tensor %3 = scf.for %arg3 = %c0 to %c64 step %c2 iter_args(%arg4 = %2) -> (tensor) { @@ -61,7 +61,7 @@ %13 = tensor.extract_slice %6[%12, 0] [1, 2] [1, 1] : tensor to tensor<2xf32> %14 = affine.apply #map1(%arg3, %c0)[%c2] %15 = tensor.extract_slice %3[%14, 0] [1, 2] [1, 1] : tensor to tensor<2xf32> - %16 = linalg.dot ins(%13, %15 : tensor<2xf32>, tensor<2xf32>) outs(%arg4 : tensor) -> tensor + %16 = linalg.dot ins(%13, %15 : tensor<2xf32>, tensor<2xf32>) inits(%arg4 : tensor) -> tensor // %AA = tensor.cast %13 : tensor<2xf32> to tensor<*xf32> // call @printMemrefF32(%AA) : (tensor<*xf32>) -> () @@ -83,9 +83,9 @@ %A = bufferization.alloc_tensor() : tensor<64xf32> %B = bufferization.alloc_tensor() : tensor<64xf32> %C = bufferization.alloc_tensor() : tensor - %AA = linalg.fill ins(%v1 : f32) outs(%A : tensor<64xf32>) -> tensor<64xf32> - %BB = linalg.fill ins(%v2 : f32) outs(%B : tensor<64xf32>) -> tensor<64xf32> - %CC = linalg.fill ins(%v0 : f32) outs(%C : tensor) -> tensor + %AA = linalg.fill ins(%v1 : f32) inits(%A : tensor<64xf32>) -> tensor<64xf32> + %BB = linalg.fill ins(%v2 : f32) inits(%B : tensor<64xf32>) -> tensor<64xf32> + %CC = linalg.fill ins(%v0 : f32) inits(%C : tensor) -> tensor %res = call @init_and_dot(%AA, %BB, %CC) : (tensor<64xf32>, tensor<64xf32>, tensor) -> tensor diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir --- a/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir +++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-tensor-matmul.mlir @@ -23,7 +23,7 @@ %C = arith.constant dense<1000.0> : tensor<2x4xf32> %D = linalg.matmul ins(%A, %B: tensor<2x3xf32>, tensor<3x4xf32>) - outs(%C: tensor<2x4xf32>) -> tensor<2x4xf32> + inits(%C: tensor<2x4xf32>) -> tensor<2x4xf32> %unranked = tensor.cast %D : tensor<2x4xf32> to tensor<*xf32> call @printMemrefF32(%unranked) : (tensor<*xf32>) -> () diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output.mlir @@ -50,7 +50,7 @@ %init = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_assign ins(%arga: tensor) - outs(%init: tensor) { + inits(%init: tensor) { ^bb(%a: f64, %x: f64): %0 = arith.mulf %a, %c2 : f64 linalg.yield %0 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_bf16.mlir @@ -28,7 +28,7 @@ %xv = bufferization.alloc_tensor (%d) : tensor %0 = linalg.generic #trait_vec_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: bf16, %b: bf16, %x: bf16): %1 = sparse_tensor.binary %a, %b : bf16, bf16 to bf16 overlap={ diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/dense_output_f16.mlir @@ -26,7 +26,7 @@ %xv = bufferization.alloc_tensor (%d) : tensor %0 = linalg.generic #trait_vec_op ins(%arga, %argb: tensor, tensor) - 
outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f16, %b: f16, %x: f16): %1 = sparse_tensor.binary %a, %b : f16, f16 to f16 overlap={ diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_abs.mlir @@ -23,7 +23,7 @@ %xin = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op ins(%arg0: tensor) - outs(%xin: tensor) { + inits(%xin: tensor) { ^bb0(%a: f64, %x: f64) : %result = math.absf %a : f64 linalg.yield %result : f64 @@ -38,7 +38,7 @@ %xin = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op ins(%arg0: tensor) - outs(%xin: tensor) { + inits(%xin: tensor) { ^bb0(%a: i32, %x: i32) : %result = math.absi %a : i32 linalg.yield %result : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_binary.mlir @@ -50,7 +50,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_vec_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: i32, %b: i32, %x: i32): %1 = sparse_tensor.binary %a, %b : i32, i32 to i32 overlap={ @@ -74,7 +74,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_vec_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %b: f64, %x: f64): %1 = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap={ @@ -98,7 +98,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_vec_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %b: f64, %x: f64): %1 = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap={} @@ -116,7 +116,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_vec_scale ins(%arga: tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %x: i32): %idx = linalg.index 0 : index %1 = sparse_tensor.binary %a, %idx : f64, index to i32 @@ -143,7 +143,7 @@ %xv = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_mat_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %b: f64, %x: f64): %1 = sparse_tensor.binary %a, %b: f64, f64 to f64 overlap={ @@ -169,7 +169,7 @@ %0 = linalg.generic #trait_mat_op ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) { + inits(%C: tensor<4x4xf64, #DCSR>) { ^bb0(%a: f64, %b: f64, %c: f64) : %result = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap={ @@ -191,7 +191,7 @@ %0 = linalg.generic #trait_mat_op ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) { + inits(%C: tensor<4x4xf64, #DCSR>) { ^bb0(%a: f64, %b: f64, %c: f64) : %result = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap={ @@ -219,7 +219,7 @@ %0 = linalg.generic #trait_mat_op ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) { + inits(%C: tensor<4x4xf64, #DCSR>) { ^bb0(%a: f64, %b: f64, %c: f64) : %row = linalg.index 0 : index %col = linalg.index 1 : index @@ -256,7 +256,7 @@ %0 = linalg.generic #trait_mat_op ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, 
#DCSR>) { + inits(%C: tensor<4x4xf64, #DCSR>) { ^bb0(%a: f64, %b: f64, %c: f64) : %result = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap={ @@ -301,7 +301,7 @@ %0 = linalg.generic #trait_mat_op ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) - outs(%C: tensor<4x4xi8, #DCSR>) { + inits(%C: tensor<4x4xi8, #DCSR>) { ^bb0(%a: f64, %b: f64, %c: i8) : %result = sparse_tensor.binary %a, %b : f64, f64 to i8 overlap={ @@ -324,7 +324,7 @@ %0 = linalg.generic #trait_mat_op ins(%A, %B: tensor<4x4xf64, #DCSR>, tensor<4x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) { + inits(%C: tensor<4x4xf64, #DCSR>) { ^bb0(%a: f64, %b: f64, %c: f64) : %result = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap={} diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_cast.mlir @@ -41,7 +41,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: i32, %x : f32): %cst = arith.sitofp %a : i32 to f32 linalg.yield %cst : f32 @@ -52,7 +52,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: i32, %x : f32): %cst = arith.uitofp %a : i32 to f32 linalg.yield %cst : f32 @@ -63,7 +63,7 @@ %argb: tensor<10xi32>) -> tensor<10xi32> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xf32, #SV>) - outs(%argb: tensor<10xi32>) { + inits(%argb: tensor<10xi32>) { ^bb(%a: f32, %x : i32): %cst = arith.fptosi %a : f32 to i32 linalg.yield %cst : i32 @@ -74,7 +74,7 @@ %argb: tensor<10xi32>) -> tensor<10xi32> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xf64, #SV>) - outs(%argb: tensor<10xi32>) { + inits(%argb: tensor<10xi32>) { ^bb(%a: f64, %x : i32): %cst = arith.fptoui %a : f64 to i32 linalg.yield %cst : i32 @@ -85,7 +85,7 @@ %argb: tensor<10xf64>) -> tensor<10xf64> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xf32, #SV>) - outs(%argb: tensor<10xf64>) { + inits(%argb: tensor<10xf64>) { ^bb(%a: f32, %x : f64): %cst = arith.extf %a : f32 to f64 linalg.yield %cst : f64 @@ -96,7 +96,7 @@ %argb: tensor<10xf32>) -> tensor<10xf32> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xf64, #SV>) - outs(%argb: tensor<10xf32>) { + inits(%argb: tensor<10xf32>) { ^bb(%a: f64, %x : f32): %cst = arith.truncf %a : f64 to f32 linalg.yield %cst : f32 @@ -107,7 +107,7 @@ %argb: tensor<10xi64>) -> tensor<10xi64> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xi64>) { + inits(%argb: tensor<10xi64>) { ^bb(%a: i32, %x : i64): %cst = arith.extsi %a : i32 to i64 linalg.yield %cst : i64 @@ -118,7 +118,7 @@ %argb: tensor<10xi64>) -> tensor<10xi64> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xi64>) { + inits(%argb: tensor<10xi64>) { ^bb(%a: i32, %x : i64): %cst = arith.extui %a : i32 to i64 linalg.yield %cst : i64 @@ -129,7 +129,7 @@ %argb: tensor<10xi8>) -> tensor<10xi8> { %0 = linalg.generic #trait_cast ins(%arga: tensor<10xi32, #SV>) - outs(%argb: tensor<10xi8>) { + inits(%argb: tensor<10xi8>) { ^bb(%a: i32, %x : i8): %cst = arith.trunci %a : i32 to i8 linalg.yield %cst : i8 @@ -140,7 +140,7 @@ %argb: tensor<10xi32>) -> tensor<10xi32> { %0 = linalg.generic 
#trait_cast ins(%arga: tensor<10xf32, #SV>) - outs(%argb: tensor<10xi32>) { + inits(%argb: tensor<10xi32>) { ^bb(%a: f32, %x : i32): %cst = arith.bitcast %a : f32 to i32 linalg.yield %cst : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex32.mlir @@ -26,7 +26,7 @@ %0 = linalg.generic #trait_op ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %b: complex, %x: complex): %1 = complex.add %a, %b : complex linalg.yield %1 : complex @@ -43,7 +43,7 @@ %0 = linalg.generic #trait_op ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %b: complex, %x: complex): %1 = complex.mul %a, %b : complex linalg.yield %1 : complex diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex64.mlir @@ -26,7 +26,7 @@ %0 = linalg.generic #trait_op ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %b: complex, %x: complex): %1 = complex.add %a, %b : complex linalg.yield %1 : complex @@ -43,7 +43,7 @@ %0 = linalg.generic #trait_op ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %b: complex, %x: complex): %1 = complex.mul %a, %b : complex linalg.yield %1 : complex diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_complex_ops.mlir @@ -35,7 +35,7 @@ %0 = linalg.generic #trait_op2 ins(%arga, %argb: tensor, #SparseVector>, tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %b: complex, %x: complex): %1 = complex.neg %b : complex %2 = complex.sub %a, %1 : complex @@ -51,7 +51,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector> %0 = linalg.generic #trait_op1 ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %x: complex): %1 = complex.sin %a : complex linalg.yield %1 : complex @@ -66,7 +66,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector> %0 = linalg.generic #trait_op1 ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %x: complex): %1 = complex.sqrt %a : complex linalg.yield %1 : complex @@ -81,7 +81,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor, #SparseVector> %0 = linalg.generic #trait_op1 ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %x: complex): %1 = complex.tanh %a : complex linalg.yield %1 : complex @@ -96,7 +96,7 @@ %xv = 
bufferization.alloc_tensor(%d) : tensor, #SparseVector> %0 = linalg.generic #trait_op1 ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %x: complex): %1 = complex.log1p %a : complex %2 = complex.expm1 %1 : complex @@ -113,7 +113,7 @@ %c = complex.constant [2.0 : f64, 0.0 : f64] : complex %0 = linalg.generic #trait_op1 ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor, #SparseVector>) { + inits(%xv: tensor, #SparseVector>) { ^bb(%a: complex, %x: complex): %1 = complex.div %a, %c : complex linalg.yield %1 : complex @@ -128,7 +128,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op1 ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: complex, %x: f64): %1 = complex.abs %a : complex linalg.yield %1 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_dot.mlir @@ -15,7 +15,7 @@ %x: tensor) -> tensor { %dot = linalg.dot ins(%a, %b: tensor<1024xf32, #SparseVector>, tensor<1024xf32, #SparseVector>) - outs(%x: tensor) -> tensor + inits(%x: tensor) -> tensor return %dot : tensor } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_expand.mlir @@ -19,7 +19,7 @@ %C = bufferization.alloc_tensor() : tensor<8x4xf64, #CSC> %D = linalg.matmul ins(%A, %B: tensor<8x2xf64, #CSC>, tensor<2x4xf64, #CSC>) - outs(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> + inits(%C: tensor<8x4xf64, #CSC>) -> tensor<8x4xf64, #CSC> return %D: tensor<8x4xf64, #CSC> } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_filter_conv2d.mlir @@ -13,7 +13,7 @@ %output: tensor<6x6xi32>) -> tensor<6x6xi32> { %0 = linalg.conv_2d ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>) - outs (%output: tensor<6x6xi32>) -> tensor<6x6xi32> + inits (%output: tensor<6x6xi32>) -> tensor<6x6xi32> return %0 : tensor<6x6xi32> } @@ -22,7 +22,7 @@ %s = bufferization.alloc_tensor() : tensor<6x6xi32, #DCSR> %0 = linalg.conv_2d ins (%input, %filter: tensor<8x8xi32>, tensor<3x3xi32, #DCSR>) - outs (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> + inits (%s: tensor<6x6xi32, #DCSR>) -> tensor<6x6xi32, #DCSR> return %0 : tensor<6x6xi32, #DCSR> } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_flatten.mlir @@ -40,7 +40,7 @@ -> tensor<7x3xf64> { %0 = linalg.generic #trait_flatten ins(%arga: tensor<7x3x3x3x3x3x5x3xf64, #SparseTensor>) - outs(%argx: tensor<7x3xf64>) { + inits(%argx: tensor<7x3xf64>) { ^bb(%a: f64, %x: f64): %0 = arith.addf %x, %a : f64 linalg.yield %0 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir 
b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index.mlir @@ -43,7 +43,7 @@ %init = bufferization.alloc_tensor() : tensor<8xi64, #SparseVector> %r = linalg.generic #trait_1d ins(%arga: tensor<8xi64, #SparseVector>) - outs(%init: tensor<8xi64, #SparseVector>) { + inits(%init: tensor<8xi64, #SparseVector>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %ii = arith.index_cast %i : index to i64 @@ -61,7 +61,7 @@ %init = bufferization.alloc_tensor() : tensor<8xi64, #SparseVector> %r = linalg.generic #trait_1d ins(%arga: tensor<8xi64, #SparseVector>) - outs(%init: tensor<8xi64, #SparseVector>) { + inits(%init: tensor<8xi64, #SparseVector>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %ii = arith.index_cast %i : index to i64 @@ -79,7 +79,7 @@ %init = bufferization.alloc_tensor() : tensor<3x4xi64, #SparseMatrix> %r = linalg.generic #trait_2d ins(%arga: tensor<3x4xi64, #SparseMatrix>) - outs(%init: tensor<3x4xi64, #SparseMatrix>) { + inits(%init: tensor<3x4xi64, #SparseMatrix>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -100,7 +100,7 @@ %init = bufferization.alloc_tensor() : tensor<3x4xi64, #SparseMatrix> %r = linalg.generic #trait_2d ins(%arga: tensor<3x4xi64, #SparseMatrix>) - outs(%init: tensor<3x4xi64, #SparseMatrix>) { + inits(%init: tensor<3x4xi64, #SparseMatrix>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -118,7 +118,7 @@ %0 = bufferization.alloc_tensor() : tensor<2x3xf32, #SparseMatrix> %1 = linalg.generic #trait_2d ins(%arg0 : tensor<2x3xf32, #SparseMatrix>) - outs(%0 : tensor<2x3xf32, #SparseMatrix>) { + inits(%0 : tensor<2x3xf32, #SparseMatrix>) { ^bb0(%arg1: f32, %arg2: f32): %2 = linalg.index 0 : index %3 = arith.index_cast %2 : index to i64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_index_dense.mlir @@ -41,7 +41,7 @@ %out: tensor<8xi64>) -> tensor<8xi64> { %r = linalg.generic #trait_1d ins(%arga: tensor<8xi64, #SparseVector>) - outs(%out: tensor<8xi64>) { + inits(%out: tensor<8xi64>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %ii = arith.index_cast %i : index to i64 @@ -58,7 +58,7 @@ %out: tensor<8xi64>) -> tensor<8xi64> { %r = linalg.generic #trait_1d ins(%arga: tensor<8xi64, #SparseVector>) - outs(%out: tensor<8xi64>) { + inits(%out: tensor<8xi64>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %ii = arith.index_cast %i : index to i64 @@ -75,7 +75,7 @@ %out: tensor<3x4xi64>) -> tensor<3x4xi64> { %r = linalg.generic #trait_2d ins(%arga: tensor<3x4xi64, #SparseMatrix>) - outs(%out: tensor<3x4xi64>) { + inits(%out: tensor<3x4xi64>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index @@ -95,7 +95,7 @@ %out: tensor<3x4xi64>) -> tensor<3x4xi64> { %r = linalg.generic #trait_2d ins(%arga: tensor<3x4xi64, #SparseMatrix>) - outs(%out: tensor<3x4xi64>) { + inits(%out: tensor<3x4xi64>) { ^bb(%a: i64, %x: i64): %i = linalg.index 0 : index %j = linalg.index 1 : index diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir --- 
a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matmul.mlir @@ -29,7 +29,7 @@ %C: tensor<4x4xf64>) -> tensor<4x4xf64> { %D = linalg.matmul ins(%A, %B: tensor<4x8xf64>, tensor<8x4xf64>) - outs(%C: tensor<4x4xf64>) -> tensor<4x4xf64> + inits(%C: tensor<4x4xf64>) -> tensor<4x4xf64> return %D: tensor<4x4xf64> } @@ -41,7 +41,7 @@ %C = bufferization.alloc_tensor() : tensor<4x4xf64, #CSR> %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #CSR>, tensor<8x4xf64, #CSR>) - outs(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> + inits(%C: tensor<4x4xf64, #CSR>) -> tensor<4x4xf64, #CSR> return %D: tensor<4x4xf64, #CSR> } @@ -53,7 +53,7 @@ %C = bufferization.alloc_tensor() : tensor<4x4xf64, #DCSR> %D = linalg.matmul ins(%A, %B: tensor<4x8xf64, #DCSR>, tensor<8x4xf64, #DCSR>) - outs(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> + inits(%C: tensor<4x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> return %D: tensor<4x4xf64, #DCSR> } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matrix_ops.mlir @@ -45,7 +45,7 @@ %xm = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_scale ins(%arga: tensor) - outs(%xm: tensor) { + inits(%xm: tensor) { ^bb(%a: f64, %x: f64): %1 = arith.mulf %a, %s : f64 linalg.yield %1 : f64 @@ -57,7 +57,7 @@ func.func @matrix_scale_inplace(%argx: tensor) -> tensor { %s = arith.constant 2.0 : f64 %0 = linalg.generic #trait_scale_inpl - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%x: f64): %1 = arith.mulf %x, %s : f64 linalg.yield %1 : f64 @@ -75,7 +75,7 @@ %xv = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %b: f64, %x: f64): %1 = arith.addf %a, %b : f64 linalg.yield %1 : f64 @@ -93,7 +93,7 @@ %xv = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_op ins(%arga, %argb: tensor, tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %b: f64, %x: f64): %1 = arith.mulf %a, %b : f64 linalg.yield %1 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_matvec.mlir @@ -49,7 +49,7 @@ -> tensor { %0 = linalg.generic #matvec ins(%arga, %argb: tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.muli %a, %b : i32 %1 = arith.addi %x, %0 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_mttkrp.mlir @@ -40,7 +40,7 @@ %0 = linalg.generic #mttkrp ins(%argb, %argc, %argd: tensor, tensor, tensor) - outs(%arga: tensor) { + inits(%arga: tensor) { ^bb(%b: f64, %c: f64, %d: f64, %a: f64): %0 = arith.mulf %b, %c : f64 %1 = arith.mulf %d, %0 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir 
b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_mult_elt.mlir @@ -25,7 +25,7 @@ %argx = bufferization.alloc_tensor() : tensor<32x16xf32, #DCSR> %0 = linalg.generic #trait_mult_elt ins(%arga, %argb: tensor<32x16xf32, #DCSR>, tensor<32x16xf32, #DCSR>) - outs(%argx: tensor<32x16xf32, #DCSR>) { + inits(%argx: tensor<32x16xf32, #DCSR>) { ^bb(%a: f32, %b: f32, %x: f32): %1 = arith.mulf %a, %b : f32 linalg.yield %1 : f32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_reduction.mlir @@ -34,7 +34,7 @@ %0 = linalg.generic #redsum ins(%arga, %argb: tensor, tensor) - outs(%xinit: tensor) { + inits(%xinit: tensor) { ^bb(%a: i32, %b: i32, %x: i32): %0 = arith.muli %a, %b : i32 %1 = arith.addi %x, %0 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_out_simple.mlir @@ -35,7 +35,7 @@ func.func @kernel_eltwise_mult(%argx: tensor) -> tensor { %0 = linalg.generic #eltwise_mult - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%x: f64): %0 = arith.mulf %x, %x : f64 linalg.yield %0 : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_quantized_matmul.mlir @@ -18,7 +18,7 @@ %c2 = arith.constant 2 : i32 %0 = linalg.quantized_matmul ins(%input1, %input2, %c2, %c0 : tensor<5x3xi8>, tensor<3x6xi8, #DCSR>, i32, i32) - outs(%output : tensor<5x6xi32>) -> tensor<5x6xi32> + inits(%output : tensor<5x6xi32>) -> tensor<5x6xi32> return %0: tensor<5x6xi32> } diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_re_im.mlir @@ -23,7 +23,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: complex, %x: f32): %1 = complex.re %a : complex linalg.yield %1 : f32 @@ -38,7 +38,7 @@ %xv = bufferization.alloc_tensor(%d) : tensor %0 = linalg.generic #trait_op ins(%arga: tensor, #SparseVector>) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: complex, %x: f32): %1 = complex.im %a : complex linalg.yield %1 : f32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom.mlir @@ -50,7 +50,7 @@ %xv = bufferization.alloc_tensor(%d0): tensor %0 = linalg.generic #trait_mat_reduce_rowwise ins(%arga: tensor) - outs(%xv: tensor) { + 
inits(%xv: tensor) { ^bb(%a: f64, %b: f64): %1 = sparse_tensor.reduce %a, %b, %cf1 : f64 { ^bb0(%x: f64, %y: f64): @@ -69,7 +69,7 @@ %xv = bufferization.alloc_tensor(%d0): tensor %0 = linalg.generic #trait_mat_reduce_rowwise ins(%arga: tensor) - outs(%xv: tensor) { + inits(%xv: tensor) { ^bb(%a: f64, %b: f64): %1 = sparse_tensor.reduce %a, %b, %cf1 : f64 { ^bb0(%x: f64, %y: f64): @@ -91,7 +91,7 @@ %xm = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_matmul ins(%arga, %argb: tensor, tensor) - outs(%xm: tensor) { + inits(%xm: tensor) { ^bb(%a: f64, %b: f64, %output: f64): %1 = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap = { @@ -122,7 +122,7 @@ %xm = bufferization.alloc_tensor(%d0, %d1) : tensor %0 = linalg.generic #trait_matmul ins(%arga, %argb: tensor, tensor) - outs(%xm: tensor) { + inits(%xm: tensor) { ^bb(%a: f64, %b: f64, %output: f64): %1 = sparse_tensor.binary %a, %b : f64, f64 to f64 overlap = { diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reductions.mlir @@ -22,7 +22,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xi32, #SV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: i32, %x: i32): %0 = arith.addi %x, %a : i32 linalg.yield %0 : i32 @@ -34,7 +34,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xf32, #SV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.addf %x, %a : f32 linalg.yield %0 : f32 @@ -46,7 +46,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xi32, #DV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: i32, %x: i32): %0 = arith.muli %x, %a : i32 linalg.yield %0 : i32 @@ -58,7 +58,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xf32, #DV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: f32, %x: f32): %0 = arith.mulf %x, %a : f32 linalg.yield %0 : f32 @@ -70,7 +70,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xi32, #DV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: i32, %x: i32): %0 = arith.andi %x, %a : i32 linalg.yield %0 : i32 @@ -82,7 +82,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xi32, #SV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: i32, %x: i32): %0 = arith.ori %x, %a : i32 linalg.yield %0 : i32 @@ -94,7 +94,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_reduction ins(%arga: tensor<32xi32, #SV>) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%a: i32, %x: i32): %0 = arith.xori %x, %a : i32 linalg.yield %0 : i32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_matmul.mlir @@ -39,7 +39,7 @@ %argx: tensor) -> tensor { %0 = linalg.generic #trait_sampled_dense_dense ins(%args, %arga, %argb: tensor, tensor, tensor) - outs(%argx: tensor) { + inits(%argx: tensor) { ^bb(%s: f32, %a: f32, %b: f32, %x: f32): %0 = arith.mulf %a, %b : f32 %1 = arith.mulf %s, %0 : f32 diff --git 
a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sampled_mm_fusion.mlir @@ -49,7 +49,7 @@ %2 = linalg.generic #trait_sampled_dense_dense ins(%args, %arga, %argb: tensor<8x8xf64, #SM>, tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1: tensor<8x8xf64>) { + inits(%1: tensor<8x8xf64>) { ^bb(%s: f64, %a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.mulf %s, %p : f64 @@ -70,7 +70,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -79,7 +79,7 @@ // Sample the result with element-wise multiplication with the sparse matrix. %3 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 @@ -98,7 +98,7 @@ %2 = linalg.generic #trait_sampled_dense_dense ins(%args, %arga, %argb: tensor<8x8xf64, #SM>, tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1: tensor<8x8xf64, #SM>) { + inits(%1: tensor<8x8xf64, #SM>) { ^bb(%s: f64, %a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.mulf %s, %p : f64 @@ -120,7 +120,7 @@ %1 = arith.constant dense<0.0> : tensor<8x8xf64> %2 = linalg.generic #trait_matmul ins(%arga, %argb : tensor<8x8xf64>, tensor<8x8xf64>) - outs(%1 : tensor<8x8xf64>) { + inits(%1 : tensor<8x8xf64>) { ^bb0(%a: f64, %b: f64, %x: f64): %p = arith.mulf %a, %b : f64 %q = arith.addf %x, %p : f64 @@ -130,7 +130,7 @@ %3 = bufferization.alloc_tensor() : tensor<8x8xf64, #SM> %4 = linalg.generic #trait_scale ins(%2, %args : tensor<8x8xf64>, tensor<8x8xf64, #SM>) - outs(%3 : tensor<8x8xf64, #SM>) { + inits(%3 : tensor<8x8xf64, #SM>) { ^bb0(%t: f64, %s: f64, %x: f64): %r = arith.mulf %t, %s : f64 linalg.yield %r : f64 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scale.mlir @@ -26,7 +26,7 @@ func.func @sparse_scale(%argx: tensor<8x8xf32, #CSR>) -> tensor<8x8xf32, #CSR> { %c = arith.constant 2.0 : f32 %0 = linalg.generic #trait_scale - outs(%argx: tensor<8x8xf32, #CSR>) { + inits(%argx: tensor<8x8xf32, #CSR>) { ^bb(%x: f32): %1 = arith.mulf %x, %c : f32 linalg.yield %1 : f32 diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_scf_nested.mlir @@ -18,7 +18,7 @@ indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel", "parallel"]} ins(%arg0, %cst_3 : tensor<2x3x4xf64, #SparseMatrix>, tensor<2x3x4xf64>) - outs(%2 : tensor<2x3x4xf64, #SparseMatrix>) { + inits(%2 : tensor<2x3x4xf64, #SparseMatrix>) { ^bb0(%arg1: f64, %arg2: f64, %arg3: f64): %4 = arith.subf %arg1, %arg2 : f64 linalg.yield %4 : f64 @@ -32,7 +32,7 @@ indexing_maps = [#map, #map, #map],
 iterator_types = ["parallel", "parallel", "parallel"]}
 ins(%arg0, %cst_3 : tensor<2x3x4xf64, #SparseMatrix>, tensor<2x3x4xf64>)
- outs(%2 : tensor<2x3x4xf64, #SparseMatrix>) {
+ inits(%2 : tensor<2x3x4xf64, #SparseMatrix>) {
 ^bb0(%arg1: f64, %arg2: f64, %arg3: f64):
 %4 = arith.addf %arg1, %arg2 : f64
 linalg.yield %4 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_select.mlir
@@ -38,7 +38,7 @@
 %xv = bufferization.alloc_tensor(%d0): tensor
 %0 = linalg.generic #trait_vec_select
 ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %b: f64):
 %1 = sparse_tensor.select %a : f64 {
 ^bb0(%x: f64):
@@ -58,7 +58,7 @@
 %xv = bufferization.alloc_tensor(%d0, %d1): tensor
 %0 = linalg.generic #trait_mat_select
 ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %b: f64):
 %row = linalg.index 0 : index
 %col = linalg.index 1 : index
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sign.mlir
@@ -33,7 +33,7 @@
 %xin = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_op
 ins(%arg0: tensor)
- outs(%xin: tensor) {
+ inits(%xin: tensor) {
 ^bb0(%a: f64, %x: f64) :
 %result = sparse_tensor.unary %a : f64 to f64
 present={
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sorted_coo.mlir
@@ -49,7 +49,7 @@
 -> tensor {
 %c = arith.constant 2.0 : f64
 %0 = linalg.generic #trait_scale
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%x: f64):
 %1 = arith.mulf %x, %c : f64
 linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_spmm.mlir
@@ -36,7 +36,7 @@
 %argx: tensor) -> tensor {
 %0 = linalg.generic #spmm
 ins(%arga, %argb: tensor, tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %0 = arith.mulf %a, %b : f64
 %1 = arith.addf %x, %0 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum.mlir
@@ -33,7 +33,7 @@
 %argx: tensor) -> tensor {
 %0 = linalg.generic #trait_sum_reduce
 ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%a: f64, %x: f64):
 %0 = arith.addf %x, %a : f64
 linalg.yield %0 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_bf16.mlir
@@ -29,7 +29,7 @@
 %argx: tensor) -> tensor {
 %0 = linalg.generic #trait_sum_reduce
 ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%a: bf16, %x: bf16):
 %0 = arith.addf %x, %a : bf16
 linalg.yield %0 : bf16
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_c32.mlir
@@ -33,7 +33,7 @@
 %argx: tensor>) -> tensor> {
 %0 = linalg.generic #trait_sum_reduce
 ins(%arga: tensor, #SparseMatrix>)
- outs(%argx: tensor>) {
+ inits(%argx: tensor>) {
 ^bb(%a: complex, %x: complex):
 %0 = complex.add %x, %a : complex
 linalg.yield %0 : complex
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_sum_f16.mlir
@@ -27,7 +27,7 @@
 %argx: tensor) -> tensor {
 %0 = linalg.generic #trait_sum_reduce
 ins(%arga: tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%a: f16, %x: f16):
 %0 = arith.addf %x, %a : f16
 linalg.yield %0 : f16
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tanh.mlir
@@ -19,7 +19,7 @@
 func.func @sparse_tanh(%vec: tensor) -> tensor {
 %0 = linalg.generic #trait_op
- outs(%vec: tensor) {
+ inits(%vec: tensor) {
 ^bb(%x: f64):
 %1 = math.tanh %x : f64
 linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_mul.mlir
@@ -32,7 +32,7 @@
 %xt = bufferization.alloc_tensor(%d0, %d1, %d2) : tensor
 %0 = linalg.generic #trait_mul
 ins(%arga, %argb: tensor, tensor)
- outs(%xt: tensor) {
+ inits(%xt: tensor) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %1 = arith.mulf %a, %b : f64
 linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_tensor_ops.mlir
@@ -32,7 +32,7 @@
 %xm = bufferization.alloc_tensor(%d0, %d1, %d2) : tensor
 %0 = linalg.generic #trait_scale
 ins(%arga: tensor)
- outs(%xm: tensor) {
+ inits(%xm: tensor) {
 ^bb(%a: f64, %x: f64):
 %1 = arith.mulf %a, %s : f64
 linalg.yield %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_transpose.mlir
@@ -37,7 +37,7 @@
 %i = bufferization.alloc_tensor() : tensor<4x3xf64, #DCSR>
 %0 = linalg.generic #transpose_trait
 ins(%t: tensor<3x4xf64, #DCSC>)
- outs(%i: tensor<4x3xf64, #DCSR>) {
+ inits(%i: tensor<4x3xf64, #DCSR>) {
 ^bb(%a: f64, %x: f64):
 linalg.yield %a : f64
 } -> tensor<4x3xf64, #DCSR>
@@ -56,7 +56,7 @@
 %i = bufferization.alloc_tensor() : tensor<4x3xf64, #DCSR>
 %0 = linalg.generic #transpose_trait
 ins(%arga: tensor<3x4xf64, #DCSR>)
- outs(%i: tensor<4x3xf64, #DCSR>) {
+ inits(%i: tensor<4x3xf64, #DCSR>) {
 ^bb(%a: f64, %x: f64):
 linalg.yield %a : f64
 } -> tensor<4x3xf64, #DCSR>
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_unary.mlir
@@ -35,7 +35,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_vec_scale
 ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %x: i32):
 %1 = sparse_tensor.unary %a : f64 to i32
 present={}
@@ -55,7 +55,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_vec_scale
 ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %x: f64):
 %1 = sparse_tensor.unary %a : f64 to f64
 present={
@@ -78,7 +78,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_vec_scale
 ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %x: f64):
 %idx = linalg.index 0 : index
 %1 = sparse_tensor.unary %a : f64 to f64
@@ -106,7 +106,7 @@
 %xv = bufferization.alloc_tensor(%d0, %d1) : tensor
 %0 = linalg.generic #trait_mat_scale
 ins(%argx: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %x: f64):
 %1 = sparse_tensor.unary %a: f64 to f64
 present={
@@ -133,7 +133,7 @@
 %xv = bufferization.alloc_tensor(%d0, %d1) : tensor
 %0 = linalg.generic #trait_mat_scale
 ins(%argx: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %x: f64):
 %row = linalg.index 0 : index
 %col = linalg.index 1 : index
diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir
--- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir
+++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_vector_ops.mlir
@@ -53,7 +53,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_scale
 ins(%arga: tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %x: f64):
 %1 = arith.mulf %a, %s : f64
 linalg.yield %1 : f64
@@ -65,7 +65,7 @@
 func.func @vector_scale_inplace(%argx: tensor) -> tensor {
 %s = arith.constant 2.0 : f64
 %0 = linalg.generic #trait_scale_inpl
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%x: f64):
 %1 = arith.mulf %x, %s : f64
 linalg.yield %1 : f64
@@ -81,7 +81,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_op
 ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %1 = arith.addf %a, %b : f64
 linalg.yield %1 : f64
@@ -97,7 +97,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_op
 ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %1 = arith.mulf %a, %b : f64
 linalg.yield %1 : f64
@@ -113,7 +113,7 @@
 %xv = bufferization.alloc_tensor(%d) : tensor
 %0 = linalg.generic #trait_op
 ins(%arga, %argb: tensor, tensor)
- outs(%xv: tensor) {
+ inits(%xv: tensor) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %1 = arith.mulf %a, %b : f64
 linalg.yield %1 : f64
@@ -127,7 +127,7 @@
 %argx: tensor) -> tensor {
 %0 = linalg.generic #trait_dot
 ins(%arga, %argb: tensor, tensor)
- outs(%argx: tensor) {
+ inits(%argx: tensor) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %1 = arith.mulf %a, %b : f64
 %2 = arith.addf %x, %1 : f64
diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py
--- a/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py
+++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_elementwise_add_sparse_output.py
@@ -38,7 +38,7 @@
 %argx = bufferization.alloc_tensor() : tensor<3x4xf64, #DCSR>
 %0 = linalg.generic #trait_add_elt
 ins(%arga, %argb: tensor<3x4xf64, #DCSR>, tensor<3x4xf64, #DCSR>)
- outs(%argx: tensor<3x4xf64, #DCSR>) {
+ inits(%argx: tensor<3x4xf64, #DCSR>) {
 ^bb(%a: f64, %b: f64, %x: f64):
 %1 = arith.addf %a, %b : f64
 linalg.yield %1 : f64
diff --git a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir
@@ -3,7 +3,7 @@
 func.func @gemm(%arg0 : memref, %arg1 : memref, %arg2 : memref) {
 linalg.matmul ins(%arg0, %arg1 : memref, memref)
- outs(%arg2 : memref)
+ inits(%arg2 : memref)
 return
 }
 // CHECK-LABEL: func @gemm
@@ -34,7 +34,7 @@
 affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1, d0)>],
 iterator_types = ["parallel", "parallel"]}
 ins(%arg0, %arg1, %arg2 : memref<200x300xi32>, memref<300xi16>, memref<200xi8>)
- outs(%arg3 : memref<300x200xi64>) {
+ inits(%arg3 : memref<300x200xi64>) {
 ^bb0(%b0 : i32, %b1 : i16, %b2 : i8, %b3 : i64):
 %0 = linalg.index 0 : index
 %1 = arith.index_cast %0 : index to i16
@@ -84,7 +84,7 @@
 strides = dense<[1, 2]> : tensor<2xi64>,
 dilations = dense<[3, 4]> : tensor<2xi64>}
 ins(%arg0, %arg1 : memref, memref)
- outs(%arg2 : memref)
+ inits(%arg2 : memref)
 return
 }
 // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d1 + d4 * 3)>
@@ -128,7 +128,7 @@
 strides = dense<[1, 2]> : tensor<2xi64>,
 dilations = dense<[3, 4]> : tensor<2xi64>}
 ins(%arg0, %arg1 : memref, memref)
- outs(%arg2 : memref)
+ inits(%arg2 : memref)
 return
 }
 // CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5) -> (d1 + d4 * 3)>
@@ -165,7 +165,7 @@
 func.func @map(%lhs: memref<64xf32>, %rhs: memref<64xf32>, %out: memref<64xf32>) {
 linalg.map ins(%lhs, %rhs : memref<64xf32>, memref<64xf32>)
- outs(%out : memref<64xf32>)
+ inits(%out : memref<64xf32>)
 (%in: f32, %in_0: f32) {
 %0 = arith.addf %in, %in_0 : f32
 linalg.yield %0 : f32
@@ -192,7 +192,7 @@
 func.func @transpose(%arg0: memref<16x32x64xf32>, %arg1: memref<32x64x16xf32>) {
 linalg.transpose ins(%arg0 : memref<16x32x64xf32>)
- outs(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0]
+ inits(%arg1 : memref<32x64x16xf32>) permutation = [1, 2, 0]
 return
 }
 // CHECK-LABEL: func.func @transpose(
@@ -216,7 +216,7 @@
 func.func @reduce(%arg0: memref<16x32x64xf32>, %arg1: memref<16x64xf32>) {
 linalg.reduce ins(%arg0 : memref<16x32x64xf32>)
- outs(%arg1 : memref<16x64xf32>) dimensions = [1]
+ inits(%arg1 : memref<16x64xf32>) dimensions = [1]
 (%in: f32, %init: f32) {
 %0 = arith.addf %in, %init : f32
 linalg.yield %0 : f32
@@ -247,7 +247,7 @@
 %init: memref<8x16x32xf32>) {
 linalg.broadcast ins(%input:memref<8x32xf32>)
- outs(%init:memref<8x16x32xf32>)
+ inits(%init:memref<8x16x32xf32>)
 dimensions = [0, 2]
 func.return
 }
diff --git a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-and-fuse-using-interface.mlir
@@ -7,10 +7,10 @@
 %d0 = tensor.dim %arg0, %c0 : tensor
 %d1 = tensor.dim %arg1, %c1 : tensor
 %init = tensor.empty(%d0, %d1) : tensor
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+ %fill = linalg.fill ins(%cst : f32) inits(%init : tensor) -> tensor
 %gemm = linalg.matmul {__internal_linalg_transform__ = "fusion"}
 ins(%arg0, %arg1 : tensor, tensor)
- outs(%fill : tensor) -> tensor
+ inits(%fill : tensor) -> tensor
 return %gemm : tensor
 }
 // CHECK: func.func @gemm_fill_fusion(
@@ -25,10 +25,10 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -42,15 +42,15 @@
 %d0 = tensor.dim %arg0, %c0 : tensor
 %d1 = tensor.dim %arg1, %c1 : tensor
 %init = tensor.empty(%d0, %d1) : tensor
- %fill = linalg.fill ins(%cst : f32) outs(%init : tensor) -> tensor
+ %fill = linalg.fill ins(%cst : f32) inits(%init : tensor) -> tensor
 %gemm = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%fill : tensor) -> tensor
+ inits(%fill : tensor) -> tensor
 %generic = linalg.generic {
 __internal_linalg_transform__ = "fusion",
 indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>],
 iterator_types = ["parallel", "parallel"]}
- ins(%gemm, %arg2 : tensor, tensor) outs(%init : tensor) {
+ ins(%gemm, %arg2 : tensor, tensor) inits(%init : tensor) {
 ^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
 %add = arith.addf %b0, %b1 : f32
 linalg.yield %add : f32
@@ -70,15 +70,15 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[INIT]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK-DAG: %[[BIAS_TILE:.+]] = tensor.extract_slice %[[ARG2]][%[[IV1]]]
 // CHECK-DAG: %[[OUTS_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GEMM_TILE]], %[[BIAS_TILE]] :
-// CHECK-SAME: outs(%[[OUTS_TILE]] :
+// CHECK-SAME: inits(%[[OUTS_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -91,14 +91,14 @@
 %d0 = tensor.dim %lhs0, %c0 : tensor
 %d1 = tensor.dim %rhs0, %c1 : tensor
 %init0 = tensor.empty(%d0, %d1) : tensor
- %fill0 = linalg.fill ins(%cst : f32) outs(%init0 : tensor) -> tensor
+ %fill0 = linalg.fill ins(%cst : f32) inits(%init0 : tensor) -> tensor
 %gemm0 = linalg.matmul
- ins(%lhs0, %rhs0 : tensor, tensor) outs(%fill0 : tensor) -> tensor
+ ins(%lhs0, %rhs0 : tensor, tensor) inits(%fill0 : tensor) -> tensor
 %d2 = tensor.dim %rhs1, %c1 : tensor
 %init1 = tensor.empty(%d0, %d2) : tensor
- %fill1 = linalg.fill ins(%cst : f32) outs(%init1 : tensor) -> tensor
+ %fill1 = linalg.fill ins(%cst : f32) inits(%init1 : tensor) -> tensor
 %gemm1 = linalg.matmul {__internal_linalg_transform__ = "gemm_fusion"}
- ins(%gemm0, %rhs1 : tensor, tensor) outs(%fill1 : tensor) -> tensor
+ ins(%gemm0, %rhs1 : tensor, tensor) inits(%fill1 : tensor) -> tensor
 return %gemm1 : tensor
 }
 // CHECK: func.func @gemm_gemm_fusion(
@@ -118,17 +118,17 @@
 // CHECK-DAG: %[[RHS0_TILE:.+]] = tensor.extract_slice %[[RHS0]][0, 0]
 // CHECK-DAG: %[[INIT0_TILE:.+]] = tensor.extract_slice %[[INIT0]][%[[IV]], 0]
 // CHECK: %[[FILL0_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]] :
 // CHECK: %[[GEMM0_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS0_TILE]], %[[RHS0_TILE]] :
-// CHECK-SAME: outs(%[[FILL0_TILE]] :
+// CHECK-SAME: inits(%[[FILL0_TILE]] :
 // CHECK-DAG: %[[RHS1_TILE:.+]] = tensor.extract_slice %[[RHS1]][0, 0]
 // CHECK-DAG: %[[INIT1_TILE:.+]] = tensor.extract_slice %[[ITERARG]][%[[IV]], 0]
 // CHECK: %[[FILL1_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT1_TILE]] :
+// CHECK-SAME: inits(%[[INIT1_TILE]] :
 // CHECK: %[[GEMM1_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[GEMM0_TILE]], %[[RHS1_TILE]] :
-// CHECK-SAME: outs(%[[FILL1_TILE]] :
+// CHECK-SAME: inits(%[[FILL1_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GEMM1_TILE]] into %[[ITERARG]][%[[IV]], 0]
 // CHECK: scf.yield %[[INSERT]]
@@ -141,16 +141,16 @@
 %d0 = tensor.dim %arg0, %c0 : tensor
 %d1 = tensor.dim %arg1, %c1 : tensor
 %init0 = tensor.empty(%d0, %d1) : tensor
- %fill = linalg.fill ins(%cst : f32) outs(%init0 : tensor) -> tensor
+ %fill = linalg.fill ins(%cst : f32) inits(%init0 : tensor) -> tensor
 %gemm = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%fill : tensor) -> tensor
+ inits(%fill : tensor) -> tensor
 %init1 = tensor.empty(%d1, %d0) : tensor
 %transpose = linalg.generic {
 __internal_linalg_transform__ = "fusion",
 indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d1, d0)>],
 iterator_types = ["parallel", "parallel"]}
- ins(%gemm : tensor) outs(%init1 : tensor) {
+ ins(%gemm : tensor) inits(%init1 : tensor) {
 ^bb0(%b0 : f32, %b1 : f32):
 linalg.yield %b0 : f32
 } -> tensor
@@ -173,14 +173,14 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[INIT0_TILE:.+]] = tensor.extract_slice %[[INIT0]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT0_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK-DAG: %[[OUTS_TILE:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GEMM_TILE]] :
-// CHECK-SAME: outs(%[[OUTS_TILE]] :
+// CHECK-SAME: inits(%[[OUTS_TILE]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -193,15 +193,15 @@
 %d1 = tensor.dim %arg1, %c1 : tensor
 %cst = arith.constant 0.0 : f32
 %0 = tensor.empty(%d0, %d1) : tensor
- %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor) -> tensor
+ %1 = linalg.fill ins(%cst : f32) inits(%0 : tensor) -> tensor
 %2 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%1 : tensor) -> tensor
+ inits(%1 : tensor) -> tensor
 %3 = linalg.generic {
 __internal_linalg_transform__ = "gemm_interchange_fusion",
 indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0, d1)>],
 iterator_types = ["parallel", "parallel"]}
- ins(%2 : tensor) outs(%0 : tensor) {
+ ins(%2 : tensor) inits(%0 : tensor) {
 ^bb0(%b0 : f32, %b1 : f32):
 %4 = arith.addf %b0, %b0 : f32
 linalg.yield %4 : f32
@@ -220,14 +220,14 @@
 // CHECK-DAG: %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV0]]]
 // CHECK-DAG: %[[INIT_TILE:.+]] = tensor.extract_slice %[[INIT]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[FILL_TILE:.+]] = linalg.fill
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[FILL_TILE]] :
+// CHECK-SAME: inits(%[[FILL_TILE]] :
 // CHECK: %[[INIT_TILE_2:.+]] = tensor.extract_slice %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[GENERIC_TILE:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[GEMM_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE_2]] :
+// CHECK-SAME: inits(%[[INIT_TILE_2]] :
 // CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[GENERIC_TILE]] into %[[ITERARG1]][%[[IV1]], %[[IV0]]]
 // CHECK: scf.yield %[[INSERT]]
@@ -240,7 +240,7 @@
 %0 = tensor.dim %arg2, %c0 : tensor
 %1 = tensor.dim %arg2, %c1 : tensor
 %2 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
 %3 = tensor.dim %2, %c0 : tensor
 %4 = tensor.dim %2, %c1 : tensor
 %5 = tensor.empty(%3, %4) : tensor
@@ -251,7 +251,7 @@
 iterator_types = ["parallel", "parallel"],
 __internal_linalg_transform__ = "gemm_plus_gemm_fusion"}
 ins(%2, %2 : tensor, tensor)
- outs(%5 : tensor) {
+ inits(%5 : tensor) {
 ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32) :
 %7 = arith.addf %arg3, %arg4 : f32
 linalg.yield %7 : f32
@@ -273,17 +273,17 @@
 // CHECK-DAG: %[[ST_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[LHS:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[ST_ARG0]], %[[ST_ARG1]] :
-// CHECK-SAME: outs(%[[ST_ARG2]] :
+// CHECK-SAME: inits(%[[ST_ARG2]] :
 // CHECK-DAG: %[[ST_ARG0_1:.+]] = tensor.extract_slice %[[ARG0]][%[[IV0]], 0]
 // CHECK-DAG: %[[ST_ARG1_1:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV1]]]
 // CHECK-DAG: %[[ST_ARG2_1:.+]] = tensor.extract_slice %[[ARG2]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[RHS:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[ST_ARG0_1]], %[[ST_ARG1_1]] :
-// CHECK-SAME: outs(%[[ST_ARG2_1]] :
+// CHECK-SAME: inits(%[[ST_ARG2_1]] :
 // CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[ST_RESULT:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[LHS]], %[[RHS]] :
-// CHECK-SAME: outs(%[[ST_ARG6]] :
+// CHECK-SAME: inits(%[[ST_ARG6]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[ST_RESULT]]
 // CHECK-SAME: into %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[UPDATE]]
@@ -299,7 +299,7 @@
 %0 = tensor.dim %arg2, %c0 : tensor
 %1 = tensor.dim %arg2, %c1 : tensor
 %2 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
 %3 = tensor.dim %2, %c0 : tensor
 %4 = tensor.dim %2, %c1 : tensor
 %5 = tensor.empty(%3, %4) : tensor
@@ -310,7 +310,7 @@
 iterator_types = ["parallel", "parallel"],
 __internal_linalg_transform__ = "gemm_plus_gemm_fusion"}
 ins(%2, %2 : tensor, tensor)
- outs(%5 : tensor) {
+ inits(%5 : tensor) {
 ^bb0(%arg3 : f32, %arg4 : f32, %arg5 : f32) :
 %7 = arith.addf %arg3, %arg4 : f32
 linalg.yield %7 : f32
@@ -331,17 +331,17 @@
 // CHECK: %[[LHS:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[ST_ARG0]], %[[ST_ARG1]]
 // CHECK-SAME: : tensor, tensor)
-// CHECK-SAME: outs(%[[ST_ARG2]] : tensor)
+// CHECK-SAME: inits(%[[ST_ARG2]] : tensor)
 // CHECK-DAG: %[[STR_ARG0:.+]] = tensor.extract_slice %[[ARG0]][%[[IV1]], 0]
 // CHECK-DAG: %[[STR_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, %[[IV0]]]
 // CHECK-DAG: %[[STR_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV1]], %[[IV0]]]
 // CHECK: %[[RHS:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[STR_ARG0]], %[[STR_ARG1]] :
-// CHECK-SAME: outs(%[[STR_ARG2]] :
+// CHECK-SAME: inits(%[[STR_ARG2]] :
 // CHECK: %[[ST_ARG6:.+]] = tensor.extract_slice %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: %[[ST_RESULT:.+]] = linalg.generic
 // CHECK-SAME: ins(%[[LHS]], %[[RHS]] :
-// CHECK-SAME: outs(%[[ST_ARG6]] :
+// CHECK-SAME: inits(%[[ST_ARG6]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[ST_RESULT]]
 // CHECK-SAME: into %[[ARG6]][%[[IV0]], %[[IV1]]]
 // CHECK: scf.yield %[[UPDATE]]
@@ -354,13 +354,13 @@
 %arg2: tensor, %arg3: tensor, %arg4: tensor, %arg5: tensor, %arg6: tensor) -> tensor {
 %0 = linalg.matmul ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor // [M, N0] * [N0, N1]
+ inits(%arg2 : tensor) -> tensor // [M, N0] * [N0, N1]
 %1 = linalg.matmul ins(%0, %arg3 : tensor, tensor)
- outs(%arg4 : tensor) -> tensor // [M, N1] * [N1, N2]
+ inits(%arg4 : tensor) -> tensor // [M, N1] * [N1, N2]
 %2 = linalg.matmul {__internal_linalg_transform__ = "gemm_sequence_fusion"}
 ins(%1, %arg5 : tensor, tensor)
- outs(%arg6 : tensor) -> tensor // [M, N2] * [N2, N3]
+ inits(%arg6 : tensor) -> tensor // [M, N2] * [N2, N3]
 return %2 : tensor
 }
@@ -389,15 +389,15 @@
 // CHECK-DAG: %[[SLICE_ARG1:.+]] = tensor.extract_slice %[[ARG1]][0, 0] [%[[N0]], %[[N1]]]
 // CHECK-DAG: %[[SLICE_ARG2:.+]] = tensor.extract_slice %[[ARG2]][%[[IV]], 0] [%[[TILE_M]], %[[N1]]]
 // CHECK-DAG: %[[TILE_GEMM1:.+]] = linalg.matmul ins(%[[SLICE_ARG0]], %[[SLICE_ARG1]] :
-// CHECK-SAME: outs(%[[SLICE_ARG2]] :
+// CHECK-SAME: inits(%[[SLICE_ARG2]] :
 // CHECK-DAG: %[[SLICE_ARG3:.+]] = tensor.extract_slice %[[ARG3]][0, 0] [%[[N1]], %[[N2]]]
 // CHECK-DAG: %[[SLICE_ARG4:.+]] = tensor.extract_slice %[[ARG4]][%[[IV]], 0] [%[[TILE_M]], %[[N2]]]
 // CHECK-DAG: %[[TILE_GEMM2:.+]] = linalg.matmul ins(%[[TILE_GEMM1]], %[[SLICE_ARG3]] :
-// CHECK-SAME: outs(%[[SLICE_ARG4]] :
+// CHECK-SAME: inits(%[[SLICE_ARG4]] :
 // CHECK-DAG: %[[SLICE_ARG5:.+]] = tensor.extract_slice %[[ARG5]][0, 0] [%[[N2]], %[[N3]]]
 // CHECK-DAG: %[[SLICE_ARG6:.+]] = tensor.extract_slice %[[ARG8]][%[[IV]], 0] [%[[TILE_M]], %[[N3]]]
 // CHECK-DAG: %[[TILE_GEMM3:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[TILE_GEMM2]], %[[SLICE_ARG5]] :
-// CHECK-SAME: outs(%[[SLICE_ARG6]] :
+// CHECK-SAME: inits(%[[SLICE_ARG6]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[TILE_GEMM3]] into %[[ARG8]][%[[IV]], 0] [%[[TILE_M]], %[[N3]]]
 // CHECK: scf.yield %[[UPDATE]]
diff --git a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
--- a/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
+++ b/mlir/test/Interfaces/TilingInterface/tile-using-interface.mlir
@@ -4,7 +4,7 @@
 %arg2 : tensor) -> tensor {
 %0 = linalg.matmul {__internal_linalg_transform__ = "simple_gemm"}
 ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
 return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -34,7 +34,7 @@
 // CHECK-SAME: [%[[IV0]], %[[IV1]]] [%[[TS_Y]], %[[TS_X]]] [1, 1]
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[INIT1]]
 // CHECK-SAME: [%[[IV0]], %[[IV1]]] [%[[TS_Y]], %[[TS_X]]] [1, 1]
 // CHECK: scf.yield %[[UPDATE]]
@@ -47,7 +47,7 @@
 %arg2 : memref) {
 linalg.matmul {__internal_linalg_transform__ = "simple_gemm_memref"}
 ins(%arg0, %arg1 : memref, memref)
- outs(%arg2 : memref)
+ inits(%arg2 : memref)
 return
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -79,7 +79,7 @@
 // CHECK-SAME: [%[[IV0]], %[[IV1]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
 // CHECK: linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[OUT_TILE]] :
+// CHECK-SAME: inits(%[[OUT_TILE]] :
 // -----
@@ -94,7 +94,7 @@
 iterator_types = ["parallel", "parallel", "parallel"]}
 {__internal_linalg_transform__ = "parallel_generic_transpose"}
 ins(%arg0 : tensor<128x200x300xf32>)
- outs(%init0, %init1 : tensor<128x300x200xf32>, tensor<300x128x200xf32>) {
+ inits(%init0, %init1 : tensor<128x300x200xf32>, tensor<300x128x200xf32>) {
 ^bb0(%b0 : f32, %b1 : f32, %b2 : f32):
 linalg.yield %b0, %b0 : f32, f32
 } -> (tensor<128x300x200xf32>, tensor<300x128x200xf32>)
@@ -123,7 +123,7 @@
 // CHECK-SAME: [%[[IV1]], %[[IV0]], 0] [20, %[[TS_Y]], 200] [1, 1, 1]
 // CHECK: %[[RESULT_TILE:.+]]:2 = linalg.generic
 // CHECK-SAME: ins(%[[ARG_TILE]] :
-// CHECK-SAME: outs(%[[INIT0_TILE]], %[[INIT1_TILE]] :
+// CHECK-SAME: inits(%[[INIT0_TILE]], %[[INIT1_TILE]] :
 // CHECK: %[[UPDATE0:.+]] = tensor.insert_slice %[[RESULT_TILE]]#0 into %[[ARG3]]
 // CHECK-SAME: [%[[IV0]], %[[IV1]], 0] [%[[TS_Y]], 20, 200] [1, 1, 1]
 // CHECK: %[[UPDATE1:.+]] = tensor.insert_slice %[[RESULT_TILE]]#1 into %[[ARG4]]
@@ -141,7 +141,7 @@
 dilation = dense<[4, 5]> : tensor<2xi64>,
 __internal_linalg_transform__ = "simple_conv"}
 ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
 return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (10, -d0 + s0)>
@@ -187,7 +187,7 @@
 // CHECK: %[[CONV_TILE:.+]] = linalg.conv_2d_nhwc_hwcf
 // CHECK-SAME: dilation = dense<[4, 5]> : tensor<2xi64>, strides = dense<[2, 3]> : tensor<2xi64>
 // CHECK-SAME: ins(%[[INPUT_TILE]], %[[FILTER_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: tensor.insert_slice %[[CONV_TILE]] into %[[INIT2]]
 // CHECK-SAME: [0, 0, 0, 0] [%[[N]], %[[R]], %[[S]], %[[F]]]
@@ -207,7 +207,7 @@
 iterator_types = ["parallel", "parallel"]}
 {__internal_linalg_transform__ = "indexed_semantics"}
 ins(%arg0: tensor)
- outs(%arg1: tensor) {
+ inits(%arg1: tensor) {
 ^bb0(%arg2: f32, %arg3: f32):
 // CHECK: %[[INDEX0:.+]] = linalg.index 0
 // CHECK: %[[INDEX0_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX0]], %[[I0]])
@@ -231,7 +231,7 @@
 %arg2 : tensor) -> tensor {
 %0 = linalg.matmul {__internal_linalg_transform__ = "gemm_interchange"}
 ins(%arg0, %arg1 : tensor, tensor)
- outs(%arg2 : tensor) -> tensor
+ inits(%arg2 : tensor) -> tensor
 return %0 : tensor
 }
 // CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0)[s0] -> (20, -d0 + s0)>
@@ -266,7 +266,7 @@
 // CHECK-SAME: [%[[IV2]], %[[IV0]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
 // CHECK: %[[GEMM_TILE:.+]] = linalg.matmul
 // CHECK-SAME: ins(%[[LHS_TILE]], %[[RHS_TILE]] :
-// CHECK-SAME: outs(%[[INIT_TILE]] :
+// CHECK-SAME: inits(%[[INIT_TILE]] :
 // CHECK: %[[UPDATE:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[INIT2]]
 // CHECK-SAME: [%[[IV2]], %[[IV0]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
 // CHECK: scf.yield %[[UPDATE]]
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -2806,7 +2806,7 @@
 let assemblyFormat = [{
 attr-dict (`ins` `(` $inputs^ `:` type($inputs) `)`)?
- `outs` `(` $outputs `:` type($outputs) `)`
+ `inits` `(` $outputs `:` type($outputs) `)`
 $region (`->` type($results)^)?
 }];
@@ -2866,7 +2866,7 @@
 let assemblyFormat = [{
 attr-dict (`ins` `(` $inputs^ `:` type($inputs) `)`)?
- `outs` `(` $outputs `:` type($outputs) `)`
+ `inits` `(` $outputs `:` type($outputs) `)`
 $region (`->` type($results)^)?
 }];
diff --git a/mlir/test/mlir-cpu-runner/async.mlir b/mlir/test/mlir-cpu-runner/async.mlir
--- a/mlir/test/mlir-cpu-runner/async.mlir
+++ b/mlir/test/mlir-cpu-runner/async.mlir
@@ -22,7 +22,7 @@
 %c4 = arith.constant 4.0 : f32
 %A = memref.alloc() : memref<4xf32>
- linalg.fill ins(%c0 : f32) outs(%A : memref<4xf32>)
+ linalg.fill ins(%c0 : f32) inits(%A : memref<4xf32>)
 // CHECK: [0, 0, 0, 0]
 %U = memref.cast %A : memref<4xf32> to memref<*xf32>
diff --git a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
--- a/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
+++ b/mlir/test/mlir-cpu-runner/sgemm-naive-codegen.mlir
@@ -7,14 +7,14 @@
 %cf1 = arith.constant 1.00000e+00 : f32
- linalg.fill ins(%cf1 : f32) outs(%A : memref<16x16xf32>)
- linalg.fill ins(%cf1 : f32) outs(%B : memref<16x16xf32>)
+ linalg.fill ins(%cf1 : f32) inits(%A : memref<16x16xf32>)
+ linalg.fill ins(%cf1 : f32) inits(%B : memref<16x16xf32>)
 %reps = arith.constant 1 : index
 %t_start = call @rtclock() : () -> f64
 affine.for %arg0 = 0 to 5 {
- linalg.fill ins(%cf1 : f32) outs(%C : memref<16x16xf32>)
+ linalg.fill ins(%cf1 : f32) inits(%C : memref<16x16xf32>)
 func.call @sgemm_naive(%A, %B, %C) : (memref<16x16xf32>, memref<16x16xf32>, memref<16x16xf32>) -> ()
 }
 %t_end = call @rtclock() : () -> f64
diff --git a/mlir/test/mlir-cpu-runner/unranked-memref.mlir b/mlir/test/mlir-cpu-runner/unranked-memref.mlir
--- a/mlir/test/mlir-cpu-runner/unranked-memref.mlir
+++ b/mlir/test/mlir-cpu-runner/unranked-memref.mlir
@@ -42,18 +42,18 @@
 %f10 = arith.constant 10.00000e+00 : f32
 %V = memref.cast %A : memref<10x3xf32, 0> to memref
- linalg.fill ins(%f10 : f32) outs(%V : memref)
+ linalg.fill ins(%f10 : f32) inits(%V : memref)
 %U = memref.cast %A : memref<10x3xf32, 0> to memref<*xf32>
 call @printMemrefF32(%U) : (memref<*xf32>) -> ()
 %V2 = memref.cast %U : memref<*xf32> to memref
- linalg.fill ins(%f5 : f32) outs(%V2 : memref)
+ linalg.fill ins(%f5 : f32) inits(%V2 : memref)
 %U2 = memref.cast %V2 : memref to memref<*xf32>
 call @printMemrefF32(%U2) : (memref<*xf32>) -> ()
 %V3 = memref.cast %V2 : memref to memref<*xf32>
 %V4 = memref.cast %V3 : memref<*xf32> to memref
- linalg.fill ins(%f2 : f32) outs(%V4 : memref)
+ linalg.fill ins(%f2 : f32) inits(%V4 : memref)
 %U3 = memref.cast %V2 : memref to memref<*xf32>
 call @printMemrefF32(%U3) : (memref<*xf32>) -> ()
@@ -79,7 +79,7 @@
 func.func @return_two_var_memref_caller() {
 %0 = memref.alloca() : memref<4x3xf32>
 %c0f32 = arith.constant 1.0 : f32
- linalg.fill ins(%c0f32 : f32) outs(%0 : memref<4x3xf32>)
+ linalg.fill ins(%c0f32 : f32) inits(%0 : memref<4x3xf32>)
 %1:2 = call @return_two_var_memref(%0) : (memref<4x3xf32>) -> (memref<*xf32>, memref<*xf32>)
 call @printMemrefF32(%1#0) : (memref<*xf32>) -> ()
 call @printMemrefF32(%1#1) : (memref<*xf32>) -> ()
@@ -94,7 +94,7 @@
 func.func @return_var_memref_caller() {
 %0 = memref.alloca() : memref<4x3xf32>
 %c0f32 = arith.constant 1.0 : f32
- linalg.fill ins(%c0f32 : f32) outs(%0 : memref<4x3xf32>)
+ linalg.fill ins(%c0f32 : f32) inits(%0 : memref<4x3xf32>)
 %1 = call @return_var_memref(%0) : (memref<4x3xf32>) -> memref<*xf32>
 call @printMemrefF32(%1) : (memref<*xf32>) -> ()
 return
diff --git a/mlir/test/mlir-cpu-runner/utils.mlir b/mlir/test/mlir-cpu-runner/utils.mlir
--- a/mlir/test/mlir-cpu-runner/utils.mlir
+++ b/mlir/test/mlir-cpu-runner/utils.mlir
@@ -19,7 +19,7 @@
 %f = arith.constant 2.00000e+00 : f32
 %A = memref.alloc() : memref<16xf32>
 %B = memref.cast %A: memref<16xf32> to memref
- linalg.fill ins(%f : f32) outs(%B : memref)
+ linalg.fill ins(%f : f32) inits(%B : memref)
 %U = memref.cast %B : memref to memref<*xf32>
 call @printMemrefF32(%U): (memref<*xf32>) -> ()
 memref.dealloc %A : memref<16xf32>
@@ -33,7 +33,7 @@
 %f4 = arith.constant 4.00000e+00 : f32
 %A = memref.alloc() : memref<3x4x5xf32>
 %B = memref.cast %A: memref<3x4x5xf32> to memref
- linalg.fill ins(%f : f32) outs(%B : memref)
+ linalg.fill ins(%f : f32) inits(%B : memref)
 %c2 = arith.constant 2 : index
 memref.store %f4, %B[%c2, %c2, %c2]: memref
diff --git a/mlir/test/mlir-opt/async.mlir b/mlir/test/mlir-opt/async.mlir
--- a/mlir/test/mlir-opt/async.mlir
+++ b/mlir/test/mlir-opt/async.mlir
@@ -20,7 +20,7 @@
 %c4 = arith.constant 4.0 : f32
 %A = memref.alloc() : memref<4xf32>
- linalg.fill ins(%c0 : f32) outs(%A : memref<4xf32>)
+ linalg.fill ins(%c0 : f32) inits(%A : memref<4xf32>)
 %U = memref.cast %A : memref<4xf32> to memref<*xf32>
 call @printMemrefF32(%U): (memref<*xf32>) -> ()
diff --git a/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
--- a/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
+++ b/mlir/test/python/dialects/linalg/opdsl/emit_matmul.py
@@ -56,7 +56,7 @@
 # CHECK-SAME: indexing_maps = [#[[$MUL_MAP_A]], #[[$MUL_MAP_B]], #[[$MUL_MAP_C]]]
 # CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
 # CHECK-SAME: ins(%[[A]], %[[B]]
- # CHECK-SAME: outs(%[[INITC]]
+ # CHECK-SAME: inits(%[[INITC]]
 @func.FuncOp.from_py_func(
 RankedTensorType.get((4, 16), f32), RankedTensorType.get((16, 8), f32))
 def test_matmul_mono(lhs, rhs):
diff --git a/mlir/test/python/dialects/linalg/ops.py b/mlir/test/python/dialects/linalg/ops.py
--- a/mlir/test/python/dialects/linalg/ops.py
+++ b/mlir/test/python/dialects/linalg/ops.py
@@ -21,7 +21,7 @@
 # CHECK-LABEL: func @fill_tensor
 # CHECK-SAME: %[[OUT:[0-9a-z]+]]: tensor<12x?xf32>
 # CHECK-NEXT: %[[CST:.*]] = arith.constant 0.0{{.*}} : f32
- # CHECK-NEXT: %[[RES:.*]] = linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : tensor<12x?xf32>) -> tensor<12x?xf32>
+ # CHECK-NEXT: %[[RES:.*]] = linalg.fill ins(%[[CST]] : f32) inits(%[[OUT]] : tensor<12x?xf32>) -> tensor<12x?xf32>
 # CHECK-NEXT: return %[[RES]] : tensor<12x?xf32>
 @func.FuncOp.from_py_func(
 RankedTensorType.get((12, ShapedType.get_dynamic_size()), f32))
@@ -32,7 +32,7 @@
 # CHECK-LABEL: func @fill_buffer
 # CHECK-SAME: %[[OUT:[0-9a-z]+]]: memref<12x?xf32>
 # CHECK-NEXT: %[[CST:.*]] = arith.constant 0.0{{.*}} : f32
- # CHECK-NEXT: linalg.fill ins(%[[CST]] : f32) outs(%[[OUT]] : memref<12x?xf32>)
+ # CHECK-NEXT: linalg.fill ins(%[[CST]] : f32) inits(%[[OUT]] : memref<12x?xf32>)
 # CHECK-NEXT: return
 @func.FuncOp.from_py_func(
 MemRefType.get((12, ShapedType.get_dynamic_size()), f32))
@@ -59,12 +59,12 @@
 # CHECK: linalg.elemwise_unary
 # CHECK-SAME: cast = #linalg.type_fn
 # CHECK-SAME: fun = #linalg.unary_fn
- # CHECK-SAME: ins(%{{.*}} : tensor<4x8xf32>) outs(%{{.*}} : tensor<4x8xf32>)
+ # CHECK-SAME: ins(%{{.*}} : tensor<4x8xf32>) inits(%{{.*}} : tensor<4x8xf32>)
 unary_result = linalg.elemwise_unary(lhs, outs=[init_result.result])
 # CHECK: linalg.elemwise_binary
 # CHECK-SAME: cast = #linalg.type_fn
 # CHECK-SAME: fun = #linalg.binary_fn
- # CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<4x8xf32>, tensor<4x8xf32>) outs(%{{.*}} : tensor<4x8xf32>)
+ # CHECK-SAME: ins(%{{.*}}, %{{.*}} : tensor<4x8xf32>, tensor<4x8xf32>) inits(%{{.*}} : tensor<4x8xf32>)
 # CHECK: return
 binary_result = linalg.elemwise_binary(
 lhs,
@@ -144,7 +144,7 @@
 init = tensor.EmptyOp([4, 8], f32)
 # CHECK: linalg.matmul
 # CHECK: ins(%[[LHS]], %[[RHS]]
- # CHECK: outs(%[[INIT]]
+ # CHECK: inits(%[[INIT]]
 return linalg.matmul(lhs, rhs, outs=init)
 print(module)
diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py
--- a/mlir/test/python/integration/dialects/linalg/opsrun.py
+++ b/mlir/test/python/integration/dialects/linalg/opsrun.py
@@ -29,10 +29,10 @@
 %rhs = memref.alloc() : memref<4x8xf32>
 %O0 = memref.alloc() : memref<4x8xf32>
 %O1 = memref.alloc() : memref<4x8xf32>
- linalg.fill ins(%v1 : f32) outs(%lhs : memref)
- linalg.fill ins(%v2 : f32) outs(%rhs : memref<4x8xf32>)
- linalg.fill ins(%v0 : f32) outs(%O0 : memref<4x8xf32>)
- linalg.fill ins(%v0 : f32) outs(%O1 : memref<4x8xf32>)
+ linalg.fill ins(%v1 : f32) inits(%lhs : memref)
+ linalg.fill ins(%v2 : f32) inits(%rhs : memref<4x8xf32>)
+ linalg.fill ins(%v0 : f32) inits(%O0 : memref<4x8xf32>)
+ linalg.fill ins(%v0 : f32) inits(%O1 : memref<4x8xf32>)
 call @elemwise_exp_add_on_buffers(%lhs, %rhs, %O0) : (memref, memref<4x8xf32>, memref<4x8xf32>) -> ()
@@ -60,10 +60,10 @@
 %B = memref.alloc() : memref<16x8xf32>
 %C0 = memref.alloc() : memref<4x8xf32>
 %C1 = memref.alloc() : memref<4x8xf32>
- linalg.fill ins(%v1 : i8) outs(%A : memref<4x16xi8>)
- linalg.fill ins(%v2 : f32) outs(%B : memref<16x8xf32>)
- linalg.fill ins(%v0 : f32) outs(%C0 : memref<4x8xf32>)
- linalg.fill ins(%v0 : f32) outs(%C1 : memref<4x8xf32>)
+ linalg.fill ins(%v1 : i8) inits(%A : memref<4x16xi8>)
+ linalg.fill ins(%v2 : f32) inits(%B : memref<16x8xf32>)
+ linalg.fill ins(%v0 : f32) inits(%C0 : memref<4x8xf32>)
+ linalg.fill ins(%v0 : f32) inits(%C1 : memref<4x8xf32>)
 call @matmul_signed_on_buffers(%A, %B, %C0) : (memref<4x16xi8>, memref<16x8xf32>, memref<4x8xf32>) -> ()
@@ -137,9 +137,9 @@
 %input = memref.alloc() : memref<1x4x16x1xf64>
 %filter = memref.alloc() : memref<2x2x1xf64>
 %output = memref.alloc() : memref<1x2x4x1xi32>
- linalg.fill ins(%v1 : f64) outs(%input : memref<1x4x16x1xf64>)
- linalg.fill ins(%v2 : f64) outs(%filter : memref<2x2x1xf64>)
- linalg.fill ins(%v0 : i32) outs(%output : memref<1x2x4x1xi32>)
+ linalg.fill ins(%v1 : f64) inits(%input : memref<1x4x16x1xf64>)
+ linalg.fill ins(%v2 : f64) inits(%filter : memref<2x2x1xf64>)
+ linalg.fill ins(%v0 : i32) inits(%output : memref<1x2x4x1xi32>)
 call @conv_on_buffers(%input, %filter, %output) : (memref<1x4x16x1xf64>, memref<2x2x1xf64>, memref<1x2x4x1xi32>) -> ()
@@ -163,9 +163,9 @@
 %input = memref.alloc() : memref<1x4x16x1xf64>
 %shape = memref.alloc() : memref<2x2xf64>
 %output = memref.alloc() : memref<1x2x4x1xi32>
- linalg.fill ins(%v1 : f64) outs(%input : memref<1x4x16x1xf64>)
- linalg.fill ins(%v1 : f64) outs(%shape : memref<2x2xf64>)
- linalg.fill ins(%v0 : i32) outs(%output : memref<1x2x4x1xi32>)
+ linalg.fill ins(%v1 : f64) inits(%input : memref<1x4x16x1xf64>)
+ linalg.fill ins(%v1 : f64) inits(%shape : memref<2x2xf64>)
+ linalg.fill ins(%v0 : i32) inits(%output : memref<1x2x4x1xi32>)
 %c0 = arith.constant 0 : index
 %c1 = arith.constant 1 : index